1 /** 2 * Create SQL queries from string containing D expressions. 3 4 * This module allows you to generate SQL from a template, containing D expressions. Depending on these expressions 5 * parts of the template will be included in the SQL or not. 6 7 * Authors: Robert Klotzner, robert.klotzner at gmail.com 8 * Date: January 7, 2013 9 * License: GNU General Public License version 3 <http://www.gnu.org/licenses/> 10 * Copyright: 2013 Robert Klotzner 11 12 * Bugs: Unicode not really supported. I haven't really explored that yet, but 13 * I believe that at the moment string processing will fail for multibyte 14 * utf-8 characters in the input string. 15 */ 16 module arsd.querygenerator; 17 18 import std.exception; 19 import std.uni; 20 import std.string; 21 import std.variant; 22 import std.conv; 23 import std.typetuple; 24 25 26 /** 27 * The generated query + args. 28 * 29 * It offers support for concatenation, so you can create your query in parts and concatenate them afterwards. 30 * Ths string prepend will be inserted inbetween two CreatedQuery.query strings at concatation if and only if both query strings are non empty. 31 * The resulting CreatedQuery has a prepend string equal to the one of the left side of the '~' operation. 32 33 * Beware that because of the handling of the prepend string, the result of multiple concatenations depends on the order of evaluation (if empty strings are involved). 34 * You might need parantheses or splitting 35 * it up in multiple statements to achieve the desired effect. 36 */ 37 struct CreatedQuery { 38 /** 39 * Concatenation with automatically inserted prepend string in between. 40 41 * The prepend string gets inserted inbetween the two query strings if and only if both query strings are non empty. 42 * The prepend string of the resulting object is the one of the left object. Thus the resulting object depends on the order of execution if empty strings are involved. 43 * See the unittest to this struct for details. 44 */ 45 CreatedQuery opBinary(string op)(CreatedQuery right) if(op=="~") { 46 CreatedQuery res=this; 47 res~=right; 48 return res; 49 } 50 51 CreatedQuery opBinary(string op)(string right) if(op=="~") { 52 CreatedQuery res=this; 53 res.query~=right; 54 return res; 55 } 56 57 CreatedQuery opBinaryRight(string op)(string left) if(op=="~") { 58 CreatedQuery res=this; 59 if(left.length && query.length) 60 left~=prepend; 61 res.query=left~res.query; 62 return res; 63 } 64 65 66 ref CreatedQuery opOpAssign(string op)(string right) if(op=="~") { 67 query~=right; 68 return this; 69 } 70 ref CreatedQuery opOpAssign(string op)(CreatedQuery right) if(op=="~") { 71 query~=(query.length && right.query.length) ? right.prepend : ""; 72 query~=right.query; 73 args~=right.args; 74 return this; 75 } 76 /// Currently not const because of bug: 77 /// http://d.puremagic.com/issues/show_bug.cgi?id=8759 78 bool opEquals( CreatedQuery other) { 79 return query==other.query && args==other.args && prepend==other.prepend; 80 } 81 string query; 82 Variant[] args; 83 string prepend; 84 } 85 86 unittest { 87 auto q1=CreatedQuery("select * from table"); 88 CreatedQuery q2=CreatedQuery("", [], " where "); 89 auto q3=CreatedQuery("col1=? and col2=?", [Variant(7), Variant("huhu")], " and "); 90 auto q4=(q1~(q2~q3)); 91 q1~=q2~q3; 92 import std.stdio; 93 writefln("q4: %s, q1: %s", q4, q1); 94 assert(q4==q1); 95 assert(q1==CreatedQuery("select * from table where col1=? and col2=?", [Variant(7), Variant("huhu")], "")); 96 } 97 98 /** 99 * This method generates D code from the preSql data, which can be mixed in for generating the resulting sql and the resulting Variant[] array. 100 * The variable sql_ will contain the generated SQL. 101 * The variable args_ is the Variant[] array. 102 * Use createQuery for directly generating the resulting SQL and args. 103 * Params: 104 * preSql = The to be processed SQL containing D expressions in #{} blocks, sub blocks in {} and declarations in ${}. 105 * Returns: D code which can be mixed in. 106 **/ 107 string createQueryGenerator(string preSql) { 108 string out_cmd; 109 out_cmd~="string sql_;\n"; 110 out_cmd~="Variant[] args_;\n"; 111 out_cmd~="with(data_) {\n"; 112 out_cmd~=preSqlParser(preSql); 113 out_cmd~="\n}"; 114 return out_cmd; 115 } 116 117 /** 118 * Uses createQueryGenerator for actually doing the job. 119 * data_ will be made available with D's 'with' statement to the embedded code in queryString. So you can access the elements just as regular variables. 120 * 121 * Params: 122 * queryString = An SQL template. An SQL template consists of nested blocks. The 123 * uppermost block is the queryString itself, you create subblocks by 124 * enclosing it in braces '{}'. 125 126 __Declaration part__ 127 128 Each block might start with a declaration part. It does 129 so if the first character in the block is a '$'. The 130 declaration part ends with a colon ':'. 131 It consists of declarations which start with '${' and end 132 with '}'. Two types of declarations are allowed: 133 134 1. variable assignment of the form: ${a=5} This will 135 result in a D expression in the generated code of the 136 form 'auto a=5;'. 137 138 2. Foreach loop declaration. This one might only occur 139 once in the declaration part and causes the block in 140 which it is declared to be executed in a loop. 141 It looks like: ${a in someArray} 142 143 3. Special assignment of ${queryGenSep=" or "} : This one 144 is specially treated. If you specified a loop the 145 generated string of each iteration will be concatenated 146 and the queryGenSep will be used to separate them. If you 147 don't specify one, the default will be " or ". 148 149 Variable assignments before the optional loop declaration 150 will be declared outside the loop, the ones after the 151 loop declaration will be declared within the loop and 152 might access the loop variable, ('a' in our case). 153 154 Multiple declarations of the form '${}' might be present 155 within the declaration part, they may be separated by 156 white space. 157 158 __Body part__ 159 160 If no declaration part is present, then the whole content 161 of the block is the body part. (This is, if the first 162 character is no '$'.) Otherwise it starts after the colon 163 at the end of the declaration part. 164 165 Everything in the body part will be echoed to the 166 resulting SQL so you can just write plain SQL now. There 167 are only a few exceptions: 168 169 1. A '{' will introduce a child block. 170 171 2. A '#{' will start a D expression, which ends with 172 another '}' 173 174 3. A '$' will trigger an error as it is only valid in the 175 declaration part. 176 177 4. SQL string which can either start with " or with '. It 178 might contain any of '{' '}' '$' '#' they all will be 179 ignored within a string. Apart from this, no escaping of 180 these characters is possible at the moment. 181 182 __D expression__ 183 184 A D expression might occur any amount of times within the 185 body part of a block. It contents will be evaluated as D 186 code. Which must evaluate to some value. If this value is 187 different from the value's types init value it is 188 considered valid, then it will be included in the Variant[] array and a '?' 189 gets inserted into the resulting SQL. If the returned value equals the type's 190 init value then no '?' gets inserted and the value will not be present in the 191 args array. 192 193 So if the D expression evaluates to a string and is the 194 empty string, it will not be included. 195 196 The expression is contained within #{ D expression }. 197 198 The D code has access to the passed params via params[0] 199 .. params[n], all declarations made in the declarations 200 parts of the blocks above it and the contents of the data_ struct. 201 It is made available with D's 'with' statement so 202 data_.foo will just be 'foo'. 203 204 __Blocks__ 205 206 Blocks are the building blocks of the resulting 207 expression. If a block contains a D expression or a 208 subblock which contains D expressions, then its contents 209 will only be included in the output if at least one of 210 the D expressions are valid or if it contains a text only sub block. 211 So if you have a SQL expression of the 212 form {someColumn=#{some D code}} there will be no output 213 at all if #{some D code} was not valid. If a block does 214 not contain any D code and neither do any sub blocks then 215 its output will be included in the output of the 216 containing block. If you want to ensure that a particular 217 text does not get dropped, include it in its own block. 218 219 The second property that makes blocks useful is that if a 220 block produces no output, for the above mentioned 221 reasons, then the text before it will be dropped if there 222 were no valid sub blocks found already in the parent block. 223 It will also be dropped if the preceding block vanished. This way you can 224 concatenate blocks with ' and ' and ' or ' and they will 225 be dropped if not applicable. A leading ' where ' on the 226 other hand would not be dropped in any case, except the 227 whole output vanishes. 228 229 For examples, see the unittests, a more complex example, I used in reality is here: 230 `({${db=params[0]} : ` 231 `{` 232 `${d in datespan} ${arData=db.autoRunData(d.autorun)}: {({date>=#{d.from}} and {date<=#{d.to}} and ` 233 `{({task_run.id>=#{arData.firstId==long.max ? 0 : arData.firstId}} and {task_run.id<=#{arData.lastId}} and {comment like #{d.autorun.length ? "%:Autorun:"~d.autorun~":Autorun:%" : ""}})})}` 234 `}` 235 `})` 236 237 Right at the start I am starting the uppermost block with a 238 '(' which means it has no declaration part, then I start 239 the first sub block, followed by declaration I needed. 240 Then I start another block with '{' followed by a loop 241 declaration (d will iterate over the datespan array which 242 comes from the data_ struct) followed by an inner 243 variable declaration of arData, you can see both d (the 244 looping variable) and db from the outer block are 245 accessible. Afterwards the body starts with a 246 block containing '(' and subblocks of comparisons with a D 247 expression like {({date>=#{d.from}})}. 248 If all D expressions in the block like d.from evaluate to 249 an empty string, the whole containing block will not produce 250 any output. So not even '()' this is because they are 251 enclused in a subblock. The 'and' between {date>=#{d.from}} 252 and {date<=#{d.to}} for example will only appear in the 253 output if both d.from and d.to contained a valid non empty 254 string. 255 256 The outputs of each loop iteration will be separated with " or " by default, 257 you can change this, by setting queryGenSep in the declaration part. 258 259 data_ = The contents of data_ will be made available to the D expressions and declarations in the queryString. (The code gets embedded withing a with(data_) { ... }) 260 261 params = Additional data you need in the queryString. You can access it via params[0], params[1], ... 262 If you happen to have a member of data_ which is named params, you will have to access it via data_.params. 263 264 */ 265 CreatedQuery createQuery(string queryString, StructT, Params...)(StructT data_, Params params) { 266 debug(queryGenerator) import std.stdio; 267 mixin(createQueryGenerator(queryString)); 268 debug(queryGenerator) writeln("Generated: ", createQueryGenerator(queryString)); 269 return CreatedQuery(sql_, args_); 270 //return CreatedQuery.init; 271 } 272 273 unittest { 274 import std.stdio; 275 struct Test1 { 276 string a; 277 string b; 278 } 279 struct Test { 280 Test1[] foo; 281 string bar; 282 int k; 283 } 284 Test t1; 285 CreatedQuery myQuery(Test t1) { 286 return createQuery!`select * from testtable where 287 {({${queryGenSep=" or "} ${f in foo} : ({col1=#{f.a}} and {col2=#{f.b}})})} or 288 {col3>#{k}}`(t1); 289 } 290 auto res=myQuery(t1); 291 writeln("Result with empty args: ", res.query); 292 assert(res.args==[]); 293 t1.k=9; 294 res=myQuery(t1); 295 writefln("Result with k set: %s, args: %s", res.query, res.args); 296 assert(res.args==[Variant(9)]); 297 t1.foo~=Test1(string.init, "Hallo du da!"); 298 res=myQuery(t1); 299 writeln("Result with foo.b and k set: ", res.query); 300 assert(res.args==[Variant("Hallo du da!"), Variant(9)]); 301 t1.foo~=Test1("Hallo du da!", string.init); 302 res=myQuery(t1); 303 writeln("Result with foo0.b and foo1.a and k set: ", res.query); 304 assert(res.args==[Variant("Hallo du da!"), Variant("Hallo du da!"), Variant(9)]); 305 t1.foo~=Test1("Hello!", "Cu!"); 306 res=myQuery(t1); 307 writeln("Result with foo0.b and foo1.a and foo2.a and foo2.b and k set: ", res.query); 308 assert(res.args==[Variant("Hallo du da!"), Variant("Hallo du da!"), Variant("Hello!"), Variant("Cu!"), Variant(9)]); 309 } 310 311 private: 312 void dropWhite(ref string buf) { 313 buf=buf.stripLeft(); 314 } 315 316 /// D's indexOf seemed not to work at compile time. 317 size_t indexOf(string heap, char needle) { 318 foreach(i, c; heap) { 319 if(needle==c) { 320 return i; 321 } 322 } 323 return -1; 324 } 325 //pragma(msg, createQueryGenerator( " (${item in datespan} : ( { date>=#{item.from} and} {date<=#{item.to} and} {comment like #{\"%:Autorun:\"~item.autorun~\":Autorun:%\"}})")); 326 //pragma(msg, createQueryGenerator( " Hello this is a test!")); 327 //pragma(msg, createQueryGenerator( ` ${item in datespan} : ( { date>=#{item.from} } and {date<=#{item.to} } and {comment like #{"%:Autorun:"~item.autorun~":Autorun:%"}})`)); 328 //pragma(msg, createQueryGenerator(`select * from testtable where 329 //{({${queryGenSep=" or "} ${f in foo} : ({col1=#{f.a}} and {col2=#{f.b}})})} or 330 //{col3>k}`)); 331 332 //Syntax: 333 // ${a=3} ${i in array} ${u=i.c} : some data #{D expression} { same again } 334 // presql : [decls :] body 335 // decls : [${variable=assignment}]* [${variable in array}] [${variable=assignment}]* 336 // body : [some_string | #{D expression} | {presql}]* 337 /// private 338 /// Handles the part before the ":" colon. (Variable declarations + loop declaration) 339 private static string doVariableProcessing(ref string part, int level) { 340 string output; 341 string for_each_decl; 342 string after_output; 343 bool queryGenSepFound=false; 344 string buf="buf"~to!string(level); 345 dropWhite(part); 346 assert(part); 347 immutable isFirst="isFirst"~to!string(level); 348 if(part[0]!='$') { 349 return "bool "~isFirst~"=false;\n"~"{\nstring "~buf~";\n"; 350 } 351 output~="bool "~isFirst~"=true;\n"; 352 while(true) { 353 assert(part.length>2); 354 assert(part[0]=='$', "Only declarations enclosed in '${' are allowed in declarations block. Invalid data found: "~part); 355 assert(part[1]=='{', "'{' in '${' is mandatory! Found at:"~part); 356 string var_name; 357 part=part[2..$]; 358 foreach(i,c; part) { 359 if(!c.isAlpha()) { 360 var_name=part[0..i]; 361 if(var_name=="queryGenSep") { 362 var_name="queryGenSep"~to!string(level); 363 queryGenSepFound=true; 364 } 365 part=part[i..$]; 366 break; 367 } 368 } 369 dropWhite(part); 370 assert(part.length, "Unexpected end of data, expected '=' or 'in'"); 371 enum Operation { 372 assignment, 373 in_array 374 } 375 Operation op; 376 switch(part[0]) { 377 case '=': op=Operation.assignment; 378 part=part[1..$]; 379 break; 380 case 'i': 381 assert(part.length>1 && part[1]=='n', "Expected 'n' after 'i' forming 'in', got: "~part); 382 part=part[2..$]; 383 op=Operation.in_array; 384 break; 385 default: assert(false, "Unexpected operation: Only variable assignment ('=') and array loop ('in') are supported, found at: "~part); 386 387 } 388 dropWhite(part); 389 string right_side; 390 foreach(i, c; part) { 391 if(c=='}') { 392 right_side=part[0..i]; 393 part=part[i+1..$]; 394 break; 395 } 396 } 397 if(op==Operation.assignment) { 398 string buff="auto "~var_name~"="~right_side~";\n"; 399 if(for_each_decl) 400 after_output~=buff; 401 else 402 output~=buff; 403 } 404 else { 405 if(for_each_decl) 406 assert(false, "Only one foreach declaration allowed, found second at: "~part); 407 for_each_decl="\nforeach("~var_name~"; "~right_side~") {\n"; 408 } 409 dropWhite(part); 410 if(part[0]==':') { 411 part=part[1..$]; 412 if(!queryGenSepFound) { 413 output~="immutable queryGenSep"~to!string(level)~"=\" or \";\n"; 414 } 415 for_each_decl = for_each_decl==[] ? "{\n" : for_each_decl; 416 after_output~="string "~buf~";\n"; 417 after_output~="if(!"~isFirst~") {\n"; 418 after_output~=buf~"=queryGenSep"~to!string(level)~";\n}\n"; 419 return output~for_each_decl~after_output; 420 } 421 else { 422 assert(part, "Unexpected end of string, expected another declaration '${}' or ':'."); 423 assert(part[0]=='$', "Expected ':' or another variable assignment ('$'), at: "~part); 424 } 425 } 426 } 427 428 // Extracts a D expression (#{}) from the string. 429 string dExpressionParser(ref string data, string buf, string validCount, string slevel, ref int count) { 430 assert(data.length>2 && data[0]=='#' && data[1]=='{'); 431 string out_cmd; 432 data=data[2..$]; 433 auto end=data.indexOf('}'); 434 assert(end>0, "Empty or non closed D expression found at: "~data); 435 string val="val"~slevel~"_"~to!string(count); 436 out_cmd~="auto "~val~"="~data[0..end]~";\n"; 437 out_cmd~="if("~val~"!=typeof("~val~").init) {\n"; 438 out_cmd~="debug(queryGenerator) writeln(\"Found valid value: \", "~val~");\n"; 439 out_cmd~=validCount~"++;\n"; 440 out_cmd~=buf~"~=\"?\";\n"; 441 out_cmd~="args_~=Variant("~val~");\n}\n"; 442 out_cmd~="else {\ndebug(queryGenerator) writeln(\"Found invalid value: \", "~val~");\n}\n"; 443 data=data[end+1..$]; 444 return out_cmd; 445 } 446 // do the parsing of an sql string ('....' or ".....") Also handles escaping ('') or ("") 447 string processString(ref string data, string buf) { 448 assert(data[0]=='\'' || data[0]=='"', "Expected ' or \""); 449 char begin=data[0]; 450 data=data[1..$]; 451 string out_cmd; 452 while(data.length) { 453 foreach(i, c; data) { 454 if(c==begin && ((i+1)>=data.length || data[i+1]!=begin) ) { // End of string (Ignore escaped end in string) 455 out_cmd~=buf~"~=`"~begin~data[0..i]~begin~"`;\n"; 456 data=data[i+1 .. $]; 457 break; 458 } 459 } 460 } 461 return out_cmd; 462 } 463 464 // Parsing starts here. 465 string preSqlParser(ref string data, int level=0) { 466 auto out_cmd="{\n"; 467 out_cmd~=doVariableProcessing(data, level); 468 debug(querygenerator) import std.stdio; 469 // dropWhite(data); //(Maybe intended, so don't drop it) 470 immutable slevel=to!string(level); 471 immutable buf="buf"~slevel; 472 immutable text="text"~slevel; 473 string upperBuf; 474 string upperWasValid; 475 int dexprCount=0; 476 if(level>0) { 477 upperBuf="buf"~(to!string(level-1)); 478 upperWasValid="wasValid"~(to!string(level-1)); 479 } 480 else 481 upperBuf="sql_"; 482 483 immutable validCount="validCount"~slevel; 484 immutable wasValid="wasValid"~slevel; 485 immutable isFirst="isFirst"~to!string(level); // Defined in doVariableProcessing 486 out_cmd~="int "~validCount~"=-1;\n"; 487 out_cmd~="int "~wasValid~";\n"; // validCount of inner level. (But never -1, if it would be -1 it is 1) 488 out_cmd~="string "~text~";\n"; 489 while(data.length) { 490 auto end=data.length; 491 foreach(i, c; data) { 492 if(c=='{' || c=='}' || c=='#' || c=='\'' || c=='"' || c=='$') { 493 end=i; 494 break; 495 } 496 } 497 out_cmd~=text~"=`"~data[0..end]~"`;\n"; 498 out_cmd~=buf~"~=`"~data[0..end]~"`;\n"; 499 data=data[end..$]; 500 if(data.length==0) 501 break; 502 debug(querygenerator) writefln("Remaining (level: %s) data: %s", level, data); 503 switch(data[0]) { 504 case '{' : 505 assert(data.length>2, "Expected some data after '{' at: "~data); 506 data=data[1..$]; 507 out_cmd~="if("~validCount~"==0) {\n"; 508 out_cmd~=buf~"="~buf~"[0..$-"~text~".length];\n}\n"; 509 out_cmd~=wasValid~"=0;\n"; // Reset, because inner level will just add to it. 510 out_cmd~=preSqlParser(data, level+1); 511 512 assert(data[0]=='}', "Expected closing '}', got: "~data); 513 data=data[1..$]; 514 515 out_cmd~="if("~wasValid~"==0 && "~validCount~">0) {\n"; // validCount has to be greater than 0 otherwise we have removed the data already. (See above) 516 out_cmd~=buf~"="~buf~"[0..$-"~text~".length];\n}\n"; 517 out_cmd~=`debug(queryGenerator) writefln("Received data from level: `~to!string(level+1)~` on level: `~slevel~`, data was valid: %s, current valid count: %s", `~wasValid~`, `~validCount~`);`~"\n"; 518 out_cmd~=validCount~"="~validCount~"==-1 ? "~wasValid~" : "~validCount~"+"~wasValid~";\n"; 519 out_cmd~=`debug(queryGenerator) writefln("Updated valid count is now: %s", `~validCount~`);`~"\n"; 520 break; 521 case '}' : 522 goto finish; 523 case '#' : 524 out_cmd~="if("~validCount~"==-1) {\n\t"; 525 out_cmd~=validCount~"=0;\n}\n"; 526 out_cmd~=dExpressionParser(data, buf, validCount, slevel, dexprCount); 527 dexprCount++; 528 break; 529 case '\'': 530 case '"': 531 out_cmd~=processString(data, buf); 532 break; 533 case '$': assert(false, "Unexpected '$'. The variable declaration block must start with a '${}' declaration otherwise you are already in the body and '${}' declarations are invalid.\n Found at: "~data); 534 default: 535 assert(false, "WTF? The if should have made this impossible!"); 536 } 537 } 538 finish: 539 out_cmd~=`debug(queryGenerator) writefln("Leaving level: %s, valid count: %s, buffer: %s", `~slevel~`, `~validCount~`, `~buf~" );\n"; 540 out_cmd~="if("~validCount~"==-1 || "~validCount~">0) {\n"; 541 out_cmd~="debug(queryGenerator) writeln(\"Appended "~buf~" with contents: \", "~buf~");\n"; 542 out_cmd~=isFirst~"=false;\n"; // No longer the first valid run, so separator should be inserted. 543 out_cmd~=upperBuf~"~="~buf~";\n}\n"; 544 if(level>0) { 545 out_cmd~=upperWasValid~"+="~validCount~"<0 ? 1 : "~validCount~";\n"; 546 } 547 // End of loop: 548 out_cmd~="}\n"; 549 // End of block: 550 out_cmd~="}\n"; 551 return out_cmd; 552 }