1 /**
2   * Create SQL queries from string containing D expressions.
3 
4   * This module allows you to generate SQL from a template, containing D expressions. Depending on these expressions
5   * parts of the template will be included in the SQL or not.
6 
7   * Authors: Robert Klotzner, robert.klotzner at gmail.com
8   * Date: January 7, 2013
9   * License: GNU General Public License version 3 <http://www.gnu.org/licenses/>
10   * Copyright: 2013 Robert Klotzner
11 
12   * Bugs: Unicode not really supported. I haven't really explored that yet, but
13   * I believe that at the moment string processing will fail for multibyte
14   * utf-8 characters in the input string.
15   */
16 module arsd.querygenerator;
17 
18 import std.exception;
19 import std.uni;
20 import std.string;
21 import std.variant;
22 import std.conv;
23 import std.typetuple;
24 
25 
26 /**
27   * The generated query + args.
28   *
29   * It offers support for concatenation, so you can create your query in parts and concatenate them afterwards.
30   * Ths string prepend will be inserted inbetween two CreatedQuery.query strings at concatation if and only if both query strings are non empty.
31   * The resulting CreatedQuery has a prepend string equal to the one of the left side of the '~' operation.
32 
33   * Beware that because of the handling of the prepend string, the result of multiple concatenations depends on the order of evaluation (if empty strings are involved).
34   *  You might need parantheses or splitting
35   * it up in multiple statements to achieve the desired effect.
36   */
37 struct CreatedQuery {
38     /**
39       * Concatenation with automatically inserted prepend string in between.
40 
41       * The prepend string gets inserted inbetween the two query strings if and only if both query strings are non empty.
42       * The prepend string of the resulting object is the one of the left object. Thus the resulting object depends on the order of execution if empty strings are involved.
43       * See the unittest to this struct for details.
44       */
45     CreatedQuery opBinary(string op)(CreatedQuery right) if(op=="~") {
46         CreatedQuery res=this;
47         res~=right;
48         return res;
49     }
50 
51     CreatedQuery opBinary(string op)(string right) if(op=="~") {
52         CreatedQuery res=this;
53         res.query~=right;
54         return res;
55     }
56 
57     CreatedQuery opBinaryRight(string op)(string left) if(op=="~") {
58         CreatedQuery res=this;
59         if(left.length && query.length)
60             left~=prepend;
61         res.query=left~res.query;
62         return res;
63     }
64 
65 
66     ref CreatedQuery opOpAssign(string op)(string right) if(op=="~") {
67         query~=right;
68         return this;
69     }
70     ref CreatedQuery opOpAssign(string op)(CreatedQuery right) if(op=="~") {
71         query~=(query.length && right.query.length) ?  right.prepend : "";
72         query~=right.query;
73         args~=right.args;
74         return this;
75     }
76     /// Currently not const because of bug:
77     ///     http://d.puremagic.com/issues/show_bug.cgi?id=8759
78     bool opEquals( CreatedQuery other) {
79         return query==other.query && args==other.args && prepend==other.prepend;
80     }
81     string query;
82     Variant[] args;
83     string prepend;
84 }
85 
86 unittest {
87     auto q1=CreatedQuery("select * from table");
88     CreatedQuery q2=CreatedQuery("", [], " where ");
89     auto q3=CreatedQuery("col1=? and col2=?", [Variant(7), Variant("huhu")], " and ");
90     auto q4=(q1~(q2~q3));
91     q1~=q2~q3;
92     import std.stdio;
93     writefln("q4: %s, q1: %s", q4, q1);
94     assert(q4==q1);
95     assert(q1==CreatedQuery("select * from table where col1=? and col2=?", [Variant(7), Variant("huhu")], ""));
96 }
97 
98 /**
99   * This method generates D code from the preSql data, which can be mixed in for generating the resulting sql and the resulting Variant[] array.
100   * The variable sql_ will contain the generated SQL.
101   * The variable args_ is the Variant[] array.
102   * Use createQuery for directly generating the resulting SQL and args.
103   * Params:
104   *     preSql = The to be processed SQL containing D expressions in #{} blocks, sub blocks in {} and declarations in ${}.
105   * Returns: D code which can be mixed in.
106 **/
107 string createQueryGenerator(string preSql) {
108     string out_cmd;
109     out_cmd~="string sql_;\n";
110     out_cmd~="Variant[] args_;\n";
111     out_cmd~="with(data_) {\n";
112     out_cmd~=preSqlParser(preSql);
113     out_cmd~="\n}";
114     return out_cmd;
115 }
116 
117 /**
118   * Uses createQueryGenerator for actually doing the job.
119   * data_ will be made available with D's 'with' statement to the embedded code in queryString. So you can access the elements just as regular variables.
120   *
121   * Params:
122   *     queryString = An SQL template. An SQL template consists of nested blocks. The
123   *                   uppermost block is the queryString itself, you create subblocks by
124   *                   enclosing it in braces '{}'.
125 
126                       __Declaration part__
127 
128                       Each block might start with a declaration part. It does
129                       so if the first character in the block is a '$'. The
130                       declaration part ends with a colon ':'.
131                       It consists of declarations which start with '${' and end
132                       with '}'. Two types of declarations are allowed:
133 
134                       1. variable assignment of the form: ${a=5}    This will
135                       result in a D expression in the generated code of the
136                       form 'auto a=5;'.
137 
138                       2. Foreach loop declaration. This one might only occur
139                       once in the declaration part and causes the block in
140                       which it is declared to be executed in a loop.
141                         It looks like: ${a in someArray}
142 
143                       3. Special assignment of ${queryGenSep=" or "} : This one
144                       is specially treated. If you specified a loop the
145                       generated string of each iteration will be concatenated
146                       and the queryGenSep will be used to separate them. If you
147                       don't specify one, the default will be " or ".
148 
149                       Variable assignments before the optional loop declaration
150                       will be declared outside the loop, the ones after the
151                       loop declaration will be declared within the loop and
152                       might access the loop variable, ('a' in our case).
153 
154                       Multiple declarations of the form '${}' might be present
155                       within the declaration part, they may be separated by
156                       white space.
157 
158                       __Body part__
159 
160                       If no declaration part is present, then the whole content
161                       of the block is the body part. (This is, if the first
162                       character is no '$'.) Otherwise it starts after the colon
163                       at the end of the declaration part.
164 
165                       Everything in the body part will be echoed to the
166                       resulting SQL so you can just write plain SQL now. There
167                       are only a few exceptions:
168 
169                       1. A '{' will introduce a child block.
170 
171                       2. A '#{' will start a D expression, which ends with
172                       another '}'
173 
174                       3. A '$' will trigger an error as it is only valid in the
175                       declaration part.
176 
177                       4. SQL string which can either start with " or with '. It
178                       might contain any of '{' '}' '$' '#' they all will be
179                       ignored within a string. Apart from this, no escaping of
180                       these characters is possible at the moment.
181 
182                       __D expression__
183 
184                       A D expression might occur any amount of times within the
185                       body part of a block. It contents will be evaluated as D
186                       code. Which must evaluate to some value. If this value is
187                       different from the value's types init value it is
188                       considered valid, then it will be included in the Variant[] array and a '?'
189                       gets inserted into the resulting SQL. If the returned value equals the type's
190                       init value then no '?' gets inserted and the value will not be present in the
191                       args array.
192 
193                       So if the D expression evaluates to a string and is the
194                       empty string, it will not be included.
195 
196                       The expression is contained within #{ D expression }.
197 
198                       The D code has access to the passed params via params[0]
199                       .. params[n], all declarations made in the declarations
200                       parts of the blocks above it and the contents of the data_ struct.
201                       It is made available with D's 'with' statement so
202                       data_.foo will just be 'foo'.
203 
204                       __Blocks__
205 
206                       Blocks are the building blocks of the resulting
207                       expression. If a block contains a D expression or a
208                       subblock which contains D expressions, then its contents
209                       will only be included in the output if at least one of
210                       the D expressions are valid or if it contains a text only sub block.
211                       So if you have a SQL expression of the
212                       form {someColumn=#{some D code}} there will be no output
213                       at all if #{some D code} was not valid. If a block does
214                       not contain any D code and neither do any sub blocks then
215                       its output will be included in the output of the
216                       containing block. If you want to ensure that a particular
217                       text does not get dropped, include it in its own block.
218 
219                       The second property that makes blocks useful is that if a
220                       block produces no output, for the above mentioned
221                       reasons, then the text before it will be dropped if there
222                       were no valid sub blocks found already in the parent block.
223                       It will also be dropped if the preceding block vanished. This way you can
224                       concatenate blocks with ' and ' and ' or ' and they will
225                       be dropped if not applicable. A leading ' where ' on the
226                       other hand would not be dropped in any case, except the
227                       whole output vanishes.
228 
229                       For examples, see the unittests, a more complex example, I used in reality is here:
230                         `({${db=params[0]} : `
231                                     `{`
232                                         `${d in datespan} ${arData=db.autoRunData(d.autorun)}: {({date>=#{d.from}} and {date<=#{d.to}} and `
233                                         `{({task_run.id>=#{arData.firstId==long.max ? 0 : arData.firstId}} and {task_run.id<=#{arData.lastId}} and {comment like #{d.autorun.length ? "%:Autorun:"~d.autorun~":Autorun:%" : ""}})})}`
234                                     `}`
235                                     `})`
236 
237                     Right at the start I am starting the uppermost block with a
238                     '(' which means it has no declaration part, then I start
239                     the first sub block, followed by declaration I needed.
240                     Then I start another block with '{' followed by a loop
241                     declaration (d will iterate over the datespan array which
242                             comes from the data_ struct) followed by an inner
243                     variable declaration of arData, you can see both d (the
244                             looping variable) and db from the outer block are
245                     accessible. Afterwards the body starts  with a
246                     block containing '(' and subblocks of comparisons with a D
247                     expression like {({date>=#{d.from}})}.
248                     If all D expressions in the block like d.from evaluate to
249                     an empty string, the whole containing block will not produce
250                     any output. So not even '()' this is because they are
251                     enclused in a subblock. The 'and' between {date>=#{d.from}}
252                     and {date<=#{d.to}}  for example will only appear in the
253                     output if both d.from and d.to contained a valid non empty
254                     string.
255 
256                     The outputs of each loop iteration will be separated with " or " by default,
257                     you can change this, by setting queryGenSep in the declaration part.
258 
259         data_   = The contents of data_ will be made available to the D expressions and declarations in the queryString. (The code gets embedded withing a with(data_) { ... })
260 
261         params  = Additional data you need in the queryString. You can access it via params[0], params[1], ...
262                   If you happen to have a member of data_ which is named params, you will have to access it via data_.params.
263 
264   */
265 CreatedQuery createQuery(string queryString, StructT, Params...)(StructT data_, Params params) {
266     debug(queryGenerator) import std.stdio;
267     mixin(createQueryGenerator(queryString));
268     debug(queryGenerator) writeln("Generated: ", createQueryGenerator(queryString));
269     return CreatedQuery(sql_, args_);
270     //return CreatedQuery.init;
271 }
272 
273 unittest {
274     import std.stdio;
275     struct Test1 {
276         string a;
277         string b;
278     }
279     struct Test {
280         Test1[] foo;
281         string bar;
282         int k;
283     }
284     Test t1;
285     CreatedQuery myQuery(Test t1) {
286         return createQuery!`select * from testtable where
287         {({${queryGenSep=" or "} ${f in foo} : ({col1=#{f.a}} and {col2=#{f.b}})})} or
288         {col3>#{k}}`(t1);
289     }
290     auto res=myQuery(t1);
291     writeln("Result with empty args: ", res.query);
292     assert(res.args==[]);
293     t1.k=9;
294     res=myQuery(t1);
295     writefln("Result with k set: %s, args: %s", res.query, res.args);
296     assert(res.args==[Variant(9)]);
297     t1.foo~=Test1(string.init, "Hallo du da!");
298     res=myQuery(t1);
299     writeln("Result with foo.b and k set: ", res.query);
300     assert(res.args==[Variant("Hallo du da!"), Variant(9)]);
301     t1.foo~=Test1("Hallo du da!", string.init);
302     res=myQuery(t1);
303     writeln("Result with foo0.b and foo1.a and k set: ", res.query);
304     assert(res.args==[Variant("Hallo du da!"), Variant("Hallo du da!"), Variant(9)]);
305     t1.foo~=Test1("Hello!", "Cu!");
306     res=myQuery(t1);
307     writeln("Result with foo0.b and foo1.a and foo2.a and foo2.b and k set: ", res.query);
308     assert(res.args==[Variant("Hallo du da!"), Variant("Hallo du da!"), Variant("Hello!"), Variant("Cu!"), Variant(9)]);
309 }
310 
311 private:
312 void dropWhite(ref string buf) {
313     buf=buf.stripLeft();
314 }
315 
316 /// D's indexOf seemed not to work at compile time.
317 size_t indexOf(string heap, char needle) {
318     foreach(i, c; heap) {
319         if(needle==c) {
320             return i;
321         }
322     }
323     return -1;
324 }
325 //pragma(msg, createQueryGenerator( " (${item in datespan} : ( { date>=#{item.from} and} {date<=#{item.to} and} {comment like #{\"%:Autorun:\"~item.autorun~\":Autorun:%\"}})"));
326 //pragma(msg, createQueryGenerator( " Hello this is a test!"));
327 //pragma(msg, createQueryGenerator( `  ${item in datespan} : ( { date>=#{item.from} } and {date<=#{item.to} } and {comment like #{"%:Autorun:"~item.autorun~":Autorun:%"}})`));
328 //pragma(msg, createQueryGenerator(`select * from testtable where
329         //{({${queryGenSep=" or "} ${f in foo} : ({col1=#{f.a}} and {col2=#{f.b}})})} or
330         //{col3>k}`));
331 
332 //Syntax:
333 // ${a=3} ${i in array} ${u=i.c} : some data #{D expression} { same again }
334 // presql : [decls :] body
335 // decls : [${variable=assignment}]* [${variable in array}] [${variable=assignment}]*
336 // body : [some_string | #{D expression} | {presql}]*
337 /// private
338 /// Handles the part before the ":" colon. (Variable declarations + loop declaration)
339 private static string doVariableProcessing(ref string part, int level) {
340     string output;
341     string for_each_decl;
342     string after_output;
343     bool queryGenSepFound=false;
344     string buf="buf"~to!string(level);
345     dropWhite(part);
346     assert(part);
347     immutable isFirst="isFirst"~to!string(level);
348     if(part[0]!='$')  {
349         return "bool "~isFirst~"=false;\n"~"{\nstring "~buf~";\n";
350     }
351     output~="bool "~isFirst~"=true;\n";
352     while(true) {
353         assert(part.length>2);
354         assert(part[0]=='$', "Only declarations enclosed in '${' are allowed in declarations block. Invalid data found: "~part);
355         assert(part[1]=='{', "'{' in '${' is mandatory! Found at:"~part);
356         string var_name;
357         part=part[2..$];
358         foreach(i,c; part) {
359             if(!c.isAlpha()) {
360                 var_name=part[0..i];
361                 if(var_name=="queryGenSep") {
362                     var_name="queryGenSep"~to!string(level);
363                     queryGenSepFound=true;
364                 }
365                 part=part[i..$];
366                 break;
367             }
368         }
369         dropWhite(part);
370         assert(part.length, "Unexpected end of data, expected '=' or 'in'");
371         enum Operation {
372             assignment,
373             in_array
374         }
375         Operation op;
376         switch(part[0]) {
377             case '=': op=Operation.assignment;
378                       part=part[1..$];
379                       break;
380             case 'i':
381                       assert(part.length>1 && part[1]=='n', "Expected 'n' after 'i' forming 'in', got: "~part);
382                       part=part[2..$];
383                       op=Operation.in_array;
384                       break;
385             default: assert(false, "Unexpected operation: Only variable assignment ('=') and array loop ('in') are supported, found at: "~part);
386 
387         }
388         dropWhite(part);
389         string right_side;
390         foreach(i, c; part) {
391             if(c=='}') {
392                 right_side=part[0..i];
393                 part=part[i+1..$];
394                 break;
395             }
396         }
397         if(op==Operation.assignment) {
398             string buff="auto "~var_name~"="~right_side~";\n";
399             if(for_each_decl)
400                 after_output~=buff;
401             else
402                 output~=buff;
403         }
404         else {
405             if(for_each_decl)
406                 assert(false, "Only one foreach declaration allowed, found second at: "~part);
407             for_each_decl="\nforeach("~var_name~"; "~right_side~") {\n";
408         }
409         dropWhite(part);
410         if(part[0]==':') {
411             part=part[1..$];
412             if(!queryGenSepFound) {
413                 output~="immutable queryGenSep"~to!string(level)~"=\" or \";\n";
414             }
415             for_each_decl = for_each_decl==[] ? "{\n" : for_each_decl;
416             after_output~="string "~buf~";\n";
417             after_output~="if(!"~isFirst~") {\n";
418             after_output~=buf~"=queryGenSep"~to!string(level)~";\n}\n";
419             return output~for_each_decl~after_output;
420         }
421         else {
422             assert(part, "Unexpected end of string, expected another declaration '${}' or ':'.");
423             assert(part[0]=='$', "Expected ':' or another variable assignment ('$'), at: "~part);
424         }
425     }
426 }
427 
428 // Extracts a D expression (#{}) from the string.
429 string dExpressionParser(ref string data, string buf, string validCount, string slevel, ref int count) {
430     assert(data.length>2 && data[0]=='#' && data[1]=='{');
431     string out_cmd;
432     data=data[2..$];
433     auto end=data.indexOf('}');
434     assert(end>0, "Empty or non closed D expression found at: "~data);
435     string val="val"~slevel~"_"~to!string(count);
436     out_cmd~="auto "~val~"="~data[0..end]~";\n";
437     out_cmd~="if("~val~"!=typeof("~val~").init) {\n";
438     out_cmd~="debug(queryGenerator) writeln(\"Found valid value: \", "~val~");\n";
439     out_cmd~=validCount~"++;\n";
440     out_cmd~=buf~"~=\"?\";\n";
441     out_cmd~="args_~=Variant("~val~");\n}\n";
442     out_cmd~="else {\ndebug(queryGenerator) writeln(\"Found invalid value: \", "~val~");\n}\n";
443     data=data[end+1..$];
444     return out_cmd;
445 }
446 // do the parsing of an sql string ('....' or ".....") Also handles escaping ('') or ("")
447 string processString(ref string data, string buf) {
448     assert(data[0]=='\'' || data[0]=='"', "Expected ' or \"");
449     char begin=data[0];
450     data=data[1..$];
451     string out_cmd;
452     while(data.length) {
453         foreach(i, c; data) {
454             if(c==begin && ((i+1)>=data.length || data[i+1]!=begin) ) { // End of string (Ignore escaped end in string)
455                 out_cmd~=buf~"~=`"~begin~data[0..i]~begin~"`;\n";
456                 data=data[i+1 .. $];
457                 break;
458             }
459         }
460     }
461     return out_cmd;
462 }
463 
464 // Parsing starts here.
465 string preSqlParser(ref string data, int level=0) {
466     auto out_cmd="{\n";
467     out_cmd~=doVariableProcessing(data, level);
468     debug(querygenerator) import std.stdio;
469     // dropWhite(data); //(Maybe intended, so don't drop it)
470     immutable slevel=to!string(level);
471     immutable buf="buf"~slevel;
472     immutable text="text"~slevel;
473     string upperBuf;
474     string upperWasValid;
475     int dexprCount=0;
476     if(level>0) {
477         upperBuf="buf"~(to!string(level-1));
478         upperWasValid="wasValid"~(to!string(level-1));
479     }
480     else
481         upperBuf="sql_";
482 
483     immutable validCount="validCount"~slevel;
484     immutable wasValid="wasValid"~slevel;
485     immutable isFirst="isFirst"~to!string(level); // Defined in doVariableProcessing
486     out_cmd~="int "~validCount~"=-1;\n";
487     out_cmd~="int "~wasValid~";\n"; // validCount of inner level. (But never -1, if it would be -1 it is 1)
488     out_cmd~="string "~text~";\n";
489     while(data.length) {
490         auto end=data.length;
491         foreach(i, c; data) {
492             if(c=='{' || c=='}' || c=='#' || c=='\'' || c=='"' || c=='$') {
493                 end=i;
494                 break;
495             }
496         }
497         out_cmd~=text~"=`"~data[0..end]~"`;\n";
498         out_cmd~=buf~"~=`"~data[0..end]~"`;\n";
499         data=data[end..$];
500         if(data.length==0)
501             break;
502         debug(querygenerator) writefln("Remaining (level: %s) data: %s", level, data);
503         switch(data[0]) {
504             case '{' :
505                 assert(data.length>2, "Expected some data after '{' at: "~data);
506                 data=data[1..$];
507                 out_cmd~="if("~validCount~"==0) {\n";
508                 out_cmd~=buf~"="~buf~"[0..$-"~text~".length];\n}\n";
509                 out_cmd~=wasValid~"=0;\n"; // Reset, because inner level will just add to it.
510                 out_cmd~=preSqlParser(data, level+1);
511 
512                 assert(data[0]=='}', "Expected closing '}', got: "~data);
513                 data=data[1..$];
514 
515                 out_cmd~="if("~wasValid~"==0 && "~validCount~">0) {\n"; // validCount has to be greater than 0 otherwise we have removed the data already. (See above)
516                 out_cmd~=buf~"="~buf~"[0..$-"~text~".length];\n}\n";
517                 out_cmd~=`debug(queryGenerator) writefln("Received data from level: `~to!string(level+1)~` on level: `~slevel~`, data was valid: %s, current valid count: %s", `~wasValid~`, `~validCount~`);`~"\n";
518                 out_cmd~=validCount~"="~validCount~"==-1 ? "~wasValid~" : "~validCount~"+"~wasValid~";\n";
519                 out_cmd~=`debug(queryGenerator) writefln("Updated valid count is now: %s", `~validCount~`);`~"\n";
520                 break;
521             case '}' :
522                 goto finish;
523             case '#' :
524                 out_cmd~="if("~validCount~"==-1) {\n\t";
525                 out_cmd~=validCount~"=0;\n}\n";
526                 out_cmd~=dExpressionParser(data, buf, validCount, slevel, dexprCount);
527                 dexprCount++;
528                 break;
529             case '\'':
530             case '"':
531                 out_cmd~=processString(data, buf);
532                 break;
533             case '$': assert(false, "Unexpected '$'. The variable declaration block must start with a '${}' declaration otherwise you are already in the body and '${}' declarations are invalid.\n Found at: "~data);
534             default:
535                 assert(false, "WTF? The if should have made this impossible!");
536         }
537     }
538 finish:
539     out_cmd~=`debug(queryGenerator) writefln("Leaving level: %s, valid count: %s, buffer: %s", `~slevel~`, `~validCount~`, `~buf~" );\n";
540     out_cmd~="if("~validCount~"==-1 || "~validCount~">0) {\n";
541     out_cmd~="debug(queryGenerator) writeln(\"Appended "~buf~" with contents: \", "~buf~");\n";
542     out_cmd~=isFirst~"=false;\n"; // No longer the first valid run, so separator should be inserted.
543     out_cmd~=upperBuf~"~="~buf~";\n}\n";
544     if(level>0) {
545         out_cmd~=upperWasValid~"+="~validCount~"<0 ? 1 : "~validCount~";\n";
546     }
547     // End of loop:
548     out_cmd~="}\n";
549     // End of block:
550     out_cmd~="}\n";
551     return out_cmd;
552 }