[Bast-commits] r5208 - in SQL-Abstract/1.x/branches/1.50_RC-extraparens: . lib/SQL/Abstract

norbi at dev.catalyst.perl.org norbi at dev.catalyst.perl.org
Wed Nov 26 21:03:36 GMT 2008


Author: norbi
Date: 2008-11-26 21:03:35 +0000 (Wed, 26 Nov 2008)
New Revision: 5208

Modified:
   SQL-Abstract/1.x/branches/1.50_RC-extraparens/
   SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm
Log:
 r5263 at vger:  mendel | 2008-11-26 22:01:25 +0100
  * Cleaned up tokenizer parser code.
  * Added a few more SQL keywords.



Property changes on: SQL-Abstract/1.x/branches/1.50_RC-extraparens
___________________________________________________________________
Name: svk:merge
   - 4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC:5202
4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC-extraparens:5262
   + 4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC:5202
4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC-extraparens:5263

Modified: SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm
===================================================================
--- SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm	2008-11-26 21:03:30 UTC (rev 5207)
+++ SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm	2008-11-26 21:03:35 UTC (rev 5208)
@@ -13,6 +13,58 @@
 our $sql_differ; # keeps track of differing portion between SQLs
 our $tb = __PACKAGE__->builder;
 
+# Parser states for _recurse_parse()
+use constant {
+  PARSE_TOP_LEVEL => 0,
+  PARSE_IN_EXPR => 1,
+  PARSE_IN_PARENS => 2,
+};
+
+# These SQL keywords always signal end of the current expression (except inside
+# of a parenthesized subexpression).
+# Format: A list of strings that will be compiled to extended syntax (ie.
+# /.../x) regexes, without capturing parentheses. They will be automatically
+# anchored to word boundaries to match the whole token).
+my @expression_terminator_sql_keywords = (
+  'FROM',
+  '(?:
+    (?:
+        (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
+        (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
+    )?
+    JOIN
+  )',
+  'ON',
+  'WHERE',
+  'GROUP \s+ BY',
+  'HAVING',
+  'ORDER \s+ BY',
+  'LIMIT',
+  'OFFSET',
+  'FOR',
+  'UNION',
+  'INTERSECT',
+  'EXCEPT',
+);
+
+my $tokenizer_re_str = join('|',
+  map { '\b' . $_ . '\b' }
+    @expression_terminator_sql_keywords, 'AND', 'OR'
+);
+
+my $tokenizer_re = qr/
+  \s*
+  (
+      \(
+    |
+      \)
+    |
+      $tokenizer_re_str
+  )
+  \s*
+/xi;
+
+
 sub is_same_sql_bind {
   my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
 
@@ -110,37 +162,6 @@
 sub parse {
   my $s = shift;
 
-  my $tokenizer_re = qr/
-    \s*
-    (
-        \(
-      |
-        \)
-      |
-        \b AND \b
-      |
-        \b OR \b
-      |
-        \b ON \b
-      |
-        (?:
-          (?:
-              (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
-              (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
-          )?
-          \b JOIN \b
-        )
-      |
-        \b WHERE \b
-      |
-        \b GROUP \s+ BY \b
-        \b ORDER \s+ BY \b
-        \b LIMIT \b
-        \b OFFSET \b
-    )
-    \s*
-  /xi;
-
   # tokenize string, and remove all optional whitespace
   my $tokens = [
     grep {!/^$/}
@@ -148,27 +169,28 @@
         split $tokenizer_re, $s
   ];
 
-  my $tree = _recurse_parse($tokens);
+  my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
   return $tree;
 }
 
 sub _recurse_parse {
-  my ($tokens, $delimiters) = @_;
+  my ($tokens, $state) = @_;
 
-  $delimiters ||= [];
-
   my $left;
   while (1) { # left-associative parsing
 
     my $lookahead = $tokens->[0];
     return $left if !defined($lookahead)
-      || grep { $lookahead =~ /^$_$/i } @$delimiters;
+      || ($state == PARSE_IN_PARENS && $lookahead eq ')')
+      || ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^$_$/xi }
+            '\)', @expression_terminator_sql_keywords
+         );
 
     my $token = shift @$tokens;
 
     # nested expression in ()
     if ($token eq '(') {
-      my $right = _recurse_parse($tokens, ['\)']);
+      my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
       $token = shift @$tokens   or croak "missing ')'";
       $token eq ')'             or croak "unexpected token : $token";
       $left = $left ? [CONCAT => [$left, [PAREN => $right]]]
@@ -176,19 +198,12 @@
     }
     # AND/OR
     elsif ($token eq 'AND' || $token eq 'OR')  {
-      my $right = _recurse_parse($tokens, 
-        ['\)', '(((LEFT|RIGHT|FULL)\s+)?(CROSS|INNER|OUTER)\s+)?JOIN',
-         'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET']
-      );
+      my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
       $left = [$token => [$left, $right]];
     }
-    # ON, WHERE, GROUP BY, ORDER BY, LIMIT, OFFSET
-    elsif (grep { $token =~ /^$_$/i }
-      ('ON', 'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET')) {
-      my $right = _recurse_parse($tokens, 
-        ['\)', '(((LEFT|RIGHT|FULL)\s+)?(CROSS|INNER|OUTER)\s+)?JOIN',
-         'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET']
-      );
+    # expression terminator keywords (as they start a new expression)
+    elsif (grep { $token =~ /^$_$/xi } @expression_terminator_sql_keywords) {
+      my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
       $left = $left ? [CONCAT => [$left, [CONCAT => [[EXPR => $token], [PAREN => $right]]]]]
                     : [CONCAT => [[EXPR => $token], [PAREN  => $right]]];
     }




More information about the Bast-commits mailing list