[Bast-commits] r5208 - in
SQL-Abstract/1.x/branches/1.50_RC-extraparens: . lib/SQL/Abstract
norbi at dev.catalyst.perl.org
norbi at dev.catalyst.perl.org
Wed Nov 26 21:03:36 GMT 2008
Author: norbi
Date: 2008-11-26 21:03:35 +0000 (Wed, 26 Nov 2008)
New Revision: 5208
Modified:
SQL-Abstract/1.x/branches/1.50_RC-extraparens/
SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm
Log:
r5263 at vger: mendel | 2008-11-26 22:01:25 +0100
* Cleaned up tokenizer parser code.
* Added a few more SQL keywords.
Property changes on: SQL-Abstract/1.x/branches/1.50_RC-extraparens
___________________________________________________________________
Name: svk:merge
- 4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC:5202
4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC-extraparens:5262
+ 4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC:5202
4d5fae46-8e6a-4e08-abee-817e9fb894a2:/local/bast/SQL-Abstract/1.x/branches/1.50_RC-extraparens:5263
Modified: SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm
===================================================================
--- SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm 2008-11-26 21:03:30 UTC (rev 5207)
+++ SQL-Abstract/1.x/branches/1.50_RC-extraparens/lib/SQL/Abstract/Test.pm 2008-11-26 21:03:35 UTC (rev 5208)
@@ -13,6 +13,58 @@
our $sql_differ; # keeps track of differing portion between SQLs
our $tb = __PACKAGE__->builder;
+# Parser states for _recurse_parse()
+use constant {
+ PARSE_TOP_LEVEL => 0,
+ PARSE_IN_EXPR => 1,
+ PARSE_IN_PARENS => 2,
+};
+
+# These SQL keywords always signal end of the current expression (except inside
+# of a parenthesized subexpression).
+# Format: A list of strings that will be compiled to extended syntax (ie.
+# /.../x) regexes, without capturing parentheses. They will be automatically
+# anchored to word boundaries to match the whole token).
+my @expression_terminator_sql_keywords = (
+ 'FROM',
+ '(?:
+ (?:
+ (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
+ (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
+ )?
+ JOIN
+ )',
+ 'ON',
+ 'WHERE',
+ 'GROUP \s+ BY',
+ 'HAVING',
+ 'ORDER \s+ BY',
+ 'LIMIT',
+ 'OFFSET',
+ 'FOR',
+ 'UNION',
+ 'INTERSECT',
+ 'EXCEPT',
+);
+
+my $tokenizer_re_str = join('|',
+ map { '\b' . $_ . '\b' }
+ @expression_terminator_sql_keywords, 'AND', 'OR'
+);
+
+my $tokenizer_re = qr/
+ \s*
+ (
+ \(
+ |
+ \)
+ |
+ $tokenizer_re_str
+ )
+ \s*
+/xi;
+
+
sub is_same_sql_bind {
my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_;
@@ -110,37 +162,6 @@
sub parse {
my $s = shift;
- my $tokenizer_re = qr/
- \s*
- (
- \(
- |
- \)
- |
- \b AND \b
- |
- \b OR \b
- |
- \b ON \b
- |
- (?:
- (?:
- (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
- (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
- )?
- \b JOIN \b
- )
- |
- \b WHERE \b
- |
- \b GROUP \s+ BY \b
- \b ORDER \s+ BY \b
- \b LIMIT \b
- \b OFFSET \b
- )
- \s*
- /xi;
-
# tokenize string, and remove all optional whitespace
my $tokens = [
grep {!/^$/}
@@ -148,27 +169,28 @@
split $tokenizer_re, $s
];
- my $tree = _recurse_parse($tokens);
+ my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
return $tree;
}
sub _recurse_parse {
- my ($tokens, $delimiters) = @_;
+ my ($tokens, $state) = @_;
- $delimiters ||= [];
-
my $left;
while (1) { # left-associative parsing
my $lookahead = $tokens->[0];
return $left if !defined($lookahead)
- || grep { $lookahead =~ /^$_$/i } @$delimiters;
+ || ($state == PARSE_IN_PARENS && $lookahead eq ')')
+ || ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^$_$/xi }
+ '\)', @expression_terminator_sql_keywords
+ );
my $token = shift @$tokens;
# nested expression in ()
if ($token eq '(') {
- my $right = _recurse_parse($tokens, ['\)']);
+ my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
$token = shift @$tokens or croak "missing ')'";
$token eq ')' or croak "unexpected token : $token";
$left = $left ? [CONCAT => [$left, [PAREN => $right]]]
@@ -176,19 +198,12 @@
}
# AND/OR
elsif ($token eq 'AND' || $token eq 'OR') {
- my $right = _recurse_parse($tokens,
- ['\)', '(((LEFT|RIGHT|FULL)\s+)?(CROSS|INNER|OUTER)\s+)?JOIN',
- 'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET']
- );
+ my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
$left = [$token => [$left, $right]];
}
- # ON, WHERE, GROUP BY, ORDER BY, LIMIT, OFFSET
- elsif (grep { $token =~ /^$_$/i }
- ('ON', 'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET')) {
- my $right = _recurse_parse($tokens,
- ['\)', '(((LEFT|RIGHT|FULL)\s+)?(CROSS|INNER|OUTER)\s+)?JOIN',
- 'WHERE', 'GROUP\s+BY', 'ORDER\s+BY', 'LIMIT', 'OFFSET']
- );
+ # expression terminator keywords (as they start a new expression)
+ elsif (grep { $token =~ /^$_$/xi } @expression_terminator_sql_keywords) {
+ my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
$left = $left ? [CONCAT => [$left, [CONCAT => [[EXPR => $token], [PAREN => $right]]]]]
: [CONCAT => [[EXPR => $token], [PAREN => $right]]];
}
More information about the Bast-commits
mailing list