[Bast-commits] r9242 - SQL-Abstract/1.x/branches/sqla-tree/lib/SQL/Abstract

dhoss at dev.catalyst.perl.org dhoss at dev.catalyst.perl.org
Tue Apr 27 20:49:11 GMT 2010


Author: dhoss
Date: 2010-04-27 21:49:11 +0100 (Tue, 27 Apr 2010)
New Revision: 9242

Modified:
   SQL-Abstract/1.x/branches/sqla-tree/lib/SQL/Abstract/Tree.pm
Log:
more rippage, no tests yet

Modified: SQL-Abstract/1.x/branches/sqla-tree/lib/SQL/Abstract/Tree.pm
===================================================================
--- SQL-Abstract/1.x/branches/sqla-tree/lib/SQL/Abstract/Tree.pm	2010-04-27 18:17:35 UTC (rev 9241)
+++ SQL-Abstract/1.x/branches/sqla-tree/lib/SQL/Abstract/Tree.pm	2010-04-27 20:49:11 UTC (rev 9242)
@@ -7,82 +7,349 @@
 use Moose;
 use namespace::autoclean;
 with 'MooseX::Getopt';
-has 'case_sensitive' => ( 
-	is => 'rw', 
-	traits => [qw/Getopt/], 
-	required => 1, lazy => 1, 
-	default => 0 
+
+has 'case_sensitive' => (
+    is       => 'rw',
+    traits   => [qw/Getopt/],
+    required => 1,
+    lazy     => 1,
+    default  => 0
 );
 
-has 'parenthesis_significant' => ( 
-	is => 'rw', 
-	traits => [qw/Getopt/], 
-	required => 1, 
-	lazy => 1, 
-	default => 0 
+has 'parenthesis_significant' => (
+    is       => 'rw',
+    traits   => [qw/Getopt/],
+    required => 1,
+    lazy     => 1,
+    default  => 0
 );
 
-has 'sql_differ' => ( 
-	is => 'rw', 
-	traits => [qw/NoGetopt/], 
-	required => 1, 
-	lazy_build => 1
+has 'sql_differ' => (
+    is         => 'rw',
+    traits     => [qw/NoGetopt/],
+    required   => 1,
+    lazy_build => 1
 );    # keeps track of differing portion between SQLs
 
-sub _build_sql_differ {} # figure this out later
+sub _build_sql_differ { }    # figure this out later
 
-has 'tb' => ( 
-	is => 'rw', 
-	traits => [qw/NoGetopt/],
-	lazy_build => 1,
-	required => 1,
+has 'tb' => (
+    is         => 'rw',
+    traits     => [qw/NoGetopt/],
+    lazy_build => 1,
+    required   => 1,
 );
 
 has 'expression_terminator_sql_keywords' => (
-	is => 'ro', 
-	required => 1,
-	lazy_build => 1
+    is         => 'ro',
+    traits     => [qw/NoGetopt/],
+    required   => 1,
+    lazy_build => 1
 );
 
 sub _build_expression_terminator_sql_keywords {
-	return (
-	  'SELECT',
-	  'FROM',
-	  '(?:
+    return (
+        'SELECT',
+        'FROM',
+        '(?:
 	    (?:
 	        (?: \b (?: LEFT | RIGHT | FULL ) \s+ )?
 	        (?: \b (?: CROSS | INNER | OUTER ) \s+ )?
 	    )?
 	    JOIN
 	  )',
-	  'ON',
-	  'WHERE',
-	  'EXISTS',
-	  'GROUP \s+ BY',
-	  'HAVING',
-	  'ORDER \s+ BY',
-	  'LIMIT',
-	  'OFFSET',
-	  'FOR',
-	  'UNION',
-	  'INTERSECT',
-	  'EXCEPT',
-	  'RETURNING',
-	);
+        'ON',
+        'WHERE',
+        'EXISTS',
+        'GROUP \s+ BY',
+        'HAVING',
+        'ORDER \s+ BY',
+        'LIMIT',
+        'OFFSET',
+        'FOR',
+        'UNION',
+        'INTERSECT',
+        'EXCEPT',
+        'RETURNING',
+    );
 }
 
 sub _build_tb {
-	return __PACKAGE__->builder;
+    return __PACKAGE__->builder;
 }
 
+# These are binary operator keywords always a single LHS and RHS
+# * AND/OR are handled separately as they are N-ary
+# * so is NOT as being unary
+# * BETWEEN without paranthesis around the ANDed arguments (which
+#   makes it a non-binary op) is detected and accomodated in
+#   _recurse_parse()
+has 'stuff_around_mathops' => (
+    is         => 'ro',
+    traits     => [qw/NoGetopt/],
+    required   => 1,
+    lazy_build => 1
+);
+
+sub _build_stuff_around_mathops {
+    return qr/[\w\s\`\'\"\)]/;
+}
+
+has 'binary_op_keywords' => (
+    is         => 'ro',
+    traits     => [qw/NoGetopt/],
+    required   => 1,
+    lazy_build => 1,
+);
+
+sub _build_binary_op_keyworkds {
+    return (
+        map {
+            ' ^ ' . quotemeta($_) . "(?= \$ | $stuff_around_mathops ) ",
+              " (?<= $stuff_around_mathops)" . quotemeta($_) . "(?= \$ | $stuff_around_mathops ) ",
+          } (qw/< > != <> = <= >=/)
+      ),
+      (
+        map {
+            '\b (?: NOT \s+)?' . $_ . '\b'
+          } (qw/IN BETWEEN LIKE/)
+      ),
+    );
+}
+
+has 'tokenizer_re_str' => (
+      is         => 'ro',
+      traits     => [qw/NoGetopt/],
+      required   => 1,
+      lazy_build => 1,
+);
+
+sub _build_tokenizer_re_str {
+      my $self = shift;
+      return join( "\n\t|\n",
+          ( map { '\b' . $_ . '\b' } $self->expression_terminator_sql_keywords, 'AND', 'OR', 'NOT' ),
+          $self->binary_op_keywords, );
+}
+
+has 'tokenizer_re' => (
+      is         => 'ro',
+      traits     => [qw/NoGetopt/],
+      required   => 1,
+      lazy_build => 1,
+);
+
+sub _build_tokenizer_re {
+      my $self = shift;
+      return qr/ \s* ( $self->tokenizer_re_str | \( | \) | \? ) \s* /xi;
+}
+
+has 'unrollable_ops' => (
+      is         => 'ro',
+      traits     => [qw/NoGetopt/],
+      required   => 1,
+      lazy_build => 1,
+);
+
+sub _build_unrollable_ops {
+      return ( 'ON', 'WHERE', 'GROUP \s+ BY', 'HAVING', 'ORDER \s+ BY', );
+}
+
 # Parser states for _recurse_parse()
 use constant PARSE_TOP_LEVEL => 0;
-use constant PARSE_IN_EXPR => 1;
+use constant PARSE_IN_EXPR   => 1;
 use constant PARSE_IN_PARENS => 2;
-use constant PARSE_RHS => 3;
+use constant PARSE_RHS       => 3;
 
+sub parse {
+  my ($self, $s) = @_;
+
+  # tokenize string, and remove all optional whitespace
+  my $tokens = [];
+  foreach my $token (split $self->tokenizer_re, $s) {
+    push @$tokens, $token if (length $token) && ($token =~ /\S/);
+  }
+
+  my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
+  return $tree;
+}
+
+sub _recurse_parse {
+  my ($self, $tokens, $state) = @_;
+
+  my $left;
+  while (1) { # left-associative parsing
+
+    my $lookahead = $tokens->[0];
+    if ( not defined($lookahead)
+          or
+        ($state == PARSE_IN_PARENS && $lookahead eq ')')
+          or
+        ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', $self->expression_terminator_sql_keywords ) )
+          or
+        ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', $self->expression_terminator_sql_keywords, $self->binary_op_keywords, 'AND', 'OR', 'NOT' ) )
+    ) {
+      return $left;
+    }
+
+    my $token = shift @$tokens;
+
+    # nested expression in ()
+    if ($token eq '(' ) {
+      my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
+      $token = shift @$tokens   or croak "missing closing ')' around block " . unparse ($right);
+      $token eq ')'             or croak "unexpected token '$token' terminating block " . unparse ($right);
+      $left = $left ? [@$left, [PAREN => [$right] ]]
+                    : [PAREN  => [$right] ];
+    }
+    # AND/OR
+    elsif ($token =~ /^ (?: OR | AND ) $/xi )  {
+      my $op = uc $token;
+      my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
+
+      # Merge chunks if logic matches
+      if (ref $right and $op eq $right->[0]) {
+        $left = [ (shift @$right ), [$left, map { @$_ } @$right] ];
+      }
+      else {
+       $left = [$op => [$left, $right]];
+      }
+    }
+    # binary operator keywords
+    elsif (grep { $token =~ /^ $_ $/xi } $self->binary_op_keywords ) {
+      my $op = uc $token;
+      my $right = _recurse_parse($tokens, PARSE_RHS);
+
+      # A between with a simple LITERAL for a 1st RHS argument needs a
+      # rerun of the search to (hopefully) find the proper AND construct
+      if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') {
+        unshift @$tokens, $right->[1][0];
+        $right = _recurse_parse($tokens, PARSE_IN_EXPR);
+      }
+
+      $left = [$op => [$left, $right] ];
+    }
+    # expression terminator keywords (as they start a new expression)
+    elsif (grep { $token =~ /^ $_ $/xi } $self->expression_terminator_sql_keywords ) {
+      my $op = uc $token;
+      my $right = _recurse_parse($tokens, PARSE_IN_EXPR);
+      $left = $left ? [@$left,  [$op => [$right] ]]
+                    : [[ $op => [$right] ]];
+    }
+    # NOT (last as to allow all other NOT X pieces first)
+    elsif ( $token =~ /^ not $/ix ) {
+      my $op = uc $token;
+      my $right = _recurse_parse ($tokens, PARSE_RHS);
+      $left = $left ? [ @$left, [$op => [$right] ]]
+                    : [[ $op => [$right] ]];
+
+    }
+    # literal (eat everything on the right until RHS termination)
+    else {
+      my $right = _recurse_parse ($tokens, PARSE_RHS);
+      $left = $left ? [$left, [LITERAL => [join ' ', $token, unparse($right)||()] ] ]
+                    : [ LITERAL => [join ' ', $token, unparse($right)||()] ];
+    }
+  }
+}
+
+sub _parenthesis_unroll {
+  my ($self, $ast) = @_;
+
+  return if $parenthesis_significant;
+  return unless (ref $ast and ref $ast->[1]);
+
+  my $changes;
+  do {
+    my @children;
+    $changes = 0;
+
+    for my $child (@{$ast->[1]}) {
+      if (not ref $child or not $child->[0] eq 'PAREN') {
+        push @children, $child;
+        next;
+      }
+
+      # unroll nested parenthesis
+      while ($child->[1][0][0] eq 'PAREN') {
+        $child = $child->[1][0];
+        $changes++;
+      }
+
+      # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list
+      if (
+        ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR')
+            and
+          $child->[1][0][0] eq $ast->[0]
+      ) {
+        push @children, @{$child->[1][0][1]};
+        $changes++;
+      }
+
+      # if the parent operator explcitly allows it nuke the parenthesis
+      elsif ( grep { $ast->[0] =~ /^ $_ $/xi } $self->unrollable_ops ) {
+        push @children, $child->[1][0];
+        $changes++;
+      }
+
+      # only one LITERAL element in the parenthesis
+      elsif (
+        @{$child->[1]} == 1 && $child->[1][0][0] eq 'LITERAL'
+      ) {
+        push @children, $child->[1][0];
+        $changes++;
+      }
+
+      # only one element in the parenthesis which is a binary op with two LITERAL sub-children
+      elsif (
+        @{$child->[1]} == 1
+          and
+        grep { $child->[1][0][0] =~ /^ $_ $/xi } ($self->binary_op_keywords)
+          and
+        $child->[1][0][1][0][0] eq 'LITERAL'
+          and
+        $child->[1][0][1][1][0] eq 'LITERAL'
+      ) {
+        push @children, $child->[1][0];
+        $changes++;
+      }
+
+      # otherwise no more mucking for this pass
+      else {
+        push @children, $child;
+      }
+    }
+
+    $ast->[1] = \@children;
+
+  } while ($changes);
+
+}
+
+sub unparse {
+  my $tree = shift;
+
+  if (not $tree ) {
+    return '';
+  }
+  elsif (ref $tree->[0]) {
+    return join (" ", map { unparse ($_) } @$tree);
+  }
+  elsif ($tree->[0] eq 'LITERAL') {
+    return $tree->[1][0];
+  }
+  elsif ($tree->[0] eq 'PAREN') {
+    return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]});
+  }
+  elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } $self->binary_op_keywords ) ) {
+    return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]});
+  }
+  else {
+    return sprintf '%s %s', $tree->[0], unparse ($tree->[1]);
+  }
+}
+
 ## DO THE DAMN THANG
-sub pretty {}
+sub pretty { }
 
 __PACKAGE__->meta->make_immutable;
 1;




More information about the Bast-commits mailing list