[DBD-SQLite] [patch] fix incorrect recipes in DBD::SQLite:: Cookbook

Tue Sep 6 15:29:12 GMT 2011

Applied in the trunk. Thanks.

Kenichi

On Tue, 06 Sep 2011 04:36:28 +0400, Yuriy Kaminskiy <yumkam at mail.ru> wrote:

>Hello!
>I noticed that "optimized" variance2/variance3 function in DBD::SQLite::Cookbook
>saves state in global variables.
>I think this is incorrect (how `SELECT variance(a), variance(b) FROM t;` is
>supposed to work? besides, they even don't reset state in new).
>Patch that fixes this error (and few others typos) attached.

>Index: DBD-SQLite-1.33/lib/DBD/SQLite/Cookbook.pod
>===================================================================
>--- DBD-SQLite-1.33.orig/lib/DBD/SQLite/Cookbook.pod	2011-09-06 03:25:18.000000000 +0400
>+++ DBD-SQLite-1.33/lib/DBD/SQLite/Cookbook.pod	2011-09-06 03:55:47.000000000 +0400
>@@ -42,7 +42,7 @@ adapted from an example implementation i
>
>       my $sigma = 0;
>       foreach my $v ( @$self ) {
>-          $sigma += ($x - $mu)**2;
>+          $sigma += ($v - $mu)**2;
>       }
>       $sigma = $sigma / ($n - 1);
>
>@@ -66,41 +66,38 @@ expense of precision:
>
>   package variance2;
>
>-  my $sum   = 0;
>-  my $count = 0;
>-  my %hash;
>-
>-  sub new { bless [], shift; }
>+  sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
>
>   sub step {
>       my ( $self, $value ) = @_;
>+      my $hash = $self->{hash};
>
>       # by truncating and hashing, we can comsume many more data points
>       $value = int($value); # change depending on need for precision
>                             # use sprintf for arbitrary fp precision
>-      if (defined $hash{$value}) {
>-          $hash{$value}++;
>+      if (exists $hash->{$value}) {
>+          $hash->{$value}++;
>       } else {
>-          $hash{$value} = 1;
>+          $hash->{$value} = 1;
>       }
>-      $sum += $value;
>-      $count++;
>+      $self->{sum} += $value;
>+      $self->{count}++;
>   }
>
>   sub finalize {
>       my $self = $_[0];
>
>       # Variance is NULL unless there is more than one row
>-      return undef unless $count > 1;
>+      return undef unless $self->{count} > 1;
>
>       # calculate avg
>-      my $mu = $sum / $count;
>+      my $mu = $self->{sum} / $self->{count};
>
>       my $sigma = 0;
>-      foreach my $h (keys %hash) {
>-          $sigma += (($h - $mu)**2) * $hash{$h};
>+      while (my ($h, $v) = each %{$self->{hash}}) {
>+          $sigma += (($h - $mu)**2) * $v;
>       }
>-      $sigma = $sigma / ($count - 1);
>+      $sigma = $sigma / ($self->{count} - 1);
>
>       return $sigma;
>   }
>@@ -115,25 +112,21 @@ The function can then be used as:
>
> A third variable implementation, designed for arbitrarily large data sets:
>
>-  package variance;
>-
>-  my $mu = 0;
>-  my $count = 0;
>-  my $S = 0
>+  package variance3;
>
>-  sub new { bless [], shift; }
>+  sub new { bless {mu=>0, count=>0, S=>0}, shift; }
>
>   sub step {
>       my ( $self, $value ) = @_;
>-      $count++;
>-      $delta = $value - $mu;
>-      $mu = $mu + $delta/$count
>-      $S = $S + $delta*($value - $mu);
>+      $self->{count}++;
>+      my $delta = $value - $self->{mu};
>+      $self->{mu} += $delta/$self->{count};
>+      $self->{S} += $delta*($value - $self->{mu});
>   }
>
>   sub finalize {
>       my $self = $_[0];
>-      return $S / ($count - 1);
>+      return $self->{S} / ($self->{count} - 1);
>   }
>
> The function can then be used as:

>_______________________________________________
>DBD-SQLite mailing list
>DBD-SQLite at lists.scsys.co.uk
>http://lists.scsys.co.uk/cgi-bin/mailman/listinfo/dbd-sqlite