[Dbix-class] Complex join problem

Chris Cole chris at compbio.dundee.ac.uk
Tue Jun 9 15:35:09 GMT 2009


Peter Rabbitson wrote:
> You are not prefetching properly somewhere. Yet this is getting extremely
> confusing - please start fresh showing a "slow but correct" dbic query
> (the entire search chain) and the resulting SQL (presumably the one above).

Ok. Sorry for not getting back earlier, I've been sidetracked with
something else.

The problem stems in trying to retrieve columns from the reads2expt
table in a SELECT.

The below works as expected:

my $schema = DB::Schema->connect();
my @data;
my @rs = $schema->resultset('DbMatch')->search(
   {
      expt_id => $dataset,
      abundance => {'>' => 1},
      type => 'hairpin-miRNA'
   },
   {
      join => [
         {         # join on seq_reads -> reads2expt tables
            read_id => {
               reads2expts => 'read_id'
            }
         },
         'seq_id'  # join db_sequences table.
      ]
   }
);

foreach my $hit (@rs) {
   push @data, [ $hit->get_column('seq_id'), $hit->match_start ];
}
return(\@data);

The SQL for the above is:
SELECT me.seq_id, me.read_id, me.search_id, me.match_start,
me.match_end, me.query_start, me.mismatch FROM db_match me  JOIN
seq_reads read_id ON read_id.read_id = me.read_id LEFT JOIN reads2expt
reads2expts ON reads2expts.read_id = read_id.read_id LEFT JOIN seq_reads
read_id_2 ON read_id_2.read_id = reads2expts.read_id  JOIN db_sequences
seq_id ON seq_id.seq_id = me.seq_id WHERE ( ( abundance > ? AND expt_id
= ? AND type = ? ) ): '1', '1', 'hairpin-miRNA'


However, if I want to get the value for the 'abundance' column for each
row I need to use a search_related() step, which dramatically slows down
the process.


my $schema = DB::Schema->connect();
my @data;
my @rs = $schema->resultset('DbMatch')->search(
   {
      expt_id => $dataset,
      abundance => {'>' => 1},
      type => 'hairpin-miRNA'
   },
   {
      join => [
         {         # join on seq_reads -> reads2expt tables
            read_id => {
               reads2expts => 'read_id'
            }
         },
         'seq_id'  # join db_sequences table.
      ]
   }
);

foreach my $hit (@rs) {
   push @data, [ $hit->get_column('seq_id'), $hit->match_start,
$hit->read_id->reads2expts->search_related('read_id')->get_column('abundance')->first
];
}
return(\@data);

The SQL:
SELECT me.seq_id, me.read_id, me.search_id, me.match_start,
me.match_end, me.query_start, me.mismatch FROM db_match me  JOIN
seq_reads read_id ON read_id.read_id = me.read_id LEFT JOIN reads2expt
reads2expts ON reads2expts.read_id = read_id.read_id LEFT JOIN seq_reads
read_id_2 ON read_id_2.read_id = reads2expts.read_id  JOIN db_sequences
seq_id ON seq_id.seq_id = me.seq_id WHERE ( ( abundance > ? AND expt_id
= ? AND type = ? ) ): '1', '1', 'hairpin-miRNA'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '2'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '2'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '4'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '4'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '4'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '4'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '4'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '4'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '4'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '4'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '5'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '5'
SELECT me.read_id, me.length, me.seq FROM seq_reads me WHERE (
me.read_id = ? ): '8'
SELECT abundance FROM reads2expt me  JOIN seq_reads read_id ON
read_id.read_id = me.read_id WHERE ( me.read_id = ? ): '8'
etc...

I don't understand why it needs all the extra SELECTs, but is presumably
the cause for the slow-down. Also, I don't understand why I can specify
'abundance' as a search parameter, but then have to really hunt for the
'abundance' values for each row.

Any idea how I can speed this up and remove the need to use the
search_related() to hunt down the column values? This is quite a
simplified version of what I'd really like to do, so would like to be
able to do this more simply than using SQL statements via the DBI.

Table schemata are below for completeness.
Cheers,

Chris

__PACKAGE__->load_components("Core");
__PACKAGE__->table("db_match");
__PACKAGE__->add_columns(
  "seq_id",
  { data_type => "INT", default_value => "", is_nullable => 0, size => 10 },
  "read_id",
  { data_type => "BIGINT", default_value => "", is_nullable => 0, size
=> 20 },
  "search_id",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "match_start",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "match_end",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "query_start",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "mismatch",
  { data_type => "TINYINT", default_value => "", is_nullable => 0, size
=> 3 },
);
__PACKAGE__->set_primary_key("seq_id", "read_id", "search_id",
"match_start");
__PACKAGE__->belongs_to("seq_id", "DB::Schema::DbSequences", { seq_id =>
"seq_id" });
__PACKAGE__->belongs_to("read_id", "DB::Schema::SeqReads", { read_id =>
"read_id" });
__PACKAGE__->belongs_to(
  "search_id",
  "DB::Schema::SearchParams",
  { search_id => "search_id" },
);


__PACKAGE__->load_components("Core");
__PACKAGE__->table("seq_reads");
__PACKAGE__->add_columns(
  "read_id",
  { data_type => "BIGINT", default_value => undef, is_nullable => 0,
size => 20 },
  "length",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "seq",
  { data_type => "VARCHAR", default_value => "", is_nullable => 0, size
=> 255 },
);
__PACKAGE__->set_primary_key("read_id");
__PACKAGE__->has_many(
  "db_matches",
  "DB::Schema::DbMatch",
  { "foreign.read_id" => "self.read_id" },
);
__PACKAGE__->has_many(
  "genome_matches",
  "DB::Schema::GenomeMatch",
  { "foreign.read_id" => "self.read_id" },
);
__PACKAGE__->has_many(
  "reads2expts",
  "DB::Schema::Reads2expt",
  { "foreign.read_id" => "self.read_id" },
);

__PACKAGE__->load_components("Core");
__PACKAGE__->table("reads2expt");
__PACKAGE__->add_columns(
  "expt_id",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "read_id",
  { data_type => "BIGINT", default_value => "", is_nullable => 0, size
=> 20 },
  "abundance",
  { data_type => "INT", default_value => "", is_nullable => 0, size => 10 },
  "read_name",
  { data_type => "VARCHAR", default_value => "", is_nullable => 0, size
=> 255 },
);
__PACKAGE__->belongs_to("expt_id", "DB::Schema::Experiment", { expt_id
=> "expt_id" });
__PACKAGE__->belongs_to("read_id", "DB::Schema::SeqReads", { read_id =>
"read_id" });

__PACKAGE__->load_components("Core");
__PACKAGE__->table("db_sequences");
__PACKAGE__->add_columns(
  "seq_id",
  { data_type => "INT", default_value => undef, is_nullable => 0, size
=> 10 },
  "accession",
  { data_type => "VARCHAR", default_value => "", is_nullable => 0, size
=> 255 },
  "type",
  { data_type => "VARCHAR", default_value => "", is_nullable => 0, size
=> 255 },
  "seq",
  { data_type => "TEXT", default_value => "", is_nullable => 0, size =>
65535 },
  "db_id",
  { data_type => "SMALLINT", default_value => "", is_nullable => 0, size
=> 5 },
  "species_id",
  {
    data_type => "SMALLINT",
    default_value => undef,
    is_nullable => 1,
    size => 5,
  },
);
__PACKAGE__->set_primary_key("seq_id");
__PACKAGE__->has_many(
  "db_matches",
  "DB::Schema::DbMatch",
  { "foreign.seq_id" => "self.seq_id" },
);
__PACKAGE__->belongs_to("db_id", "DB::Schema::ExtrnDb", { db_id =>
"db_id" });
__PACKAGE__->belongs_to(
  "species_id",
  "DB::Schema::Species",
  { species_id => "species_id" },
);
__PACKAGE__->has_many(
  "mature2hairpin_mature_ids",
  "DB::Schema::Mature2hairpin",
  { "foreign.mature_id" => "self.seq_id" },
);
__PACKAGE__->has_many(
  "mature2hairpin_hairpin_ids",
  "DB::Schema::Mature2hairpin",
  { "foreign.hairpin_id" => "self.seq_id" },
);





More information about the DBIx-Class mailing list