[Bast-commits] r8832 - ironman/Perlanet-IronMan/lib/Perlanet
idn at dev.catalyst.perl.org
idn at dev.catalyst.perl.org
Sun Feb 28 21:06:21 GMT 2010
Author: idn
Date: 2010-02-28 21:06:21 +0000 (Sun, 28 Feb 2010)
New Revision: 8832
Modified:
ironman/Perlanet-IronMan/lib/Perlanet/IronMan.pm
Log:
Latest round of commits. Large number of fixes. Correct use of Perlanet::Entry object out of select_entries means that the feed object is correctly stored within the entry object (or a ref to it at any rate). Comments added and boilerplate pod. Lots of other stuff that I can't remember
Modified: ironman/Perlanet-IronMan/lib/Perlanet/IronMan.pm
===================================================================
--- ironman/Perlanet-IronMan/lib/Perlanet/IronMan.pm 2010-02-28 04:49:17 UTC (rev 8831)
+++ ironman/Perlanet-IronMan/lib/Perlanet/IronMan.pm 2010-02-28 21:06:21 UTC (rev 8832)
@@ -1,6 +1,10 @@
package Perlanet::IronMan;
+
+use 5.8.0;
+use strict;
+use warnings;
+
use Moose;
-
use IronMan::Schema;
use Perlanet::Feed;
use Data::Dumper;
@@ -10,97 +14,107 @@
extends 'Perlanet';
use Perlanet::Entry;
+our $VERSION = '0.01_01';
+
+=head1 NAME
+
+Perlanet::IronMan
+
+This module extends Perlanet for the specific requirements of the Enlightened
+Perl Organisation IronMan project.
+
+=head1 SYNOPSIS
+
+
+
+=head1 DESCRIPTION
+
+This module uses an IronMan::Schema database to define feeds, collect the feeds
+and then store them back into the IronMan::Schema database.
+
+=cut
+
+# This is some kind of Moose magic that I don't understand.... I think this
+# means tha schema attribute of this object is built using the _build_schema
+# method when the schema attribute is first used
+
has 'schema' => (
is => 'rw',
lazy_build => 1,
);
-sub _build_schema {
- my $self = shift;
- return IronMan::Schema->connect(
- $self->cfg->{db}{dsn},
- $self->cfg->{db}{username},
- $self->cfg->{db}{password},
- );
-}
-
-override '_build_feeds' => sub {
- my $self = shift;
- return [ map {
- Perlanet::Feed->new(
- id => $_->id,
- url => $_->url,
- website => $_->link || $_->url,
- title => $_->title,
- author => $_->owner,
- );
- } $self->schema->resultset('Feed')->all ];
-};
-
=head2 select_entries
-The select entries function takes an array of feed entries and filters it to
-remove duplicates. The non-duplicated feed entries are then returned to the
-caller as an array of feed entries.
+The select entries function takes an array of Perlanet::Feed objects and
+filters it to remove duplicates. The non-duplicated feed entries are then
+returned to the caller as an array of Perlanet::Entry objects.
-Okay, that's what I thought this function does, but it doesn't seem to quite be
-that straight forward. The objects passed in and the objects returned are not
-the same.
+ my $perlanet_entries = select_entries( @{ $perlanet_feeds });
=cut
override 'select_entries' => sub {
my ($self, @feeds) = @_;
- my @entries;
+ # Perlanet::Feed objects to return
+ my @feed_entries;
+
+ # Iterate over the feeds working on them.
for my $feed (@feeds) {
- #print(Dumper($feed));
+ # Fetch the XML::Feed:Entry objects from the Perlanet::Feed object
+ my @entries = $feed->_xml_feed->entries;
- my $unseen = $self->schema->resultset('Feed')->filter_unseen($feed->url, [ $feed->_xml_feed->entries ]);
+ # Iterate over the XML::Feed::Entry objects
+ foreach my $xml_entry (@entries) {
- push @entries, @$unseen;
- }
+ # Problem with XML::Feed's conversion of RSS to Atom
+ if ($xml_entry->issued && ! $xml_entry->modified) {
+ $xml_entry->modified($xml_entry->issued);
+ }
- return @entries;
-};
+ #print(Dumper($xml_entry));
-override 'save' => sub { };
+ # Always set category to something
+ unless(defined($xml_entry->category)) {
+ $xml_entry->category('');
+ }
-override 'render' => sub {
- my ($self, $feed, $post) = @_;
+ # Filter on keywords. This fails for HTML encoded languages.
+ # See http://onperl.ru/onperl/atom.xml for examples
+ # specifically http://onperl.ru/onperl/2010/02/post.html
+ # FIXME
+ unless($self->_filter_entry_on_keywords($xml_entry)) {
+ next;
+ }
-# my $new = [
-# map {
-#
-#
-#{
-# feed_id => 'davorg',
-# author => 'Dave',
-# tags => '',
-# url => $_->link,
-# title => $_->title,
-# posted_on => $_->issued || DateTime->from_epoch(epoch => 0),
-# summary => $_->summary->body,
-# summary_filtered => $self->clean($_->summary->body),
-# body => $_->content->body,
-# body_filtered => $self->clean($_->content->body),
-# }
+ # De-duplicate
+ unless($self->_filter_entry_for_duplicate($xml_entry)) {
+ next;
+ }
- #map($self->_db_insert($_), @{ $post->entries });
+ # Create a Perlanet::Entry object from the XML data retrieved
+ my $entry = Perlanet::Entry->new(
+ _entry => $xml_entry,
+ feed => $feed
+ );
- $self->_db_insert($feed, $post);
+ push @feed_entries, $entry;
+ }
+ }
- #print(Dumper($new));
-
- #$self->schema->resultset('Post')->populate($new);
+ return @feed_entries;
};
+=head2 render
+Given a Perlanet::Entry object, store the entry as a post in the
+Schema::IronMan database
-sub _db_insert {
+=cut
+
+override 'render' => sub {
my $self = shift;
- my $feed = shift;
my $post = shift;
my $posts = $post->entries;
@@ -110,52 +124,183 @@
foreach my $post (@{$posts}) {
- # Check we've got tags on this post
- unless(defined($post->category)) {
- next;
- }
+ # Set the summary text if not explicity specified (250 characters)
+ # This should probably be in config rather than hard coded.
+ my $summary = $post->_entry->summary->body || substr($post->_entry->content->body, 0, 250) . "...";
- my @tags = $post->category;
+ #print(Dumper($post));
+ #exit;
- # Check that ironman or perl is one of the tags or in the body
- unless(grep(/ironman/i, @tags) ||
- grep(/perl/i, @tags) ||
- $post->content->body =~ /ironman/i ||
- $post->content->body =~ /perl/i
- ) {
+ # Can't store a post if we can't work out the URL to link to it.
+ unless(defined($post->_entry->link)) {
+ print("ERROR. Can't deal with lack of URL returned from XML::Feed::Entry for feed '" . $post->feed->url . "'\n");
next;
}
- # Set the summary text if not explicity specified
- my $summary = $post->summary->body || substr($post->content->body, 0, 250) . "...";
-
- #print(Dumper($post));
- #exit;
-
try {
-
+ # Do that whole insert thing...
$self->schema->resultset('Post')->populate( [ {
- feed_id => $feed->id,
- author => $post->author,
- tags => @tags,
- url => $post->link,
- title => $post->title,
- posted_on => $post->issued || DateTime->from_epoch(epoch => 0),
+ feed_id => $post->feed->id,
+ author => $post->_entry->author,
+ tags => $post->_entry->category,
+ url => $post->_entry->link,
+ title => $post->_entry->title,
+ posted_on => $post->_entry->issued || DateTime->from_epoch(epoch => 0),
summary => $summary,
summary_filtered => $self->clean($summary),
- body => $post->content->body,
- body_filtered => $self->clean($post->content->body),
+ body => $post->_entry->content->body,
+ body_filtered => $self->clean($post->_entry->content->body),
} ] );
}
catch {
- Carp::cluck("ERROR: $!\n");
- Carp::cluck("ERROR: Link URL is '" . $post->link . "'\n");
+ Carp::cluck("ERROR: $_\n");
Carp::cluck("ERROR: Post is:\n" . Dumper($post) . "\n");
+ Carp::cluck("ERROR: Link URL is '" . $post->_entry->link . "'\n");
};
+
+ #print(Dumper($post));
+
}
+};
+
+=head2 save
+
+Save is not required within the context of this module so we override it.
+
+=cut
+
+override 'save' => sub { };
+
+=head2 _build_feeds
+
+Feeds are built from the Schema::IronMan database overriding the internal
+defaults of utilising feeds specified in either the configuration file or as
+configuration options when creating the Perlanet object.
+
+=cut
+
+override '_build_feeds' => sub {
+ my $self = shift;
+ return [ map {
+ Perlanet::Feed->new(
+ id => $_->id,
+ url => $_->url || $_->link,
+ website => $_->link || $_->url,
+ title => $_->title,
+ author => $_->owner,
+ );
+ } $self->schema->resultset('Feed')->all ];
+};
+
+=head2 _build_schema
+
+Build and return a schema object the first time that the schema attribute of
+this object is accessed.
+
+=cut
+
+sub _build_schema {
+ my $self = shift;
+ return IronMan::Schema->connect(
+ $self->cfg->{db}{dsn},
+ $self->cfg->{db}{username},
+ $self->cfg->{db}{password},
+ );
}
+=head2 _filter_entry_for_duplicate
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+=cut
+
+sub _filter_entry_for_duplicate {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ my $count = $self->schema->resultset('Post')->search(
+ { url => $xml_entry->link }
+ )->count;
+
+ if($count > 0) {
+ #print("Duplicate post found for url '" . $xml_entry->link . "'\n");
+ return 0;
+ }
+
+ #print("Post at url '" . $xml_entry->link . "' is new\n");
+
+ return 1;
+}
+
+=head2 _filter_entry_on_keywords
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+THIS FUNCTION IS BROKEN. SEE THE NOTES AT IT'S CALL.
+
+=cut
+
+sub _filter_entry_on_keywords {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ # print("Called to keyword filter url '" . $xml_entry->link . "'\n");
+
+ # If no filter is defined, then we pass.
+ unless(defined($self->{cfg}->{filter}->{keywords})) {
+ print("No filter defined so skipping this check.\n");
+ return 1;
+ }
+
+ my $filters = $self->{cfg}->{filter}->{keywords};
+
+ # Iterate through the defined filters checking them
+ foreach my $filter (@ { $filters } ) {
+
+ #print("Checking for filter '$filter'\n");
+
+ # If tags have been defined, check them.
+ if(defined($xml_entry->tags)) {
+ if(grep(/$filter/i, $xml_entry->tags)) {
+ return 1;
+ }
+ }
+
+ # Check the body
+ if($xml_entry->content->body =~ m/$filter/i) {
+ return 1;
+ }
+ }
+
+ # We got to the end and we didn't get a match. Fail.
+ return 0;
+}
+
+
+=head1 AUTHOR
+
+Oliver Charles (aCiD2) <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton (idn), <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
no Moose;
__PACKAGE__->meta->make_immutable;
1;
More information about the Bast-commits
mailing list