[Bast-commits] r9912 - in ironman/Perlanet-IronMan/branches/dev: . lib lib/Perlanet lib/Perlanet/IronMan t

idn at dev.catalyst.perl.org idn at dev.catalyst.perl.org
Sat Jun 30 17:35:05 GMT 2012


Author: idn
Date: 2012-06-30 17:35:05 +0000 (Sat, 30 Jun 2012)
New Revision: 9912

Added:
   ironman/Perlanet-IronMan/branches/dev/bin/
   ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod
   ironman/Perlanet-IronMan/branches/dev/lib/
   ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/
   ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm
   ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/
   ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm
   ironman/Perlanet-IronMan/branches/dev/t/
   ironman/Perlanet-IronMan/branches/dev/t/01_basic.t
   ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t
Log:
Adding new version of Perlanet::IronMan

Added: ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod	                        (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod	2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,18 @@
+
+
+=head2 Create Perlanet::IronMan::DB
+
+Writing posts to db needs to implement:
+
+    has 'feed' => (
+
+    after 'render' => sub {
+
+Reading feeds list from the db needs to implement:
+    
+    override '_build_feeds' => sub {
+
+
+
+=head2 
+

Added: ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm	                        (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm	2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,53 @@
+package Perlanet::IronMan::DB;
+
+use strict;
+use warnings;
+
+use Moose::Role;
+use namespace::autoclean;
+
+=head1 NAME
+
+Perlanet::IronMan::DB
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+=head1 ATTRIBUTES
+
+
+=cut
+
+use Carp qw( croak );
+
+
+
+
+
+
+
+
+
+
+
+=head1 AUTHOR
+
+Oliver Charles, <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton, <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
+1;

Added: ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm	                        (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm	2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,372 @@
+package Perlanet::IronMan;
+
+use 5.8.0;
+use strict;
+use warnings;
+
+use Moose;
+use IronMan::Schema;
+use HTML::Truncate;
+use Try::Tiny;
+
+use namespace::autoclean;
+
+use Carp;
+use YAML 'LoadFile';
+
+extends 'Perlanet';
+with qw(
+  Perlanet::Trait::Cache
+  Perlanet::Trait::Scrubber
+  Perlanet::Trait::Tidy
+);
+
+our $VERSION = '0.02_01';
+
+=head1 NAME
+
+Perlanet::Simple - a DWIM Perlanet
+
+=head1 SYNOPSIS
+
+  my $perlanet = Perlanet::Simple->new_with_config('perlanet.yaml')
+  $perlanet->run;
+
+=head1 DESCRIPTION
+
+L<Perlanet> provides the driving force behind all Perlanet applications,
+but it doesn't do a whole lot, which means you would normally have to write
+the functionality you require. However, in the motive of simplicity,
+Perlanet::Simple glues enough stuff together to allow you to get a very quick
+planet working out of the box.
+
+Perlanet::Simple takes the standard Perlanet module, and adds support for
+caching, OPML feed generation, and L<Template> rendering support. It will
+also attempt to clean each post using both L<HTML::Scrubber> and L<HTML::Tidy>.
+
+=head2 Configuration
+
+Perlanet::Simple uses L<Perlanet::Trait::YAMLConfig> to allow you to specify
+configuration through a file.
+
+=cut
+
+around '_build_ua' => sub {
+  my $orig = shift;
+  my $self = shift;
+  my $ua = $self->$orig;
+  $ua->agent($self->agent) if $self->agent;
+  return $ua;
+};
+
+=head2 clean_html
+
+Clean a html string so it is suitable for display
+
+Takes a HTML string and returns a "cleaned" HTML string.
+
+=cut
+
+around 'clean_html' => sub {
+  my $orig = shift;
+  my ($self, $html) = @_;
+
+  $html = $self->$orig($html);
+
+  # hack to remove a particularly nasty piece of blogspot HTML
+  $html =~ s|<div align="justify"></div>||g;
+
+  # We don't want to remove the style attribute completely,
+  # but a clear: both will screw our layout up...
+  $html =~ s/style="(.*?)clear:(.*?);/style="$1/;
+
+  return $html;
+};
+
+=head2 feeds
+
+Feeds are built from the Schema::IronMan database overriding the internal
+defaults of utilising feeds specified in either the configuration file or as
+configuration options when creating the Perlanet object.
+
+=cut
+
+override 'feeds' => sub {
+    my $self = shift;
+    return [ map {
+        Perlanet::Feed->new(
+            id => $_->id,
+            url => $_->url || $_->link,
+            website => $_->link || $_->url,
+            title => $_->title,
+            author => $_->owner,
+        );
+    } $self->schema->resultset('Feed')->all ];
+};
+
+=head2 render
+
+Given a Perlanet::Entry object, store the entry as a post in the
+Schema::IronMan database
+
+=cut
+
+override 'render' => sub {
+    my $self = shift;
+    my $post = shift;
+
+    my $posts = $post->entries;
+
+    foreach my $post (@{$posts}) {
+
+        # Set the summary text to the summary or body if not supplied
+        # This should probably be in config rather than hard coded.
+        my $summary = $post->_entry->summary->body || $post->_entry->content->body;
+
+#        my $truncated = eval { $self->truncator->truncate($summary) };
+#        if ($@) {
+#            warn "Truncate failed: $@";
+#            $truncated = $summary;
+#        }
+#
+#        $summary = $truncated;
+
+        # Can't store a post if we can't work out the URL to link to it.
+        unless(defined($post->_entry->link)) {
+            print("ERROR.  Can't deal with lack of URL returned from XML::Feed::Entry for feed '" . $post->feed->url . "'\n");
+            next;
+        }
+
+        # Get the entry tags
+        my @tags = $post->_entry->category;
+
+        try {
+            # Do that whole insert thing...
+            $self->schema->resultset('Post')->create( {
+                feed_id          => $post->feed->id,
+                author           => $post->_entry->author || $post->feed->title,
+                tags             => join(",", @tags),
+                url              => $post->_entry->link,
+                title            => $post->_entry->title,
+                posted_on        => $post->_entry->issued || DateTime->now,
+                summary          => $summary,
+                summary_filtered => $self->clean($summary),
+                body             => $post->_entry->content->body,
+                body_filtered    => $self->clean($post->_entry->content->body),
+            } );
+        }
+
+        catch {
+            Carp::cluck("ERROR: $_\n");
+            Carp::cluck("ERROR: Post is:\n" . Dumper($post) . "\n");
+            Carp::cluck("ERROR: Link URL is '" . $post->_entry->link . "'\n");
+        };
+    }
+};
+
+
+
+
+
+
+
+=head2 schema
+
+IronMan::Schema object must be passed in at creation time
+
+=cut
+
+has 'schema' => (
+    isa         => 'IronMan::Schema',
+    is          => 'ro',
+    required    => 1,
+);
+
+
+=head2 select_entries
+
+The select entries function takes an array of Perlanet::Feed objects and
+filters it to remove duplicates.  The non-duplicated feed entries are then
+returned to the caller as an array of Perlanet::Entry objects.
+
+    my $perlanet_entries = select_entries( @{ $perlanet_feeds });
+
+=cut
+
+override 'select_entries' => sub {
+    my ($self, @feeds) = @_;
+
+    # Perlanet::Feed objects to return
+    my @feed_entries;
+
+    # Iterate over the feeds working on them.
+    for my $feed (@feeds) {
+
+        # Fetch the XML::Feed:Entry objects from the Perlanet::Feed object
+        my @entries = $feed->_xml_feed->entries;
+
+        # Iterate over the XML::Feed::Entry objects
+        foreach my $xml_entry (@entries) {
+
+            # Problem with XML::Feed's conversion of RSS to Atom
+            if ($xml_entry->issued && ! $xml_entry->modified) {
+              $xml_entry->modified($xml_entry->issued);
+            }
+
+            # Always set category to something
+            unless(defined($xml_entry->category)) {
+                $xml_entry->category('');
+            }
+
+            #print(Dumper($xml_entry->tags));
+            #print(Dumper($xml_entry->category));
+
+            # Filter on keywords.  This fails for HTML encoded languages.
+            # See http://onperl.ru/onperl/atom.xml for examples
+            # specifically http://onperl.ru/onperl/2010/02/post.html
+            # FIXME
+            unless($self->_filter_entry_on_keywords($xml_entry)) {
+                #print("Skipping due to no keyword match for '" . $xml_entry->link . "'\n");
+                next;
+            }
+
+            # De-duplicate
+            unless($self->_filter_entry_for_duplicate($xml_entry)) {
+                #print("Skipping due to duplicate match for '" . $xml_entry->link . "'\n");
+                next;
+            }
+
+            # Create a Perlanet::Entry object from the XML data retrieved
+            my $entry = Perlanet::Entry->new(
+                            _entry => $xml_entry,
+                            feed   => $feed
+            );
+
+            push @feed_entries, $entry;
+        }
+   }
+
+    return @feed_entries;
+};
+
+has 'truncator' => (
+    is         => 'rw',
+    lazy_build => 1
+);
+
+=head2 _build_truncator
+
+Construct a HTML::Truncator object for truncating posts
+
+=cut
+
+sub _build_truncator {
+    my $self = shift;
+    my $html_truncate = HTML::Truncate->new(repair=>1);
+    $html_truncate->chars(250);
+    $html_truncate->ellipsis(" [...]");
+
+    return $html_truncate;
+}
+
+=head2 _filter_entry_for_duplicate
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+=cut
+
+sub _filter_entry_for_duplicate {
+    my $self = shift;
+    my $xml_entry = shift;
+
+    my $count = $self->schema->resultset('Post')->search(
+        { url => $xml_entry->link }
+    )->count;
+
+    if($count > 0) {
+        #print("Duplicate post found for url '" . $xml_entry->link . "'\n");
+        return 0;
+    }
+
+    #print("Post at url '" . $xml_entry->link . "' is new\n");
+
+    return 1;
+}
+
+=head2 _filter_entry_on_keywords
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+THIS FUNCTION IS BROKEN.  SEE THE NOTES AT IT'S CALL.
+
+=cut
+
+sub _filter_entry_on_keywords {
+    my $self = shift;
+    my $xml_entry = shift;
+
+    # print("Called to keyword filter url '" . $xml_entry->link . "'\n");
+
+    # If no filter is defined, then we pass.
+    unless(defined($self->{cfg}->{filter}->{keywords})) {
+        print("No filter defined so skipping this check.\n");
+        return 1;
+    }
+
+    my $filters = $self->{cfg}->{filter}->{keywords};
+
+    # Iterate through the defined filters checking them
+    foreach my $filter (@ { $filters } ) {
+
+        #print("Checking for filter '$filter'\n");
+
+        # If tags have been defined, check them.
+        if(defined($xml_entry->tags)) {
+            if(grep(/$filter/i, $xml_entry->tags)) {
+                return 1;
+            }
+        }
+
+        # Check the title if defined
+        if(defined($xml_entry->title)) {
+            if(grep(/$filter/i, $xml_entry->title)) {
+                return 1;
+            }
+        }
+
+        # Check the body if defined
+        if(defined($xml_entry->content->body)) {
+            if($xml_entry->content->body =~ m/$filter/i) {
+                return 1;
+            }
+        }
+    }
+
+    # We got to the end and we didn't get a match.  Fail.
+    return 0;
+}
+
+=head1 AUTHOR
+
+Oliver Charles (aCiD2) <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton (idn), <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
+1;

Added: ironman/Perlanet-IronMan/branches/dev/t/01_basic.t
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/t/01_basic.t	                        (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/t/01_basic.t	2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,9 @@
+use Test::More;
+use_ok('Perlanet::IronMan');
+
+my $schema = IronMan::Schema->connect("dbi:SQLite:subscriptions.db");
+
+ok(my $p = Perlanet::IronMan->new({ schema => $schema }));
+isa_ok($p, 'Perlanet');
+
+done_testing();

Added: ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t	                        (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t	2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,16 @@
+use Test::More;
+use_ok('Perlanet::IronMan');
+
+my $schema = IronMan::Schema->connect("dbi:SQLite:subscriptions.db");
+
+ok(my $p = Perlanet::IronMan->new({ schema => $schema }));
+isa_ok($p, 'Perlanet');
+
+#use Data::Dumper;
+#print(Dumper($p->feeds));
+
+isa_ok($p->feeds, 'ARRAY');
+
+$p->run();
+
+done_testing();




More information about the Bast-commits mailing list