[Bast-commits] r9912 - in ironman/Perlanet-IronMan/branches/dev: .
lib lib/Perlanet lib/Perlanet/IronMan t
idn at dev.catalyst.perl.org
idn at dev.catalyst.perl.org
Sat Jun 30 17:35:05 GMT 2012
Author: idn
Date: 2012-06-30 17:35:05 +0000 (Sat, 30 Jun 2012)
New Revision: 9912
Added:
ironman/Perlanet-IronMan/branches/dev/bin/
ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod
ironman/Perlanet-IronMan/branches/dev/lib/
ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/
ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm
ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/
ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm
ironman/Perlanet-IronMan/branches/dev/t/
ironman/Perlanet-IronMan/branches/dev/t/01_basic.t
ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t
Log:
Adding new version of Perlanet::IronMan
Added: ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/ironman-notes.pod 2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,18 @@
+
+
+=head2 Create Perlanet::IronMan::DB
+
+Writing posts to db needs to implement:
+
+ has 'feed' => (
+
+ after 'render' => sub {
+
+Reading feeds list from the db needs to implement:
+
+ override '_build_feeds' => sub {
+
+
+
+=head2
+
Added: ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan/DB.pm 2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,53 @@
+package Perlanet::IronMan::DB;
+
+use strict;
+use warnings;
+
+use Moose::Role;
+use namespace::autoclean;
+
+=head1 NAME
+
+Perlanet::IronMan::DB
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+=head1 ATTRIBUTES
+
+
+=cut
+
+use Carp qw( croak );
+
+
+
+
+
+
+
+
+
+
+
+=head1 AUTHOR
+
+Oliver Charles, <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton, <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
+1;
Added: ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/lib/Perlanet/IronMan.pm 2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,372 @@
+package Perlanet::IronMan;
+
+use 5.8.0;
+use strict;
+use warnings;
+
+use Moose;
+use IronMan::Schema;
+use HTML::Truncate;
+use Try::Tiny;
+
+use namespace::autoclean;
+
+use Carp;
+use YAML 'LoadFile';
+
+extends 'Perlanet';
+with qw(
+ Perlanet::Trait::Cache
+ Perlanet::Trait::Scrubber
+ Perlanet::Trait::Tidy
+);
+
+our $VERSION = '0.02_01';
+
+=head1 NAME
+
+Perlanet::Simple - a DWIM Perlanet
+
+=head1 SYNOPSIS
+
+ my $perlanet = Perlanet::Simple->new_with_config('perlanet.yaml')
+ $perlanet->run;
+
+=head1 DESCRIPTION
+
+L<Perlanet> provides the driving force behind all Perlanet applications,
+but it doesn't do a whole lot, which means you would normally have to write
+the functionality you require. However, in the motive of simplicity,
+Perlanet::Simple glues enough stuff together to allow you to get a very quick
+planet working out of the box.
+
+Perlanet::Simple takes the standard Perlanet module, and adds support for
+caching, OPML feed generation, and L<Template> rendering support. It will
+also attempt to clean each post using both L<HTML::Scrubber> and L<HTML::Tidy>.
+
+=head2 Configuration
+
+Perlanet::Simple uses L<Perlanet::Trait::YAMLConfig> to allow you to specify
+configuration through a file.
+
+=cut
+
+around '_build_ua' => sub {
+ my $orig = shift;
+ my $self = shift;
+ my $ua = $self->$orig;
+ $ua->agent($self->agent) if $self->agent;
+ return $ua;
+};
+
+=head2 clean_html
+
+Clean a html string so it is suitable for display
+
+Takes a HTML string and returns a "cleaned" HTML string.
+
+=cut
+
+around 'clean_html' => sub {
+ my $orig = shift;
+ my ($self, $html) = @_;
+
+ $html = $self->$orig($html);
+
+ # hack to remove a particularly nasty piece of blogspot HTML
+ $html =~ s|<div align="justify"></div>||g;
+
+ # We don't want to remove the style attribute completely,
+ # but a clear: both will screw our layout up...
+ $html =~ s/style="(.*?)clear:(.*?);/style="$1/;
+
+ return $html;
+};
+
+=head2 feeds
+
+Feeds are built from the Schema::IronMan database overriding the internal
+defaults of utilising feeds specified in either the configuration file or as
+configuration options when creating the Perlanet object.
+
+=cut
+
+override 'feeds' => sub {
+ my $self = shift;
+ return [ map {
+ Perlanet::Feed->new(
+ id => $_->id,
+ url => $_->url || $_->link,
+ website => $_->link || $_->url,
+ title => $_->title,
+ author => $_->owner,
+ );
+ } $self->schema->resultset('Feed')->all ];
+};
+
+=head2 render
+
+Given a Perlanet::Entry object, store the entry as a post in the
+Schema::IronMan database
+
+=cut
+
+override 'render' => sub {
+ my $self = shift;
+ my $post = shift;
+
+ my $posts = $post->entries;
+
+ foreach my $post (@{$posts}) {
+
+ # Set the summary text to the summary or body if not supplied
+ # This should probably be in config rather than hard coded.
+ my $summary = $post->_entry->summary->body || $post->_entry->content->body;
+
+# my $truncated = eval { $self->truncator->truncate($summary) };
+# if ($@) {
+# warn "Truncate failed: $@";
+# $truncated = $summary;
+# }
+#
+# $summary = $truncated;
+
+ # Can't store a post if we can't work out the URL to link to it.
+ unless(defined($post->_entry->link)) {
+ print("ERROR. Can't deal with lack of URL returned from XML::Feed::Entry for feed '" . $post->feed->url . "'\n");
+ next;
+ }
+
+ # Get the entry tags
+ my @tags = $post->_entry->category;
+
+ try {
+ # Do that whole insert thing...
+ $self->schema->resultset('Post')->create( {
+ feed_id => $post->feed->id,
+ author => $post->_entry->author || $post->feed->title,
+ tags => join(",", @tags),
+ url => $post->_entry->link,
+ title => $post->_entry->title,
+ posted_on => $post->_entry->issued || DateTime->now,
+ summary => $summary,
+ summary_filtered => $self->clean($summary),
+ body => $post->_entry->content->body,
+ body_filtered => $self->clean($post->_entry->content->body),
+ } );
+ }
+
+ catch {
+ Carp::cluck("ERROR: $_\n");
+ Carp::cluck("ERROR: Post is:\n" . Dumper($post) . "\n");
+ Carp::cluck("ERROR: Link URL is '" . $post->_entry->link . "'\n");
+ };
+ }
+};
+
+
+
+
+
+
+
+=head2 schema
+
+IronMan::Schema object must be passed in at creation time
+
+=cut
+
+has 'schema' => (
+ isa => 'IronMan::Schema',
+ is => 'ro',
+ required => 1,
+);
+
+
+=head2 select_entries
+
+The select entries function takes an array of Perlanet::Feed objects and
+filters it to remove duplicates. The non-duplicated feed entries are then
+returned to the caller as an array of Perlanet::Entry objects.
+
+ my $perlanet_entries = select_entries( @{ $perlanet_feeds });
+
+=cut
+
+override 'select_entries' => sub {
+ my ($self, @feeds) = @_;
+
+ # Perlanet::Feed objects to return
+ my @feed_entries;
+
+ # Iterate over the feeds working on them.
+ for my $feed (@feeds) {
+
+ # Fetch the XML::Feed:Entry objects from the Perlanet::Feed object
+ my @entries = $feed->_xml_feed->entries;
+
+ # Iterate over the XML::Feed::Entry objects
+ foreach my $xml_entry (@entries) {
+
+ # Problem with XML::Feed's conversion of RSS to Atom
+ if ($xml_entry->issued && ! $xml_entry->modified) {
+ $xml_entry->modified($xml_entry->issued);
+ }
+
+ # Always set category to something
+ unless(defined($xml_entry->category)) {
+ $xml_entry->category('');
+ }
+
+ #print(Dumper($xml_entry->tags));
+ #print(Dumper($xml_entry->category));
+
+ # Filter on keywords. This fails for HTML encoded languages.
+ # See http://onperl.ru/onperl/atom.xml for examples
+ # specifically http://onperl.ru/onperl/2010/02/post.html
+ # FIXME
+ unless($self->_filter_entry_on_keywords($xml_entry)) {
+ #print("Skipping due to no keyword match for '" . $xml_entry->link . "'\n");
+ next;
+ }
+
+ # De-duplicate
+ unless($self->_filter_entry_for_duplicate($xml_entry)) {
+ #print("Skipping due to duplicate match for '" . $xml_entry->link . "'\n");
+ next;
+ }
+
+ # Create a Perlanet::Entry object from the XML data retrieved
+ my $entry = Perlanet::Entry->new(
+ _entry => $xml_entry,
+ feed => $feed
+ );
+
+ push @feed_entries, $entry;
+ }
+ }
+
+ return @feed_entries;
+};
+
+has 'truncator' => (
+ is => 'rw',
+ lazy_build => 1
+);
+
+=head2 _build_truncator
+
+Construct a HTML::Truncator object for truncating posts
+
+=cut
+
+sub _build_truncator {
+ my $self = shift;
+ my $html_truncate = HTML::Truncate->new(repair=>1);
+ $html_truncate->chars(250);
+ $html_truncate->ellipsis(" [...]");
+
+ return $html_truncate;
+}
+
+=head2 _filter_entry_for_duplicate
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+=cut
+
+sub _filter_entry_for_duplicate {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ my $count = $self->schema->resultset('Post')->search(
+ { url => $xml_entry->link }
+ )->count;
+
+ if($count > 0) {
+ #print("Duplicate post found for url '" . $xml_entry->link . "'\n");
+ return 0;
+ }
+
+ #print("Post at url '" . $xml_entry->link . "' is new\n");
+
+ return 1;
+}
+
+=head2 _filter_entry_on_keywords
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+THIS FUNCTION IS BROKEN. SEE THE NOTES AT IT'S CALL.
+
+=cut
+
+sub _filter_entry_on_keywords {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ # print("Called to keyword filter url '" . $xml_entry->link . "'\n");
+
+ # If no filter is defined, then we pass.
+ unless(defined($self->{cfg}->{filter}->{keywords})) {
+ print("No filter defined so skipping this check.\n");
+ return 1;
+ }
+
+ my $filters = $self->{cfg}->{filter}->{keywords};
+
+ # Iterate through the defined filters checking them
+ foreach my $filter (@ { $filters } ) {
+
+ #print("Checking for filter '$filter'\n");
+
+ # If tags have been defined, check them.
+ if(defined($xml_entry->tags)) {
+ if(grep(/$filter/i, $xml_entry->tags)) {
+ return 1;
+ }
+ }
+
+ # Check the title if defined
+ if(defined($xml_entry->title)) {
+ if(grep(/$filter/i, $xml_entry->title)) {
+ return 1;
+ }
+ }
+
+ # Check the body if defined
+ if(defined($xml_entry->content->body)) {
+ if($xml_entry->content->body =~ m/$filter/i) {
+ return 1;
+ }
+ }
+ }
+
+ # We got to the end and we didn't get a match. Fail.
+ return 0;
+}
+
+=head1 AUTHOR
+
+Oliver Charles (aCiD2) <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton (idn), <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
+1;
Added: ironman/Perlanet-IronMan/branches/dev/t/01_basic.t
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/t/01_basic.t (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/t/01_basic.t 2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,9 @@
+use Test::More;
+use_ok('Perlanet::IronMan');
+
+my $schema = IronMan::Schema->connect("dbi:SQLite:subscriptions.db");
+
+ok(my $p = Perlanet::IronMan->new({ schema => $schema }));
+isa_ok($p, 'Perlanet');
+
+done_testing();
Added: ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t
===================================================================
--- ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t (rev 0)
+++ ironman/Perlanet-IronMan/branches/dev/t/02_read_feed_list_from_db.t 2012-06-30 17:35:05 UTC (rev 9912)
@@ -0,0 +1,16 @@
+use Test::More;
+use_ok('Perlanet::IronMan');
+
+my $schema = IronMan::Schema->connect("dbi:SQLite:subscriptions.db");
+
+ok(my $p = Perlanet::IronMan->new({ schema => $schema }));
+isa_ok($p, 'Perlanet');
+
+#use Data::Dumper;
+#print(Dumper($p->feeds));
+
+isa_ok($p->feeds, 'ARRAY');
+
+$p->run();
+
+done_testing();
More information about the Bast-commits
mailing list