[Bast-commits] r9950 - in ironman/Perlanet-IronMan/tags: .
Perlanet-IronMan-0_01_02/lib/Perlanet
idn at dev.catalyst.perl.org
idn at dev.catalyst.perl.org
Sat Sep 22 11:13:43 GMT 2012
Author: idn
Date: 2012-09-22 11:13:43 +0000 (Sat, 22 Sep 2012)
New Revision: 9950
Added:
ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/
ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm
Removed:
ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm
Log:
Copy before I nuke it.
Deleted: ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm
===================================================================
--- ironman/Perlanet-IronMan/branches/ironboy/lib/Perlanet/IronMan.pm 2012-07-22 22:48:02 UTC (rev 9942)
+++ ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm 2012-09-22 11:13:43 UTC (rev 9950)
@@ -1,412 +0,0 @@
-package Perlanet::IronMan;
-
-use 5.8.0;
-use strict;
-use warnings;
-
-use Moose;
-use IronMan::Schema;
-use HTML::Truncate;
-use Perlanet::Feed;
-use Data::Dumper;
-use Try::Tiny;
-use Carp;
-
-extends 'Perlanet';
-use Perlanet::Entry;
-
-our $VERSION = '0.01_01';
-
-=head1 NAME
-
-Perlanet::IronMan
-
-This module extends Perlanet for the specific requirements of the Enlightened
-Perl Organisation IronMan project.
-
-=head1 SYNOPSIS
-
-
-=head1 DESCRIPTION
-
-This module uses an IronMan::Schema database to define feeds, collect the feeds
-and then store them back into the IronMan::Schema database.
-
-=cut
-
-# This is some kind of Moose magic that I don't understand.... I think this
-# means tha schema attribute of this object is built using the _build_schema
-# method when the schema attribute is first used
-
-has 'schema' => (
- is => 'rw',
- lazy_build => 1,
-);
-
-has 'truncator' => (
- is => 'rw',
- lazy_build => 1
-);
-
-sub _build_scrubber {
- my $self = shift;
-
- my %scrub_rules = (
- img => {
- src => qr{^http://}, # only URL with http://
- alt => 1, # alt attributes allowed
- '*' => 0, # deny all others
- },
- style => 0,
- script => 0,
- );
-
- # Definitions for HTML::Scrub
- my %scrub_def = (
- '*' => 1, # default rule, allow all attributes
- 'href' => qr{^(?!(?:java)?script)}i,
- 'src' => qr{^(?!(?:java)?script)}i,
- 'cite' => '(?i-xsm:^(?!(?:java)?script))',
- 'language' => 0,
- 'name' => 1, # could be sneaky, but hey ;)
- 'onblur' => 0,
- 'onchange' => 0,
- 'onclick' => 0,
- 'ondblclick' => 0,
- 'onerror' => 0,
- 'onfocus' => 0,
- 'onkeydown' => 0,
- 'onkeypress' => 0,
- 'onkeyup' => 0,
- 'onload' => 0,
- 'onmousedown' => 0,
- 'onmousemove' => 0,
- 'onmouseout' => 0,
- 'onmouseover' => 0,
- 'onmouseup' => 0,
- 'onreset' => 0,
- 'onselect' => 0,
- 'onsubmit' => 0,
- 'onunload' => 0,
- 'src' => 0,
- 'type' => 0,
- 'style' => 0,
- );
-
- my $scrub = HTML::Scrubber->new;
- $scrub->rules(%scrub_rules);
- $scrub->default(1, \%scrub_def);
-
- return $scrub;
-}
-
-=head2 select_entries
-
-The select entries function takes an array of Perlanet::Feed objects and
-filters it to remove duplicates. The non-duplicated feed entries are then
-returned to the caller as an array of Perlanet::Entry objects.
-
- my $perlanet_entries = select_entries( @{ $perlanet_feeds });
-
-=cut
-
-override 'select_entries' => sub {
- my ($self, @feeds) = @_;
-
- # Perlanet::Feed objects to return
- my @feed_entries;
-
- # Iterate over the feeds working on them.
- for my $feed (@feeds) {
-
- # Fetch the XML::Feed:Entry objects from the Perlanet::Feed object
- my @entries = $feed->_xml_feed->entries;
-
- # Iterate over the XML::Feed::Entry objects
- foreach my $xml_entry (@entries) {
-
- # Problem with XML::Feed's conversion of RSS to Atom
- if ($xml_entry->issued && ! $xml_entry->modified) {
- $xml_entry->modified($xml_entry->issued);
- }
-
- # Always set category to something
- unless(defined($xml_entry->category)) {
- $xml_entry->category('');
- }
-
- #print(Dumper($xml_entry->tags));
- #print(Dumper($xml_entry->category));
-
- # Filter on keywords. This fails for HTML encoded languages.
- # See http://onperl.ru/onperl/atom.xml for examples
- # specifically http://onperl.ru/onperl/2010/02/post.html
- # FIXME
- unless($self->_filter_entry_on_keywords($xml_entry)) {
- #print("Skipping due to no keyword match for '" . $xml_entry->link . "'\n");
- next;
- }
-
- # De-duplicate
- unless($self->_filter_entry_for_duplicate($xml_entry)) {
- #print("Skipping due to duplicate match for '" . $xml_entry->link . "'\n");
- next;
- }
-
- # Create a Perlanet::Entry object from the XML data retrieved
- my $entry = Perlanet::Entry->new(
- _entry => $xml_entry,
- feed => $feed
- );
-
- push @feed_entries, $entry;
- }
- }
-
- return @feed_entries;
-};
-
-=head2 render
-
-Given a Perlanet::Entry object, store the entry as a post in the
-Schema::IronMan database
-
-=cut
-
-override 'render' => sub {
- my $self = shift;
- my $post = shift;
-
- my $posts = $post->entries;
-
- #print(Dumper($posts));
- #exit;
-
- foreach my $post (@{$posts}) {
-
- # Set the summary text to the summary or body if not supplied
- # This should probably be in config rather than hard coded.
- my $summary = $post->_entry->summary->body || $post->_entry->content->body;
-
-
- my $truncated = eval { $self->truncator->truncate($summary) };
- if ($@) {
- warn "Truncate failed: $@";
- $truncated = $summary;
- }
-
- $summary = $truncated;
-
- #print(Dumper($post));
- #exit;
-
- # Can't store a post if we can't work out the URL to link to it.
- unless(defined($post->_entry->link)) {
- print("ERROR. Can't deal with lack of URL returned from XML::Feed::Entry for feed '" . $post->feed->url . "'\n");
- next;
- }
-
- # Get the entry tags
- my @tags = $post->_entry->category;
-
- try {
- # Do that whole insert thing...
- $self->schema->resultset('Post')->create( {
- feed_id => $post->feed->id,
- author => $post->_entry->author || $post->feed->title,
- tags => join(",", @tags),
- url => $post->_entry->link,
- title => $post->_entry->title,
- posted_on => $post->_entry->issued || DateTime->now,
- summary => $summary,
- summary_filtered => $self->clean($summary),
- body => $post->_entry->content->body,
- body_filtered => $self->clean($post->_entry->content->body),
- } );
- }
-
- catch {
- Carp::cluck("ERROR: $_\n");
- Carp::cluck("ERROR: Post is:\n" . Dumper($post) . "\n");
- Carp::cluck("ERROR: Link URL is '" . $post->_entry->link . "'\n");
- };
-
- #print(Dumper($post));
-
- }
-};
-
-=head2 save
-
-Save is not required within the context of this module so we override it.
-
-=cut
-
-override 'save' => sub { };
-
-=head2 _build_feeds
-
-Feeds are built from the Schema::IronMan database overriding the internal
-defaults of utilising feeds specified in either the configuration file or as
-configuration options when creating the Perlanet object.
-
-=cut
-
-override '_build_feeds' => sub {
- my $self = shift;
- return [ map {
- Perlanet::Feed->new(
- id => $_->id,
- url => $_->url || $_->link,
- website => $_->link || $_->url,
- title => $_->title,
- author => $_->owner,
- );
- } $self->schema->resultset('Feed')->all ];
-};
-
-=head2 _build_schema
-
-Build and return a schema object the first time that the schema attribute of
-this object is accessed.
-
-=cut
-
-sub _build_schema {
- my $self = shift;
- return IronMan::Schema->connect(
- $self->cfg->{db}{dsn},
- $self->cfg->{db}{username},
- $self->cfg->{db}{password},
- );
-}
-
-=head2 _build_truncator
-
-Construct a HTML::Truncator object for truncating posts
-
-=cut
-
-sub _build_truncator {
- my $self = shift;
- my $html_truncate = HTML::Truncate->new(repair=>1);
- $html_truncate->chars(250);
- $html_truncate->ellipsis(" [...]");
-
- return $html_truncate;
-}
-
-=head2 _filter_entry_for_duplicate
-
-Test to see if the supplied XML::Feed::Entry passes the configured filters.
-
-Return 1 for a good entry and 0 for a bad entry.
-
-=cut
-
-sub _filter_entry_for_duplicate {
- my $self = shift;
- my $xml_entry = shift;
-
- my $count = $self->schema->resultset('Post')->search(
- { url => $xml_entry->link }
- )->count;
-
- if($count > 0) {
- #print("Duplicate post found for url '" . $xml_entry->link . "'\n");
- return 0;
- }
-
- #print("Post at url '" . $xml_entry->link . "' is new\n");
-
- return 1;
-}
-
-=head2 _filter_entry_on_keywords
-
-Test to see if the supplied XML::Feed::Entry passes the configured filters.
-
-Return 1 for a good entry and 0 for a bad entry.
-
-THIS FUNCTION IS BROKEN. SEE THE NOTES AT IT'S CALL.
-
-=cut
-
-sub _filter_entry_on_keywords {
- my $self = shift;
- my $xml_entry = shift;
-
- # print("Called to keyword filter url '" . $xml_entry->link . "'\n");
-
- # If no filter is defined, then we pass.
- unless(defined($self->{cfg}->{filter}->{keywords})) {
- print("No filter defined so skipping this check.\n");
- return 1;
- }
-
- my $filters = $self->{cfg}->{filter}->{keywords};
-
- # Iterate through the defined filters checking them
- foreach my $filter (@ { $filters } ) {
-
- #print("Checking for filter '$filter'\n");
-
- # If tags have been defined, check them.
- if(defined($xml_entry->tags)) {
- if(grep(/$filter/i, $xml_entry->tags)) {
- return 1;
- }
- }
-
- # Check the title if defined
- if(defined($xml_entry->title)) {
- if(grep(/$filter/i, $xml_entry->title)) {
- return 1;
- }
- }
-
- # Check the body if defined
- if(defined($xml_entry->content->body)) {
- if($xml_entry->content->body =~ m/$filter/i) {
- return 1;
- }
- }
- }
-
- # We got to the end and we didn't get a match. Fail.
- return 0;
-}
-
-# We don't want to remove the style attribute completely, but a clear: both will
-# screw our layout up...
-around 'clean' => sub {
- my $orig = shift;
- my ($self, $input) = @_;
-
- my $output = $self->$orig($input);
- $output =~ s/style="(.*?)clear:(.*?);/style="$1/;
- return $output;
-};
-
-=head1 AUTHOR
-
-Oliver Charles (aCiD2) <oliver.g.charles at googlemail.com>
-Matt Troutt (mst), <mst at shadowcat.co.uk>
-Ian Norton (idn), <i.d.norton at gmail.com>
-
-=head1 SEE ALSO
-
-IronMan::Schema
-Perlanet
-
-=head1 COPYRIGHT AND LICENSE
-
-This library is free software; you can redistribute it and/or modify
-it under the same terms as Perl itself, either Perl version 5.10.0 or,
-at your option, any later version of Perl 5 you may have available.
-
-=cut
-
-no Moose;
-__PACKAGE__->meta->make_immutable;
-1;
Copied: ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm (from rev 9949, ironman/Perlanet-IronMan/branches/ironboy/lib/Perlanet/IronMan.pm)
===================================================================
--- ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm (rev 0)
+++ ironman/Perlanet-IronMan/tags/Perlanet-IronMan-0_01_02/lib/Perlanet/IronMan.pm 2012-09-22 11:13:43 UTC (rev 9950)
@@ -0,0 +1,403 @@
+package Perlanet::IronMan;
+
+use 5.8.0;
+use strict;
+use warnings;
+
+use Moose;
+use IronMan::Schema;
+use HTML::Truncate;
+use Perlanet::Feed;
+use Data::Dumper;
+use Try::Tiny;
+use Carp;
+
+extends 'Perlanet';
+use Perlanet::Entry;
+
+our $VERSION = '0.01_02';
+
+=head1 NAME
+
+Perlanet::IronMan
+
+This module extends Perlanet for the specific requirements of the Enlightened
+Perl Organisation IronMan project.
+
+=head1 SYNOPSIS
+
+
+=head1 DESCRIPTION
+
+This module uses an IronMan::Schema database to define feeds, collect the feeds
+and then store them back into the IronMan::Schema database.
+
+=cut
+
+# This is some kind of Moose magic that I don't understand.... I think this
+# means tha schema attribute of this object is built using the _build_schema
+# method when the schema attribute is first used
+
+has 'schema' => (
+ is => 'rw',
+ lazy_build => 1,
+);
+
+has 'truncator' => (
+ is => 'rw',
+ lazy_build => 1
+);
+
+sub _build_scrubber {
+ my $self = shift;
+
+ my %scrub_rules = (
+ img => {
+ src => qr{^http://}, # only URL with http://
+ alt => 1, # alt attributes allowed
+ '*' => 0, # deny all others
+ },
+ style => 0,
+ script => 0,
+ );
+
+ # Definitions for HTML::Scrub
+ my %scrub_def = (
+ '*' => 1, # default rule, allow all attributes
+ 'href' => qr{^(?!(?:java)?script)}i,
+ 'src' => qr{^(?!(?:java)?script)}i,
+ 'cite' => '(?i-xsm:^(?!(?:java)?script))',
+ 'language' => 0,
+ 'name' => 1, # could be sneaky, but hey ;)
+ 'onblur' => 0,
+ 'onchange' => 0,
+ 'onclick' => 0,
+ 'ondblclick' => 0,
+ 'onerror' => 0,
+ 'onfocus' => 0,
+ 'onkeydown' => 0,
+ 'onkeypress' => 0,
+ 'onkeyup' => 0,
+ 'onload' => 0,
+ 'onmousedown' => 0,
+ 'onmousemove' => 0,
+ 'onmouseout' => 0,
+ 'onmouseover' => 0,
+ 'onmouseup' => 0,
+ 'onreset' => 0,
+ 'onselect' => 0,
+ 'onsubmit' => 0,
+ 'onunload' => 0,
+ 'src' => 0,
+ 'type' => 0,
+ 'style' => 0,
+ );
+
+ my $scrub = HTML::Scrubber->new;
+ $scrub->rules(%scrub_rules);
+ $scrub->default(1, \%scrub_def);
+
+ return $scrub;
+}
+
+=head2 select_entries
+
+The select entries function takes an array of Perlanet::Feed objects and
+filters it to remove duplicates. The non-duplicated feed entries are then
+returned to the caller as an array of Perlanet::Entry objects.
+
+ my $perlanet_entries = select_entries( @{ $perlanet_feeds });
+
+=cut
+
+override 'select_entries' => sub {
+ my ($self, @feeds) = @_;
+
+ # Perlanet::Feed objects to return
+ my @feed_entries;
+
+ # Iterate over the feeds working on them.
+ for my $feed (@feeds) {
+
+ # Fetch the XML::Feed:Entry objects from the Perlanet::Feed object
+ my @entries = $feed->_xml_feed->entries;
+
+ # Iterate over the XML::Feed::Entry objects
+ foreach my $xml_entry (@entries) {
+
+ # Problem with XML::Feed's conversion of RSS to Atom
+ if ($xml_entry->issued && ! $xml_entry->modified) {
+ $xml_entry->modified($xml_entry->issued);
+ }
+
+ # Always set category to something
+ unless(defined($xml_entry->category)) {
+ $xml_entry->category('');
+ }
+
+ #print(Dumper($xml_entry->tags));
+ #print(Dumper($xml_entry->category));
+
+ # Filter on keywords. This fails for HTML encoded languages.
+ # See http://onperl.ru/onperl/atom.xml for examples
+ # specifically http://onperl.ru/onperl/2010/02/post.html
+ # FIXME
+ unless($self->_filter_entry_on_keywords($xml_entry)) {
+ #print("Skipping due to no keyword match for '" . $xml_entry->link . "'\n");
+ next;
+ }
+
+ # De-duplicate
+ unless($self->_filter_entry_for_duplicate($xml_entry)) {
+ #print("Skipping due to duplicate match for '" . $xml_entry->link . "'\n");
+ next;
+ }
+
+ # Create a Perlanet::Entry object from the XML data retrieved
+ my $entry = Perlanet::Entry->new(
+ _entry => $xml_entry,
+ feed => $feed
+ );
+
+ push @feed_entries, $entry;
+ }
+ }
+
+ return @feed_entries;
+};
+
+=head2 render
+
+Given a Perlanet::Entry object, store the entry as a post in the
+Schema::IronMan database
+
+=cut
+
+override 'render' => sub {
+ my $self = shift;
+ my $post = shift;
+
+ my $posts = $post->entries;
+
+ foreach my $post (@{$posts}) {
+
+ # Set the summary text to the summary or body if not supplied
+ # This should probably be in config rather than hard coded.
+ my $summary = $post->_entry->summary->body || $post->_entry->content->body;
+
+
+ my $truncated = eval { $self->truncator->truncate($summary) };
+ if ($@) {
+ warn "Truncate failed: $@";
+ $truncated = $summary;
+ }
+
+ $summary = $truncated;
+
+ # Can't store a post if we can't work out the URL to link to it.
+ unless(defined($post->_entry->link)) {
+ print("ERROR. Can't deal with lack of URL returned from XML::Feed::Entry for feed '" . $post->feed->url . "'\n");
+ next;
+ }
+
+ # Get the entry tags
+ my @tags = $post->_entry->category;
+
+ try {
+ # Do that whole insert thing...
+ $self->schema->resultset('Post')->create( {
+ feed_id => $post->feed->id,
+ author => $post->_entry->author || $post->feed->title,
+ tags => join(",", @tags),
+ url => $post->_entry->link,
+ title => $post->_entry->title,
+ posted_on => $post->_entry->issued || DateTime->now,
+ summary => $summary,
+ summary_filtered => $self->clean($summary),
+ body => $post->_entry->content->body,
+ body_filtered => $self->clean($post->_entry->content->body),
+ } );
+ }
+
+ catch {
+ Carp::cluck("ERROR: $_\n");
+ Carp::cluck("ERROR: Post is:\n" . Dumper($post) . "\n");
+ Carp::cluck("ERROR: Link URL is '" . $post->_entry->link . "'\n");
+ };
+ }
+};
+
+=head2 save
+
+Save is not required within the context of this module so we override it.
+
+=cut
+
+override 'save' => sub { };
+
+=head2 _build_feeds
+
+Feeds are built from the Schema::IronMan database overriding the internal
+defaults of utilising feeds specified in either the configuration file or as
+configuration options when creating the Perlanet object.
+
+=cut
+
+override '_build_feeds' => sub {
+ my $self = shift;
+ return [ map {
+ Perlanet::Feed->new(
+ id => $_->id,
+ url => $_->url || $_->link,
+ website => $_->link || $_->url,
+ title => $_->title,
+ author => $_->owner,
+ );
+ } $self->schema->resultset('Feed')->all ];
+};
+
+=head2 _build_schema
+
+Build and return a schema object the first time that the schema attribute of
+this object is accessed.
+
+=cut
+
+sub _build_schema {
+ my $self = shift;
+ return IronMan::Schema->connect(
+ $self->cfg->{db}{dsn},
+ $self->cfg->{db}{username},
+ $self->cfg->{db}{password},
+ );
+}
+
+=head2 _build_truncator
+
+Construct a HTML::Truncator object for truncating posts
+
+=cut
+
+sub _build_truncator {
+ my $self = shift;
+ my $html_truncate = HTML::Truncate->new(repair=>1);
+ $html_truncate->chars(250);
+ $html_truncate->ellipsis(" [...]");
+
+ return $html_truncate;
+}
+
+=head2 _filter_entry_for_duplicate
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+=cut
+
+sub _filter_entry_for_duplicate {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ my $count = $self->schema->resultset('Post')->search(
+ { url => $xml_entry->link }
+ )->count;
+
+ if($count > 0) {
+ #print("Duplicate post found for url '" . $xml_entry->link . "'\n");
+ return 0;
+ }
+
+ #print("Post at url '" . $xml_entry->link . "' is new\n");
+
+ return 1;
+}
+
+=head2 _filter_entry_on_keywords
+
+Test to see if the supplied XML::Feed::Entry passes the configured filters.
+
+Return 1 for a good entry and 0 for a bad entry.
+
+THIS FUNCTION IS BROKEN. SEE THE NOTES AT IT'S CALL.
+
+=cut
+
+sub _filter_entry_on_keywords {
+ my $self = shift;
+ my $xml_entry = shift;
+
+ # print("Called to keyword filter url '" . $xml_entry->link . "'\n");
+
+ # If no filter is defined, then we pass.
+ unless(defined($self->{cfg}->{filter}->{keywords})) {
+ print("No filter defined so skipping this check.\n");
+ return 1;
+ }
+
+ my $filters = $self->{cfg}->{filter}->{keywords};
+
+ # Iterate through the defined filters checking them
+ foreach my $filter (@ { $filters } ) {
+
+ #print("Checking for filter '$filter'\n");
+
+ # If tags have been defined, check them.
+ if(defined($xml_entry->tags)) {
+ if(grep(/\b$filter\b/i, $xml_entry->tags)) {
+ return 1;
+ }
+ }
+
+ # Check the title if defined
+ if(defined($xml_entry->title)) {
+ if(grep(/\b$filter\b/i, $xml_entry->title)) {
+ return 1;
+ }
+ }
+
+ # Check the body if defined
+ if(defined($xml_entry->content->body)) {
+ if($xml_entry->content->body =~ m/\b$filter\b/i) {
+ return 1;
+ }
+ }
+ }
+
+ # We got to the end and we didn't get a match. Fail.
+ return 0;
+}
+
+# We don't want to remove the style attribute completely, but a clear: both will
+# screw our layout up...
+around 'clean' => sub {
+ my $orig = shift;
+ my ($self, $input) = @_;
+
+ my $output = $self->$orig($input);
+ $output =~ s/style="(.*?)clear:(.*?);/style="$1/;
+ return $output;
+};
+
+=head1 AUTHOR
+
+Oliver Charles (aCiD2) <oliver.g.charles at googlemail.com>
+Matt Troutt (mst), <mst at shadowcat.co.uk>
+Ian Norton (idn), <i.d.norton at gmail.com>
+
+=head1 SEE ALSO
+
+IronMan::Schema
+Perlanet
+
+=head1 COPYRIGHT AND LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
More information about the Bast-commits
mailing list