[Bast-commits] r8011 - in ironman/IronMan-Web: lib/IronMan lib/IronMan/Web/Controller root script
castaway at dev.catalyst.perl.org
castaway at dev.catalyst.perl.org
Wed Dec 2 21:13:02 GMT 2009
Author: castaway
Date: 2009-12-02 21:13:02 +0000 (Wed, 02 Dec 2009)
New Revision: 8011
Modified:
ironman/IronMan-Web/lib/IronMan/Feeds.pm
ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm
ironman/IronMan-Web/root/new_feed.tt
ironman/IronMan-Web/script/add_feed.pl
Log:
Update to ignore/throw away non-rss entries
Modified: ironman/IronMan-Web/lib/IronMan/Feeds.pm
===================================================================
--- ironman/IronMan-Web/lib/IronMan/Feeds.pm 2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/lib/IronMan/Feeds.pm 2009-12-02 21:13:02 UTC (rev 8011)
@@ -5,33 +5,54 @@
use IronMan::Schema;
use Data::UUID;
-use LWP::Simple;
+#use LWP::Simple;
+use LWP::UserAgent;
+use XML::Feed;
sub verify_feed_data {
my ($title, $url, $email, $email_conf, $errors) = @_;
if(!$url and !$email and !$title) {
- return 0;
+ return;
}
## Submitted form
- $errors = [];
if($email ne $email_conf || !$email) {
push @$errors, 'Email does not match';
}
# $c->log->debug("Getting feed");
- my $feed_xml = get($url);
+
+ ## URL must a) exist, b) not timeout after 15sec, c) be an atom/rss feed
+ my $ua = LWP::UserAgent->new();
+ $ua->timeout(15);
+ my $response = $ua->get($url);
+ my $feed_xml = $response->content();
+
# $c->log->debug("Got feed");
- if(!$url || !defined $feed_xml) {
+ if(!$url || !$response->is_success || !defined $feed_xml) {
push @$errors, "Cannot fetch feed: $url";
+ push @$errors, "GET failed with : " . $response->status_line if($response);
+ } elsif($response->header('Content-Type') =~ m{text/html}) {
+## Feed::Find is a miserable failure.
+## scalar ref to string containing html, despite what the docs say!
+ my @feeds = Feed::Find->find_in_html(\$response->decoded_content, $url);
+ if(!@feeds) {
+ push @$errors, "No feeds found in HTML page: $url";
+ } else {
+ $url = $feeds[0];
+ }
+ } elsif($response->header('Content-Type') !~ m{\Qapplication/atom+xml\E|\Qapplication/rss+xml\E|\Qapplication/rdf+xml\E|\Qapplication/xml\E}) {
+ push @$errors, "URL $url is not an Atom or RSS feed";
}
+
if(!$title) {
push @$errors, 'Missing title';
}
+ print STDERR "Errors: ", Data::Dumper::Dumper($errors);
return 0 if(@$errors);
- return 1;
+ return { feed => $url };
}
1;
Modified: ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm
===================================================================
--- ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm 2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm 2009-12-02 21:13:02 UTC (rev 8011)
@@ -2,8 +2,8 @@
use strict;
use warnings;
-use LWP::Simple;
use Data::UUID;
+use IronMan::Feeds;
use Email::Valid;
use base 'Catalyst::Controller';
@@ -46,48 +46,31 @@
$c->stash( params => $c->req->params);
- if(!$url and !$email and !$title) {
- return;
- }
-
- ## Submitted form
+ return 1 if(!%{ $c->req->params });
+
my $errors = [];
- if($email ne $email_conf || !$email) {
- push @$errors, 'Email does not match';
- }
- $c->log->debug("Getting feed");
- my $feed_xml = get($url);
- $c->log->debug("Got feed");
- if(!$url || !defined $feed_xml) {
- push @$errors, "Cannot fetch feed: $url";
- }
- if(!$title) {
- push @$errors, 'Missing title';
- }
-
- if(@$errors) {
+ my $feed = IronMan::Feeds::verify_feed_data($title, $url, $email, $email_conf, $errors);
+ if(!$feed || @$errors) {
+ $c->log->_dump($errors);
$c->stash( errors => $errors);
return 1;
}
-
+
$c->log->debug("input ok");
- ## Insert with new UUID
- my $uuids = Data::UUID->new;
- my $fdb = $c->model('FeedDB::Feed')->find_or_new
- ({
- id => $uuids->create_str,
- url => $url,
- title => $title,
- owner => $email,
- },
- { key => 'url' }
- );
- if($fdb->in_storage) {
+
+ ## verify_feed_data attempts to find the first feed in the given page, if it is not a feed
+ $url = $feed->{feed};
+ $c->stash(feed_url => $url);
+
+ my ($res, $fdb) = $c->model('FeedDB::Feed')->add_new_blog(title => $title,
+ url => $url,
+ email => $email);
+
+ if(!$res) {
$c->stash(existed => 1);
$c->log->debug("$url already exists, id: " . $fdb->id);
return 1;
}
- $fdb->insert;
$c->stash(inserted => $fdb->id);
}
Modified: ironman/IronMan-Web/root/new_feed.tt
===================================================================
--- ironman/IronMan-Web/root/new_feed.tt 2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/root/new_feed.tt 2009-12-02 21:13:02 UTC (rev 8011)
@@ -53,7 +53,7 @@
[% END %]
[% IF inserted %]
<p>
- [% params.url %] is now signed up to <a href="ironman.enlightenedperl.org">Ironman</a>. Please wait an hour for the first data fetch.
+ [% feed_url %] is now signed up to <a href="ironman.enlightenedperl.org">Ironman</a>. Please wait an hour for the first data fetch.
</p>
[% END %]
[% FOREACH error = errors %]
@@ -61,7 +61,7 @@
[% END %]
<hr/>
<form method="post" action="[% c.uri_for('/new_feed') %]">
- <label>Blog feed URL (atom prefered)<input type="text" name="url" value="[% params.url %]"></label><br>
+ <label>Blog feed URL (atom preferred)<input type="text" name="url" value="[% params.url %]"></label><br>
<label>Your Name ("Firstname Lastname (nick)" prefered)<input type="text" name="title" value="[% params.title %]"></label><br>
<label>Email <input type="text" name="email" value="[% params.email %]"></label><br>
<label>Email (confirmation)<input type="text" name="email_2"></label><br>
@@ -76,7 +76,7 @@
<a href="http://bobtfish.livejournal.com/">t0m</a>,
<a href="http://www.basirat.com/">mohsen</a>,
<a href="http://www.perlmonks.org/?node=theorbtwo">theorbtwo</a>,
- <a href="http://www.shadowcat.co.uk">epitaph</a></em>
+ <a href="http://www.shadowcat.co.uk">epitaph</a></em>,
<a href="http://www.shadowcat.co.uk">mdk</a></em>
<br/>Hosted by: <em><a href="http://www.shadowcat.co.uk">Shadowcat Systems</a></em>
<br/>The code repository can be <em><a href="http://dev.catalyst.perl.org/repos/bast/ironman">accessed here</a></em>
Modified: ironman/IronMan-Web/script/add_feed.pl
===================================================================
--- ironman/IronMan-Web/script/add_feed.pl 2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/script/add_feed.pl 2009-12-02 21:13:02 UTC (rev 8011)
@@ -35,7 +35,10 @@
my $schema = IronMan::Schema->connect(@$dsn);
- return $schema->resultset('Feed')->add_new_blog($title, $url, $email);
+ return $schema->resultset('Feed')->add_new_blog(title => $title,
+ url => $url,
+ email => $email
+ );
}
sub collect_data {
More information about the Bast-commits mailing list