[Bast-commits] r8011 - in ironman/IronMan-Web: lib/IronMan lib/IronMan/Web/Controller root script

castaway at dev.catalyst.perl.org castaway at dev.catalyst.perl.org
Wed Dec 2 21:13:02 GMT 2009


Author: castaway
Date: 2009-12-02 21:13:02 +0000 (Wed, 02 Dec 2009)
New Revision: 8011

Modified:
   ironman/IronMan-Web/lib/IronMan/Feeds.pm
   ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm
   ironman/IronMan-Web/root/new_feed.tt
   ironman/IronMan-Web/script/add_feed.pl
Log:
Update to ignore/throw away non-rss entries


Modified: ironman/IronMan-Web/lib/IronMan/Feeds.pm
===================================================================
--- ironman/IronMan-Web/lib/IronMan/Feeds.pm	2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/lib/IronMan/Feeds.pm	2009-12-02 21:13:02 UTC (rev 8011)
@@ -5,33 +5,54 @@
 
 use IronMan::Schema;
 use Data::UUID;
-use LWP::Simple;
+#use LWP::Simple;
+use LWP::UserAgent;
+use XML::Feed;
 
 sub verify_feed_data {
     my ($title, $url, $email, $email_conf, $errors) = @_;
 
     if(!$url and !$email and !$title) {
-        return 0;
+        return;
     }
     
     ## Submitted form
-    $errors = [];
     if($email ne $email_conf || !$email) {
         push @$errors, 'Email does not match';
     }
 #    $c->log->debug("Getting feed");
-    my $feed_xml = get($url);
+
+    ## URL must a) exist, b) not timeout after 15sec, c) be an atom/rss feed
+    my $ua = LWP::UserAgent->new();
+    $ua->timeout(15);
+    my $response = $ua->get($url);
+    my $feed_xml = $response->content();
+
 #    $c->log->debug("Got feed");
-    if(!$url || !defined $feed_xml) {
+    if(!$url || !$response->is_success || !defined $feed_xml) {
         push @$errors, "Cannot fetch feed: $url";
+        push @$errors, "GET failed with : " . $response->status_line if($response);
+    } elsif($response->header('Content-Type') =~ m{text/html}) {
+## Feed::Find is a miserable failure.
+## scalar ref to string containing html, despite what the docs say!
+        my @feeds = Feed::Find->find_in_html(\$response->decoded_content, $url);
+        if(!@feeds) {
+            push @$errors, "No feeds found in HTML page: $url";
+        } else {
+            $url = $feeds[0];
+        }
+    } elsif($response->header('Content-Type') !~ m{\Qapplication/atom+xml\E|\Qapplication/rss+xml\E|\Qapplication/rdf+xml\E|\Qapplication/xml\E}) {
+        push @$errors, "URL $url is not an Atom or RSS feed";
     }
+
     if(!$title) {
         push @$errors, 'Missing title';
     }
 
+    print STDERR "Errors: ", Data::Dumper::Dumper($errors);
     return 0 if(@$errors);
 
-    return 1;
+    return { feed => $url };
 }
 
 1;

Modified: ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm
===================================================================
--- ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm	2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/lib/IronMan/Web/Controller/Root.pm	2009-12-02 21:13:02 UTC (rev 8011)
@@ -2,8 +2,8 @@
 
 use strict;
 use warnings;
-use LWP::Simple;
 use Data::UUID;
+use IronMan::Feeds;
 use Email::Valid;
 use base 'Catalyst::Controller';
 
@@ -46,48 +46,31 @@
 
     $c->stash( params => $c->req->params);
 
-    if(!$url and !$email and !$title) {
-        return;
-    }
-    
-    ## Submitted form
+    return 1 if(!%{ $c->req->params });
+
     my $errors = [];
-    if($email ne $email_conf || !$email) {
-        push @$errors, 'Email does not match';
-    }
-    $c->log->debug("Getting feed");
-    my $feed_xml = get($url);
-    $c->log->debug("Got feed");
-    if(!$url || !defined $feed_xml) {
-        push @$errors, "Cannot fetch feed: $url";
-    }
-    if(!$title) {
-        push @$errors, 'Missing title';
-    }
-    
-    if(@$errors) {
+    my $feed = IronMan::Feeds::verify_feed_data($title, $url, $email, $email_conf, $errors);
+    if(!$feed || @$errors) {
+        $c->log->_dump($errors);
         $c->stash( errors => $errors);
         return 1;
     }
-    
+
     $c->log->debug("input ok");
-    ## Insert with new UUID
-    my $uuids = Data::UUID->new;
-    my $fdb = $c->model('FeedDB::Feed')->find_or_new
-      ({
-        id => $uuids->create_str,
-        url => $url,
-        title => $title,
-        owner => $email,
-       },
-       { key => 'url' }
-      );
-    if($fdb->in_storage) {
+
+    ## verify_feed_data attempts to find the first feed in the given page, if it is not a feed
+    $url = $feed->{feed};
+    $c->stash(feed_url => $url);
+    
+    my ($res, $fdb) = $c->model('FeedDB::Feed')->add_new_blog(title => $title, 
+                                                              url => $url, 
+                                                              email => $email);
+
+    if(!$res) {
         $c->stash(existed => 1);
         $c->log->debug("$url already exists, id: " . $fdb->id);
         return 1;
     }
-    $fdb->insert;
     
     $c->stash(inserted => $fdb->id);
 }

Modified: ironman/IronMan-Web/root/new_feed.tt
===================================================================
--- ironman/IronMan-Web/root/new_feed.tt	2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/root/new_feed.tt	2009-12-02 21:13:02 UTC (rev 8011)
@@ -53,7 +53,7 @@
      [% END %]
      [% IF inserted %]
        <p>
-       [% params.url %] is now signed up to <a href="ironman.enlightenedperl.org">Ironman</a>. Please wait an hour for the first data fetch.
+       [% feed_url %] is now signed up to <a href="ironman.enlightenedperl.org">Ironman</a>. Please wait an hour for the first data fetch.
        </p>
      [% END %]
      [% FOREACH error = errors %]
@@ -61,7 +61,7 @@
      [% END %]
      <hr/>
      <form method="post" action="[% c.uri_for('/new_feed') %]">
-       <label>Blog feed URL (atom prefered)<input type="text" name="url" value="[% params.url %]"></label><br>
+       <label>Blog feed URL (atom preferred)<input type="text" name="url" value="[% params.url %]"></label><br>
        <label>Your Name ("Firstname Lastname (nick)" prefered)<input type="text" name="title" value="[% params.title %]"></label><br>
        <label>Email <input type="text" name="email" value="[% params.email %]"></label><br>
        <label>Email (confirmation)<input type="text" name="email_2"></label><br>
@@ -76,7 +76,7 @@
      <a href="http://bobtfish.livejournal.com/">t0m</a>,
      <a href="http://www.basirat.com/">mohsen</a>, 
      <a href="http://www.perlmonks.org/?node=theorbtwo">theorbtwo</a>, 
-     <a href="http://www.shadowcat.co.uk">epitaph</a></em>
+     <a href="http://www.shadowcat.co.uk">epitaph</a></em>,
      <a href="http://www.shadowcat.co.uk">mdk</a></em>
     <br/>Hosted by: <em><a href="http://www.shadowcat.co.uk">Shadowcat Systems</a></em>
     <br/>The code repository can be <em><a href="http://dev.catalyst.perl.org/repos/bast/ironman">accessed here</a></em>

Modified: ironman/IronMan-Web/script/add_feed.pl
===================================================================
--- ironman/IronMan-Web/script/add_feed.pl	2009-12-02 16:57:17 UTC (rev 8010)
+++ ironman/IronMan-Web/script/add_feed.pl	2009-12-02 21:13:02 UTC (rev 8011)
@@ -35,7 +35,10 @@
 
     my $schema = IronMan::Schema->connect(@$dsn);
 
-    return $schema->resultset('Feed')->add_new_blog($title, $url, $email);
+    return $schema->resultset('Feed')->add_new_blog(title => $title, 
+                                                    url => $url, 
+                                                    email => $email
+        );
 }
 
 sub collect_data {




More information about the Bast-commits mailing list