[Catalyst-dev] Re: fixes to Advent calendar RSS and 3.pod

A. Pagaltzis pagaltzis at gmx.de
Fri Dec 8 22:37:12 GMT 2006

* Jonathan Rockway <jon at jrock.us> [2006-12-08 19:15]:
> A. Pagaltzis wrote:
> >* Jonathan Rockway <jon at jrock.us> [2006-12-03 12:40]:
> >>first revision converts RSS2 -> Atom,
> >
> >Uhm, why are you generating the long-deprecated Atom 0.3
> >format?
> Because XML::Feed only lets you specify "Atom" or "RSS".

Hmm, are you using an old version? According to the changelog,
the latest emits Atom 1.0 by default.

> In the latest svn version of the calendar, I switched to
> XML::Atom::SimpleFeed like I should have done originally :)

That’s another option. ;-)

> One irritation, though, is that you can't set the feed updated
> time after new(). Unfortunately, I don't know what time to use
> for updated until I iterate over the items and add them to the
> feed. (So for now, updated is set to the default.)

The API is kinda crappy; don’t blame me, wasn’t my design. ;-)


Looking at your code and the revision log I also see another
issue: API clients shouldn’t be forced to format timestamps on
their own. There should be shortcuts for just passing in
a timestamp and leaving formatting to the module.

/me scribbles notes

> >Further critique: right now, what I see is a completely
> >useless title (“Calendar entry for day $d.”) with the article
> >title being sent as the entry’s full content. What’s in
> >atom:content *isn’t* the entry content. It should be in
> >atom:summary at minimum. In fact, it should be in the title.
> Fixed. The title is the first paragraph, and the content is the
> full XHTML body of the POD. Also, the first author's name is
> extracted from an AUTHOR section. (I could parse out more
> metadata [e-mail and other authors], but I'm lazy so I didn't.
> Patches welcome.)

Attached. This version uses the first =head1 heading as the title
and the first paragraph as the summary, and pulls out the email.

> >Who requested not to send fulltext and why? (If it’s
> >bandwidth: you can easily avoid 95% of the traffic if you add
> >support for conditional GET.)
> Dunno, but I like fulltext and nobody on IRC objected this
> time. The svn version does fulltext; hopefully someone will
> push that to the site Real Soon Now.

OK, but without conditional GET, that’s going to be a bit of
a drain on bandwidth. I added support; see patch. This is the
first time I have actually written such code myself; I hope it works.

> >Right now, the feed is pretty useless to me.
> Agreed, so now it's fixed :)

Much better already – thanks!

Aristotle Pagaltzis // <http://plasmasturm.org/>
Index: lib/CatalystAdvent/Controller/Calendar.pm
--- lib/CatalystAdvent/Controller/Calendar.pm	(revision 5730)
+++ lib/CatalystAdvent/Controller/Calendar.pm	(working copy)
@@ -10,6 +10,7 @@
 use File::stat;
 use XML::Atom::SimpleFeed;
 use POSIX qw(strftime);
+use List::Util qw(max);
 use CatalystAdvent::Pod;
 =head1 NAME
@@ -108,52 +109,72 @@
     $c->res->redirect( $c->uri_for('/') )
         unless ( -e $c->path_to( 'root', $year ) );
-    my $feed = XML::Atom::SimpleFeed->new( title => "Catalyst Advent Calendar $year Feed",
-					   link  => $c->req->base,
-					   link  => {rel => 'self',
-						     href => $c->uri_for("/rss/$year")
-						    },
-					   id    => $c->uri_for("/rss/$year"),
-					 );
     $c->stash->{year} = $year;
-    my ( $day, $entries ) = ( 24, 0 );
-    my $feed_mtime = 0;
-    while ( $day > 0 && $entries < 5 ) {
-        if ( -e ( my $file = $c->path_to( 'root', $year, "$day.pod" ) ) ) {
-            my $stat   = stat $file;
-            my $mtime  = $stat->mtime;
-            my $ctime  = $stat->ctime;
-	    $feed_mtime= $mtime > $feed_mtime ? $mtime : $feed_mtime;
-	    my $parser = CatalystAdvent::Pod->new(
-						  StringMode   => 1,
-						  FragmentOnly => 1,
-						  MakeIndex    => 0,
-						  TopLinks     => 0
-						 );
-	    $parser->parse_from_file("$file");
-	    $feed->add_entry( title    => $parser->first_paragraph,
-			      author   => $parser->author,
-			      content  => {type => 'xhtml', content => $parser->asString},
-			      link     => $c->uri_for("/$year/$day"),
-			      id       => $c->uri_for("/$year/$day"),
-			      published=> format_date( $ctime ),
-			      updated  => format_date( $mtime ),
-			    );
-            $entries++;
+    my $feed;
+    my @entry = reverse 1 .. 24;
+    shift @entry until -e $c->path_to( 'root', $year, "$entry[ 0 ].pod" );
+    pop @entry while @entry > 5;
+    my %path = map +{ $_ => $c->path_to( 'root', $year, "$_.pod" ) }, @entry;
+    my %stat = map +{ $_ => stat $path{ $_ } }, @entry;
+    my $latest_mtime = max map $_->mtime, values %stat;
+    my $last_mod = format_date_rfc822( $latest_mtime );
+    $c->res->header( 'Last-Modified' => $last_mod );
+    $c->res->header( 'ETag' => qq'"$last_mod"' );
+    $c->res->content_type( 'application/atom+xml' );
+    my $cond_date = $c->req->header( 'If-Modified-Since' );
+    my $cond_etag = $c->req->header( 'If-None-Match' );
+    if( $cond_date or $cond_etag ) {
+        # if both headers are present, both must match
+        my $do_send_304 = 1;
+        if( $cond_date ) { $do_send_304 = $cond_date eq $last_mod }
+        if( $cond_etag ) { $do_send_304 &&= $cond_etag eq qq'"$last_mod"' }
+        if( $do_send_304 ) {
+            $c->res->status( 304 );
+            return;
-        $day--;
-    $c->res->body( $feed->as_string);
-    $c->res->content_type('application/atom+xml');
-sub format_date {
-    return strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($_[0]));
+    my $feed = XML::Atom::SimpleFeed->new(
+        title   => "Catalyst Advent Calendar $year",
+        link    => $c->req->base,
+        link    => { rel => 'self', href => $c->uri_for("/rss/$year") },
+        id      => $c->uri_for("/rss/$year"),
+        updated => format_date_w3cdtf( $latest_mtime ),
+    );
+    for my $day ( @entry ) {
+        my $parser = CatalystAdvent::Pod->new(
+            StringMode   => 1,
+            FragmentOnly => 1,
+            MakeIndex    => 0,
+            TopLinks     => 0
+        );
+        $parser->parse_from_file( $path{ $day } );
+        $feed->add_entry(
+            title    => { type => 'text', content => $parser->title },
+            summary  => { type => 'text', content => $parser->summary },
+            content  => { type => 'xhtml', content => $parser->asString },
+            author   => { name => $parser->author, email => $parser->email },
+            link     => $c->uri_for( "/$year/$day" ),
+            id       => $c->uri_for( "/$year/$day" ),
+            published=> format_date_w3cdtf( $stat{ $day }->ctime ),
+            updated  => format_date_w3cdtf( $stat{ $day }->mtime ),
+        );
+    }
+    $c->res->body( $feed->as_string );
+# Format an epoch timestamp (seconds) as a UTC W3CDTF string such as
+# 2006-12-08T22:37:12Z; used for the feed's and entries' updated/published.
+sub format_date_w3cdtf { strftime '%Y-%m-%dT%H:%M:%SZ', gmtime $_[0] }
+# Format an epoch timestamp (seconds) as an HTTP-date for the Last-Modified
+# header, e.g. "Sun, 06 Nov 1994 08:49:37 GMT". HTTP requires single spaces
+# between fields and the literal zone "GMT"; a numeric "+0000" is not valid.
+# NOTE: %a and %b follow the process's LC_TIME locale; HTTP expects the
+# English abbreviations, so this assumes the C/English locale is in effect.
+sub format_date_rfc822 { strftime '%a, %d %b %Y %H:%M:%S GMT', gmtime $_[0] }
 =head1 AUTHORS
 Brian Cassidy, <bricas at cpan.org>
Index: lib/CatalystAdvent/Pod.pm
--- lib/CatalystAdvent/Pod.pm	(revision 5730)
+++ lib/CatalystAdvent/Pod.pm	(working copy)
@@ -18,8 +18,8 @@
     $self->{_first_paragraph} ||= $text;
-	$text =~ /((?:[\w.]+\s+)+)/;
-	$self->{_author} = $1;
+	$text =~ /((?:[\w.]+\s+)+)/ and $self->{_author} = $1;
+	$text =~ /<([^<>@\s]+@[^<>\s]+)>/ and $self->{_email} = $1;
 	$self->{_in_author_block} = 0; # not anymore
@@ -29,14 +29,19 @@
 sub command {
     my $self = shift;
     my ($command, $paragraph, $pod_para) = @_;
+    $self->{_title} = $paragraph
+        if $command eq 'head1' and not defined $self->{_title};
     $self->{_in_author_block} = 1
-      if $paragraph =~ /AUTHOR/ && $command =~ /^head/;
+        if $command =~ /^head/ and $paragraph =~ /AUTHOR/;
     return $self->SUPER::command(@_);
-sub first_paragraph { return $_[0]->{_first_paragraph} }
-sub author { return $_[0]->{_author} };
+# Read-only accessors for metadata gathered while parsing the POD:
+# the first =head1 text, the first paragraph, and the author's name and
+# e-mail address. Each returns undef if the value was never set.
+sub title   { my ($self) = @_; return $self->{_title} }
+sub summary { my ($self) = @_; return $self->{_first_paragraph} }
+sub author  { my ($self) = @_; return $self->{_author} }
+sub email   { my ($self) = @_; return $self->{_email} }

