位置情報を含むフィードの読み込み

http://plagger.org/trac/browser/branches/feature-geo
上記ブランチでの経緯度の対応に加え、高度も取り出せるようにしてみます。

GMLではのような属性が設定された場合に高度の表記が可能になるようですが、そこまでのチェック及び動作確認は今の所行っていません。悪しからず。

diff

http://github.com/mizar/plagger/commit/60b38bfa250355bfac5f445d6618d87979533999
※上のコミットではfile modeを幾つか間違えています、注意。

diff --git a/lib/Plagger/Entry.pm b/lib/Plagger/Entry.pm
index 81a4034..26be49a 100644
--- a/lib/Plagger/Entry.pm
+++ b/lib/Plagger/Entry.pm
@@ -10,6 +10,7 @@ use Digest::MD5;
 use DateTime::Format::Mail;
 use Storable;
 use Plagger::Util;
+use Plagger::Location;
 
 sub new {
     my $class = shift;
@@ -104,5 +105,26 @@ sub digest {
     Digest::MD5::md5_hex($data);
 }
 
+sub location {
+    my $self = shift;
+
+    if (@_ == 2) {
+        my $location = Plagger::Location->new;
+        $location->latitude($_[0]);
+        $location->longitude($_[1]);
+        $self->{location} = $location;
+    } elsif (@_ == 3) {
+        my $location = Plagger::Location->new;
+        $location->latitude($_[0]);
+        $location->longitude($_[1]);
+        $location->altitude($_[2]);
+        $self->{location} = $location;
+    } elsif (@_) {
+        $self->{location} = shift;
+    }
+
+    $self->{location};
+}
+
 1;
 
diff --git a/lib/Plagger/Location.pm b/lib/Plagger/Location.pm
new file mode 100644
index 0000000..c5ceec7
--- /dev/null
+++ b/lib/Plagger/Location.pm
@@ -0,0 +1,36 @@
+package Plagger::Location;
+use strict;
+use warnings;
+
+use base qw( Class::Accessor::Fast );
+__PACKAGE__->mk_accessors( qw( address ));
+
+# XXX add datum here?
+# For now latitude/longitude should be in WGS84
+
+sub latitude {
+    my $self = shift;
+    if (@_) {
+        $self->{latitude} = shift() + 0; # numify
+    }
+    $self->{latitude};
+}
+
+sub longitude {
+    my $self = shift;
+    if (@_) {
+        $self->{longitude} = shift() + 0; # numify
+    }
+    $self->{longitude};
+}
+
+sub altitude {
+    my $self = shift;
+    if (@_) {
+        $self->{altitude} = shift() + 0; # numify
+    }
+    $self->{altitude};
+}
+
+1;
+
diff --git a/lib/Plagger/Plugin/Namespace/Geo.pm b/lib/Plagger/Plugin/Namespace/Geo.pm
new file mode 100644
index 0000000..5252898
--- /dev/null
+++ b/lib/Plagger/Plugin/Namespace/Geo.pm
@@ -0,0 +1,67 @@
+package Plagger::Plugin::Namespace::Geo;
+use strict;
+use base qw( Plagger::Plugin );
+
+sub register {
+    my($self, $context) = @_;
+    $context->register_hook(
+        $self,
+        'aggregator.entry.fixup' => \&handle,
+    );
+}
+
+sub handle {
+    my($self, $context, $args) = @_;
+    my $geo_ns = "http://www.w3.org/2003/01/geo/wgs84_pos#";
+
+    my $entry = $args->{orig_entry}->{entry};
+
+    if (ref($entry) eq 'XML::Atom::Entry') {
+        my($lat, $long, $alt) = map $entry->get($geo_ns, $_), qw( lat long alt );
+        if (defined $lat && defined $long) {
+            if (defined $alt) {
+                $args->{entry}->location($lat, $long, $alt);
+            } else {
+                $args->{entry}->location($lat, $long);
+            }
+        }
+    }
+    elsif (ref($entry) eq 'HASH') {
+        my $geo = $entry->{$geo_ns} || {};
+        $geo = $geo->{Point}->{geo} if $geo->{Point};
+        if (defined($geo->{lat}) && defined($geo->{long})) {
+            if (defined($geo->{alt})) {
+                $args->{entry}->location($geo->{lat}, $geo->{long}, $geo->{alt});
+            } else {
+                $args->{entry}->location($geo->{lat}, $geo->{long});
+            }
+        }
+    }
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Plagger::Plugin::Namespace::Geo - Extract location using Geo RDF
+
+=head1 SYNOPSIS
+
+  - module: Namespace::Geo
+
+=head1 DESCRIPTION
+
+This plugin parses the Geo tagged feed extension and store the
+longitude and latitude coordinates in the entry's location object.
+
+=head1 AUTHOR
+
+Jean-Yves Stervinou
+Tatsuhiko Miyagawa
+
+=head1 SEE ALSO
+
+L<Plagger>
+
+=cut
diff --git a/lib/Plagger/Plugin/Namespace/GeoRSS.pm b/lib/Plagger/Plugin/Namespace/GeoRSS.pm
new file mode 100644
index 0000000..ee8bdd1
--- /dev/null
+++ b/lib/Plagger/Plugin/Namespace/GeoRSS.pm
@@ -0,0 +1,96 @@
+package Plagger::Plugin::Namespace::GeoRSS;
+use strict;
+use base qw( Plagger::Plugin );
+
+sub register {
+    my($self, $context) = @_;
+    $context->register_hook(
+        $self,
+        'aggregator.entry.fixup' => \&handle,
+    );
+}
+
+sub handle {
+    my($self, $context, $args) = @_;
+
+    my $georss = "http://www.georss.org/georss";
+    my $gml    = "http://www.opengis.net/gml";
+
+    my $entry = $args->{orig_entry}->{entry};
+
+    if (ref($entry) eq 'XML::Atom::Entry') {
+        if (my $point = $entry->get($georss, "point")) {
+            if (my $elev = $entry->get($georss, "elev")) {
+                $self->extract_point($args->{entry}, $point, $elev);
+            } else {
+                $self->extract_point($args->{entry}, $point);
+            }
+        }
+        # XXX HACK: get LibXML node using XML::Atom internal API
+        elsif (my @where = XML::Atom::Util::nodelist($entry->elem, $georss, "where")) {
+            my($p) = $where[0]->getElementsByTagName('gml:Point');
+            if ($p) {
+                $self->extract_point($args->{entry}, $p->textContent);
+            }
+        }
+    } elsif (ref($entry) eq 'HASH') {
+        if ($entry->{$georss}) {
+            if (my $point = $entry->{$georss}->{point}) {
+                if (my $elev = $entry->{$georss}->{elev}) {
+                    $self->extract_point($args->{entry}, $point, $elev);
+                } else {
+                    $self->extract_point($args->{entry}, $point);
+                }
+            }
+            elsif (my $where = $entry->{$georss}->{where}) {
+                if (my $pos = $where->{$gml}->{Point}->{$gml}->{pos}) {
+                    $self->extract_point($args->{entry}, $pos);
+                }
+            }
+        }
+    }
+}
+
+sub extract_point {
+    my($self, $entry, $point, $elev) = @_;
+    $point =~ s/^\s+|\s+$//g;
+    $elev =~ s/^\s+|\s+$//g;
+    my($lat, $lon, $alt) = split /\s+/, $point, 3;
+    if (length $lat && length $lon) {
+        if (length $elev) {
+            $entry->location($lat, $lon, $elev);
+        } elsif (length $alt) {
+            $entry->location($lat, $lon, $alt);
+        } else {
+            $entry->location($lat, $lon);
+        }
+    }
+}
+
+1;
+__END__
+
+=for stopwords GeoRSS GML
+
+=head1 NAME
+
+Plagger::Plugin::Namespace::GeoRSS - GeoRSS module extension
+
+=head1 SYNOPSIS
+
+  - module: Namespace::GeoRSS
+
+=head1 DESCRIPTION
+
+This plugin extracts Geo location information using GeoRSS
+extension. It supports both Simple and GML notation of location point.
+
+=head1 AUTHOR
+
+Tatsuhiko Miyagawa
+
+=head1 SEE ALSO
+
+L<Plagger>, L<http://www.georss.org/>
+
+=cut

実行例

USGS(アメリカ地質調査所)が公開している最近の世界の地震情報を読み込み、震源の位置情報をエントリのタグに追加するフィルタを掛ける例です。

test-usgs-georss.yaml
plugins:
  - module: Subscription::Config
    config:
      feed:
         - url: http://earthquake.usgs.gov/eqcenter/catalogs/7day-M5.xml
  - module: Namespace::GeoRSS
  - module: Aggregator::Simple
  - module: Filter::Rule
    rule:
      expression: |
        my $loc = $args->{entry}->location;
        if($loc){ unshift @{$args->{entry}->tags}, grep defined,
        sprintf("%s%.1f",($loc->latitude<0?"S":"N"),abs($loc->latitude)),
        sprintf("%s%.1f",($loc->longitude<0?"W":"E"),abs($loc->longitude)),
        (defined($loc->altitude)?sprintf("%.1fkm",.001*$loc->altitude):undef),
        }
        1;
  - module: Publish::Debug
~/plagger$ ./plagger -c ~/test-usgs-georss.yaml
(snip)
    bless( {
      'link' => '/eqcenter/recenteqsww/Quakes/us2009gxcb.php',
      'widgets' => [],
      'location' => bless( {
        'longitude' => '166.9331',
        'latitude' => '-13.4612',
        'altitude' => -106400
      }, 'Plagger::Location' ),
      'feed_link' => 'http://earthquake.usgs.gov/eqcenter/',
      'date' => bless( {
        'local_rd_secs' => 78206,
        'local_rd_days' => 733548,
        'rd_nanosecs' => 0,
        'locale' => $VAR1->{'entries'}[0]{'date'}{'locale'},
        'local_c' => {
          'hour' => 21,
          'second' => 26,
          'month' => 5,
          'quarter' => 2,
          'day_of_year' => 141,
          'day_of_quarter' => 51,
          'minute' => 43,
          'day' => 21,
          'day_of_week' => 4,
          'year' => 2009
        },
        'utc_rd_secs' => 78206,
        'formatter' => undef,
        'tz' => bless( {
          'name' => 'UTC'
        }, 'DateTime::TimeZone::UTC' ),
        'utc_year' => 2010,
        'utc_rd_days' => 733548,
        'offset_modifier' => 0
      }, 'Plagger::Date' ),
      'author' => undef,
      'meta' => {},
      'tags' => [
        'S13.5',
        'E166.9',
        '-106.4km',
        'Age'
      ],
      'body' => bless( {
        'type' => 'html',
        'data' => '<img src="http://earthquake.usgs.gov/images/globes/-15_165.jp
g" alt="13.461&#176;S 166.933&#176;E" align="left" hspace="20" /><p>Thursday, Ma
y 21, 2009 21:43:26 UTC<br>Friday, May 22, 2009 08:43:26 AM at epicenter</p><p><
strong>Depth</strong>: 106.40 km (66.11 mi)</p>'
      }, 'Plagger::Text' ),
      'rate' => 0,
      'summary' => $VAR1->{'entries'}[23]{'body'},
      'enclosures' => [],
      'id' => 'urn:earthquake-usgs-gov:us:2009gxcb',
      'title' => bless( {
        'data' => 'M 5.0, Vanuatu',
        'type' => 'text'
      }, 'Plagger::Text' )
    }, 'Plagger::Entry' ),
(snip)
test-usgs-geo.yaml
plugins:
  - module: Subscription::Config
    config:
      feed:
         - url: http://earthquake.usgs.gov/eqcenter/catalogs/eqs7day-M5.xml
  - module: Namespace::Geo
  - module: Aggregator::Simple
  - module: Filter::Rule
    rule:
      expression: |
        my $loc = $args->{entry}->location;
        if($loc){ unshift @{$args->{entry}->tags}, grep defined,
        sprintf("%s%.1f",($loc->latitude<0?"S":"N"),abs($loc->latitude)),
        sprintf("%s%.1f",($loc->longitude<0?"W":"E"),abs($loc->longitude)),
        (defined($loc->altitude)?sprintf("%.1fkm",.001*$loc->altitude):undef),
        }
        1;
  - module: Publish::Debug
~/plagger$ ./plagger -c ~/test-usgs-geo.yaml
(snip)
    bless( {
      'link' => 'http://earthquake.usgs.gov/eqcenter/recenteqsww/Quakes/us2009gxcb.php',
      'widgets' => [],
      'location' => bless( {
        'longitude' => '166.9331',
        'latitude' => '-13.4612'
      }, 'Plagger::Location' ),
      'feed_link' => 'http://earthquake.usgs.gov/eqcenter/',
      'date' => bless( {
        'local_rd_secs' => 78206,
        'local_rd_days' => 733548,
        'rd_nanosecs' => 0,
        'locale' => $VAR1->{'entries'}[0]{'date'}{'locale'},
        'local_c' => {
          'hour' => 21,
          'second' => 26,
          'month' => 5,
          'quarter' => 2,
          'day_of_year' => 141,
          'day_of_quarter' => 51,
          'minute' => 43,
          'day' => 21,
          'day_of_week' => 4,
          'year' => 2009
        },
        'utc_rd_secs' => 78206,
        'formatter' => undef,
        'tz' => bless( {
          'name' => 'UTC'
        }, 'DateTime::TimeZone::UTC' ),
        'utc_year' => 2010,
        'utc_rd_days' => 733548,
        'offset_modifier' => 0
      }, 'Plagger::Date' ),
      'author' => undef,
      'meta' => {},
      'tags' => [
        'S13.5',
        'E166.9',
        '5',
        'pastweek',
        '106.40 km'
      ],
      'body' => bless( {
        'data' => 'May 21, 2009 21:43:26 GMT',
        'type' => 'text'
      }, 'Plagger::Text' ),
      'rate' => 0,
      'summary' => $VAR1->{'entries'}[23]{'body'},
      'enclosures' => [],
      'id' => bless( {
        'isPermaLink' => 'false',
        '_content' => 'us2009gxcb',
        '_attributes' => [
          'isPermaLink'
        ]
      }, 'XML::RSS::LibXML::MagicElement' ),
      'title' => bless( {
        'data' => 'M 5.0, Vanuatu',
        'type' => 'text'
      }, 'Plagger::Text' )
    }, 'Plagger::Entry' ),
(snip)

※Namespace::Geoの例で使ったフィードにはgeo:altの要素が存在しないため、高度の情報は出力されていません。

追記(2009-05-30)

http://github.com/miyagawa/plagger/commits/
githubでの本流にpullされました。thx!

ついでにyamlのフィルタ処理部分を少し手直し。まぁどうでもいい範囲ですが。

plugins:
  - module: Subscription::Config
    config:
      feed:
        - url: http://earthquake.usgs.gov/eqcenter/catalogs/7day-M5.xml
#        - url: http://earthquake.usgs.gov/eqcenter/catalogs/eqs7day-M5.xml
  - module: Namespace::GeoRSS
  - module: Namespace::Geo
  - module: Aggregator::Simple
  - module: Filter::Rule
    rule:
      expression: |
        if(my $loc = $args->{entry}->location) {
        unshift @{$args->{entry}->tags}, grep length,
        defined($loc->latitude) &&
          (map{tr/+-/NS/;$_} sprintf "%+.1f",$loc->latitude),
        defined($loc->longitude) &&
          (map{tr/+-/EW/;$_} sprintf "%+.1f",$loc->longitude),
        defined($loc->altitude) &&
          sprintf("%.1fkm",.001*$loc->altitude),
        }
        1;
  - module: Publish::Debug

http://earthquake.usgs.gov/eqcenter/catalogs/7day-M5.xml
蛇足ついでに触れておくと、このフィードは各エントリのリンクが相対URIになっていてちょっと処理が面倒ですね(LivedoorReaderやはてなRSSでもリンクがおかしくなる)。個別にフィルタを書いてもいいですが、xml:base属性を見て汎用的にリンクを絶対URI化できるような方法はあるんでしょうか。どこの親要素にこの属性が付くか分からなそうですが…。