Initial import.
authorfredrik <fredrik@959494ce-11ee-0310-bf91-de5d638817bd>
Fri, 5 Aug 2005 00:18:12 +0000 (00:18 +0000)
committerfredrik <fredrik@959494ce-11ee-0310-bf91-de5d638817bd>
Fri, 5 Aug 2005 00:18:12 +0000 (00:18 +0000)
git-svn-id: svn+ssh://svn.dolda2000.com/srv/svn/repos/src/utils@303 959494ce-11ee-0310-bf91-de5d638817bd

ANN.pm [new file with mode: 0644]

diff --git a/ANN.pm b/ANN.pm
new file mode 100644 (file)
index 0000000..fbf08e5
--- /dev/null
+++ b/ANN.pm
@@ -0,0 +1,108 @@
+package Anime::ANN;
+
+use LWP::UserAgent;
+use HTML::Entities;
+
+$ver = "0.1";
+
+sub _get
+{
+    my($uri, $ua, $res);
+    ($uri) = @_;
+
+    $ua = LWP::UserAgent->new;
+    $ua->agent("ANNData/$ver ");
+
+    $res = $ua->request(HTTP::Request->new("GET", "$uri"));
+
+    die "could not fetch $uri\n" unless $res->is_success;
+    return $res->content;
+}
+
+sub getlist
+{
+    my($name, $il, $html, @ret);
+    ($name) = @_;
+    
+    $il = uc(($name =~ /^(.)/)[0]);
+    $il = "9" if (!($il =~ /[A-Z]/));
+    $html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il";
+    
+    # The only way to recognize entries that seems sure is to look
+    # after the "HOVERLINE" class.
+    
+    push @ret, $1 while $html =~ /<A\s.*CLASS=HOVERLINE\s.*>.*<FONT.*>([^<>]*$name[^<>]*)<\/FONT/ig;
+    
+    return(@ret);
+}
+
+sub getid
+{
+    my($name, $il, $html, $url);
+    ($name) = @_;
+    
+    $il = uc(($name =~ /^(.)/)[0]);
+    $il = "9" if (!($il =~ /[A-Z]/));
+    $html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il";
+    
+    # The only way to recognize entries that seems sure is to look
+    # after the "HOVERLINE" class.
+    
+    (($url) = ($html =~ /<A\s.*CLASS=HOVERLINE\s.*HREF=\"([^\"]+)\".*$name/i)) || return;
+    
+    return ($url =~ /\?id=(\d+)$/)[0];
+}
+
+sub getthemes
+{
+    my($html, $kind, @ret);
+    ($html, $kind) = @_;
+    
+    if($html =~ /$kind theme:<\/b>\n/igc) {
+       my(@parts, $ct, $buf);
+       while($html =~ /\G\<br\>&nbsp; &nbsp; (([^<>]|\<i\>|<\/i>)+)/igc) {
+           $buf = $1;
+           #                     0  1            2         3     4     5        6        7                     8              9         10     11
+           if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+)(\s+\((\<i\>(.*)<\/i>(;\s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) {
+               $ct = {};
+               $ct->{"num"} = $parts[1] if defined $parts[1];
+               if(defined $parts[5]) {
+                   $ct->{"tit"} = $parts[5];
+                   $ct->{"jat"} = decode_entities($parts[2]) if defined $parts[2];
+               } else {
+                   $ct->{"tit"} = $parts[2] if defined $parts[2];
+               }
+               $ct->{"ent"} = $parts[7] if defined $parts[7];
+               $ct->{"prf"} = $parts[8] if defined $parts[8];
+               $ct->{"fep"} = $parts[10] if defined $parts[10];
+               $ct->{"lep"} = $parts[11] if defined $parts[11];
+               push @ret, $ct;
+           }
+       }
+    }
+    
+    return \@ret;
+}
+
+sub getseries
+{
+    my($id, $buf, $html, %ret);
+    ($id) = @_;
+    
+    $html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?id=$id";
+    
+    $ret{"url"} = "http://www.animenewsnetwork.com/encyclopedia/anime.php?id=$id";
+    ($ret{"name"}) = ($html =~ /\<TITLE\>Anime News Network - ([^<]*)<\/TITLE>/);
+    if(($buf) = ($html =~ /vintage:<\/b>\n([^<]+)</is)) {
+       $ret{"vtg"} = $buf;
+    }
+    if(($buf) = ($html =~ /number of episodes:<\/b>\n([^<]+)</is)) {
+       $ret{"eps"} = $buf;
+    }
+    $buf = getthemes $html, "opening";
+    $ret{"op"} = $buf if(@{$buf});
+    $buf = getthemes $html, "ending";
+    $ret{"ed"} = $buf if(@{$buf});
+    
+    return \%ret;
+}