package Anime::ANN; use POSIX; use Digest::MD5; use LWP::UserAgent; use HTML::Entities; $ver = "0.1"; $ua = LWP::UserAgent->new; $ua->agent("ANNData/$ver "); $usecache = 1; sub _get { my($uri, $cname, $res); ($uri) = @_; $cname = $ENV{"HOME"} . "/.ann/"; mkdir $cname unless -e $cname; $cname .= "cache/"; mkdir $cname unless -e $cname; $cname .= Digest::MD5::md5_hex $uri; if($usecache && -e $cname) { my(@s); @s = stat $cname; if((time - $s[9]) < 86400) { my($buf); open CACHE, "<:utf8", $cname; $res .= $buf while read CACHE, $buf, 1024; close CACHE; return $res; } } $res = $ua->request(HTTP::Request->new("GET", "$uri")); if(open CACHE, ">:utf8", $cname) { print CACHE $res->decoded_content; close CACHE; } return undef unless $res->is_success; return $res->decoded_content; } sub getlist { my($name, $il, $html, @ret); ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; } # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { if((substr "" . lc $4 , 0, length $name) eq lc $name) { push @ret, $4; } } # push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig; return @ret; } sub getid { my($name, $il, $html, $url); ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; } # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { if((substr "" . lc $4 , 0, length $name) eq lc $name) { return ($1 =~ /id=(\d+)$/)[0]; } } return undef; } sub geturl { my($id); ($id) = @_; return "http://www.animenewsnetwork.com/encyclopedia/anime.php?id=$id"; } sub getthemes { my($html, $kind, @ret); ($html, $kind) = @_; if($html =~ /$kind theme:<\/strong>\s*\n/igc) { my(@parts, $ct, $buf); while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)(]*>[^<>]*]*>[^<>]*<\/span>)?<\/div>/igc) { $buf = $1; # 0 1 2 3 4 5 6 7 8 9 10 1112 if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) { $ct = {}; $ct->{"num"} = $parts[1] if defined $parts[1]; if(defined $parts[5]) { $ct->{"tit"} = decode_entities($parts[5]); $ct->{"jat"} = decode_entities($parts[2]) if defined $parts[2]; } else { $ct->{"tit"} = decode_entities($parts[2]) if defined $parts[2]; } $ct->{"ent"} = decode_entities($parts[7]) if defined $parts[7]; $ct->{"prf"} = decode_entities($parts[8]) if defined $parts[8]; $ct->{"fep"} = $parts[10] if defined $parts[10]; $ct->{"lep"} = $parts[12] if defined $parts[12]; push @ret, $ct; } } } return \@ret; } sub getseries { my($id, $buf, $html, %ret); ($id) = @_; if(!($html = _get geturl $id)) { return undef; } $ret{"url"} = geturl $id; ($buf) = ($html =~ /\([^<]*) - Anime News Network<\/title>/); if($buf =~ /\([^\)]+\)$/) { ($ret{"name"}, $ret{"type"}) = ($buf =~ /^(.*[^\s])\s*\(([^\)]+)\)$/); } else { $ret{"name"} = $buf; } if(($buf) = ($html =~ /vintage:<\/strong>\s*\n\s*([^<]+)\s*\n\s*([^<]+)\s*\n\s*([^<]+)