X-Git-Url: http://dolda2000.com/gitweb/?p=utils.git;a=blobdiff_plain;f=ANN.pm;h=f41147a8c988549abc09ea34ee759b9d1f757726;hp=020c54746e2f0b34aac7e25cc7fb9ddab14fd7b2;hb=0c0ef5c73fa675decb4339fc6a858c96d0878996;hpb=b36b72ecbeec32adbb8a200dc82ca4f699f6e4a0 diff --git a/ANN.pm b/ANN.pm index 020c547..f41147a 100644 --- a/ANN.pm +++ b/ANN.pm @@ -48,6 +48,7 @@ sub getlist my($name, $il, $html, @ret); ($name) = @_; + $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; $il = uc(($name =~ /^(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { @@ -57,7 +58,12 @@ sub getlist # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig; + while($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { + if((substr "" . lc $3 , 0, length $name) eq lc $name) { + push @ret, $3; + } + } + # push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig; return @ret; } @@ -67,6 +73,7 @@ sub getid my($name, $il, $html, $url); ($name) = @_; + $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; $il = uc(($name =~ /^(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { @@ -76,9 +83,13 @@ sub getid # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - (($url) = ($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { + if((substr "" . lc $3 , 0, length $name) eq lc $name) { + return ($1 =~ /id=(\d+)$/)[0]; + } + } - return ($url =~ /\?id=(\d+)$/)[0]; + return undef; } sub geturl @@ -94,12 +105,12 @@ sub getthemes my($html, $kind, @ret); ($html, $kind) = @_; - if($html =~ /$kind theme:<\/b>\n/igc) { + if($html =~ /$kind theme:<\/strong>\s*\n/igc) { my(@parts, $ct, $buf); - while($html =~ /\G\    (([^<>]|\|<\/i>)+)/igc) { + while($html =~ /\G\s*\(([^<>]|\|<\/i>)+)<\/div>/igc) { $buf = $1; - # 0 1 2 3 4 5 6 7 8 9 10 11 - if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+)(\s*\((\(.*)<\/i>(;\s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) { + # 0 1 2 3 4 5 6 7 8 9 10 11 + if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) { $ct = {}; $ct->{"num"} = $parts[1] if defined $parts[1]; if(defined $parts[5]) { @@ -130,19 +141,19 @@ sub getseries } $ret{"url"} = geturl $id; - ($buf) = ($html =~ /\Anime News Network - ([^<]*)<\/TITLE>/); + ($buf) = ($html =~ /\([^<]*) - Anime News Network<\/title>/); if($buf =~ /\([^\)]+\)$/) { ($ret{"name"}, $ret{"type"}) = ($buf =~ /^(.*[^\s])\s*\(([^\)]+)\)$/); } else { $ret{"name"} = $buf; } - if(($buf) = ($html =~ /vintage:<\/b>\n([^<]+)\s*\n\s*([^<]+)\n([^<]+)\s*\n\s*([^<]+)\n([^<]+)\s*\n\s*([^<]+)