X-Git-Url: http://dolda2000.com/gitweb/?p=utils.git;a=blobdiff_plain;f=ANN.pm;h=6dd02e94fc951f0b1773cf7f5131f6ecf3359386;hp=a9e8014a1bfbb4909136e4befbe6dd320ab1f7ef;hb=8b09dc89e6081cfc76fee513be8dac9e39b6305f;hpb=26c14f89ca84cd6d23718948c1da81280ff12c0a diff --git a/ANN.pm b/ANN.pm index a9e8014..6dd02e9 100644 --- a/ANN.pm +++ b/ANN.pm @@ -35,12 +35,12 @@ sub _get $res = $ua->request(HTTP::Request->new("GET", "$uri")); if(open CACHE, ">:utf8", $cname) { - print CACHE $res->content; + print CACHE $res->decoded_content; close CACHE; } return undef unless $res->is_success; - return $res->content; + return $res->decoded_content; } sub getlist @@ -49,7 +49,7 @@ sub getlist ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -58,9 +58,9 @@ sub getlist # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - while($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { - if((substr "" . lc $3 , 0, length $name) eq lc $name) { - push @ret, $3; + while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { + if((substr "" . lc $4 , 0, length $name) eq lc $name) { + push @ret, $4; } } # push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig; @@ -74,7 +74,7 @@ sub getid ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -83,8 +83,8 @@ sub getid # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - while($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { - if((substr "" . lc $3 , 0, length $name) eq lc $name) { + while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { + if((substr "" . lc $4 , 0, length $name) eq lc $name) { return ($1 =~ /id=(\d+)$/)[0]; } } @@ -107,7 +107,7 @@ sub getthemes if($html =~ /$kind theme:<\/strong>\s*\n/igc) { my(@parts, $ct, $buf); - while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)<\/div>/igc) { + while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)(]*>[^<>]*]*>[^<>]*<\/span>)?<\/div>/igc) { $buf = $1; # 0 1 2 3 4 5 6 7 8 9 10 1112 if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) {