X-Git-Url: http://dolda2000.com/gitweb/?p=utils.git;a=blobdiff_plain;f=ANN.pm;h=6dd02e94fc951f0b1773cf7f5131f6ecf3359386;hp=f41147a8c988549abc09ea34ee759b9d1f757726;hb=8b09dc89e6081cfc76fee513be8dac9e39b6305f;hpb=edcecb65d18376f5401aefbcbc2c563cf10e74f0 diff --git a/ANN.pm b/ANN.pm index f41147a..6dd02e9 100644 --- a/ANN.pm +++ b/ANN.pm @@ -35,12 +35,12 @@ sub _get $res = $ua->request(HTTP::Request->new("GET", "$uri")); if(open CACHE, ">:utf8", $cname) { - print CACHE $res->content; + print CACHE $res->decoded_content; close CACHE; } return undef unless $res->is_success; - return $res->content; + return $res->decoded_content; } sub getlist @@ -49,7 +49,7 @@ sub getlist ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -58,9 +58,9 @@ sub getlist # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - while($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { - if((substr "" . lc $3 , 0, length $name) eq lc $name) { - push @ret, $3; + while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { + if((substr "" . lc $4 , 0, length $name) eq lc $name) { + push @ret, $4; } } # push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig; @@ -74,7 +74,7 @@ sub getid ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -83,8 +83,8 @@ sub getid # The only way to recognize entries that seems sure is to look # after the "HOVERLINE" class. - while($html =~ /(\.*\<\/small\>)?([^<]+)<\//ig) { - if((substr "" . lc $3 , 0, length $name) eq lc $name) { + while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) { + if((substr "" . lc $4 , 0, length $name) eq lc $name) { return ($1 =~ /id=(\d+)$/)[0]; } } @@ -107,10 +107,10 @@ sub getthemes if($html =~ /$kind theme:<\/strong>\s*\n/igc) { my(@parts, $ct, $buf); - while($html =~ /\G\s*\(([^<>]|\|<\/i>)+)<\/div>/igc) { + while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)(]*>[^<>]*]*>[^<>]*<\/span>)?<\/div>/igc) { $buf = $1; - # 0 1 2 3 4 5 6 7 8 9 10 11 - if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) { + # 0 1 2 3 4 5 6 7 8 9 10 1112 + if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) { $ct = {}; $ct->{"num"} = $parts[1] if defined $parts[1]; if(defined $parts[5]) { @@ -122,7 +122,7 @@ sub getthemes $ct->{"ent"} = decode_entities($parts[7]) if defined $parts[7]; $ct->{"prf"} = decode_entities($parts[8]) if defined $parts[8]; $ct->{"fep"} = $parts[10] if defined $parts[10]; - $ct->{"lep"} = $parts[11] if defined $parts[11]; + $ct->{"lep"} = $parts[12] if defined $parts[12]; push @ret, $ct; } }