ANN: Updated HTML patterns
author fredrik <fredrik@959494ce-11ee-0310-bf91-de5d638817bd>
Thu, 27 Aug 2009 00:03:21 +0000 (00:03 +0000)
committer fredrik <fredrik@959494ce-11ee-0310-bf91-de5d638817bd>
Thu, 27 Aug 2009 00:03:21 +0000 (00:03 +0000)
git-svn-id: svn+ssh://svn.dolda2000.com/srv/svn/repos/src/utils@1121 959494ce-11ee-0310-bf91-de5d638817bd

ANN.pm

diff --git a/ANN.pm b/ANN.pm
index a9e8014..48bd9c4 100644 (file)
--- a/ANN.pm
+++ b/ANN.pm
@@ -49,7 +49,7 @@ sub getlist
     ($name) = @_;
     
     $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
     ($name) = @_;
     
     $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
-    $il = uc(($name =~ /^(.)/)[0]);
+    $il = uc(($name =~ /^\W*(.)/)[0]);
     $il = "9" if (!($il =~ /[A-Z]/));
     if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
        return undef;
     $il = "9" if (!($il =~ /[A-Z]/));
     if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
        return undef;
@@ -74,7 +74,7 @@ sub getid
     ($name) = @_;
     
     $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
     ($name) = @_;
     
     $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
-    $il = uc(($name =~ /^(.)/)[0]);
+    $il = uc(($name =~ /^\W*(.)/)[0]);
     $il = "9" if (!($il =~ /[A-Z]/));
     if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
        return undef;
     $il = "9" if (!($il =~ /[A-Z]/));
     if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
        return undef;
@@ -107,7 +107,7 @@ sub getthemes
     
     if($html =~ /$kind theme:<\/strong>\s*\n/igc) {
        my(@parts, $ct, $buf);
     
     if($html =~ /$kind theme:<\/strong>\s*\n/igc) {
        my(@parts, $ct, $buf);
-       while($html =~ /\G\s*\<div class=\"tab\"\>(([^<>]|\<i\>|<\/i>)+)<\/div>/igc) {
+       while($html =~ /\G\s*\<div class=\"tab\"\>(([^<>]|\<i\>|<\/i>)+)(<span[^<>]*>[^<>]*<img[^<>]*>[^<>]*<\/span>)?<\/div>/igc) {
            $buf = $1;
            #                     0  1            2           3     4     5        6          7                      8              9          10   1112
            if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\<i\>(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) {
            $buf = $1;
            #                     0  1            2           3     4     5        6          7                      8              9          10   1112
            if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\<i\>(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) {