Separated into Anime::ANN.
[utils.git] / ANN.pm
CommitLineData
23d4abb2 1package Anime::ANN;
2
3use LWP::UserAgent;
4use HTML::Entities;
5
6$ver = "0.1";
7
8sub _get
9{
10 my($uri, $ua, $res);
11 ($uri) = @_;
12
13 $ua = LWP::UserAgent->new;
14 $ua->agent("ANNData/$ver ");
15
16 $res = $ua->request(HTTP::Request->new("GET", "$uri"));
17
18 die "could not fetch $uri\n" unless $res->is_success;
19 return $res->content;
20}
21
22sub getlist
23{
24 my($name, $il, $html, @ret);
25 ($name) = @_;
26
27 $il = uc(($name =~ /^(.)/)[0]);
28 $il = "9" if (!($il =~ /[A-Z]/));
29 $html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il";
30
31 # The only way to recognize entries that seems sure is to look
32 # after the "HOVERLINE" class.
33
34 push @ret, $1 while $html =~ /<A\s.*CLASS=HOVERLINE\s.*>.*<FONT.*>([^<>]*$name[^<>]*)<\/FONT/ig;
35
36 return(@ret);
37}
38
39sub getid
40{
41 my($name, $il, $html, $url);
42 ($name) = @_;
43
44 $il = uc(($name =~ /^(.)/)[0]);
45 $il = "9" if (!($il =~ /[A-Z]/));
46 $html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il";
47
48 # The only way to recognize entries that seems sure is to look
49 # after the "HOVERLINE" class.
50
51 (($url) = ($html =~ /<A\s.*CLASS=HOVERLINE\s.*HREF=\"([^\"]+)\".*$name/i)) || return;
52
53 return ($url =~ /\?id=(\d+)$/)[0];
54}
55
a9da6366 56sub geturl
57{
58 my($id);
59 ($id) = @_;
60
61 return "http://www.animenewsnetwork.com/encyclopedia/anime.php?id=$id";
62}
63
23d4abb2 64sub getthemes
65{
66 my($html, $kind, @ret);
67 ($html, $kind) = @_;
68
69 if($html =~ /$kind theme:<\/b>\n/igc) {
70 my(@parts, $ct, $buf);
71 while($html =~ /\G\<br\>&nbsp; &nbsp; (([^<>]|\<i\>|<\/i>)+)/igc) {
72 $buf = $1;
73 # 0 1 2 3 4 5 6 7 8 9 10 11
74 if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+)(\s+\((\<i\>(.*)<\/i>(;\s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) {
75 $ct = {};
76 $ct->{"num"} = $parts[1] if defined $parts[1];
77 if(defined $parts[5]) {
a9da6366 78 $ct->{"tit"} = decode_entities($parts[5]);
23d4abb2 79 $ct->{"jat"} = decode_entities($parts[2]) if defined $parts[2];
80 } else {
a9da6366 81 $ct->{"tit"} = decode_entities($parts[2]) if defined $parts[2];
23d4abb2 82 }
a9da6366 83 $ct->{"ent"} = decode_entities($parts[7]) if defined $parts[7];
84 $ct->{"prf"} = decode_entities($parts[8]) if defined $parts[8];
23d4abb2 85 $ct->{"fep"} = $parts[10] if defined $parts[10];
86 $ct->{"lep"} = $parts[11] if defined $parts[11];
87 push @ret, $ct;
88 }
89 }
90 }
91
92 return \@ret;
93}
94
95sub getseries
96{
97 my($id, $buf, $html, %ret);
98 ($id) = @_;
99
a9da6366 100 $html = _get geturl $id;
23d4abb2 101
a9da6366 102 $ret{"url"} = geturl $id;
604af5b7 103 ($buf) = ($html =~ /\<TITLE\>Anime News Network - ([^<]*)<\/TITLE>/);
104 if($buf =~ /\([^\)]+\)$/) {
105 ($ret{"name"}, $ret{"type"}) = ($buf =~ /^(.*[^\s])\s*\(([^\)]+)\)$/);
106 } else {
107 $ret{"name"} = $buf;
108 }
23d4abb2 109 if(($buf) = ($html =~ /vintage:<\/b>\n([^<]+)</is)) {
110 $ret{"vtg"} = $buf;
111 }
112 if(($buf) = ($html =~ /number of episodes:<\/b>\n([^<]+)</is)) {
113 $ret{"eps"} = $buf;
114 }
115 $buf = getthemes $html, "opening";
116 $ret{"op"} = $buf if(@{$buf});
117 $buf = getthemes $html, "ending";
118 $ret{"ed"} = $buf if(@{$buf});
119
120 return \%ret;
121}