8593e0495492dd7d878a0a6fefd2cb2e49ac0676
[automanga.git] / manga / batoto.py
1 import urllib, re, BeautifulSoup
2 import lib, htcache
3 soup = BeautifulSoup.BeautifulSoup
4
def byclass(el, name, cl):
    """Return the first `name` tag under `el` whose class attribute
    contains the token `cl`, or None if no such tag exists."""
    for cand in el.findAll(name):
        if isinstance(cand, BeautifulSoup.Tag):
            classes = cand.get("class", "").split()
            if cl in classes:
                return cand
    return None
12
def nextel(el):
    """Return the next sibling of `el` that is a Tag, skipping over
    intervening text/navigable-string nodes."""
    cur = el.nextSibling
    while not isinstance(cur, BeautifulSoup.Tag):
        cur = cur.nextSibling
    return cur
18
class page(lib.page):
    """A single page of a Batoto chapter.

    The actual image URL is only resolved (and cached) when first
    requested, via `iurl`.
    """

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n                      # 1-based page number from the page_select dropdown
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url                  # URL of the reader page holding the image
        self.ciurl = None               # cached image URL; filled by iurl()

    def iurl(self):
        """Fetch the reader page (once) and return the image's source URL."""
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            # The image sits in the tag following the div#full_image anchor.
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        # BUGFIX: was misspelled `__repr`, so repr() silently fell back to
        # the default object representation.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
44
class chapter(lib.pagelist):
    """One chapter of a Batoto manga.

    Behaves as a sequence of `page` objects; the page list is scraped
    from the reader's page-select dropdown on first access and cached.
    """

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None        # page cache, populated lazily by pages()

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    pnre = re.compile(r"page (\d+)")
    def pages(self):
        """Scrape (once) and return the list of pages in this chapter."""
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url))
            acc = []
            for opt in doc.find("select", id="page_select").findAll("option"):
                purl = opt["value"].encode("us-ascii")
                pn = int(self.pnre.match(opt.string).group(1))
                acc.append(page(self, self.stack + [(self, len(acc))], pn, purl))
            self.cpag = acc
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
77
class manga(lib.manga):
    """A Batoto manga: a lazily-scraped sequence of `chapter` objects.

    `lib` here is the owning `library` instance (shadows the module-level
    `lib` import within this class's methods).
    """

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None          # chapter cache, filled by ch()
        self.stack = []
        self.cnames = None       # alt-name cache, filled by altnames()

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    cure = re.compile(r"/read/_/(\d+)/[^/]*")
    def ch(self):
        """Scrape (once) and return the chapter list, oldest first.

        Only rows matching the library's configured language are kept.
        Raises Exception on a chapter link that does not match `cure`.
        """
        if self.cch is None:
            page = soup(htcache.fetch(self.url))
            cls = byclass(page, u"table", u"chapters_list")
            # Some parses put rows under an explicit tbody, some don't.
            if cls.tbody is not None:
                cls = cls.tbody
            scl = u"lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
                    cll = ch.get("class", "").split()
                    # Keep only real chapter rows in the configured language.
                    if u"row" in cll and scl in cll:
                        url = ch.td.a["href"].encode("us-ascii")
                        m = self.cure.search(url)
                        if m is None: raise Exception("Got weird chapter URL: %r" % url)
                        cid = m.group(1)
                        # Rebuild a canonical reader URL from the chapter id.
                        url = self.lib.base + "read/_/" + cid
                        name = ch.td.a.text
                        cch.append((cid, name, url))
            # The site lists newest first; store oldest first.
            cch.reverse()
            rch = []
            for n, (cid, name, url) in enumerate(cch):
                rch.append(chapter(self, [(self, n)], cid, name, url))
            self.cch = rch
        return self.cch

    def altnames(self):
        """Scrape (once) and return the list of alternative titles.

        Looks for an "Alt Names:" row in the info tables; raises
        Exception if the row is missing or oddly shaped.
        """
        if self.cnames is None:
            page = soup(htcache.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if u"Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != u"td" or nls.span is None:
                            raise Exception("Weird altnames table in " + self.id)
                        # One span per alternative name.
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise Exception("Could not find altnames for " + self.id)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
146
class library(lib.library):
    """Batoto backend: lookup by id, search, and prefix matching."""

    def __init__(self):
        self.base = "http://www.batoto.net/"
        self.lang = u"English"      # only chapters in this language are listed

    def byid(self, id):
        """Return the manga with the given site id, or raise KeyError."""
        url = self.base + "comic/_/comics/" + id
        page = soup(htcache.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield manga objects for the search parameters `pars`.

        Follows the site's pagination (incrementing the "p" parameter)
        until a results page no longer contains a "show more" row.
        """
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
            try:
                page = soup(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                # Skip header rows.
                if child.th is not None: continue
                # Skip secondary detail rows (id prefix "comic_rowo_").
                if child.get("id", u"")[:11] == u"comic_rowo_": continue
                # A "show more" row means another results page exists.
                if child.get("id") == u"show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"].encode("us-ascii")
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        """Search for manga whose name contains `expr` (substring match)."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        return self._search({"name": expr.encode("utf8"), "name_cond": "c"})

    def byname(self, prefix):
        """Yield manga whose name or an alt-name starts with `prefix`.

        Matches case-insensitively; when only an alt-name matches, the
        yielded manga carries that alt-name as its display name.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break
                else:
                    # Disabled debug output for results eliminated by the
                    # prefix check.
                    if False:
                        print "eliding " + res.name
                        print res.altnames()