Fixed Batoto updates.
[automanga.git] / manga / batoto.py
CommitLineData
08e259d7
FT
1import urllib, re, BeautifulSoup
2import lib, htcache
3soup = BeautifulSoup.BeautifulSoup
4
def byclass(el, name, cl):
    """Return the first <name> descendant of el whose class attribute
    contains the token cl, or None when no such tag exists."""
    for cand in el.findAll(name):
        if not isinstance(cand, BeautifulSoup.Tag):
            continue
        classes = cand.get("class", "").split()
        if cl in classes:
            return cand
    return None
12
def nextel(el):
    """Skip over non-Tag siblings (text nodes etc.) and return the next
    Tag following el."""
    nxt = el.nextSibling
    while not isinstance(nxt, BeautifulSoup.Tag):
        nxt = nxt.nextSibling
    return nxt
18
class page(lib.page):
    """A single page of a Batoto chapter.

    The page's image URL is scraped lazily from self.url on first access
    and cached in self.ciurl."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached image URL; filled in by iurl() on first call.
        self.ciurl = None

    def iurl(self):
        """Return the URL of this page's image, fetching and caching it
        on first use."""
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # BUGFIX: this was misspelled `__repr` and therefore never used by
    # repr(); renamed to the real special method name.
    def __repr__(self):
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
44
class chapter(lib.pagelist):
    """One chapter of a Batoto manga, lazily expanding into its pages."""

    # Matches the "page N" text in the chapter's page-selection dropdown.
    pnre = re.compile(r"page (\d+)")

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached page list; filled in by pages() on first call.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Scrape and return the cached list of page objects for this
        chapter, in reading order."""
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url))
            pag = []
            for opt in doc.find("select", id="page_select").findAll("option"):
                purl = opt["value"].encode("us-ascii")
                pno = int(self.pnre.match(opt.string).group(1))
                pag.append(page(self, self.stack + [(self, len(pag))], pno, purl))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
77
class manga(lib.manga):
    """A manga series on Batoto, with lazily scraped chapter and
    alternative-name lists."""

    # Extracts the numeric chapter ID from a /read/_/<id>/... URL.
    cure = re.compile(r"/read/_/(\d+)/[^/]*")

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached chapter list; filled in by ch() on first call.
        self.cch = None
        self.stack = []
        # Cached alternative names; filled in by altnames() on first call.
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Scrape and return the cached chapter list for this manga,
        restricted to the library's language, oldest first."""
        if self.cch is None:
            doc = soup(htcache.fetch(self.url))
            tbl = byclass(doc, u"table", u"chapters_list")
            if tbl.tbody is not None:
                tbl = tbl.tbody
            langcl = u"lang_" + self.lib.lang
            found = []
            for row in tbl.childGenerator():
                if not (isinstance(row, BeautifulSoup.Tag) and row.name == u"tr"):
                    continue
                classes = row.get("class", "").split()
                if u"row" not in classes or langcl not in classes:
                    continue
                href = row.td.a["href"].encode("us-ascii")
                m = self.cure.search(href)
                if m is None:
                    raise Exception("Got weird chapter URL: %r" % href)
                cid = m.group(1)
                # Normalize to a canonical chapter URL built from the ID.
                found.append((cid, row.td.a.text, self.lib.base + "read/_/" + cid))
            # The site lists newest first; we want reading order.
            found.reverse()
            self.cch = [chapter(self, [(self, n)], cid, name, url)
                        for n, (cid, name, url) in enumerate(found)]
        return self.cch

    def altnames(self):
        """Scrape and return the cached list of alternative titles for
        this manga; raises if the info table cannot be found."""
        if self.cnames is None:
            doc = soup(htcache.fetch(self.url))
            names = None
            for tbl in doc.findAll("table", attrs={"class": "ipb_table"}):
                body = tbl.tbody if tbl.tbody is not None else tbl
                for row in body.findAll("tr"):
                    if u"Alt Names:" in row.td.text:
                        cell = nextel(row.td)
                        if cell.name != u"td" or cell.span is None:
                            raise Exception("Weird altnames table in " + self.id)
                        names = [nm.text.strip() for nm in cell.findAll("span")]
                        break
                if names is not None:
                    break
            if names is None:
                raise Exception("Could not find altnames for " + self.id)
            self.cnames = names
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
146
class library(lib.library):
    """The Batoto site viewed as a manga library."""

    # Extracts the site ID from a /comic/_/<id> URL.
    rure = re.compile(r"/comic/_/([^/]*)$")

    def __init__(self):
        self.base = "http://www.batoto.net/"
        self.lang = u"English"

    def byid(self, id):
        """Return the manga with the given site ID.

        Raises KeyError if no comic page with a title exists for it."""
        url = self.base + "comic/_/comics/" + id
        page = soup(htcache.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield manga objects matching the given search parameters,
        following the site's paginated results until exhausted."""
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
            try:
                page = soup(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                # Skip header rows and the secondary "rowo" detail rows.
                if child.th is not None:
                    continue
                if child.get("id", u"")[:11] == u"comic_rowo_":
                    continue
                # The "show more" row signals another page of results.
                if child.get("id") == u"show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"].encode("us-ascii")
                m = self.rure.search(url)
                if m is None:
                    raise Exception("Got weird manga URL: %r" % url)
                yield manga(self, m.group(1), link.text.strip(), url)
            p += 1
            if not hasmore:
                break

    def search(self, expr):
        """Search for mangas whose name contains expr."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        return self._search({"name": expr.encode("utf8"), "name_cond": "c"})

    def byname(self, prefix):
        """Yield mangas whose primary or alternative name starts with
        prefix (case-insensitively); an alt-name match is yielded as a
        manga object carrying that alternative name."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                # BUGFIX: removed an unreachable `if False:` debug block
                # containing Python 2 `print` statements (dead code that
                # also made the file unparsable under Python 3).
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break