Group Batoto chapters into groups.
[automanga.git] / manga / batoto.py
CommitLineData
c39028a4 1import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time
81be6921
FT
2from . import profile, lib, htcache
soup = bs4.BeautifulSoup
# PEP 8: prefer a def over binding a lambda to a name (better tracebacks and repr).
def soupify(cont):
    """Parse document `cont` with BeautifulSoup's built-in html.parser backend."""
    return soup(cont, "html.parser")
5
class pageerror(Exception):
    """Raised when a fetched page lacks the structure we expect.

    The offending parsed page is kept on the `page` attribute so callers
    can inspect it post mortem.
    """

    def __init__(self, message, page):
        super().__init__(message)
        self.page = page
08e259d7 10
c39028a4
FT
def iterlast(itr, default=None):
    """Return the last item yielded by iterator `itr`, or `default` if it is empty.

    Bug fix: the original only seeded `ret` when `default` was not None, so
    an exhausted iterator combined with the default `default` raised
    UnboundLocalError instead of returning None.
    """
    ret = default
    try:
        while True:
            ret = next(itr)
    except StopIteration:
        return ret
19
def find1(el, *args, **kwargs):
    """Like `el.find(...)`, but raise pageerror instead of returning None.

    The error carries the root of `el`'s tree (or `el` itself when it has
    no parents) so the whole page can be inspected.
    """
    found = el.find(*args, **kwargs)
    if found is None:
        raise pageerror("could not find expected element", iterlast(el.parents, el))
    return found
25
08e259d7
FT
def byclass(el, name, cl):
    """Return the first `name` tag under `el` carrying CSS class `cl`, or None."""
    for cand in el.findAll(name):
        if isinstance(cand, bs4.Tag) and cl in cand.get("class", []):
            return cand
    return None
33
def nextel(el):
    """Return the next sibling of `el` that is an actual tag, skipping text nodes."""
    sib = el.nextSibling
    while not isinstance(sib, bs4.Tag):
        sib = sib.nextSibling
    return sib
39
c39028a4
FT
def fetchreader(lib, readerid, page):
    """Fetch and soupify page number `page` of the Batoto reader `readerid`."""
    query = urllib.parse.urlencode({"id": readerid, "p": str(page)})
    # The reader endpoint refuses requests without a same-site Referer.
    raw = lib.sess.fetch(lib.base + "areader?" + query,
                         headers={"Referer": "http://bato.to/reader"})
    return soupify(raw)
44
08e259d7 45class page(lib.page):
c39028a4 46 def __init__(self, chapter, stack, readerid, n):
08e259d7 47 self.stack = stack
c39028a4 48 self.lib = chapter.lib
08e259d7
FT
49 self.chapter = chapter
50 self.n = n
51 self.id = str(n)
81be6921 52 self.name = "Page %s" % n
c39028a4 53 self.readerid = readerid
08e259d7
FT
54 self.ciurl = None
55
56 def iurl(self):
57 if self.ciurl is None:
c39028a4
FT
58 page = fetchreader(self.lib, self.readerid, self.n)
59 img = find1(page, "img", id="comic_page")
81be6921 60 self.ciurl = img["src"]
08e259d7
FT
61 return self.ciurl
62
63 def open(self):
64 return lib.stdimgstream(self.iurl())
65
66 def __str__(self):
67 return self.name
68
69 def __repr(self):
5c11ebea 70 return "<batoto.page %r.%r.%r.%r>" % (self.chapter.manga.name, self.chapter.group.name, self.chapter.name, self.name)
08e259d7
FT
71
class chapter(lib.pagelist):
    """One chapter of a manga: a lazily-populated list of page objects."""

    def __init__(self, group, stack, id, name, readerid):
        self.stack = stack
        self.group = group
        self.manga = group.manga
        self.lib = self.manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        self.cpag = None  # page cache, filled on first pages() call

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Matches option labels of the form "page N" in the reader's selector.
    pnre = re.compile(r"page (\d+)")

    def pages(self):
        """Return this chapter's pages, scraping the reader's page selector on first use."""
        if self.cpag is not None:
            return self.cpag
        doc = fetchreader(self.lib, self.readerid, 1)
        plist = []
        for opt in find1(doc, "select", id="page_select").findAll("option"):
            pn = int(self.pnre.match(opt.string).group(1))
            plist.append(page(self, self.stack + [(self, len(plist))], self.readerid, pn))
        self.cpag = plist
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r.%r>" % (self.manga.name, self.group.name, self.name)
105
class group(lib.pagelist):
    """All chapters of one manga released by a single scanlation group."""

    def __init__(self, manga, stack, id, name):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.ch = []  # chapter objects, appended by manga.ch()

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        # Bug fix: the closing ">" was missing, unlike every other
        # __repr__ in this module.
        return "<batoto.group %r.%r>" % (self.manga.name, self.name)
08e259d7
FT
125
class manga(lib.manga):
    """A single Batoto manga; its chapters are grouped by scanlation group."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.cch = None     # cached group list, filled by ch()
        self.stack = []
        self.cnames = None  # cached alternate-name list, filled by altnames()

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        # A registration notice, or a missing chapter table, indicates the
        # fetch was not performed with a valid login.
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    # Extracts the reader id from a chapter URL fragment like "/reader#abc123".
    cure = re.compile(r"/reader#([a-z0-9]+)")
    def ch(self):
        """Return the list of scanlation groups (each holding its chapters), scraping on first use."""
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            # CSS class marking chapter rows in the configured language.
            scl = "lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, bs4.Tag) and ch.name == "tr":
                    cll = ch.get("class", [])
                    if "row" in cll and scl in cll:
                        url = ch.td.a["href"]
                        m = self.cure.search(url)
                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                        readerid = m.group(1)
                        name = ch.td.a.text
                        # The group name sits two cells to the right of the chapter link.
                        gname = nextel(nextel(ch.td)).text.strip()
                        cch.append((readerid, name, gname))
            # Reversed: presumably the site lists newest first — TODO confirm.
            cch.reverse()
            # Bucket chapters by group name, remembering each group's first
            # position so groups come out ordered by first appearance.
            groups = {}
            for n, (readerid, name, gname) in enumerate(cch):
                groups.setdefault(gname, [n, []])[1].append((readerid, name))
            groups = sorted(groups.items(), key=lambda o: o[1][0])
            rgrp = []
            for n, (gname, (_, gch)) in enumerate(groups):
                ngrp = group(self, [(self, n)], gname, gname)
                for m, (readerid, name) in enumerate(gch):
                    ngrp.ch.append(chapter(ngrp, ngrp.stack + [(ngrp, m)], readerid, name, readerid))
                rgrp.append(ngrp)
            self.cch = rgrp
        return self.cch

    def altnames(self):
        """Return the manga's alternate names, scraped from the info tables on first use.

        Raises pageerror when no "Alt Names:" row can be found, or when the
        row does not have the expected shape.
        """
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if "Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
210
81be6921
FT
class credentials(object):
    """A Batoto username/password pair, optionally loaded from a profile file."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Parse the profile file at `path` into a credentials object.

        Recognized directives are `username`, `password` and `pass64`
        (base64-encoded UTF-8 password). Raises ValueError when either half
        of the pair is missing.
        """
        username = password = None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key == "username":
                    username = words[1]
                elif key == "password":
                    password = words[1]
                elif key == "pass64":
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if username is None or password is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        """Return credentials from the default profile location, or None if absent."""
        path = os.path.join(profile.confdir, "batoto")
        return cls.fromfile(path) if os.path.exists(path) else None
238
class session(object):
    """An HTTP session against a Batoto site, logging in on demand.

    Cookies live in an in-memory jar; dologin() authenticates with the
    stored credentials whenever a fetched page turns out not to be logged in.
    """

    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.lastlogin = 0  # time.time() of the last successful login

    # Extracts the logged-in username from the "Welcome, NAME" user link.
    rlre = re.compile(r"Welcome, (.*) ")

    def dologin(self, pre=None):
        """Log in as self.creds, optionally reusing the already-fetched page `pre`.

        Raises pageerror when the login form cannot be found or the site does
        not confirm the login, and a plain Exception when invoked less than a
        minute after the previous attempt (rate limiting).
        """
        now = time.time()
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if not m or m.group(1) != self.creds.username:
                # Logged in as somebody else: locate the sign-out link, log
                # out, and refetch the front page before logging back in.
                outurl = None
                nav = page.find("div", id="user_navigation")
                if nav:
                    for li in nav.findAll("li"):
                        if li.a and "Sign Out" in li.a.string:
                            outurl = li.a["href"]
                if not outurl:
                    raise pageerror("Could not find logout URL", page)
                # Bug fix: this read "self.wep.open", which raised
                # AttributeError whenever a logout was actually needed.
                with self.web.open(outurl) as hs:
                    hs.read()
                with self.web.open(self.base) as hs:
                    page = soupify(hs.read())
            else:
                # Already signed in as the right user; nothing to do.
                return

        form = page.find("form", id="login")
        if not form and pre:
            # The caller-supplied page may simply lack the form; retry once
            # with a freshly fetched front page.
            return self.dologin()
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open `url` through the cookie-carrying opener."""
        return self.web.open(url)

    def fetch(self, url, headers=None):
        """Fetch `url` (with optional extra request headers) and return the raw body bytes."""
        req = urllib.request.Request(url)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch `url`; if predicate `ck(page)` fails, log in and refetch once.

        Raises pageerror when the page still fails `ck` after logging in.
        """
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
317
08e259d7 318class library(lib.library):
81be6921
FT
319 def __init__(self, *, creds=None):
320 if creds is None:
321 creds = credentials.default()
322 self.base = "http://bato.to/"
323 self.sess = session(self.base, creds)
324 self.lang = "English"
08e259d7
FT
325
326 def byid(self, id):
327 url = self.base + "comic/_/comics/" + id
81be6921 328 page = soupify(self.sess.fetch(url))
08e259d7
FT
329 title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
330 if title is None:
331 raise KeyError(id)
332 return manga(self, id, title.string.strip(), url)
333
24f0a3b7 334 def _search(self, pars):
1043cbdb
FT
335 p = 1
336 while True:
24f0a3b7
FT
337 _pars = dict(pars)
338 _pars["p"] = str(p)
f96b068d 339 resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
1043cbdb 340 try:
c0d3b1a2 341 page = soupify(resp.read())
1043cbdb
FT
342 finally:
343 resp.close()
344 rls = page.find("div", id="comic_search_results").table
345 if rls.tbody is not None:
346 rls = rls.tbody
347 hasmore = False
348 for child in rls.findAll("tr"):
349 if child.th is not None: continue
81be6921
FT
350 if child.get("id", "")[:11] == "comic_rowo_": continue
351 if child.get("id") == "show_more_row":
1043cbdb
FT
352 hasmore = True
353 continue
354 link = child.td.strong.a
81be6921 355 url = link["href"]
1043cbdb
FT
356 m = self.rure.search(url)
357 if m is None: raise Exception("Got weird manga URL: %r" % url)
358 id = m.group(1)
359 name = link.text.strip()
1043cbdb
FT
360 yield manga(self, id, name, url)
361 p += 1
362 if not hasmore:
363 break
24f0a3b7
FT
364
365 rure = re.compile(r"/comic/_/([^/]*)$")
366 def search(self, expr):
81be6921 367 return self._search({"name": expr, "name_cond": "c"})
24f0a3b7
FT
368
369 def byname(self, prefix):
81be6921 370 for res in self._search({"name": prefix, "name_cond": "s"}):
24f0a3b7
FT
371 if res.name[:len(prefix)].lower() == prefix.lower():
372 yield res
373 else:
374 for aname in res.altnames():
375 if aname[:len(prefix)].lower() == prefix.lower():
376 yield manga(self, res.id, aname, res.url)
377 break
378 else:
379 if False:
81be6921
FT
380 print("eliding " + res.name)
381 print(res.altnames())