Fixed Batoto search.
[automanga.git] / manga / batoto.py
CommitLineData
c39028a4 1import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time
81be6921
FT
2from . import profile, lib, htcache
# Shorthand for the BeautifulSoup constructor used throughout this module.
soup = bs4.BeautifulSoup


def soupify(cont):
    """Parse *cont* into a soup using the "html.parser" backend."""
    return soup(cont, "html.parser")
5
class pageerror(Exception):
    """Raised when a fetched page does not have the expected structure.

    The offending page (or element) is kept on the ``page`` attribute so
    that callers can inspect what was actually received.
    """

    def __init__(self, message, page):
        Exception.__init__(self, message)
        self.page = page
08e259d7 10
c39028a4
FT
def iterlast(itr, default=None):
    """Return the last item produced by *itr*.

    If *itr* produces nothing, *default* is returned instead (``None``
    when no default was given).  Previously an empty iterator combined
    with ``default=None`` left the result variable unassigned and raised
    UnboundLocalError.
    """
    ret = default
    # Exhaust the iterator; the loop variable ends up holding the last item.
    for ret in itr:
        pass
    return ret
19
def find1(el, *args, **kwargs):
    """Like ``el.find(...)``, but raise instead of returning None.

    All arguments are forwarded to ``el.find``.  When nothing matches, a
    pageerror is raised carrying the outermost ancestor of *el* (or *el*
    itself if it has no parents).
    """
    found = el.find(*args, **kwargs)
    if found is not None:
        return found
    raise pageerror("could not find expected element", iterlast(el.parents, el))
25
08e259d7
FT
def byclass(el, name, cl):
    """Return the first *name* tag under *el* whose class list contains *cl*.

    Returns None when no such tag exists.
    """
    matching = (
        node
        for node in el.findAll(name)
        if isinstance(node, bs4.Tag) and cl in node.get("class", [])
    )
    return next(matching, None)
33
def nextel(el):
    """Return the next sibling of *el* that is a tag, skipping other nodes.

    Returns None when *el* has no following tag sibling.  Previously the
    loop stepped past the last sibling and failed with AttributeError on
    ``None.nextSibling``.
    """
    sib = el.nextSibling
    while sib is not None:
        if isinstance(sib, bs4.Tag):
            return sib
        sib = sib.nextSibling
    return None
39
c39028a4
FT
def fetchreader(lib, readerid, page):
    """Fetch one page of the "areader" endpoint and return it parsed.

    *lib* supplies the base URL (``lib.base``) and the session used for
    the request (``lib.sess``); *readerid* and *page* become the ``id``
    and ``p`` query parameters.  A fixed Referer header is sent along.
    """
    query = urllib.parse.urlencode({"id": readerid, "p": str(page)})
    raw = lib.sess.fetch(lib.base + "areader?" + query,
                         headers={"Referer": "http://bato.to/reader"})
    return soupify(raw)
44
class page(lib.page):
    """A single page of a chapter; its image URL is looked up on demand."""

    def __init__(self, chapter, stack, readerid, n):
        self.stack = stack
        self.lib = chapter.lib
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.readerid = readerid
        # Cached image URL; filled in by the first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL, fetching the reader page on first use."""
        if self.ciurl is None:
            page = fetchreader(self.lib, self.readerid, self.n)
            img = find1(page, "img", id="comic_page")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Open the page image via lib.stdimgstream."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # Was misspelled ``__repr`` (no trailing underscores), so Python never
    # used it and repr() fell back to the inherited implementation.
    def __repr__(self):
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
71
class chapter(lib.pagelist):
    """A chapter of a manga; its list of pages is loaded lazily."""

    # Extracts the page number from an option label of the page selector.
    pnre = re.compile(r"page (\d+)")

    def __init__(self, manga, stack, id, name, readerid):
        self.stack = stack
        self.manga = manga
        self.lib = manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        # Cached list of page objects; filled in by pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the list of pages, fetching it from the reader once."""
        if self.cpag is not None:
            return self.cpag
        reader = fetchreader(self.lib, self.readerid, 1)
        selector = find1(reader, "select", id="page_select")
        found = []
        for opt in selector.findAll("option"):
            num = int(self.pnre.match(opt.string).group(1))
            # The stack entry records this chapter and the page's index in it.
            found.append(page(self, self.stack + [(self, len(found))], self.readerid, num))
        self.cpag = found
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
104
class manga(lib.manga):
    """A manga series; chapters and alternative names are loaded lazily."""

    # Extracts the reader id from a chapter link.
    cure = re.compile(r"/reader#([a-z0-9]+)")

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        # Cached chapter list; filled in by ch().
        self.cch = None
        self.stack = []
        # Cached alternative names; filled in by altnames().
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        """Return True if *page* looks like a logged-in series page.

        The page is rejected when it contains the ``register_notice`` div
        or lacks a ``chapters_list`` table.
        """
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    def ch(self):
        """Return the list of chapters in the library's language.

        Rows are collected in page order and then reversed before chapter
        objects are built.  Raises pageerror if a chapter link does not
        match the expected reader URL pattern.
        """
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            scl = "lang_" + self.lib.lang
            cch = []
            for row in cls.childGenerator():
                if not (isinstance(row, bs4.Tag) and row.name == "tr"):
                    continue
                cll = row.get("class", [])
                if "row" not in cll or scl not in cll:
                    continue
                url = row.td.a["href"]
                m = self.cure.search(url)
                if m is None:
                    raise pageerror("Got weird chapter URL: %r" % url, page)
                cch.append((m.group(1), row.td.a.text))
            cch.reverse()
            # The reader id doubles as the chapter id.
            self.cch = [chapter(self, [(self, n)], readerid, name, readerid)
                        for n, (readerid, name) in enumerate(cch)]
        return self.cch

    def altnames(self):
        """Return the list of alternative names shown on the series page.

        Raises pageerror if the "Alt Names:" row cannot be found or does
        not have the expected layout.
        """
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None:
                    tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    # Rows without any <td> cannot hold the label; skipping
                    # them avoids an AttributeError on ``None.text``.
                    if tr.td is None:
                        continue
                    if "Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        # nls may be None if the label cell has no
                        # following element.
                        if nls is None or nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
181
81be6921
FT
class credentials(object):
    """A username/password pair used for logging in."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Build credentials from the profile file at *path*.

        Recognized entries are ``username``, ``password`` and ``pass64``
        (a base64-encoded UTF-8 password).  Raises ValueError if either
        the username or the password is missing.
        """
        user = None
        secret = None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                if words[0] == "username":
                    user = words[1]
                elif words[0] == "password":
                    secret = words[1]
                elif words[0] == "pass64":
                    import binascii
                    secret = binascii.a2b_base64(words[1]).decode("utf8")
        if user is None or secret is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(user, secret)

    @classmethod
    def default(cls):
        """Load credentials from the "batoto" file in profile.confdir.

        Returns None if that file does not exist.
        """
        path = os.path.join(profile.confdir, "batoto")
        if not os.path.exists(path):
            return None
        return cls.fromfile(path)
209
class session(object):
    """A cookie-carrying HTTP session that can log in to the site."""

    # Extracts the logged-in user name from the text of the user link.
    rlre = re.compile(r"Welcome, (.*) ")

    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        # Timestamp of the last successful login; 0 means never.
        self.lastlogin = 0

    def dologin(self, pre=None):
        """Make sure the session is logged in as the configured user.

        *pre* is an already-parsed page to inspect; when omitted the base
        page is fetched.  If another user is logged in, that user is
        signed out first.  Returns None.

        Raises Exception if a login succeeded less than 60 seconds ago,
        and pageerror if the logout link or login form cannot be found or
        the login is not confirmed by the response.
        """
        now = time.time()
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if m and m.group(1) == self.creds.username:
                # Already logged in as the right user.
                return
            outurl = None
            nav = page.find("div", id="user_navigation")
            if nav:
                for li in nav.findAll("li"):
                    # .string is None for links with nested markup.
                    if li.a and "Sign Out" in (li.a.string or ""):
                        outurl = li.a["href"]
            if not outurl:
                raise pageerror("Could not find logout URL", page)
            # Was ``self.wep`` -- a typo that raised AttributeError here.
            with self.web.open(outurl) as hs:
                hs.read()
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())

        form = page.find("form", id="login")
        if not form and pre:
            # The supplied page had no login form; retry from the base page.
            return self.dologin()
        if not form:
            raise pageerror("Could not find login form", page)
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in (resp.strong.string or ""):
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open *url* (a URL string or Request) through the cookie-aware opener."""
        return self.web.open(url)

    def fetch(self, url, headers=None):
        """Return the raw body of *url*, optionally sending extra *headers*."""
        req = urllib.request.Request(url)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch and parse *url*, logging in first if *ck* rejects the page.

        *ck* is called with the parsed page and returns whether it looks
        like a logged-in view.  Raises pageerror if the page is still
        rejected after logging in.
        """
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
288
class library(lib.library):
    """Entry point for looking up and searching manga on the site."""

    # Extracts the manga id from a series URL.
    rure = re.compile(r"/comic/_/([^/]*)$")

    def __init__(self, *, creds=None):
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        """Return the manga with the given id; raise KeyError if its page has no title."""
        url = self.base + "comic/_/comics/" + id
        doc = soupify(self.sess.fetch(url))
        title = doc.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield manga objects for the search described by *pars*.

        Result pages are requested one at a time (query parameter ``p``)
        for as long as a page contains a "show more" row.
        """
        pageno = 1
        while True:
            query = dict(pars)
            query["p"] = str(pageno)
            with urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(query)) as resp:
                doc = soupify(resp.read())
            rows = doc.find("div", id="comic_search_results").table
            if rows.tbody is not None:
                rows = rows.tbody
            hasmore = False
            for row in rows.findAll("tr"):
                if row.th is not None:
                    continue
                rowid = row.get("id")
                if row.get("id", "").startswith("comic_rowo_"):
                    continue
                if rowid == "show_more_row":
                    hasmore = True
                    continue
                link = row.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None:
                    raise Exception("Got weird manga URL: %r" % url)
                yield manga(self, m.group(1), link.text.strip(), url)
            if not hasmore:
                return
            pageno += 1

    def search(self, expr):
        """Search for manga by name, sending ``name_cond`` "c"."""
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        """Yield manga whose name or an alternative name starts with *prefix*.

        The comparison is case-insensitive.  When only an alternative
        name matches, a manga object carrying that name is yielded.
        """
        wanted = prefix.lower()
        plen = len(prefix)
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:plen].lower() == wanted:
                yield res
                continue
            for aname in res.altnames():
                if aname[:plen].lower() == wanted:
                    yield manga(self, res.id, aname, res.url)
                    break
352 print(res.altnames())