Commit | Line | Data |
---|---|---|
c39028a4 | 1 | import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time |
81be6921 FT |
2 | from . import profile, lib, htcache |
soup = bs4.BeautifulSoup
def soupify(cont):
    """Parse the byte/string content `cont` with BeautifulSoup's html.parser backend."""
    return soup(cont, "html.parser")
5 | ||
class pageerror(Exception):
    """Raised when a fetched page does not have the expected structure.

    The parsed page that triggered the error is kept in the `page`
    attribute for post-mortem inspection.
    """
    def __init__(self, message, page):
        super().__init__(message)
        self.page = page
08e259d7 | 10 | |
def iterlast(itr, default=None):
    """Exhaust the iterator `itr` and return the last value it yielded.

    If `itr` yields nothing, return `default` instead.

    BUG FIX: the original only initialized the accumulator when `default`
    was not None, so an empty iterator with the default `default` raised
    UnboundLocalError at `return ret`.  Initialize unconditionally.
    """
    ret = default
    try:
        while True:
            ret = next(itr)
    except StopIteration:
        return ret
19 | ||
def find1(el, *args, **kwargs):
    """Like el.find(...), but raise pageerror instead of returning None.

    The error carries the document root (the last of el's parents, or el
    itself when it has none) so the whole page can be inspected.
    """
    found = el.find(*args, **kwargs)
    if found is None:
        raise pageerror("could not find expected element", iterlast(el.parents, el))
    return found
25 | ||
def byclass(el, name, cl):
    """Return the first `name` tag under `el` that carries the CSS class `cl`.

    Returns None when no such tag exists.
    """
    for cand in el.findAll(name):
        if not isinstance(cand, bs4.Tag):
            continue
        if cl in cand.get("class", []):
            return cand
    return None
33 | ||
def nextel(el):
    """Return el's next sibling that is an actual tag, skipping text/comment nodes."""
    sib = el.nextSibling
    while not isinstance(sib, bs4.Tag):
        # NOTE(review): like the original, this assumes a tag sibling exists;
        # running off the end raises AttributeError on None.nextSibling.
        sib = sib.nextSibling
    return sib
39 | ||
def fetchreader(lib, readerid, page):
    """Fetch page number `page` of the reader identified by `readerid`.

    Returns the soupified reader page; a Referer header is required by the
    site for these requests.
    """
    query = urllib.parse.urlencode({"id": readerid, "p": str(page)})
    url = lib.base + "areader?" + query
    return soupify(lib.sess.fetch(url, headers={"Referer": "http://bato.to/reader"}))
44 | ||
class page(lib.page):
    """A single image page of a batoto chapter."""
    def __init__(self, chapter, stack, readerid, n):
        self.stack = stack
        self.lib = chapter.lib
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.readerid = readerid
        self.ciurl = None  # cached image URL, filled lazily by iurl()

    def iurl(self):
        """Return the URL of this page's image, fetching and caching it on first use."""
        if self.ciurl is None:
            page = fetchreader(self.lib, self.readerid, self.n)
            img = find1(page, "img", id="comic_page")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Open a standard image stream over this page's image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        # BUG FIX: was misspelled `__repr`, so repr() never used this method.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
71 | ||
class chapter(lib.pagelist):
    """One chapter of a batoto manga, lazily listing its pages."""
    def __init__(self, manga, stack, id, name, readerid):
        self.stack = stack
        self.manga = manga
        self.lib = manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        self.cpag = None  # page-list cache, filled by pages()

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Matches the "page N" strings in the reader's page selector.
    pnre = re.compile(r"page (\d+)")
    def pages(self):
        """Return (and cache) the list of page objects for this chapter."""
        if self.cpag is None:
            rd = fetchreader(self.lib, self.readerid, 1)
            pag = []
            for opt in find1(rd, "select", id="page_select").findAll("option"):
                n = int(self.pnre.match(opt.string).group(1))
                pag.append(page(self, self.stack + [(self, len(pag))], self.readerid, n))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
104 | ||
class manga(lib.manga):
    # A single manga on batoto, identified by its site ID and page URL.
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.cch = None     # chapter-list cache, filled by ch()
        self.stack = []
        self.cnames = None  # alternate-name cache, filled by altnames()

    def __getitem__(self, i):
        # Index into the lazily fetched chapter list.
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        # Check whether `page` was served to a logged-in user: a
        # registration notice, or a missing chapter table, indicates the
        # request was answered anonymously.
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    # Extracts the reader ID from a chapter link such as ".../reader#abc123".
    cure = re.compile(r"/reader#([a-z0-9]+)")
    def ch(self):
        # Return (and cache) the chapters available in self.lib.lang.
        # Raises pageerror when a chapter link does not match `cure`.
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            # CSS class marking table rows in the configured language.
            scl = "lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, bs4.Tag) and ch.name == "tr":
                    cll = ch.get("class", [])
                    # Chapter rows carry both "row" and the language class.
                    if "row" in cll and scl in cll:
                        url = ch.td.a["href"]
                        m = self.cure.search(url)
                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                        readerid = m.group(1)
                        name = ch.td.a.text
                        cch.append((readerid, name))
            # Flip the order found on the page (presumably newest-first) so
            # chapters are stored in reading order -- TODO confirm site order.
            cch.reverse()
            rch = []
            for n, (readerid, name) in enumerate(cch):
                # The reader ID doubles as the chapter's stable ID.
                rch.append(chapter(self, [(self, n)], readerid, name, readerid))
            self.cch = rch
        return self.cch

    def altnames(self):
        # Return (and cache) the manga's alternate names, scraped from the
        # "Alt Names:" row of the info tables.  Raises pageerror when the
        # row cannot be found or has an unexpected shape.
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if "Alt Names:" in tr.td.text:
                        # The names live in <span>s inside the next cell.
                        nls = nextel(tr.td)
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
181 | ||
class credentials(object):
    """A batoto username/password pair."""
    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Parse credentials from a profile file.

        Recognizes `username`, `password` and base64-encoded `pass64`
        entries; raises ValueError when either half is missing.
        """
        username, password = None, None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key == "username":
                    username = words[1]
                elif key == "password":
                    password = words[1]
                elif key == "pass64":
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if username is None or password is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        """Load credentials from the default config file, or return None if absent."""
        path = os.path.join(profile.confdir, "batoto")
        return cls.fromfile(path) if os.path.exists(path) else None
209 | ||
class session(object):
    """An HTTP session against the batoto site, holding cookies and login state.

    `base` is the site's base URL; `credentials` (may be None) supplies the
    username/password used by dologin().
    """
    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.lastlogin = 0  # timestamp of the last successful login

    # Matches the greeting on the front page to recover the logged-in username.
    rlre = re.compile(r"Welcome, (.*) ")
    def dologin(self, pre=None):
        """Log in as self.creds.username, signing out any other user first.

        `pre` may be an already-fetched, soupified front page to examine
        before fetching anew.  Raises pageerror when the site does not
        behave as expected, and a plain Exception when called again within
        a minute of a successful login.
        """
        now = time.time()
        # Rate-limit attempts so a confused caller cannot hammer the site.
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if m and m.group(1) == self.creds.username:
                # Already logged in as the requested user; nothing to do.
                return
            # Logged in as somebody else: locate the sign-out link and use it.
            outurl = None
            nav = page.find("div", id="user_navigation")
            if nav:
                for li in nav.findAll("li"):
                    # Guard li.a.string being None (TypeError in `in` otherwise).
                    if li.a and li.a.string and "Sign Out" in li.a.string:
                        outurl = li.a["href"]
            if not outurl:
                raise pageerror("Could not find logout URL", page)
            # BUG FIX: was `self.wep.open(...)` -- an AttributeError at runtime.
            with self.web.open(outurl) as hs:
                hs.read()
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())

        # Both the never-logged-in and the just-logged-out paths need the form.
        form = page.find("form", id="login")
        if not form and pre:
            # The caller-supplied page had no login form; retry with a fresh fetch.
            return self.dologin()
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        # Look for the success message; the for/else raises when it is absent.
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and resp.strong.string and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open `url` through this session's cookie-carrying opener."""
        return self.web.open(url)

    def fetch(self, url, headers=None):
        """Fetch `url` (with optional extra headers) and return the raw body."""
        req = urllib.request.Request(url)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch `url`; if the page fails the check `ck`, log in and retry once."""
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
288 | ||
class library(lib.library):
    """The batoto manga library."""
    def __init__(self, *, creds=None):
        """Create a library, loading default credentials when none are given."""
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        """Look up a manga by its site ID; raises KeyError when not found."""
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield manga objects for every result of a site search, across all pages."""
        pno = 1
        while True:
            query = dict(pars)
            query["p"] = str(pno)
            with urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(query)) as resp:
                page = soupify(resp.read())
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None:
                    continue
                if child.get("id", "")[:11] == "comic_rowo_":
                    continue
                if child.get("id") == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None:
                    raise Exception("Got weird manga URL: %r" % url)
                yield manga(self, m.group(1), link.text.strip(), url)
            pno += 1
            if not hasmore:
                break

    # Extracts the manga ID from its page URL.
    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        """Search for mangas whose name contains `expr`."""
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        """Yield mangas whose name, or an alternate name, starts with `prefix`.

        Matching is case-insensitive; when only an alternate name matches,
        the result is presented under that name.
        """
        want = prefix.lower()
        plen = len(prefix)
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:plen].lower() == want:
                yield res
                continue
            for aname in res.altnames():
                if aname[:plen].lower() == want:
                    yield manga(self, res.id, aname, res.url)
                    break