```python
import urllib.request, urllib.parse, http.cookiejar, re, bs4, os
from . import profile, lib, htcache
soup = bs4.BeautifulSoup
soupify = lambda cont: soup(cont, "html.parser")

class pageerror(Exception):
    def __init__(self, message, page):
        super().__init__(message)
        self.page = page

def byclass(el, name, cl):
    for ch in el.findAll(name):
        if not isinstance(ch, bs4.Tag): continue
        cll = ch.get("class", [])
        if cl in cll:
            return ch
    return None

def nextel(el):
    while True:
        el = el.nextSibling
        if isinstance(el, bs4.Tag):
            return el

class page(lib.page):
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.url = url
        self.ciurl = None

    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)

class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    pnre = re.compile(r"page (\d+)")
    def pages(self):
        if self.cpag is None:
            pg = soupify(htcache.fetch(self.url))
            cpag = []
            for opt in pg.find("select", id="page_select").findAll("option"):
                url = opt["value"]
                n = int(self.pnre.match(opt.string).group(1))
                cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))
            self.cpag = cpag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)

class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.cch = None
        self.stack = []
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    cure = re.compile(r"/read/_/(\d+)/[^/]*")
    def ch(self):
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            scl = "lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, bs4.Tag) and ch.name == "tr":
                    cll = ch.get("class", [])
                    if "row" in cll and scl in cll:
                        url = ch.td.a["href"]
                        m = self.cure.search(url)
                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                        cid = m.group(1)
                        url = self.lib.base + "read/_/" + cid
                        name = ch.td.a.text
                        cch.append((cid, name, url))
            cch.reverse()
            rch = []
            for n, (cid, name, url) in enumerate(cch):
                rch.append(chapter(self, [(self, n)], cid, name, url))
            self.cch = rch
        return self.cch

    def altnames(self):
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if "Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name

class credentials(object):
    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        username, password = None, None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                if words[0] == "username":
                    username = words[1]
                elif words[0] == "password":
                    password = words[1]
                elif words[0] == "pass64":
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if None in (username, password):
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        path = os.path.join(profile.confdir, "batoto")
        if os.path.exists(path):
            return cls.fromfile(path)
        return None

class session(object):
    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.loggedin = False

    rlre = re.compile(r"Welcome, (.*) ")
    def dologin(self):
        with self.web.open(self.base) as hs:
            page = soupify(hs.read())

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.get_text())
            if not m or m.group(1) != self.creds.username:
                # Signed in as some other user: find the sign-out link and log out first.
                outurl = None
                nav = page.find("div", id="user_navigation")
                if nav:
                    for li in nav.findAll("li"):
                        if li.a and "Sign Out" in li.a.string:
                            outurl = li.a["href"]
                if not outurl:
                    raise pageerror("Could not find logout URL", page)
                with self.web.open(outurl) as hs:
                    hs.read()
                with self.web.open(self.base) as hs:
                    page = soupify(hs.read())
            else:
                # Already signed in as the right user; nothing to do.
                return

        form = page.find("form", id="login")
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)

    def login(self):
        if not self.loggedin:
            if self.creds:
                self.dologin()
            self.loggedin = True

    def open(self, url):
        return self.web.open(url)

    def fetch(self, url):
        with self.open(url) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        page = soupify(self.fetch(url))
        if not ck(page):
            self.login()
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page

class library(lib.library):
    def __init__(self, *, creds=None):
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
            try:
                page = soupify(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None: continue
                if child.get("id", "")[:11] == "comic_rowo_": continue
                if child.get("id") == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break
                else:
                    if False:
                        print("eliding " + res.name)
                        print(res.altnames())
```
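The module is used through its `library` class. Login is optional: `credentials.default()` looks for a file named `batoto` under `profile.confdir`, and `credentials.fromfile` reads `username`, `password`, or `pass64` (base64-encoded password) entries from it. A hypothetical profile file, assuming `profile.splitlines` yields whitespace-separated words per line:

```
username alice
pass64 aHVudGVyMg==
```

A minimal usage sketch, not part of the module above; the package name `manga` and the search term are assumptions for illustration, and a reachable bato.to site is required:

```python
# Minimal sketch under assumed names; nothing here is taken verbatim from the source.
from manga import batoto            # assumed package name for this module tree

blib = batoto.library()             # uses credentials.default() if a profile file exists
for mng in blib.search("example"):  # hypothetical search term; yields manga objects
    print(mng.name)
    ch = mng[0]                     # first chapter, via manga.ch()
    pg = ch[0]                      # first page, via chapter.pages()
    print(pg.iurl())                # image URL resolved for that page
    break
```

Each object caches what it fetches (`cch`, `cpag`, `ciurl`, `cnames`), so indexing the same manga or chapter repeatedly does not refetch the underlying pages.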