1 import urllib.request, urllib.parse, http.cookiejar, re, bs4, os
2 from . import profile, lib, htcache
# Convenience aliases for HTML parsing. "html.parser" is the stdlib
# backend, so no extra parser package is required at runtime.
soup = bs4.BeautifulSoup

# PEP 8 (E731): a named function instead of a lambda bound to a name —
# same callable interface, better tracebacks and introspection.
def soupify(cont):
    """Parse *cont* (HTML text/bytes) into a BeautifulSoup tree."""
    return soup(cont, "html.parser")
class pageerror(Exception):
    """Raised when a fetched page does not have the expected structure.

    Constructed with the message and the offending parsed page
    (NOTE(review): the line storing *page* is elided from this view —
    presumably it is kept as an attribute for callers to inspect).
    """
    def __init__(self, message, page):
        super().__init__(message)
        # (remaining line(s) of __init__ elided from this view)
def byclass(el, name, cl):
    # Search el's <name> descendants for one carrying CSS class cl.
    # (NOTE(review): the membership test and return statement are
    # elided from this view — confirm against the full file.)
    for ch in el.findAll(name):
        # Defensive: only Tag nodes carry attributes.
        if not isinstance(ch, bs4.Tag): continue
        # bs4 returns the multi-valued "class" attribute as a list.
        cll = ch.get("class", [])
        # Orphaned fragment of an elided helper — given the nextel()
        # call elsewhere in this file, likely its sibling-walking loop;
        # only Tag nodes are accepted. TODO confirm against full file.
        if isinstance(el, bs4.Tag):
    # --- methods of class `page` (class header elided from this view) ---

    def __init__(self, chapter, stack, n, url):
        # (several assignments elided here — stack/url/page-number
        # bookkeeping, presumably; confirm against the full file)
        self.chapter = chapter
        self.name = "Page %s" % n

    # Fragment of the image-URL resolver (its `def` line is elided):
    # lazily fetch the reader page and cache the full image's src.
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"]

    # Fragment of open(): stream the resolved image URL.
        return lib.stdimgstream(self.iurl())

    # Fragment of __repr__.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
class chapter(lib.pagelist):
    """One chapter of a manga; indexes into its lazily-built page list.

    NOTE(review): this view of the file is heavily elided — several
    method bodies below are fragments missing surrounding lines.
    """
    def __init__(self, manga, stack, id, name, url):
        # (body elided from this view)

    def __getitem__(self, i):
        # Delegate indexing to the page list built by pages().
        return self.pages()[i]

    # Fragment of __len__ (its `def` line is elided).
        return len(self.pages())

    # Matches option labels like "page 3" in the page-select dropdown.
    pnre = re.compile(r"page (\d+)")

    # Fragment of the page scraper (its `def` line is elided): walk the
    # reader page's page-select <option>s and wrap each in a page
    # object. (NOTE(review): where `url` comes from per option is
    # elided — presumably the option's value attribute.)
        pg = soupify(htcache.fetch(self.url))
        for opt in pg.find("select", id="page_select").findAll("option"):
            n = int(self.pnre.match(opt.string).group(1))
            cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))

    # Fragment of __repr__.
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
class manga(lib.manga):
    """A single manga on the site: scrapes its chapter list and
    alternate names on demand.

    NOTE(review): this view of the file is heavily elided; most method
    bodies below are fragments whose surrounding lines are missing.
    """
    def __init__(self, lib, id, name, url):
        # (body elided from this view)

    def __getitem__(self, i):
        # (body elided from this view)

    # Fragment of the login-verification check (presumably the
    # `vfylogin` callback passed to sess.lfetch below — confirm): a
    # visible "register_notice" box or a missing chapters table
    # indicates we are not viewing the page as a logged-in user.
        if page.find("div", id="register_notice"):
        if not byclass(page, "table", "chapters_list"):

    # Extracts the numeric chapter id from a reader URL.
    cure = re.compile(r"/read/_/(\d+)/[^/]*")

    # Fragment of the chapter-list scraper (its `def` line is elided).
        page = self.sess.lfetch(self.url, self.vfylogin)
        cls = byclass(page, "table", "chapters_list")
        if cls.tbody is not None:
        # Only rows for the library's configured language are kept.
        scl = "lang_" + self.lib.lang
        for ch in cls.childGenerator():
            if isinstance(ch, bs4.Tag) and ch.name == "tr":
                cll = ch.get("class", [])
                if "row" in cll and scl in cll:
                    url = ch.td.a["href"]
                    m = self.cure.search(url)
                    if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                    # Rebuild a canonical reader URL from the id.
                    # (NOTE(review): the line binding `cid`/`name` from
                    # the match is elided from this view.)
                    url = self.lib.base + "read/_/" + cid
                    cch.append((cid, name, url))
        # Wrap the scraped (cid, name, url) triples in chapter objects.
        for n, (cid, name, url) in enumerate(cch):
            rch.append(chapter(self, [(self, n)], cid, name, url))

    # Fragment of the altnames scraper: the "Alt Names:" row of the
    # info table holds one <span> per alternate title. (NOTE(review):
    # the line binding `nls` — presumably the next table cell via
    # nextel() — is elided from this view.)
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if "Alt Names:" in tr.td.text:
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
            if cnames is not None:
            raise pageerror("Could not find altnames for " + self.id, page)

    # Fragment of __repr__.
        return "<batoto.manga %r>" % self.name
class credentials(object):
    """Username/password pair for logging in to the site."""
    def __init__(self, username, password):
        self.username = username
        self.password = password

    # (NOTE(review): the decorator line is elided — `cls` as first
    # parameter indicates this is a @classmethod; confirm.)
    def fromfile(cls, path):
        """Parse a profile file of whitespace-split `key value` lines.

        Recognized keys: "username", "password", and "pass64"
        (base64-encoded UTF-8 password). Raises ValueError when either
        half is missing.
        """
        username, password = None, None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                if words[0] == "username":
                # (assignment elided from this view)
                elif words[0] == "password":
                # (assignment elided from this view)
                elif words[0] == "pass64":
                    # (NOTE(review): the import of binascii is elided —
                    # presumably a local `import binascii` here.)
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if None in (username, password):
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    # Fragment of the default-credentials lookup (its decorator and
    # `def` line are elided): read the "batoto" file from the profile
    # configuration directory when present.
    path = os.path.join(profile.confdir, "batoto")
    if os.path.exists(path):
        return cls.fromfile(path)
class session(object):
    """Logged-in HTTP session against the site.

    Keeps a cookie jar and a urllib opener that carries it, plus the
    credentials used to (re-)authenticate on demand.
    """
    def __init__(self, base, credentials):
        # (a line is elided here — presumably `self.base = base`;
        # other methods in this view read self.base — confirm)
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        # Opener that sends/stores the session cookies on every request.
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.loggedin = False
198 rlre = re.compile(r"Welcome, (.*) ")
200 with self.web.open(self.base) as hs:
201 page = soupify(hs.read())
203 cur = page.find("a", id="user_link")
206 m = self.rlre.search(cur.get_text())
207 if not m or m.group(1) != self.creds.username:
210 nav = page.find("div", id="user_navigation")
212 for li in nav.findAll("li"):
213 if li.a and "Sign Out" in li.a.string:
214 outurl = li.a["href"]
216 raise pageerror("Could not find logout URL", page)
217 with self.wep.open(outurl) as hs:
219 with self.web.open(self.base) as hs:
220 page = soupify(hs.read())
    # Fragment of the login-form submission (surrounding lines are
    # elided): collect the form's hidden inputs, add the credentials,
    # and POST to the form's action URL.
        form = page.find("form", id="login")
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        # NOTE(review): presumably the forum's "log in anonymously"
        # flag — confirm against the site's login form.
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        # Success is detected from the confirmation flash message.
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
        raise pageerror("Could not log in", page)

    # Fragment of open() (its `def` line is elided): log in on first
    # use, then delegate to the cookie-carrying opener.
        if not self.loggedin:
        return self.web.open(url)

    def fetch(self, url):
        # Fetch url and read the whole response body (the return line
        # is elided from this view).
        with self.open(url) as hs:

    def lfetch(self, url, ck):
        # Fetch url and validate the parsed page with ck; on failure,
        # presumably force a re-login and retry once before giving up —
        # TODO confirm (the retry/login lines are elided).
        page = soupify(self.fetch(url))
        page = soupify(self.fetch(url))
        raise pageerror("Could not verify login status despite having logged in", page)
class library(lib.library):
    """Batoto backend library: entry point for lookups and searches.

    NOTE(review): heavily elided view; fragments below are missing
    surrounding lines.
    """
    def __init__(self, *, creds=None):
        # Fall back to the profile-stored credentials (the guarding
        # `if creds is None:` line appears to be elided from this view).
        creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    # Fragment of the by-id lookup (its `def` line is elided): fetch
    # the comic page and build a manga object from its title heading.
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        return manga(self, id, title.string.strip(), url)
281 def _search(self, pars):
286 resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars).encode("ascii"))
288 page = soupify(resp.read())
291 rls = page.find("div", id="comic_search_results").table
292 if rls.tbody is not None:
295 for child in rls.findAll("tr"):
296 if child.th is not None: continue
297 if child.get("id", "")[:11] == "comic_rowo_": continue
298 if child.get("id") == "show_more_row":
301 link = child.td.strong.a
303 m = self.rure.search(url)
304 if m is None: raise Exception("Got weird manga URL: %r" % url)
306 name = link.text.strip()
307 yield manga(self, id, name, url)
    # Extracts the manga id from a comic URL.
    rure = re.compile(r"/comic/_/([^/]*)$")

    def search(self, expr):
        # "name_cond": "c" — presumably the site's "name contains"
        # condition; TODO confirm against the search form.
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        # Prefix search ("name_cond": "s" — presumably "starts with");
        # yield matches under the primary or a matching alternate name.
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                # (the yield of the primary-name match is elided here)
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        # Re-wrap under the matching alternate name so
                        # the caller sees the name it searched for.
                        yield manga(self, res.id, aname, res.url)
                # Debug output for filtered-out results — left as-is.
                print("eliding " + res.name)
                print(res.altnames())