WIP: Trying to get Batoto to work with new site.
[automanga.git] / manga / batoto.py
1 import urllib.request, urllib.parse, http.cookiejar, re, bs4, os
2 from . import profile, lib, htcache
soup = bs4.BeautifulSoup

def soupify(cont):
    """Parse raw page content into a BeautifulSoup tree using the html.parser backend."""
    # PEP 8 (E731): a def is preferred over assigning a lambda to a name.
    return soup(cont, "html.parser")
5
class pageerror(Exception):
    """Error raised while interpreting a fetched page.

    The parsed page is kept on the exception so callers can inspect
    what the site actually returned."""

    def __init__(self, message, page):
        super().__init__(message)
        self.page = page
10
def byclass(el, name, cl):
    """Return the first <name> tag under el carrying the CSS class cl, or None."""
    for candidate in el.findAll(name):
        # findAll can yield non-Tag nodes; only Tags have classes.
        if isinstance(candidate, bs4.Tag) and cl in candidate.get("class", []):
            return candidate
    return None
18
def nextel(el):
    """Skip over non-Tag siblings (text nodes etc.) and return the next Tag."""
    nxt = el.nextSibling
    while not isinstance(nxt, bs4.Tag):
        nxt = nxt.nextSibling
    return nxt
24
class page(lib.page):
    """A single page of a chapter; the image URL is resolved lazily."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.url = url
        self.ciurl = None  # cached image URL, filled in by iurl()

    def iurl(self):
        """Return the image URL for this page, fetching and caching it on first use."""
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        # BUGFIX: was misspelled "__repr", so repr() never picked it up.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
50
class chapter(lib.pagelist):
    """One chapter of a manga; its page list is parsed on demand."""

    # Matches e.g. "page 12" in the page-select dropdown.
    pnre = re.compile(r"page (\d+)")

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None  # lazily built list of page objects

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Parse the chapter's reader page and return (caching) its pages."""
        if self.cpag is None:
            doc = soupify(htcache.fetch(self.url))
            pags = []
            for opt in doc.find("select", id="page_select").findAll("option"):
                num = int(self.pnre.match(opt.string).group(1))
                pags.append(page(self, self.stack + [(self, len(pags))], num, opt["value"]))
            self.cpag = pags
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
83
class manga(lib.manga):
    """A manga on Batoto; chapter list and alternative names are parsed lazily."""

    # Extracts the numeric chapter id out of a /read/_/<id>/... URL.
    cure = re.compile(r"/read/_/(\d+)/[^/]*")

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.cch = None     # cached chapter list
        self.stack = []
        self.cnames = None  # cached alternative names

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        """Heuristic check that the page was served to a logged-in session."""
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    def ch(self):
        """Return (caching) the chapters available in the library's language."""
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            tbl = byclass(page, "table", "chapters_list")
            if tbl.tbody is not None:
                tbl = tbl.tbody
            want = "lang_" + self.lib.lang
            found = []
            for row in tbl.childGenerator():
                if not (isinstance(row, bs4.Tag) and row.name == "tr"):
                    continue
                classes = row.get("class", [])
                if "row" not in classes or want not in classes:
                    continue
                url = row.td.a["href"]
                m = self.cure.search(url)
                if m is None:
                    raise pageerror("Got weird chapter URL: %r" % url, page)
                cid = m.group(1)
                # Normalize to the canonical reader URL for this chapter id.
                found.append((cid, row.td.a.text, self.lib.base + "read/_/" + cid))
            found.reverse()
            self.cch = [chapter(self, [(self, n)], cid, name, url)
                        for n, (cid, name, url) in enumerate(found)]
        return self.cch

    def altnames(self):
        """Return (caching) the list of alternative titles for this manga."""
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            names = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                body = tbl.tbody if tbl.tbody is not None else tbl
                for tr in body.findAll("tr"):
                    if "Alt Names:" not in tr.td.text:
                        continue
                    nls = nextel(tr.td)
                    if nls.name != "td" or nls.span is None:
                        raise pageerror("Weird altnames table in " + self.id, page)
                    names = [nm.text.strip() for nm in nls.findAll("span")]
                    break
                if names is not None:
                    break
            if names is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = names
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
161
class credentials(object):
    """A Batoto username/password pair."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Read credentials from a profile file.

        Raises ValueError if either the username or the password is missing."""
        username = password = None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key == "username":
                    username = words[1]
                elif key == "password":
                    password = words[1]
                elif key == "pass64":
                    # Password stored base64-encoded in the profile.
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if None in (username, password):
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        """Load credentials from the standard profile location, or None if absent."""
        path = os.path.join(profile.confdir, "batoto")
        return cls.fromfile(path) if os.path.exists(path) else None
189
class session(object):
    """An HTTP session against a Batoto site, handling cookies and login."""

    # Extracts the logged-in username from the "Welcome, NAME " link text.
    rlre = re.compile(r"Welcome, (.*) ")

    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials  # may be None for anonymous browsing
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.loggedin = False

    def dologin(self):
        """Log in as self.creds, signing out first if another user is active.

        Raises pageerror if a stale session's logout link cannot be found
        or the site does not acknowledge the login."""
        with self.web.open(self.base) as hs:
            page = soupify(hs.read())

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.get_text())
            if m and m.group(1) == self.creds.username:
                # Already signed in as the right user; nothing to do.
                return
            # Signed in as somebody else: find and follow the sign-out link.
            outurl = None
            nav = page.find("div", id="user_navigation")
            if nav:
                for li in nav.findAll("li"):
                    # .string is None for links with nested markup; guard
                    # against TypeError from `in None`.
                    if li.a and li.a.string and "Sign Out" in li.a.string:
                        outurl = li.a["href"]
            if not outurl:
                raise pageerror("Could not find logout URL", page)
            # BUGFIX: was `self.wep.open` (typo) and raised AttributeError.
            with self.web.open(outurl) as hs:
                hs.read()
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())

        form = page.find("form", id="login")
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"],
                                     urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and resp.strong.string and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)

    def login(self):
        """Ensure we are logged in; performs the actual login at most once."""
        if not self.loggedin:
            if self.creds:
                self.dologin()
            self.loggedin = True

    def open(self, url):
        """Open url through this session's cookie-carrying opener."""
        return self.web.open(url)

    def fetch(self, url):
        """Fetch url and return the raw response body."""
        with self.open(url) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch url; if the check ck(page) fails, log in and retry once."""
        page = soupify(self.fetch(url))
        if not ck(page):
            self.login()
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
264
class library(lib.library):
    """The Batoto library: lookup by id, search, and name-prefix matching."""

    # Extracts the manga id out of a /comic/_/<id> URL.
    rure = re.compile(r"/comic/_/([^/]*)$")

    def __init__(self, *, creds=None):
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        """Return the manga with the given site id; raise KeyError if unknown."""
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield manga objects for every result page of a search query."""
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            # BUGFIX: the query string must remain a str; concatenating the
            # old `.encode("ascii")` bytes to self.base raised TypeError.
            resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
            try:
                page = soupify(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None:
                    continue
                # Skip the secondary ("rowo") rows of each result entry.
                if child.get("id", "")[:11] == "comic_rowo_":
                    continue
                if child.get("id") == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None:
                    # Consistent with the rest of the module (was a bare Exception;
                    # pageerror is an Exception subclass, so callers still work).
                    raise pageerror("Got weird manga URL: %r" % url, page)
                yield manga(self, m.group(1), link.text.strip(), url)
            p += 1
            if not hasmore:
                break

    def search(self, expr):
        """Search for manga whose name contains expr."""
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        """Yield manga whose name, or one of its alternative names, starts
        with prefix (case-insensitive)."""
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        # Present the manga under the matching alternate name.
                        yield manga(self, res.id, aname, res.url)
                        break