diff --git a/manga/batoto.py b/manga/batoto.py
index cab29b5..37ab8b4 100644
--- a/manga/batoto.py
+++ b/manga/batoto.py
@@ -1,19 +1,25 @@
-import urllib, re, BeautifulSoup
-import lib, htcache
-soup = BeautifulSoup.BeautifulSoup
+import urllib.request, urllib.parse, http.cookiejar, re, bs4, os
+from . import profile, lib, htcache
+soup = bs4.BeautifulSoup
+soupify = lambda cont: soup(cont, "html.parser")
+
+class pageerror(Exception):
+    def __init__(self, message, page):
+        super().__init__(message)
+        self.page = page
 
 def byclass(el, name, cl):
     for ch in el.findAll(name):
-        if not isinstance(ch, BeautifulSoup.Tag): continue
-        cll = ch.get("class", "")
-        if cl in cll.split():
+        if not isinstance(ch, bs4.Tag): continue
+        cll = ch.get("class", [])
+        if cl in cll:
             return ch
     return None
 
 def nextel(el):
     while True:
         el = el.nextSibling
-        if isinstance(el, BeautifulSoup.Tag):
+        if isinstance(el, bs4.Tag):
             return el
 
 class page(lib.page):
@@ -22,15 +28,15 @@ class page(lib.page):
         self.chapter = chapter
         self.n = n
         self.id = str(n)
-        self.name = u"Page %s" % n
+        self.name = "Page %s" % n
         self.url = url
         self.ciurl = None
 
     def iurl(self):
         if self.ciurl is None:
-            page = soup(htcache.fetch(self.url))
+            page = soupify(htcache.fetch(self.url))
             img = nextel(page.find("div", id="full_image")).img
-            self.ciurl = img["src"].encode("us-ascii")
+            self.ciurl = img["src"]
         return self.ciurl
 
     def open(self):
@@ -60,10 +66,10 @@ class chapter(lib.pagelist):
     pnre = re.compile(r"page (\d+)")
     def pages(self):
        if self.cpag is None:
-            pg = soup(htcache.fetch(self.url))
+            pg = soupify(htcache.fetch(self.url))
             cpag = []
             for opt in pg.find("select", id="page_select").findAll("option"):
-                url = opt["value"].encode("us-ascii")
+                url = opt["value"]
                 n = int(self.pnre.match(opt.string).group(1))
                 cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))
             self.cpag = cpag
@@ -78,11 +84,13 @@ class chapter(lib.pagelist):
 class manga(lib.manga):
     def __init__(self, lib, id, name, url):
         self.lib = lib
+        self.sess = lib.sess
         self.id = id
         self.name = name
         self.url = url
         self.cch = None
         self.stack = []
+        self.cnames = None
 
     def __getitem__(self, i):
         return self.ch()[i]
@@ -90,22 +98,31 @@ class manga(lib.manga):
     def __len__(self):
         return len(self.ch())
 
+    @staticmethod
+    def vfylogin(page):
+        if page.find("div", id="register_notice"):
+            return False
+        if not byclass(page, "table", "chapters_list"):
+            return False
+        return True
+
     cure = re.compile(r"/read/_/(\d+)/[^/]*")
     def ch(self):
         if self.cch is None:
-            page = soup(htcache.fetch(self.url))
-            cls = byclass(page, u"table", u"chapters_list")
+            page = self.sess.lfetch(self.url, self.vfylogin)
+            cls = byclass(page, "table", "chapters_list")
             if cls.tbody is not None:
                 cls = cls.tbody
-            scl = u"lang_" + self.lib.lang
+            scl = "lang_" + self.lib.lang
             cch = []
             for ch in cls.childGenerator():
-                if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
-                    cll = ch.get("class", "").split()
-                    if u"row" in cll and scl in cll:
-                        url = ch.td.a["href"].encode("us-ascii")
+                if isinstance(ch, bs4.Tag) and ch.name == "tr":
+                    cll = ch.get("class", [])
+                    if "row" in cll and scl in cll:
+                        url = ch.td.a["href"]
                         m = self.cure.search(url)
-                        if m is None: raise Exception("Got weird chapter URL: %r" % url)
+                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
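+                        # Rebuild a canonical chapter URL from the numeric chapter ID.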
                         cid = m.group(1)
                         url = self.lib.base + "read/_/" + cid
                         name = ch.td.a.text
@@ -117,44 +134,190 @@ class manga(lib.manga):
         self.cch = rch
         return self.cch
 
+    def altnames(self):
+        if self.cnames is None:
+            page = soupify(self.sess.fetch(self.url))
+            cnames = None
+            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
+                if tbl.tbody is not None: tbl = tbl.tbody
+                for tr in tbl.findAll("tr"):
+                    if "Alt Names:" in tr.td.text:
+                        nls = nextel(tr.td)
+                        if nls.name != "td" or nls.span is None:
+                            raise pageerror("Weird altnames table in " + self.id, page)
+                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
+                        break
+                if cnames is not None:
+                    break
+            if cnames is None:
+                raise pageerror("Could not find altnames for " + self.id, page)
+            self.cnames = cnames
+        return self.cnames
+
     def __str__(self):
         return self.name
 
     def __repr__(self):
         return "<batoto.manga %r>" % self.name
 
+class credentials(object):
+    def __init__(self, username, password):
+        self.username = username
+        self.password = password
+
+    @classmethod
+    def fromfile(cls, path):
+        username, password = None, None
+        with open(path) as fp:
+            for words in profile.splitlines(fp):
+                if words[0] == "username":
+                    username = words[1]
+                elif words[0] == "password":
+                    password = words[1]
+                elif words[0] == "pass64":
+                    import binascii
+                    password = binascii.a2b_base64(words[1]).decode("utf8")
+        if None in (username, password):
+            raise ValueError("Incomplete profile: " + path)
+        return cls(username, password)
+
+    @classmethod
+    def default(cls):
+        path = os.path.join(profile.confdir, "batoto")
+        if os.path.exists(path):
+            return cls.fromfile(path)
+        return None
+
+class session(object):
+    def __init__(self, base, credentials):
+        self.base = base
+        self.creds = credentials
+        self.jar = http.cookiejar.CookieJar()
+        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
+        self.loggedin = False
+
+    rlre = re.compile(r"Welcome, (.*) ")
+    def dologin(self):
+        with self.web.open(self.base) as hs:
+            page = soupify(hs.read())
+
+        cur = page.find("a", id="user_link")
+        if cur:
+            m = self.rlre.search(cur.get_text())
+            if not m or m.group(1) != self.creds.username:
+                # Signed in as the wrong user; sign out before logging in again.
+                outurl = None
+                nav = page.find("div", id="user_navigation")
+                if nav:
+                    for li in nav.findAll("li"):
+                        if li.a and "Sign Out" in li.a.string:
+                            outurl = li.a["href"]
+                if not outurl:
+                    raise pageerror("Could not find logout URL", page)
+                with self.web.open(outurl) as hs:
+                    hs.read()
+                with self.web.open(self.base) as hs:
+                    page = soupify(hs.read())
+            else:
+                # Already signed in as the right user; nothing to do.
+                return
+
+        form = page.find("form", id="login")
+        values = {}
+        for el in form.findAll("input", type="hidden"):
+            values[el["name"]] = el["value"]
+        values["ips_username"] = self.creds.username
+        values["ips_password"] = self.creds.password
+        values["anonymous"] = "1"
+        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
+        with self.web.open(req) as hs:
+            page = soupify(hs.read())
+        for resp in page.findAll("p", attrs={"class": "message"}):
+            if resp.strong and "You are now signed in" in resp.strong.string:
+                break
+        else:
+            raise pageerror("Could not log in", page)
+
+    def login(self):
+        if not self.loggedin:
+            if self.creds:
+                self.dologin()
+            self.loggedin = True
+
+    def open(self, url):
+        return self.web.open(url)
+
+    def fetch(self, url):
+        with self.open(url) as hs:
+            return hs.read()
+
+    def lfetch(self, url, ck):
+        page = soupify(self.fetch(url))
+        if not ck(page):
+            self.login()
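+            # Refetch after logging in; failing the check again is a hard error.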
+            page = soupify(self.fetch(url))
+            if not ck(page):
+                raise pageerror("Could not verify login status despite having logged in", page)
+        return page
+
 class library(lib.library):
-    def __init__(self):
-        self.base = "http://www.batoto.net/"
-        self.lang = u"English"
+    def __init__(self, *, creds=None):
+        if creds is None:
+            creds = credentials.default()
+        self.base = "http://bato.to/"
+        self.sess = session(self.base, creds)
+        self.lang = "English"
 
     def byid(self, id):
         url = self.base + "comic/_/comics/" + id
-        page = soup(htcache.fetch(url))
+        page = soupify(self.sess.fetch(url))
         title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
         if title is None:
             raise KeyError(id)
         return manga(self, id, title.string.strip(), url)
 
-    mure = re.compile(r"/comic/_/comics/([^/]*)$")
-    def search(self, expr):
-        resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1",
-                              urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"}))
-        try:
-            page = soup(resp.read())
-        finally:
-            resp.close()
-        none = page.find("p", attrs={"class": "no_messages"})
-        if none is not None and u"No results" in none.text:
-            return []
-        ret = []
-        for child in page.find("div", id="search_results").ol.childGenerator():
-            if isinstance(child, BeautifulSoup.Tag) and child.name == u"li":
-                info = child.find("div", attrs={"class": "result_info"})
-                url = info.h3.a["href"].encode("us-ascii")
-                m = self.mure.search(url)
+    def _search(self, pars):
+        p = 1
+        while True:
+            _pars = dict(pars)
+            _pars["p"] = str(p)
+            resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
+            try:
+                page = soupify(resp.read())
+            finally:
+                resp.close()
+            rls = page.find("div", id="comic_search_results").table
+            if rls.tbody is not None:
+                rls = rls.tbody
+            hasmore = False
+            for child in rls.findAll("tr"):
+                if child.th is not None: continue
+                if child.get("id", "")[:11] == "comic_rowo_": continue
+                if child.get("id") == "show_more_row":
+                    hasmore = True
+                    continue
+                link = child.td.strong.a
+                url = link["href"]
+                m = self.rure.search(url)
                 if m is None: raise Exception("Got weird manga URL: %r" % url)
                 id = m.group(1)
-                name = info.h3.a.string.strip()
-                ret.append(manga(self, id, name, url))
-        return ret
+                name = link.text.strip()
+                yield manga(self, id, name, url)
+            p += 1
+            if not hasmore:
+                break
+
+    rure = re.compile(r"/comic/_/([^/]*)$")
+    def search(self, expr):
+        return self._search({"name": expr, "name_cond": "c"})
+
+    def byname(self, prefix):
+        for res in self._search({"name": prefix, "name_cond": "s"}):
+            if res.name[:len(prefix)].lower() == prefix.lower():
+                yield res
+            else:
+                for aname in res.altnames():
+                    if aname[:len(prefix)].lower() == prefix.lower():
+                        yield manga(self, res.id, aname, res.url)
+                        break