X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fmangafox.py;h=c17ef314ec7c114c8272c070cab3b359a68d0c55;hb=HEAD;hp=0e8c2e9ecee9baef634fee680c2371d768244646;hpb=20a9e62afa4842b32b664d764c7a95ee1ca7cf89;p=automanga.git diff --git a/manga/mangafox.py b/manga/mangafox.py index 0e8c2e9..c17ef31 100644 --- a/manga/mangafox.py +++ b/manga/mangafox.py @@ -1,8 +1,8 @@ -import urllib.request, re +import urllib.request, urllib.parse, re import bs4, json from . import lib, htcache soup = bs4.BeautifulSoup -soupify = lambda cont: soup(cont) +soupify = lambda cont: soup(cont, "html.parser") class page(lib.page): def __init__(self, chapter, stack, n, url): @@ -121,21 +121,21 @@ class manga(lib.manga): raise Exception("parse error: weird volume list for %r" % self) for o, ch in enumerate(reversed(cls.findAll("li"))): n = ch.div.h3 or ch.div.h4 - name = n.a.string + chid = name = n.a.string for span in ch("span"): try: if "title" in span["class"]: name += " " + span.string except KeyError: pass - url = n.a["href"] + url = urllib.parse.urljoin(self.url, n.a["href"]) if url[-7:] == "/1.html": url = url[:-6] elif self.cure.search(url) is not None: pass else: raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url)) - vol.ch.append(chapter(vol, vol.stack + [(vol, o)], name, name, url)) + vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url)) cvol.append(vol) self.cvol = cvol return self.cvol @@ -158,17 +158,18 @@ class library(lib.library): self.base = "http://mangafox.me/" def alphapage(self, pno): - page = soupify(htcache.fetch(self.base + ("directory/%i.htm?az" % pno))) + abase = self.base + ("directory/%i.htm?az" % pno) + page = soupify(htcache.fetch(abase)) ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li") ret = [] - ubase = self.base + "manga/" for m in ls: t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"}) name = t.string - url = t["href"] - if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1): + url = urllib.parse.urljoin(abase, t["href"]) + p = url.find("/manga/") + if p < 0 or url.find('/', p + 7) != (len(url) - 1): raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url)) - ret.append(manga(self, url[len(ubase):-1], name, url)) + ret.append(manga(self, url[p + 7:-1], name, url)) return ret def alphapages(self):