X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fmangafox.py;h=c17ef314ec7c114c8272c070cab3b359a68d0c55;hb=HEAD;hp=110be990c4b08e827688489f5412dae9207bea3a;hpb=699d0c171c7482ce6c6a2835df7e0ba7b7505729;p=automanga.git diff --git a/manga/mangafox.py b/manga/mangafox.py index 110be99..c17ef31 100644 --- a/manga/mangafox.py +++ b/manga/mangafox.py @@ -1,25 +1,8 @@ -import urllib -import BeautifulSoup -import lib, htcache -soup = BeautifulSoup.BeautifulSoup - -class imgstream(lib.imgstream): - def __init__(self, url): - self.bk = urllib.urlopen(url) - self.ctype = self.bk.info()["Content-Type"] - self.clen = int(self.bk.info()["Content-Length"]) - - def fileno(self): - return self.bk.fileno() - - def close(self): - self.bk.close() - - def read(self, sz = None): - if sz is None: - return self.bk.read() - else: - return self.bk.read(sz) +import urllib.request, urllib.parse, re +import bs4, json +from . import lib, htcache +soup = bs4.BeautifulSoup +soupify = lambda cont: soup(cont, "html.parser") class page(lib.page): def __init__(self, chapter, stack, n, url): @@ -29,18 +12,18 @@ class page(lib.page): self.manga = self.volume.manga self.n = n self.id = str(n) - self.name = u"Page %s" % n + self.name = "Page %s" % n self.url = url self.ciurl = None def iurl(self): if self.ciurl is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) self.ciurl = page.find("div", id="viewer").find("img", id="image")["src"] return self.ciurl def open(self): - return imgstream(self.iurl()) + return lib.stdimgstream(self.iurl()) def __str__(self): return self.name @@ -66,14 +49,14 @@ class chapter(lib.pagelist): def pages(self): if self.cpag is None: - pg = soup(htcache.fetch(self.url + "1.html")) + pg = soupify(htcache.fetch(self.url + "1.html")) l = pg.find("form", id="top_bar").find("div", attrs={"class": "l"}) if len(l.contents) != 3: raise Exception("parse error: weird page list for %r" % self) m = l.contents[2].strip() - if m[:3] != u"of ": + if m[:3] != "of ": raise Exception("parse error: weird page list for %r" % self) - self.cpag = [page(self, self.stack + [(self, n)], n + 1, self.url + ("%i.html" % (n + 1))) for n in xrange(int(m[3:]))] + self.cpag = [page(self, self.stack + [(self, n)], n + 1, self.url + ("%i.html" % (n + 1))) for n in range(int(m[3:]))] return self.cpag def __str__(self): @@ -105,10 +88,12 @@ class volume(lib.pagelist): def nextel(el): while True: el = el.nextSibling - if isinstance(el, BeautifulSoup.Tag): + if isinstance(el, bs4.Tag): return el class manga(lib.manga): + cure = re.compile(r"/c[\d.]+/$") + def __init__(self, lib, id, name, url): self.lib = lib self.id = id @@ -125,31 +110,34 @@ class manga(lib.manga): def vols(self): if self.cvol is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"}) - self.cvol = [] + cvol = [] for i, vn in enumerate(reversed(vls)): name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip() - vid = name.encode("utf8") - vol = volume(self, [(self, i)], vid, name) + vol = volume(self, [(self, i)], name, name) cls = nextel(vn) - if cls.name != u"ul" or cls["class"] != u"chlist": + if cls.name != "ul" or "chlist" not in cls["class"]: raise Exception("parse error: weird volume list for %r" % self) for o, ch in enumerate(reversed(cls.findAll("li"))): n = ch.div.h3 or ch.div.h4 - name = n.a.string - chid = name.encode("utf8") + chid = name = n.a.string for span in ch("span"): try: - if u" title " in (u" " + span["class"] + u" "): + if "title" in span["class"]: name += " " + span.string except KeyError: pass - url = n.a["href"].encode("us-ascii") - if url[-7:] != "/1.html": + url = urllib.parse.urljoin(self.url, n.a["href"]) + if url[-7:] == "/1.html": + url = url[:-6] + elif self.cure.search(url) is not None: + pass + else: raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url)) - vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6])) - self.cvol.append(vol) + vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url)) + cvol.append(vol) + self.cvol = cvol return self.cvol def __str__(self): @@ -159,34 +147,37 @@ class manga(lib.manga): return "" % self.name def libalphacmp(a, b): - return cmp(a.upper(), b.upper()) + if a.upper() < b.upper(): + return -1 + elif a.upper() > b.upper(): + return 1 + return 0 class library(lib.library): def __init__(self): self.base = "http://mangafox.me/" def alphapage(self, pno): - page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno))) + abase = self.base + ("directory/%i.htm?az" % pno) + page = soupify(htcache.fetch(abase)) ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li") ret = [] - ubase = self.base + "manga/" for m in ls: t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"}) name = t.string - url = t["href"].encode("us-ascii") - if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1): + url = urllib.parse.urljoin(abase, t["href"]) + p = url.find("/manga/") + if p < 0 or url.find('/', p + 7) != (len(url) - 1): raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url)) - ret.append(manga(self, url[len(ubase):-1], name, url)) + ret.append(manga(self, url[p + 7:-1], name, url)) return ret def alphapages(self): - page = soup(htcache.fetch(self.base + "directory/?az")) + page = soupify(htcache.fetch(self.base + "directory/?az")) ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li") return int(ls[-2].find("a").string) def byname(self, prefix): - if not isinstance(prefix, unicode): - prefix = prefix.decode("utf8") l = 1 r = self.alphapages() while True: @@ -218,9 +209,16 @@ class library(lib.library): ls = self.alphapage(pno) i = 0 + def search(self, expr): + req = urllib.request.Request(self.base + ("ajax/search.php?term=%s" % urllib.parse.quote(expr)), + headers={"User-Agent": "automanga/1"}) + with urllib.request.urlopen(req) as resp: + rc = json.loads(resp.read().decode("utf-8")) + return [manga(self, id, name, self.base + ("manga/%s/" % id)) for num, name, id, genres, author in rc] + def byid(self, id): url = self.base + ("manga/%s/" % id) - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) if page.find("div", id="title") is None: # Assume we got the search page raise KeyError(id)