X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fmangafox.py;h=ef84eb0106b8cd7a17616ec9bb5ef5e99ae5a321;hb=c0d3b1a2e6671763efcce90b089bc0f9100e8d2f;hp=572853c82b5cb1dcd0f8444735472fb22ded80c9;hpb=9948db89162b12bd94971dbad4b0f765dd1b47f5;p=automanga.git

diff --git a/manga/mangafox.py b/manga/mangafox.py
index 572853c..ef84eb0 100644
--- a/manga/mangafox.py
+++ b/manga/mangafox.py
@@ -1,25 +1,8 @@
-import urllib
-import BeautifulSoup
+import urllib, re
+import BeautifulSoup, json
 import lib, htcache
 soup = BeautifulSoup.BeautifulSoup
-
-class imgstream(lib.imgstream):
-    def __init__(self, url):
-        self.bk = urllib.urlopen(url)
-        self.ctype = self.bk.info()["Content-Type"]
-        self.clen = int(self.bk.info()["Content-Length"])
-
-    def fileno(self):
-        return self.bk.fileno()
-
-    def close(self):
-        self.bk.close()
-
-    def read(self, sz = None):
-        if sz is None:
-            return self.bk.read()
-        else:
-            return self.bk.read(sz)
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
 
 class page(lib.page):
     def __init__(self, chapter, stack, n, url):
@@ -29,17 +12,24 @@ class page(lib.page):
         self.manga = self.volume.manga
         self.n = n
         self.id = str(n)
+        self.name = u"Page %s" % n
         self.url = url
         self.ciurl = None
 
     def iurl(self):
         if self.ciurl is None:
-            page = soup(htcache.fetch(self.url))
+            page = soupify(htcache.fetch(self.url))
             self.ciurl = page.find("div", id="viewer").find("img", id="image")["src"]
         return self.ciurl
 
     def open(self):
-        return imgstream(self.iurl())
+        return lib.stdimgstream(self.iurl())
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        return "<mangafox.page %r.%r.%r.%r>" % (self.manga.name, self.volume.name, self.chapter.name, self.name)
 
 class chapter(lib.pagelist):
     def __init__(self, volume, stack, id, name, url):
@@ -59,7 +49,7 @@ class chapter(lib.pagelist):
 
     def pages(self):
         if self.cpag is None:
-            pg = soup(htcache.fetch(self.url + "1.html"))
+            pg = soupify(htcache.fetch(self.url + "1.html"))
             l = pg.find("form", id="top_bar").find("div", attrs={"class": "l"})
             if len(l.contents) != 3:
                 raise Exception("parse error: weird page list for %r" % self)
@@ -102,6 +92,8 @@ def nextel(el):
             return el
 
 class manga(lib.manga):
+    cure = re.compile(r"/c[\d.]+/$")
+
     def __init__(self, lib, id, name, url):
         self.lib = lib
         self.id = id
@@ -118,9 +110,9 @@ class manga(lib.manga):
 
     def vols(self):
         if self.cvol is None:
-            page = soup(htcache.fetch(self.url))
+            page = soupify(htcache.fetch(self.url))
             vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
-            self.cvol = []
+            cvol = []
             for i, vn in enumerate(reversed(vls)):
                 name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
                 vid = name.encode("utf8")
@@ -139,10 +131,15 @@ class manga(lib.manga):
                         except KeyError:
                             pass
                     url = n.a["href"].encode("us-ascii")
-                    if url[-7:] != "/1.html":
+                    if url[-7:] == "/1.html":
+                        url = url[:-6]
+                    elif self.cure.search(url) is not None:
+                        pass
+                    else:
                         raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
-                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6]))
-                self.cvol.append(vol)
+                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url))
+                cvol.append(vol)
+            self.cvol = cvol
         return self.cvol
 
     def __str__(self):
@@ -159,7 +156,7 @@ class library(lib.library):
         self.base = "http://mangafox.me/"
 
     def alphapage(self, pno):
-        page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
+        page = soupify(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
         ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
attrs={"class": "list"}).findAll("li") ret = [] ubase = self.base + "manga/" @@ -173,7 +170,7 @@ class library(lib.library): return ret def alphapages(self): - page = soup(htcache.fetch(self.base + "directory/?az")) + page = soupify(htcache.fetch(self.base + "directory/?az")) ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li") return int(ls[-2].find("a").string) @@ -211,9 +208,17 @@ class library(lib.library): ls = self.alphapage(pno) i = 0 + def search(self, expr): + resp = urllib.urlopen(self.base + ("ajax/search.php?term=%s" % urllib.quote(expr))) + try: + rc = json.load(resp) + finally: + resp.close() + return [manga(self, id.encode("utf8"), name, self.base + ("manga/%s/" % id.encode("utf8"))) for num, name, id, genres, author in rc] + def byid(self, id): url = self.base + ("manga/%s/" % id) - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) if page.find("div", id="title") is None: # Assume we got the search page raise KeyError(id)