-import urllib
-import BeautifulSoup
+import urllib, re
+import BeautifulSoup, json
import lib, htcache
soup = BeautifulSoup.BeautifulSoup
-
-class imgstream(lib.imgstream):
- def __init__(self, url):
- self.bk = urllib.urlopen(url)
- self.ctype = self.bk.info()["Content-Type"]
-
- def close(self):
- self.bk.close()
-
- def read(self, sz = None):
- if sz is None:
- return self.bk.read()
- else:
- return self.bk.read(sz)
def soupify(cont):
    """Parse `cont` with BeautifulSoup, converting HTML entities.

    Thin wrapper around the module-level `soup` constructor that always
    passes convertEntities=soup.HTML_ENTITIES, so that every caller parses
    pages the same way.
    """
    return soup(cont, convertEntities=soup.HTML_ENTITIES)
class page(lib.page):
def __init__(self, chapter, stack, n, url):
self.manga = self.volume.manga
self.n = n
self.id = str(n)
+ self.name = u"Page %s" % n
self.url = url
self.ciurl = None
def iurl(self):
    """Return the image URL of this page, resolving it on first use.

    The result is memoized in self.ciurl; when it is not yet known, the
    page at self.url is fetched through htcache and the "src" attribute of
    the <img id="image"> inside <div id="viewer"> is used.
    """
    if self.ciurl is not None:
        return self.ciurl
    doc = soupify(htcache.fetch(self.url))
    viewer = doc.find("div", id="viewer")
    self.ciurl = viewer.find("img", id="image")["src"]
    return self.ciurl
def open(self):
    """Return a lib.stdimgstream for this page's image URL."""
    image_url = self.iurl()
    return lib.stdimgstream(image_url)
+
def __str__(self):
    """Return the page's display name (self.name)."""
    return self.name
+
def __repr__(self):
    """Return a debug representation naming manga, volume, chapter and page."""
    # Ordered from the outermost container down to this page.
    parts = (self.manga.name, self.volume.name, self.chapter.name, self.name)
    return "<mangafox.page %r.%r.%r.%r>" % parts
class chapter(lib.pagelist):
def __init__(self, volume, stack, id, name, url):
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url + "1.html"))
+ pg = soupify(htcache.fetch(self.url + "1.html"))
l = pg.find("form", id="top_bar").find("div", attrs={"class": "l"})
if len(l.contents) != 3:
raise Exception("parse error: weird page list for %r" % self)
return el
class manga(lib.manga):
+ cure = re.compile(r"/c[\d.]+/$")
+
def __init__(self, lib, id, name, url):
self.lib = lib
self.id = id
def vols(self):
if self.cvol is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
- self.cvol = []
+ cvol = []
for i, vn in enumerate(reversed(vls)):
name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
vid = name.encode("utf8")
except KeyError:
pass
url = n.a["href"].encode("us-ascii")
- if url[-7:] != "/1.html":
+ if url[-7:] == "/1.html":
+ url = url[:-6]
+ elif self.cure.search(url) is not None:
+ pass
+ else:
raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
- vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6]))
- self.cvol.append(vol)
+ vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url))
+ cvol.append(vol)
+ self.cvol = cvol
return self.cvol
def __str__(self):
self.base = "http://mangafox.me/"
def alphapage(self, pno):
- page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
+ page = soupify(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
ret = []
ubase = self.base + "manga/"
return ret
def alphapages(self):
    """Return the integer shown in the second-to-last pager entry.

    Fetches the "directory/?az" page below self.base and reads the link
    text of the second-to-last <li> in the <div id="nav"> list inside
    <div id="mangalist">.
    """
    # Presumably this entry is the last page number of the alphabetical
    # directory (the final entry being a "next" link) -- TODO confirm.
    doc = soupify(htcache.fetch(self.base + "directory/?az"))
    nav = doc.find("div", id="mangalist").find("div", id="nav")
    entries = nav.find("ul").findAll("li")
    link = entries[-2].find("a")
    return int(link.string)
ls = self.alphapage(pno)
i = 0
def search(self, expr):
    """Query the site's AJAX search endpoint and return matching manga.

    `expr` is the search term; it may be a byte string or a unicode
    string. The response is decoded as JSON, and each row is unpacked as
    (num, name, id, genres, author); only `name` and `id` are used to
    build the returned list of `manga` objects. The response object is
    always closed, even if JSON decoding fails.
    """
    # urllib.quote() raises KeyError on non-ASCII unicode input under
    # Python 2, so encode anything that is not already a byte string.
    if not isinstance(expr, str):
        expr = expr.encode("utf8")
    resp = urllib.urlopen(self.base + ("ajax/search.php?term=%s" % urllib.quote(expr)))
    try:
        rc = json.load(resp)
    finally:
        resp.close()
    ret = []
    for num, name, mid, genres, author in rc:
        # Encode once and reuse for both the manga id and its URL.
        mid = mid.encode("utf8")
        ret.append(manga(self, mid, name, self.base + ("manga/%s/" % mid)))
    return ret
+
def byid(self, id):
url = self.base + ("manga/%s/" % id)
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
if page.find("div", id="title") is None:
# Assume we got the search page
raise KeyError(id)