X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fmrnet.py;h=05052964772989c376359c76e63b68ddb451606f;hb=c0d3b1a2e6671763efcce90b089bc0f9100e8d2f;hp=ab2f6a640b6f609c2a50cfe4d840352e7f826252;hpb=bbcdf86a60aac903b127ce5c038834a85eac883f;p=automanga.git diff --git a/manga/mrnet.py b/manga/mrnet.py index ab2f6a6..0505296 100644 --- a/manga/mrnet.py +++ b/manga/mrnet.py @@ -1,33 +1,7 @@ -import urllib import BeautifulSoup, urlparse import lib, htcache soup = BeautifulSoup.BeautifulSoup - -class imgstream(lib.imgstream): - def __init__(self, url): - self.bk = urllib.urlopen(url) - ok = False - try: - if self.bk.getcode() != 200: - raise IOError("Server error: " + str(self.bk.getcode())) - self.ctype = self.bk.info()["Content-Type"] - self.clen = int(self.bk.info()["Content-Length"]) - ok = True - finally: - if not ok: - self.bk.close() - - def fileno(self): - return self.bk.fileno() - - def close(self): - self.bk.close() - - def read(self, sz = None): - if sz is None: - return self.bk.read() - else: - return self.bk.read(sz) +soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES) class page(lib.page): def __init__(self, chapter, stack, n, url): @@ -42,12 +16,12 @@ class page(lib.page): def iurl(self): if self.ciurl is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"].encode("us-ascii") return self.ciurl def open(self): - return imgstream(self.iurl()) + return lib.stdimgstream(self.iurl()) def __str__(self): return self.name @@ -72,7 +46,7 @@ class chapter(lib.pagelist): def pages(self): if self.cpag is None: - pg = soup(htcache.fetch(self.url)) + pg = soupify(htcache.fetch(self.url)) pag = [] for opt in pg.find("div", id="selectpage").find("select", id="pageMenu").findAll("option"): url = urlparse.urljoin(self.url, opt["value"].encode("us-ascii")) @@ -104,7 +78,7 @@ class manga(lib.manga): def ch(self): if self.cch is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) cls = page.find("div", id="chapterlist").find("table", id="listing") i = 0 cch = [] @@ -117,7 +91,7 @@ class manga(lib.manga): cid = name.encode("utf8") if isinstance(cla.nextSibling, unicode): ncont = unicode(cla.nextSibling) - if ncont[:3] == u" : ": + if len(ncont) > 3 and ncont[:3] == u" : ": name += u": " + ncont[3:] cch.append(chapter(self, [(self, len(cch))], cid, name, url)) self.cch = cch @@ -135,8 +109,40 @@ class library(lib.library): def byid(self, id): url = self.base + id - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) if page.find("h2", attrs={"class": "aname"}) is None: raise KeyError(id) name = page.find("h2", attrs={"class": "aname"}).string return manga(self, id, name, url) + + def __iter__(self): + page = soupify(htcache.fetch(self.base + "alphabetical")) + for sec in page.findAll("div", attrs={"class": "series_alpha"}): + for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"): + url = li.a["href"].encode("us-ascii") + name = li.a.string + if url[:1] != "/": continue + id = url[1:] + if '/' in id: + # Does this distinction mean something? 
+                    id = id[id.rindex('/') + 1:]
+                if id[-5:] != ".html":
+                    continue
+                id = id[:-5]
+                yield manga(self, id, name, urlparse.urljoin(self.base, url))
+
+    def byname(self, prefix):
+        if not isinstance(prefix, unicode):
+            prefix = prefix.decode("utf8")
+        prefix = prefix.lower()
+        for manga in self:
+            if manga.name.lower()[:len(prefix)] == prefix:
+                yield manga
+
+    def search(self, expr):
+        if not isinstance(expr, unicode):
+            expr = expr.decode("utf8")
+        expr = expr.lower()
+        for manga in self:
+            if expr in manga.name.lower():
+                yield manga
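
Background on the soupify change (not part of the patch): passing convertEntities=soup.HTML_ENTITIES makes BeautifulSoup 3 decode HTML entities while parsing, so scraped strings such as chapter names come back as plain unicode text rather than containing literal &amp;-style references. A rough equivalent of the new one-line helper, written out for clarity:

# Sketch of what the soupify lambda does; assumes BeautifulSoup 3.x, which
# the module already imports as BeautifulSoup.
import BeautifulSoup

def soupify(cont):
    # HTML_ENTITIES tells the parser to translate entities such as "&amp;"
    # into the corresponding unicode characters while building the tree.
    return BeautifulSoup.BeautifulSoup(cont,
                                       convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES)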
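
Usage note (not part of the patch): the methods added above give library a lazy, network-backed listing interface on top of htcache. The sketch below only illustrates how the new entry points might be exercised; it assumes library() can still be constructed without arguments, that the module is importable as manga.mrnet, and that ch() and pages() return the cached lists built earlier in the file, none of which this diff shows directly. The id "naruto" and the search strings are hypothetical examples.

# Illustrative sketch under the assumptions stated above (Python 2, matching
# the module). Page fetches go through htcache, so repeated calls hit the
# network at most once per cached document.
from manga import mrnet

mlib = mrnet.library()

# Direct lookup by site id; byid() raises KeyError when the fetched page has
# no <h2 class="aname"> element.
m = mlib.byid("naruto")                     # hypothetical id
print m.name

# Drilling down to image data now goes through lib.stdimgstream instead of
# the local imgstream class removed by this commit.
ch = m.ch()[0]
pg = ch.pages()[0]
st = pg.open()
data = st.read()
st.close()

# byname() is a case-insensitive prefix match, search() a substring match;
# both walk the full alphabetical index via __iter__ and yield lazily.
for m in mlib.byname("bers"):
    print m.name
for m in mlib.search(u"berserk"):
    print m.name

Since __iter__ fetches and parses the site's whole alphabetical index, byname() and search() are only as cheap as the htcache layer makes them; callers that already know a site id are better served by byid().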