X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fmrnet.py;h=fca97b187cedcfa9a9e2f4f8c4a2404f63db95bd;hb=HEAD;hp=1439f09e030da5d9700de7f5d25d976e8cdbd731;hpb=b9e558ac507f4e6c11c8c9837b5bf22b5da90fce;p=automanga.git diff --git a/manga/mrnet.py b/manga/mrnet.py index 1439f09..fca97b1 100644 --- a/manga/mrnet.py +++ b/manga/mrnet.py @@ -1,6 +1,8 @@ -import BeautifulSoup, urlparse -import lib, htcache -soup = BeautifulSoup.BeautifulSoup +import bs4 +from urllib.parse import urljoin +from . import lib, htcache +soup = bs4.BeautifulSoup +soupify = lambda cont: soup(cont, "html.parser") class page(lib.page): def __init__(self, chapter, stack, n, url): @@ -9,14 +11,14 @@ class page(lib.page): self.manga = chapter.manga self.n = n self.id = str(n) - self.name = u"Page %s" % n + self.name = "Page %s" % n self.url = url self.ciurl = None def iurl(self): if self.ciurl is None: - page = soup(htcache.fetch(self.url)) - self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"].encode("us-ascii") + page = soupify(htcache.fetch(self.url)) + self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"] return self.ciurl def open(self): @@ -45,10 +47,10 @@ class chapter(lib.pagelist): def pages(self): if self.cpag is None: - pg = soup(htcache.fetch(self.url)) + pg = soupify(htcache.fetch(self.url)) pag = [] for opt in pg.find("div", id="selectpage").find("select", id="pageMenu").findAll("option"): - url = urlparse.urljoin(self.url, opt["value"].encode("us-ascii")) + url = urljoin(self.url, opt["value"]) n = int(opt.string) pag.append(page(self, self.stack + [(self, len(pag))], n, url)) self.cpag = pag @@ -77,7 +79,7 @@ class manga(lib.manga): def ch(self): if self.cch is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) cls = page.find("div", id="chapterlist").find("table", id="listing") i = 0 cch = [] @@ -85,13 +87,12 @@ class manga(lib.manga): td = tr.find("td") if td is None: continue cla = td.find("a") - url = urlparse.urljoin(self.url, cla["href"].encode("us-ascii")) - name = cla.string - cid = name.encode("utf8") - if isinstance(cla.nextSibling, unicode): - ncont = unicode(cla.nextSibling) - if len(ncont) > 3 and ncont[:3] == u" : ": - name += u": " + ncont[3:] + url = urljoin(self.url, cla["href"]) + cid = name = cla.string + if isinstance(cla.nextSibling, str): + ncont = str(cla.nextSibling) + if len(ncont) > 3 and ncont[:3] == " : ": + name += ": " + ncont[3:] cch.append(chapter(self, [(self, len(cch))], cid, name, url)) self.cch = cch return self.cch @@ -108,17 +109,17 @@ class library(lib.library): def byid(self, id): url = self.base + id - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) if page.find("h2", attrs={"class": "aname"}) is None: raise KeyError(id) name = page.find("h2", attrs={"class": "aname"}).string return manga(self, id, name, url) def __iter__(self): - page = soup(htcache.fetch(self.base + "alphabetical")) + page = soupify(htcache.fetch(self.base + "alphabetical")) for sec in page.findAll("div", attrs={"class": "series_alpha"}): for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"): - url = li.a["href"].encode("us-ascii") + url = li.a["href"] name = li.a.string if url[:1] != "/": continue id = url[1:] @@ -128,19 +129,15 @@ class library(lib.library): if id[-5:] != ".html": continue id = id[:-5] - yield manga(self, id, name, urlparse.urljoin(self.base, url)) + yield manga(self, id, name, urljoin(self.base, url)) def byname(self, prefix): - if not isinstance(prefix, unicode): - prefix = prefix.decode("utf8") prefix = prefix.lower() for manga in self: if manga.name.lower()[:len(prefix)] == prefix: yield manga def search(self, expr): - if not isinstance(expr, unicode): - expr = expr.decode("utf8") expr = expr.lower() for manga in self: if expr in manga.name.lower():