X-Git-Url: http://dolda2000.com/gitweb/?p=automanga.git;a=blobdiff_plain;f=manga%2Frawsen.py;h=c22ac8aaf110eae612ff3fcc394e67e9d58dd6c7;hp=214a4b81909a5a61b1a9be13d9c0311ca98b0236;hb=e7cc76067ff041bf8edd6c79f2d96df97c8eaf58;hpb=50f7a2153ff875b9706ba95f62f23e9e0610c46f

diff --git a/manga/rawsen.py b/manga/rawsen.py
index 214a4b8..c22ac8a 100644
--- a/manga/rawsen.py
+++ b/manga/rawsen.py
@@ -1,6 +1,8 @@
-import BeautifulSoup, urlparse
-import lib, htcache
-soup = BeautifulSoup.BeautifulSoup
+import bs4
+from . import lib, htcache
+from urllib.parse import urljoin
+soup = bs4.BeautifulSoup
+soupify = lambda cont: soup(cont)
 
 class page(lib.page):
     def __init__(self, chapter, stack, n, url):
@@ -9,17 +11,17 @@ class page(lib.page):
         self.manga = chapter.manga
         self.n = n
         self.id = str(n)
-        self.name = u"Page " + unicode(n)
+        self.name = "Page " + str(n)
         self.url = url
         self.ciurl = None
 
     def iurl(self):
         if self.ciurl is None:
-            page = soup(htcache.fetch(self.url))
+            page = soupify(htcache.fetch(self.url))
             for tr in page.findAll("tr"):
                 img = tr.find("img", id="picture")
                 if img is not None:
-                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
+                    self.ciurl = urljoin(self.url, img["src"])
         if self.ciurl is None:
             raise Exception("parse error: could not find image url for %r" % self)
         return self.ciurl
@@ -53,11 +55,11 @@ class chapter(lib.pagelist):
             if self.url[-2:] != "/1":
                 raise Exception("parse error: unexpected first page url for %r" % self)
             base = self.url[:-1]
-            pg = soup(htcache.fetch(self.url))
+            pg = soupify(htcache.fetch(self.url))
             pag = []
             for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
                 n = int(opt["value"])
-                url = urlparse.urljoin(base, str(n))
+                url = urljoin(base, str(n))
                 pag.append(page(self, self.stack + [(self, len(pag))], n, url))
             self.cpag = pag
         return self.cpag
@@ -85,10 +87,10 @@ class manga(lib.manga):
 
     def ch(self):
         if self.cch is None:
-            page = soup(htcache.fetch(self.url))
+            page = soupify(htcache.fetch(self.url))
             cls = None
             for div in page.findAll("div", attrs={"class": "post"}):
-                if div.h3 is not None and u"Chapter List" in div.h3.string:
+                if div.h3 is not None and "Chapter List" in div.h3.string:
                     cls = div
                     break
             if cls is None:
@@ -98,9 +100,9 @@ class manga(lib.manga):
                 lcol = tr.findAll("td")[1]
                 if lcol.a is None: continue
                 link = lcol.a
-                url = link["href"].encode("us-ascii")
+                url = link["href"]
                 name = link["title"]
-                cid = name.encode("utf-8")
+                cid = name
                 cch.append(chapter(self, [(self, len(cch))], cid, name, url))
             self.cch = cch
         return self.cch
@@ -116,32 +118,32 @@ class library(lib.library):
         self.base = "http://raw.senmanga.com/"
 
     def byid(self, id):
-        url = urlparse.urljoin(self.base, id + "/")
-        page = soup(htcache.fetch(url))
+        url = urljoin(self.base, id + "/")
+        page = soupify(htcache.fetch(url))
         name = None
-        for div in page.findAll("div", attrs={"class": "post"}):
-            if div.h2 is not None and div.h2.a is not None:
-                curl = div.h2.a["href"].encode("us-ascii")
+        for div in page.findAll("div", id="post"):
+            if div.h1 is not None and div.h1.a is not None:
+                curl = div.h1.a["href"]
                 if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
                 if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
-                name = div.h2.a.string
+                name = div.h1.a.string
         if name is None:
             raise KeyError(id)
         return manga(self, id, name, url)
 
     def __iter__(self):
-        page = soup(htcache.fetch(self.base + "Manga/"))
+        page = soupify(htcache.fetch(self.base + "Manga/"))
         for part in page.find("div", attrs={"class": "post"}).findAll("table"):
             for row in part.findAll("tr"):
                 link = row.findAll("td")[1].a
                 if link is None:
                     continue
-                url = link["href"].encode("us-ascii")
+                url = link["href"]
                 name = link.string
                 if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                     continue
                 id = url[1:-1]
-                yield manga(self, id, name, urlparse.urljoin(self.base, url))
+                yield manga(self, id, name, urljoin(self.base, url))
 
     def byname(self, prefix):
         if not isinstance(prefix, unicode):