X-Git-Url: http://dolda2000.com/gitweb/?p=automanga.git;a=blobdiff_plain;f=manga%2Frawsen.py;h=a87d52e27a19d21a873926f379d2e9e1e2ece1a6;hp=214a4b81909a5a61b1a9be13d9c0311ca98b0236;hb=c0d3b1a2e6671763efcce90b089bc0f9100e8d2f;hpb=50f7a2153ff875b9706ba95f62f23e9e0610c46f diff --git a/manga/rawsen.py b/manga/rawsen.py index 214a4b8..a87d52e 100644 --- a/manga/rawsen.py +++ b/manga/rawsen.py @@ -1,6 +1,7 @@ import BeautifulSoup, urlparse import lib, htcache soup = BeautifulSoup.BeautifulSoup +soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES) class page(lib.page): def __init__(self, chapter, stack, n, url): @@ -15,7 +16,7 @@ class page(lib.page): def iurl(self): if self.ciurl is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) for tr in page.findAll("tr"): img = tr.find("img", id="picture") if img is not None: @@ -53,7 +54,7 @@ class chapter(lib.pagelist): if self.url[-2:] != "/1": raise Exception("parse error: unexpected first page url for %r" % self) base = self.url[:-1] - pg = soup(htcache.fetch(self.url)) + pg = soupify(htcache.fetch(self.url)) pag = [] for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"): n = int(opt["value"]) @@ -85,7 +86,7 @@ class manga(lib.manga): def ch(self): if self.cch is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) cls = None for div in page.findAll("div", attrs={"class": "post"}): if div.h3 is not None and u"Chapter List" in div.h3.string: @@ -117,7 +118,7 @@ class library(lib.library): def byid(self, id): url = urlparse.urljoin(self.base, id + "/") - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) name = None for div in page.findAll("div", attrs={"class": "post"}): if div.h2 is not None and div.h2.a is not None: @@ -130,7 +131,7 @@ class library(lib.library): return manga(self, id, name, url) def __iter__(self): - page = soup(htcache.fetch(self.base + "Manga/")) + page = soupify(htcache.fetch(self.base + "Manga/")) for part in page.find("div", attrs={"class": "post"}).findAll("table"): for row in part.findAll("tr"): link = row.findAll("td")[1].a