import urllib, re, BeautifulSoup
import lib, htcache
# Short alias for the BeautifulSoup parser class.
soup = BeautifulSoup.BeautifulSoup
# Added by this patch: parse `cont` with convertEntities=soup.HTML_ENTITIES.
# NOTE(review): presumably so HTML entities in scraped text/attributes come
# back as the characters they stand for -- confirm against BeautifulSoup 3 docs.
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
def byclass(el, name, cl):
for ch in el.findAll(name):
# Return the image URL for this page.  The value is looked up on the first
# call and cached in self.ciurl; later calls return the cached value.
def iurl(self):
if self.ciurl is None:
# Patch: the page is now parsed through soupify() instead of plain soup().
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
# nextel() is defined outside this view -- presumably it yields the element
# following div#full_image; the <img> is taken from that element.
img = nextel(page.find("div", id="full_image")).img
# Keep the src attribute encoded as us-ascii.
self.ciurl = img["src"].encode("us-ascii")
return self.ciurl
pnre = re.compile(r"page (\d+)")
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url))
+ pg = soupify(htcache.fetch(self.url))
cpag = []
for opt in pg.find("select", id="page_select").findAll("option"):
url = opt["value"].encode("us-ascii")
cure = re.compile(r"/read/_/(\d+)/[^/]*")
def ch(self):
if self.cch is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cls = byclass(page, u"table", u"chapters_list")
if cls.tbody is not None:
cls = cls.tbody
def altnames(self):
if self.cnames is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cnames = None
for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
if tbl.tbody is not None: tbl = tbl.tbody
def byid(self, id):
url = self.base + "comic/_/comics/" + id
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
if title is None:
raise KeyError(id)
_pars["p"] = str(p)
resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
try:
- page = soup(resp.read())
+ page = soupify(resp.read())
finally:
resp.close()
rls = page.find("div", id="comic_search_results").table
import BeautifulSoup, json
import lib, htcache
# Short alias for the BeautifulSoup parser class.
soup = BeautifulSoup.BeautifulSoup
# Added by this patch: parse `cont` with convertEntities=soup.HTML_ENTITIES.
# NOTE(review): presumably so HTML entities in scraped text/attributes come
# back as the characters they stand for -- confirm against BeautifulSoup 3 docs.
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
class page(lib.page):
def __init__(self, chapter, stack, n, url):
# Return the image URL for this page.  The value is looked up on the first
# call and cached in self.ciurl; later calls return the cached value.
def iurl(self):
if self.ciurl is None:
# Patch: the page is now parsed through soupify() instead of plain soup().
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
# The URL is the src attribute of img#image inside div#viewer; it is stored
# as returned by the parser (no .encode() call here).
self.ciurl = page.find("div", id="viewer").find("img", id="image")["src"]
return self.ciurl
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url + "1.html"))
+ pg = soupify(htcache.fetch(self.url + "1.html"))
l = pg.find("form", id="top_bar").find("div", attrs={"class": "l"})
if len(l.contents) != 3:
raise Exception("parse error: weird page list for %r" % self)
def vols(self):
if self.cvol is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
cvol = []
for i, vn in enumerate(reversed(vls)):
self.base = "http://mangafox.me/"
def alphapage(self, pno):
- page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
+ page = soupify(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
ret = []
ubase = self.base + "manga/"
return ret
# Return, as an int, the number shown in the second-to-last entry of the
# navigation list on the "directory/?az" page.
# NOTE(review): presumably this is the count of alphabetical directory pages,
# with the final <li> being a "next" link -- confirm against the live markup.
def alphapages(self):
# Patch: the page is now parsed through soupify() instead of plain soup().
- page = soup(htcache.fetch(self.base + "directory/?az"))
+ page = soupify(htcache.fetch(self.base + "directory/?az"))
# All <li> items of the first <ul> under div#nav inside div#mangalist.
ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
return int(ls[-2].find("a").string)
def byid(self, id):
url = self.base + ("manga/%s/" % id)
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
if page.find("div", id="title") is None:
# Assume we got the search page
raise KeyError(id)
import BeautifulSoup, urlparse
import lib, htcache
# Short alias for the BeautifulSoup parser class.
soup = BeautifulSoup.BeautifulSoup
# Added by this patch: parse `cont` with convertEntities=soup.HTML_ENTITIES.
# NOTE(review): presumably so HTML entities in scraped text/attributes come
# back as the characters they stand for -- confirm against BeautifulSoup 3 docs.
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
class page(lib.page):
def __init__(self, chapter, stack, n, url):
# Return the image URL for this page.  The value is looked up on the first
# call and cached in self.ciurl; later calls return the cached value.
def iurl(self):
if self.ciurl is None:
# Patch: the page is now parsed through soupify() instead of plain soup().
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
# The URL is the src attribute of img#img inside div#imgholder, kept
# encoded as us-ascii.
self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"].encode("us-ascii")
return self.ciurl
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url))
+ pg = soupify(htcache.fetch(self.url))
pag = []
for opt in pg.find("div", id="selectpage").find("select", id="pageMenu").findAll("option"):
url = urlparse.urljoin(self.url, opt["value"].encode("us-ascii"))
def ch(self):
if self.cch is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cls = page.find("div", id="chapterlist").find("table", id="listing")
i = 0
cch = []
# Look up a manga by its site identifier.
# The page at self.base + id is fetched and parsed; if it has no
# <h2 class="aname"> element, KeyError(id) is raised.  Otherwise a manga
# object is built from the id, the heading's text and the URL.
def byid(self, id):
url = self.base + id
# Patch: the page is now parsed through soupify() instead of plain soup().
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
# A missing title heading is treated as "no such manga".
if page.find("h2", attrs={"class": "aname"}) is None:
raise KeyError(id)
name = page.find("h2", attrs={"class": "aname"}).string
return manga(self, id, name, url)
def __iter__(self):
- page = soup(htcache.fetch(self.base + "alphabetical"))
+ page = soupify(htcache.fetch(self.base + "alphabetical"))
for sec in page.findAll("div", attrs={"class": "series_alpha"}):
for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"):
url = li.a["href"].encode("us-ascii")
import BeautifulSoup, urlparse
import lib, htcache
# Short alias for the BeautifulSoup parser class.
soup = BeautifulSoup.BeautifulSoup
# Added by this patch: parse `cont` with convertEntities=soup.HTML_ENTITIES.
# NOTE(review): presumably so HTML entities in scraped text/attributes come
# back as the characters they stand for -- confirm against BeautifulSoup 3 docs.
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
class page(lib.page):
def __init__(self, chapter, stack, n, url):
def iurl(self):
if self.ciurl is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
for tr in page.findAll("tr"):
img = tr.find("img", id="picture")
if img is not None:
if self.url[-2:] != "/1":
raise Exception("parse error: unexpected first page url for %r" % self)
base = self.url[:-1]
- pg = soup(htcache.fetch(self.url))
+ pg = soupify(htcache.fetch(self.url))
pag = []
for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
n = int(opt["value"])
def ch(self):
if self.cch is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cls = None
for div in page.findAll("div", attrs={"class": "post"}):
if div.h3 is not None and u"Chapter List" in div.h3.string:
def byid(self, id):
url = urlparse.urljoin(self.base, id + "/")
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
name = None
for div in page.findAll("div", attrs={"class": "post"}):
if div.h2 is not None and div.h2.a is not None:
return manga(self, id, name, url)
def __iter__(self):
- page = soup(htcache.fetch(self.base + "Manga/"))
+ page = soupify(htcache.fetch(self.base + "Manga/"))
for part in page.find("div", attrs={"class": "post"}).findAll("table"):
for row in part.findAll("tr"):
link = row.findAll("td")[1].a