X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fbatoto.py;h=baa602f261156f6f1395a252a797ce0906f89918;hb=e7cc76067ff041bf8edd6c79f2d96df97c8eaf58;hp=686997373ce51926e832c1dbf6ee9e848160f841;hpb=687f2ed3a817fb4cd40364522254724d68d015ee;p=automanga.git diff --git a/manga/batoto.py b/manga/batoto.py index 6869973..baa602f 100644 --- a/manga/batoto.py +++ b/manga/batoto.py @@ -1,6 +1,7 @@ import urllib, re, BeautifulSoup import lib, htcache soup = BeautifulSoup.BeautifulSoup +soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES) def byclass(el, name, cl): for ch in el.findAll(name): @@ -28,7 +29,7 @@ class page(lib.page): def iurl(self): if self.ciurl is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) img = nextel(page.find("div", id="full_image")).img self.ciurl = img["src"].encode("us-ascii") return self.ciurl @@ -60,7 +61,7 @@ class chapter(lib.pagelist): pnre = re.compile(r"page (\d+)") def pages(self): if self.cpag is None: - pg = soup(htcache.fetch(self.url)) + pg = soupify(htcache.fetch(self.url)) cpag = [] for opt in pg.find("select", id="page_select").findAll("option"): url = opt["value"].encode("us-ascii") @@ -83,6 +84,7 @@ class manga(lib.manga): self.url = url self.cch = None self.stack = [] + self.cnames = None def __getitem__(self, i): return self.ch()[i] @@ -93,7 +95,7 @@ class manga(lib.manga): cure = re.compile(r"/read/_/(\d+)/[^/]*") def ch(self): if self.cch is None: - page = soup(htcache.fetch(self.url)) + page = soupify(htcache.fetch(self.url)) cls = byclass(page, u"table", u"chapters_list") if cls.tbody is not None: cls = cls.tbody @@ -117,6 +119,26 @@ class manga(lib.manga): self.cch = rch return self.cch + def altnames(self): + if self.cnames is None: + page = soupify(htcache.fetch(self.url)) + cnames = None + for tbl in page.findAll("table", attrs={"class": "ipb_table"}): + if tbl.tbody is not None: tbl = tbl.tbody + for tr in tbl.findAll("tr"): + if u"Alt Names:" in tr.td.text: + nls = nextel(tr.td) + if nls.name != u"td" or nls.span is None: + raise Exception("Weird altnames table in " + self.id) + cnames = [nm.text.strip() for nm in nls.findAll("span")] + break + if cnames is not None: + break + if cnames is None: + raise Exception("Could not find altnames for " + self.id) + self.cnames = cnames + return self.cnames + def __str__(self): return self.name @@ -130,28 +152,61 @@ class library(lib.library): def byid(self, id): url = self.base + "comic/_/comics/" + id - page = soup(htcache.fetch(url)) + page = soupify(htcache.fetch(url)) title = page.find("h1", attrs={"class": "ipsType_pagetitle"}) if title is None: raise KeyError(id) return manga(self, id, title.string.strip(), url) - mure = re.compile(r"/comic/_/comics/([^/]*)$") - def search(self, expr): - resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1", - urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"})) - try: - page = soup(resp.read()) - finally: - resp.close() - ret = [] - for child in page.find("div", id="search_results").ol.childGenerator(): - if isinstance(child, BeautifulSoup.Tag) and child.name == u"li": - info = child.find("div", attrs={"class": "result_info"}) - url = info.h3.a["href"].encode("us-ascii") - m = self.mure.search(url) + def _search(self, pars): + p = 1 + while True: + _pars = dict(pars) + _pars["p"] = str(p) + resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars)) + try: + page = soupify(resp.read()) + finally: + resp.close() + rls = page.find("div", id="comic_search_results").table + if rls.tbody is not None: + rls = rls.tbody + hasmore = False + for child in rls.findAll("tr"): + if child.th is not None: continue + if child.get("id", u"")[:11] == u"comic_rowo_": continue + if child.get("id") == u"show_more_row": + hasmore = True + continue + link = child.td.strong.a + url = link["href"].encode("us-ascii") + m = self.rure.search(url) if m is None: raise Exception("Got weird manga URL: %r" % url) id = m.group(1) - name = info.h3.a.string.strip() - ret.append(manga(self, id, name, url)) - return ret + name = link.text.strip() + yield manga(self, id, name, url) + p += 1 + if not hasmore: + break + + rure = re.compile(r"/comic/_/([^/]*)$") + def search(self, expr): + if not isinstance(expr, unicode): + expr = expr.decode("utf8") + return self._search({"name": expr.encode("utf8"), "name_cond": "c"}) + + def byname(self, prefix): + if not isinstance(prefix, unicode): + prefix = prefix.decode("utf8") + for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}): + if res.name[:len(prefix)].lower() == prefix.lower(): + yield res + else: + for aname in res.altnames(): + if aname[:len(prefix)].lower() == prefix.lower(): + yield manga(self, res.id, aname, res.url) + break + else: + if False: + print "eliding " + res.name + print res.altnames()