X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fbatoto.py;h=8593e0495492dd7d878a0a6fefd2cb2e49ac0676;hb=24f0a3b7536a0366efb49bbe47f3a784002646ff;hp=cab29b5f56123da274c5314072fb1b82216222f2;hpb=c72e69b8c6044cf4393b496bcfe73700698f4fa6;p=automanga.git diff --git a/manga/batoto.py b/manga/batoto.py index cab29b5..8593e04 100644 --- a/manga/batoto.py +++ b/manga/batoto.py @@ -83,6 +83,7 @@ class manga(lib.manga): self.url = url self.cch = None self.stack = [] + self.cnames = None def __getitem__(self, i): return self.ch()[i] @@ -117,6 +118,26 @@ class manga(lib.manga): self.cch = rch return self.cch + def altnames(self): + if self.cnames is None: + page = soup(htcache.fetch(self.url)) + cnames = None + for tbl in page.findAll("table", attrs={"class": "ipb_table"}): + if tbl.tbody is not None: tbl = tbl.tbody + for tr in tbl.findAll("tr"): + if u"Alt Names:" in tr.td.text: + nls = nextel(tr.td) + if nls.name != u"td" or nls.span is None: + raise Exception("Weird altnames table in " + self.id) + cnames = [nm.text.strip() for nm in nls.findAll("span")] + break + if cnames is not None: + break + if cnames is None: + raise Exception("Could not find altnames for " + self.id) + self.cnames = cnames + return self.cnames + def __str__(self): return self.name @@ -136,25 +157,55 @@ class library(lib.library): raise KeyError(id) return manga(self, id, title.string.strip(), url) - mure = re.compile(r"/comic/_/comics/([^/]*)$") - def search(self, expr): - resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1", - urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"})) - try: - page = soup(resp.read()) - finally: - resp.close() - none = page.find("p", attrs={"class": "no_messages"}) - if none is not None and u"No results" in none.text: - return [] - ret = [] - for child in page.find("div", id="search_results").ol.childGenerator(): - if isinstance(child, BeautifulSoup.Tag) and child.name == u"li": - info = child.find("div", attrs={"class": "result_info"}) - url = info.h3.a["href"].encode("us-ascii") - m = self.mure.search(url) + def _search(self, pars): + p = 1 + while True: + _pars = dict(pars) + _pars["p"] = str(p) + resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars)) + try: + page = soup(resp.read()) + finally: + resp.close() + rls = page.find("div", id="comic_search_results").table + if rls.tbody is not None: + rls = rls.tbody + hasmore = False + for child in rls.findAll("tr"): + if child.th is not None: continue + if child.get("id", u"")[:11] == u"comic_rowo_": continue + if child.get("id") == u"show_more_row": + hasmore = True + continue + link = child.td.strong.a + url = link["href"].encode("us-ascii") + m = self.rure.search(url) if m is None: raise Exception("Got weird manga URL: %r" % url) id = m.group(1) - name = info.h3.a.string.strip() - ret.append(manga(self, id, name, url)) - return ret + name = link.text.strip() + yield manga(self, id, name, url) + p += 1 + if not hasmore: + break + + rure = re.compile(r"/comic/_/([^/]*)$") + def search(self, expr): + if not isinstance(expr, unicode): + expr = expr.decode("utf8") + return self._search({"name": expr.encode("utf8"), "name_cond": "c"}) + + def byname(self, prefix): + if not isinstance(prefix, unicode): + prefix = prefix.decode("utf8") + for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}): + if res.name[:len(prefix)].lower() == prefix.lower(): + yield res + else: + for aname in res.altnames(): + if aname[:len(prefix)].lower() == prefix.lower(): + yield manga(self, res.id, aname, res.url) + break + else: + if False: + print "eliding " + res.name + print res.altnames()