Extracted the standard imgstream from mrnet and mangafox and put it in lib.
[automanga.git] / manga / mrnet.py
index ab2f6a6..1439f09 100644 (file)
@@ -1,34 +1,7 @@
-import urllib
 import BeautifulSoup, urlparse
 import lib, htcache
 soup = BeautifulSoup.BeautifulSoup
 
-class imgstream(lib.imgstream):
-    def __init__(self, url):
-        self.bk = urllib.urlopen(url)
-        ok = False
-        try:
-            if self.bk.getcode() != 200:
-                raise IOError("Server error: " + str(self.bk.getcode()))
-            self.ctype = self.bk.info()["Content-Type"]
-            self.clen = int(self.bk.info()["Content-Length"])
-            ok = True
-        finally:
-            if not ok:
-                self.bk.close()
-
-    def fileno(self):
-        return self.bk.fileno()
-
-    def close(self):
-        self.bk.close()
-
-    def read(self, sz = None):
-        if sz is None:
-            return self.bk.read()
-        else:
-            return self.bk.read(sz)
-
 class page(lib.page):
     def __init__(self, chapter, stack, n, url):
         self.stack = stack
@@ -47,7 +20,7 @@ class page(lib.page):
         return self.ciurl
 
     def open(self):
-        return imgstream(self.iurl())
+        return lib.stdimgstream(self.iurl())
 
     def __str__(self):
         return self.name
@@ -117,7 +90,7 @@ class manga(lib.manga):
                 cid = name.encode("utf8")
                 if isinstance(cla.nextSibling, unicode):
                     ncont = unicode(cla.nextSibling)
-                    if ncont[:3] == u" : ":
+                    if len(ncont) > 3 and ncont[:3] == u" : ":
                         name += u": " + ncont[3:]
                 cch.append(chapter(self, [(self, len(cch))], cid, name, url))
             self.cch = cch
@@ -140,3 +113,35 @@ class library(lib.library):
             raise KeyError(id)
         name = page.find("h2", attrs={"class": "aname"}).string
         return manga(self, id, name, url)
+
+    def __iter__(self):
+        """Yield a manga for each accepted link in the "alphabetical" page's series_alpha lists."""
+        index = soup(htcache.fetch(self.base + "alphabetical"))
+        for block in index.findAll("div", attrs={"class": "series_alpha"}):
+            for item in block.find("ul", attrs={"class": "series_alpha"}).findAll("li"):
+                href, title = item.a["href"].encode("us-ascii"), item.a.string
+                if not href.startswith("/"):
+                    continue
+                mid = href[1:]
+                if "/" in mid:  # nested path: keep only if the last part ends in ".html"
+                    mid = mid.rsplit("/", 1)[1]
+                    if not mid.endswith(".html"):
+                        continue
+                    mid = mid[:-5]
+                yield manga(self, mid, title, urlparse.urljoin(self.base, href))
+
+    def byname(self, prefix):
+        """Yield each manga in this library whose lower-cased name begins with `prefix`."""
+        needle = prefix if isinstance(prefix, unicode) else prefix.decode("utf8")
+        needle = needle.lower()
+        for entry in self:
+            if entry.name.lower()[:len(needle)] == needle:
+                yield entry
+
+    def search(self, expr):
+        """Yield each manga in this library whose lower-cased name contains `expr`."""
+        needle = expr if isinstance(expr, unicode) else expr.decode("utf8")
+        needle = needle.lower()
+        for entry in self:
+            if needle in entry.name.lower():
+                yield entry