Added simplistic keyword searching to mrnet and local libraries.
[automanga.git] / manga / mrnet.py
1 import urllib
2 import BeautifulSoup, urlparse
3 import lib, htcache
4 soup = BeautifulSoup.BeautifulSoup
5
class imgstream(lib.imgstream):
    """Image data stream read over HTTP through urllib.urlopen.

    After successful construction the instance carries `ctype` (the
    Content-Type header value) and `clen` (the Content-Length header
    converted to int).
    """

    def __init__(self, url):
        """Open `url` and record its content type and length.

        Raises IOError when the response code is not 200.  If anything
        fails before both headers have been read, the connection is
        closed again before the exception propagates.
        """
        self.bk = urllib.urlopen(url)
        ready = False
        try:
            status = self.bk.getcode()
            if status != 200:
                raise IOError("Server error: " + str(status))
            headers = self.bk.info()
            self.ctype = headers["Content-Type"]
            self.clen = int(headers["Content-Length"])
            ready = True
        finally:
            # Do not leak the connection when setup failed part-way.
            if not ready:
                self.bk.close()

    def fileno(self):
        """Return the file descriptor of the underlying response object."""
        return self.bk.fileno()

    def close(self):
        """Close the underlying response object."""
        self.bk.close()

    def read(self, sz = None):
        """Read `sz` bytes, or everything that remains when `sz` is None."""
        return self.bk.read() if sz is None else self.bk.read(sz)
31
class page(lib.page):
    """A single page of a chapter; its image URL is looked up lazily."""

    def __init__(self, chapter, stack, n, url):
        """Store the owning chapter, the navigation stack, the page number
        `n` and the URL of the HTML page that shows the image."""
        self.chapter = chapter
        self.manga = chapter.manga
        self.stack = stack
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached image URL; filled in by the first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL, scraping the page HTML on first use.

        The URL is taken from the `src` attribute of the <img id="img">
        element inside <div id="imgholder"> and encoded as US-ASCII.
        """
        if self.ciurl is not None:
            return self.ciurl
        doc = soup(htcache.fetch(self.url))
        holder = doc.find("div", id="imgholder")
        img = holder.find("img", id="img")
        self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Return an imgstream reading this page's image."""
        return imgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        names = (self.manga.name, self.chapter.name, self.name)
        return "<mrnet.page %r.%r.%r>" % names
57
class chapter(lib.pagelist):
    """A chapter of a manga; its page list is scraped lazily."""

    def __init__(self, manga, stack, id, name, url):
        """Store the owning manga, the navigation stack, the chapter id
        and name, and the URL of the chapter's first HTML page."""
        self.manga = manga
        self.stack = stack
        self.id = id
        self.name = name
        self.url = url
        # Cached list of page objects; filled in by the first pages() call.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the list of pages, fetching and parsing it on first use.

        One page is created per <option> of the <select id="pageMenu">
        inside <div id="selectpage">: the option value (joined onto the
        chapter URL) is the page URL and the option text is its number.
        """
        if self.cpag is not None:
            return self.cpag
        doc = soup(htcache.fetch(self.url))
        menu = doc.find("div", id="selectpage").find("select", id="pageMenu")
        found = []
        for idx, opt in enumerate(menu.findAll("option")):
            target = urlparse.urljoin(self.url, opt["value"].encode("us-ascii"))
            number = int(opt.string)
            # Each page's stack extends the chapter's with (chapter, index).
            found.append(page(self, self.stack + [(self, idx)], number, target))
        self.cpag = found
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.chapter %r.%r>" % (self.manga.name, self.name)
89
class manga(lib.manga):
    """A manga series; its chapter list is scraped lazily from its page."""

    def __init__(self, lib, id, name, url):
        """Store the owning library, the series id and name, and the URL
        of the series page."""
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached list of chapter objects; filled in by the first ch() call.
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapters, fetching and parsing it on first use.

        Chapters come from the rows of <table id="listing"> inside
        <div id="chapterlist">.  Rows without a <td>, and rows whose first
        <td> holds no link, are skipped.
        """
        if self.cch is None:
            doc = soup(htcache.fetch(self.url))
            listing = doc.find("div", id="chapterlist").find("table", id="listing")
            cch = []
            for tr in listing.findAll("tr"):
                td = tr.find("td")
                if td is None:
                    continue
                cla = td.find("a")
                if cla is None:
                    # No chapter link in this row, so there is nothing to
                    # build a chapter from; skip it instead of failing on
                    # the attribute lookup below.
                    continue
                url = urlparse.urljoin(self.url, cla["href"].encode("us-ascii"))
                name = cla.string
                # The id is the bare link text; the display name may get
                # a suffix appended below.
                cid = name.encode("utf8")
                if isinstance(cla.nextSibling, unicode):
                    # Text following the link of the form " : <rest>" is
                    # appended to the display name as ": <rest>".
                    ncont = unicode(cla.nextSibling)
                    if len(ncont) > 3 and ncont[:3] == u" : ":
                        name += u": " + ncont[3:]
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.manga %r>" % self.name
131
class library(lib.library):
    """Library of the manga listed on http://www.mangareader.net/."""

    def __init__(self):
        self.base = "http://www.mangareader.net/"

    def byid(self, id):
        """Return the manga whose page is at the base URL plus `id`.

        Raises KeyError(id) when the fetched page has no
        <h2 class="aname"> element.
        """
        url = self.base + id
        doc = soup(htcache.fetch(url))
        heading = doc.find("h2", attrs={"class": "aname"})
        if heading is None:
            raise KeyError(id)
        return manga(self, id, heading.string, url)

    def __iter__(self):
        """Yield a manga for every usable entry of the alphabetical index."""
        index = soup(htcache.fetch(self.base + "alphabetical"))
        for section in index.findAll("div", attrs={"class": "series_alpha"}):
            entries = section.find("ul", attrs={"class": "series_alpha"})
            for li in entries.findAll("li"):
                href = li.a["href"].encode("us-ascii")
                title = li.a.string
                # Only site-relative links are considered.
                if not href.startswith("/"):
                    continue
                mid = href[1:]
                if "/" in mid:
                    # Nested paths are accepted only as ".../<id>.html".
                    # (Original author's open question: does this
                    # distinction mean something?)
                    mid = mid[mid.rindex("/") + 1:]
                    if not mid.endswith(".html"):
                        continue
                    mid = mid[:-5]
                yield manga(self, mid, title, urlparse.urljoin(self.base, href))

    def byname(self, prefix):
        """Yield every manga whose name starts with `prefix`.

        The comparison is case-insensitive; a non-unicode `prefix` is
        decoded as UTF-8 first.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        wanted = prefix.lower()
        for entry in self:
            if entry.name.lower().startswith(wanted):
                yield entry

    def search(self, expr):
        """Yield every manga whose name contains `expr`.

        The comparison is case-insensitive; a non-unicode `expr` is
        decoded as UTF-8 first.
        """
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        needle = expr.lower()
        for entry in self:
            if needle in entry.name.lower():
                yield entry