Added simplistic keyword searching to mrnet and local libraries.
[automanga.git] / manga / mrnet.py
CommitLineData
bbcdf86a
FT
1import urllib
2import BeautifulSoup, urlparse
3import lib, htcache
4soup = BeautifulSoup.BeautifulSoup
5
class imgstream(lib.imgstream):
    """Image stream backed by an HTTP response opened with urllib.

    After successful construction, `ctype` holds the response's
    Content-Type header and `clen` its Content-Length as an integer.
    """

    def __init__(self, url):
        """Open `url` and record the content type and length.

        Raises IOError when the response code is not 200.  If anything
        fails after the connection has been opened, the connection is
        closed again before the exception propagates.
        """
        self.bk = urllib.urlopen(url)
        ready = False
        try:
            status = self.bk.getcode()
            if status != 200:
                raise IOError("Server error: " + str(status))
            headers = self.bk.info()
            self.ctype = headers["Content-Type"]
            self.clen = int(headers["Content-Length"])
            ready = True
        finally:
            # Only release the connection on failure; on success the
            # caller owns it and is expected to call close().
            if not ready:
                self.bk.close()

    def fileno(self):
        """Return the file descriptor of the underlying response."""
        return self.bk.fileno()

    def close(self):
        """Close the underlying response."""
        self.bk.close()

    def read(self, sz = None):
        """Read `sz` bytes, or everything remaining when `sz` is None."""
        return self.bk.read() if sz is None else self.bk.read(sz)
31
class page(lib.page):
    """A single page of a chapter.

    The image URL is not known up front; it is scraped from the page's
    HTML on first use and cached in `ciurl`.
    """

    def __init__(self, chapter, stack, n, url):
        """Store the owning chapter, its manga, the page number and URL.

        `id` is the page number as a string and `name` is the unicode
        label "Page <n>".
        """
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached image URL; filled in lazily by iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL for this page, fetching it if needed."""
        if self.ciurl is not None:
            return self.ciurl
        doc = soup(htcache.fetch(self.url))
        holder = doc.find("div", id="imgholder")
        image = holder.find("img", id="img")
        self.ciurl = image["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Open and return an imgstream for this page's image."""
        return imgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        parts = (self.manga.name, self.chapter.name, self.name)
        return "<mrnet.page %r.%r.%r>" % parts
57
class chapter(lib.pagelist):
    """A chapter of a manga, behaving as a sequence of page objects.

    The page list is scraped from the chapter's HTML on first access
    and cached in `cpag`.
    """

    def __init__(self, manga, stack, id, name, url):
        """Store the owning manga, the chapter's id, name and URL."""
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached list of pages; filled in lazily by pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the list of pages, loading it on the first call.

        Each <option> of the "pageMenu" select yields one page: the
        option's value is the page URL (resolved against the chapter
        URL) and its text is the page number.
        """
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url))
            menu = doc.find("div", id="selectpage").find("select", id="pageMenu")
            found = []
            for idx, opt in enumerate(menu.findAll("option")):
                target = urlparse.urljoin(self.url, opt["value"].encode("us-ascii"))
                number = int(opt.string)
                # The page's stack extends the chapter's with (chapter, index).
                found.append(page(self, self.stack + [(self, idx)], number, target))
            self.cpag = found
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.chapter %r.%r>" % (self.manga.name, self.name)
89
class manga(lib.manga):
    """A manga series, behaving as a sequence of chapter objects.

    The chapter list is scraped from the series page on first access
    and cached in `cch`.
    """

    def __init__(self, lib, id, name, url):
        """Store the owning library, the series id, name and URL."""
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached list of chapters; filled in lazily by ch().
        self.cch = None
        # A manga is the root of the hierarchy, so its stack is empty.
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapters, loading it on the first call.

        Every row of the "listing" table that has a <td> contributes one
        chapter.  The link in the first cell gives the chapter URL
        (resolved against the series URL); the link text, UTF-8 encoded,
        is the chapter id.  When the link is directly followed by text of
        the form " : <title>", the chapter name becomes
        "<link text>: <title>"; otherwise it is the link text alone.
        """
        if self.cch is None:
            doc = soup(htcache.fetch(self.url))
            table = doc.find("div", id="chapterlist").find("table", id="listing")
            found = []
            for row in table.findAll("tr"):
                cell = row.find("td")
                if cell is None:
                    # Rows without any <td> carry no chapter link.
                    continue
                link = cell.find("a")
                url = urlparse.urljoin(self.url, link["href"].encode("us-ascii"))
                name = link.string
                # The id is taken from the bare link text, before any
                # trailing title is appended to the display name.
                cid = name.encode("utf8")
                trailer = link.nextSibling
                if isinstance(trailer, unicode):
                    trailer = unicode(trailer)
                    # Require something after the " : " separator.
                    if len(trailer) > 3 and trailer.startswith(u" : "):
                        name += u": " + trailer[3:]
                found.append(chapter(self, [(self, len(found))], cid, name, url))
            self.cch = found
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.manga %r>" % self.name
131
class library(lib.library):
    """Manga library scraped from www.mangareader.net."""

    def __init__(self):
        self.base = "http://www.mangareader.net/"

    def byid(self, id):
        """Return the manga whose page lives at base URL + `id`.

        Raises KeyError(id) when the fetched page has no
        <h2 class="aname"> heading.
        """
        url = self.base + id
        doc = soup(htcache.fetch(url))
        heading = doc.find("h2", attrs={"class": "aname"})
        if heading is None:
            raise KeyError(id)
        return manga(self, id, heading.string, url)

    def __iter__(self):
        """Yield a manga for each usable entry on the alphabetical index.

        Entries whose href does not start with "/" are skipped.  An href
        with a single path component is used as the id directly.  For a
        deeper href only the last component is considered: it must end
        in ".html" (otherwise the entry is skipped) and the id is that
        component without the suffix.
        """
        index = soup(htcache.fetch(self.base + "alphabetical"))
        for section in index.findAll("div", attrs={"class": "series_alpha"}):
            listing = section.find("ul", attrs={"class": "series_alpha"})
            for item in listing.findAll("li"):
                href = item.a["href"].encode("us-ascii")
                title = item.a.string
                if not href.startswith("/"):
                    continue
                ident = href[1:]
                if "/" in ident:
                    # Does this distinction mean something?
                    ident = ident[ident.rindex("/") + 1:]
                    if not ident.endswith(".html"):
                        continue
                    ident = ident[:-5]
                yield manga(self, ident, title, urlparse.urljoin(self.base, href))

    def byname(self, prefix):
        """Yield every manga whose name starts with `prefix`.

        The comparison is case-insensitive; a non-unicode `prefix` is
        decoded as UTF-8 first.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        wanted = prefix.lower()
        width = len(wanted)
        for entry in self:
            if entry.name.lower()[:width] == wanted:
                yield entry

    def search(self, expr):
        """Yield every manga whose name contains `expr`.

        The comparison is case-insensitive; a non-unicode `expr` is
        decoded as UTF-8 first.
        """
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        needle = expr.lower()
        for entry in self:
            if needle in entry.name.lower():
                yield entry