Added a fileno function on imgstreams, for polling.
[automanga.git] / manga / mangafox.py
CommitLineData
f3ad0817
FT
1import urllib
2import BeautifulSoup
3import lib, htcache
4soup = BeautifulSoup.BeautifulSoup
5
class imgstream(lib.imgstream):
    """Image stream that reads its data from a URL opened with urllib."""

    def __init__(self, url):
        """Open *url* and record the Content-Type header of the response."""
        handle = urllib.urlopen(url)
        self.bk = handle
        self.ctype = handle.info()["Content-Type"]

    def fileno(self):
        """Return the file descriptor number of the underlying URL handle."""
        return self.bk.fileno()

    def close(self):
        """Close the underlying URL handle."""
        self.bk.close()

    def read(self, sz = None):
        """Read up to *sz* bytes, or everything that remains if *sz* is None."""
        if sz is not None:
            return self.bk.read(sz)
        return self.bk.read()
22
class page(lib.page):
    """A single page of a chapter; resolves and opens its image lazily."""

    def __init__(self, chapter, stack, n, url):
        """Record the owning chapter (plus its volume and manga), the
        navigation stack, the page number *n* and the page's HTML URL."""
        self.chapter = chapter
        self.volume = chapter.volume
        self.manga = chapter.volume.manga
        self.stack = stack
        self.n = n
        self.id = str(n)
        self.url = url
        # Image URL; filled in by the first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the URL of the page image.

        On the first call the page at self.url is fetched through htcache
        and parsed, and the "src" of the <img id="image"> element inside
        <div id="viewer"> is remembered for later calls.
        """
        if self.ciurl is not None:
            return self.ciurl
        doc = soup(htcache.fetch(self.url))
        viewer = doc.find("div", id="viewer")
        image = viewer.find("img", id="image")
        self.ciurl = image["src"]
        return self.ciurl

    def open(self):
        """Return a new imgstream reading from the page image URL."""
        target = self.iurl()
        return imgstream(target)
42
class chapter(lib.pagelist):
    """A chapter of a volume; its page list is fetched on first use."""

    def __init__(self, volume, stack, id, name, url):
        """Record the owning volume (and its manga), the navigation stack,
        the chapter id and name, and the base URL that page file names
        ("1.html", "2.html", ...) are appended to."""
        self.volume = volume
        self.manga = volume.manga
        self.stack = stack
        self.id = id
        self.name = name
        self.url = url
        # List of page objects; filled in by the first call to pages().
        self.cpag = None

    def __getitem__(self, i):
        pagelist = self.pages()
        return pagelist[i]

    def __len__(self):
        pagelist = self.pages()
        return len(pagelist)

    def pages(self):
        """Return the list of page objects of this chapter.

        On the first call the chapter's "1.html" is fetched through htcache.
        The <div class="l"> inside <form id="top_bar"> must have exactly
        three children, the last being text of the form "of N"; N page
        objects are then created. Raises Exception when the markup does not
        have that shape.
        """
        if self.cpag is not None:
            return self.cpag
        doc = soup(htcache.fetch(self.url + "1.html"))
        bar = doc.find("form", id="top_bar").find("div", attrs={"class": "l"})
        if len(bar.contents) != 3:
            raise Exception("parse error: weird page list for %r" % self)
        total = bar.contents[2].strip()
        if total[:3] != u"of ":
            raise Exception("parse error: weird page list for %r" % self)
        count = int(total[3:])
        built = []
        for idx in xrange(count):
            # Stack entries are zero-based, page numbers and file names one-based.
            pno = idx + 1
            built.append(page(self, self.stack + [(self, idx)], pno, self.url + ("%i.html" % pno)))
        # Assigned only once the whole list has been built.
        self.cpag = built
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        names = (self.manga.name, self.volume.name, self.name)
        return "<mangafox.chapter %r.%r.%r>" % names
76
class volume(lib.pagelist):
    """A named volume of a manga, acting as a sequence of its chapters."""

    def __init__(self, manga, stack, id, name):
        """Record the owning manga, the navigation stack, and the volume's
        id and name; the chapter list starts out empty."""
        self.manga = manga
        self.stack = stack
        self.id = id
        self.name = name
        # Chapter objects; appended to by whoever builds the volume.
        self.ch = []

    def __getitem__(self, i):
        chapters = self.ch
        return chapters[i]

    def __len__(self):
        chapters = self.ch
        return len(chapters)

    def __str__(self):
        return self.name

    def __repr__(self):
        names = (self.manga.name, self.name)
        return "<mangafox.volume %r.%r>" % names
96
def nextel(el):
    """Return the first sibling following *el* that is a BeautifulSoup Tag.

    Siblings that are not Tag instances are skipped. No check is made for
    running out of siblings.
    """
    sibling = el.nextSibling
    while not isinstance(sibling, BeautifulSoup.Tag):
        sibling = sibling.nextSibling
    return sibling
102
class manga(lib.manga):
    """A manga series; its volumes and chapters are parsed on first use."""

    def __init__(self, lib, id, name, url):
        """Record the owning library, the manga id and name, and the URL of
        the series page. A manga is the root of a navigation stack, so its
        own stack is empty."""
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # List of volume objects; filled in by the first successful vols().
        self.cvol = None
        self.stack = []

    def __getitem__(self, i):
        return self.vols()[i]

    def __len__(self):
        return len(self.vols())

    def vols(self):
        """Return the list of volume objects, each filled with its chapters.

        On the first call the series page is fetched through htcache and
        parsed: every <div class="slide"> inside <div id="chapters"> is a
        volume, and the next element sibling must be a <ul class="chlist">
        whose <li> items are the chapters. Both lists are walked in reverse
        document order.

        The result is cached only when parsing completes; if an exception is
        raised part-way through, nothing is cached and the next call parses
        again instead of returning a truncated volume list.

        Raises Exception on unexpected markup or chapter URLs.
        """
        if self.cvol is not None:
            return self.cvol
        page = soup(htcache.fetch(self.url))
        vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
        # Build into a local list so a failure cannot leave a partial cache.
        built = []
        for i, vn in enumerate(reversed(vls)):
            vname = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
            vol = volume(self, [(self, i)], vname.encode("utf8"), vname)
            cls = nextel(vn)
            if cls.name != u"ul" or cls["class"] != u"chlist":
                raise Exception("parse error: weird volume list for %r" % self)
            for o, ch in enumerate(reversed(cls.findAll("li"))):
                n = ch.div.h3 or ch.div.h4
                name = n.a.string
                # The id is taken from the link text alone, before any
                # title span is appended to the display name.
                chid = name.encode("utf8")
                for span in ch("span"):
                    try:
                        spancls = span["class"]
                    except KeyError:
                        # Spans without a class attribute are ignored.
                        continue
                    if u" title " in (u" " + spancls + u" "):
                        name += " " + span.string
                url = n.a["href"].encode("us-ascii")
                if url[-7:] != "/1.html":
                    raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
                # Drop "1.html" but keep the trailing slash: chapter appends
                # page file names to this base.
                vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6]))
            built.append(vol)
        self.cvol = built
        return self.cvol

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.manga %r>" % self.name
152
def libalphacmp(a, b):
    """Compare two names case-insensitively.

    Both arguments are upper-cased before comparison. Returns -1, 0 or 1
    when *a* sorts before, equal to, or after *b*.

    Written with comparison operators rather than the builtin cmp(), which
    only exists on Python 2; the result is the same there.
    """
    ua = a.upper()
    ub = b.upper()
    return (ua > ub) - (ua < ub)
155
class library(lib.library):
    """Entry point for looking up manga on the site rooted at self.base."""

    def __init__(self):
        self.base = "http://mangafox.me/"

    def alphapage(self, pno):
        """Return the manga objects listed on alphabetical directory page *pno*.

        Every listed link must have the form <base>manga/<id>/ with a single
        path component as the id; otherwise Exception is raised.
        """
        page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
        ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
        ret = []
        ubase = self.base + "manga/"
        for m in ls:
            t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
            name = t.string
            url = t["href"].encode("us-ascii")
            # The only slash after the prefix must be the trailing one.
            if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1):
                raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url))
            ret.append(manga(self, url[len(ubase):-1], name, url))
        return ret

    def alphapages(self):
        """Return the number of alphabetical directory pages.

        Read from the second-to-last item of the navigation list on the
        directory front page.
        """
        page = soup(htcache.fetch(self.base + "directory/?az"))
        ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
        return int(ls[-2].find("a").string)

    def byname(self, prefix):
        """Generate the manga whose names start with *prefix*, ignoring case.

        *prefix* may be a unicode string or a UTF-8 encoded byte string.

        The search relies on the directory being ordered the way
        libalphacmp() orders names. A binary search finds the first page
        whose last entry is not below the prefix, so matches are found even
        when the first one is the very first entry of a page. Entries are
        then yielded from there until the first one that does not start
        with the prefix, never reading past the last directory page.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        uprefix = prefix.upper()
        npages = self.alphapages()
        if npages < 1:
            return

        # Pages probed by the search are kept so the page the scan starts
        # on does not have to be parsed a second time.
        fetched = {}

        def getpage(pno):
            if pno not in fetched:
                fetched[pno] = self.alphapage(pno)
            return fetched[pno]

        # Lower-bound search over page numbers lo..hi (inclusive).
        lo = 1
        hi = npages
        while lo < hi:
            mid = lo + ((hi - lo) // 2)
            ls = getpage(mid)
            # An empty page cannot show that the matches lie further on.
            if ls and libalphacmp(ls[-1].name, prefix) < 0:
                lo = mid + 1
            else:
                hi = mid

        skipping = True
        for pno in xrange(lo, npages + 1):
            for m in getpage(pno):
                if skipping:
                    # Pass over the entries that sort before the prefix.
                    if libalphacmp(m.name, prefix) < 0:
                        continue
                    skipping = False
                if m.name[:len(prefix)].upper() != uprefix:
                    return
                yield m

    def byid(self, id):
        """Return the manga with the given id.

        Raises KeyError(id) when the fetched page has no <div id="title">,
        which is taken to mean the site answered with its search page.
        """
        url = self.base + ("manga/%s/" % id)
        page = soup(htcache.fetch(url))
        if page.find("div", id="title") is None:
            # Assume we got the search page
            raise KeyError(id)
        name = page.find("div", id="series_info").find("div", attrs={"class": "cover"}).img["alt"]
        return manga(self, id, name, url)

    def __iter__(self):
        """Iterating over the whole library is not supported."""
        raise NotImplementedError("mangafox iterator")