Decode HTML entities correctly.
[automanga.git] / manga / mrnet.py
CommitLineData
bbcdf86a
FT
1import BeautifulSoup, urlparse
2import lib, htcache
soup = BeautifulSoup.BeautifulSoup


def soupify(cont):
    """Parse `cont` with BeautifulSoup, asking it to convert HTML entities.

    Defined as a named function rather than a lambda bound to a name, so
    that it shows up under its own name in tracebacks and can carry this
    docstring. The call it makes is unchanged.
    """
    return soup(cont, convertEntities=soup.HTML_ENTITIES)
bbcdf86a 5
bbcdf86a
FT
class page(lib.page):
    """One page of a chapter; its image URL is looked up on first use."""

    def __init__(self, chapter, stack, n, url):
        """Store the page's owner, position and URL without fetching anything.

        `n` is kept as-is in `self.n`, as a string in `self.id`, and is
        formatted into the display name.
        """
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cache slot for the image URL; stays None until iurl() fills it.
        self.ciurl = None

    def iurl(self):
        """Return the image URL, fetching and parsing the page only once."""
        if self.ciurl is not None:
            return self.ciurl
        doc = soupify(htcache.fetch(self.url))
        holder = doc.find("div", id="imgholder")
        img = holder.find("img", id="img")
        self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Return lib.stdimgstream() for this page's image URL."""
        image_url = self.iurl()
        return lib.stdimgstream(image_url)

    def __str__(self):
        return self.name

    def __repr__(self):
        parts = (self.manga.name, self.chapter.name, self.name)
        return "<mrnet.page %r.%r.%r>" % parts
31
class chapter(lib.pagelist):
    """A chapter of a manga, acting as a lazily loaded sequence of pages."""

    def __init__(self, manga, stack, id, name, url):
        """Store the chapter's owner, identity and URL; nothing is fetched."""
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cache slot for the page list; stays None until pages() fills it.
        self.cpag = None

    def __getitem__(self, i):
        loaded = self.pages()
        return loaded[i]

    def __len__(self):
        loaded = self.pages()
        return len(loaded)

    def pages(self):
        """Return the list of page objects, building it on the first call.

        The list is read from the <option> elements of the "pageMenu"
        <select> inside the "selectpage" <div> of the chapter's page.
        """
        if self.cpag is not None:
            return self.cpag
        doc = soupify(htcache.fetch(self.url))
        menu = doc.find("div", id="selectpage").find("select", id="pageMenu")
        found = []
        # Every option yields exactly one page, so the enumerate index is
        # the page's position in the resulting list.
        for idx, opt in enumerate(menu.findAll("option")):
            target = urlparse.urljoin(self.url, opt["value"].encode("us-ascii"))
            number = int(opt.string)
            found.append(page(self, self.stack + [(self, idx)], number, target))
        self.cpag = found
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        parts = (self.manga.name, self.name)
        return "<mrnet.chapter %r.%r>" % parts
63
class manga(lib.manga):
    """A manga title, acting as a lazily loaded sequence of chapters."""

    def __init__(self, lib, id, name, url):
        """Store the owning library, identity and URL; nothing is fetched."""
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cache slot for the chapter list; stays None until ch() fills it.
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapter objects, building it on the first call.

        Chapters are read from the rows of the "listing" table inside the
        "chapterlist" <div>. Rows without a <td> are skipped. The chapter
        id is the link text encoded as UTF-8; the display name is the link
        text, extended with the text following the link when that text
        starts with " : ".
        """
        if self.cch is None:
            doc = soupify(htcache.fetch(self.url))
            listing = doc.find("div", id="chapterlist").find("table", id="listing")
            cch = []
            for tr in listing.findAll("tr"):
                td = tr.find("td")
                if td is None:
                    continue
                cla = td.find("a")
                url = urlparse.urljoin(self.url, cla["href"].encode("us-ascii"))
                name = cla.string
                cid = name.encode("utf8")
                if isinstance(cla.nextSibling, unicode):
                    ncont = unicode(cla.nextSibling)
                    # Only a non-empty title after the " : " separator counts.
                    if len(ncont) > 3 and ncont.startswith(u" : "):
                        name += u": " + ncont[3:]
                # len(cch) is the index this chapter will occupy; rows may
                # have been skipped, so the row number cannot be used.
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.manga %r>" % self.name
105
class library(lib.library):
    """Entry point for looking up and listing manga under `self.base`."""

    def __init__(self):
        self.base = "http://www.mangareader.net/"

    def byid(self, id):
        """Return the manga whose page lives at `self.base + id`.

        Raises KeyError(id) when the fetched page has no <h2 class="aname">
        element; otherwise that element's text becomes the manga's name.
        """
        url = self.base + id
        page = soupify(htcache.fetch(url))
        # Look the heading up once and reuse it for both the existence
        # check and the name.
        heading = page.find("h2", attrs={"class": "aname"})
        if heading is None:
            raise KeyError(id)
        return manga(self, id, heading.string, url)

    def __iter__(self):
        """Yield a manga object for each usable entry of the alphabetical list.

        Entries whose link does not start with "/" are skipped. When the
        remainder of the link still contains a "/", only its last path
        component is used as the id, and only if it ends in ".html" (which
        is then stripped); otherwise the entry is skipped.
        """
        page = soupify(htcache.fetch(self.base + "alphabetical"))
        for sec in page.findAll("div", attrs={"class": "series_alpha"}):
            for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"):
                url = li.a["href"].encode("us-ascii")
                name = li.a.string
                if not url.startswith("/"):
                    continue
                mid = url[1:]
                if '/' in mid:
                    # Does this distinction mean something?
                    mid = mid[mid.rindex('/') + 1:]
                    if not mid.endswith(".html"):
                        continue
                    mid = mid[:-5]
                yield manga(self, mid, name, urlparse.urljoin(self.base, url))

    def byname(self, prefix):
        """Yield every manga whose lower-cased name starts with `prefix`.

        A non-unicode `prefix` is decoded as UTF-8 first; the comparison is
        done on the lower-cased prefix.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        # The loop variable is deliberately not called `manga`, so it does
        # not shadow the manga class.
        for entry in self:
            if entry.name.lower().startswith(prefix):
                yield entry

    def search(self, expr):
        """Yield every manga whose lower-cased name contains `expr`.

        A non-unicode `expr` is decoded as UTF-8 first; the comparison is
        done on the lower-cased expression.
        """
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for entry in self:
            if expr in entry.name.lower():
                yield entry