# manga/rawsen.py -- automanga library module for raw.senmanga.com
# (originally added in commit 50f7a215: "Added library for raw.senmanga.com.")
1import BeautifulSoup, urlparse
2import lib, htcache
3soup = BeautifulSoup.BeautifulSoup
4
class page(lib.page):
    """A single page of a chapter on raw.senmanga.com.

    The image URL is resolved lazily: the page HTML is only fetched and
    parsed the first time iurl() is called, then cached in self.ciurl.
    """
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n                        # 1-based page number within the chapter
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None                 # cached absolute image URL

    def iurl(self):
        """Return the absolute URL of this page's image, fetching on first use.

        Raises Exception on parse failure (no img element with id="picture").
        """
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            # The id attribute is unique in the document, so look the image up
            # directly instead of scanning every table row; this also finds it
            # even if the site stops nesting it inside a <tr>.
            img = page.find("img", id="picture")
            if img is None:
                raise Exception("parse error: could not find image url for %r" % self)
            self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
35
class chapter(lib.pagelist):
    """One chapter of a manga on raw.senmanga.com, acting as a page list.

    Pages are discovered lazily from the pager <select> on the chapter's
    first page and cached in self.cpag.
    """
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url        # URL of the chapter's first page (ends in "/1")
        self.cpag = None      # lazily built list of page objects

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the chapter's page objects, parsing the pager on first call.

        Raises Exception if the chapter URL does not end in "/1".
        """
        if self.cpag is not None:
            return self.cpag
        # Page URLs are formed by swapping the trailing page number of the
        # first-page URL, so insist on the expected ".../1" shape up front.
        if self.url[-2:] != "/1":
            raise Exception("parse error: unexpected first page url for %r" % self)
        base = self.url[:-1]
        doc = soup(htcache.fetch(self.url))
        pager = doc.find("div", attrs={"class": "pager"})
        options = pager.find("select", attrs={"name": "page"}).findAll("option")
        cpag = []
        for opt in options:
            num = int(opt["value"])
            purl = urlparse.urljoin(base, str(num))
            cpag.append(page(self, self.stack + [(self, len(cpag))], num, purl))
        self.cpag = cpag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
70
class manga(lib.manga):
    """A manga hosted on raw.senmanga.com, acting as a chapter list.

    Chapters are discovered lazily from the "Chapter List" post on the
    manga's front page and cached in self.cch.
    """
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None       # lazily built list of chapter objects
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the chapter objects, parsing the front page on first call.

        Raises Exception if no "Chapter List" post is found.
        """
        if self.cch is not None:
            return self.cch
        doc = soup(htcache.fetch(self.url))
        # Locate the post whose <h3> heading announces the chapter list.
        clist = None
        for div in doc.findAll("div", attrs={"class": "post"}):
            hdr = div.h3
            if hdr is not None and u"Chapter List" in hdr.string:
                clist = div
                break
        if clist is None:
            raise Exception("parse error: no chapter list found for %r" % self)
        cch = []
        for row in clist.table.findAll("tr"):
            # The chapter link lives in the second column; skip header rows
            # and any row without a link.
            link = row.findAll("td")[1].a
            if link is None:
                continue
            curl = link["href"].encode("us-ascii")
            cname = link["title"]
            cch.append(chapter(self, [(self, len(cch))], cname.encode("utf-8"), cname, curl))
        self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
113
class library(lib.library):
    """The raw.senmanga.com site as an automanga library."""
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga with the given URL id.

        The candidate front page is fetched and a heading linking back to
        ".../<id>/" is required; raises KeyError if none is found.
        """
        url = urlparse.urljoin(self.base, id + "/")
        doc = soup(htcache.fetch(url))
        name = None
        for div in doc.findAll("div", attrs={"class": "post"}):
            hdr = div.h2
            if hdr is None or hdr.a is None:
                continue
            curl = hdr.a["href"].encode("us-ascii")
            # Only accept links shaped like ".../<id>/".
            if curl[-1] != '/':
                continue
            sep = curl.rfind('/', 0, -1)
            if sep < 0 or curl[sep + 1:-1] != id:
                continue
            name = hdr.a.string
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Iterate over every manga in the site's directory listing."""
        doc = soup(htcache.fetch(self.base + "Manga/"))
        listing = doc.find("div", attrs={"class": "post"})
        for table in listing.findAll("table"):
            for row in table.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"].encode("us-ascii")
                name = link.string
                # Manga links look like "/<id>/"; skip anything else.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                yield manga(self, url[1:-1], name, urlparse.urljoin(self.base, url))

    def byname(self, prefix):
        """Yield mangas whose name starts with prefix (case-insensitive)."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        key = prefix.lower()
        for mng in self:
            if mng.name.lower().startswith(key):
                yield mng

    def search(self, expr):
        """Yield mangas whose name contains expr (case-insensitive)."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        needle = expr.lower()
        for mng in self:
            if needle in mng.name.lower():
                yield mng
160 yield manga