Began porting rawsen to Python3.
[automanga.git] / manga / rawsen.py
CommitLineData
e7cc7606
FT
1import bs4
2from . import lib, htcache
3from urllib.parse import urljoin
# Short alias for the BeautifulSoup constructor used throughout this module.
soup = bs4.BeautifulSoup


def soupify(cont):
    """Parse the markup *cont* and return the resulting BeautifulSoup tree."""
    return soup(cont)
50f7a215
FT
6
class page(lib.page):
    """A single page of a chapter.

    The image URL is not known up front; it is scraped from the page's HTML
    on the first call to iurl() and cached in ``ciurl`` afterwards.
    """

    def __init__(self, chapter, stack, n, url):
        """Create a page.

        chapter -- the chapter object this page belongs to; its ``manga``
                   attribute is copied onto the page.
        stack   -- list of (container, index) pairs locating this page.
        n       -- page number; used for both ``id`` and ``name``.
        url     -- URL of the HTML document that embeds the page image.
        """
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        # Python 3 has no `unicode` builtin; `str` is the text type.
        self.name = "Page " + str(n)
        self.url = url
        self.ciurl = None  # cached image URL, filled in lazily by iurl()

    def iurl(self):
        """Return the absolute URL of the page image.

        Fetches and parses ``self.url`` on first use. Raises Exception if no
        <img id="picture"> element is found inside any table row.
        """
        if self.ciurl is None:
            doc = soupify(htcache.fetch(self.url))
            for tr in doc.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    # No break: as before, the last matching row wins.
                    self.ciurl = urljoin(self.url, img["src"])
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        """Open the page image via lib.stdimgstream and return the result."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
37
class chapter(lib.pagelist):
    """A chapter: a lazily loaded, indexable list of page objects."""

    def __init__(self, manga, stack, id, name, url):
        """Store the chapter's identity; no network access happens here."""
        self.url = url
        self.name = name
        self.id = id
        self.manga = manga
        self.stack = stack
        self.cpag = None  # cached page list, built by pages()

    def __len__(self):
        return len(self.pages())

    def __getitem__(self, i):
        return self.pages()[i]

    def pages(self):
        """Return the list of pages, scraping it on first use.

        The chapter URL must end in "/1"; the other page URLs are formed by
        replacing that final "1" with each value offered by the pager's
        <select name="page"> element. Raises Exception if the URL does not
        have the expected form.
        """
        if self.cpag is not None:
            return self.cpag
        if self.url[-2:] != "/1":
            raise Exception("parse error: unexpected first page url for %r" % self)
        prefix = self.url[:-1]
        doc = soupify(htcache.fetch(self.url))
        pager = doc.find("div", attrs={"class": "pager"})
        selector = pager.find("select", attrs={"name": "page"})
        built = []
        for idx, opt in enumerate(selector.findAll("option")):
            num = int(opt["value"])
            target = urljoin(prefix, str(num))
            built.append(page(self, self.stack + [(self, idx)], num, target))
        self.cpag = built
        return self.cpag

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)

    def __str__(self):
        return self.name
72
class manga(lib.manga):
    """A manga: a lazily loaded, indexable list of chapter objects."""

    def __init__(self, lib, id, name, url):
        """Store the manga's identity; no network access happens here.

        lib  -- the library object that produced this manga.
        id   -- identifier of the manga within the library.
        name -- display name.
        url  -- URL of the manga's overview page.
        """
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None  # cached chapter list, built by ch()
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapters, scraping it on first use.

        Looks for the <div class="post"> whose <h3> heading contains
        "Chapter List" and reads one chapter per table row from the link in
        the row's second cell. Raises Exception if no such div is found.
        """
        if self.cch is None:
            doc = soupify(htcache.fetch(self.url))
            listing = None
            for div in doc.findAll("div", attrs={"class": "post"}):
                if div.h3 is None:
                    continue
                # bs4's .string is None when the tag has several children;
                # treat that as "no match" instead of raising TypeError.
                heading = div.h3.string
                if heading is not None and "Chapter List" in heading:
                    listing = div
                    break
            if listing is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in listing.table.findAll("tr"):
                cells = tr.findAll("td")
                # Rows without a second <td> (e.g. header rows) carry no
                # chapter link; skip them rather than raising IndexError.
                if len(cells) < 2:
                    continue
                link = cells[1].a
                if link is None:
                    continue
                url = link["href"]
                name = link["title"]
                cid = name  # the title doubles as the chapter id
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
115
class library(lib.library):
    """Manga library backed by the site at ``self.base``."""

    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga with identifier *id*.

        Fetches ``<base>/<id>/`` and looks for a <div id="post"> whose
        <h1> link points back at a URL whose last path component is *id*.
        Raises KeyError(id) if no such link (with a usable name) is found.
        """
        url = urljoin(self.base, id + "/")
        doc = soupify(htcache.fetch(url))
        name = None
        for div in doc.findAll("div", id="post"):
            if div.h1 is not None and div.h1.a is not None:
                curl = div.h1.a["href"]
                # endswith() instead of curl[-1] so an empty href is skipped
                # rather than raising IndexError.
                if not curl.endswith('/') or curl.rfind('/', 0, -1) < 0:
                    continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id:
                    continue
                name = div.h1.a.string
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Yield every manga listed on the ``Manga/`` index page.

        Only rows whose second cell holds a link of the form "/<id>/" are
        used; all other rows are skipped.
        """
        doc = soupify(htcache.fetch(self.base + "Manga/"))
        for part in doc.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"]
                name = link.string
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                mid = url[1:-1]
                yield manga(self, mid, name, urljoin(self.base, url))

    def byname(self, prefix):
        """Yield the mangas whose name starts with *prefix*, ignoring case.

        *prefix* may be text or UTF-8 encoded bytes.
        """
        # Python 3 has no `unicode` type: text is already `str`, so only
        # bytes input needs decoding.
        if isinstance(prefix, bytes):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for m in self:
            if m.name.lower().startswith(prefix):
                yield m

    def search(self, expr):
        """Yield the mangas whose name contains *expr*, ignoring case.

        *expr* may be text or UTF-8 encoded bytes.
        """
        # Python 3 has no `unicode` type: text is already `str`, so only
        # bytes input needs decoding.
        if isinstance(expr, bytes):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for m in self:
            if expr in m.name.lower():
                yield m