local: Treat dots/periods as digits in destructuring directories.
[automanga.git] / manga / mrnet.py
1 import bs4
2 from urllib.parse import urljoin
3 from . import lib, htcache
soup = bs4.BeautifulSoup


def soupify(cont):
    """Parse *cont* into a BeautifulSoup tree using the "html.parser" backend."""
    return soup(cont, "html.parser")
6
class page(lib.page):
    """A single page of a chapter; its image URL is looked up on demand."""

    def __init__(self, chapter, stack, n, url):
        """Record the page's position and the URL of its HTML viewer page.

        chapter -- the owning chapter object (its ``manga`` attribute is copied)
        stack   -- stored as-is on ``self.stack``
        n       -- page number; also used to derive ``id`` and ``name``
        url     -- URL of the HTML page that embeds the image
        """
        self.chapter = chapter
        self.manga = chapter.manga
        self.stack = stack
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.url = url
        # Cached image URL; stays None until iurl() has resolved it.
        self.ciurl = None

    def iurl(self):
        """Return the image URL, fetching and parsing the viewer page once."""
        if self.ciurl is not None:
            return self.ciurl
        doc = soupify(htcache.fetch(self.url))
        holder = doc.find("div", id="imgholder")
        # The image is the <img id="img"> inside <div id="imgholder">.
        self.ciurl = holder.find("img", id="img")["src"]
        return self.ciurl

    def open(self):
        """Open the page image via lib.stdimgstream on the resolved image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        parts = (self.manga.name, self.chapter.name, self.name)
        return "<mrnet.page %r.%r.%r>" % parts
32
class chapter(lib.pagelist):
    """A chapter of a manga; behaves as a lazily loaded sequence of pages."""

    def __init__(self, manga, stack, id, name, url):
        """Store the chapter's identity; the page list is not fetched yet."""
        self.manga = manga
        self.stack = stack
        self.id = id
        self.name = name
        self.url = url
        # Cached list of page objects; stays None until pages() has run.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the list of pages, scraping the chapter page on first call.

        Each <option> of the "pageMenu" selector yields one page: its
        ``value`` is resolved against the chapter URL and its text is parsed
        as the integer page number.
        """
        if self.cpag is not None:
            return self.cpag
        doc = soupify(htcache.fetch(self.url))
        menu = doc.find("div", id="selectpage").find("select", id="pageMenu")
        found = []
        # idx is the position of the page within this chapter's list.
        for idx, opt in enumerate(menu.findAll("option")):
            target = urljoin(self.url, opt["value"])
            number = int(opt.string)
            found.append(page(self, self.stack + [(self, idx)], number, target))
        self.cpag = found
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.chapter %r.%r>" % (self.manga.name, self.name)
64
class manga(lib.manga):
    """A manga title; behaves as a lazily loaded sequence of chapters."""

    def __init__(self, lib, id, name, url):
        """Store the manga's identity; the chapter list is not fetched yet.

        lib  -- the library object this manga was obtained from
        id   -- identifier string of the manga
        name -- display name
        url  -- URL of the manga's overview page
        """
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached list of chapter objects; stays None until ch() has run.
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapters, scraping the overview page on first call.

        Chapters are read from the rows of the "listing" table inside the
        "chapterlist" div. The link text is used as both chapter id and base
        name; if the text node following the link starts with " : " and has
        more after it, that remainder is appended to the name after ": ".
        """
        if self.cch is None:
            doc = soupify(htcache.fetch(self.url))
            listing = doc.find("div", id="chapterlist").find("table", id="listing")
            chapters = []
            for row in listing.findAll("tr"):
                cell = row.find("td")
                if cell is None:
                    # Row has no <td> at all (presumably a header row); skip it.
                    continue
                link = cell.find("a")
                url = urljoin(self.url, link["href"])
                cid = name = link.string
                trailer = link.nextSibling
                if isinstance(trailer, str):
                    text = str(trailer)
                    if len(text) > 3 and text.startswith(" : "):
                        name += ": " + text[3:]
                # The stack entry records this chapter's index within the manga.
                chapters.append(chapter(self, [(self, len(chapters))], cid, name, url))
            self.cch = chapters
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mrnet.manga %r>" % self.name
105
class library(lib.library):
    """Entry point for looking up and enumerating manga on the site."""

    def __init__(self):
        # Site root; manga ids and listing paths are appended to it.
        self.base = "http://www.mangareader.net/"

    def byid(self, id):
        """Return the manga whose page lives at ``base + id``.

        Raises KeyError(id) when the fetched page has no <h2 class="aname">
        heading, which is where the manga's name is read from.
        """
        url = self.base + id
        doc = soupify(htcache.fetch(url))
        # Look the heading up once and reuse it for both the check and the name.
        heading = doc.find("h2", attrs={"class": "aname"})
        if heading is None:
            raise KeyError(id)
        return manga(self, id, heading.string, url)

    def __iter__(self):
        """Yield a manga object for every usable entry of the alphabetical list.

        Links not starting with "/" are skipped. For links with further
        slashes in the path, only the last component is used and it must end
        in ".html" (which is stripped); otherwise the entry is skipped.
        """
        doc = soupify(htcache.fetch(self.base + "alphabetical"))
        for sec in doc.findAll("div", attrs={"class": "series_alpha"}):
            for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"):
                url = li.a["href"]
                name = li.a.string
                if not url.startswith("/"):
                    continue
                id = url[1:]
                if "/" in id:
                    # Does this distinction mean something?
                    id = id.rsplit("/", 1)[1]
                    if not id.endswith(".html"):
                        continue
                    id = id[:-5]
                yield manga(self, id, name, urljoin(self.base, url))

    def byname(self, prefix):
        """Yield every manga whose name starts with *prefix*, ignoring case."""
        prefix = prefix.lower()
        # Loop variable deliberately not called "manga": that would shadow
        # the manga class within this method.
        for entry in self:
            if entry.name.lower().startswith(prefix):
                yield entry

    def search(self, expr):
        """Yield every manga whose name contains *expr*, ignoring case."""
        expr = expr.lower()
        for entry in self:
            if expr in entry.name.lower():
                yield entry