[automanga.git] / manga / rawsen.py

import bs4
from . import lib, htcache
from urllib.parse import urljoin
# Alias for the BeautifulSoup constructor.
soup = bs4.BeautifulSoup


def soupify(cont):
    """Parse *cont* into a BeautifulSoup tree, letting bs4 pick the parser."""
    return soup(cont)

class page(lib.page):
    """A single page of a chapter.

    The URL of the actual image is scraped from the page's HTML the first
    time it is needed and cached in ``ciurl``.
    """

    def __init__(self, chapter, stack, n, url):
        """Store the page's identity; nothing is fetched here.

        chapter -- the chapter object this page belongs to
        stack   -- list of (container, index) pairs leading to this page
        n       -- page number
        url     -- URL of the HTML page that embeds the image
        """
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        # Python 3 has no `unicode` builtin (this module already relies on
        # urllib.parse); `str` is the text type.
        self.name = "Page " + str(n)
        self.url = url
        self.ciurl = None  # cached image URL, filled in by iurl()

    def iurl(self):
        """Return the absolute image URL, fetching and parsing the page once.

        Raises Exception if no ``<img id="picture">`` is found inside any
        table row of the page.
        """
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            # No early break: if several rows contain a match, the last one
            # wins, as before.
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    # The src attribute may be relative to the page URL.
                    self.ciurl = urljoin(self.url, img["src"])
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        """Return whatever ``lib.stdimgstream`` yields for the image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)

class chapter(lib.pagelist):
    """A chapter of a manga: a lazily loaded, indexable list of pages."""

    def __init__(self, manga, stack, id, name, url):
        """Store the chapter's identity; nothing is fetched here.

        manga -- the manga object this chapter belongs to
        stack -- list of (container, index) pairs leading to this chapter
        id    -- chapter identifier
        name  -- display name of the chapter
        url   -- URL of the chapter's first page (must end in "/1")
        """
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None  # cached list of page objects, filled in by pages()

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the list of page objects, scraping the first page once.

        The page numbers are read from the ``<option>`` values of the
        ``<select name="page">`` inside ``<div class="pager">``.

        Raises Exception if the chapter URL does not end in "/1" or if the
        page selector cannot be found.
        """
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            # Strip the trailing "1" so page numbers can be joined onto "…/".
            base = self.url[:-1]
            pg = soupify(htcache.fetch(self.url))
            pager = pg.find("div", attrs={"class": "pager"})
            selector = None
            if pager is not None:
                selector = pager.find("select", attrs={"name": "page"})
            if selector is None:
                # Report a layout change as a parse error rather than letting
                # an AttributeError on None escape.
                raise Exception("parse error: no page selector found for %r" % self)
            pag = []
            for opt in selector.findAll("option"):
                n = int(opt["value"])
                url = urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)

class manga(lib.manga):
    """A manga: a lazily loaded, indexable list of chapters."""

    def __init__(self, lib, id, name, url):
        """Store the manga's identity; nothing is fetched here.

        lib  -- the library object that produced this manga
        id   -- manga identifier
        name -- display name of the manga
        url  -- URL of the manga's overview page
        """
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None  # cached list of chapter objects, filled in by ch()
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Return the list of chapter objects, scraping the overview page once.

        The chapter table is taken from the first ``<div class="post">``
        whose ``<h3>`` text contains "Chapter List".  Each table row
        contributes one chapter, built from the link in its second cell.

        Raises Exception if no such div (or no table inside it) is found.
        """
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                heading = div.h3
                if heading is None:
                    continue
                # .string is None when the heading has more than one child
                # node; `in None` would raise TypeError.
                text = heading.string
                if text is not None and "Chapter List" in text:
                    cls = div
                    break
            if cls is None or cls.table is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                cells = tr.findAll("td")
                # Rows with fewer than two <td> cells carry no chapter link.
                if len(cells) < 2:
                    continue
                link = cells[1].a
                if link is None:
                    continue
                # Resolve against the overview URL so relative hrefs work;
                # absolute hrefs pass through unchanged.
                url = urljoin(self.url, link["href"])
                name = link["title"]
                cid = name
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name

class library(lib.library):
    """Entry point for looking up manga on raw.senmanga.com."""

    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga whose identifier is *id*.

        The manga's page is fetched and its name is taken from a
        ``<div id="post">`` whose ``<h1>`` link ends in "/<id>/".

        Raises KeyError(id) if no such heading link is found.
        """
        url = urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", id="post"):
            if div.h1 is None or div.h1.a is None:
                continue
            curl = div.h1.a["href"]
            # endswith() also copes with an empty href, where curl[-1]
            # would raise IndexError.
            if not curl.endswith("/"):
                continue
            # Position of the slash preceding the final path component.
            sep = curl.rfind("/", 0, -1)
            if sep < 0:
                continue
            if curl[sep + 1:-1] != id:
                continue
            name = div.h1.a.string
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Yield a manga object for every usable link in the site listing.

        The listing is fetched from ``<base>Manga/``.  Only links of the
        form "/<id>/" in the second cell of a table row are used.
        """
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                cells = row.findAll("td")
                # Rows with fewer than two <td> cells carry no manga link.
                if len(cells) < 2:
                    continue
                link = cells[1].a
                if link is None:
                    continue
                url = link["href"]
                name = link.string
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                mid = url[1:-1]
                yield manga(self, mid, name, urljoin(self.base, url))

    def byname(self, prefix):
        """Yield every manga whose name starts with *prefix*, ignoring case.

        *prefix* may be a str or UTF-8 encoded bytes.
        """
        # Python 3 has no `unicode` builtin; only bytes need decoding.
        if isinstance(prefix, bytes):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for m in self:
            if m.name.lower().startswith(prefix):
                yield m

    def search(self, expr):
        """Yield every manga whose name contains *expr*, ignoring case.

        *expr* may be a str or UTF-8 encoded bytes.
        """
        # Python 3 has no `unicode` builtin; only bytes need decoding.
        if isinstance(expr, bytes):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for m in self:
            if expr in m.name.lower():
                yield m
Commit	Line	Data
e7cc7606 FT	1	import bs4
	2	from . import lib, htcache
	3	from urllib.parse import urljoin
	4	soup = bs4.BeautifulSoup
	5	soupify = lambda cont: soup(cont)
50f7a215 FT	6
	7	class page(lib.page):
	8	def __init__(self, chapter, stack, n, url):
	9	self.stack = stack
	10	self.chapter = chapter
	11	self.manga = chapter.manga
	12	self.n = n
	13	self.id = str(n)
e7cc7606	14	self.name = "Page " + unicode(n)
50f7a215 FT	15	self.url = url
	16	self.ciurl = None
	17
	18	def iurl(self):
	19	if self.ciurl is None:
c0d3b1a2	20	page = soupify(htcache.fetch(self.url))
50f7a215 FT	21	for tr in page.findAll("tr"):
	22	img = tr.find("img", id="picture")
	23	if img is not None:
e7cc7606	24	self.ciurl = urljoin(self.url, img["src"])
50f7a215 FT	25	if self.ciurl is None:
	26	raise Exception("parse error: could not find image url for %r" % self)
	27	return self.ciurl
	28
	29	def open(self):
	30	return lib.stdimgstream(self.iurl())
	31
	32	def __str__(self):
	33	return self.name
	34
	35	def __repr__(self):
	36	return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
	37
	38	class chapter(lib.pagelist):
	39	def __init__(self, manga, stack, id, name, url):
	40	self.stack = stack
	41	self.manga = manga
	42	self.id = id
	43	self.name = name
	44	self.url = url
	45	self.cpag = None
	46
	47	def __getitem__(self, i):
	48	return self.pages()[i]
	49
	50	def __len__(self):
	51	return len(self.pages())
	52
	53	def pages(self):
	54	if self.cpag is None:
	55	if self.url[-2:] != "/1":
	56	raise Exception("parse error: unexpected first page url for %r" % self)
	57	base = self.url[:-1]
c0d3b1a2	58	pg = soupify(htcache.fetch(self.url))
50f7a215 FT	59	pag = []
	60	for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
	61	n = int(opt["value"])
e7cc7606	62	url = urljoin(base, str(n))
50f7a215 FT	63	pag.append(page(self, self.stack + [(self, len(pag))], n, url))
	64	self.cpag = pag
	65	return self.cpag
	66
	67	def __str__(self):
	68	return self.name
	69
	70	def __repr__(self):
	71	return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
	72
	73	class manga(lib.manga):
	74	def __init__(self, lib, id, name, url):
	75	self.lib = lib
	76	self.id = id
	77	self.name = name
	78	self.url = url
	79	self.cch = None
	80	self.stack = []
	81
	82	def __getitem__(self, i):
	83	return self.ch()[i]
	84
	85	def __len__(self):
	86	return len(self.ch())
	87
	88	def ch(self):
	89	if self.cch is None:
c0d3b1a2	90	page = soupify(htcache.fetch(self.url))
50f7a215 FT	91	cls = None
50f7a215 FT	92	for div in page.findAll("div", attrs={"class": "post"}):
e7cc7606	93	if div.h3 is not None and "Chapter List" in div.h3.string:
50f7a215 FT	94	cls = div
	95	break
	96	if cls is None:
	97	raise Exception("parse error: no chapter list found for %r" % self)
	98	cch = []
	99	for tr in cls.table.findAll("tr"):
	100	lcol = tr.findAll("td")[1]
	101	if lcol.a is None: continue
	102	link = lcol.a
e7cc7606	103	url = link["href"]
50f7a215	104	name = link["title"]
e7cc7606	105	cid = name
50f7a215 FT	106	cch.append(chapter(self, [(self, len(cch))], cid, name, url))
	107	self.cch = cch
	108	return self.cch
	109
	110	def __str__(self):
	111	return self.name
	112
	113	def __repr__(self):
	114	return "<rawsen.manga %r>" % self.name
	115
	116	class library(lib.library):
	117	def __init__(self):
	118	self.base = "http://raw.senmanga.com/"
	119
	120	def byid(self, id):
e7cc7606	121	url = urljoin(self.base, id + "/")
c0d3b1a2	122	page = soupify(htcache.fetch(url))
50f7a215	123	name = None
e7cc7606 FT	124	for div in page.findAll("div", id="post"):
	125	if div.h1 is not None and div.h1.a is not None:
	126	curl = div.h1.a["href"]
50f7a215 FT	127	if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
50f7a215 FT	128	if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
e7cc7606	129	name = div.h1.a.string
50f7a215 FT	130	if name is None:
	131	raise KeyError(id)
	132	return manga(self, id, name, url)
	133
	134	def __iter__(self):
c0d3b1a2	135	page = soupify(htcache.fetch(self.base + "Manga/"))
50f7a215 FT	136	for part in page.find("div", attrs={"class": "post"}).findAll("table"):
	137	for row in part.findAll("tr"):
	138	link = row.findAll("td")[1].a
	139	if link is None:
	140	continue
e7cc7606	141	url = link["href"]
50f7a215 FT	142	name = link.string
	143	if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
	144	continue
	145	id = url[1:-1]
e7cc7606	146	yield manga(self, id, name, urljoin(self.base, url))
50f7a215 FT	147
	148	def byname(self, prefix):
	149	if not isinstance(prefix, unicode):
	150	prefix = prefix.decode("utf8")
	151	prefix = prefix.lower()
	152	for manga in self:
	153	if manga.name.lower()[:len(prefix)] == prefix:
	154	yield manga
	155
	156	def search(self, expr):
	157	if not isinstance(expr, unicode):
	158	expr = expr.decode("utf8")
	159	expr = expr.lower()
	160	for manga in self:
	161	if expr in manga.name.lower():
	162	yield manga