X-Git-Url: http://dolda2000.com/gitweb/?a=blobdiff_plain;f=manga%2Fhtcache.py;h=a53aa45a595e9cbfe13696ae8f5f21e4232c3396;hb=90b3abc10f9013fc043a82b8eb8e17397da0bb61;hp=2f71f9aa693b5ec8ceafcf86f5c999c8d7d859b8;hpb=d6c0e1899d26991207f124fb01e4ca40bc4ce5c1;p=automanga.git diff --git a/manga/htcache.py b/manga/htcache.py index 2f71f9a..a53aa45 100644 --- a/manga/htcache.py +++ b/manga/htcache.py @@ -1,39 +1,50 @@ -import os, md5, urllib, time +import os, hashlib, urllib.request, time +from . import profile pj = os.path.join +class notfound(Exception): + pass + class cache(object): def __init__(self, dir): self.dir = dir def mangle(self, url): - n = md5.new() - n.update(url) + n = hashlib.md5() + n.update(url.encode("ascii")) return n.hexdigest() + def open(self, url): + req = urllib.request.Request(url, headers={"User-Agent": "automanga/1"}) + return urllib.request.urlopen(req) + def miss(self, url): - s = urllib.urlopen(url) try: + s = self.open(url) + except urllib.error.HTTPError as exc: + if exc.code == 404: + raise notfound(url) + raise + with s: + if s.headers.get("content-encoding") == "gzip": + import gzip, io + return gzip.GzipFile(fileobj=io.BytesIO(s.read()), mode="r").read() return s.read() - finally: - s.close() - def fetch(self, url, expire = 3600): + def fetch(self, url, expire=3600): path = pj(self.dir, self.mangle(url)) if os.path.exists(path): if time.time() - os.stat(path).st_mtime < expire: - with open(path) as f: + with open(path, "rb") as f: return f.read() data = self.miss(url) if not os.path.isdir(self.dir): os.makedirs(self.dir) - with open(path, "w") as f: + with open(path, "wb") as f: f.write(data) return data -home = os.getenv("HOME") -if home is None or not os.path.isdir(home): - raise Exception("Could not find home directory for HTTP caching") -default = cache(pj(home, ".manga", "htcache")) +default = cache(pj(profile.confdir, "htcache")) -def fetch(url, expire = 3600): +def fetch(url, expire=3600): return default.fetch(url, expire)