From ef200411850ce1eac804c85e37fa1d7d28f0269e Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 24 Aug 2010 19:08:03 +0200
Subject: [PATCH] cleanup

---
 oxweb/aaaarg.py    |   61 ---------------------
 oxweb/auth.py      |   17 +++++-
 oxweb/imdb.py      |   19 ++++---
 oxweb/karagarga.py |  128 ---------------------------------------------
 4 files changed, 26 insertions(+), 199 deletions(-)
 delete mode 100644 oxweb/aaaarg.py
 delete mode 100644 oxweb/karagarga.py

diff --git a/oxweb/aaaarg.py b/oxweb/aaaarg.py
deleted file mode 100644
index 4e5462a..0000000
--- a/oxweb/aaaarg.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-# vi:si:et:sw=4:sts=4:ts=4
-import re
-import os
-import string
-
-from oxlib import cache
-from oxlib.html import stripTags, decodeHtml
-from oxlib.text import findRe
-from oxlib.normalize import canonicalName
-import auth
-
-
-def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
-    headers = headers.copy()
-    headers["Cookie"] = auth.get("aaaarg.cookie")
-    return cache.readUrl(url, data, headers, timeout)
-
-def readUrlUnicode(url, timeout=cache.cache_timeout):
-    return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)
-
-def downloadText(id, filename=None):
-    #FIXME, what about the cache, this keeps all pdfs in oxcache...
-    url='http://a.aaaarg.org/node/%d/download' % id
-    data = readUrl(url, timeout=-1)
-    headers = cache.getHeaders(url, timeout=-1)
-    if filename:
-        with open(filename, "w") as f:
-            f.write(data)
-        return
-    return data
-
-def getTextByLetter(letter):
-    texts = []
-    url = 'http://a.aaaarg.org/library/%s' % letter
-    data = readUrlUnicode(url)
-    txts = re.compile('(.*?)(.*?)').findall(data)
-    author = 'Unknown Author'
-    for r in txts:
-        if r[0] != ' ':
-            author = r[0]
-        link = r[1]
-        id = findRe(link, '/(\d+)')
-        title = decodeHtml(r[2])
-        author_foder = canonicalName(author)
-        author_foder = os.path.join(author_foder[0], author_foder)
-        filename = os.path.join(author_foder, '%s (aaarg %s).pdf' % (title.replace('/', '_'), id))
-        texts.append({
-            'author': author,
-            'title': title,
-            'id': id,
-            'filename': filename,
-        })
-    return texts
-
-def getTexts():
-    texts = []
-    for letter in string.letters[:26]:
-        texts += getTextByLetter(letter)
-    return texts
-
diff --git a/oxweb/auth.py b/oxweb/auth.py
index fdb283d..8d1cde9 100644
--- a/oxweb/auth.py
+++ b/oxweb/auth.py
@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2009
 import os
-import simplejson
+import simplejson as json
 
 
 def get(key):
@@ -12,9 +12,22 @@ def get(key):
     f = open(user_auth, "r")
     data = f.read()
     f.close()
-    auth = simplejson.loads(data)
+    auth = json.loads(data)
     if key in auth:
         return auth[key]
     print "please add key %s to json file '%s'" % (key, user_auth)
     return ""
 
+def update(key, value):
+    user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
+    auth = {}
+    if os.path.exists(user_auth):
+        f = open(user_auth, "r")
+        data = f.read()
+        f.close()
+        auth = json.loads(data)
+    auth[key] = value
+    f = open(user_auth, "w")
+    f.write(json.dumps(auth, indent=2))
+    f.close()
+
diff --git a/oxweb/imdb.py b/oxweb/imdb.py
index f95f847..ad4b040 100644
--- a/oxweb/imdb.py
+++ b/oxweb/imdb.py
@@ -368,19 +368,22 @@ def getMoviePosters(imdbId):
 
 def getMovieTrivia(imdbId):
     url = "%strivia" % getUrlBase(imdbId)
-    data = readUrlUnicode(url)
-    data = findRe(data, '')
+    data_ = readUrlUnicode(url)
+    data = findRe(data_, '')
     trivia = re.compile('(.*?)', re.DOTALL).findall(data)
     def clean(t):
         t = decodeHtml(t)
-        t = t.replace(u'”', '"')
-        if t.endswith('<br><br>'):
-            t = t[:-8]
-        if t.endswith('<br>\n<br>'):
-            t = t[:-len('<br>\n<br>')]
+        t = t.replace(u'”', '"').strip()
+        for s in ('<br><br>', '<br>\n<br>', '<br>'):
+            if t.endswith(s):
+                t = t[:-len(s)].strip()
         return t.strip()
+    if len(trivia) == 0:
+        trivia = re.compile('(.*?)(.*?)')
diff --git a/oxweb/karagarga.py b/oxweb/karagarga.py
deleted file mode 100644
--- a/oxweb/karagarga.py
+++ /dev/null
-    results = re.compile('(.*?)(.*?)', re.DOTALL).findall(result)
-    for name, size in results:
-        data['files'].append({
-            'name': name,
-            'size': '%s %s' % (size[:-2], size[-2:].strip().upper())
-        })
-    data['format'] = ''
-    if html.find('genreimages/dvdr.png') != -1:
-        data['format'] = 'DVD'
-    elif html.find('genreimages/hdrip.png') != -1:
-        data['format'] = 'HD'
-    data['genre'] = []
-    result = parseTable(html, 'Genres')
-    for string in result.split('\n'):
-        string = stripTags(findRe(string, '(.*?)'))
-        if string:
-            data['genre'].append(string)
-    data['id'] = id
-    data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
-    data['language'] = stripTags(parseTable(html, 'Language'))
-    data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)'))
-    data['link'] = stripTags(parseTable(html, 'Internet Link'))
-    data['links'] = []
-    results = re.compile('(.*?)', re.DOTALL).findall(parseTable(html, 'Description'))
-    for (url, title) in results:
-        if url.find('javascript') == -1:
-            data['links'].append({
-                'title': title,
-                'url': url.replace('http://anonym.to/?', '')
-            })
-    data['people'] = 0
-    result = stripTags(findRe(html, '(.*?) seeder\(s\)'))
-    data['size'] = int(findRe(parseTable(html, 'Size'), '\((.*?) ').replace(',', ''))
-    data['snatched'] = int(findRe(html, '.*?colspan=2>(.*?) '))
-    data['subtitle'] = findRe(parseTable(html, 'Subtitles'), '>(.*?)').replace('included: ', '')
-    data['subtitles'] = []
-    results = re.compile('(.*?)', re.DOTALL).findall(parseTable(html, 'Subtitles'))
-    for (url, language) in results:
-        data['subtitles'].append({
-            'language': language.replace('click here for ', ''),
-            'url': url
-        })
-    data['torrent'] = 'http://karagarga.net/%s' % findRe(html, '(down.php/.*?)"')
-    data['year'] = stripTags(parseTable(html, 'Year'))
-    data['title'] = stripTags(findRe(html, '(.*?)')).strip()
-    data['title'] = re.sub('^%s - ' % re.escape(data['director']), '', data['title'])
-    data['title'] = re.sub(' \(%s\)$' % re.escape(data['year']), '', data['title'])
-    return data
-
-def getId(url):
-    return url.split("=")[-1]
-
-def getTorrent(id):
-    return readUrl(getData(id)['torrent'])
-
-def getIds(lastId = 20):
-    lastId = '%s' % lastId
-    ids = []
-    page = 0
-    while True:
-        for id in getIdsByPage(page):
-            if not id in ids:
-                ids.append(id)
-        if lastId in ids:
-            break
-        page += 1
-    return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
-
-def getIdsByPage(page):
-    ids = []
-    url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
-    html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
-    strings = html.split('')
-    strings.pop(0)
-    for string in strings:
-        ids.append(findRe(string, '"details.php\?id=(.*?)"'))
-    return ids
-
-def getUrl(id):
-    return "http://karagarga.net/details.php?id=%s" % id
-
-def parseTable(html, title):
-    if title == 'Genres':
-        return findRe(html, '