From ef200411850ce1eac804c85e37fa1d7d28f0269e Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 24 Aug 2010 19:08:03 +0200
Subject: [PATCH] cleanup
---
oxweb/aaaarg.py | 61 ---------------------
oxweb/auth.py | 17 +++++-
oxweb/imdb.py | 19 ++++---
oxweb/karagarga.py | 128 ---------------------------------------------
4 files changed, 26 insertions(+), 199 deletions(-)
delete mode 100644 oxweb/aaaarg.py
delete mode 100644 oxweb/karagarga.py
diff --git a/oxweb/aaaarg.py b/oxweb/aaaarg.py
deleted file mode 100644
index 4e5462a..0000000
--- a/oxweb/aaaarg.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-# vi:si:et:sw=4:sts=4:ts=4
-import re
-import os
-import string
-
-from oxlib import cache
-from oxlib.html import stripTags, decodeHtml
-from oxlib.text import findRe
-from oxlib.normalize import canonicalName
-import auth
-
-
-def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
- headers = headers.copy()
- headers["Cookie"] = auth.get("aaaarg.cookie")
- return cache.readUrl(url, data, headers, timeout)
-
-def readUrlUnicode(url, timeout=cache.cache_timeout):
- return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)
-
-def downloadText(id, filename=None):
- #FIXME, what about the cache, this keeps all pdfs in oxcache...
- url='http://a.aaaarg.org/node/%d/download' % id
- data = readUrl(url, timeout=-1)
- headers = cache.getHeaders(url, timeout=-1)
- if filename:
- with open(filename, "w") as f:
- f.write(data)
- return
- return data
-
-def getTextByLetter(letter):
- texts = []
- url = 'http://a.aaaarg.org/library/%s' % letter
- data = readUrlUnicode(url)
- txts = re.compile('
(.*?)
(.*?)')
- results = re.compile('(.*?) | (.*?) | ', re.DOTALL).findall(result)
- for name, size in results:
- data['files'].append({
- 'name': name,
- 'size': '%s %s' % (size[:-2], size[-2:].strip().upper())
- })
- data['format'] = ''
- if html.find('genreimages/dvdr.png') != -1:
- data['format'] = 'DVD'
- elif html.find('genreimages/hdrip.png') != -1:
- data['format'] = 'HD'
- data['genre'] = []
- result = parseTable(html, 'Genres')
- for string in result.split('\n'):
- string = stripTags(findRe(string, '
(.*?)'))
- if string:
- data['genre'].append(string)
- data['id'] = id
- data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
- data['language'] = stripTags(parseTable(html, 'Language'))
- data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)'))
- data['link'] = stripTags(parseTable(html, 'Internet Link'))
- data['links'] = []
- results = re.compile('
(.*?)', re.DOTALL).findall(parseTable(html, 'Description'))
- for (url, title) in results:
- if url.find('javascript') == -1:
- data['links'].append({
- 'title': title,
- 'url': url.replace('http://anonym.to/?', '')
- })
- data['people'] = 0
- result = stripTags(findRe(html, '
(.*?) seeder\(s\)'))
- data['size'] = int(findRe(parseTable(html, 'Size'), '\((.*?) ').replace(',', ''))
- data['snatched'] = int(findRe(html, '.*?colspan=2>(.*?) '))
- data['subtitle'] = findRe(parseTable(html, 'Subtitles'), '>(.*?)
').replace('included: ', '')
- data['subtitles'] = []
- results = re.compile('(.*?)', re.DOTALL).findall(parseTable(html, 'Subtitles'))
- for (url, language) in results:
- data['subtitles'].append({
- 'language': language.replace('click here for ', ''),
- 'url': url
- })
- data['torrent'] = 'http://karagarga.net/%s' % findRe(html, '(down.php/.*?)"')
- data['year'] = stripTags(parseTable(html, 'Year'))
- data['title'] = stripTags(findRe(html, '
(.*?)
')).strip()
- data['title'] = re.sub('^%s - ' % re.escape(data['director']), '', data['title'])
- data['title'] = re.sub(' \(%s\)$' % re.escape(data['year']), '', data['title'])
- return data
-
-def getId(url):
- return url.split("=")[-1]
-
-def getTorrent(id):
- return readUrl(getData(id)['torrent'])
-
-def getIds(lastId = 20):
- lastId = '%s' % lastId
- ids = []
- page = 0
- while True:
- for id in getIdsByPage(page):
- if not id in ids:
- ids.append(id)
- if lastId in ids:
- break
- page += 1
- return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
-
-def getIdsByPage(page):
- ids = []
- url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
- html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
- strings = html.split('
')
- strings.pop(0)
- for string in strings:
- ids.append(findRe(string, '"details.php\?id=(.*?)"'))
- return ids
-
-def getUrl(id):
- return "http://karagarga.net/details.php?id=%s" % id
-
-def parseTable(html, title):
- if title == 'Genres':
- return findRe(html, ' | %s | (.*?)' % title)
- else:
- return findRe(html, '
%s | (.*?)' % title)
-
-if __name__ == "__main__":
- print getIds("79317")
- print getData("79317")