diff --git a/ox/web/karagarga.py b/ox/web/karagarga.py deleted file mode 100644 index 27b4d0a..0000000 --- a/ox/web/karagarga.py +++ /dev/null @@ -1,128 +0,0 @@ -# -*- coding: utf-8 -*- -# vi:si:et:sw=4:sts=4:ts=4 -import re -from ox import cache -from ox.html import stripTags -from ox.text import findRe - -import auth - - -def readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None): - headers = headers.copy() - headers["Cookie"] = auth.get("karagarga.cookie") - return cache.readUrl(url, data, headers, timeout) - -def readUrlUnicode(url, timeout=cache.cache_timeout): - return cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout) - -def getData(id): - data = { - "url": getUrl(id) - } - html = readUrlUnicode("%s%s" % (data["url"], "&filelist=1")) - if 'No torrent with ID' in html: - return False - data['added'] = stripTags(parseTable(html, 'Added')) - data['country'] = findRe(html, 'title="([\w ]*?)" border="0" width="32" height="20"') - # data['description'] = parseTable(html, 'Description') - data['director'] = stripTags(parseTable(html, 'Director / Artist')) - data['files'] = [] - result = findRe(html, '(.*?)
') - results = re.compile('(.*?)(.*?)', re.DOTALL).findall(result) - for name, size in results: - data['files'].append({ - 'name': name, - 'size': '%s %s' % (size[:-2], size[-2:].strip().upper()) - }) - data['format'] = '' - if html.find('genreimages/dvdr.png') != -1: - data['format'] = 'DVD' - elif html.find('genreimages/hdrip.png') != -1: - data['format'] = 'HD' - data['genre'] = [] - result = parseTable(html, 'Genres') - for string in result.split('\n'): - string = stripTags(findRe(string, '(.*?)')) - if string: - data['genre'].append(string) - data['id'] = id - data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})') - data['language'] = stripTags(parseTable(html, 'Language')) - data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)')) - data['link'] = stripTags(parseTable(html, 'Internet Link')) - data['links'] = [] - results = re.compile('(.*?)', re.DOTALL).findall(parseTable(html, 'Description')) - for (url, title) in results: - if url.find('javascript') == -1: - data['links'].append({ - 'title': title, - 'url': url.replace('http://anonym.to/?', '') - }) - data['people'] = 0 - result = stripTags(findRe(html, '(.*?) seeder\(s\)')) - data['size'] = int(findRe(parseTable(html, 'Size'), '\((.*?) ').replace(',', '')) - data['snatched'] = int(findRe(html, '.*?colspan=2>(.*?) ')) - data['subtitle'] = findRe(parseTable(html, 'Subtitles'), '>(.*?)
').replace('included: ', '') - data['subtitles'] = [] - results = re.compile('
(.*?)', re.DOTALL).findall(parseTable(html, 'Subtitles')) - for (url, language) in results: - data['subtitles'].append({ - 'language': language.replace('click here for ', ''), - 'url': url - }) - data['torrent'] = 'http://karagarga.net/%s' % findRe(html, '(down.php/.*?)"') - data['year'] = stripTags(parseTable(html, 'Year')) - data['title'] = stripTags(findRe(html, '

(.*?)

')).strip() - data['title'] = re.sub('^%s - ' % re.escape(data['director']), '', data['title']) - data['title'] = re.sub(' \(%s\)$' % re.escape(data['year']), '', data['title']) - return data - -def getId(url): - return url.split("=")[-1] - -def getTorrent(id): - return readUrl(getData(id)['torrent']) - -def getIds(lastId = 20): - lastId = '%s' % lastId - ids = [] - page = 0 - while True: - for id in getIdsByPage(page): - if not id in ids: - ids.append(id) - if lastId in ids: - break - page += 1 - return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids)))) - -def getIdsByPage(page): - ids = [] - url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page - html = readUrlUnicode(url, timeout = 23*60*60) #get new ids once per day - strings = html.split('') - strings.pop(0) - for string in strings: - ids.append(findRe(string, '"details.php\?id=(.*?)"')) - return ids - -def getUrl(id): - return "http://karagarga.net/details.php?id=%s" % id - -def parseTable(html, title): - if title == 'Genres': - return findRe(html, '