import re
from oxlib.cache import getUrlUnicode, DEFAULT_HEADERS
from oxlib.html import stripTags
from oxlib.text import findRe
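
# getUrlUnicode() uses DEFAULT_HEADERS by default, so adding the login cookie
# to the shared dict authenticates every request made below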
headers = DEFAULT_HEADERS
headers["Cookie"] = "uid=9829; pass=cd08329f960450b32218bd73a39f90f1"
def getData(id):
    data = {
        "url": getUrl(id)
    }
    # "&filelist=1" makes the details page include the list of files
    html = getUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
    if 'No torrent with ID' in html:
        return False
    data['added'] = stripTags(parseTable(html, 'Added'))
    data['country'] = findRe(html, 'title="([\w ]*?)" border="0" width="32" height="20"')
    # data['description'] = parseTable(html, 'Description')
    data['director'] = stripTags(parseTable(html, 'Director / Artist'))
    data['files'] = []
    # file list table: one file per row, name cell followed by size cell (e.g. "700MB")
    result = findRe(html, '<table[^>]*>(.*?)</table>')
    results = re.compile('<td[^>]*>(.*?)</td><td[^>]*>(.*?)</td>', re.DOTALL).findall(result)
    for name, size in results:
        data['files'].append({
            'name': name,
            'size': '%s %s' % (size[:-2], size[-2:].strip().upper())
        })
    data['format'] = ''
    if html.find('genreimages/dvdr.png') != -1:
        data['format'] = 'DVD'
    elif html.find('genreimages/hdrip.png') != -1:
        data['format'] = 'HD'
    data['genre'] = []
    result = parseTable(html, 'Genres')
    for string in result.split('\n'):
        string = stripTags(findRe(string, '<a href=".*?">(.*?)</a>'))
        if string:
            data['genre'].append(string)
    data['id'] = id
    data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
    data['language'] = stripTags(parseTable(html, 'Language'))
    data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)'))
    data['link'] = stripTags(parseTable(html, 'Internet Link'))
    data['links'] = []
    # external links from the description, skipping javascript: links and the anonym.to redirect
    results = re.compile('<a href="(.*?)">(.*?)</a>', re.DOTALL).findall(parseTable(html, 'Description'))
    for (url, title) in results:
        if url.find('javascript') == -1:
            data['links'].append({
                'title': title,
                'url': url.replace('http://anonym.to/?', '')
            })
    data['people'] = 0
    result = stripTags(findRe(html, '(.*?) seeder\(s\)'))
    if result.strip().isdigit():
        data['people'] = int(result)
    # size in bytes, taken from the "(1,234,567 bytes)" part of the Size cell
    data['size'] = int(findRe(parseTable(html, 'Size'), '\((.*?) ').replace(',', ''))
    data['snatched'] = int(findRe(html, '.*?colspan=2>(.*?) '))
    data['subtitle'] = findRe(parseTable(html, 'Subtitles'), '>(.*?)<br>').replace('included: ', '')
    data['subtitles'] = []
    results = re.compile('<a href="(.*?)">(.*?)</a>', re.DOTALL).findall(parseTable(html, 'Subtitles'))
    for (url, language) in results:
        data['subtitles'].append({
            'language': language.replace('click here for ', ''),
            'url': url
        })
    data['torrent'] = 'http://karagarga.net/%s' % findRe(html, '(down.php/.*?)"')
    data['year'] = stripTags(parseTable(html, 'Year'))
    # page heading is "<director> - <title> (<year>)"; strip the prefix and suffix
    data['title'] = stripTags(findRe(html, '<h1.*?>(.*?)</h1>')).strip()
    data['title'] = re.sub('^%s - ' % re.escape(data['director']), '', data['title'])
    data['title'] = re.sub(' \(%s\)$' % re.escape(data['year']), '', data['title'])
    return data
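
# extract the torrent id from a details-page url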
def getId(url):
    return url.split("=")[-1]
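
# walk the browse pages (newest first) until lastId turns up; returns all ids
# seen so far, sorted ascending, as strings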
def getIds(lastId = 20):
    lastId = '%s' % lastId
    ids = []
    page = 0
    while True:
        for id in getIdsByPage(page):
            if not id in ids:
                ids.append(id)
        if lastId in ids:
            break
        page += 1
    return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
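
# return the torrent ids listed on one browse page (category 1, sorted by date added)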
def getIdsByPage(page):
    ids = []
    url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
    html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
    strings = html.split('<tr')
    strings.pop(0)
    for string in strings:
        id = findRe(string, '"details.php\?id=(.*?)"')
        if id:
            ids.append(id)
    return ids
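
# build the details-page url for a torrent id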
def getUrl(id):
    return "http://karagarga.net/details.php?id=%s" % id
def parseTable(html, title):
    if title == 'Genres':
        return findRe(html, '<td class="heading"[^>]*>%s</td><td[^>]*>(.*?)</td>' % title)
    else:
        return findRe(html, '%s</td><td[^>]*>(.*?)</td>' % title)
if __name__ == "__main__":
    print getIds("79317")
    print getData("79317")