add some sites, fix some bugs
commit 7c90d07438
parent 1101c6bec6
7 changed files with 86 additions and 10 deletions
@@ -5,4 +5,5 @@ __version__ = '0.1.0'
 import imdb
 import wikipedia
 import google
+import piratecinema
+import oxdb
@@ -30,14 +30,20 @@ def getShowData(url):
     for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
         air_date = episode[3].strip()
         #'22 Sep 04' -> 2004-09-22
-        air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        try:
+            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        except:
+            pass
         s = episode[1].split('-')[0].strip()
         e = episode[1].split('-')[-1].strip()
-        r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
-            'prod code': episode[2],
-            'air date': air_date,
-            'url': episode[4],
-            'title':episode[5],
-        }
+        try:
+            r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
+                'prod code': episode[2],
+                'air date': air_date,
+                'url': episode[4],
+                'title':episode[5],
+            }
+        except:
+            print "oxweb.epguides failed,", url
     return r
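Note: with the two try/except blocks above, a single episode row with an unparseable air date or season/episode number no longer aborts the whole parse. A minimal usage sketch, assuming oxweb.epguides exposes getShowData as patched above and that shows live at epguides.com/<show>/ (the URL layout is an assumption):

from oxweb import epguides

def listEpisodes(show):
    url = 'http://epguides.com/%s/' % show   # assumed epguides URL scheme
    data = epguides.getShowData(url)
    # malformed rows are now skipped (or reported) instead of raising
    for key in sorted(data['episodes']):
        episode = data['episodes'][key]
        print key, episode['air date'], episode['title']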
@@ -81,6 +81,8 @@ def getMovieInfo(imdbId):
             txt = [cleanUp(k) for k in txt.split(', ')]
         elif title in ('country', 'language'):
             txt = [cleanUp(txt), ]
+        if title == 'tv series':
+            info['series_imdb'] = findRe(i[1], 'tt(\d{7})')
         if title == 'original air date':
             info['series_episode_info'] = txt.split('\n')[-1].strip()
             txt = txt.split('\n')[0].strip()
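The new 'tv series' branch captures the seven digits after "tt" from the raw cell i[1]. A self-contained sketch of the same extraction using re directly (findRe from oxlib is assumed to behave like re.search returning group 1, or '' on no match):

import re

def findSeriesImdb(html_fragment):
    # mirrors findRe(i[1], 'tt(\d{7})'): capture the 7 digits after "tt"
    m = re.search(r'tt(\d{7})', html_fragment)
    return m.group(1) if m else ''

# hypothetical "TV series" cell linking back to the parent show
fragment = '<a href="/title/tt0412142/">House M.D.</a>'
print findSeriesImdb(fragment)   # 0412142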
@@ -646,7 +648,7 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return url[28:35]
+            return normalizeImdbId(int(oxlib.intValue(url)))

     try:
         req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
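The old return url[28:35] only worked when the id sat at a fixed offset with exactly seven digits. A rough sketch of what the replacement presumably does (oxlib.intValue and normalizeImdbId are assumed to pull the numeric part out of the URL and zero-pad it; their real implementations may differ):

import re

def intValue(s):
    # assumed behaviour: first run of digits found in the string
    m = re.search(r'(\d+)', s)
    return m.group(1) if m else ''

def normalizeImdbId(imdbId):
    # assumed behaviour: zero-pad to the canonical 7-digit form
    return '%07d' % int(imdbId)

url = 'http://www.imdb.com/title/tt0066921/'   # illustrative URL
print normalizeImdbId(int(intValue(url)))      # 0066921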
oxweb/metacritic.py (new file, 45 lines)
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+from urllib import quote
+
+from oxlib.cache import getUrl, getUrlUnicode
+from oxlib import findRe, decodeHtml, stripTags
+
+
+def getMetacriticShowUrl(title):
+    title = quote(title)
+    url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
+    data = getUrl(url)
+    return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
+
+def getData(title, url=None):
+    if not url:
+        url = getMetacriticShowUrl(title)
+    if not url:
+        return None
+    data = getUrlUnicode(url)
+    score = findRe(data, 'ALT="Metascore: (.*?)"')
+    if score:
+        score = int(score)
+    else:
+        score = -1
+
+    reviews = re.compile(
+        '<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
+        '.*?<span class="publication">(.*?)</span>'
+        '.*?<span class="criticname">(.*?)</span></div>'
+        '.*?<div class="quote">(.*?)<br>'
+        '.*?<a href="(.*?)" ', re.DOTALL).findall(data)
+
+    metacritics = []
+    for review in reviews:
+        metacritics.append({
+            'score': int(review[0]),
+            'publication':review[1],
+            'critic':decodeHtml(review[2]),
+            'quote': stripTags(review[3]).strip(),
+            'link': review[4],
+        })
+    return dict(score = score, critics = metacritics, url = url)
+
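A hedged usage sketch for the new module; the show title is only illustrative, and the keys follow the dict built in getData above (assuming the package imports as oxweb):

from oxweb import metacritic

data = metacritic.getData('The Wire')   # illustrative title
if data is None:
    print "no metacritic page found"
else:
    print data['url'], data['score']
    for critic in data['critics']:
        # each entry carries: score, publication, critic, quote, link
        print critic['score'], critic['publication'], critic['quote']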
oxweb/oxdb.py (new file, 10 lines)
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import oxlib.cache
+
+def getPosterUrl(id):
+    url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
+    if oxlib.cache.exists(url):
+        return url
+    return ''
+
oxweb/piratecinema.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import oxlib.cache
+from oxlib.cache import exists
+
+
+def getPosterUrl(id):
+    url = "http://piratecinema.org/posters/%s/%s.jpg" % (id[:4], id)
+    if oxlib.cache.exists(url):
+        return url
+    return ''
+
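oxweb/oxdb.py and oxweb/piratecinema.py share the same contract: return the poster URL if oxlib.cache reports it exists, otherwise ''. A sketch of how a caller might chain them (the fallback order is an assumption, not something this commit defines):

from oxweb import oxdb, piratecinema

def findPosterUrl(imdbId):
    # try 0xdb first, then piratecinema; '' means neither source has a poster
    for source in (oxdb, piratecinema):
        url = source.getPosterUrl(imdbId)
        if url:
            return url
    return ''

print findPosterUrl('0066921')   # illustrative id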
@@ -16,7 +16,7 @@ def getEpisodeData(url):
     '''
     data = getUrlUnicode(url)
     r = {}
-    r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]
+    r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
     r['show'] = findRe(data, '<h1>(.*?)</h1>')
     r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>')
     #episode score
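The only change here wraps the scraped description in stripTags, so markup inside the matched block no longer leaks into r['description']. A tiny illustration using a stand-in (the HTML fragment is made up; oxlib's stripTags is assumed to drop tags and keep the text):

import re

def stripTags(s):
    # rough stand-in for oxlib.stripTags: remove anything that looks like a tag
    return re.sub(r'<[^>]+>', '', s)

raw = 'The crew takes <a href="/show/123/">the ship</a> off course.'  # made-up
print stripTags(raw)   # The crew takes the ship off course.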