some more tv show sites
This commit is contained in:
parent
68f7621ac7
commit
f653e02887
4 changed files with 110 additions and 1 deletions
43
oxweb/epguides.py
Normal file
43
oxweb/epguides.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from oxlib import stripTags, findRe
|
||||||
|
from oxlib.cache import getUrlUnicode
|
||||||
|
|
||||||
|
import google
|
||||||
|
|
||||||
|
|
||||||
|
def getUrl(title):
    '''
    Search Epguide Url for Show via Show Title.
    Use Google to search the url, this is also done on Epguide.

    Returns the url of the first search result that is located on
    http://epguides.com and whose result name contains title,
    or None if no result qualifies.
    '''
    # Match the title literally: show titles regularly contain regex
    # metacharacters (e.g. "M*A*S*H"), which would otherwise change the
    # meaning of the pattern or make it fail to compile.
    title_re = re.compile(re.escape(title))
    query = 'allintitle: site:epguides.com %s' % title
    for (name, url, desc) in google.find(query, 1):
        if url.startswith('http://epguides.com') and title_re.search(name):
            return url
    return None
|
||||||
|
|
||||||
|
def getShowData(url):
    '''
    Parse the epguides show page at url.

    Returns a dict with the keys 'title', 'imdb' and 'episodes'.
    'episodes' maps keys of the form 'S01E02' to dicts with the keys
    'prod code', 'air date', 'url' and 'title'.

    'air date' is formatted as YYYY-MM-DD; if the text found on the page
    can not be parsed as a date it is stored unchanged instead of
    aborting the whole show.
    '''
    data = getUrlUnicode(url)
    r = {}
    r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
    r['imdb'] = findRe(data, r'<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
    r['episodes'] = {}
    #1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear
    episode_re = re.compile(r'(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>')
    for episode in episode_re.findall(data):
        air_date = episode[3].strip()
        try:
            #'22 Sep 04' -> 2004-09-22
            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
        except ValueError:
            # not a '%d %b %y' date, keep the text as found on the page
            pass
        # episode[1] looks like '1- 1': season before the dash, episode after
        season_episode = episode[1].split('-')
        s = season_episode[0].strip()
        e = season_episode[-1].strip()
        r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
            'prod code': episode[2],
            'air date': air_date,
            'url': episode[4],
            'title': episode[5],
        }
    return r
|
||||||
|
|
34
oxweb/rottentomatoes.py
Normal file
34
oxweb/rottentomatoes.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
import re
|
||||||
|
|
||||||
|
from oxlib.cache import getHeaders, getUrl, getUrlUnicode
|
||||||
|
from oxlib import findRe, stripTags
|
||||||
|
|
||||||
|
|
||||||
|
def getUrlByImdb(imdb):
    '''
    Look up the rottentomatoes movie url for an imdb id.

    Requests the rottentomatoes alias page for imdb and returns the first
    '/m/.../' path found in it as absolute url. Returns None if the page
    does not contain "movie_title" or no such path is found.
    '''
    # this would also work but does not cache:
    #   from urllib2 import urlopen
    #   u = urlopen(url)
    #   return u.url
    alias_url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
    page = getUrl(alias_url)
    if "movie_title" not in page:
        return None
    paths = re.findall('(/m/.*?/)', page)
    if not paths:
        return None
    return "http://www.rottentomatoes.com" + paths[0]
|
||||||
|
|
||||||
|
def getData(url):
    '''
    Parse the rottentomatoes movie page at url.

    Returns a dict with the keys 'title', 'synopsis' and 'average rating'.
    If the title on the page contains a parenthesis, the digits inside
    '(...)' are returned as 'year' and removed from 'title'.
    '''
    data = getUrlUnicode(url)
    # raw string so the regex escapes are not read as string escapes
    year_re = r'\((\d*?)\)'
    r = {}
    r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
    if '(' in r['title']:
        # extract the year before it gets removed from the title
        r['year'] = findRe(r['title'], year_re)
        r['title'] = re.sub(year_re, '', r['title']).strip()
    r['synopsis'] = findRe(data, '<span id="movie_synopsis_all".*?>(.*?)</span>')
    r['average rating'] = findRe(data, '<div id="bubble_allCritics".*?>(.*?)</div>').strip()
    return r
|
||||||
|
|
32
oxweb/tv.py
Normal file
32
oxweb/tv.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from oxlib import stripTags, findRe
|
||||||
|
from oxlib.cache import getUrlUnicode
|
||||||
|
|
||||||
|
|
||||||
|
def getEpisodeData(url):
    '''
    parses information on tv.com episode pages

    returns dict with description, show, title and episode score;
    if the episode info line is found on the page the dict also has
    season and episode (as int) and air date (as YYYY-MM-DD)

    example:
    getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
    '''
    data = getUrlUnicode(url)
    r = {}
    # only keep the text before the first carriage return
    r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]
    r['show'] = findRe(data, '<h1>(.*?)</h1>')
    r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com  </title>')
    #episode score
    r['episode score'] = findRe(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')

    # groups are: episode number, season number, first aired
    match = re.compile(r'Episode Number: (\d*?)    Season Num: (\d*?)    First Aired: (.*?)  ').findall(data)
    if match:
        episode, season, first_aired = match[0]
        r['season'] = int(season)
        r['episode'] = int(episode)
        #'Wednesday September 29, 2004' -> 2004-09-29
        r['air date'] = time.strftime('%Y-%m-%d', time.strptime(first_aired, '%A %B %d, %Y'))
    return r
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -17,7 +17,7 @@ setup(
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'oxutils',
|
'oxlib',
|
||||||
'feedparser',
|
'feedparser',
|
||||||
'beautifulsoup',
|
'beautifulsoup',
|
||||||
],
|
],
|
||||||
|
|
Loading…
Reference in a new issue