diff --git a/oxweb/epguides.py b/oxweb/epguides.py new file mode 100644 index 0000000..51b8b4d --- /dev/null +++ b/oxweb/epguides.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +import re +import time + +from oxlib import stripTags, findRe +from oxlib.cache import getUrlUnicode + +import google + + +def getUrl(title): + ''' + Search Epguide Url for Show via Show Title. + Use Google to search the url, this is also done on Epguide. + ''' + for (name, url, desc) in google.find('allintitle: site:epguides.com %s' % title, 1): + if url.startswith('http://epguides.com'): + if re.search(title, name): + return url + return None + +def getShowData(url): + data = getUrlUnicode(url) + r = {} + r['title'] = stripTags(findRe(data, '

(.*?)

')) + r['imdb'] = findRe(data, '

.*?

') + r['episodes'] = {} + #1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear + for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) (.*?)').findall(data): + air_date = episode[3].strip() + #'22 Sep 04' -> 2004-09-22 + air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y')) + s = episode[1].split('-')[0].strip() + e = episode[1].split('-')[-1].strip() + r['episodes']['S%02dE%02d' % (int(s), int(e))] = { + 'prod code': episode[2], + 'air date': air_date, + 'url': episode[4], + 'title':episode[5], + } + return r + diff --git a/oxweb/rottentomatoes.py b/oxweb/rottentomatoes.py new file mode 100644 index 0000000..d567564 --- /dev/null +++ b/oxweb/rottentomatoes.py @@ -0,0 +1,34 @@ +# -*- coding: UTF-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +import re + +from oxlib.cache import getHeaders, getUrl, getUrlUnicode +from oxlib import findRe, stripTags + + +def getUrlByImdb(imdb): + #this would also work but does not cache: + ''' + from urllib2 import urlopen + u = urlopen(url) + return u.url + ''' + url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb + data = getUrl(url) + if "movie_title" in data: + movies = re.compile('(/m/.*?/)').findall(data) + if movies: + return "http://www.rottentomatoes.com" + movies[0] + return None + +def getData(url): + data = getUrlUnicode(url) + r = {} + r['title'] = findRe(data, '

(.*?)

') + if '(' in r['title']: + r['year'] = findRe(r['title'], '\((\d*?)\)') + r['title'] = re.sub('\((\d*?)\)', '', r['title']).strip() + r['synopsis'] = findRe(data, '(.*?)') + r['average rating'] = findRe(data, '
(.*?)
').strip() + return r + diff --git a/oxweb/tv.py b/oxweb/tv.py new file mode 100644 index 0000000..6f7cb20 --- /dev/null +++ b/oxweb/tv.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 +import re +import time + +from oxlib import stripTags, findRe +from oxlib.cache import getUrlUnicode + + +def getEpisodeData(url): + ''' + parses information on tv.com episode pages + returns dict with title, show, description, score + example: + getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html') + ''' + data = getUrlUnicode(url) + r = {} + r['description'] = findRe(data, 'div id="main-col">.*?
(.*?)(.*?)') + r['title'] = findRe(data, '.*?: (.*?) - TV.com ') + #episode score + r['episode score'] = findRe(data, '(.*?)') + + match = re.compile('Episode Number: (\d*?)    Season Num: (\d*?)    First Aired: (.*?)  ').findall(data) + if match: + r['season'] = int(match[0][1]) + r['episode'] = int(match[0][0]) + #'Wednesday September 29, 2004' -> 2004-09-29 + r['air date'] = time.strftime('%Y-%m-%d', time.strptime(match[0][2], '%A %B %d, %Y')) + return r + diff --git a/setup.py b/setup.py index b9b6df2..e4a39fe 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( packages=find_packages(), zip_safe=False, install_requires=[ - 'oxutils', + 'oxlib', 'feedparser', 'beautifulsoup', ],