some more tv show sites

2008-07-03 22:22:32 +02:00 · 2008-07-03 22:22:32 +02:00 · f653e02887
commit f653e02887
parent 68f7621ac7
4 changed files with 110 additions and 1 deletions
--- a/oxweb/epguides.py
+++ b/oxweb/epguides.py
@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+import time
+
+from oxlib import stripTags, findRe
+from oxlib.cache import getUrlUnicode
+
+import google
+
+
+def getUrl(title):
+    ''' 
+    Search Epguide Url for Show via Show Title.
+    Use Google to search the url, this is also done on Epguide.
+    '''
+    for (name, url, desc) in google.find('allintitle: site:epguides.com %s' % title, 1):
+        if url.startswith('http://epguides.com'):
+              if re.search(title, name):
+                    return url
+    return None
+
+def getShowData(url):
+    data = getUrlUnicode(url)
+    r = {}
+    r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
+    r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
+    r['episodes'] = {}
+    #1.   1- 1       1001      7 Aug 05   You Can't Miss the Bear
+    for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
+        air_date = episode[3].strip()
+        #'22 Sep 04' -> 2004-09-22 
+        air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        s = episode[1].split('-')[0].strip()
+        e = episode[1].split('-')[-1].strip()
+        r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
+            'prod code': episode[2],
+            'air date': air_date,
+            'url': episode[4],
+            'title':episode[5],
+        }
+    return r
+
--- a/oxweb/rottentomatoes.py
+++ b/oxweb/rottentomatoes.py
@ -0,0 +1,34 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+
+from oxlib.cache import getHeaders, getUrl, getUrlUnicode
+from oxlib import findRe, stripTags
+
+
+def getUrlByImdb(imdb):
+    #this would also wor but does not cache:
+    '''
+    from urllib2 import urlopen
+    u = urlopen(url)
+    return u.url
+    '''
+    url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
+    data = getUrl(url)
+    if "movie_title" in data:
+        movies = re.compile('(/m/.*?/)').findall(data)
+        if movies:
+            return "http://www.rottentomatoes.com" + movies[0]
+    return None
+
+def getData(url):
+    data = getUrlUnicode(url)
+    r = {}
+    r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
+    if '(' in r['title']:
+        r['year'] = findRe(r['title'], '\((\d*?)\)')
+        r['title'] = re.sub('\((\d*?)\)', '', r['title']).strip()
+    r['synopsis'] = findRe(data, '<span id="movie_synopsis_all".*?>(.*?)</span>')
+    r['average rating'] = findRe(data, '<div id="bubble_allCritics".*?>(.*?)</div>').strip()
+    return r
+
--- a/oxweb/tv.py
+++ b/oxweb/tv.py
@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import re
+import time
+
+from oxlib import stripTags, findRe
+from oxlib.cache import getUrlUnicode
+
+
+def getEpisodeData(url):
+    '''
+      prases informatin on tvcom episode pages
+      returns dict with title, show, description, score
+      example:
+        getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
+    '''
+    data = getUrlUnicode(url)
+    r = {}
+    r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]
+    r['show'] = findRe(data, '<h1>(.*?)</h1>')
+    r['title'] =  findRe(data, '<title>.*?: (.*?) - TV.com  </title>')
+    #episode score
+    r['episode score'] = findRe(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')
+
+    match = re.compile('Episode Number: (\d*?) &nbsp;&nbsp; Season Num: (\d*?) &nbsp;&nbsp; First Aired: (.*?) &nbsp').findall(data) 
+    if match:
+        r['season'] = int(match[0][1])
+        r['episode'] = int(match[0][0])
+        #'Wednesday September 29, 2004' -> 2004-09-29 
+        r['air date'] = time.strftime('%Y-%m-%d', time.strptime(match[0][2], '%A %B %d, %Y'))
+    return r
+
--- a/setup.py
+++ b/setup.py
@ -17,7 +17,7 @@ setup(
    packages=find_packages(),
    zip_safe=False,
    install_requires=[
-          'oxutils',
+          'oxlib',
          'feedparser',
          'beautifulsoup',
    ],