add some sites, fix some bugs

j 2008-07-29 19:04:23 +02:00
parent 1101c6bec6
commit 7c90d07438
7 changed files with 86 additions and 10 deletions

oxweb/__init__.py

@ -5,4 +5,5 @@ __version__ = '0.1.0'
 import imdb
 import wikipedia
 import google
+import piratecinema
 import oxdb

oxweb/epguides.py

@ -30,14 +30,20 @@ def getShowData(url):
     for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
         air_date = episode[3].strip()
         #'22 Sep 04' -> 2004-09-22
-        air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        try:
+            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        except:
+            pass
         s = episode[1].split('-')[0].strip()
         e = episode[1].split('-')[-1].strip()
-        r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
-            'prod code': episode[2],
-            'air date': air_date,
-            'url': episode[4],
-            'title':episode[5],
-        }
+        try:
+            r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
+                'prod code': episode[2],
+                'air date': air_date,
+                'url': episode[4],
+                'title':episode[5],
+            }
+        except:
+            print "oxweb.epguides failed,", url
     return r
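
A minimal usage sketch (not part of this commit) of how the parsed result can be consumed; the epguides.com URL below is only an example, and the episode keys and fields follow the code above:

from oxweb import epguides

# example show page on epguides.com; any show URL should work the same way
show = epguides.getShowData('http://epguides.com/BattlestarGalactica/')
for key in sorted(show['episodes']):
    episode = show['episodes'][key]
    print key, episode['air date'], episode['title']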

oxweb/imdb.py

@ -81,6 +81,8 @@ def getMovieInfo(imdbId):
             txt = [cleanUp(k) for k in txt.split(', ')]
         elif title in ('country', 'language'):
             txt = [cleanUp(txt), ]
+        if title == 'tv series':
+            info['series_imdb'] = findRe(i[1], 'tt(\d{7})')
         if title == 'original air date':
             info['series_episode_info'] = txt.split('\n')[-1].strip()
             txt = txt.split('\n')[0].strip()
@ -646,7 +648,7 @@ def guess(title, director=''):
     search = 'site:imdb.com "%s"' % title
     for (name, url, desc) in google.find(search, 2):
         if url.startswith('http://www.imdb.com/title/tt'):
-            return url[28:35]
+            return normalizeImdbId(int(oxlib.intValue(url)))
     try:
         req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
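
The old return value was a fixed slice of the URL (url[28:35]), which silently assumes one exact URL prefix; the new code extracts the numeric id from the URL and normalizes it. A rough standalone sketch of the same idea, written with plain re because the oxlib helpers are not shown in this diff (their exact behavior here is an assumption):

import re

def guessImdbIdFromUrl(url):
    # hypothetical helper: pull the digits of the title id out of an
    # imdb.com URL and zero-pad to the usual seven characters,
    # e.g. 'http://www.imdb.com/title/tt63350/' -> '0063350'
    match = re.search('/title/tt(\d+)', url)
    if not match:
        return None
    return '%07d' % int(match.group(1))

print guessImdbIdFromUrl('http://www.imdb.com/title/tt0063350/')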

oxweb/metacritic.py (new file, 45 lines)

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import quote

from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, decodeHtml, stripTags

def getMetacriticShowUrl(title):
    title = quote(title)
    url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
    data = getUrl(url)
    return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')

def getData(title, url=None):
    if not url:
        url = getMetacriticShowUrl(title)
    if not url:
        return None
    data = getUrlUnicode(url)
    score = findRe(data, 'ALT="Metascore: (.*?)"')
    if score:
        score = int(score)
    else:
        score = -1
    reviews = re.compile(
        '<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
        '.*?<span class="publication">(.*?)</span>'
        '.*?<span class="criticname">(.*?)</span></div>'
        '.*?<div class="quote">(.*?)<br>'
        '.*?<a href="(.*?)" ', re.DOTALL).findall(data)
    metacritics = []
    for review in reviews:
        metacritics.append({
            'score': int(review[0]),
            'publication':review[1],
            'critic':decodeHtml(review[2]),
            'quote': stripTags(review[3]).strip(),
            'link': review[4],
        })
    return dict(score = score, critics = metacritics, url = url)
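
A short usage sketch for the new module (not part of this commit); the show title is only an example, and the returned fields follow the dict built above:

from oxweb import metacritic

data = metacritic.getData('The Wire')  # example title
if data:
    print data['url'], data['score']
    for review in data['critics']:
        print review['score'], review['publication'], review['quote']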

oxweb/oxdb.py (new file, 10 lines)

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache

def getPosterUrl(id):
    url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
    if oxlib.cache.exists(url):
        return url
    return ''

oxweb/piratecinema.py (new file, 12 lines)

@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache
from oxlib.cache import exists

def getPosterUrl(id):
    url = "http://piratecinema.org/posters/%s/%s.jpg" % (id[:4], id)
    if oxlib.cache.exists(url):
        return url
    return ''
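
Both new poster modules follow the same pattern: build a candidate URL and return it only if oxlib.cache.exists() reports it, otherwise return an empty string. A small sketch of how a caller might chain them, assuming the id is the seven-digit IMDb-style id used elsewhere in oxweb (the id below is only an example):

from oxweb import oxdb, piratecinema

def findPosterUrl(id):
    # hypothetical helper: prefer a piratecinema poster and fall back
    # to 0xdb; both functions return '' when no poster is found
    return piratecinema.getPosterUrl(id) or oxdb.getPosterUrl(id)

print findPosterUrl('0063350')  # example id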


@ -16,7 +16,7 @@ def getEpisodeData(url):
     '''
     data = getUrlUnicode(url)
     r = {}
-    r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]
+    r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
     r['show'] = findRe(data, '<h1>(.*?)</h1>')
     r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>')
     #episode score