add some sites, fix some bugs

2008-07-29 19:04:23 +02:00 · 2008-07-29 19:04:23 +02:00 · 7c90d07438
commit 7c90d07438
parent 1101c6bec6
7 changed files with 86 additions and 10 deletions
--- a/oxweb/__init__.py
+++ b/oxweb/__init__.py
@@ -5,4 +5,5 @@ __version__ = '0.1.0'
 import imdb
 import wikipedia
 import google
-
+import piratecinema
 import oxdb
--- a/oxweb/epguides.py
+++ b/oxweb/epguides.py
@@ -30,14 +30,20 @@ def getShowData(url):
    for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
        air_date = episode[3].strip()
        #'22 Sep 04' -> 2004-09-22 
-        air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
+        try:
            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
        except:
            pass
        s = episode[1].split('-')[0].strip()
        e = episode[1].split('-')[-1].strip()
-        r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
+        try:
-            'prod code': episode[2],
+            r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
-            'air date': air_date,
+                'prod code': episode[2],
-            'url': episode[4],
+                'air date': air_date,
-            'title':episode[5],
+                'url': episode[4],
-        }
+                'title':episode[5],
            }
        except:
            print "oxweb.epguides failed,", url
    return r
--- a/oxweb/imdb.py
+++ b/oxweb/imdb.py
@@ -81,6 +81,8 @@ def getMovieInfo(imdbId):
                txt = [cleanUp(k) for k in txt.split(', ')]
            elif title in ('country', 'language'):
                txt = [cleanUp(txt), ]
        if title == 'tv series':
            info['series_imdb'] = findRe(i[1], 'tt(\d{7})')
        if title == 'original air date':
            info['series_episode_info'] = txt.split('\n')[-1].strip()
            txt = txt.split('\n')[0].strip()
@@ -646,7 +648,7 @@ def guess(title, director=''):
        search = 'site:imdb.com "%s"' % title
    for (name, url, desc) in google.find(search, 2):
        if url.startswith('http://www.imdb.com/title/tt'):
-            return url[28:35]
+             return normalizeImdbId(int(oxlib.intValue(url)))
    try:
        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
--- a/oxweb/metacritic.py
+++ b/oxweb/metacritic.py
@@ -0,0 +1,45 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 from urllib import quote
 from oxlib.cache import getUrl, getUrlUnicode
 from oxlib import findRe, decodeHtml, stripTags
 def getMetacriticShowUrl(title):
     '''
     Search metacritic.com for a TV show and return the link to its show page.

     title is URL-quoted and sent to metacritic's search form, restricted to
     the tvshow_title field. The return value is whatever findRe extracts
     from the response: a http://www.metacritic.com/tv/shows/... link, cut
     off before its first question mark.
     NOTE(review): what findRe returns when nothing matches is not visible
     here; getData treats a falsy result as "not found" - confirm.
     '''
     search_url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % quote(title)
     return findRe(getUrl(search_url), '(http://www.metacritic.com/tv/shows/.*?)\?')
 def getData(title, url=None):
     '''
     Collect the Metascore and the critic reviews for a TV show.

     title is only used to look the show up via getMetacriticShowUrl when
     no url is passed in. Returns None if no show url could be determined,
     otherwise a dict with the keys:
       score    overall Metascore as int, -1 if none was found on the page
       critics  list of dicts (score, publication, critic, quote, link),
                one per review block matched on the page
       url      the show page the data was read from
     '''
     # fall back to a search by title when the caller gave no usable url
     url = url or getMetacriticShowUrl(title)
     if not url:
         return None

     html = getUrlUnicode(url)

     metascore = findRe(html, 'ALT="Metascore: (.*?)"')
     metascore = int(metascore) if metascore else -1

     # one match per review; DOTALL because a review spans several lines
     review_re = re.compile(
         '<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
         '.*?<span class="publication">(.*?)</span>'
         '.*?<span class="criticname">(.*?)</span></div>'
         '.*?<div class="quote">(.*?)<br>'
         '.*?<a href="(.*?)" ', re.DOTALL)

     critics = [
         {
             'score': int(critic_score),
             'publication': publication,
             'critic': decodeHtml(critic),
             'quote': stripTags(excerpt).strip(),
             'link': link,
         }
         for (critic_score, publication, critic, excerpt, link)
         in review_re.findall(html)
     ]
     return dict(score=metascore, critics=critics, url=url)
--- a/oxweb/oxdb.py
+++ b/oxweb/oxdb.py
@@ -0,0 +1,10 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import oxlib.cache
 def getPosterUrl(id):
     '''
     Return the 0xdb.org poster url for id.

     The url is only returned if oxlib.cache.exists() is true for it;
     otherwise the result is an empty string.
     '''
     poster = "http://0xdb.org/%s/poster.0xdb.jpg" % id
     if not oxlib.cache.exists(poster):
         return ''
     return poster
--- a/oxweb/piratecinema.py
+++ b/oxweb/piratecinema.py
@@ -0,0 +1,12 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 import oxlib.cache
 from oxlib.cache import exists
 def getPosterUrl(id):
     '''
     Return the piratecinema.org poster url for id.

     Posters are addressed as posters/<first four characters of id>/<id>.jpg.
     The url is only returned if oxlib.cache.exists() is true for it;
     otherwise the result is an empty string.
     '''
     directory = id[:4]
     poster = "http://piratecinema.org/posters/%s/%s.jpg" % (directory, id)
     return poster if oxlib.cache.exists(poster) else ''
--- a/oxweb/tv.py
+++ b/oxweb/tv.py
@@ -16,7 +16,7 @@ def getEpisodeData(url):
    '''
    data = getUrlUnicode(url)
    r = {}
-    r['description'] = findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]
+    r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
    r['show'] = findRe(data, '<h1>(.*?)</h1>')
    r['title'] =  findRe(data, '<title>.*?: (.*?) - TV.com  </title>')
    #episode score