Patch summary (free text before the first "diff --git" header is skipped by
git apply / patch):

  scrapeit/imdb.py
    * The vote count is read from the first findall() match of the
      "(N votes)" pattern instead of search().group(1).

  scrapeit/tvrss.py
    * Imports urllib.quote.
    * Adds get_episode_string(): returns "SxxEyy" when both season and
      episode are found, otherwise the episode date, otherwise None.
    * Adds get_episodes(): maps episode strings to the first enclosure href
      of each entry in the tvrss.net search feed for a show title.
    * Adds get_thumbnail(): downloads an image, resizes it to width 100,
      crops to 100x100 and returns the JPEG data, or None on any failure.
    * get_imdbdata() now uses imdb.IMDb(imdbid).parse() and get_thumbnail(),
      and picks the description from 'plot', then 'plot_outline', then
      'tagline'.
    * load() builds the episode string through get_episode_string().

Review notes:
  * This patch was recovered from a copy whose line breaks and indentation
    had been collapsed. Indentation below is reconstructed (4 spaces per
    level) -- TODO confirm against the target files, or apply with
    --ignore-whitespace.
  * get_episodes() parses the feed data it already downloaded instead of
    letting feedparser fetch the same URL a second time.
  * get_thumbnail() catches Exception rather than using a bare except, so
    KeyboardInterrupt / SystemExit are no longer swallowed.

diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py
index ad016e0..d65d95f 100644
--- a/scrapeit/imdb.py
+++ b/scrapeit/imdb.py
@@ -170,9 +170,9 @@ class IMDb:
         else:
             IMDbDict['rating'] = -1
         #Votes
-        m = re.compile('\((.*?) votes\)', re.IGNORECASE).search(data)
+        m = re.compile('\((.*?) votes\)', re.IGNORECASE).findall(data)
         if m:
-            IMDbDict['votes'] = int(m.group(1).replace(',', ''))
+            IMDbDict['votes'] = int(m[0].replace(',', ''))
         else:
             IMDbDict['votes'] = -1
 
diff --git a/scrapeit/tvrss.py b/scrapeit/tvrss.py
index ccd4717..4e0aea5 100755
--- a/scrapeit/tvrss.py
+++ b/scrapeit/tvrss.py
@@ -9,6 +9,7 @@ import datetime
 import time
 import re
 from urllib2 import urlopen
+from urllib import quote
 
 import Image
 import StringIO
@@ -52,6 +53,17 @@ def get_episodedate(string):
         return s[1].strip()
     return None
 
+def get_episode_string(string):
+    episode = get_episode(string)
+    season = get_season(string)
+    episodedate = get_episodedate(string)
+    estring = None
+    if season and episode:
+        estring = "S%02dE%02d" % (season, episode)
+    elif episodedate:
+        estring = episodedate
+    return estring
+
 def choose_item(old, new):
     if old['link'] == new['link']:
         return False
@@ -63,29 +75,50 @@ def choose_item(old, new):
         return True
     return False
 
+def get_episodes(show_title):
+    search_url = "http://tvrss.net/search/index.php?distribution_group=combined&show_name=%s&show_name_exact=true&filename=&date=&quality=&release_group=&mode=rss" % quote(show_title)
+    data = read_url(search_url)
+    fd = feedparser.parse(data)
+    episodes = {}
+    for t in fd.entries:
+        episode = get_episode_string(t['summary'])
+        episodes[episode] = t['enclosures'][0]['href']
+    return episodes
+
+def get_thumbnail(url):
+    try:
+        thumbnail = read_url(url)
+        im = Image.open(StringIO.StringIO(thumbnail))
+        out = StringIO.StringIO()
+        width = 100
+        height = int((100.0 / im.size[0]) * im.size[1])
+        im = im.resize((width, height))
+        im.crop((0,0,100,100)).convert().save(out, 'JPEG')
+        thumbnail = out.getvalue()
+    except Exception:
+        thumbnail = None
+    return thumbnail
+
 def get_imdbdata(imdbid):
     thumbnail = None
     description=''
-    imdb = IMDb.parse(imdbid)
-    if imdb:
-        poster = imdb['poster']
+    i = imdb.IMDb(imdbid).parse()
+    if i:
+        poster = i['poster']
         if poster != 'http://i.imdb.com/Heads/npa.gif':
             log.debug("getting poster %s" % poster)
-            try:
-                thumbnail = read_url(poster)
-                im = Image.open(StringIO.StringIO(thumbnail))
-                out = StringIO.StringIO()
-                im.crop((0,0,100,100)).convert().save(out, 'JPEG')
-                thumbnail = out.getvalue()
-            except:
-                thumbnail = None
-        if imdb['summary']:
-            description=imdb['summary']
-        else:
-            description=imdb['tagline']
-        return (imdb, description, thumbnail)
+            thumbnail = get_thumbnail(poster)
+        if i['plot']:
+            description=i['plot']
+        elif i['plot_outline']:
+            description=i['plot_outline']
+        else:
+            description=i['tagline']
+
+        return (i, description, thumbnail)
     else:
-        return(imdb, '', None)
+        return(i, '', None)
+
 
 def load():
     log.debug("getting new shows from tvrss...")
@@ -93,14 +126,7 @@ def load():
     shows = {}
     for item in feed['entries']:
         show = get_show(item['description'])
-        season = get_season(item['description'])
-        episode = get_episode(item['description'])
-        episodedate = get_episodedate(item['description'])
-        estring = None
-        if season and episode:
-            estring = "S%02dE%02d" %(season, episode)
-        elif episodedate:
-            estring = episodedate
+        estring = get_episode_string(item['description'])
         if estring:
             if show and not hr_hdtv.search(item['title']):
                 if shows.has_key(show):