# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re

from BeautifulSoup import BeautifulSoup

from google import google
from utils import read_url, read_url_utf8, stripTags
import tvcom
import imdb


# Episode codes of the form "S01E02": group 1 is the season, group 2 the episode.
_EPISODE_CODE = re.compile(r'^S(\d+)E(\d+)$')

# Matches an IMDb title link and captures the numeric part of its "tt" id.
_IMDB_TITLE_ID = re.compile(r'imdb\.com/title/tt(\d+)')


def epguidesUrl(title):
  '''
    Search Epguide Url for Show via Show Title.
    Use Google to search the url, this is also done on Epguide.

    Returns the url of the first search result that lives on
    http://epguides.com and whose result name contains the title,
    or None if no result qualifies.
  '''
  # The title is matched literally: escaping keeps titles that contain regex
  # metacharacters ("(", "?", "+", ...) from raising or matching the wrong text.
  titlePattern = re.compile(re.escape(title))
  # The second argument is handed to google() unchanged
  # (presumably a limit on the number of results -- TODO confirm).
  for (name, url, desc) in google('allintitle: site:epguides.com %s' % title, 1):
    if url.startswith('http://epguides.com') and titlePattern.search(name):
      return url
  return None


def getShowImdb(title):
  '''
    Find the IMDb id of a show.

    Looks for the first link to an IMDb title page on the show's Epguides
    page and returns its numeric id as a string, zero padded to at least
    seven digits. If there is no Epguides page, or no such link on it,
    the result of imdb.guess(title) is returned instead.
  '''
  imdbid = None
  url = epguidesUrl(title)
  if url:
    soup = BeautifulSoup(read_url(url))
    # Only links that actually carry a numeric id are considered, so the
    # int() conversion below can never be handed an empty string.
    links = soup('a', {'href': _IMDB_TITLE_ID})
    if links:
      match = _IMDB_TITLE_ID.search(links[0].get('href'))
      if match:
        imdbid = "%07d" % int(match.group(1))
  if not imdbid:
    imdbid = imdb.guess(title)
  return imdbid


def _episodeSearchString(episode):
  '''
    Turn an episode code such as "S01E02" into the "season-episode" text
    searched for in the Epguides listing, e.g. "1- 2" (the episode number
    is right-aligned in two columns).
  '''
  match = _EPISODE_CODE.match(episode.strip())
  if match:
    # Parse the numbers instead of blanking every "0": only leading zeros are
    # padding, so "S01E10" must become "1-10" and "S10E01" must become "10- 1".
    return u'%d-%2d' % (int(match.group(1)), int(match.group(2)))
  # Anything that is not a plain SxxEyy code keeps the historic transformation.
  return u'' + episode.replace('S', '').replace('E', '-').replace('0', ' ').strip()


def getEpisodeData(title, episode, show_url = None):
  '''
    Collect information about an episode.

    title    -- title of the show
    episode  -- episode code, e.g. "S01E02"
    show_url -- Epguides url of the show; looked up via epguidesUrl(title)
                when not given

    Returns dict with title, show, description and episode.
    If no Epguides url can be found, the result of
    imdb.getEpisodeData(title, episode) is returned instead.
  '''
  episodeData = {
    'title': u'',
    'show': title,
    'description': u'',
    'episode': episode,
  }
  if not show_url:
    show_url = epguidesUrl(title)
  if not show_url:
    return imdb.getEpisodeData(title, episode)

  data = read_url_utf8(show_url)
  estring = _episodeSearchString(episode)
  for line in data.split('\n'):
    # The episode's listing line is the one containing the search string
    # exactly once.
    if line.count(estring) != 1:
      continue
    anchors = BeautifulSoup(line)('a')
    if not anchors:
      # The search string also occurred in a line without a link; keep looking.
      continue
    anchor = anchors[0]
    if anchor.contents:
      episodeData['title'] = anchor.contents[0]
    # The first link of the line is handed to tvcom to fetch the description.
    tvcom_url = anchor.get('href')
    if tvcom_url:
      episodeData['description'] = tvcom.getEpisodeData(tvcom_url)['description']
    break
  return episodeData