# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re

from BeautifulSoup import BeautifulSoup

from utils import read_url_utf8, stripTags


def getEpisodeData(url):
  '''
    Parse information from a tvcom episode page.

    Fetches url with read_url_utf8 and returns a dict. The dict always has a
    'description' key (u'' when the description pattern finds nothing) and,
    when they can be read from the page, 'show', 'title' and 'score'.
  '''
  tvcom = {
    'description': u''
  }
  # Newlines are flattened to spaces before any matching is done.
  data = read_url_utf8(url).replace('\n', ' ')
  # NOTE(review): this pattern is compiled without re.VERBOSE, so the line
  # breaks inside the literal must match real newlines, which data no longer
  # contains after the replace above. It looks like the HTML tag anchors that
  # once surrounded the wildcards were lost -- confirm against the upstream
  # source before relying on 'description'.
  regexp = r'''
.*?
(.*?)
'''
  reg = re.compile(regexp, re.IGNORECASE)
  m = reg.findall(data)
  if m:
    # When the pattern matches several times, the last match wins.
    description = stripTags(m[-1].strip()).replace('Watch Video', '')
    tvcom['description'] = description.strip()

  soup = BeautifulSoup(data)
  # Optional data: best effort only. The fields are read in order, so the
  # first lookup that fails leaves the remaining keys unset.
  try:
    tvcom['show'] = soup('h1')[0].contents[0]
    tvcom['title'] = soup('h1')[1].contents[0]
    tvcom['score'] = soup("span", {'class':"f-28 f-bold mt-10 mb-10 f-FF9 db lh-18"})[0].contents[0]
  except Exception:
    # Deliberately ignored so a page without these elements still yields the
    # description. Catching Exception (not a bare except) lets
    # KeyboardInterrupt and SystemExit propagate.
    pass
  return tvcom