34 lines
984 B
Python
34 lines
984 B
Python
# -*- Mode: Python; -*-
|
|
# -*- coding: utf-8 -*-
|
|
# vi:si:et:sw=2:sts=2:ts=2
|
|
|
|
import re
|
|
|
|
from BeautifulSoup import BeautifulSoup
|
|
|
|
from utils import read_url_utf8, stripTags
|
|
|
|
# Captures the markup between the first <div> following <div id="main-col">
# and the <div class="ta-r mt-10 f-bold"> element. Compiled once at import
# time instead of on every call.
_DESCRIPTION_RE = re.compile(
  r'''<div id="main-col">.*?<div>(.*?)<div class="ta-r mt-10 f-bold">''',
  re.IGNORECASE)

# class attribute of the <span> that carries the episode score
_SCORE_CLASS = "f-28 f-bold mt-10 mb-10 f-FF9 db lh-18"


def _firstContent(tags, index):
  ''' returns the first child of tags[index]

      returns None when tags has no element at index or when that
      element has no children
  '''
  if index < len(tags) and tags[index].contents:
    return tags[index].contents[0]
  return None


def getEpisodeData(url):
  ''' parses information on tvcom episode pages

      url is passed unchanged to read_url_utf8 to fetch the page.

      returns dict with title, show, description, score

      'description' is always present and defaults to u'' when the
      description markup is not found. 'show', 'title' and 'score' are
      optional: each key is only set when the matching element exists
      on the page, and each one is looked up independently, so a missing
      title no longer prevents the score from being returned.
  '''
  tvcom = {
    'description': u''
  }

  # Newlines are flattened to spaces so that '.' in the regular expression
  # can span what were originally several lines.
  data = read_url_utf8(url).replace('\n', ' ')

  matches = _DESCRIPTION_RE.findall(data)
  if matches:
    # When the pattern matches more than once the last match wins.
    description = stripTags(matches[-1].strip()).replace('Watch Video', '')
    tvcom['description'] = description.strip()

  soup = BeautifulSoup(data)
  headings = soup('h1')
  scores = soup("span", {'class': _SCORE_CLASS})

  #optional data: first <h1> is the show, second <h1> is the episode title
  optional = (
    ('show', headings, 0),
    ('title', headings, 1),
    ('score', scores, 0),
  )
  for key, tags, index in optional:
    value = _firstContent(tags, index)
    if value is not None:
      tvcom[key] = value

  return tvcom
|