# -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 import re import time from ox import stripTags, findRe from ox.cache import readUrlUnicode def getEpisodeData(url): ''' parses information on tvcom episode pages returns dict with title, show, description, score example: getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html') ''' data = readUrlUnicode(url) r = {} r['description'] = stripTags(findRe(data, 'div id="main-col">.*?