# -*- coding: utf-8 -*- # vi:si:et:sw=4:sts=4:ts=4 import re import time from ox import strip_tags, find_re from ox.cache import read_url def get_episode_data(url): ''' prases informatin on tvcom episode pages returns dict with title, show, description, score example: get_episode_data('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html') ''' data = read_url(url, unicode=True) r = {} r['description'] = strip_tags(find_re(data, 'div id="main-col">.*?