2008-07-03 20:22:32 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
import re
|
|
|
|
import time
|
|
|
|
|
2009-10-12 15:18:59 +00:00
|
|
|
from oxlib import stripTags, findRe
|
|
|
|
from oxlib.cache import readUrlUnicode
|
2008-07-03 20:22:32 +00:00
|
|
|
|
|
|
|
import google
|
|
|
|
|
|
|
|
|
2008-07-29 17:27:22 +00:00
|
|
|
def getShowUrl(title):
    '''
    Search Epguide Url for Show via Show Title.

    Use Google to search the url, this is also done on Epguide.

    title is the show title to look for; it is matched literally
    (case-sensitively) against the name of each search result.

    Returns the url of the first result that is hosted on
    http://epguides.com and whose name contains the title, or None if
    no result qualifies.
    '''
    # Escape the title: it is plain text, not a pattern. Unescaped, titles
    # such as "M*A*S*H" or "(500) Days" would raise or match the wrong text.
    title_pattern = re.compile(re.escape(title))
    # NOTE(review): the second argument to google.find is presumably the
    # maximum number of results -- confirm against the google module.
    query = 'allintitle: site:epguides.com %s' % title
    for (name, url, desc) in google.find(query, 1):
        if url.startswith('http://epguides.com') and title_pattern.search(name):
            return url
    return None
|
|
|
|
|
|
|
|
def getShowData(url):
    '''
    Read an Epguide show page and extract show and episode information.

    url is the address of the show page; it is fetched with readUrlUnicode.

    Returns a dict with the keys:
      'title'    - text of the first <h1> element with tags stripped
      'imdb'     - the id captured after "/title/tt" in the <h1> link
      'episodes' - dict mapping 'SxxEyy' keys to dicts with the keys
                   'prod code', 'air date', 'url' and 'title'

    'air date' is converted to YYYY-MM-DD when it parses as '%d %b %y';
    otherwise the stripped text from the page is kept unchanged.
    Episodes whose season/episode numbers are not integers are skipped
    and a message is printed.
    '''
    data = readUrlUnicode(url)
    r = {}
    r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
    r['imdb'] = findRe(data, r'<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
    r['episodes'] = {}
    #1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear
    # Groups: 0 running number, 1 season-episode, 2 production code,
    # 3 air date, 4 episode url, 5 episode title.
    episode_re = re.compile(r'(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>')
    for episode in episode_re.findall(data):
        air_date = episode[3].strip()
        #'22 Sep 04' -> 2004-09-22
        try:
            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
        except ValueError:
            # Best effort: keep the raw text when it is not a '%d %b %y' date.
            pass
        numbers = episode[1].split('-')
        s = numbers[0].strip()
        e = numbers[-1].strip()
        try:
            key = 'S%02dE%02d' % (int(s), int(e))
        except ValueError:
            # Single-argument form prints the same text under Python 2
            # and is also valid Python 3 syntax.
            print("oxweb.epguides failed, %s" % url)
            continue
        r['episodes'][key] = {
            'prod code': episode[2],
            'air date': air_date,
            'url': episode[4],
            'title': episode[5],
        }
    return r
|
|
|
|
|