This commit is contained in:
j 2010-07-12 10:52:26 +02:00
parent 9c432f2ab0
commit 4af1dd055e
2 changed files with 38 additions and 5 deletions

View file

@ -70,6 +70,11 @@ class Imdb(SiteParser):
],
'type': 'list'
},
'episode_title': {
'page': 'combined',
're': '<div id="tn15title">.*?<em>(.*?)</em>',
'type': 'string'
},
'filming_locations': {
'page': 'locations',
're': '<a href="/search/title\?locations=.*?">(.*?)</a>',
@ -119,7 +124,7 @@ class Imdb(SiteParser):
're': '<div class="starbar-meta">.*?<b>([\d,.]?)/10</b>',
'type': 'float'
},
'release_date': {
'release date': {
'page': 'releaseinfo',
're': '<a href="/date/(\d{2})-(\d{2})/">.*?</a> <a href="/year/(\d{4})/">',
'type': 'date'
@ -137,6 +142,21 @@ class Imdb(SiteParser):
're': '<h5>Runtime:</h5><div class="info-content">.*?([0-9]+ sec|[0-9]+ min).*?</div>',
'type': 'string'
},
'season': {
'page': 'combined',
're': '\(Season (\d+), Episode \d+\)',
'type': 'int'
},
'episode': {
'page': 'combined',
're': '\(Season \d+, Episode (\d+)\)',
'type': 'int'
},
'series': {
'page': 'combined',
're': '<h5>TV Series:</h5>.*?<a href="/title/tt(\d{7})',
'type': 'string'
},
'title': {
'page': 'combined',
're': '<h1>(.*?) <span>',
@ -168,10 +188,12 @@ class Imdb(SiteParser):
}
}
def __init__(self, id):
def __init__(self, id, timeout=-1):
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
super(Imdb, self).__init__()
super(Imdb, self).__init__(timeout)
if 'title' in self and self['title'].startswith('"') and self['title'].endswith('"'):
self['title'] = self['title'][1:-1]
if 'runtime' in self and self['runtime']:
if 'min' in self['runtime']: base=60
else: base=1
@ -191,6 +213,17 @@ class Imdb(SiteParser):
if key in self:
self[key] = filter(lambda x: x.lower() != 'home', self[key])
if 'series' in self:
if 'episode_title' in self:
self['series_title'] = self['title']
self['title'] = "%s: %s" % (self['series_title'], self['episode_title'])
if 'episode_title' in self and 'season' in self and 'episode' in self:
self['title'] = "%s (S%02dE%02d) %s" % (
self['series_title'], self['season'], self['episode'], self['episode_title'])
else:
for key in ('series_title', 'episode_title', 'season', 'episode'):
if key in self:
del self[key]
def guess(title, director='', timeout=google.DEFAULT_TIMEOUT):
#FIXME: proper file -> title

View file

@ -30,7 +30,7 @@ class SiteParser(dict):
def getUrl(self, page):
    """Return the URL for *page*: this parser's ``baseUrl`` followed by *page*."""
    # Both parts go through %s, so non-string values are stringified
    # rather than raising on concatenation.
    url_parts = (self.baseUrl, page)
    return "%s%s" % url_parts
def __init__(self):
def __init__(self, timeout=-1):
for key in self.regex:
url = self.getUrl(self.regex[key]['page'])
data = readUrlUnicode(url)
@ -58,7 +58,7 @@ class SiteParser(dict):
return data
if self.regex[key]['type'] == 'float' and data:
data = apply_f(float, data)
elif self.regex[key]['type'] == 'int':
elif self.regex[key]['type'] == 'int' and data:
data = apply_f(int, data)
elif self.regex[key]['type'] == 'date':
parse_date = lambda d: d and datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d')