This commit is contained in:
j 2010-07-12 10:52:26 +02:00
parent 9c432f2ab0
commit 4af1dd055e
2 changed files with 38 additions and 5 deletions

View file

@ -70,6 +70,11 @@ class Imdb(SiteParser):
], ],
'type': 'list' 'type': 'list'
}, },
'episode_title': {
'page': 'combined',
're': '<div id="tn15title">.*?<em>(.*?)</em>',
'type': 'string'
},
'filming_locations': { 'filming_locations': {
'page': 'locations', 'page': 'locations',
're': '<a href="/search/title\?locations=.*?">(.*?)</a>', 're': '<a href="/search/title\?locations=.*?">(.*?)</a>',
@ -119,7 +124,7 @@ class Imdb(SiteParser):
're': '<div class="starbar-meta">.*?<b>([\d,.]?)/10</b>', 're': '<div class="starbar-meta">.*?<b>([\d,.]?)/10</b>',
'type': 'float' 'type': 'float'
}, },
'release_date': { 'release date': {
'page': 'releaseinfo', 'page': 'releaseinfo',
're': '<a href="/date/(\d{2})-(\d{2})/">.*?</a> <a href="/year/(\d{4})/">', 're': '<a href="/date/(\d{2})-(\d{2})/">.*?</a> <a href="/year/(\d{4})/">',
'type': 'date' 'type': 'date'
@ -137,6 +142,21 @@ class Imdb(SiteParser):
're': '<h5>Runtime:</h5><div class="info-content">.*?([0-9]+ sec|[0-9]+ min).*?</div>', 're': '<h5>Runtime:</h5><div class="info-content">.*?([0-9]+ sec|[0-9]+ min).*?</div>',
'type': 'string' 'type': 'string'
}, },
'season': {
'page': 'combined',
're': '\(Season (\d+), Episode \d+\)',
'type': 'int'
},
'episode': {
'page': 'combined',
're': '\(Season \d+, Episode (\d+)\)',
'type': 'int'
},
'series': {
'page': 'combined',
're': '<h5>TV Series:</h5>.*?<a href="/title/tt(\d{7})',
'type': 'string'
},
'title': { 'title': {
'page': 'combined', 'page': 'combined',
're': '<h1>(.*?) <span>', 're': '<h1>(.*?) <span>',
@ -168,10 +188,12 @@ class Imdb(SiteParser):
} }
} }
def __init__(self, id): def __init__(self, id, timeout=-1):
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
super(Imdb, self).__init__() super(Imdb, self).__init__(timeout)
if 'title' in self and self['title'].startswith('"') and self['title'].endswith('"'):
self['title'] = self['title'][1:-1]
if 'runtime' in self and self['runtime']: if 'runtime' in self and self['runtime']:
if 'min' in self['runtime']: base=60 if 'min' in self['runtime']: base=60
else: base=1 else: base=1
@ -191,6 +213,17 @@ class Imdb(SiteParser):
if key in self: if key in self:
self[key] = filter(lambda x: x.lower() != 'home', self[key]) self[key] = filter(lambda x: x.lower() != 'home', self[key])
if 'series' in self:
if 'episode_title' in self:
self['series_title'] = self['title']
self['title'] = "%s: %s" % (self['series_title'], self['episode_title'])
if 'episode_title' in self and 'season' in self and 'episode' in self:
self['title'] = "%s (S%02dE%02d) %s" % (
self['series_title'], self['season'], self['episode'], self['episode_title'])
else:
for key in ('series_title', 'episode_title', 'season', 'episode'):
if key in self:
del self[key]
def guess(title, director='', timeout=google.DEFAULT_TIMEOUT): def guess(title, director='', timeout=google.DEFAULT_TIMEOUT):
#FIXME: proper file -> title #FIXME: proper file -> title

View file

@ -30,7 +30,7 @@ class SiteParser(dict):
def getUrl(self, page): def getUrl(self, page):
return "%s%s" % (self.baseUrl, page) return "%s%s" % (self.baseUrl, page)
def __init__(self): def __init__(self, timeout=-1):
for key in self.regex: for key in self.regex:
url = self.getUrl(self.regex[key]['page']) url = self.getUrl(self.regex[key]['page'])
data = readUrlUnicode(url) data = readUrlUnicode(url)
@ -58,7 +58,7 @@ class SiteParser(dict):
return data return data
if self.regex[key]['type'] == 'float' and data: if self.regex[key]['type'] == 'float' and data:
data = apply_f(float, data) data = apply_f(float, data)
elif self.regex[key]['type'] == 'int': elif self.regex[key]['type'] == 'int' and data:
data = apply_f(int, data) data = apply_f(int, data)
elif self.regex[key]['type'] == 'date': elif self.regex[key]['type'] == 'date':
parse_date = lambda d: d and datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d') parse_date = lambda d: d and datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d')