IMDb title IDs can also be 8 digits long
This commit is contained in:
parent
fb8b33d916
commit
9c90aaa5f8
1 changed file with 8 additions and 8 deletions
|
@ -267,7 +267,7 @@ class Imdb(SiteParser):
|
||||||
},
|
},
|
||||||
'series': {
|
'series': {
|
||||||
'page': 'reference',
|
'page': 'reference',
|
||||||
're': '<h4 itemprop="name">.*?<a href="/title/tt(\d{7})',
|
're': '<h4 itemprop="name">.*?<a href="/title/tt(\d+?)',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'isSeries': {
|
'isSeries': {
|
||||||
|
@ -422,7 +422,7 @@ class Imdb(SiteParser):
|
||||||
for rel, data, _ in self['connections']:
|
for rel, data, _ in self['connections']:
|
||||||
if isinstance(rel, bytes):
|
if isinstance(rel, bytes):
|
||||||
rel = rel.decode('utf-8')
|
rel = rel.decode('utf-8')
|
||||||
#cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
|
#cc[rel] = re.compile('<a href="/title/tt(\d+?)/">(.*?)</a>').findall(data)
|
||||||
def get_conn(c):
|
def get_conn(c):
|
||||||
r = {
|
r = {
|
||||||
'id': c[0],
|
'id': c[0],
|
||||||
|
@ -432,7 +432,7 @@ class Imdb(SiteParser):
|
||||||
if len(description) == 2 and description[-1].strip() != '-':
|
if len(description) == 2 and description[-1].strip() != '-':
|
||||||
r['description'] = description[-1].strip()
|
r['description'] = description[-1].strip()
|
||||||
return r
|
return r
|
||||||
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
|
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d+?)/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
|
||||||
|
|
||||||
self['connections'] = cc
|
self['connections'] = cc
|
||||||
|
|
||||||
|
@ -618,7 +618,7 @@ def get_movie_by_title(title, timeout=-1):
|
||||||
url = "http://akas.imdb.com/find?" + params
|
url = "http://akas.imdb.com/find?" + params
|
||||||
data = read_url(url, timeout=timeout, unicode=True)
|
data = read_url(url, timeout=timeout, unicode=True)
|
||||||
#if search results in redirect, get id of current page
|
#if search results in redirect, get id of current page
|
||||||
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
|
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />'
|
||||||
results = re.compile(r).findall(data)
|
results = re.compile(r).findall(data)
|
||||||
if results:
|
if results:
|
||||||
return results[0]
|
return results[0]
|
||||||
|
@ -697,12 +697,12 @@ def get_movie_id(title, director='', year='', timeout=-1):
|
||||||
|
|
||||||
data = read_url(url, timeout=timeout, unicode=True)
|
data = read_url(url, timeout=timeout, unicode=True)
|
||||||
#if search results in redirect, get id of current page
|
#if search results in redirect, get id of current page
|
||||||
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
|
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />'
|
||||||
results = re.compile(r).findall(data)
|
results = re.compile(r).findall(data)
|
||||||
if results:
|
if results:
|
||||||
return results[0]
|
return results[0]
|
||||||
#otherwise get first result
|
#otherwise get first result
|
||||||
r = '<td valign="top">.*?<a href="/title/tt(\d{7})/"'
|
r = '<td valign="top">.*?<a href="/title/tt(\d+?)/"'
|
||||||
results = re.compile(r).findall(data)
|
results = re.compile(r).findall(data)
|
||||||
if results:
|
if results:
|
||||||
return results[0]
|
return results[0]
|
||||||
|
@ -713,7 +713,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
|
||||||
results = duckduckgo.find(google_query, timeout=timeout)
|
results = duckduckgo.find(google_query, timeout=timeout)
|
||||||
if results:
|
if results:
|
||||||
for r in results[:2]:
|
for r in results[:2]:
|
||||||
imdbId = find_re(r[1], 'title/tt(\d{7})')
|
imdbId = find_re(r[1], 'title/tt(\d+?)')
|
||||||
if imdbId:
|
if imdbId:
|
||||||
return imdbId
|
return imdbId
|
||||||
#or nothing
|
#or nothing
|
||||||
|
@ -740,7 +740,7 @@ def get_episodes(imdbId, season=None):
|
||||||
if season:
|
if season:
|
||||||
url += '?season=%d' % season
|
url += '?season=%d' % season
|
||||||
data = cache.read_url(url).decode()
|
data = cache.read_url(url).decode()
|
||||||
for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
|
for e in re.compile('<div data-const="tt(\d+?)".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
|
||||||
episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0]
|
episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0]
|
||||||
else:
|
else:
|
||||||
data = cache.read_url(url)
|
data = cache.read_url(url)
|
||||||
|
|
Loading…
Reference in a new issue