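Match as many digits as possible: use greedy `\d+` instead of lazy `\d+?` when capturing IMDb title IDs (`tt<digits>`), so patterns with nothing after the group no longer capture only the first digit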

This commit is contained in:
j 2019-07-23 16:42:20 +02:00
parent 9c90aaa5f8
commit d632cd3803

View file

@ -267,7 +267,7 @@ class Imdb(SiteParser):
}, },
'series': { 'series': {
'page': 'reference', 'page': 'reference',
're': '<h4 itemprop="name">.*?<a href="/title/tt(\d+?)', 're': '<h4 itemprop="name">.*?<a href="/title/tt(\d+)',
'type': 'string' 'type': 'string'
}, },
'isSeries': { 'isSeries': {
@ -422,7 +422,7 @@ class Imdb(SiteParser):
for rel, data, _ in self['connections']: for rel, data, _ in self['connections']:
if isinstance(rel, bytes): if isinstance(rel, bytes):
rel = rel.decode('utf-8') rel = rel.decode('utf-8')
#cc[rel] = re.compile('<a href="/title/tt(\d+?)/">(.*?)</a>').findall(data) #cc[rel] = re.compile('<a href="/title/tt(\d+)/">(.*?)</a>').findall(data)
def get_conn(c): def get_conn(c):
r = { r = {
'id': c[0], 'id': c[0],
@ -432,7 +432,7 @@ class Imdb(SiteParser):
if len(description) == 2 and description[-1].strip() != '-': if len(description) == 2 and description[-1].strip() != '-':
r['description'] = description[-1].strip() r['description'] = description[-1].strip()
return r return r
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d+?)/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data))) cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d+)/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
self['connections'] = cc self['connections'] = cc
@ -618,7 +618,7 @@ def get_movie_by_title(title, timeout=-1):
url = "http://akas.imdb.com/find?" + params url = "http://akas.imdb.com/find?" + params
data = read_url(url, timeout=timeout, unicode=True) data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page #if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />' r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+)/" />'
results = re.compile(r).findall(data) results = re.compile(r).findall(data)
if results: if results:
return results[0] return results[0]
@ -697,12 +697,12 @@ def get_movie_id(title, director='', year='', timeout=-1):
data = read_url(url, timeout=timeout, unicode=True) data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page #if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />' r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+)/" />'
results = re.compile(r).findall(data) results = re.compile(r).findall(data)
if results: if results:
return results[0] return results[0]
#otherwise get first result #otherwise get first result
r = '<td valign="top">.*?<a href="/title/tt(\d+?)/"' r = '<td valign="top">.*?<a href="/title/tt(\d+)/"'
results = re.compile(r).findall(data) results = re.compile(r).findall(data)
if results: if results:
return results[0] return results[0]
@ -713,7 +713,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
results = duckduckgo.find(google_query, timeout=timeout) results = duckduckgo.find(google_query, timeout=timeout)
if results: if results:
for r in results[:2]: for r in results[:2]:
imdbId = find_re(r[1], 'title/tt(\d+?)') imdbId = find_re(r[1], 'title/tt(\d+)')
if imdbId: if imdbId:
return imdbId return imdbId
#or nothing #or nothing
@ -740,7 +740,7 @@ def get_episodes(imdbId, season=None):
if season: if season:
url += '?season=%d' % season url += '?season=%d' % season
data = cache.read_url(url).decode() data = cache.read_url(url).decode()
for e in re.compile('<div data-const="tt(\d+?)".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data): for e in re.compile('<div data-const="tt(\d+)".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0] episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0]
else: else:
data = cache.read_url(url) data = cache.read_url(url)