From d632cd3803c8f42f75593fc74bfa626422fcf3ba Mon Sep 17 00:00:00 2001 From: j Date: Tue, 23 Jul 2019 16:42:20 +0200 Subject: [PATCH] match as many digits as possible --- ox/web/imdb.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index db745bf..7d91dc7 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -267,7 +267,7 @@ class Imdb(SiteParser): }, 'series': { 'page': 'reference', - 're': '

.*?.*?(.*?)').findall(data) + #cc[rel] = re.compile('(.*?)').findall(data) def get_conn(c): r = { 'id': c[0], @@ -432,7 +432,7 @@ class Imdb(SiteParser): if len(description) == 2 and description[-1].strip() != '-': r['description'] = description[-1].strip() return r - cc[rel] = list(map(get_conn, re.compile('(.*?)(.*?)<\/div', re.DOTALL).findall(data))) + cc[rel] = list(map(get_conn, re.compile('(.*?)(.*?)<\/div', re.DOTALL).findall(data))) self['connections'] = cc @@ -618,7 +618,7 @@ def get_movie_by_title(title, timeout=-1): url = "http://akas.imdb.com/find?" + params data = read_url(url, timeout=timeout, unicode=True) #if search results in redirect, get id of current page - r = '' + r = '' results = re.compile(r).findall(data) if results: return results[0] @@ -697,12 +697,12 @@ def get_movie_id(title, director='', year='', timeout=-1): data = read_url(url, timeout=timeout, unicode=True) #if search results in redirect, get id of current page - r = '' + r = '' results = re.compile(r).findall(data) if results: return results[0] #otherwise get first result - r = '.*?.*?
S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data): + for e in re.compile('
.*?
S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data): episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0] else: data = cache.read_url(url)