From 9c90aaa5f8e6f17f22dc71f3eeba0a4bf40d2f15 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Tue, 23 Jul 2019 16:24:06 +0200
Subject: [PATCH] imdb ids can also be 8 digits

---
 ox/web/imdb.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 4821b0c..db745bf 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -267,7 +267,7 @@ class Imdb(SiteParser):
         },
         'series': {
             'page': 'reference',
-            're': '<h4 itemprop="name">.*?<a href="/title/tt(\d{7})',
+            're': '<h4 itemprop="name">.*?<a href="/title/tt(\d+)',
             'type': 'string'
         },
         'isSeries': {
@@ -422,7 +422,7 @@ class Imdb(SiteParser):
             for rel, data, _ in self['connections']:
                 if isinstance(rel, bytes):
                     rel = rel.decode('utf-8')
-                #cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
+                #cc[rel] = re.compile('<a href="/title/tt(\d+?)/">(.*?)</a>').findall(data)
                 def get_conn(c):
                     r = {
                         'id': c[0],
@@ -432,7 +432,7 @@ class Imdb(SiteParser):
                     if len(description) == 2 and description[-1].strip() != '-':
                         r['description'] = description[-1].strip()
                     return r
-                cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
+                cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d+?)/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
 
             self['connections'] = cc
 
@@ -618,7 +618,7 @@ def get_movie_by_title(title, timeout=-1):
     url = "http://akas.imdb.com/find?" + params
     data = read_url(url, timeout=timeout, unicode=True)
     #if search results in redirect, get id of current page
-    r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
+    r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />'
     results = re.compile(r).findall(data)    
     if results:
         return results[0]
@@ -697,12 +697,12 @@ def get_movie_id(title, director='', year='', timeout=-1):
 
     data = read_url(url, timeout=timeout, unicode=True)
     #if search results in redirect, get id of current page
-    r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
+    r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d+?)/" />'
     results = re.compile(r).findall(data)    
     if results:
         return results[0]
     #otherwise get first result
-    r = '<td valign="top">.*?<a href="/title/tt(\d{7})/"'
+    r = '<td valign="top">.*?<a href="/title/tt(\d+?)/"'
     results = re.compile(r).findall(data)
     if results:
         return results[0]
@@ -713,7 +713,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
     results = duckduckgo.find(google_query, timeout=timeout)
     if results:
         for r in results[:2]:
-            imdbId = find_re(r[1], 'title/tt(\d{7})')
+            imdbId = find_re(r[1], 'title/tt(\d+)')
             if imdbId:
                 return imdbId
     #or nothing
@@ -740,7 +740,7 @@ def get_episodes(imdbId, season=None):
     if season:
         url += '?season=%d' % season
         data = cache.read_url(url).decode()
-        for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
+        for e in re.compile('<div data-const="tt(\d+?)".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
             episodes['S%02dE%02d' % (int(e[1]), int(e[2]))] = e[0]
     else:
         data = cache.read_url(url)