more raw regexp strings

2024-08-30 13:30:47 +02:00 · 2024-08-30 13:30:47 +02:00 · ae10c5c9b9
commit ae10c5c9b9
parent 29a309f15e
11 changed files with 45 additions and 45 deletions
--- a/ox/web/allmovie.py
+++ b/ox/web/allmovie.py
@@ -43,7 +43,7 @@ def get_data(id):
    data['synopsis'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
    data['themes'] = parse_list(html, 'themes')
    data['types'] = parse_list(html, 'types')
-    data['year'] = find_re(html, '<span class="year">.*?(\d+)')
+    data['year'] = find_re(html, r'<span class="year">.*?(\d+)')
    #data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
    data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
    #html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
--- a/ox/web/apple.py
+++ b/ox/web/apple.py
@@ -51,11 +51,11 @@ def get_movie_data(title, director):
            'User-Agent': USER_AGENT
        }
        html = read_url(url, headers=headers, unicode=True)
-        results = re.compile('"(' + host + '.*?poster\.jpg)"').findall(html)
+        results = re.compile(r'"(' + host + r'.*?poster\.jpg)"').findall(html)
        if results:
            data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg')
        html = read_url(url + 'includes/playlists/web.inc', headers=headers, unicode=True)
-        results = re.compile('"(' + host + '\S+\.mov)"').findall(html)
+        results = re.compile(r'"(' + host + r'\S+\.mov)"').findall(html)
        if results:
            data['trailer'] = results[-1]
    return data
--- a/ox/web/arsenalberlin.py
+++ b/ox/web/arsenalberlin.py
@@ -28,7 +28,7 @@ def get_data(id, language='en'):
    if m:
        data['director'] = m[0]

-    m = re.compile("caUI.initImageScroller\(\[\{url:'(.*?)'").findall(html)
+    m = re.compile(r"caUI.initImageScroller\(\[\{url:'(.*?)'").findall(html)
    if m:
        data['image'] = m[0]

--- a/ox/web/google.py
+++ b/ox/web/google.py
@@ -60,7 +60,7 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
            url += '&start=%d' % offset
        data = read_url(url, timeout=timeout)
        data = re.sub('<span class="f">(.*?)</span>', '\\1', data)
-        for a in re.compile('<a href="(htt\S+?)".*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>').findall(data):
+        for a in re.compile(r'<a href="(htt\S+?)".*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>').findall(data):
            results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
            if len(results) >= max_results:
                break
--- a/ox/web/piratecinema.py
+++ b/ox/web/piratecinema.py
@@ -8,7 +8,7 @@ from ox.net import read_url
 def get_poster_url(id):
    url = 'http://piratecinema.org/posters/'
    html = read_url(url).decode('utf-8')
-    results = re.compile('src="(.+)" title=".+\((\d{6}\d+)\)"').findall(html)
+    results = re.compile(r'src="(.+)" title=".+\((\d{6}\d+)\)"').findall(html)
    for result in results:
        if result[1] == id:
            return url + result[0]
--- a/ox/web/wikipedia.py
+++ b/ox/web/wikipedia.py
@@ -81,36 +81,36 @@ def get_movie_data(wikipedia_url):
    if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit():
        del filmbox['amg_id']
    if 'Allmovie movie' in data:
-        filmbox['amg_id'] = find_re(data, 'Allmovie movie\|.*?(\d+)')
+        filmbox['amg_id'] = find_re(data, r'Allmovie movie\|.*?(\d+)')
    elif 'Allmovie title' in data:
-        filmbox['amg_id'] = find_re(data, 'Allmovie title\|.*?(\d+)')
+        filmbox['amg_id'] = find_re(data, r'Allmovie title\|.*?(\d+)')

    if 'Official website' in data:
-        filmbox['website'] = find_re(data, 'Official website\|(.*?)}').strip()
+        filmbox['website'] = find_re(data, r'Official website\|(.*?)}').strip()

-    r = re.compile('{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
+    r = re.compile(r'{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
    if r:
        filmbox['imdb_id'] = r[0]
    else:
-        r = re.compile('{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
+        r = re.compile(r'{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
        if r:
            filmbox['imdb_id'] = r[0]

-    r = re.compile('{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
+    r = re.compile(r'{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
    if r:
        filmbox['archiveorg_id'] = r[0]

-    r = re.compile('{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data)
+    r = re.compile(r'{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data)
    if r:
        filmbox['mojo_id'] = r[0].replace('id=', '')

-    r = re.compile('{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data)
+    r = re.compile(r'{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data)
    if r:
        filmbox['rottentomatoes_id'] = r[0].replace('id=', '')
    if 'google video' in data:
-        filmbox['google_video_id'] = find_re(data, 'google video\|.*?(\d*?)[\|}]')
+        filmbox['google_video_id'] = find_re(data, r'google video\|.*?(\d*?)[\|}]')
    if 'DEFAULTSORT' in data:
-        filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
+        filmbox['title_sort'] = find_re(data, r'''\{\{DEFAULTSORT:(.*?)\}\}''')
    return filmbox

 def get_image_url(name):