diff --git a/ox/format.py b/ox/format.py
index 83756c1..2aa0868 100644
--- a/ox/format.py
+++ b/ox/format.py
@@ -236,7 +236,7 @@ def int_value(strValue, default=''):
''
"""
try:
- val = re.compile('(\d+)').findall(str(strValue).strip())[0]
+ val = re.compile(r'(\d+)').findall(str(strValue).strip())[0]
except:
val = default
return val
@@ -253,7 +253,7 @@ def float_value(strValue, default=''):
''
"""
try:
- val = re.compile('([\d.]+)').findall(str(strValue).strip())[0]
+ val = re.compile(r'([\d.]+)').findall(str(strValue).strip())[0]
except:
val = default
return val
diff --git a/ox/html.py b/ox/html.py
index f7ca816..06ae96f 100644
--- a/ox/html.py
+++ b/ox/html.py
@@ -178,10 +178,10 @@ def highlight(text, query, hlClass="hl"):
"""
if query:
text = text.replace('
', '|')
- query = re.escape(query).replace('\ ', '.')
+ query = re.escape(query).replace(r'\ ', '.')
m = re.compile("(%s)" % query, re.IGNORECASE).findall(text)
for i in m:
- text = re.sub("(%s)" % re.escape(i).replace('\ ', '.'), '\\1' % hlClass, text)
+ text = re.sub(r"(%s)" % re.escape(i).replace(r'\ ', '.'), r'\1' % hlClass, text)  # raw string: single backslash keeps \1 a group backreference
text = text.replace('|', '
')
return text
@@ -234,7 +234,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
' '
'''
if not tags:
- valid_url = '^((https?:\/\/|\/|mailto:).*?)'
+ valid_url = r'^((https?:\/\/|\/|mailto:).*?)'
tags = [
# inline formatting
{'name': 'b'},
@@ -300,8 +300,8 @@ def sanitize_html(html, tags=None, global_attributes=[]):
'optional': ['width', 'height'],
'required': ['src'],
'validation': {
- 'width': '^\d+$',
- 'height': '^\d+$',
+ 'width': r'^\d+$',
+ 'height': r'^\d+$',
'src': valid_url
}
},
@@ -310,8 +310,8 @@ def sanitize_html(html, tags=None, global_attributes=[]):
'optional': ['width', 'height'],
'required': ['src'],
'validation': {
- 'width': '^\d+$',
- 'height': '^\d+$',
+ 'width': r'^\d+$',
+ 'height': r'^\d+$',
'src': valid_url
},
},
@@ -319,8 +319,8 @@ def sanitize_html(html, tags=None, global_attributes=[]):
{'name': 'figcaption'}
]
- tag_re = re.compile('<(/)?([^\ /]+)(.*?)(/)?>')
- attr_re = re.compile('([^=\ ]+)="([^"]+)"')
+ tag_re = re.compile(r'<(/)?([^\ /]+)(.*?)(/)?>')
+ attr_re = re.compile(r'([^=\ ]+)="([^"]+)"')
escaped = {}
level = 0
@@ -338,7 +338,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
if '[]' in validation:
html = re.sub(
- re.compile('\[((https?:\/\/|\/).+?) (.+?)\]', re.IGNORECASE),
+ re.compile(r'\[((https?:\/\/|\/).+?) (.+?)\]', re.IGNORECASE),
'\\3', html)
parts = split_tags(html)
diff --git a/ox/movie.py b/ox/movie.py
index 54ede0c..314df47 100644
--- a/ox/movie.py
+++ b/ox/movie.py
@@ -25,7 +25,7 @@ The Title[ ([SXX][EYY[+ZZ|-ZZ]])[ Episode Title]][.Version][.Part XY[.Part Title
def format_path(data, directory_key='director'):
def format_underscores(string):
- return re.sub('^\.|\.$|:|/|\?|<|>', '_', string)
+ return re.sub(r'^\.|\.$|:|/|\?|<|>', '_', string)
director = data['directorSort'] or ['Unknown Director']
title = data['seriesTitle' if data['isEpisode'] else 'title'] or 'Untitled'
year = data['seriesYear' if data['isEpisode'] else 'year'] or None
@@ -199,14 +199,14 @@ def parse_path(path, directory_key='director'):
string = re.sub('^_', '.', string)
string = re.sub('_$', '.', string)
# '_.foo$' or '_ (' is '?'
- string = re.sub(re.compile('_(?=(\.\w+$| \())', re.U), '?', string)
+ string = re.sub(re.compile(r'_(?=(\.\w+$| \())', re.U), '?', string)
# ' _..._ ' is '<...>'
string = re.sub('(?<= )_(.+)_(?= )', '<\g<1>>', string)
# 'foo_bar' or 'foo _ bar' is '/'
- string = re.sub(re.compile('(?<=\w)_(?=\w)', re.U), '/', string)
+ string = re.sub(re.compile(r'(?<=\w)_(?=\w)', re.U), '/', string)
string = re.sub(' _ ', ' / ', string)
# 'foo_ ' is ':'
- string = re.sub(re.compile('(?<=[\w\)\]])_ ', re.U), ': ', string)
+ string = re.sub(re.compile(r'(?<=[\w\)\]])_ ', re.U), ': ', string)
string = unicodedata.normalize('NFD', string)
return string
@@ -238,14 +238,14 @@ def parse_path(path, directory_key='director'):
# title, year
data['title'] = data['year'] = None
if title:
- match = re.search(' \(\d{4}(-(\d{4})?)?\)$', title)
+ match = re.search(r' \(\d{4}(-(\d{4})?)?\)$', title)
data['title'] = title[:-len(match.group(0))] if match else title
data['year'] = match.group(0)[2:-1] if match else None
file_title = re.sub('[/:]', '_', data['title'])
# (remove title from beginning of filename if the rest contains a dot)
file = re.sub('^' + re.escape(file_title) + '(?=.*\.)', '', file)
# (split by nospace+dot+word, but remove spaces preceding extension)
- parts = re.split('(?(.*?)')).strip()
data['themes'] = parse_list(html, 'themes')
data['types'] = parse_list(html, 'types')
- data['year'] = find_re(html, '.*?(\d+)')
+ data['year'] = find_re(html, r'.*?(\d+)')
#data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('(.*?)', '\\1', data)
- for a in re.compile('(.*?).*?(.*?)<\/span>').findall(data):
+ for a in re.compile(r'(.*?).*?(.*?)<\/span>').findall(data):
results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
if len(results) >= max_results:
break
diff --git a/ox/web/piratecinema.py b/ox/web/piratecinema.py
index c452f04..e896c9e 100644
--- a/ox/web/piratecinema.py
+++ b/ox/web/piratecinema.py
@@ -8,7 +8,7 @@ from ox.net import read_url
def get_poster_url(id):
url = 'http://piratecinema.org/posters/'
html = read_url(url).decode('utf-8')
- results = re.compile('src="(.+)" title=".+\((\d{6}\d+)\)"').findall(html)
+ results = re.compile(r'src="(.+)" title=".+\((\d{6}\d+)\)"').findall(html)
for result in results:
if result[1] == id:
return url + result[0]
diff --git a/ox/web/wikipedia.py b/ox/web/wikipedia.py
index 5d86655..aad0aba 100644
--- a/ox/web/wikipedia.py
+++ b/ox/web/wikipedia.py
@@ -81,36 +81,36 @@ def get_movie_data(wikipedia_url):
if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit():
del filmbox['amg_id']
if 'Allmovie movie' in data:
- filmbox['amg_id'] = find_re(data, 'Allmovie movie\|.*?(\d+)')
+ filmbox['amg_id'] = find_re(data, r'Allmovie movie\|.*?(\d+)')
elif 'Allmovie title' in data:
- filmbox['amg_id'] = find_re(data, 'Allmovie title\|.*?(\d+)')
+ filmbox['amg_id'] = find_re(data, r'Allmovie title\|.*?(\d+)')
if 'Official website' in data:
- filmbox['website'] = find_re(data, 'Official website\|(.*?)}').strip()
+ filmbox['website'] = find_re(data, r'Official website\|(.*?)}').strip()
- r = re.compile('{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
+ r = re.compile(r'{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
if r:
filmbox['imdb_id'] = r[0]
else:
- r = re.compile('{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
+ r = re.compile(r'{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
if r:
filmbox['imdb_id'] = r[0]
- r = re.compile('{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
+ r = re.compile(r'{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['archiveorg_id'] = r[0]
- r = re.compile('{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data)
+ r = re.compile(r'{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['mojo_id'] = r[0].replace('id=', '')
- r = re.compile('{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data)
+ r = re.compile(r'{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['rottentomatoes_id'] = r[0].replace('id=', '')
if 'google video' in data:
- filmbox['google_video_id'] = find_re(data, 'google video\|.*?(\d*?)[\|}]')
+ filmbox['google_video_id'] = find_re(data, r'google video\|.*?(\d*?)[\|}]')
if 'DEFAULTSORT' in data:
- filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
+ filmbox['title_sort'] = find_re(data, r'''\{\{DEFAULTSORT:(.*?)\}\}''')
return filmbox
def get_image_url(name):
diff --git a/setup.py b/setup.py
index fd7e507..4e9cd48 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ def get_version():
f = open(changelog)
head = f.read().strip().split('\n')[0]
f.close()
- rev = re.compile('\d+\.\d+\.(\d+)').findall(head)
+ rev = re.compile(r'\d+\.\d+\.(\d+)').findall(head)
if rev:
return '3.0.%s' % rev[0]
return '3.0.x'