escape strings

This commit is contained in:
j 2024-09-11 22:52:01 +01:00
commit 41edea1862
20 changed files with 74 additions and 74 deletions

View file

@@ -31,13 +31,13 @@ def get_data(id):
'url': get_url(id)
}
html = read_url(data['url'])
data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
data['imdbId'] = find_re(html, r'imdb.com/title/tt(\d{7})')
if not data['imdbId']:
data['imdbId'] = _id_map.get(id, '')
data['title'] = strip_tags(find_re(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
data['year'] = find_re(html, '\(<a href="alpha1.html">(.*?)</a>\)')
data['title'] = strip_tags(find_re(html, r'<p class="name white">(.*?) \(<a href="alpha1.html">'))
data['year'] = find_re(html, r'\(<a href="alpha1.html">(.*?)</a>\)')
data['posters'] = []
poster = find_re(html, '<img src="(posters.*?)"')
poster = find_re(html, r'<img src="(posters.*?)"')
if poster:
poster = 'http://www.impawards.com/%s/%s' % (data['year'], poster)
data['posters'].append(poster)
@@ -46,7 +46,7 @@ def get_data(id):
result = result.replace('_xlg.html', '.html')
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = read_url(url)
result = find_re(html, '<a href = (\w*?_xlg.html)')
result = find_re(html, r'<a href = (\w*?_xlg.html)')
if result:
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = read_url(url)
@@ -62,7 +62,7 @@ def get_id(url):
split = split[4][:-5].split('_')
if split[-1] == 'xlg':
split.pop()
if find_re(split[-1], 'ver\d+$'):
if find_re(split[-1], r'ver\d+$'):
split.pop()
id = '%s/%s' % (year, '_'.join(split))
return id
@@ -72,7 +72,7 @@ def get_ids(page=None):
ids = []
if page:
html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout=-1)
results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
results = re.compile(r'<a href = \.\./(.*?)>', re.DOTALL).findall(html)
for result in results:
url = 'http://impawards.com/%s' % result
ids.append(get_id(url))