Escape strings: use raw string literals for regex patterns
This commit is contained in:
parent
ae10c5c9b9
commit
41edea1862
20 changed files with 74 additions and 74 deletions
|
|
@@ -31,13 +31,13 @@ def get_data(id):
|
|||
'url': get_url(id)
|
||||
}
|
||||
html = read_url(data['url'])
|
||||
data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
|
||||
data['imdbId'] = find_re(html, r'imdb.com/title/tt(\d{7})')
|
||||
if not data['imdbId']:
|
||||
data['imdbId'] = _id_map.get(id, '')
|
||||
data['title'] = strip_tags(find_re(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
|
||||
data['year'] = find_re(html, '\(<a href="alpha1.html">(.*?)</a>\)')
|
||||
data['title'] = strip_tags(find_re(html, r'<p class="name white">(.*?) \(<a href="alpha1.html">'))
|
||||
data['year'] = find_re(html, r'\(<a href="alpha1.html">(.*?)</a>\)')
|
||||
data['posters'] = []
|
||||
poster = find_re(html, '<img src="(posters.*?)"')
|
||||
poster = find_re(html, r'<img src="(posters.*?)"')
|
||||
if poster:
|
||||
poster = 'http://www.impawards.com/%s/%s' % (data['year'], poster)
|
||||
data['posters'].append(poster)
|
||||
|
|
@@ -46,7 +46,7 @@ def get_data(id):
|
|||
result = result.replace('_xlg.html', '.html')
|
||||
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
|
||||
html = read_url(url)
|
||||
result = find_re(html, '<a href = (\w*?_xlg.html)')
|
||||
result = find_re(html, r'<a href = (\w*?_xlg.html)')
|
||||
if result:
|
||||
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
|
||||
html = read_url(url)
|
||||
|
|
@@ -62,7 +62,7 @@ def get_id(url):
|
|||
split = split[4][:-5].split('_')
|
||||
if split[-1] == 'xlg':
|
||||
split.pop()
|
||||
if find_re(split[-1], 'ver\d+$'):
|
||||
if find_re(split[-1], r'ver\d+$'):
|
||||
split.pop()
|
||||
id = '%s/%s' % (year, '_'.join(split))
|
||||
return id
|
||||
|
|
@@ -72,7 +72,7 @@ def get_ids(page=None):
|
|||
ids = []
|
||||
if page:
|
||||
html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout=-1)
|
||||
results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
|
||||
results = re.compile(r'<a href = \.\./(.*?)>', re.DOTALL).findall(html)
|
||||
for result in results:
|
||||
url = 'http://impawards.com/%s' % result
|
||||
ids.append(get_id(url))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue