escape strings

This commit is contained in:
j 2024-09-11 22:52:01 +01:00
commit 41edea1862
20 changed files with 74 additions and 74 deletions

View file

@@ -36,7 +36,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
except:
html = read_url(data["url"], timeout=timeout).decode('utf-8', 'ignore')
data["number"] = find_re(html, "<b>Spine #(\d+)")
data["number"] = find_re(html, r"<b>Spine #(\d+)")
data["title"] = decode_html(find_re(html, "<h1 class=\"header__primarytitle\".*?>(.*?)</h1>"))
data["title"] = data["title"].split(' \u2014 The Television Version')[0].strip()
@@ -77,7 +77,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
data["posters"] = [result.replace("_w100", "")]
else:
data["posters"] = []
data['posters'] = [re.sub('(\?\d+)$', '', p) for p in data['posters']]
data['posters'] = [re.sub(r'(\?\d+)$', '', p) for p in data['posters']]
data['posters'] = [p for p in data['posters'] if p]
posters = find_re(html, '<div class="product-box-art".*?>(.*?)</div>')
@@ -103,12 +103,12 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
def get_ids(page=None):
    """Return all film ids linked from the Criterion browse list.

    Fetches the spine-number-sorted browse list, collects every
    ``films/<digits>-`` id found in it, then fetches each ``boxsets/<name>``
    page referenced there and collects the film ids found on those pages too.

    Args:
        page: Unused; kept so existing callers that pass it keep working.

    Returns:
        A list of unique id strings, sorted by their integer value.
    """
    # Raw strings keep ``\d`` as a regex escape instead of an invalid
    # Python string escape; compile once and reuse for every page.
    film_id_re = re.compile(r"films/(\d+)-")
    boxset_re = re.compile(r"boxsets/(.*?)\"")

    listing = read_url("https://www.criterion.com/shop/browse/list?sort=spine_number", unicode=True)
    ids = set(film_id_re.findall(listing))

    # Distinct names here avoid rebinding the list being iterated.
    for boxset in boxset_re.findall(listing):
        boxset_html = read_url("https://www.criterion.com/boxsets/" + boxset, unicode=True)
        ids.update(film_id_re.findall(boxset_html))

    return sorted(ids, key=int)