some criterion fixes
This commit is contained in:
parent
fc7769c4cb
commit
758acfe01b
1 changed file with 22 additions and 7 deletions
|
@@ -36,15 +36,24 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
|||
html = ox.cache.read_url(data["url"], timeout=timeout)
|
||||
data["number"] = find_re(html, "<li>Spine #(\d+)")
|
||||
|
||||
data["title"] = decode_html(find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>"))
|
||||
data["title"] = decode_html(find_re(html, "<h1 class=\"header__primarytitle\".*?>(.*?)</h1>"))
|
||||
data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip()
|
||||
data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>"))
|
||||
results = find_re(html, '<div class="left_column">(.*?)</div>')
|
||||
results = re.compile("<li>(.*?)</li>").findall(results)
|
||||
data["country"] = results[0]
|
||||
data["year"] = results[1]
|
||||
results = find_re(html, '<ul class="film-meta-list">(.*?)</ul>')
|
||||
info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results)
|
||||
info = {k: strip_tags(v).strip() for k, v in info}
|
||||
if 'director' in info:
|
||||
data['director'] =info['director']
|
||||
if 'countryOfOrigin' in info:
|
||||
data['country'] =info['countryOfOrigin']
|
||||
if 'inLanguage' in info:
|
||||
data['language'] =info['inLanguage']
|
||||
for v in re.compile('<li>(.*?)</li>', re.DOTALL).findall(results):
|
||||
if 'datePublished' in v:
|
||||
data['year'] = strip_tags(v).strip()
|
||||
elif 'duration' in v:
|
||||
data['duration'] = strip_tags(v).strip()
|
||||
data["synopsis"] = decode_html(strip_tags(find_re(html,
|
||||
"<div class=\"content_block last\">.*?<p>(.*?)</p>")))
|
||||
"<div class=\"product-summary\".*?>.*?<p>(.*?)</p>")))
|
||||
|
||||
result = find_re(html, "<div class=\"purchase\">(.*?)</div>")
|
||||
if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
|
||||
|
@@ -63,6 +72,12 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
|||
else:
|
||||
data["posters"] = []
|
||||
data['posters'] = [re.sub('(\?\d+)$', '', p) for p in data['posters']]
|
||||
data['posters'] = [p for p in data['posters'] if p]
|
||||
|
||||
posters = find_re(html, '<div class="product-box-art".*?>(.*?)</div>')
|
||||
for poster in re.compile('<img src="(.*?)"').findall(posters):
|
||||
data['posters'].append(poster)
|
||||
|
||||
result = find_re(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
|
||||
if result:
|
||||
data["stills"] = [result]
|
||||
|
|
Loading…
Reference in a new issue