ubu cleanup
This commit is contained in:
parent
5c883e19e6
commit
b147c61f5c
1 changed file with 5 additions and 1 deletion
|
@ -39,6 +39,10 @@ def get_data(url):
|
|||
if 'title' in m:
|
||||
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
|
||||
|
||||
if not 'title' in m:
|
||||
match = re.compile('<span id="ubuwork">(.*?)</span>').findall(data)
|
||||
if match:
|
||||
m['title'] = strip_tags(decode_html(match[0])).strip()
|
||||
if not 'title' in m:
|
||||
match = re.compile("<title>.*?&(.*?)</title>").findall(data)
|
||||
if match:
|
||||
|
@ -72,7 +76,7 @@ def get_data(url):
|
|||
if txt:
|
||||
if len(txt) > 1 and txt[0].strip() == m.get('title'):
|
||||
txt = txt[1:]
|
||||
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].strip()
|
||||
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].split('RELATED')[0].strip()
|
||||
y = re.compile('\((\d{4})\)').findall(data)
|
||||
if y:
|
||||
m['year'] = int(y[0])
|
||||
|
|
Loading…
Reference in a new issue