ubu cleanup
This commit is contained in:
parent
5c883e19e6
commit
b147c61f5c
1 changed file with 5 additions and 1 deletion
|
@ -39,6 +39,10 @@ def get_data(url):
|
||||||
if 'title' in m:
|
if 'title' in m:
|
||||||
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
|
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
|
||||||
|
|
||||||
|
if not 'title' in m:
|
||||||
|
match = re.compile('<span id="ubuwork">(.*?)</span>').findall(data)
|
||||||
|
if match:
|
||||||
|
m['title'] = strip_tags(decode_html(match[0])).strip()
|
||||||
if not 'title' in m:
|
if not 'title' in m:
|
||||||
match = re.compile("<title>.*?&(.*?)</title>").findall(data)
|
match = re.compile("<title>.*?&(.*?)</title>").findall(data)
|
||||||
if match:
|
if match:
|
||||||
|
@ -72,7 +76,7 @@ def get_data(url):
|
||||||
if txt:
|
if txt:
|
||||||
if len(txt) > 1 and txt[0].strip() == m.get('title'):
|
if len(txt) > 1 and txt[0].strip() == m.get('title'):
|
||||||
txt = txt[1:]
|
txt = txt[1:]
|
||||||
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].strip()
|
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].split('RELATED')[0].strip()
|
||||||
y = re.compile('\((\d{4})\)').findall(data)
|
y = re.compile('\((\d{4})\)').findall(data)
|
||||||
if y:
|
if y:
|
||||||
m['year'] = int(y[0])
|
m['year'] = int(y[0])
|
||||||
|
|
Loading…
Reference in a new issue