diff --git a/ox/web/ubu.py b/ox/web/ubu.py index dd66f8d..aab3dd4 100644 --- a/ox/web/ubu.py +++ b/ox/web/ubu.py @@ -39,6 +39,10 @@ def get_data(url): if 'title' in m: m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title']) + if not 'title' in m: + match = re.compile('(.*?)').findall(data) + if match: + m['title'] = strip_tags(decode_html(match[0])).strip() if not 'title' in m: match = re.compile(".*?&(.*?)").findall(data) if match: @@ -72,7 +76,7 @@ def get_data(url): if txt: if len(txt) > 1 and txt[0].strip() == m.get('title'): txt = txt[1:] - m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].strip() + m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].split('RELATED')[0].strip() y = re.compile('\((\d{4})\)').findall(data) if y: m['year'] = int(y[0])