titles without countries

2015-05-04 10:53:17 +02:00 · 2015-05-04 10:53:17 +02:00 · 5bf53ba463
commit 5bf53ba463
parent b147c61f5c
2 changed files with 4 additions and 5 deletions
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -476,7 +476,6 @@ class Imdb(SiteParser):
                                alt[title].append(c)
            self['alternativeTitles'] = []
            for t in sorted(alt, key=lambda a: sorted(alt[a])):
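-                if alt[t]:
-                    countries = sorted([normalize_country_name(c) or c for c in alt[t]])
-                    self['alternativeTitles'].append((t, countries))
+                countries = sorted([normalize_country_name(c) or c for c in alt[t]])
+                self['alternativeTitles'].append((t, countries))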
            if not self['alternativeTitles']:
--- a/ox/web/ubu.py
+++ b/ox/web/ubu.py
@@ -44,9 +44,9 @@ def get_data(url):
        if match:
            m['title'] = strip_tags(decode_html(match[0])).strip()
    if not 'title' in m:
-        match = re.compile("<title>.*?&amp;(.*?)</title>").findall(data)
+        match = re.compile("<title>.*?&amp;(.*?)</title>", re.DOTALL).findall(data)
        if match:
-            m['title'] = match[0].strip()
+            m['title'] = re.sub('\s+', ' ', match[0]).strip()
            if ' - ' in m['title']:
                m['title'] = m['title'].split(' - ', 1)[-1]
    if 'title' in m: