Use the new underscore-based URL format for links (spaces become "_", literal "_" becomes "%09")

This commit is contained in:
j 2014-07-10 10:07:06 +02:00
parent 35b16a40f7
commit 336d646c9c

View file

@ -129,15 +129,17 @@ class Imdb(models.Model):
data = ox.web.imdb.Imdb(self.imdb, timeout=timeout) data = ox.web.imdb.Imdb(self.imdb, timeout=timeout)
data = self.apply_patch(data) data = self.apply_patch(data)
def fix_links(t): def fix_links(t):
def quote_string(string):
return quote(string).replace('_', '%09').replace('%20', '_')
def fix_names(m): def fix_names(m):
return '<a href="/name=%s">%s</a>' % ( return '<a href="/name=%s">%s</a>' % (
quote(m.group(2).encode('utf-8')), m.group(2) quote_string(m.group(2).encode('utf-8')), m.group(2)
) )
t = re.sub('<a href="(/name/.*?/)">(.*?)</a>', fix_names, t) t = re.sub('<a href="(/name/.*?/)">(.*?)</a>', fix_names, t)
def fix_titles(m): def fix_titles(m):
return '<a href="/title=%s">%s</a>' % ( return '<a href="/title=%s">%s</a>' % (
quote(m.group(2).encode('utf-8')), m.group(2) quote_string(m.group(2).encode('utf-8')), m.group(2)
) )
t = re.sub('<a href="(/title/.*?/)">(.*?)</a>', fix_titles, t) t = re.sub('<a href="(/title/.*?/)">(.*?)</a>', fix_titles, t)