only use most common title per type, fixes #1826

This commit is contained in:
j 2013-08-24 17:30:37 +02:00
parent f429ed8b07
commit a8e76893d3

View file

@ -1,15 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import urllib2
import urllib import urllib
import re import re
import os
import time import time
import unicodedata import unicodedata
import ox import ox
from ox import find_re, strip_tags from ox import find_re, strip_tags
from ox.normalize import normalize_title, normalize_imdbid
import ox.cache import ox.cache
from siteparser import SiteParser from siteparser import SiteParser
@ -376,8 +373,28 @@ class Imdb(SiteParser):
if key in type: if key in type:
stop_word = True stop_word = True
break break
if not stop_word and not type in types: if not stop_word:
types[type] = t[1] if not type in types:
types[type] = []
types[type].append(t[1])
titles = {}
for type in types:
for title in types[type]:
if not title in titles:
titles[title] = []
titles[title].append(type)
def select_title(type):
    """Return the one title to keep for ``type``.

    Reads two mappings from the enclosing scope: ``types`` (type -> list of
    candidate titles) and ``titles`` (title -> list of types that title was
    collected under). When a type has several candidates, the candidate
    listed under the most types is chosen; on a tie the earliest candidate
    wins.
    """
    candidates = types[type]
    fallback = candidates[0]
    # A lone candidate is returned as-is, without consulting ``titles``.
    if len(candidates) <= 1:
        return fallback
    # max() yields the first item with the highest key, so ties resolve to
    # the earliest candidate in the list.
    return max(candidates, key=lambda name: len(titles[name]))
types = {type: select_title(type) for type in types}
regexps = [ regexps = [
"^.+ \(imdb display title\) \(English title\)$", "^.+ \(imdb display title\) \(English title\)$",
"^USA \(imdb display title\)$", "^USA \(imdb display title\)$",