only use most common title per type, fixes #1826
This commit is contained in:
parent
f429ed8b07
commit
a8e76893d3
1 changed file with 22 additions and 5 deletions
|
@ -1,15 +1,12 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
import urllib2
|
|
||||||
import urllib
|
import urllib
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
import time
|
import time
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import ox
|
import ox
|
||||||
from ox import find_re, strip_tags
|
from ox import find_re, strip_tags
|
||||||
from ox.normalize import normalize_title, normalize_imdbid
|
|
||||||
import ox.cache
|
import ox.cache
|
||||||
|
|
||||||
from siteparser import SiteParser
|
from siteparser import SiteParser
|
||||||
|
@ -376,8 +373,28 @@ class Imdb(SiteParser):
|
||||||
if key in type:
|
if key in type:
|
||||||
stop_word = True
|
stop_word = True
|
||||||
break
|
break
|
||||||
if not stop_word and not type in types:
|
if not stop_word:
|
||||||
types[type] = t[1]
|
if not type in types:
|
||||||
|
types[type] = []
|
||||||
|
types[type].append(t[1])
|
||||||
|
titles = {}
|
||||||
|
for type in types:
|
||||||
|
for title in types[type]:
|
||||||
|
if not title in titles:
|
||||||
|
titles[title] = []
|
||||||
|
titles[title].append(type)
|
||||||
|
def select_title(type):
    # Choose the single title to keep for `type`.
    #
    # `types[type]` is the list of titles collected for this type and
    # `titles[t]` is the list of types that title `t` was recorded under
    # (both are built by the surrounding code). The title recorded under
    # the most types wins; on a tie the title listed first is kept.
    candidates = types[type]
    first_title = candidates[0]
    if len(candidates) <= 1:
        # Only one title for this type, nothing to compare.
        return first_title
    # max() returns the first maximal element, so earlier titles win ties.
    return max(candidates, key=lambda candidate: len(titles[candidate]))
|
||||||
|
|
||||||
|
types = {type: select_title(type) for type in types}
|
||||||
|
|
||||||
regexps = [
|
regexps = [
|
||||||
"^.+ \(imdb display title\) \(English title\)$",
|
"^.+ \(imdb display title\) \(English title\)$",
|
||||||
"^USA \(imdb display title\)$",
|
"^USA \(imdb display title\)$",
|
||||||
|
|
Loading…
Reference in a new issue