Sort title: exclude "I <verb>" phrases and "Los Angeles" from leading-article handling

This commit is contained in:
j 2010-07-23 12:10:26 +02:00
parent 0d2bd1b962
commit 79286b4619

View file

@ -17,16 +17,41 @@ for article in _articles:
if article[-1] not in ("'", '-'): article += ' ' if article[-1] not in ("'", '-'): article += ' '
_spArticles.append(article) _spArticles.append(article)
# Lowercase title prefixes that canonicalTitle() leaves untouched: a title
# whose lowercased form starts with one of these entries is returned as-is,
# before the leading-article check runs (e.g. 'Los Angeles Plays Itself').
# NOTE(review): presumably 'los' and 'i' are also listed as articles in
# _articles (not visible here), which is why these phrases need an
# exception -- confirm against the article list.
# Matching uses str.startswith, so an entry with a trailing space only
# matches when the word ends there, while an entry without one also matches
# longer words ('i love' matches 'i loved ...').
_noarticles = (
'los angeles',
'i am ',
'i be area',
'i call ',
'i come ',
'i confess',
'i hired ',
'i killed ',
'i know ',
'i live ',
'i love',
'i married',
'i never',
'i shot',
'i start',
'i was',
)
def canonicalTitle(title): def canonicalTitle(title):
"""Return the title in the canonic format 'Movie Title, The'. """Return the title in the canonic format 'Movie Title, The'.
>>> canonicalTitle('The Movie Title') >>> canonicalTitle('The Movie Title')
'Movie Title, The' 'Movie Title, The'
>>> canonicalTitle('Los Angeles Plays Itself')
'Los Angeles Plays Itself'
""" """
try: try:
if _articlesDict.has_key(title.split(', ')[-1].lower()): return title if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
except IndexError: pass except IndexError: pass
ltitle = title.lower() ltitle = title.lower()
for start in _noarticles:
if ltitle.startswith(start):
return title
for article in _spArticles: for article in _spArticles:
if ltitle.startswith(article): if ltitle.startswith(article):
lart = len(article) lart = len(article)