some static id mapping

This commit is contained in:
j 2011-11-03 11:21:49 +01:00
parent 2b7268157b
commit cb8ebbe74a

View file

@ -6,7 +6,6 @@ from ox.cache import readUrlUnicode
from ox.html import stripTags from ox.html import stripTags
from ox.text import findRe from ox.text import findRe
import imdb
def getData(id): def getData(id):
''' '''
@ -24,6 +23,8 @@ def getData(id):
} }
html = readUrlUnicode(data['url']) html = readUrlUnicode(data['url'])
data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})') data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
if not data['imdbId']:
data['imdbId'] = _id_map.get(id, '')
data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">')) data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)') data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
data['posters'] = [] data['posters'] = []
@ -44,6 +45,7 @@ def getData(id):
else: else:
poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)"')) poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)"'))
data['posters'].append(poster) data['posters'].append(poster)
return data return data
def getId(url): def getId(url):
@ -83,6 +85,217 @@ def getUrl(id):
url = u"http://www.impawards.com/%s_ver1.html" % id url = u"http://www.impawards.com/%s_ver1.html" % id
return url return url
# Static fallback table: impawards id ('<year>/<poster page name>') -> IMDb
# title id (7 digits, without the 'tt' prefix).  getData() consults it only
# when no 'imdb.com/title/tt...' link can be found in the poster page HTML.
# Every value must stay a 7-digit string so that it has the same shape as the
# ids extracted by the '\d{7}' pattern in getData().
_id_map = {
    '1933/forty_second_street': '0024034',
    '1933/tarzan_the_fearless': '0024645',
    '1935/informer': '0026529',
    # was a copy of the 'informer' id above
    '1935/thirty_nine_steps': '0026029',
    '1935/top_hat': '0027125',
    '1938/charlie_chaplin_cavalcade': '0284687',
    # was '035855' (leading zero missing, only 6 digits)
    '1943/falcon_and_the_co-eds': '0035855',
    '1969/angel_angel_down_we_go': '0065602',
    '1970/crimson_altar': '0062833',
    '1975/man_who_would_be_king_ver1': '0073341',
    '1975/picnic_at_hanging_rock_ver1': '0073540',
    '1979/electric_horseman_ver1': '0079100',
    '1980/caligula_ver1': '0080491',
    '1980/hollywood_knights_ver1': '0080881',
    '1981/history_of_the_world_part_i': '0082517',
    '1981/sea_wolves': '0081470',
    '1983/krull_ver1': '0085811',
    '1985/warriors_of_the_wind': '0087544',
    '1989/friday_the_thirteenth_part_viii_ver1': '0097388',
    '1989/high_hopes': '0095302',
    '1989/millenium': '0097883',
    '1989/story_of_women': '0096336',
    '1990/edward_scissorhands_ver1': '0099487',
    '1991/freddys_dead_ver1': '0101917',
    '1993/robocop_three_ver1': '0107978',
    '1993/waynes_world_two_ver1': '0108525',
    '1994/above_the_rim_ver1': '0109035',
    '1994/helas_pour_moi': '0107175',
    '1994/house_of_the_spirits_ver1': '0107151',
    '1994/i_dont_want_to_talk_about_it': '0106678',
    '1994/in_custody': '0107199',
    '1994/ladybird_ladybird': '0110296',
    '1994/leon_the_pig_farmer': '0104710',
    '1994/love_after_love': '0103710',
    '1994/l_six_two_seven': '0104658',
    '1994/martin_lawrence_you_so_crazy_ver1': '0111804',
    '1994/savage_nights': '0105032',
    '1994/sex_drugs_and_democracy': '0111135',
    '1995/bye_bye_love': '0112606',
    '1995/cold_comfort_farm': '0112701',
    '1995/gumby_the_movie': '0113234',
    '1995/les_miserables': '0113828',
    '1995/mystery_of_rampo': '0110943',
    '1995/pharaohs_army': '0114122',
    '1995/pure_formality': '0110917',
    '1995/quick_and_the_dead_ver1': '0114214',
    '1995/reflections_in_the_dark': '0110956',
    '1995/safe_ver1': '0114323',
    '1995/search_and_destroy': '0114371',
    '1995/secret_of_roan_inish_ver1': '0111112',
    '1995/underneath': '0114788',
    '1996/ghost_in_the_shell': '0113568',
    '1996/hate': '0113247',
    '1996/horseman_on_the_roof': '0113362',
    '1996/kids_in_the_hall_brain_candy': '0116768',
    '1996/maybe_maybe_not': '0109255',
    '1996/prisoner_of_the_mountains': '0116754',
    '1997/fifth_element_ver1': '0119116',
    '1997/fools_rush_in_ver1': '0119141',
    '1997/gi_jane_ver1': '0119173',
    '1997/happy_together_ver1': '0118845',
    '1997/lilies': '0116882',
    '1997/mouth_to_mouth': '0112546',
    '1997/mr_nice_guy': '0117786',
    '1997/nenette_and_boni': '0117221',
    '1997/paperback_romance': '0110405',
    '1997/second_jungle_book': '0120087',
    '1997/single_girl': '0113057',
    '1997/super_speedway': '0120245',
    '1997/temptress_moon': '0116295',
    '1998/alarmist': '0119534',
    '1998/barneys_great_adventure_the_movie': '0120598',
    '1998/bulworth_ver1': '0118798',
    '1998/celebration': '0154420',
    '1998/east_palace_west_palace': '0119007',
    '1998/hurricane_streets': '0119338',
    '1998/i_married_a_strange_person': '0119346',
    '1998/inheritors': '0141824',
    '1998/killing_time': '0140312',
    '1998/live_flesh': '0118819',
    '1998/music_from_another_room': '0119734',
    '1998/post_coitum_ver1': '0119923',
    '1998/steam_the_turkish_bath': '0119248',
    '1998/velocity_of_gary': '0120878',
    '1999/after_life': '0165078',
    '1999/emperor_and_the_assassin': '0162866',
    '1999/fantasia_two_thousand': '0120910',
    '1999/get_bruce': '0184510',
    '1999/god_said_ha': '0119207',
    '1999/jawbreaker': '0155776',
    '1999/jeanne_and_the_perfect_guy': '0123923',
    '1999/king_and_i': '0160429',
    '1999/lovers_of_the_arctic_circle': '0133363',
    '1999/plunkett_and_macleane': '0134033',
    '1999/pokemon_the_first_movie': '0190641',
    '1999/school_of_flesh': '0157208',
    '1999/splendor': '0127296',
    '1999/stranger_in_the_kingdom': '0126680',
    '1999/train_of_life': '0170705',
    '1999/twice_upon_a_yesterday': '0138590',
    '1999/whiteboys': '0178988',
    '1999/wildfire': '0194544',
    '1999/windhorse': '0169388',
    '2000/claim': '0218378',
    '2000/color_of_paradise': '0191043',
    '2000/criminal_lovers': '0205735',
    '2000/everlasting_piece': '0218182',
    '2000/girl_on_the_bridge_ver1': '0144201',
    '2000/godzilla_two_thousand': '0188640',
    '2000/goya_in_bordeaux': '0210717',
    '2000/mad_about_mambo': '0156757',
    '2000/picking_up_the_pieces': '0192455',
    '2000/pokemon_the_movie_2000': '0257001',
    '2000/seven_days_to_live': '0221928',
    '2000/south_of_heaven_west_of_hell': '0179473',
    '2000/suzhou_river': '0234837',
    '2000/time_for_drunken_horses': '0259072',
    '2000/venus_beauty_institute': '0174330',
    '2001/circle': '0368646',
    '2001/devils_backbone': '0256009',
    '2001/kill_me_later': '0243595',
    '2001/king_is_dancing': '0244173',
    '2001/learning_curve': '0219126',
    '2001/marco_polo__return_to_xanadu_ver1': '0296074',
    '2001/me_you_them': '0244504',
    '2001/our_lady_of_the_assassins': '0250809',
    '2001/pinero': '0261066',
    '2001/pokemon_three_the_movie_ver1': '0266860',
    '2001/scratch': '0143861',
    '2001/vampire_hunter_d_bloodlust_ver1': '0216651',
    '2002/el_bosque_animado': '0310790',
    '2002/fifty_first_state': '0227984',
    '2002/les_destinees': '0216689',
    '2002/sons_room': '0208990',
    '2003/open_hearts': '0315543',
    '2003/tulse_luper_suitcases': '0307596',
    '2003/valentin': '0296915',
    '2004/if_only_ver1': '0332136',
    '2004/wondrous_oblivion': '0334725',
    '2005/wu_ji': '0417976',
    '2006/golden_door': '0465188',
    '2006/kin': '1091189',
    '2007/revenge_of_the_nerds': '0088000',
    '2008/bad_batch': '1605644',
    '2008/mercedes': '1368083',
    '2008/spirit': '0831887',
    '2009/dead_air': '0993841',
    '2009/edge_of_love': '0819714',
    '2009/fuel': '1072437',
    '2009/one_good_man': '1239357',
    '2009/st_trinians': '1210106',
    '2009/surveillance': '0409345',
    '2009/taken': '0936501',
    '2009/vaml': '1610453',
    '2010/adopting_haiti': '1764164',
    '2010/afterlife': '0838247',
    '2010/agora': '1186830',
    '2010/athlete': '1356996',
    '2010/beneath_the_blue': '1222698',
    '2010/bitch_slap': '1212974',
    '2010/black_waters_of_echos_pond': '0960066',
    '2010/case_thirty_nine': '0795351',
    '2010/finite_and_infinite_games': '1772268',
    '2010/hole': '1085779',
    '2010/jolene': '0867334',
    '2010/lake_mungo': '0816556',
    '2010/last_day_of_summer': '1242544',
    '2010/leaves_of_grass': '1151359',
    '2010/life_of_lemon': '1466057',
    '2010/man_in_the_maze': '1721692',
    '2010/mr_immortality_the_life_and_times_of_twista': '1711017',
    '2010/paper_man': '0437405',
    '2010/perfect_game': '0473102',
    '2010/red_baron': '0365675',
    '2010/satin': '0433397',
    '2010/shutter_island': '1130884',
    '2010/strange_powers': '1534075',
    '2010/suicidegirls_must_die': '1584733',
    '2010/veronika_decides_to_die': '1068678',
    '2010/witchblade': '0494292',
    '2010/youth_in_revolt': '0403702',
    '2011/beastly': '1152398',
    '2011/burning_palms': '1283887',
    '2011/cabin_in_the_woods': '1259521',
    '2011/conan': '0816462',
    '2011/courageous': '1630036',
    '2011/cruces_divided_two': '1698645',
    '2011/green_with_envy': '1204342',
    '2011/happythankyoumoreplease': '1481572',
    '2011/homework': '1645080',
    '2011/i_got_next': '1915570',
    '2011/lebanon_pa': '1290082',
    '2011/money_pet': '1965198',
    '2011/my_suicide': '0492896',
    '2011/priest': '0822847',
    '2011/prowl': '1559033',
    '2011/red_sonja': '0800175',
    '2011/season_of_the_witch': '0479997',
    '2011/stay_cool': '1235807',
    '2011/sympathy_for_delicious': '1270277',
    '2011/trust': '1529572',
    '2011/undefeated': '1961604',
    '2011/vanishing_on_seventh_street': '1452628',
    '2011/where_is_robert_fisher': '2042712',
    '2011/yellowbrickroad': '1398428',
    '2012/haywire': '1506999',
    '2012/last_call_at_the_oasis': '2043900',
}
if __name__ == '__main__': if __name__ == '__main__':
ids = getIds() ids = getIds()
print sorted(ids), len(ids) print sorted(ids), len(ids)