update wikipedia movie data

This commit is contained in:
j 2011-04-07 20:52:03 +02:00
parent bd242d9712
commit e02769552d

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import urlencode from urllib import urlencode
from ox.utils import json from ox.utils import json
@ -52,7 +53,7 @@ def getWikiData(wikipediaUrl):
def getMovieData(wikipediaUrl): def getMovieData(wikipediaUrl):
if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl) if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
data = getWikiData(wikipediaUrl) data = getWikiData(wikipediaUrl)
filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\n\}\}''') filmbox_data = findRe(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
filmbox = {} filmbox = {}
_box = filmbox_data.strip().split('\n|') _box = filmbox_data.strip().split('\n|')
if len(_box) == 1: if len(_box) == 1:
@ -64,18 +65,23 @@ def getMovieData(wikipediaUrl):
if key[0] == '|': if key[0] == '|':
key = key[1:] key = key[1:]
value = d[1].strip() value = d[1].strip()
filmbox[key] = value filmbox[key.strip()] = value
if 'imdb title' in data:
filmbox['imdb_id'] = findRe(data, 'imdb title\|.*?(\d+)') if 'Allmovie movie' in data:
elif 'imdb episode' in data: filmbox['amg_id'] = findRe(data, 'Allmovie movie\|.*?(\d+)')
filmbox['imdb_id'] = findRe(data, 'imdb episode\|.*?(\d+)') elif 'amg_id' in filmbox and filmbox['amg_id'].startswith('1:'):
if 'Amg movie' in data:
filmbox['amg_id'] = findRe(data, 'Amg movie\|.*?(\d+)')
if 'amg_id' in filmbox and filmbox['amg_id'].startswith('1:'):
filmbox['amg_id'] = filmbox['amg_id'][2:] filmbox['amg_id'] = filmbox['amg_id'][2:]
if 'otten-tomatoes' in data: r = re.compile('{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
filmbox['rottentomatoes_id'] = findRe(data, '\{\{Rotten-tomatoes\|id=(.*?)\}\}') if r:
filmbox['imdb_id'] = r[0]
r = re.compile('{{mojo title\|(.*?)\|', re.IGNORECASE).findall(data)
if r:
filmbox['mojo_id'] = r[0]
r = re.compile('{{rotten-tomatoes\|(.*?)\|', re.IGNORECASE).findall(data)
if r:
filmbox['rottentomatoes_id'] = r[0]
if 'google video' in data: if 'google video' in data:
filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|') filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|')
if 'DEFAULTSORT' in data: if 'DEFAULTSORT' in data: