update wikipedia movie data
This commit is contained in:
parent
bd242d9712
commit
e02769552d
1 changed file with 17 additions and 11 deletions
|
@ -1,5 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
import re
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
|
||||||
from ox.utils import json
|
from ox.utils import json
|
||||||
|
@ -52,7 +53,7 @@ def getWikiData(wikipediaUrl):
|
||||||
def getMovieData(wikipediaUrl):
|
def getMovieData(wikipediaUrl):
|
||||||
if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
|
if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
|
||||||
data = getWikiData(wikipediaUrl)
|
data = getWikiData(wikipediaUrl)
|
||||||
filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\n\}\}''')
|
filmbox_data = findRe(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
|
||||||
filmbox = {}
|
filmbox = {}
|
||||||
_box = filmbox_data.strip().split('\n|')
|
_box = filmbox_data.strip().split('\n|')
|
||||||
if len(_box) == 1:
|
if len(_box) == 1:
|
||||||
|
@ -64,18 +65,23 @@ def getMovieData(wikipediaUrl):
|
||||||
if key[0] == '|':
|
if key[0] == '|':
|
||||||
key = key[1:]
|
key = key[1:]
|
||||||
value = d[1].strip()
|
value = d[1].strip()
|
||||||
filmbox[key] = value
|
filmbox[key.strip()] = value
|
||||||
if 'imdb title' in data:
|
|
||||||
filmbox['imdb_id'] = findRe(data, 'imdb title\|.*?(\d+)')
|
if 'Allmovie movie' in data:
|
||||||
elif 'imdb episode' in data:
|
filmbox['amg_id'] = findRe(data, 'Allmovie movie\|.*?(\d+)')
|
||||||
filmbox['imdb_id'] = findRe(data, 'imdb episode\|.*?(\d+)')
|
elif 'amg_id' in filmbox and filmbox['amg_id'].startswith('1:'):
|
||||||
if 'Amg movie' in data:
|
|
||||||
filmbox['amg_id'] = findRe(data, 'Amg movie\|.*?(\d+)')
|
|
||||||
if 'amg_id' in filmbox and filmbox['amg_id'].startswith('1:'):
|
|
||||||
filmbox['amg_id'] = filmbox['amg_id'][2:]
|
filmbox['amg_id'] = filmbox['amg_id'][2:]
|
||||||
|
|
||||||
if 'otten-tomatoes' in data:
|
r = re.compile('{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
|
||||||
filmbox['rottentomatoes_id'] = findRe(data, '\{\{Rotten-tomatoes\|id=(.*?)\}\}')
|
if r:
|
||||||
|
filmbox['imdb_id'] = r[0]
|
||||||
|
r = re.compile('{{mojo title\|(.*?)\|', re.IGNORECASE).findall(data)
|
||||||
|
if r:
|
||||||
|
filmbox['mojo_id'] = r[0]
|
||||||
|
|
||||||
|
r = re.compile('{{rotten-tomatoes\|(.*?)\|', re.IGNORECASE).findall(data)
|
||||||
|
if r:
|
||||||
|
filmbox['rottentomatoes_id'] = r[0]
|
||||||
if 'google video' in data:
|
if 'google video' in data:
|
||||||
filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|')
|
filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|')
|
||||||
if 'DEFAULTSORT' in data:
|
if 'DEFAULTSORT' in data:
|
||||||
|
|
Loading…
Reference in a new issue