add wikipedia.searchByImdb

This commit is contained in:
j 2007-09-15 20:05:31 +00:00
parent 7581cf3501
commit 3f7e8a8927
2 changed files with 35 additions and 1 deletions

View file

@ -199,7 +199,7 @@ class IMDb:
title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:] title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:]
else: else:
title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + ':' + title[title.rfind('"')+1:] title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + ':' + title[title.rfind('"')+1:]
return title return imdbpy_utils.normalizeTitle(title)
def parseYear(self): def parseYear(self):
year = '' year = ''

34
scrapeit/wikipedia.py Normal file
View file

@ -0,0 +1,34 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
from BeautifulSoup import BeautifulSoup
from utils import read_url
from imdb import IMDb
from google import google
def searchByImdb(imdb_id, title=None, director=None):
  """Return the URL of the en.wikipedia.org article that cites `imdb_id`.

  Google is queried for the quoted title plus the director, restricted to
  en.wikipedia.org. For every hit, the page's edit link is followed and the
  returned edit-page text is checked for an ``imdb_id`` field that
  mentions the requested id.

  imdb_id  -- IMDb id, as the string expected to appear in the article source.
  title    -- movie title; when empty, title and director are both looked up
              through IMDb(imdb_id), overriding any `director` passed in.
  director -- optional director name used to narrow the search.

  Returns the URL of the first matching article, or '' if none matches.
  """
  if not title:
    movie = IMDb(imdb_id)
    title = movie.parseTitle()
    directors = movie.parseCredits()['director']
    if directors:
      director = directors[0]
    else:
      director = ''
  if not director:
    # A caller-supplied title without a director must not put the text
    # "None" into the search query.
    director = ''

  query = '"%s" %s site:en.wikipedia.org' % (title, director)
  edit_link_attrs = dict(href=re.compile('action=edit'),
                         title=re.compile('You can edit this page.'))
  # NOTE(review): 20 is presumably the maximum number of results - confirm
  # against the google() helper.
  for result in google(query, 20):
    url = result[1]
    soup = BeautifulSoup(read_url(url))
    edit_links = soup('a', edit_link_attrs)
    if not edit_links:
      # No edit link on this hit (e.g. not a regular article): try the next
      # result instead of failing the whole search with an IndexError.
      continue
    # The href is taken from HTML markup, where ampersands may still be
    # escaped; unescape them so the query string is a valid URL.
    edit_path = edit_links[0]['href'].replace('&amp;', '&')
    wiki_source = read_url("http://en.wikipedia.org%s" % edit_path)
    field_pos = wiki_source.find('imdb_id')
    # find() returns -1 when absent; offset 0 is a legitimate match.
    if field_pos >= 0 and imdb_id in wiki_source[field_pos:field_pos + 50]:
      return url
  return ''