use wikipedia search stupid

This commit is contained in:
j 2007-09-16 19:00:22 +00:00
parent 9616b3477e
commit ee442c4090
1 changed file with 10 additions and 25 deletions

View File

@@ -10,29 +10,14 @@ from imdb import IMDb
from google import google
def searchByImdb(imdb_id, title=None, director=None):
    """Find the English Wikipedia article that mentions the given IMDb id.

    Runs a Google query restricted to en.wikipedia.org for the title and
    director, then opens the edit view of each hit and looks for
    ``imdb_id`` shortly after an ``imdb_id`` marker in the page text.

    imdb_id  -- IMDb id string to look for in the article source.
    title    -- optional title; when empty it is fetched through ``IMDb``.
    director -- optional director name used to narrow the query.

    Returns the URL of the first matching search hit, or None when none of
    the inspected hits mentions the id.
    """
    if not title:
        # NOTE(review): the original indentation was lost; the director
        # lookup is assumed to belong to this branch -- confirm.
        movie = IMDb(imdb_id)
        title = movie.parseTitle()
        directors = movie.parseCredits()['director']
        director = directors[0] if directors else ''
    if not director:
        # Keep a missing director from showing up as "None" in the query.
        director = ''

    q = u'%s %s site:en.wikipedia.org' % (title, director)
    for g in google(q, 7):
        url = g[1]
        soup = BeautifulSoup(read_url(url))
        edit_links = soup('a', dict(href=re.compile('action=edit'),
                                    title=re.compile('You can edit this page.')))
        if not edit_links:
            continue
        # The href is taken from HTML, so "&amp;" has to be decoded before
        # the link can be requested.
        edit_path = edit_links[0]['href'].replace('&amp;', '&')
        data = read_url("http://en.wikipedia.org%s" % edit_path)
        marker = data.find('imdb_id')
        # find() returns -1 for "not found"; offset 0 is a valid match.
        if marker != -1 and imdb_id in data[marker:marker + 50]:
            return url
    return None
def searchByImdb(imdb_id):
    """Return the URL of the first Wikipedia search hit for an IMDb id.

    Queries the English Wikipedia full-text search for
    ``imdb_id <imdb_id>`` and returns the absolute URL of the first link in
    the first result item.

    imdb_id -- IMDb id string; anything that is not exactly seven
               characters long is rejected without a request.

    Returns '' when the id is rejected or no usable result is found.
    """
    if len(imdb_id) != 7:
        return ''

    search_url = ("http://en.wikipedia.org/w/index.php?title=Special%3ASearch"
                  "&search=imdb_id%20" + imdb_id + "&fulltext=Search")
    soup = BeautifulSoup(read_url(search_url))

    # Result entries are picked out by this inline style -- presumably how
    # the search page marks up its hits; verify against the live markup.
    hits = soup('li', {"style": "padding-bottom: 1em;"})
    if not hits:
        return ''

    anchors = hits[0]('a')
    if not anchors:
        # A result item without a link is treated as "not found" instead
        # of raising IndexError.
        return ''
    return "http://en.wikipedia.org%s" % anchors[0]['href']