add wikipedia.searchByImdb
This commit is contained in:
parent
7581cf3501
commit
3f7e8a8927
2 changed files with 35 additions and 1 deletions
|
@ -199,7 +199,7 @@ class IMDb:
|
|||
title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + se + title[title.rfind('"')+1:]
|
||||
else:
|
||||
title = imdbpy_utils.normalizeTitle(title[1:title.rfind('"')]) + ':' + title[title.rfind('"')+1:]
|
||||
return title
|
||||
return imdbpy_utils.normalizeTitle(title)
|
||||
|
||||
def parseYear(self):
|
||||
year = ''
|
||||
|
|
34
scrapeit/wikipedia.py
Normal file
34
scrapeit/wikipedia.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
# -*- Mode: Python; -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=2:sts=2:ts=2
|
||||
import re
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
from utils import read_url
|
||||
from imdb import IMDb
|
||||
from google import google
|
||||
|
||||
|
||||
def searchByImdb(imdb_id, title=None, director=None):
  """Return the URL of the English Wikipedia article that cites imdb_id.

  The function searches Google (restricted to en.wikipedia.org) for the
  quoted title plus the director, then inspects the edit-page source of each
  hit and accepts the first one that mentions the given IMDb id shortly
  after the text 'imdb_id'.

  imdb_id  -- IMDb id as a string; it is matched as a substring of the
              article's edit-page source.
  title    -- optional title to search for; when empty it is looked up via
              IMDb(imdb_id), and director is then taken from the IMDb
              credits as well (any director passed in is ignored).
  director -- optional director name added to the search query.

  Returns the URL of the first matching search result, or '' when none of
  the (up to 20 requested) results matches.
  """
  if not title:
    i = IMDb(imdb_id)
    title = i.parseTitle()
    director = i.parseCredits()['director']
    if director:
      # only the first credited director is used in the query
      director = director[0]
  # A missing director (None or an empty credit list) must not end up in the
  # query as the literal text "None" / "[]".
  director = director or ''

  query = '"%s" %s site:en.wikipedia.org' % (title, director)
  for g in google(query, 20):
    # the second element of each search result is used as the page URL
    url = g[1]
    soup = BeautifulSoup(read_url(url))
    edit_links = soup('a', dict(href=re.compile('action=edit'),
                                title=re.compile('You can edit this page.')))
    if not edit_links:
      # Not an editable article page (or the markup changed): skip this hit
      # instead of aborting the whole search with an IndexError.
      continue
    # The href is taken from HTML, so '&' separators arrive escaped as
    # '&amp;' and have to be decoded before the URL can be requested.
    edit_url = "http://en.wikipedia.org%s" % edit_links[0]['href'].replace('&amp;', '&')
    data = read_url(edit_url)
    # str.find reports "not found" as -1; offset 0 is a valid match.
    w_imdb_id = data.find('imdb_id')
    # Only the 50 characters starting at 'imdb_id' are checked, so an id
    # mentioned elsewhere on the page does not count as a match.
    if w_imdb_id != -1 and imdb_id in data[w_imdb_id:w_imdb_id+50]:
      return url
  return ''
|
Loading…
Reference in a new issue