trailer
This commit is contained in:
parent
73ec7e7aeb
commit
482599169b
2 changed files with 49 additions and 3 deletions
|
@ -110,7 +110,9 @@ class IMDb:
|
|||
self.locationUrl = "%slocations" % self.pageUrl
|
||||
self.externalreviewsSource = None
|
||||
self.externalreviewsUrl = "%sexternalreviews" % self.pageUrl
|
||||
|
||||
self.trailerSource = None
|
||||
self.trailerUrl = "%strailers" % self.pageUrl
|
||||
|
||||
def getPage(self, forcereload = False):
|
||||
if forcereload or not self.pageSource:
|
||||
self.pageSource = read_url_utf8(self.pageUrl)
|
||||
|
@ -272,6 +274,7 @@ class IMDb:
|
|||
IMDbDict['release_date'] = self.parseReleaseinfo()
|
||||
IMDbDict['business'] = self.parseBusiness()
|
||||
IMDbDict['reviews'] = self.parseExternalreviews()
|
||||
#IMDbDict['trailers'] = self.parseTrailers()
|
||||
self.IMDbDict = IMDbDict
|
||||
|
||||
if IMDbDict['episode_of']:
|
||||
|
@ -474,7 +477,7 @@ class IMDb:
|
|||
if forcereload or not self.externalreviewsSource:
|
||||
self.externalreviewsSource = read_url_utf8(self.externalreviewsUrl)
|
||||
return self.externalreviewsSource
|
||||
|
||||
|
||||
def parseExternalreviews(self):
|
||||
soup = BeautifulSoup(self.getExternalreviews())
|
||||
ol = soup('ol')
|
||||
|
@ -491,7 +494,25 @@ class IMDb:
|
|||
pass
|
||||
return ret
|
||||
return {}
|
||||
|
||||
|
||||
def getTrailers(self, forcereload = False):
    """Return the source of the trailers page, fetching it when needed.

    The page at ``self.trailerUrl`` is downloaded with ``read_url_utf8``
    only when nothing is cached in ``self.trailerSource`` yet, or when
    ``forcereload`` is true; the result is stored back on the instance.
    """
    cached = self.trailerSource
    if cached and not forcereload:
        # Already fetched and no reload requested: reuse the cached copy.
        return cached
    self.trailerSource = read_url_utf8(self.trailerUrl)
    return self.trailerSource
|
||||
|
||||
def parseTrailers(self):
    """Collect external trailer links from the trailers page.

    Parses the page returned by ``self.getTrailers()`` and looks at every
    ``<p>`` element that has text and contains at least one ``<a>`` tag.
    Only the first anchor of each paragraph is considered, and only when
    its ``href`` starts with ``http``.

    Returns a dict mapping trailer URL -> link title, with any ``www.``
    removed from the title. The dict is empty when nothing matches.
    """
    ret = {}
    soup = BeautifulSoup(self.getTrailers())
    for p in soup('p'):
        anchors = p('a')
        if not anchors or not p.firstText():
            continue
        a = anchors[0]
        # Tag.get yields None for a missing attribute, whereas a['href']
        # raises KeyError on anchors that carry no href (e.g. named anchors).
        href = a.get('href')
        if not href or not href.startswith('http'):
            continue
        # a.string is None when the anchor wraps other markup rather than a
        # single text node; fall back to its joined text so the link is kept
        # instead of crashing on None.replace().
        title = a.string
        if title is None:
            title = ''.join(a.findAll(text=True))
        ret[href] = title.replace('www.', '')
    return ret
|
||||
|
||||
def guess(title, director=''):
|
||||
#FIXME: proper file -> title
|
||||
title = title.split('-')[0]
|
||||
|
|
25
scrapeit/yahootrailers.py
Normal file
25
scrapeit/yahootrailers.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- Mode: Python; -*-
|
||||
# vi:si:et:sw=2:sts=2:ts=2
|
||||
from urllib import quote
|
||||
import re
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
from utils import read_url, stripTags
|
||||
|
||||
|
||||
def trailerByTitle(title):
    """Look up a Yahoo Movies trailer page URL for a movie title.

    Searches movies.yahoo.com for ``title`` and takes the first result link
    pointing at a ``http://movies.yahoo.com/movie...`` page. If that link's
    text contains ``title``, the movie's info page is fetched and its URL
    with ``/info`` replaced by ``/video`` is returned, provided that URL
    actually appears in the info page.

    Returns the trailer URL, or ``''`` when the title is empty or no
    matching trailer is found.
    """
    title = title.strip()
    if not title:
        # An empty title is a substring of every link text, so the check
        # below would accept whatever movie happens to be listed first.
        return ''
    # Python 2's urllib.quote fails on non-ASCII unicode input; quote the
    # UTF-8 bytes instead. The original title is kept for the text match.
    query = title if isinstance(title, str) else title.encode('utf-8')
    url = "http://movies.yahoo.com/mv/search?p=%s" % quote(query)
    data = read_url(url)
    soup = BeautifulSoup(data)
    # Dots are escaped so they only match literal dots in the host name; the
    # former trailing '.*?' matched the empty string and had no effect.
    movies = soup('a', {'href': re.compile(r'http://movies\.yahoo\.com/movie')})
    if not movies:
        return ''
    text = movies[0].firstText()
    if not text or title not in text:
        return ''
    info = movies[0]['href']
    trailer = info.replace('/info', '/video')
    data = read_url(info)
    # Only report the trailer URL when the info page really links to it.
    if trailer in data:
        return trailer
    return ''
|
||||
|
Loading…
Reference in a new issue