new imdb title code

This commit is contained in:
j 2007-05-22 18:01:28 +00:00
parent 84dc6241c0
commit 3942d76b6e
2 changed files with 18 additions and 5 deletions

View file

@ -157,11 +157,16 @@ class IMDb:
#Title, Year #Title, Year
title = u'' title = u''
year = u'' year = u''
flat_data = data.replace('\n', '').replace('\r', '') soup = BeautifulSoup(data)
html_title = re.compile('<strong class="title">(.*?) <small>\(<a href="/Sections/Years/(.*?)">').findall(flat_data) html_title = soup('div', {'id': 'tn15title'})
if html_title: html_title = html_title[0]('h1')
if html_title: html_title = html_title[0].contents
if html_title: if html_title:
title = html_title[0][0] title = html_title[0]
IMDbDict['year'] = html_title[0][1] year = re.compile('(\d\d\d\d)').findall(str(html_title[1]))
if year: year = year[0]
else: year = ''
IMDbDict['year'] = year
IMDbDict['title'] = stripTags(title).strip() IMDbDict['title'] = stripTags(title).strip()
else: else:
title = _getTerm(data, '<title>(.*?)</title>') title = _getTerm(data, '<title>(.*?)</title>')

View file

@ -2,12 +2,20 @@
# -*- Mode: Python; -*- # -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=2:sts=2:ts=2
from utils import read_url import utils
import feedparser import feedparser
import StringIO import StringIO
import zipfile import zipfile
import re import re
import socket
def read_url(url):
  """Fetch `url` through utils.read_url with a temporary 100-second socket timeout.

  The process-wide default socket timeout is raised to 100 seconds for the
  duration of the request and then put back to whatever it was before.

  url -- the address handed unchanged to utils.read_url.

  Returns whatever utils.read_url returns. Any exception raised by
  utils.read_url propagates to the caller.
  """
  previous_timeout = socket.getdefaulttimeout()
  socket.setdefaulttimeout(100)
  try:
    return utils.read_url(url)
  finally:
    # Restore the global default even when the fetch fails; otherwise one
    # failed request would leave every later socket with the 100s timeout.
    socket.setdefaulttimeout(previous_timeout)
def searchSubtitlesByIMDb(imdb, parts = 1, language = "eng"): def searchSubtitlesByIMDb(imdb, parts = 1, language = "eng"):
url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb) url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb)
data = read_url(url) data = read_url(url)