use direct srt links instead of zip files

This commit is contained in:
j 2007-07-30 17:58:02 +00:00
parent 7ca7d6b484
commit fb38bfc6a1

View file

@ -3,12 +3,15 @@
# vi:si:et:sw=2:sts=2:ts=2 # vi:si:et:sw=2:sts=2:ts=2
import utils import utils
import feedparser
import StringIO import StringIO
import zipfile import zipfile
import re import re
import socket import socket
from BeautifulSoup import BeautifulSoup
import feedparser
import chardet
def read_url(url): def read_url(url):
t0 = socket.getdefaulttimeout() t0 = socket.getdefaulttimeout()
socket.setdefaulttimeout(100) socket.setdefaulttimeout(100)
@ -16,7 +19,7 @@ def read_url(url):
socket.setdefaulttimeout(t0) socket.setdefaulttimeout(t0)
return data return data
def searchSubtitlesByIMDb(imdb, parts = 1, language = "eng"): def findSubtitlesByIMDb(imdb, parts = 1, language = "eng"):
url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb) url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb)
data = read_url(url) data = read_url(url)
fd = feedparser.parse(data) fd = feedparser.parse(data)
@ -50,7 +53,26 @@ def extractSubtitles(zip_data):
srts[f] = zfile.read(f) srts[f] = zfile.read(f)
return srts return srts
def loadSrtUnicode(data):
  """Decode raw subtitle data into a unicode string.

  The encoding is guessed with chardet. If there is no guess, the guessed
  codec is unknown to Python, or the data does not decode with it, latin-1
  is tried as a fallback.

  Returns the decoded unicode text, or u'' if even the latin-1 fallback
  fails (a message is printed in that case).
  """
  # chardet's result may carry None as the encoding when it has no guess
  encoding = chardet.detect(data)['encoding']
  if encoding:
    try:
      return unicode(data, encoding)
    except (UnicodeError, LookupError, TypeError):
      # wrong guess or a codec name Python does not know: use the fallback
      pass
  try:
    return unicode(data, 'latin-1')
  except (UnicodeError, TypeError):
    # only specific decode failures are caught, so KeyboardInterrupt and
    # SystemExit are no longer swallowed here
    print("failed to detect encoding, giving up")
    return u''
def downloadSubtitleByID(opensubtitle_id):
  """Fetch the srt files linked from an opensubtitles.org subtitle page.

  Loads the page for opensubtitle_id, follows every anchor whose href
  matches 'download/file', and decodes each download with loadSrtUnicode.

  Returns a dict mapping file name (taken from the link text) to decoded
  content; downloads that decode to an empty string are left out.
  """
  page_url = 'http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id
  page = BeautifulSoup(read_url(page_url))
  download_links = page('a', {'href': re.compile('download/file')})
  srts = {}
  for link in download_links:
    download_url = 'http://www.opensubtitles.org' + link['href']
    # the file name is the first line of the link's last child node
    label = link.contents[-1]
    file_name = label.split('\n')[0].strip()
    text = loadSrtUnicode(read_url(download_url))
    if not text:
      continue
    srts[file_name] = text
  return srts