use direct srt links instead of zip files

This commit is contained in:
j 2007-07-30 17:58:02 +00:00
parent 7ca7d6b484
commit fb38bfc6a1
1 changed file with 27 additions and 5 deletions

View File

@ -3,12 +3,15 @@
# vi:si:et:sw=2:sts=2:ts=2
import utils
import feedparser
import StringIO
import zipfile
import re
import socket
from BeautifulSoup import BeautifulSoup
import feedparser
import chardet
def read_url(url):
t0 = socket.getdefaulttimeout()
socket.setdefaulttimeout(100)
@ -16,7 +19,7 @@ def read_url(url):
socket.setdefaulttimeout(t0)
return data
def searchSubtitlesByIMDb(imdb, parts = 1, language = "eng"):
def findSubtitlesByIMDb(imdb, parts = 1, language = "eng"):
url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb)
data = read_url(url)
fd = feedparser.parse(data)
@ -50,7 +53,26 @@ def extractSubtitles(zip_data):
srts[f] = zfile.read(f)
return srts
def loadSrtUnicode(data):
  """Decode raw subtitle bytes into a unicode string.

  The encoding guessed by chardet is tried first; if that guess is missing
  or cannot be used, latin-1 is tried as a fallback.

  Returns the decoded unicode text, or u'' (after printing a message) when
  neither decoding attempt succeeds.
  """
  encoding = chardet.detect(data)['encoding']
  # chardet may report no encoding at all; skip straight to the fallback
  # instead of relying on unicode(data, None) raising.
  candidates = ['latin-1']
  if encoding:
    candidates.insert(0, encoding)
  for codec in candidates:
    try:
      return unicode(data, codec)
    except (UnicodeError, LookupError, TypeError):
      # UnicodeError: bytes do not decode with this codec
      # LookupError: chardet named a codec Python does not provide
      # TypeError: data is not a plain byte string
      continue
  print("failed to detect encoding, giving up")
  return u''
def downloadSubtitleByID(opensubtitle_id):
  """Fetch every srt file linked from an opensubtitles.org subtitle page.

  The page http://www.opensubtitles.org/en/subtitles/<opensubtitle_id> is
  loaded and scanned for anchors whose href matches 'download/file'. Each
  linked file is downloaded and decoded with loadSrtUnicode.

  Returns a dict mapping file name -> unicode subtitle text. Files whose
  decoded content is empty are left out.
  """
  # The former zip-based path (getZipFileLink + downloadSubtitle) returned
  # before this code could run; the srt files are now read directly.
  page = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
  soup = BeautifulSoup(page)
  srts = {}
  for a in soup('a', {'href': re.compile('download/file')}):
    download_url = 'http://www.opensubtitles.org' + a['href']
    # The file name is the first line of the anchor's last content node.
    file_name = a.contents[-1].split('\n')[0].strip()
    srt = loadSrtUnicode(read_url(download_url))
    if srt:
      srts[file_name] = srt
  return srts