80 lines
2.1 KiB
Python
80 lines
2.1 KiB
Python
# -*- coding: utf-8 -*-
|
|
# -*- Mode: Python; -*-
|
|
# vi:si:et:sw=2:sts=2:ts=2
|
|
|
|
import utils
|
|
import StringIO
|
|
import zipfile
|
|
import re
|
|
import socket
|
|
|
|
from BeautifulSoup import BeautifulSoup
|
|
import feedparser
|
|
import chardet
|
|
|
|
def read_url(url):
  """Fetch *url* through ``utils.read_url`` with a 100-second socket timeout.

  The process-wide default socket timeout is set to 100 seconds for the
  duration of the call and put back to its previous value afterwards.

  Args:
    url: passed unchanged to ``utils.read_url``.

  Returns:
    Whatever ``utils.read_url(url)`` returns.
  """
  previous_timeout = socket.getdefaulttimeout()
  socket.setdefaulttimeout(100)
  try:
    return utils.read_url(url)
  finally:
    # Restore the global default even when the fetch raises; otherwise a
    # single failed download would leave the 100-second timeout in place
    # for every socket created later in this process.
    socket.setdefaulttimeout(previous_timeout)
|
|
|
|
def findSubtitlesByIMDb(imdb, parts=1, language="eng"):
  """Return the id found in the first OpenSubtitles search result.

  Builds the opensubtitles.org RSS search URL from *language*, *parts* and
  *imdb*, downloads it with ``read_url`` and parses it with ``feedparser``.

  Args:
    imdb: value substituted into the ``imdbid-%s`` part of the URL.
    parts: value substituted into the ``subsumcd-%s`` part of the URL.
    language: value substituted into the ``sublanguageid-%s`` part of the URL.

  Returns:
    The text between ``subtitles/`` and the following ``/`` in the href of
    the first link of the first feed entry, or ``None`` when the feed has no
    entries or that link contains no such segment.
  """
  url = "http://www.opensubtitles.org/en/search/sublanguageid-%s/subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (language, parts, imdb)
  feed = feedparser.parse(read_url(url))
  if not feed.entries:
    return None

  link = feed.entries[0]['links'][0]['href']
  match = re.search('subtitles/(.*?)/', link)
  # A non-matching link used to leak the empty list produced by findall();
  # report "not found" as None in every case so callers see one sentinel.
  if match is None:
    return None
  return match.group(1)
|
|
|
|
def getZipFileLink(opensubtitle_id):
  """Return the opensubtitles.org download URL for *opensubtitle_id*."""
  link_template = "http://www.opensubtitles.org/en/download/sub/%s"
  return link_template % opensubtitle_id
|
|
|
|
def downloadSubtitle(zip_link):
  """Download the archive at *zip_link* and return its extracted subtitles.

  The URL is fetched with ``read_url`` and the response is handed to
  ``extractSubtitles``, whose result is returned unchanged.
  """
  archive_data = read_url(zip_link)
  return extractSubtitles(archive_data)
|
|
|
|
def extractSubtitles(zip_data):
  """Extract subtitle files from the raw bytes of a zip archive.

  Members whose name ends in ``nfo`` are ignored when counting. If exactly
  one member is left, it is returned whatever its extension; otherwise every
  member whose name ends in ``.srt`` is returned.

  Args:
    zip_data: byte string holding a complete zip archive.

  Returns:
    A dict mapping member name to that member's content as read by
    ``ZipFile.read``. Empty when nothing qualifies.

  Raises:
    Whatever ``zipfile.ZipFile`` raises when *zip_data* is not a readable
    zip archive.
  """
  # Imported here because the module's top-level imports only provide the
  # Python 2 ``StringIO`` module; ``io.BytesIO`` wraps the bytes directly
  # instead of copying them through a write into an empty buffer.
  import io

  archive = zipfile.ZipFile(io.BytesIO(zip_data))
  try:
    names = archive.namelist()
    # A real list (not filter()) so that len() works on any Python version.
    candidates = [name for name in names if not name.endswith('nfo')]
    if len(candidates) == 1:
      wanted = candidates
    else:
      wanted = [name for name in names if name.endswith('.srt')]
    return dict((name, archive.read(name)) for name in wanted)
  finally:
    # The archive handle was previously never released.
    archive.close()
|
|
|
|
def loadSrtUnicode(data):
  """Decode raw subtitle bytes into a unicode string.

  The encoding is guessed with ``chardet.detect``. If there is no guess, or
  decoding with the guess fails, ``latin-1`` is tried instead. If that fails
  as well, a message is printed and an empty unicode string is returned.

  Args:
    data: the undecoded subtitle content.

  Returns:
    The decoded text, or ``u''`` when both decoding attempts fail.
  """
  encoding = chardet.detect(data)['encoding']
  if encoding:
    try:
      return unicode(data, encoding)
    except (LookupError, TypeError, ValueError):
      # Unknown codec name, undecodable input type, or bytes that are not
      # valid in the guessed encoding (UnicodeError is a ValueError):
      # fall through to the latin-1 attempt below.
      pass
  try:
    return unicode(data, 'latin-1')
  except (TypeError, ValueError):
    # Parenthesised form prints the same text under the Python 2 print
    # statement and also parses as a function call on Python 3.
    print("failed to detect encoding, giving up")
    return u''
|
|
|
|
def downloadSubtitleByID(opensubtitle_id):
  """Download every subtitle file linked from an OpenSubtitles detail page.

  Fetches ``http://www.opensubtitles.org/en/subtitles/<opensubtitle_id>``,
  finds each ``<a>`` whose href matches ``download/file``, downloads the
  target and decodes it with ``loadSrtUnicode``.

  Returns:
    A dict mapping ``"<counter>_<file name>"`` (counter zero-padded to three
    digits) to the decoded text. Downloads that decode to an empty value
    are left out.
  """
  page = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
  anchors = BeautifulSoup(page)('a', {'href': re.compile('download/file')})

  subtitles = {}
  counter = 0
  for anchor in anchors:
    target = 'http://www.opensubtitles.org' + anchor['href']
    # The file name is the first line of the anchor's last child node.
    first_line = anchor.contents[-1].split('\n')[0]
    name = first_line.strip()
    text = loadSrtUnicode(read_url(target))
    if not text:
      continue
    subtitles["%03d_%s" % (counter, name)] = text
    # NOTE(review): the counter is assumed to advance only for stored files;
    # the flattened source does not show whether the increment was nested
    # under the emptiness check - confirm against the original layout.
    counter += 1
  return subtitles
|