python-ox/ox/web/opensubtitles.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re

from ox.cache import read_url
from ox import find_re, strip_tags
from ox.iso import langCode2To3, langTo3Code

def find_subtitles(imdb, parts = 1, language = "eng"):
    """Search opensubtitles.org for an srt subtitle and return its id.

    imdb     -- IMDb id, inserted into the search URL as ``imdbid-<imdb>``
    parts    -- value for the ``subsumcd-`` search filter (presumably the
                number of CDs/files the subtitle is split into)
    language -- a 3-letter value is used as is, a 2-letter value is passed
                through langCode2To3 and any other non-empty value through
                langTo3Code; a false value (None, '') adds no language
                filter to the search

    Returns the id taken from the first search result. When nothing is
    found the result is false: None for an RSS answer without a usable
    entry, otherwise whatever find_re returns for the fetched page.
    """
    if language:
        # only normalise real values; len(None) would raise and a false
        # language simply means "do not filter by language" below
        if len(language) == 2:
            language = langCode2To3(language)
        elif len(language) != 3:
            language = langTo3Code(language)
    url = "http://www.opensubtitles.org/en/search/"
    if language:
        url += "sublanguageid-%s/" % language
    url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
    # request decoded text so the str membership test and the str patterns
    # below are applied to text
    data = read_url(url, unicode=True)
    if "title>opensubtitles.com - search results</title" not in data:
        # not the search-result feed: look for a subtitle link in the page
        return find_re(data, '/en/subtitles/(.*?)/')
    # feedparser is only needed for the RSS answer, so import it lazily
    import feedparser
    fd = feedparser.parse(data)
    if not fd.entries:
        return None
    link = fd.entries[0]['links'][0]['href']
    match = re.search(r'subtitles/(.*?)/', link)
    # None (not an empty list) when the link carries no subtitle id
    return match.group(1) if match else None

def download_subtitle(opensubtitle_id):
    """Download all files linked from an opensubtitles.org subtitle page.

    opensubtitle_id -- id as used in ``/en/subtitles/<id>``

    Returns a dict that maps each file's name (first line of the download
    link's text with tags stripped) to the result of
    read_url(<download url>, unicode=True). Links that yield the same name
    overwrite each other, the last one wins.
    """
    page_url = 'http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id
    # request decoded text so the str pattern below is applied to text
    data = read_url(page_url, unicode=True)
    # DOTALL lets '.' cross newlines, so link text spread over several
    # lines is still captured
    link_re = re.compile(r'href="(/en/download/file/.*?)">(.*?)</a>', re.DOTALL)
    srts = {}
    for path, label in link_re.findall(data):
        name = strip_tags(label).split('\n')[0]
        # NOTE(review): the page is read from the .org host but files are
        # fetched from .com -- kept as is, confirm this is intended
        url = "http://www.opensubtitles.com%s" % path
        srts[name] = read_url(url, unicode=True)
    return srts
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`# -*- coding: utf-8 -*-`
			`# vi:si:et:sw=4:sts=4:ts=4`
			`import re`

net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`from ox.cache import read_url`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`from ox import find_re, strip_tags`
fix some failing tests 2012-09-09 17:28:11 +00:00			`from ox.iso import langCode2To3, langTo3Code`
add ox.web to this repos 2010-07-07 23:25:57 +00:00
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def find_subtitles(imdb, parts = 1, language = "eng"):`
remove feedparser 2018-01-14 15:47:15 +00:00			`import feedparser`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`if len(language) == 2:`
			`language = langCode2To3(language)`
			`elif len(language) != 3:`
			`language = langTo3Code(language)`
			`url = "http://www.opensubtitles.org/en/search/"`
			`if language:`
			`url += "sublanguageid-%s/" % language`
			`url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`data = read_url(url)`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`if "title>opensubtitles.com - search results</title" in data:`
			`fd = feedparser.parse(data)`
			`opensubtitleId = None`
			`if fd.entries:`
			`link = fd.entries[0]['links'][0]['href']`
			`opensubtitleId = re.compile('subtitles/(.*?)/').findall(link)`
			`if opensubtitleId:`
			`opensubtitleId = opensubtitleId[0]`
			`else:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`opensubtitleId = find_re(data, '/en/subtitles/(.*?)/')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return opensubtitleId`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def download_subtitle(opensubtitle_id):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`srts = {}`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`data = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'`
			`for f in re.compile(reg_exp, re.DOTALL).findall(data):`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`name = strip_tags(f[1]).split('\n')[0]`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`url = "http://www.opensubtitles.com%s" % f[0]`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`srts[name] = read_url(url, unicode=True)`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return srts`