diff --git a/ox/format.py b/ox/format.py index 7a1c78e..b7238f9 100644 --- a/ox/format.py +++ b/ox/format.py @@ -18,7 +18,7 @@ def toAZ(num): 'FOO' >>> toAZ(1234567890) - 'CYWOQVK' + 'CYWOQVJ' """ if num < 1: raise ValueError, "must supply a positive integer" digits = string.letters[26:] @@ -74,7 +74,7 @@ def to26(q): def from26(q): """ Converts an base 26 string to an integer - >>> from32('A') + >>> from26('A') 0 """ base26 = string.letters[26:] diff --git a/ox/html.py b/ox/html.py index 20aeb45..4824002 100644 --- a/ox/html.py +++ b/ox/html.py @@ -142,9 +142,9 @@ def decode_html(html): >>> decode_html('me & you and $&%') u'me & you and $&%' >>> decode_html('€') - u'€' + u'\u20ac' >>> decode_html('Anniversary of Daoud's Republic') - u'Anniversary of Daoud's Republic' + u"Anniversary of Daoud's Republic" """ if type(html) != unicode: html = unicode(html)[:] @@ -194,33 +194,33 @@ def escape_html(value): def sanitize_html(html, tags=None, wikilinks=False): ''' >>> sanitize_html('http://foo.com, bar') - 'http://foo.com, bar' + u'http://foo.com, bar' >>> sanitize_html('http://foo.com/foobar?foo, bar') - 'http://foo.com/foobar?foo, bar' + u'http://foo.com/foobar?foo, bar' >>> sanitize_html('(see: www.foo.com)') - '(see: www.foo.com)' + u'(see: www.foo.com)' >>> sanitize_html('foo@bar.com') - 'foo@bar.com' + u'foo@bar.com' >>> sanitize_html(sanitize_html('foo@bar.com')) - 'foo@bar.com' + u'foo@bar.com' >>> sanitize_html('foo') - 'foo' + u'foo' >>> sanitize_html('foo') - '<a href="javascript:alert()">foo' + u'<a href="javascript:alert()">foo' >>> sanitize_html('[http://foo.com foo]') - 'foo' + u'foo' >>> sanitize_html('foo') - '
foo
' + u'
foo
' >>> sanitize_html('') - '<script>alert()</script>' - >>> sanitize_html('\'foo\' < \'bar\' && "foo" > "bar"') - '\'foo\' < \'bar\' && "foo" > "bar"' + u'<script>alert()</script>' + >>> sanitize_html("'foo' < 'bar' && \"foo\" > \"bar\"") + u'\'foo\' < \'bar\' && "foo" > "bar"' >>> sanitize_html('foo') - 'foo' + u'foo' >>> sanitize_html('foo') - 'foo' + u'foo' >>> sanitize_html('Anniversary of Daoud's Republic') - 'Anniversary of Daoud's Republic' + u"Anniversary of Daoud's Republic" ''' if not tags: tags = [ diff --git a/ox/movie.py b/ox/movie.py index 2a47f12..efeec5d 100644 --- a/ox/movie.py +++ b/ox/movie.py @@ -172,20 +172,25 @@ def parse_item_files(files): def parse_path(path): ''' # all keys - >>> parse_path('F/Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.MPEG')['path'] + >>> parse_path('F/Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.MPEG')['normalizedPath'] 'F/Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E00) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.mpg' + # pop directory title off file name - >>> parse_path('U/Unknown Director/www.xxx.com.._/www.xxx.com....Directors\'s Cut.avi')['version'] - 'Director\'s Cut' + >>> parse_path("U/Unknown Director/www.xxx.com.._/www.xxx.com....Director's Cut.avi")['version'] + "Director's Cut" + # handle dots - >>> parse_path('U/Unknown Director/Unknown Title (2000)/... Mr. .com....Director\'s Cut.srt')['version'] - 'Director\'s Cut' + >>> parse_path("U/Unknown Director/Unknown Title (2000)/... Mr. .com....Director's Cut.srt")['version'] + "Director's Cut" + # multiple years, season zero, multiple episodes, dots in episode title and part title - >>> parse_path('G/Groening, Matt/The Simpsons (1989-2012)/The Simpsons (S00E01-02) D.I.Y..Uncensored Version.Part 1.D.I.Y..de.avi')['path'] + >>> parse_path('G/Groening, Matt/The Simpsons (1989-2012)/The Simpsons (S00E01-02) D.I.Y..Uncensored Version.Part 1.D.I.Y..de.avi')['normalizedPath'] 'G/Groening, Matt/The Simpsons (1989-2012)/The Simpsons (S01E01+02) D.I.Y..Uncensored Version.Part 1.D.I.Y..de.avi' + # handle underscores >>> parse_path('U/Unknown Director/_com_ 1_0 _ NaN.._/_com_ 1_0 _ NaN....avi')['title'] '.com: 1/0 / NaN...' + # TODO: '.com.avi' ''' def parse_title(string): diff --git a/ox/net.py b/ox/net.py index 51b5318..dd57e04 100644 --- a/ox/net.py +++ b/ox/net.py @@ -35,7 +35,7 @@ def exists(url, data=None, headers=DEFAULT_HEADERS): return True return False -def headers(url, data=None, headers=DEFAULT_HEADERS): +def get_headers(url, data=None, headers=DEFAULT_HEADERS): try: f = open_url(url, data, headers) f.headers['Status'] = "%s" % f.code diff --git a/ox/text.py b/ox/text.py index 860744c..d2d09a8 100644 --- a/ox/text.py +++ b/ox/text.py @@ -405,9 +405,9 @@ def truncate_string(string, length, padding='...', position='right'): def truncate_words(s, num): """Truncates a string after a certain number of chacters, but ends with a word - >>> truncate_string('Truncates a string after a certain number of chacters, but ends with a word', 23) + >>> truncate_words('Truncates a string after a certain number of chacters, but ends with a word', 23) 'Truncates a string...' - >>> truncate_string('Truncates a string', 23) + >>> truncate_words('Truncates a string', 23) 'Truncates a string' """ diff --git a/ox/web/criterion.py b/ox/web/criterion.py index f7f21d1..d72f7dc 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -17,14 +17,14 @@ def get_url(id): def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): ''' - >>> get_data('1333')['imdbId'] + >>> get_data('1333').get('imdbId') u'0060304' >>> get_data('236')['posters'][0] - u'http://criterion_production.s3.amazonaws.com/release_images/1586/ThirdManReplace.jpg' + u'http://s3.amazonaws.com/criterion-production/release_images/1586/ThirdManReplace.jpg' >>> get_data('786')['posters'][0] - u'http://criterion_production.s3.amazonaws.com/product_images/185/343_box_348x490.jpg' + u'http://s3.amazonaws.com/criterion-production/product_images/185/343_box_348x490.jpg' ''' data = { "url": get_url(id) @@ -60,6 +60,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): data["posters"] = [result.replace("_w100", "")] else: data["posters"] = [] + data['posters'] = [re.sub('(\?\d+)$', '', p) for p in data['posters']] result = find_re(html, "\"Film>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0] + >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0] 'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv' - >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0] + >>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0] 'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv' ''' data = read_url(url) @@ -19,4 +19,3 @@ def get_video_url(url): v = unquote(v).split('@@')[0] return v return '' - diff --git a/ox/web/mininova.py b/ox/web/mininova.py index 8bba707..eb78cab 100644 --- a/ox/web/mininova.py +++ b/ox/web/mininova.py @@ -6,7 +6,7 @@ import socket from urllib import quote from ox.cache import read_url -from ox import find_re, cache, strip_tags, decode_html, getTorrentInfo, int_value, normalize_newlines +from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, int_value, normalize_newlines from ox.normalize import normalize_imdbid import ox @@ -85,7 +85,7 @@ def get_data(mininovaId): if torrent['description']: torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip() t = read_url(torrent[u'torrent_link']) - torrent[u'torrent_info'] = getTorrentInfo(t) + torrent[u'torrent_info'] = get_torrent_info(t) return torrent class Mininova(Torrent): diff --git a/ox/web/opensubtitles.py b/ox/web/opensubtitles.py index e3b5f0b..7684402 100644 --- a/ox/web/opensubtitles.py +++ b/ox/web/opensubtitles.py @@ -5,7 +5,7 @@ import re import feedparser from ox.cache import read_url from ox import find_re, strip_tags -from ox import langCode2To3, langTo3Code +from ox.iso import langCode2To3, langTo3Code def find_subtitles(imdb, parts = 1, language = "eng"): if len(language) == 2: diff --git a/ox/web/thepiratebay.py b/ox/web/thepiratebay.py index 3e8981e..b751384 100644 --- a/ox/web/thepiratebay.py +++ b/ox/web/thepiratebay.py @@ -6,7 +6,7 @@ import socket from urllib import quote, urlencode from urllib2 import URLError -from ox import find_re, cache, strip_tags, decode_html, getTorrentInfo, normalize_newlines +from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, normalize_newlines from ox.normalize import normalize_imdbid import ox @@ -94,8 +94,8 @@ def get_data(piratebayId): torrent[u'description'] = find_re(data, '
(.*?)
') if torrent[u'description']: torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip() - t = _read_url(torrent[u'torrent_link']) - torrent[u'torrent_info'] = getTorrentInfo(t) + t = read_url(torrent[u'torrent_link']) + torrent[u'torrent_info'] = get_torrent_info(t) return torrent class Thepiratebay(Torrent):