diff --git a/ox/web/youtube.py b/ox/web/youtube.py index 3d0f9d4..7ab45b8 100644 --- a/ox/web/youtube.py +++ b/ox/web/youtube.py @@ -5,19 +5,26 @@ import urllib2 import cookielib import re from xml.dom.minidom import parseString +import json import feedparser import ox from ox.cache import read_url, cache_timeout +def get_id(url): + match = re.compile('v=(.+?)($|&)').findall(url) + if match: + return match[0][0] + +def get_url(id): + return 'http://www.youtube.com/watch?v=%s' % id + def video_url(youtubeId, format='mp4', timeout=cache_timeout): """ youtubeId - if of video format - video format, options: webm, 1080p, 720p, mp4, high """ - def get_url(stream): - return '%s&signature=%s' % (stream['url'], stream['sig']) fmt = None if format == '4k': fmt=38 @@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout): fmt=35 elif format == 'webm': streams = videos(youtubeId, 'webm') - return get_url(streams[max(streams.keys())]) + return streams[max(streams.keys())]['stream_url'] streams = videos(youtubeId) if str(fmt) in streams: - return get_url(streams[str(fmt)]) + return streams[str(fmt)]['stream_url'] def find(query, max_results=10, offset=1, orderBy='relevance'): query = quote(query) @@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'): def info(id): info = {} + if id.startswith('http'): + id = get_id(id) + if not id: + return info url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id data = read_url(url) xml = parseString(data) - info['url'] = 'http://www.youtube.com/watch?v=%s' % id + info['id'] = id + info['url'] = get_url(id) info['title'] = xml.getElementsByTagName('title')[0].firstChild.data info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0] @@ -70,14 +82,13 @@ def info(id): k = xml.getElementsByTagName('media:keywords')[0].firstChild if k: info['keywords'] = k.data.split(', ') - 
url = "http://www.youtube.com/watch?v=%s" % id - data = read_url(url) + data = read_url(info['url']) match = re.compile('

License:

(.*?)

', re.DOTALL).findall(data) if match: info['license'] = match[0].strip() info['license'] = re.sub('<.+?>', '', info['license']).strip() - url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id + url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1" % id data = read_url(url) xml = parseString(data) languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')] @@ -113,14 +124,17 @@ def videos(id, format=''): data = read_url(url) match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data) streams = {} - for x in match[0].split(','): - stream = {} - for s in x.split('\\u0026'): - key, value = s.split('=') - value = unquote_plus(value) - stream[key] = value - if not stream_type or stream['type'].startswith(stream_type): - streams[stream['itag']] = stream + if match: + for x in match[0].split(','): + stream = {} + for s in x.split('\\u0026'): + key, value = s.split('=') + value = unquote_plus(value) + stream[key] = value + if 'url' in stream and 'sig' in stream: + stream['stream_url'] = '%s&signature=%s' % (stream['url'], stream['sig']) + if not stream_type or stream['type'].startswith(stream_type): + streams[stream['itag']] = stream return streams def playlist(url): @@ -172,3 +186,19 @@ def download_webm(id, filename): f.close() u.close() return filename + +def get_config(id): + """ + id - video id or watch page url + returns the JSON-decoded ytplayer.config of the watch page, None if the page has none + """ + if id.startswith('http'): + url = id + else: + url = get_url(id) + data = read_url(url) + match = re.compile('ytplayer.config = (.*?);<').findall(data) + if match: + return json.loads(match[0]) + return None +