diff --git a/ox/web/youtube.py b/ox/web/youtube.py index 3d0f9d4..7ab45b8 100644 --- a/ox/web/youtube.py +++ b/ox/web/youtube.py @@ -5,19 +5,26 @@ import urllib2 import cookielib import re from xml.dom.minidom import parseString +import json import feedparser import ox from ox.cache import read_url, cache_timeout +def get_id(url): + match = re.compile('v=(.+?)($|&)').findall(url) + if match: + return match[0][0] + +def get_url(id): + return 'http://www.youtube.com/watch?v=%s' % id + def video_url(youtubeId, format='mp4', timeout=cache_timeout): """ youtubeId - if of video format - video format, options: webm, 1080p, 720p, mp4, high """ - def get_url(stream): - return '%s&signature=%s' % (stream['url'], stream['sig']) fmt = None if format == '4k': fmt=38 @@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout): fmt=35 elif format == 'webm': streams = videos(youtubeId, 'webm') - return get_url(streams[max(streams.keys())]) + return streams[max(streams.keys())]['stream_url'] streams = videos(youtubeId) if str(fmt) in streams: - return get_url(streams[str(fmt)]) + return streams[str(fmt)]['stream_url'] def find(query, max_results=10, offset=1, orderBy='relevance'): query = quote(query) @@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'): def info(id): info = {} + if id.startswith('http'): + id = get_id(id) + if not id: + return info url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id data = read_url(url) xml = parseString(data) - info['url'] = 'http://www.youtube.com/watch?v=%s' % id + info['id'] = id + info['url'] = get_url(id) info['title'] = xml.getElementsByTagName('title')[0].firstChild.data info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0] @@ -70,14 +82,13 @@ def info(id): k = xml.getElementsByTagName('media:keywords')[0].firstChild if k: info['keywords'] = k.data.split(', ') - url = "http://www.youtube.com/watch?v=%s" % id - data = read_url(url) + data = read_url(info['url']) match = re.compile('