update ox.web.youtube
This commit is contained in:
parent
1c871f4d31
commit
075e735cd1
1 changed files with 42 additions and 16 deletions
|
@ -5,19 +5,26 @@ import urllib2
|
||||||
import cookielib
|
import cookielib
|
||||||
import re
|
import re
|
||||||
from xml.dom.minidom import parseString
|
from xml.dom.minidom import parseString
|
||||||
|
import json
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
import ox
|
import ox
|
||||||
from ox.cache import read_url, cache_timeout
|
from ox.cache import read_url, cache_timeout
|
||||||
|
|
||||||
|
|
||||||
|
def get_id(url):
    """
    url - youtube watch url (or bare query string) with a "v" parameter

    returns the value of the "v" parameter, or None if there is none
    """
    # Anchor on a parameter boundary so that parameters whose name merely
    # ends in "v" (e.g. "rev=1") are not mistaken for the video id, and stop
    # at "&" or "#" so a trailing fragment does not end up in the id.
    match = re.search(r'(?:^|[?&])v=([^&#]+)', url)
    if match:
        return match.group(1)
    return None
|
||||||
|
|
||||||
|
def get_url(id):
    """
    id - id of video

    returns the youtube watch url for that id
    """
    watch_url = 'http://www.youtube.com/watch?v=%s'
    return watch_url % id
|
||||||
|
|
||||||
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
||||||
"""
|
"""
|
||||||
youtubeId - id of video
|
youtubeId - id of video
|
||||||
format - video format, options: webm, 1080p, 720p, mp4, high
|
format - video format, options: webm, 1080p, 720p, mp4, high
|
||||||
"""
|
"""
|
||||||
def get_url(stream):
|
|
||||||
return '%s&signature=%s' % (stream['url'], stream['sig'])
|
|
||||||
fmt = None
|
fmt = None
|
||||||
if format == '4k':
|
if format == '4k':
|
||||||
fmt=38
|
fmt=38
|
||||||
|
@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
||||||
fmt=35
|
fmt=35
|
||||||
elif format == 'webm':
|
elif format == 'webm':
|
||||||
streams = videos(youtubeId, 'webm')
|
streams = videos(youtubeId, 'webm')
|
||||||
return get_url(streams[max(streams.keys())])
|
return streams[max(streams.keys())]['stream_url']
|
||||||
|
|
||||||
streams = videos(youtubeId)
|
streams = videos(youtubeId)
|
||||||
if str(fmt) in streams:
|
if str(fmt) in streams:
|
||||||
return get_url(streams[str(fmt)])
|
return streams[str(fmt)]['stream_url']
|
||||||
|
|
||||||
def find(query, max_results=10, offset=1, orderBy='relevance'):
|
def find(query, max_results=10, offset=1, orderBy='relevance'):
|
||||||
query = quote(query)
|
query = quote(query)
|
||||||
|
@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'):
|
||||||
|
|
||||||
def info(id):
|
def info(id):
|
||||||
info = {}
|
info = {}
|
||||||
|
if id.startswith('http'):
|
||||||
|
id = get_id(id)
|
||||||
|
if not id:
|
||||||
|
return info
|
||||||
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
|
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
|
||||||
data = read_url(url)
|
data = read_url(url)
|
||||||
xml = parseString(data)
|
xml = parseString(data)
|
||||||
info['url'] = 'http://www.youtube.com/watch?v=%s' % id
|
info['id'] = id
|
||||||
|
info['url'] = get_url(id)
|
||||||
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
|
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
|
||||||
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
|
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
|
||||||
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
|
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
|
||||||
|
@ -70,8 +82,7 @@ def info(id):
|
||||||
k = xml.getElementsByTagName('media:keywords')[0].firstChild
|
k = xml.getElementsByTagName('media:keywords')[0].firstChild
|
||||||
if k:
|
if k:
|
||||||
info['keywords'] = k.data.split(', ')
|
info['keywords'] = k.data.split(', ')
|
||||||
url = "http://www.youtube.com/watch?v=%s" % id
|
data = read_url(info['url'])
|
||||||
data = read_url(url)
|
|
||||||
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
|
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
|
||||||
if match:
|
if match:
|
||||||
info['license'] = match[0].strip()
|
info['license'] = match[0].strip()
|
||||||
|
@ -113,12 +124,15 @@ def videos(id, format=''):
|
||||||
data = read_url(url)
|
data = read_url(url)
|
||||||
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
|
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
|
||||||
streams = {}
|
streams = {}
|
||||||
|
if match:
|
||||||
for x in match[0].split(','):
|
for x in match[0].split(','):
|
||||||
stream = {}
|
stream = {}
|
||||||
for s in x.split('\\u0026'):
|
for s in x.split('\\u0026'):
|
||||||
key, value = s.split('=')
|
key, value = s.split('=')
|
||||||
value = unquote_plus(value)
|
value = unquote_plus(value)
|
||||||
stream[key] = value
|
stream[key] = value
|
||||||
|
if 'url' in stream and 'sig' in stream:
|
||||||
|
stream['stream_url'] = '%s&signature=%s' % (stream['url'], stream['sig'])
|
||||||
if not stream_type or stream['type'].startswith(stream_type):
|
if not stream_type or stream['type'].startswith(stream_type):
|
||||||
streams[stream['itag']] = stream
|
streams[stream['itag']] = stream
|
||||||
return streams
|
return streams
|
||||||
|
@ -172,3 +186,15 @@ def download_webm(id, filename):
|
||||||
f.close()
|
f.close()
|
||||||
u.close()
|
u.close()
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
|
def get_config(id):
    """
    id - id of video, or the full url of its watch page

    returns the object decoded from the "ytplayer.config = ...;" assignment
    found in the watch page, or an empty dict if the page has none
    """
    if id.startswith('http'):
        url = id
    else:
        url = get_url(id)
    data = read_url(url)
    match = re.compile('ytplayer.config = (.*?);<').findall(data)
    if not match:
        # No embedded config on the page: return an empty result instead of
        # falling through to an unbound local.
        return {}
    # The captured text is a string, so it must be parsed with json.loads;
    # json.load expects a file-like object with a read() method.
    return json.loads(match[0])
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue