update ox.web.youtube
This commit is contained in:
parent
1c871f4d31
commit
075e735cd1
1 changed files with 42 additions and 16 deletions
|
@ -5,19 +5,26 @@ import urllib2
|
|||
import cookielib
|
||||
import re
|
||||
from xml.dom.minidom import parseString
|
||||
import json
|
||||
|
||||
import feedparser
|
||||
import ox
|
||||
from ox.cache import read_url, cache_timeout
|
||||
|
||||
|
||||
def get_id(url):
    """
    Extract the YouTube video id from a watch URL.

    url - URL (or bare query string) carrying the id in its ``v`` parameter

    Returns the id as a string, or None if no ``v`` parameter is present.
    """
    # Anchor on a parameter boundary (start of string, '?', '&' or ';') so
    # that parameters whose name merely ends in "v" (e.g. "rev=1") are not
    # mistaken for the video id. Stop at '&' or '#' so neither the next
    # parameter nor a trailing fragment such as "#t=30" leaks into the id.
    match = re.search(r'(?:^|[?&;])v=([^&#]+)', url)
    if match:
        return match.group(1)
    return None
|
||||
|
||||
def get_url(id):
    """
    Build the YouTube watch page URL for a video.

    id - video id to place in the ``v`` query parameter

    Returns the URL as a string.
    """
    watch_url = 'http://www.youtube.com/watch?v=%s'
    return watch_url % id
|
||||
|
||||
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
||||
"""
|
||||
youtubeId - id of video
|
||||
format - video format, options: webm, 1080p, 720p, mp4, high
|
||||
"""
|
||||
def get_url(stream):
|
||||
return '%s&signature=%s' % (stream['url'], stream['sig'])
|
||||
fmt = None
|
||||
if format == '4k':
|
||||
fmt=38
|
||||
|
@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
|
|||
fmt=35
|
||||
elif format == 'webm':
|
||||
streams = videos(youtubeId, 'webm')
|
||||
return get_url(streams[max(streams.keys())])
|
||||
return streams[max(streams.keys())]['stream_url']
|
||||
|
||||
streams = videos(youtubeId)
|
||||
if str(fmt) in streams:
|
||||
return get_url(streams[str(fmt)])
|
||||
return streams[str(fmt)]['stream_url']
|
||||
|
||||
def find(query, max_results=10, offset=1, orderBy='relevance'):
|
||||
query = quote(query)
|
||||
|
@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'):
|
|||
|
||||
def info(id):
|
||||
info = {}
|
||||
if id.startswith('http'):
|
||||
id = get_id(id)
|
||||
if not id:
|
||||
return info
|
||||
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
|
||||
data = read_url(url)
|
||||
xml = parseString(data)
|
||||
info['url'] = 'http://www.youtube.com/watch?v=%s' % id
|
||||
info['id'] = id
|
||||
info['url'] = get_url(id)
|
||||
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
|
||||
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
|
||||
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
|
||||
|
@ -70,14 +82,13 @@ def info(id):
|
|||
k = xml.getElementsByTagName('media:keywords')[0].firstChild
|
||||
if k:
|
||||
info['keywords'] = k.data.split(', ')
|
||||
url = "http://www.youtube.com/watch?v=%s" % id
|
||||
data = read_url(url)
|
||||
data = read_url(info['url'])
|
||||
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
|
||||
if match:
|
||||
info['license'] = match[0].strip()
|
||||
info['license'] = re.sub('<.+?>', '', info['license']).strip()
|
||||
|
||||
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id
|
||||
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1" % id
|
||||
data = read_url(url)
|
||||
xml = parseString(data)
|
||||
languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')]
|
||||
|
@ -113,12 +124,15 @@ def videos(id, format=''):
|
|||
data = read_url(url)
|
||||
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
|
||||
streams = {}
|
||||
if match:
|
||||
for x in match[0].split(','):
|
||||
stream = {}
|
||||
for s in x.split('\\u0026'):
|
||||
key, value = s.split('=')
|
||||
value = unquote_plus(value)
|
||||
stream[key] = value
|
||||
if 'url' in stream and 'sig' in stream:
|
||||
stream['stream_url'] = '%s&signature=%s' % (stream['url'], stream['sig'])
|
||||
if not stream_type or stream['type'].startswith(stream_type):
|
||||
streams[stream['itag']] = stream
|
||||
return streams
|
||||
|
@ -172,3 +186,15 @@ def download_webm(id, filename):
|
|||
f.close()
|
||||
u.close()
|
||||
return filename
|
||||
|
||||
def get_config(id):
    """
    Fetch a video's watch page and return its embedded player config.

    id - video id, or a full URL (anything starting with 'http'), which is
         fetched as given

    Returns the object decoded from the page's ``ytplayer.config = ...;``
    JSON assignment, or None if the page contains no such assignment.
    """
    if id.startswith('http'):
        url = id
    else:
        url = get_url(id)
    data = read_url(url)
    match = re.compile('ytplayer.config = (.*?);<').findall(data)
    if not match:
        # No player config on the page: return None explicitly instead of
        # falling through with no result bound.
        return None
    # findall() yields strings, so the JSON must be decoded with
    # json.loads(); json.load() expects a file-like object with .read().
    return json.loads(match[0])
|
||||
|
||||
|
|
Loading…
Reference in a new issue