update ox.web.youtube

This commit is contained in:
j 2014-02-19 14:09:54 +05:30
parent 1c871f4d31
commit 075e735cd1

View file

@ -5,19 +5,26 @@ import urllib2
import cookielib
import re
from xml.dom.minidom import parseString
import json
import feedparser
import ox
from ox.cache import read_url, cache_timeout
def get_id(url):
    """Extract the video id from the ``v`` query parameter of a YouTube URL.

    url - watch page url, e.g. http://www.youtube.com/watch?v=<id>&feature=...

    Returns the id as a string, or None if url carries no ``v`` parameter.
    """
    # \b keeps parameters whose name merely ends in "v" (e.g. "rev=")
    # from being mistaken for the video id; [^&#]+ stops at the next
    # parameter or at a trailing fragment such as "#t=30s".
    match = re.search(r'\bv=([^&#]+)', url)
    if match:
        return match.group(1)
    return None
def get_url(id):
    """Return the youtube.com watch page url for the given video id."""
    watch_url = 'http://www.youtube.com/watch?v=%s'
    return watch_url % id
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
"""
youtubeId - id of video
format - video format, options: webm, 1080p, 720p, mp4, high
"""
def get_url(stream):
return '%s&signature=%s' % (stream['url'], stream['sig'])
fmt = None
if format == '4k':
fmt=38
@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
fmt=35
elif format == 'webm':
streams = videos(youtubeId, 'webm')
return get_url(streams[max(streams.keys())])
return streams[max(streams.keys())]['stream_url']
streams = videos(youtubeId)
if str(fmt) in streams:
return get_url(streams[str(fmt)])
return streams[str(fmt)]['stream_url']
def find(query, max_results=10, offset=1, orderBy='relevance'):
query = quote(query)
@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'):
def info(id):
info = {}
if id.startswith('http'):
id = get_id(id)
if not id:
return info
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
data = read_url(url)
xml = parseString(data)
info['url'] = 'http://www.youtube.com/watch?v=%s' % id
info['id'] = id
info['url'] = get_url(id)
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
@ -70,14 +82,13 @@ def info(id):
k = xml.getElementsByTagName('media:keywords')[0].firstChild
if k:
info['keywords'] = k.data.split(', ')
url = "http://www.youtube.com/watch?v=%s" % id
data = read_url(url)
data = read_url(info['url'])
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
if match:
info['license'] = match[0].strip()
info['license'] = re.sub('<.+?>', '', info['license']).strip()
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1" % id
data = read_url(url)
xml = parseString(data)
languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')]
@ -113,12 +124,15 @@ def videos(id, format=''):
data = read_url(url)
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
streams = {}
if match:
for x in match[0].split(','):
stream = {}
for s in x.split('\\u0026'):
key, value = s.split('=')
value = unquote_plus(value)
stream[key] = value
if 'url' in stream and 'sig' in stream:
stream['stream_url'] = '%s&signature=%s' % (stream['url'], stream['sig'])
if not stream_type or stream['type'].startswith(stream_type):
streams[stream['itag']] = stream
return streams
@ -172,3 +186,15 @@ def download_webm(id, filename):
f.close()
u.close()
return filename
def get_config(id):
    """Fetch a watch page and return its embedded ``ytplayer.config`` object.

    id - video id, or a full url (anything starting with 'http' is
         requested as given instead of being passed through get_url)

    Returns the decoded JSON value, or None if the page contains no
    ``ytplayer.config = ...;<`` assignment.
    Raises ValueError if the captured text is not valid JSON.
    """
    if id.startswith('http'):
        url = id
    else:
        url = get_url(id)
    data = read_url(url)
    match = re.compile('ytplayer.config = (.*?);<').findall(data)
    if not match:
        # Nothing to decode; previously this path hit an unbound `config`.
        return None
    # The capture is a string, so it has to go through json.loads();
    # json.load() only accepts file-like objects with a read() method.
    return json.loads(match[0])