update ox.web.youtube

This commit is contained in:
j 2014-02-19 14:09:54 +05:30
parent 1c871f4d31
commit 075e735cd1

View file

@ -5,19 +5,26 @@ import urllib2
import cookielib import cookielib
import re import re
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
import json
import feedparser import feedparser
import ox import ox
from ox.cache import read_url, cache_timeout from ox.cache import read_url, cache_timeout
def get_id(url):
    """
    Extract the video id from a YouTube watch url.

    url - address carrying the id in a v= parameter
    returns the id string, or None if url has no non-empty v= parameter
    """
    # \b keeps parameters that merely end in "v" (rev=, av=) from matching;
    # the id stops at the next parameter (&) or at the url fragment (#).
    match = re.search(r'\bv=([^&#]+)', url)
    if match:
        return match.group(1)
    return None
def get_url(id):
    """
    Build the watch page url for a video.

    id - id of video
    """
    watch_url = 'http://www.youtube.com/watch?v=%s'
    return watch_url % id
def video_url(youtubeId, format='mp4', timeout=cache_timeout): def video_url(youtubeId, format='mp4', timeout=cache_timeout):
""" """
youtubeId - id of video youtubeId - id of video
format - video format, options: webm, 1080p, 720p, mp4, high format - video format, options: webm, 1080p, 720p, mp4, high
""" """
def get_url(stream):
return '%s&signature=%s' % (stream['url'], stream['sig'])
fmt = None fmt = None
if format == '4k': if format == '4k':
fmt=38 fmt=38
@ -31,11 +38,11 @@ def video_url(youtubeId, format='mp4', timeout=cache_timeout):
fmt=35 fmt=35
elif format == 'webm': elif format == 'webm':
streams = videos(youtubeId, 'webm') streams = videos(youtubeId, 'webm')
return get_url(streams[max(streams.keys())]) return streams[max(streams.keys())]['stream_url']
streams = videos(youtubeId) streams = videos(youtubeId)
if str(fmt) in streams: if str(fmt) in streams:
return get_url(streams[str(fmt)]) return streams[str(fmt)]['stream_url']
def find(query, max_results=10, offset=1, orderBy='relevance'): def find(query, max_results=10, offset=1, orderBy='relevance'):
query = quote(query) query = quote(query)
@ -54,10 +61,15 @@ def find(query, max_results=10, offset=1, orderBy='relevance'):
def info(id): def info(id):
info = {} info = {}
if id.startswith('http'):
id = get_id(id)
if not id:
return info
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
data = read_url(url) data = read_url(url)
xml = parseString(data) xml = parseString(data)
info['url'] = 'http://www.youtube.com/watch?v=%s' % id info['id'] = id
info['url'] = get_url(id)
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0] info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
@ -70,14 +82,13 @@ def info(id):
k = xml.getElementsByTagName('media:keywords')[0].firstChild k = xml.getElementsByTagName('media:keywords')[0].firstChild
if k: if k:
info['keywords'] = k.data.split(', ') info['keywords'] = k.data.split(', ')
url = "http://www.youtube.com/watch?v=%s" % id data = read_url(info['url'])
data = read_url(url)
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data) match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
if match: if match:
info['license'] = match[0].strip() info['license'] = match[0].strip()
info['license'] = re.sub('<.+?>', '', info['license']).strip() info['license'] = re.sub('<.+?>', '', info['license']).strip()
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1" % id
data = read_url(url) data = read_url(url)
xml = parseString(data) xml = parseString(data)
languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')] languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')]
@ -113,14 +124,17 @@ def videos(id, format=''):
data = read_url(url) data = read_url(url)
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data) match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
streams = {} streams = {}
for x in match[0].split(','): if match:
stream = {} for x in match[0].split(','):
for s in x.split('\\u0026'): stream = {}
key, value = s.split('=') for s in x.split('\\u0026'):
value = unquote_plus(value) key, value = s.split('=')
stream[key] = value value = unquote_plus(value)
if not stream_type or stream['type'].startswith(stream_type): stream[key] = value
streams[stream['itag']] = stream if 'url' in stream and 'sig' in stream:
stream['stream_url'] = '%s&signature=%s' % (stream['url'], stream['sig'])
if not stream_type or stream['type'].startswith(stream_type):
streams[stream['itag']] = stream
return streams return streams
def playlist(url): def playlist(url):
@ -172,3 +186,15 @@ def download_webm(id, filename):
f.close() f.close()
u.close() u.close()
return filename return filename
def get_config(id):
    """
    Fetch a video page and return the player configuration embedded in it.

    id - id of video, or a full url (starting with 'http') that is fetched as is
    returns the decoded value of the page's "ytplayer.config = ...;" assignment,
    or None if the page contains no such assignment
    """
    if id.startswith('http'):
        url = id
    else:
        url = get_url(id)
    data = read_url(url)
    match = re.search(r'ytplayer\.config = (.*?);<', data)
    if match:
        # The capture is a JSON string, so it needs loads(); load() expects
        # a file-like object and fails on a plain string.
        return json.loads(match.group(1))
    return None