# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from urllib.parse import quote, unquote_plus
import urllib
from http import cookiejar as cookielib
import re
from xml.dom.minidom import parseString
import json

import ox
from ox.cache import read_url, cache_timeout


def get_id(url):
    # extract the video id from a watch URL, e.g. .../watch?v=<id>
    match = re.compile('v=(.+?)($|&)').findall(url)
    if match:
        return match[0][0]


def get_url(id):
    return 'http://www.youtube.com/watch?v=%s' % id


def video_url(youtubeId, format='mp4', timeout=cache_timeout):
    """
    youtubeId - id of video
    format - video format, options: 4k, 1080p, 720p, mp4, high, webm
    """
    # map the requested format to YouTube's legacy numeric itag codes
    fmt = None
    if format == '4k':
        fmt = 38
    elif format == '1080p':
        fmt = 37
    elif format == '720p':
        fmt = 22
    elif format == 'mp4':
        fmt = 18
    elif format == 'high':
        fmt = 35
    elif format == 'webm':
        # webm streams are keyed by resolution; return the largest one
        streams = videos(youtubeId, 'webm')
        return streams[max(streams.keys())]['url']

    streams = videos(youtubeId)
    if str(fmt) in streams:
        return streams[str(fmt)]['url']


def get_video_info(id):
    eurl = get_url(id)
    data = read_url(eurl).decode('utf-8')
    # the watch page embeds a session token 't' that get_video_info requires
    t = re.compile(r'\W[\'"]?t[\'"]?: ?[\'"](.+?)[\'"]').findall(data)
    if t:
        t = t[0]
    else:
        raise IOError('no session token found for video %s' % id)
    url = "http://www.youtube.com/get_video_info?&video_id=%s&el=$el&ps=default&eurl=%s&hl=en_US&t=%s" % (
        id, quote(eurl), quote(t))
    data = read_url(url).decode('utf-8')
    # the response is a urlencoded key=value string
    info = {}
    for part in data.split('&'):
        key, value = part.split('=', 1)  # values may themselves contain '='
        info[key] = unquote_plus(value)  # unquote_plus already turns '+' into spaces
    return info


def find(query, max_results=10, offset=1, orderBy='relevance'):
    import feedparser
    query = quote(query)
    url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (
        query, orderBy, offset, max_results)
    data = read_url(url)
    fd = feedparser.parse(data)
    videos = []
    for item in fd.entries:
        id = item['id'].split('/')[-1]
        title = item['title']
        description = item['description']
        videos.append((title, id, description))
        if len(videos) >= max_results:
            return videos
    return videos


def info(id, timeout=cache_timeout):
    info = {}
    if id.startswith('http'):
        id = get_id(id)
    if not id:
        return info
    url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
    data = read_url(url, timeout=timeout)
    xml = parseString(data)
    info['id'] = id
    info['url'] = get_url(id)
    info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
    info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
    info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
    info['author'] = "http://www.youtube.com/user/%s" % xml.getElementsByTagName('name')[0].firstChild.data
    info['categories'] = []
    for cat in xml.getElementsByTagName('media:category'):
        info['categories'].append(cat.firstChild.data)
    k = xml.getElementsByTagName('media:keywords')[0].firstChild
    if k:
        info['keywords'] = k.data.split(', ')
    data = read_url(info['url'], timeout=timeout)
    match = re.compile('
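
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). It assumes
# this file lives at ox/web/youtube.py and that videos(), referenced by
# video_url() above, is defined in the part of the module not shown here.
# The gdata API endpoints used above have since been retired, so these calls
# document the intended interface rather than currently working requests:
#
#     from ox.web import youtube
#
#     video_id = youtube.get_id('http://www.youtube.com/watch?v=dQw4w9WgXcQ')
#     meta = youtube.info(video_id)        # id, url, title, description, ...
#     stream = youtube.video_url(video_id, format='720p')
# ---------------------------------------------------------------------------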