# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
"""Scrape video keys, direct video URLs and search results from YouTube.

Python 2 module: relies on `httplib` and `urllib.quote`, plus the
project-local `oxlib` helpers for cached HTTP fetching and regex search.
"""
from urllib import quote
import httplib
import xml.etree.ElementTree as ET
import re

import feedparser

from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findString, findRe


def getVideoKey(youtubeId):
    """Return the video token ('t' query parameter) for a YouTube id.

    Issues a HEAD request for /v/<id>; YouTube answers with a 3xx redirect
    whose Location header carries the token as `t=`.  Returns False on any
    network error, non-redirect response, or a Location without `t=`.
    """
    try:
        conn = httplib.HTTPConnection("www.youtube.com")
        conn.request("HEAD", '/v/' + youtubeId)
        response = conn.getresponse()
        conn.close()
    except Exception:
        # Best-effort: callers expect False on failure, not an exception.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        return False
    if 300 <= response.status < 400:
        location = response.getheader("location")
        # Guard the match: a redirect without `t=` previously raised
        # AttributeError on .groups(); now it falls through to False.
        match = re.match(".*[?&]t=([^&]+)", location)
        if match:
            return match.groups()[0]
    return False


def getVideoKeyLegacyAPI(videoId):
    """Return the video token via the legacy api2_rest XML endpoint.

    FIX: the body previously referenced the undefined name `youtubeId`
    (the parameter is `videoId`), so every call raised NameError.
    """
    url = 'http://www.youtube.com/api2_rest?method=youtube.videos.get_video_token&video_id=' + videoId
    data = getUrl(url)
    xml = ET.fromstring(data)
    youtubeKey = xml.find('t').text
    return youtubeKey


def getVideoUrl(youtubeId, format='mp4'):
    """Return a direct get_video.php URL for the id.

    format 'mp4' requests fmt=18 (the MP4 stream); any other value omits
    the fmt parameter (YouTube then serves the default FLV).
    """
    youtubeKey = getVideoKey(youtubeId)
    if format == 'mp4':
        fmt = 18
        url = "http://youtube.com/get_video.php?video_id=%s&t=%s&fmt=%s" % (youtubeId, youtubeKey, fmt)
    else:
        url = "http://youtube.com/get_video.php?video_id=%s&t=%s" % (youtubeId, youtubeKey)
    return url


# NOTE(review): dead code below — the old GData/Atom-feed implementation,
# disabled by wrapping it in a string literal.  Kept verbatim for reference;
# it has no runtime effect.
'''
def getMovieInfo(youtubeId):
    url = "http://gdata.youtube.com/feeds/api/videos/%s " % youtubeId
    data = getUrl(url)
    fd = feedparser.parse(data)
    return getInfoFromAtom(fd.entries[0])

def getInfoFromAtom(entry):
    info = dict()
    info['title'] = entry['title']
    info['description'] = entry['description']
    info['author'] = entry['author']
    info['published'] = entry['published_parsed']
    info['keywords'] = entry['media_keywords'].split(', ')
    info['url'] = entry['links'][0]['href']
    info['id'] = findString(info['url'], "/watch?v=")
    info['thumbnail'] = "http://img.youtube.com/vi/%s/0.jpg" % info['id']
    info['flv'] = getVideoUrl(info['id'], 'flv')
    info['mp4'] = getVideoUrl(info['id'], 'mp4')
    info['embed'] = '' % (info['id'], info['id'])
    return info

def find(query, max_results=10, offset=1, orderBy='relevance'):
    query = quote(query)
    url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
    data = getUrl(url)
    fd = feedparser.parse(data)
    videos = []
    for entry in fd.entries:
        v = getInfoFromAtom(entry)
        videos.append(v)
        if len(videos) >= max_results:
            return videos
    return videos
'''


def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
    """Scrape the YouTube results page for `query`.

    Returns a list of dicts with keys id, link, title, description,
    thumbnail and either video_link (when video_url_base is given) or
    video_url (a direct get_video URL), at most max_results entries.

    NOTE(review): `offset` and `orderBy` are accepted for interface
    compatibility with the old API-based find() but are ignored by this
    scraping implementation.
    """
    url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
    data = getUrlUnicode(url)
    # NOTE(review): this pattern (and the description pattern / .replace('')
    # calls below) look truncated — likely HTML stripped from the original
    # source.  Kept verbatim; verify against a live results page.
    regx = re.compile(''' video''')
    id_title = regx.findall(data)
    data_flat = data.replace('\n', ' ')  # presumably for the description search — TODO confirm
    videos = {}
    for video in id_title:
        vid = video[0]
        if vid not in videos:
            v = dict()
            v['id'] = vid
            # FIX: was "http//youtube.com/watch.v=%s" — missing the ':' after
            # http and used '.v=' instead of the '?v=' query parameter.
            v['link'] = "http://youtube.com/watch?v=%s" % v['id']
            v['title'] = video[2].strip()
            if video_url_base:
                v['video_link'] = "%s/%s" % (video_url_base, v['id'])
            else:
                v['video_url'] = getVideoUrl(v['id'])
            v['description'] = findRe(data, 'BeginvidDesc%s">(.*?)' % v['id']).strip().replace('', ' ').replace('', '')
            v['thumbnail'] = video[1]
            videos[vid] = v
            if len(videos) >= max_results:
                return videos.values()
    return videos.values()