python-oxweb/web/youtube.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from urllib import quote, unquote
import httplib
import xml.etree.ElementTree as ET
import re

import feedparser
from ox.cache import readUrl, readUrlUnicode
from ox import findString, findRe


def getVideoKey(youtubeId):
    """Look up the ``token`` value for a video via YouTube's get_video_info URL.

    The response body is searched for ``token=...&thumbnail``; the first
    captured value is returned URL-unquoted.  Returns False when the
    response contains no such match.
    """
    info_url = "http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId
    tokens = re.findall("token=(.+)&thumbnail", readUrl(info_url))
    if not tokens:
        return False
    return unquote(tokens[0])
 
def getVideoUrl(youtubeId, format='mp4'):
    """Build the get_video.php download URL for a video.

    youtubeId -- id of the video
    format    -- '720p' appends fmt=22, 'mp4' appends fmt=18; any other
                 value yields the URL without an fmt parameter

    The key returned by getVideoKey() is interpolated as the ``t``
    parameter as-is (including False when no key was found).
    """
    youtubeKey = getVideoKey(youtubeId)
    link = "http://youtube.com/get_video.php?video_id=%s&t=%s" % (youtubeId, youtubeKey)
    if format == '720p':
        return link + "&fmt=%s" % 22
    if format == 'mp4':
        return link + "&fmt=%s" % 18
    return link

def getMovieInfo(youtubeId, video_url_base=None):
    """Return the info dict for a single video.

    Reads the gdata feed URL for youtubeId, parses it with feedparser and
    passes the first entry (together with video_url_base) to
    getInfoFromAtom().  Indexing the first entry raises IndexError when
    the parsed feed has no entries.
    """
    feed = feedparser.parse(readUrl("http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId))
    first_entry = feed.entries[0]
    return getInfoFromAtom(first_entry, video_url_base)

def getInfoFromAtom(entry, video_url_base=None):
    """Convert one parsed feed entry into a plain info dict.

    entry          -- mapping providing 'title', 'description', 'author',
                      'links' and optionally 'media_keywords'
    video_url_base -- when truthy, the 'flv' and 'mp4' links are built as
                      <video_url_base>/<id>.<ext>; otherwise 'flv', 'mp4'
                      and '720p' links are obtained from getVideoUrl()

    The result also carries 'url', 'id', 'thumbnail' and 'embed', plus
    'keywords' when the entry has 'media_keywords'.
    """
    info = {
        'title': entry['title'],
        'description': entry['description'],
        'author': entry['author'],
    }
    #info['published'] = entry['published_parsed']
    if 'media_keywords' in entry:
        # keywords arrive as one ', '-separated string
        info['keywords'] = entry['media_keywords'].split(', ')

    page_url = entry['links'][0]['href']
    info['url'] = page_url
    video_id = findString(page_url, "/watch?v=")
    info['id'] = video_id
    info['thumbnail'] = "http://img.youtube.com/vi/%s/0.jpg" % video_id

    if not video_url_base:
        # no mirror given: ask YouTube for each format
        for fmt in ('flv', 'mp4', '720p'):
            info[fmt] = getVideoUrl(video_id, fmt)
    else:
        for ext in ('flv', 'mp4'):
            info[ext] = "%s/%s.%s" % (video_url_base, video_id, ext)

    embed_template = '<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%s&hl=en"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/%s&hl=en" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>'
    info['embed'] = embed_template % (video_id, video_id)
    return info

def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
    """Search the gdata video feed and return a list of info dicts.

    query          -- search text; URL-quoted before it is sent
    max_results    -- passed as max-results; collecting also stops once
                      the result list reaches this length
    offset         -- passed as start-index
    orderBy        -- passed as orderby
    video_url_base -- forwarded to getInfoFromAtom() for every entry
    """
    search_url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (
        quote(query), orderBy, offset, max_results)
    feed = feedparser.parse(readUrlUnicode(search_url))
    found = []
    for entry in feed.entries:
        found.append(getInfoFromAtom(entry, video_url_base))
        # the length check runs after appending, so a non-empty feed
        # always contributes at least one result
        if len(found) >= max_results:
            break
    return found

# Disabled code: the triple-quoted string below is a bare expression, so
# nothing in it is ever executed.  It holds another find() with the same
# signature that scraped the youtube.com/results HTML page with regular
# expressions and returned videos.values() instead of a list built from
# feed entries.  It appears to be a superseded implementation kept for
# reference only.
'''
def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
  url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
  data = readUrlUnicode(url)
  regx = re.compile(' <a href="/watch.v=(.*?)" title="(.*?)" ')
  regx = re.compile('<a href="/watch\?v=(\w*?)" ><img src="(.*?)"  class="vimg120" title="(.*?)" alt="video">')
  id_title = regx.findall(data)
  data_flat = data.replace('\n', ' ')
  videos = {}
  for video in id_title:
    vid = video[0]
    if vid not in videos:
      v = dict()
      v['id'] = vid
      v['link'] = "http//youtube.com/watch.v=%s" % v['id']
      v['title'] = video[2].strip()
      if video_url_base:
        v['video_link'] = "%s/%s" % (video_url_base, v['id'])
      else:
        v['video_url'] = getVideoUrl(v['id'])
      v['description'] = findRe(data, 'BeginvidDesc%s">(.*?)</span>' % v['id']).strip().replace('<b>', ' ').replace('</b>', '')
      v['thumbnail'] = video[1]
    videos[vid] = v
    if len(videos) >= max_results:
        return videos.values()
  return videos.values()
'''
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00			`# -- coding: utf-8 --`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`# vi:si:et:sw=4:sts=4:ts=4`
unquote youtube key 2009-07-24 12:46:59 +02:00			`from urllib import quote, unquote`
use new gdata api to get video key 2008-10-07 22:22:17 +02:00			`import httplib`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00			`import xml.etree.ElementTree as ET`
fix youtube find 2008-09-30 15:58:21 +02:00			`import re`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00
cleanup imports 2008-04-30 16:22:01 +02:00			`import feedparser`
depend on ox, install as ox.web, migrate getUrl to readUrl 2009-10-12 13:47:43 +02:00			`from ox.cache import readUrl, readUrlUnicode`
			`from ox import findString, findRe`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00
use new gdata api to get video key 2008-10-07 22:22:17 +02:00			`def getVideoKey(youtubeId):`
depend on ox, install as ox.web, migrate getUrl to readUrl 2009-10-12 13:47:43 +02:00			`data = readUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)`
udpate youtube drm 2009-08-19 18:02:05 +02:00			`match = re.compile("token=(.+)&thumbnail").findall(data)`
get key from site again 2008-12-09 19:25:35 +01:00			`if match:`
unquote youtube key 2009-07-24 12:46:59 +02:00			`return unquote(match[0])`
fix plot, fix dont fail in youtube 2008-12-07 15:39:39 +01:00			`return False`
use new gdata api to get video key 2008-10-07 22:22:17 +02:00
			`def getVideoUrl(youtubeId, format='mp4'):`
			`youtubeKey = getVideoKey(youtubeId)`
add 720p video links 2008-12-09 19:09:12 +01:00			`if format == '720p':`
			`fmt=22`
			`url = "http://youtube.com/get_video.php?video_id=%s&t=%s&fmt=%s" % (youtubeId, youtubeKey, fmt)`
			`elif format == 'mp4':`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`fmt=18`
use new gdata api to get video key 2008-10-07 22:22:17 +02:00			`url = "http://youtube.com/get_video.php?video_id=%s&t=%s&fmt=%s" % (youtubeId, youtubeKey, fmt)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`else:`
use new gdata api to get video key 2008-10-07 22:22:17 +02:00			`url = "http://youtube.com/get_video.php?video_id=%s&t=%s" % (youtubeId, youtubeKey)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`return url`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`def getMovieInfo(youtubeId, video_url_base=None):`
			`url = "http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId`
depend on ox, install as ox.web, migrate getUrl to readUrl 2009-10-12 13:47:43 +02:00			`data = readUrl(url)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`fd = feedparser.parse(data)`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`return getInfoFromAtom(fd.entries[0], video_url_base)`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`def getInfoFromAtom(entry, video_url_base=None):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`info = dict()`
			`info['title'] = entry['title']`
			`info['description'] = entry['description']`
			`info['author'] = entry['author']`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`#info['published'] = entry['published_parsed']`
looks like this does not allways happen 2008-12-09 19:04:36 +01:00			`if 'media_keywords' in entry:`
			`info['keywords'] = entry['media_keywords'].split(', ')`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`info['url'] = entry['links'][0]['href']`
			`info['id'] = findString(info['url'], "/watch?v=")`
			`info['thumbnail'] = "http://img.youtube.com/vi/%s/0.jpg" % info['id']`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`if video_url_base:`
			`info['flv'] = "%s/%s.%s" % (video_url_base, info['id'], 'flv')`
			`info['mp4'] = "%s/%s.%s" % (video_url_base, info['id'], 'mp4')`
			`else:`
			`info['flv'] = getVideoUrl(info['id'], 'flv')`
			`info['mp4'] = getVideoUrl(info['id'], 'mp4')`
add 720p video links 2008-12-09 19:09:12 +01:00			`info['720p'] = getVideoUrl(info['id'], '720p')`
fix youtube find 2008-09-30 15:58:21 +02:00			`info['embed'] = '<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%s&hl=en"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/%s&hl=en" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>' % (info['id'], info['id'])`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`return info`
youtube, find, getMovieInfo, getVideoUrl 2008-04-30 16:15:22 +02:00
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`query = quote(query)`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)`
depend on ox, install as ox.web, migrate getUrl to readUrl 2009-10-12 13:47:43 +02:00			`data = readUrlUnicode(url)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`fd = feedparser.parse(data)`
			`videos = []`
			`for entry in fd.entries:`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`v = getInfoFromAtom(entry, video_url_base)`
vi:si:et:sw=4:sts=4:ts=4 2008-06-19 11:47:02 +02:00			`videos.append(v)`
			`if len(videos) >= max_results:`
			`return videos`
			`return videos`
fix youtube find 2008-09-30 15:58:21 +02:00
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`'''`
fix youtube find 2008-09-30 15:58:21 +02:00			`def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):`
			`url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)`
depend on ox, install as ox.web, migrate getUrl to readUrl 2009-10-12 13:47:43 +02:00			`data = readUrlUnicode(url)`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`regx = re.compile(' <a href="/watch.v=(.?)" title="(.?)" ')`
			`regx = re.compile('<a href="/watch\?v=(\w?)" ><img src="(.?)" class="vimg120" title="(.*?)" alt="video">')`
fix youtube find 2008-09-30 15:58:21 +02:00			`id_title = regx.findall(data)`
			`data_flat = data.replace('\n', ' ')`
			`videos = {}`
			`for video in id_title:`
			`vid = video[0]`
			`if vid not in videos:`
			`v = dict()`
			`v['id'] = vid`
			`v['link'] = "http//youtube.com/watch.v=%s" % v['id']`
			`v['title'] = video[2].strip()`
			`if video_url_base:`
			`v['video_link'] = "%s/%s" % (video_url_base, v['id'])`
			`else:`
use new gdata api to get video key 2008-10-07 22:22:17 +02:00			`v['video_url'] = getVideoUrl(v['id'])`
fix youtube find 2008-09-30 15:58:21 +02:00			`v['description'] = findRe(data, 'BeginvidDesc%s">(.*?)</span>' % v['id']).strip().replace('<b>', ' ').replace('</b>', '')`
			`v['thumbnail'] = video[1]`
			`videos[vid] = v`
			`if len(videos) >= max_results:`
			`return videos.values()`
			`return videos.values()`
use gdata atom feeds 2008-10-07 23:07:49 +02:00			`'''`
fix youtube find 2008-09-30 15:58:21 +02:00