# scrapeit/scrapeit/youtube.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2

import re
import urllib2
from urllib import quote

from utils import read_url, open_url


def get_video_url(id, get_redirected = False):
  """Build the get_video.php download URL for a YouTube video.

  Fetches the watch page for `id` and scrapes the `t` token from the
  player2.swf parameters embedded in that page.

  Args:
    id: YouTube video id; interpolated into the watch-page URL and the
      resulting download URL.
    get_redirected: when true, open the download URL and return the URL
      it finally resolves to instead of the get_video.php URL itself.

  Returns:
    The download URL as a string, or None when no token could be found
    on the watch page.
  """
  page = read_url('http://www.youtube.com/watch?v=%s' % id)
  tokens = re.findall(r'player2.swf\?video_id=.*?&t=(.*?)"', page)
  if not tokens:
    # Looser fallback: token anywhere in the query string, ended by '&'.
    tokens = re.findall(r'player2.swf\?.*&t=(.*?)\&', page)
  if not tokens:
    return None

  url = "http://youtube.com/get_video.php?video_id=%s&t=%s" % (id, tokens[0])
  if get_redirected:
    # Let urllib2 follow the redirects and report where the request ended up.
    response = urllib2.urlopen(url)
    try:
      url = response.geturl()
    finally:
      response.close()
  return url

def reg_search(reg, data):
  """Return the first match of `reg` in `data`, stripped of whitespace.

  Args:
    reg: regular expression source; the first `findall` result is used.
    data: text to search.

  Returns:
    The stripped first match, or '' when the pattern does not match.
  """
  matches = re.findall(reg, data)
  if not matches:
    return ''
  return matches[0].strip()

def search(query, video_url_base = None):
  """Search YouTube and scrape the result page into a list of dicts.

  Args:
    query: search terms; URL-quoted before being put into the request.
    video_url_base: optional URL prefix. When given, each result gets a
      'video_link' of the form '<video_url_base>/<id>'. When omitted, the
      download URL is looked up with get_video_url() and stored under
      'video_url' (which is None if that lookup finds nothing).

  Returns:
    A list with one dict per result, holding the keys 'id', 'link',
    'title', 'description', 'thumbnail' and either 'video_link' or
    'video_url'. 'description' and 'thumbnail' are '' when not found.
  """
  url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
  data = read_url(url)
  # '.' does not match newlines, so descriptions are searched on one line.
  data_flat = data.replace('\n', ' ')
  id_title = re.findall('''<a href="/watch.v=(.*?)">(.*?)</a><br/>''', data)

  videos = []
  for video_id, title in id_title:
    # The id is scraped text; escape it before embedding it in a pattern.
    id_pattern = re.escape(video_id)
    v = {
      'id': video_id,
      'link': "http://youtube.com/watch?v=%s" % video_id,
      'title': title.strip(),
    }
    # NOTE(review): the two branches use different keys ('video_link' vs
    # 'video_url'); kept as-is for existing callers -- confirm the intent.
    if video_url_base:
      v['video_link'] = "%s/%s" % (video_url_base, video_id)
    else:
      v['video_url'] = get_video_url(video_id)
    v['description'] = reg_search('''BeginvidDesc%s">(.*?)</span>''' % id_pattern, data_flat)
    v['thumbnail'] = reg_search('<img src="(.*?)" class="vimg120" alt="%s" />' % id_pattern, data)
    videos.append(v)
  return videos