scrapeit/scrapeit/youtube.py

54 lines
1.5 KiB
Python

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import re
import urllib2
from urllib import quote
from utils import read_url, open_url
def get_video_url(id, get_redirected = False):
  """Build the ``get_video.php`` download URL for a YouTube video.

  Fetches the watch page for ``id`` with ``read_url`` and scrapes the ``t``
  token out of the embedded ``player2.swf`` URL.

  Parameters:
    id: video id, as used in ``watch?v=<id>``.
    get_redirected: when true, open the download URL and return the URL it
      finally resolves to (after any redirects) instead of the
      ``get_video.php`` URL itself.

  Returns:
    The URL string, or None when no ``t`` token is found on the page.

  Errors raised by ``read_url`` or ``urllib2.urlopen`` are not caught here.
  """
  url = 'http://www.youtube.com/watch?v=%s' % id
  data = read_url(url)
  video = re.compile(r'player2.swf\?video_id=.*?&t=(.*?)"').findall(data)
  if not video:
    # Looser fallback: accept any query string before '&t=' and stop the
    # token at the next '&' instead of at a closing quote.
    video = re.compile(r'player2.swf\?.*&t=(.*?)\&').findall(data)
  if not video:
    return None
  url = "http://youtube.com/get_video.php?video_id=%s&t=%s" % (id, video[0])
  if get_redirected:
    # The previous code called get_url(), which is neither defined nor
    # imported in this module and raised NameError. geturl() reports the
    # URL actually retrieved, i.e. the target after redirects.
    response = urllib2.urlopen(url)
    try:
      url = response.geturl()
    finally:
      # Only the final URL is needed; close without reading the body.
      response.close()
  return url
def reg_search(reg, data):
  """Return the first ``findall`` hit of pattern ``reg`` in ``data``.

  The hit is returned with surrounding whitespace stripped. When the
  pattern does not match at all, the empty string is returned.
  """
  matches = re.findall(reg, data)
  if not matches:
    return ''
  first = matches[0]
  return first.strip()
def search(query, video_url_base = None):
  """Search YouTube for ``query`` and scrape the result page.

  Parameters:
    query: search terms; passed through ``quote`` before being placed in
      the results URL.
    video_url_base: optional URL prefix. When given, each result gets a
      ``video_link`` of ``<video_url_base>/<id>``. Otherwise the video URL
      is resolved with ``get_video_url`` (which fetches the watch page for
      that id) and stored under ``video_url``.

  Returns:
    A list with one dict per result link found, in page order. Each dict
    has the keys ``id``, ``link``, ``title``, ``description``,
    ``thumbnail`` and either ``video_link`` or ``video_url``.
    ``description`` and ``thumbnail`` are '' when not found on the page.
  """
  url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
  data = read_url(url)
  # The '.' in 'watch.v' is a regex wildcard; it matches the literal '?'.
  regx = re.compile('''<a href="/watch.v=(.*?)">(.*?)</a><br/>''')
  # '.' does not match newlines, so descriptions are searched in a copy of
  # the page with newlines replaced by spaces.
  data_flat = data.replace('\n', ' ')
  videos = []
  for video_id, title in regx.findall(data):
    # The id is interpolated into the patterns below; escape it so any
    # regex metacharacters in it are matched literally.
    id_pattern = re.escape(video_id)
    v = dict()
    v['id'] = video_id
    # Previously "http//youtube.com/watch.v=%s": the colon was missing and
    # the regex wildcard '.' had been copied in place of '?'.
    v['link'] = "http://youtube.com/watch?v=%s" % video_id
    v['title'] = title.strip()
    # NOTE(review): the two branches store under different keys
    # ('video_link' vs 'video_url'). Kept as-is for existing callers;
    # confirm whether this is intentional.
    if video_url_base:
      v['video_link'] = "%s/%s" % (video_url_base, video_id)
    else:
      v['video_url'] = get_video_url(video_id)
    v['description'] = reg_search('''BeginvidDesc%s">(.*?)</span>''' % id_pattern, data_flat)
    v['thumbnail'] = reg_search('<img src="(.*?)" class="vimg120" alt="%s" />' % id_pattern, data)
    videos.append(v)
  return videos