python-ox/ox/oembed.py

36 lines
1.2 KiB
Python
Raw Permalink Normal View History

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
2014-09-30 19:04:46 +00:00
from . import cache
from .text import find_re
from .utils import json, ET
2016-06-08 13:32:46 +00:00
def get_embed_code(url, maxwidth=None, maxheight=None):
    """Discover and fetch the oEmbed data advertised by the page at *url*.

    The page is only downloaded when its ``content-type`` header starts with
    ``text/html``. Its ``<link ...>`` tags are then searched for one that
    mentions ``json+oembed`` (preferred) or ``xml+oembed``, and the endpoint
    found in that tag's ``href="..."`` attribute is requested.

    :param url: address of the page to inspect.
    :param maxwidth: optional integer sent to the endpoint as ``maxwidth``;
        omitted when falsy.
    :param maxheight: optional integer sent to the endpoint as ``maxheight``;
        omitted when falsy.
    :returns: the decoded JSON response, or for an XML response a dict
        mapping each child element's tag to its text. An empty dict is
        returned when the page is not HTML, advertises no oEmbed link, or
        the link has no usable ``href``.
    """
    embed = {}
    header = cache.get_headers(url)
    if not header.get('content-type', '').startswith('text/html'):
        return embed

    html = cache.read_url(url)
    links = re.compile(r'<link.*?>').findall(html)
    json_oembed = [link for link in links if 'json+oembed' in link]
    xml_oembed = [link for link in links if 'xml+oembed' in link]

    # JSON is preferred over XML when a page advertises both.
    candidates = json_oembed or xml_oembed
    if not candidates:
        return embed

    # NOTE(review): the href is used exactly as extracted; if find_re does
    # not decode HTML entities (e.g. "&amp;"), confirm endpoints accept that.
    oembed_url = find_re(candidates[0], 'href="(.*?)"')
    if not oembed_url:
        # A <link> tag without a double-quoted href gives nothing to fetch.
        return embed

    size_params = []
    if maxwidth:
        size_params.append('maxwidth=%d' % maxwidth)
    if maxheight:
        size_params.append('maxheight=%d' % maxheight)
    if size_params:
        # Start a query string when the endpoint has none; blindly joining
        # with "&" would otherwise produce ".../oembed&maxwidth=..." .
        separator = '&' if '?' in oembed_url else '?'
        oembed_url += separator + '&'.join(size_params)

    data = cache.read_url(oembed_url)
    if json_oembed:
        embed = json.loads(data)
    else:
        # Flatten the XML response: one entry per direct child element.
        for element in ET.fromstring(data):
            embed[element.tag] = element.text
    return embed