// Source: oxjs/source/Ox/js/HTML.js (JavaScript, 177 lines, 5.8 KiB)
'use strict';
/*@
Ox.parseEmailAddresses <f> Takes HTML and turns e-mail addresses into links
    (html) -> <s> HTML with e-mail addresses wrapped in mailto links
    html <s> HTML
    > Ox.parseEmailAddresses('foo@bar.com')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
    > Ox.parseEmailAddresses('<b>foo@bar.com</b>, baz')
    '<b><a href="mailto:foo@bar.com">foo@bar.com</a></b>, baz'
    > Ox.parseEmailAddresses('<a href="mailto:foo@bar.com">foo@bar.com</a>')
    '<a href="mailto:foo@bar.com">foo@bar.com</a>'
@*/
// fixme: shouldn't this be formatEmailAddresses?
Ox.parseEmailAddresses = function(html) {
    // true while we are between an opening <a ...> and its closing </a>
    var isLink = false;
    // Splitting on a regular expression with one capturing group returns
    // text at even indices and tags at odd indices. A tag name has to be
    // followed by whitespace, '/' or '>', so that '<foo@bar.com>' is still
    // treated as text and gets linked.
    return html.split(
        /(<\/?[a-z][a-z0-9]*(?:\s[^<>]*)?\/?>)/i
    ).map(function(part, i) {
        if (i % 2) {
            // tags pass through untouched (addresses in attributes must not
            // be wrapped), but we keep track of entering and leaving links
            if (/^<a[\s>]/i.test(part)) {
                isLink = true;
            } else if (/^<\/a[\s>]/i.test(part)) {
                isLink = false;
            }
            return part;
        }
        // text inside an existing link is already linked, leave it alone
        return isLink ? part : part.replace(
            /\b([0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,6})\b/gi,
            '<a href="mailto:$1">$1</a>'
        );
    }).join('');
};
/*@
Ox.parseHTML <f> Takes HTML from an untrusted source and returns something sane
> Ox.parseHTML('http://foo.com, bar')
'<a href="http://foo.com">http://foo.com</a>, bar'
> Ox.parseHTML('http://foo.com/foobar?foo, bar')
'<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
> Ox.parseHTML('(see: www.foo.com)')
'(see: <a href="http://www.foo.com">www.foo.com</a>)'
> Ox.parseHTML('foo@bar.com')
'<a href="mailto:foo@bar.com">foo@bar.com</a>'
> Ox.parseHTML('<a href="http://foo.com" onmouseover="alert()">foo</a>')
'<a href="http://foo.com">foo</a>'
> Ox.parseHTML('<a href="javascript:alert()">foo</a>')
'&lt;a href="javascript:alert()"&gt;foo'
> Ox.parseHTML('[http://foo.com foo]')
'<a href="http://foo.com">foo</a>'
> Ox.parseHTML('<rtl>foo</rtl>')
'<div style="direction: rtl">foo</div>'
> Ox.parseHTML('<script>alert()</script>')
'&lt;script&gt;alert()&lt;/script&gt;'
> Ox.parseHTML('\'foo\' < \'bar\' && "foo" > "bar"')
'\'foo\' &lt; \'bar\' &amp;&amp; "foo" &gt; "bar"'
> Ox.parseHTML('<b>foo')
'<b>foo</b>'
> Ox.parseHTML('<b>foo</b></b>')
'<b>foo</b>'
@*/
// Sanitizes HTML in three steps: tags from the whitelist are rewritten and
// swapped for placeholders, everything that is left gets entity-encoded, and
// finally the placeholders are replaced with the rewritten tags.
// fixme: URL and e-mail linking is disabled below, so the doc tests that
// expect plain URLs or e-mail addresses to become links do not hold for now
Ox.parseHTML = (function() {
    var defaultTags = [
            // inline formatting
            'b', 'code', 'i', 's', 'sub', 'sup', 'u',
            // block formatting
            'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
            // lists
            'li', 'ol', 'ul',
            // tables
            'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
            // other
            'a', 'br', 'img',
            // special
            'rtl', '[]'
        ],
        // For each tag, a map from regular expression source to replacement,
        // where '{1}' stands for the first capturing group.
        // The URL captured from href/src must not contain '"', '<' or '>':
        // otherwise input like '<a href="/"><script>...</script>">' or
        // '<img src="/"onerror=...//">' would carry markup or attributes
        // into the output. Line breaks are excluded too, as with the '.'
        // that was used before.
        parse = {
            a: {
                '<a [^<>]*?href="((https?://|/)[^"<>\\n\\r]+)".*?>': '<a href="{1}">',
                '</a>': '</a>'
            },
            img: {
                '<img [^<>]*?src="((https?://|/)[^"<>\\n\\r]+)".*?>': '<img src="{1}">'
            },
            rtl: {
                '<rtl>': '<div style="direction: rtl">',
                '</rtl>': '</div>'
            },
            // any other tag: opening or closing, attributes are not allowed
            '*': function(tag) {
                var ret = {};
                ret['<(/?' + tag + ') ?/?>'] = '<{1}>';
                return ret;
            }
        },
        tab = '\t';
    // html <s> untrusted HTML
    // tags <[s]> allowed tags, defaults to defaultTags ('[]' enables
    //     '[url text]' style links)
    // wikilinks is accepted but currently not used
    return function(html, tags, wikilinks) {
        var delimiter = tab,
            matches = [];
        tags = tags || defaultTags;
        // Placeholders look like <delimiter><index><delimiter>. Extend the
        // delimiter until it does not occur in the input, so that text like
        // '\t0\t' in the input cannot be mistaken for a placeholder.
        while (html.indexOf(delimiter) > -1) {
            delimiter += tab;
        }
        // html = Ox.clean(html); fixme: can this be a parameter?
        if (tags.indexOf('[]') > -1) {
            // the generated link is sanitized like any other link by the
            // 'a' rules below
            html = html.replace(/\[((https?:\/\/|\/).+?) (.+?)\]/gi, '<a href="$1">$3</a>');
            tags = tags.filter(function(tag) {
                return tag !== '[]';
            });
        }
        tags.forEach(function(tag) {
            var p = parse[tag] || parse['*'](tag);
            Ox.forEach(p, function(replace, regexp) {
                html = html.replace(new RegExp(regexp, 'gi'), function() {
                    matches.push(Ox.formatString(replace, arguments));
                    return delimiter + (matches.length - 1) + delimiter;
                });
            });
        });
        html = Ox.encodeHTML(html);
        //fixme: both fail if urls/emails are already links
        //html = Ox.parseURLs(html);
        //html = Ox.parseEmailAddresses(html);
        // Restore all tags in one pass. The replacement is a function, so
        // that '$&', '$1' etc. in a URL are not interpreted as replacement
        // patterns, and inserted tags are never scanned for placeholders.
        html = html.replace(
            new RegExp(delimiter + '(\\d+)' + delimiter, 'g'),
            function(placeholder, index) {
                return +index < matches.length ? matches[+index] : placeholder;
            }
        );
        html = html.replace(/\n\n/g, '<br/><br/>');
        // close extra opening (and remove extra closing) tags
        // note: this converts '&quot;' to '"'
        return Ox.element('<div>').html(html).html();
    };
}());
/*@
Ox.parseURL <f> Takes a URL, returns its components
(url) -> <o> URL components
url <s> URL
<script>
Ox.test.object = Ox.parseURL('http://www.foo.com:8080/bar/index.html?a=0&b=1#c');
</script>
> Ox.test.object.hash
'#c'
> Ox.test.object.host
'www.foo.com:8080'
> Ox.test.object.hostname
'www.foo.com'
> Ox.test.object.origin
'http://www.foo.com:8080'
> Ox.test.object.pathname
'/bar/index.html'
> Ox.test.object.port
'8080'
> Ox.test.object.protocol
'http:'
> Ox.test.object.search
'?a=0&b=1'
@*/
Ox.parseURL = (function() {
    // one anchor element, created once and shared by all calls: assigning
    // to its href lets the browser do the actual parsing
    var anchor = document.createElement('a'),
        properties = [
            'hash', 'host', 'hostname', 'origin',
            'pathname', 'port', 'protocol', 'search'
        ];
    return function(str) {
        var components = {},
            i,
            property;
        anchor.href = str;
        // copy the URL-related properties of the anchor, one by one
        for (i = 0; i < properties.length; i++) {
            property = properties[i];
            components[property] = anchor[property];
        }
        return components;
    };
}());
/*@
Ox.parseURLs <f> Takes HTML and turns URLs into links
    (html) -> <s> HTML with URLs wrapped in links
    html <s> HTML
    > Ox.parseURLs('http://foo.com, bar')
    '<a href="http://foo.com">http://foo.com</a>, bar'
    > Ox.parseURLs('http://foo.com/foobar?foo, bar')
    '<a href="http://foo.com/foobar?foo">http://foo.com/foobar?foo</a>, bar'
    > Ox.parseURLs('www.foo.com, bar')
    '<a href="http://www.foo.com">www.foo.com</a>, bar'
    > Ox.parseURLs('<a href="http://foo.com">http://foo.com</a> etc')
    '<a href="http://foo.com">http://foo.com</a> etc'
@*/
// fixme: shouldn't this be formatURLs?
Ox.parseURLs = function(html) {
    // true while we are between an opening <a ...> and its closing </a>
    var isLink = false;
    // wraps the URLs in a piece of text that contains no tags
    function addLinks(text) {
        return text.replace(
            /\b((https?:\/\/|www\.).+?)([\.,:;!\?\)\]]*?(\s|$))/gi,
            function(str, url, pre, end) {
                // 'www.' URLs need a protocol in the href, while the visible
                // text stays as written. The regular expression ignores
                // case, so the comparison has to ignore it too.
                var prefix = pre.toLowerCase() === 'www.' ? 'http://' : '';
                // a double quote in the URL must not end the attribute
                return '<a href="' + (prefix + url).replace(/"/g, '&quot;')
                    + '">' + url + '</a>' + end;
            }
        );
    }
    // Splitting on a regular expression with one capturing group returns
    // text at even indices and tags at odd indices. A tag name has to be
    // followed by whitespace, '/' or '>', so '<http://foo.com>' is text.
    return html.split(
        /(<\/?[a-z][a-z0-9]*(?:\s[^<>]*)?\/?>)/i
    ).map(function(part, i) {
        if (i % 2) {
            // tags pass through untouched (URLs in attributes must not be
            // wrapped), but we keep track of entering and leaving links
            if (/^<a[\s>]/i.test(part)) {
                isLink = true;
            } else if (/^<\/a[\s>]/i.test(part)) {
                isLink = false;
            }
            return part;
        }
        // text inside an existing link is already linked, leave it alone
        return isLink ? part : addLinks(part);
    }).join('');
};