oxjs/source/Ox/js/Encoding.js

372 lines
12 KiB
JavaScript
Raw Normal View History

2011-11-05 16:46:53 +00:00
'use strict';
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeBase26 <b> Encode a number as bijective base26
See <a href="http://en.wikipedia.org/wiki/Bijective_numeration">
Bijective numeration</a>.
> Ox.encodeBase26(4461)
'FOO'
@*/
Ox.encodeBase26 = function(number) {
var string = '';
while (number) {
string = String.fromCharCode(64 + number % 26) + string;
number = parseInt(number / 26);
}
2012-05-25 08:37:48 +00:00
return string;
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeBase26 <f> Decodes a bijective base26-encoded number
See <a href="http://en.wikipedia.org/wiki/Bijective_numeration">
Bijective numeration</a>.
> Ox.decodeBase26('foo')
4461
@*/
Ox.decodeBase26 = function(string) {
return string.toUpperCase().split('').reverse().reduce(function(p, c, i) {
return p + (c.charCodeAt(0) - 64) * Math.pow(26, i);
}, 0);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeBase32 <b> Encode a number as base32
See <a href="http://www.crockford.com/wrmg/base32.html">Base 32</a>.
> Ox.encodeBase32(15360)
'F00'
> Ox.encodeBase32(33819)
'110V'
@*/
Ox.encodeBase32 = function(number) {
return Ox.map(number.toString(32), function(character) {
return Ox.BASE_32_DIGITS[parseInt(character, 32)];
});
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeBase32 <f> Decodes a base32-encoded number
See <a href="http://www.crockford.com/wrmg/base32.html">Base 32</a>.
> Ox.decodeBase32('foo')
15360
> Ox.decodeBase32('ILOU')
33819
> Ox.decodeBase32('?').toString()
'NaN'
@*/
Ox.decodeBase32 = function(string) {
return parseInt(Ox.map(string.toUpperCase(), function(character) {
var index = Ox.BASE_32_DIGITS.indexOf(
Ox.BASE_32_ALIASES[character] || character
);
2012-05-25 08:37:48 +00:00
return (index == -1 ? ' ' : index).toString(32);
}), 32);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeBase64 <f> Encode a number as base64
> Ox.encodeBase64(32394)
'foo'
@*/
Ox.encodeBase64 = function(number) {
return btoa(Ox.encodeBase256(number)).replace(/=/g, '');
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeBase64 <f> Decodes a base64-encoded number
> Ox.decodeBase64('foo')
32394
@*/
Ox.decodeBase64 = function(string) {
return Ox.decodeBase256(atob(string));
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeBase128 <f> Encode a number as base128
> Ox.encodeBase128(1685487)
'foo'
@*/
Ox.encodeBase128 = function(number) {
var string = '';
while (number) {
string = Ox.char(number & 127) + string;
number >>= 7;
}
return string;
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeBase128 <f> Decode a base128-encoded number
> Ox.decodeBase128('foo')
1685487
@*/
Ox.decodeBase128 = function(string) {
return string.split('').reverse().reduce(function(p, c, i) {
return p + (c.charCodeAt(0) << i * 7);
}, 0);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeBase256 <f> Encode a number as base256
> Ox.encodeBase256(6713199)
'foo'
@*/
Ox.encodeBase256 = function(number) {
var string = '';
while (number) {
string = Ox.char(number & 255) + string;
number >>= 8;
}
return string;
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeBase256 <f> Decode a base256-encoded number
> Ox.decodeBase256('foo')
6713199
@*/
Ox.decodeBase256 = function(string) {
return string.split('').reverse().reduce(function(p, c, i) {
return p + (c.charCodeAt(0) << i * 8);
}, 0);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeDeflate <f> Encodes a string, using deflate
Since PNGs are deflate-encoded, the <code>canvas</code> object's
<code>toDataURL</code> method provides an efficient implementation.
The string is encoded as UTF-8 and written to the RGB channels of a
canvas element, then the PNG dataURL is decoded from base64, and some
head, tail and chunk names are removed.
(str) -> <s> The encoded string
str <s> The string to be encoded
# Test with: Ox.decodeDeflate(Ox.encodeDeflate('foo'), alert)
@*/
2012-05-25 08:37:48 +00:00
Ox.encodeDeflate = function(string, callback) {
// Make sure we can encode the full unicode range of characters.
string = Ox.encodeUTF8(string);
// We can only safely write to RGB, so we need 1 pixel for 3 bytes.
// The string length may not be a multiple of 3, so we need to encode
// the number of padding bytes (1 byte), the string, and non-0-bytes
// as padding, so that the combined length becomes a multiple of 3.
var length = 1 + string.length, c = Ox.canvas(Math.ceil(length / 3), 1),
data, idat, pad = (3 - length % 3) % 3;
string = Ox.char(pad) + string + Ox.repeat('\u00FF', pad);
Ox.loop(c.data.length, function(i) {
// Write character codes into RGB, and 255 into ALPHA
c.data[i] = i % 4 < 3 ? string.charCodeAt(i - parseInt(i / 4)) : 255;
});
c.context.putImageData(c.imageData, 0, 0);
// Get the PNG data from the data URL and decode it from base64.
string = atob(c.canvas.toDataURL().split(',')[1]);
// Discard bytes 0 to 15 (8 bytes PNG signature, 4 bytes IHDR length, 4
// bytes IHDR name), keep bytes 16 to 19 (width), discard bytes 20 to 29
// (4 bytes height, 5 bytes flags), keep bytes 29 to 32 (IHDR checksum),
// keep the rest (IDAT chunks), discard the last 12 bytes (IEND chunk).
data = string.slice(16, 20) + string.slice(29, 33);
idat = string.slice(33, -12);
while (idat) {
// Each IDAT chunk is 4 bytes length, 4 bytes name, length bytes
// data and 4 bytes checksum. We can discard the name parts.
length = idat.slice(0, 4);
data += length + idat.slice(8, 12 + (
length = Ox.decodeBase256(length)
));
idat = idat.slice(12 + length);
}
// Allow for async use, symmetrical to Ox.decodeDeflate
callback && callback(data);
return data;
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeDeflate <f> Decodes an deflate-encoded string
Since PNGs are deflate-encoded, the <code>canvas</code> object's
<code>drawImage</code> method provides an efficient implementation. The
string will be wrapped as a PNG dataURL, encoded as base64, and drawn
onto a canvas element, then the RGB channels will be read, and the
result will be decoded from UTF8.
(str) -> <u> undefined
str <s> The string to be decoded
callback <f> Callback function
str <s> The decoded string
@*/
2012-05-25 08:37:48 +00:00
Ox.decodeDeflate = function(string, callback) {
var image = new Image(),
// PNG file signature and IHDR chunk
data = '\u0089PNG\r\n\u001A\n\u0000\u0000\u0000\u000DIHDR'
+ string.slice(0, 4) + '\u0000\u0000\u0000\u0001'
+ '\u0008\u0006\u0000\u0000\u0000' + string.slice(4, 8),
// IDAT chunks
idat = string.slice(8), length;
function error() {
throw new RangeError('Deflate codec can\'t decode data.');
}
while (idat) {
// Reinsert the IDAT chunk names
length = idat.slice(0, 4);
data += length + 'IDAT' + idat.slice(4, 8 + (
length = Ox.decodeBase256(length)
));
idat = idat.slice(8 + length);
}
// IEND chunk
data += '\u0000\u0000\u0000\u0000IEND\u00AE\u0042\u0060\u0082';
// Unfortunately, we can't synchronously set the source of an image,
// draw it onto a canvas, and read its data.
image.onload = function() {
string = Ox.toArray(Ox.canvas(image).data).map(function(value, index) {
// Read one character per RGB byte, ignore ALPHA.
return index % 4 < 3 ? Ox.char(value) : '';
}).join('');
try {
// Parse the first byte as number of bytes to chop at the end,
// and the rest, without these bytes, as an UTF8-encoded string.
string = Ox.decodeUTF8(string.slice(1, -string.charCodeAt(0)));
} catch (e) {
error();
}
2012-05-25 08:37:48 +00:00
callback(string);
}
image.onerror = error;
image.src = 'data:image/png;base64,' + btoa(data);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeHTML <f> HTML-encodes a string
> Ox.encodeHTML('\'<"&">\'')
'&apos;&lt;&quot;&amp;&quot;&gt;&apos;'
> Ox.encodeHTML('äbçdê')
'&#x00E4;b&#x00E7;d&#x00EA;'
@*/
Ox.encodeHTML = function(str) {
return Ox.map(str.toString(), function(v) {
var code = v.charCodeAt(0);
return code < 128
? (v in Ox.HTML_ENTITIES ? Ox.HTML_ENTITIES[v] : v)
: '&#x' + Ox.pad(code.toString(16).toUpperCase(), 4) + ';';
});
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeHTML <f> Decodes an HTML-encoded string
> Ox.decodeHTML('&apos;&lt;&quot;&amp;&quot;&gt;&apos;')
'\'<"&">\''
> Ox.decodeHTML('&#x0027;&#x003C;&#x0022;&#x0026;&#x0022;&#x003E;&#x0027;')
'\'<"&">\''
> Ox.decodeHTML('&auml;b&ccedil;d&ecirc;')
'äbçdê'
> Ox.decodeHTML('&#x00E4;b&#x00E7;d&#x00EA;')
'äbçdê'
> Ox.decodeHTML('<b>bold</b>')
'<b>bold</b>'
@*/
Ox.decodeHTML = function(str) {
// relies on dom, but shorter than using this:
// http://www.w3.org/TR/html5/named-character-references.html
return Ox.decodeHTMLEntities(Ox.element('<div>').html(str).html());
};
2012-05-25 08:37:48 +00:00
Ox.encodeHTMLEntities = function(str) {
return str.replace(
new RegExp('(' + Object.keys(Ox.HTML_ENTITIES).join('|') + ')', 'g'),
function(match) {
return Ox.HTML_ENTITIES[match];
}
2012-05-25 08:37:48 +00:00
);
};
2012-05-25 08:37:48 +00:00
Ox.decodeHTMLEntities = function(str) {
return str.replace(
new RegExp('(' + Ox.values(Ox.HTML_ENTITIES).join('|') + ')', 'g'),
function(match) {
return Ox.keyOf(Ox.HTML_ENTITIES, match);
}
);
};
2012-05-25 08:37:48 +00:00
/*@
Ox.encodeUTF8 <f> Encodes a string as UTF-8
see http://en.wikipedia.org/wiki/UTF-8
(string) -> <s> UTF-8 encoded string
string <s> Any string
> Ox.encodeUTF8("YES")
"YES"
> Ox.encodeUTF8("¥€$")
"\u00C2\u00A5\u00E2\u0082\u00AC\u0024"
@*/
Ox.encodeUTF8 = function(str) {
return Ox.map(str, function(chr) {
var code = chr.charCodeAt(0),
str = '';
if (code < 128) {
str = chr;
} else if (code < 2048) {
str = String.fromCharCode(code >> 6 | 192)
+ String.fromCharCode(code & 63 | 128);
} else {
str = String.fromCharCode(code >> 12 | 224)
+ String.fromCharCode(code >> 6 & 63 | 128)
+ String.fromCharCode(code & 63 | 128);
}
return str;
});
};
2012-05-25 08:37:48 +00:00
/*@
Ox.decodeUTF8 <f> Decodes an UTF-8-encoded string
see http://en.wikipedia.org/wiki/UTF-8
(utf8) -> <s> string
utf8 <s> Any UTF-8-encoded string
> Ox.decodeUTF8('YES')
'YES'
> Ox.decodeUTF8('\u00C2\u00A5\u00E2\u0082\u00AC\u0024')
'¥€$'
@*/
Ox.decodeUTF8 = function(string) {
var code, i = 0, length = string.length, ret = '';
function error(byte, position) {
throw new RangeError(
'UTF-8 codec can\'t decode byte 0x' +
byte.toString(16).toUpperCase() + ' at position ' + position
);
2012-05-25 08:37:48 +00:00
}
while (i < length) {
code = [
string.charCodeAt(i),
string.charCodeAt(i + 1),
string.charCodeAt(i + 2)
];
if (code[0] <= 128) {
ret += string[i];
i++;
} else if (
code[0] >= 192 && code[0] < 240
&& i < length - (code[0] < 224 ? 1 : 2)
) {
if (code[1] >= 128 && code[1] < 192) {
if (code[0] < 224) {
ret += String.fromCharCode(
(code[0] & 31) << 6 | code[1] & 63
);
i += 2;
} else if (code[2] >= 128 && code[2] < 192) {
ret += String.fromCharCode(
(code[0] & 15) << 12 | (code[1] & 63) << 6
| code[2] & 63
);
i += 3;
} else {
2012-05-25 08:37:48 +00:00
error(code[2], i + 2);
}
} else {
2012-05-25 08:37:48 +00:00
error(code[1], i + 1);
}
2012-05-25 08:37:48 +00:00
} else {
error(code[0], i);
}
2012-05-25 08:37:48 +00:00
}
return ret;
};