From 10967b0a55f71dedb34e3c250e5dfdb67c882592 Mon Sep 17 00:00:00 2001 From: AJ ONeal Date: Wed, 10 Jun 2015 17:59:01 -0600 Subject: [PATCH] v2.0 with proper encoding --- README.md | 44 +++++++++++++- bower.json | 2 +- index.js | 148 ++++++++++++++++++++++++++++++++++++++++++++++++ unibabel-dom.js | 1 + 4 files changed, 192 insertions(+), 3 deletions(-) create mode 100644 index.js create mode 120000 unibabel-dom.js diff --git a/README.md b/README.md index 46bacd1..edd1272 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ utf8-typed Base64, TypedArrays, and UTF-8 / Unicode conversions in Browser (and Node) JavaScript -This is based wholly on the work by good folks at the MDN. -See +See API === @@ -19,6 +18,24 @@ var base64 = Unibabel.arrToBase64(uint8Array) var uint8Array = Unibabel.base64ToArr(base64) ``` +**Normal APIs** + +* utf8ToBuffer(utf8str) => array +* bufferToUtf8(array) => string + +* utf8ToBase64(utf8str) => base64 +* base64ToUtf8(base64) => string + +* bufferToBase64(array) => base64 +* base64ToBuffer(base64) => array + +**Helper APIs** + +* utf8ToBinaryString(utf8str) => binstr +* binaryStringToUtf8(binstr) => utf8str +* bufferToBinaryString(buffer) => binstr +* binaryStringToBuffer(binstr) => array + Examples ======== @@ -51,3 +68,26 @@ Mozilla has licensed this code in the Public Domain, which means that I am at li under the Apache 2, which is something that, general speaking, your legal department will feel more comfortable with. See + +ChangeLog +==== + +v2.0.0 +------ + +The new implementation is binary compatible with node.js, TextEncoder, +and other more-common UTF-8 encodings. + +It is also based on DOM APIs which result in much less code and are still +backwards compatible all the way back to IE6 (not on purpose, just that +it happens to work). 
/*
 * Patch context that shares these physical lines with index.js. It is not part
 * of the module; it is kept verbatim here so that no content is lost.
 *
 * README.md (end of the ChangeLog hunk):
 *   +
 *   +See
 *   +
 *   +v1.0.0
 *   +------
 *   +
 *   +This version was based on the work by good folks at the MDN, however,
 *   +the UTF-8 conversion was not byte-compatible with other UTF-8 conversions
 *   +(such as node.js and TextEncoder), so don't use it.
 *   +See
 *
 * diff --git a/bower.json b/bower.json
 * index 6f456e5..8cb1045 100644
 * --- a/bower.json
 * +++ b/bower.json
 * @@ -1,7 +1,7 @@
 *  {
 *    "name": "unibabel",
 *    "main": "index.js",
 * -  "version": "1.0.0",
 * +  "version": "2.0.0",
 *    "homepage": "https://github.com/coolaj86/unibabel-js",
 *    "authors": [
 *      "AJ ONeal "
 *
 * diff --git a/index.js b/index.js
 * new file mode 100644
 * index 0000000..f99fa42
 * --- /dev/null
 * +++ b/index.js
 * @@ -0,0 +1,148 @@
 */

/**
 * Unibabel: conversions between UTF-8 text, "binary strings" (one character
 * per byte, char codes 0x00-0xFF), byte buffers, Base64 and hex.
 *
 * The API is attached as `Unibabel` to the global object. `window` is used
 * when it exists; otherwise the module falls back to `self`, `globalThis` or
 * `global` so that loading it outside a browser window does not throw.
 *
 * The Base64 helpers call the global `btoa` / `atob` functions.
 */
(function () {
  'use strict';

  /**
   * Creates a byte container of the requested length.
   *
   * @param {number} length - number of bytes
   * @returns {Uint8Array|number[]} a Uint8Array when the runtime provides one,
   *   otherwise a plain Array filled with zeros
   */
  function createByteArray(length) {
    var bytes;
    var i;

    if ('undefined' !== typeof Uint8Array) {
      return new Uint8Array(length);
    }

    bytes = [];
    for (i = 0; i < length; i += 1) {
      bytes[i] = 0;
    }

    return bytes;
  }

  /**
   * Encodes a JavaScript string as UTF-8 and returns the bytes as a binary
   * string (one character per byte).
   *
   * @param {string} str - text to encode
   * @returns {string} binary string of the UTF-8 bytes
   * @throws {URIError} propagated from encodeURIComponent (e.g. lone surrogate)
   */
  function utf8ToBinaryString(str) {
    var escstr = encodeURIComponent(str);

    // encodeURIComponent yields the UTF-8 bytes as %XX escapes (upper-case hex);
    // turn every escape, such as %0A, into the single character 0x0A.
    return escstr.replace(/%([0-9A-F]{2})/g, function (match, p1) {
      return String.fromCharCode(parseInt(p1, 16));
    });
  }

  /**
   * Encodes a JavaScript string as UTF-8 bytes.
   *
   * @param {string} str - text to encode
   * @returns {Uint8Array|number[]} the UTF-8 bytes
   */
  function utf8ToBuffer(str) {
    return binaryStringToBuffer(utf8ToBinaryString(str));
  }

  /**
   * Encodes a JavaScript string as UTF-8 and then as Base64.
   *
   * @param {string} str - text to encode
   * @returns {string} Base64 text
   */
  function utf8ToBase64(str) {
    return btoa(utf8ToBinaryString(str));
  }

  /**
   * Decodes a binary string holding UTF-8 bytes into a JavaScript string.
   *
   * Every character is expected to have a char code of at most 0xFF.
   *
   * @param {string} binstr - binary string of UTF-8 bytes
   * @returns {string} decoded text
   * @throws {URIError} propagated from decodeURIComponent for invalid UTF-8
   */
  function binaryStringToUtf8(binstr) {
    // [\s\S] (rather than .) so that line terminators are escaped as well.
    var escstr = binstr.replace(/[\s\S]/g, function (ch) {
      var code = ch.charCodeAt(0).toString(16).toUpperCase();

      if (code.length < 2) {
        code = '0' + code;
      }

      return '%' + code;
    });

    return decodeURIComponent(escstr);
  }

  /**
   * Converts a byte buffer into a binary string (one character per byte).
   *
   * @param {Uint8Array|number[]} buf - array-like of byte values
   * @returns {string} binary string
   */
  function bufferToBinaryString(buf) {
    var chars = [];
    var i;
    var len;

    for (i = 0, len = buf.length; i < len; i += 1) {
      chars.push(String.fromCharCode(buf[i]));
    }

    return chars.join('');
  }

  /**
   * Decodes UTF-8 bytes into a JavaScript string.
   *
   * @param {Uint8Array|number[]} buf - array-like of UTF-8 bytes
   * @returns {string} decoded text
   */
  function bufferToUtf8(buf) {
    return binaryStringToUtf8(bufferToBinaryString(buf));
  }

  /**
   * Decodes Base64 text whose payload is UTF-8 into a JavaScript string.
   *
   * @param {string} b64 - Base64 text
   * @returns {string} decoded text
   */
  function base64ToUtf8(b64) {
    return binaryStringToUtf8(atob(b64));
  }

  /**
   * Encodes a byte buffer as Base64.
   *
   * @param {Uint8Array|number[]} arr - array-like of byte values
   * @returns {string} Base64 text
   */
  function bufferToBase64(arr) {
    return btoa(bufferToBinaryString(arr));
  }

  /**
   * Converts a binary string into a byte buffer.
   *
   * @param {string} binstr - binary string
   * @returns {Uint8Array|number[]} one element per character, holding its code
   */
  function binaryStringToBuffer(binstr) {
    var buf = createByteArray(binstr.length);
    var i;

    for (i = 0; i < binstr.length; i += 1) {
      buf[i] = binstr.charCodeAt(i);
    }

    return buf;
  }

  /**
   * Decodes Base64 text into a byte buffer.
   *
   * @param {string} base64 - Base64 text
   * @returns {Uint8Array|number[]} decoded bytes
   */
  function base64ToBuffer(base64) {
    return binaryStringToBuffer(atob(base64));
  }

  // Hex Convenience Functions

  /**
   * Encodes a byte buffer as lower-case hex, two digits per byte.
   *
   * @param {Uint8Array|number[]} arr - array-like of byte values
   * @returns {string} hex text
   */
  function bufferToHex(arr) {
    var hex = '';
    var i;
    var len;
    var c;

    for (i = 0, len = arr.length; i < len; i += 1) {
      c = arr[i].toString(16);
      if (c.length < 2) {
        c = '0' + c;
      }
      hex += c;
    }

    return hex;
  }

  /**
   * Decodes hex text (two digits per byte, either case) into a byte buffer.
   *
   * @param {string} hex - hex text
   * @returns {Uint8Array|number[]} decoded bytes
   * @throws {Error} if the length is odd or a non-hex character is present
   */
  function hexToBuffer(hex) {
    var byteLen = hex.length / 2;
    var arr;
    var i;

    if (0 !== hex.length % 2) {
      throw new Error("Invalid hex length '" + hex.length + "'");
    }

    // Reject bad digits up front; parseInt would otherwise yield NaN, which a
    // Uint8Array stores silently as 0.
    if (!/^[0-9A-Fa-f]*$/.test(hex)) {
      throw new Error('Invalid hex string: only 0-9, a-f and A-F are allowed');
    }

    arr = createByteArray(byteLen);

    for (i = 0; i < byteLen; i += 1) {
      arr[i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16);
    }

    return arr;
  }

  /**
   * Finds the object that the API is attached to.
   *
   * @returns {Object} the global object of the current runtime
   * @throws {Error} if no global object can be located
   */
  function getGlobalObject() {
    if ('undefined' !== typeof window) {
      return window;
    }
    if ('undefined' !== typeof self) {
      return self;
    }
    if ('undefined' !== typeof globalThis) {
      return globalThis;
    }
    if ('undefined' !== typeof global) {
      return global;
    }

    throw new Error('Unibabel: unable to locate the global object');
  }

  getGlobalObject().Unibabel = {
    utf8ToBinaryString: utf8ToBinaryString
  , utf8ToBuffer: utf8ToBuffer
  , utf8ToBase64: utf8ToBase64
  , binaryStringToUtf8: binaryStringToUtf8
  , bufferToUtf8: bufferToUtf8
  , base64ToUtf8: base64ToUtf8
  , bufferToBinaryString: bufferToBinaryString
  , bufferToBase64: bufferToBase64
  , binaryStringToBuffer: binaryStringToBuffer
  , base64ToBuffer: base64ToBuffer

  // Hex Convenience Functions
  , hexToBuffer: hexToBuffer
  , bufferToHex: bufferToHex

  // compat
  , strToUtf8Arr: utf8ToBuffer
  , utf8ArrToStr: bufferToUtf8
  , arrToBase64: bufferToBase64
  , base64ToArr: base64ToBuffer
  };
}());

/*
 * Patch context that follows index.js on the same physical line, kept verbatim:
 *
 * diff --git a/unibabel-dom.js b/unibabel-dom.js
 * new file mode 120000
 * index 0000000..945ce43
 * --- /dev/null
 * +++ b/unibabel-dom.js
 * @@ -0,0 +1 @@
 * +index.js
 * \ No newline at end of file
 */