base-122-performance

Benchmark created by Kevin Albertson on


Preparation HTML

<script>
// Input-kind tags used by encode() to pick how bytes are read.
var kString = 0;
var kUint8Array = 1;

// Initial value of the header byte that encode() places first in its output.
// To avoid illegal characters, enforce odd and >13. TODO: improve.
var kHeader = 15;

// Header flag: the final two-byte character carries only an illegal code,
// with no trailing seven bits of data.
var kShortened = 64;

var kDefaultMimeType = "image/png";
var kDebug = false;

// Seven-bit values that are never emitted as a single byte; encode() folds
// them into a two-byte character instead, keyed by their index in this list.
var kIllegals = [
    0,  // null
    10, // newline
    13, // carriage return
    34, // double quote
    38, // ampersand
    92  // backslash
];

/**
 * Encodes raw data into base-122.
 *
 * The input is consumed seven bits at a time. A seven-bit value that is not in
 * kIllegals is emitted as a single byte. A value that is in kIllegals is folded,
 * together with the following seven bits (if any), into a two-byte UTF-8 sequence.
 *
 * @param {Uint8Array|Buffer|Array|String} rawData - The data to be encoded. This can be an array
 * or Buffer with raw data bytes or a string of bytes (i.e. the type of argument to btoa())
 * @returns {string} The base-122 encoded data: the header byte followed by the encoded
 * UTF-8 bytes, decoded into a string with TextDecoder.
 */
function encode(rawData) {
    const isString = typeof rawData === 'string';
    const getByte = isString ? (i) => rawData.codePointAt(i) : (i) => rawData[i];
    // Slot 0 is reserved for the header byte, which is only final once all input is consumed.
    const outData = [kHeader];
    let header = kHeader;
    let curIndex = 0;
    let curBit = 0; // Points to current bit needed

    // Get seven bits of input data. Returns false if there is no input left.
    function get7() {
        if (curIndex >= rawData.length) return false;
        // Shift, mask, unshift to get first part, then align it to a seven bit chunk.
        const firstByte = getByte(curIndex);
        const firstPart = (((254 >>> curBit) & firstByte) << curBit) >> 1;
        // Check if we need to go to the next byte for more bits.
        curBit += 7;
        if (curBit < 8) return firstPart; // Do not need next byte.
        curBit -= 8;
        curIndex++;
        // Now we want bits [0..curBit] of the next byte if it exists.
        if (curIndex >= rawData.length) return firstPart;
        const secondByte = getByte(curIndex);
        // Mask the leading curBit bits of the next byte and align them to the low end.
        const secondPart = (((0xFF00 >>> curBit) & secondByte) & 0xFF) >> (8 - curBit);
        return firstPart | secondPart;
    }

    while (true) {
        // Grab 7 bits.
        const bits = get7();
        if (bits === false) break;

        const illegalIndex = kIllegals.indexOf(bits);
        if (illegalIndex === -1) {
            outData.push(bits);
            continue;
        }

        // Two-byte sequence: the index of the illegal value goes in bits 2..4 of the first byte.
        let b1 = 194 | ((7 & illegalIndex) << 2);
        let b2 = 128;
        // See if there are any input bits after the illegal sequence.
        const nextBits = get7();
        if (nextBits === false) {
            header |= kShortened;
        } else {
            // Push first bit onto first byte, remaining 6 onto second.
            b1 |= (nextBits & 64) > 0 ? 1 : 0;
            b2 |= nextBits & 63;
        }
        outData.push(b1, b2);
    }

    // Fill in the header byte at the front.
    outData[0] = header;
    return new TextDecoder().decode(new Uint8Array(outData));
}

/**
 * Decodes base-122 encoded data back to the original data.
 *
 * @param {string} base122Data - The base-122 encoded string. Its first character is the
 * header; every following character is read with charCodeAt.
 * @returns {Array} The data in a regular array representing byte values.
 */
function decode(base122Data) {
    const decoded = [];
    const header = base122Data.charCodeAt(0);
    const lastIndex = base122Data.length - 1;
    let curByte = 0;
    let bitOfByte = 0;

    // Append seven bits to the output, emitting a byte whenever eight bits are available.
    function push7(bits) {
        const shifted = bits << 1;
        // Align these bits to the offset within the current byte.
        curByte |= shifted >>> bitOfByte;
        bitOfByte += 7;
        if (bitOfByte >= 8) {
            decoded.push(curByte);
            bitOfByte -= 8;
            // Now, take the remainder, left shift by what has been taken.
            curByte = (shifted << (7 - bitOfByte)) & 255;
        }
    }

    // Index 0 holds the header, so data starts at index 1.
    for (let i = 1; i < base122Data.length; i++) {
        const c = base122Data.charCodeAt(i);
        if (c <= 127) {
            // One byte characters can be pushed directly.
            push7(c);
            continue;
        }
        // Two-byte character. Note, the charCodeAt will give the codePoint, thus
        // 0b110xxxxx 0b10yyyyyy will give => xxxxxyyyyyy
        push7(kIllegals[(c >>> 8) & 7]); // 7 = 0b111.
        // Push the remainder if this is not the last character or if the header says to.
        const isShortened = (header & kShortened) !== 0;
        if (i !== lastIndex || !isShortened) push7(c & 127);
    }
    return decoded;
}
</script>

Setup

// Benchmark fixture: kSize random bytes, plus their base-64 and base-122 encodings.
var kSize = 10000;
var bytes = new Uint8Array(kSize);
var i = 0;
while (i < kSize) {
  bytes[i] = Math.floor(Math.random() * 256);
  i++;
}
// One character per byte, i.e. the kind of string btoa() accepts.
var str = String.fromCharCode.apply(null, bytes);
var b64Str = btoa(str);
var b122Str = encode(str);

Test runner

Ready to run.

Testing in
Test | Ops/sec
Base-122 Decode
decode(b122Str)
ready
Base-64 Decode
atob(b64Str);
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.

  • Revision 1: published by Kevin Albertson on