utf8encode vs TextEncoder (v6)

Revision 6 of this benchmark created by abral on


Description

The goal is to transform a UTF-8 string into a "binary" string (one character per encoded byte). This jsperf shows which is faster:

  • iterating over the string and manually checking each byte
  • converting the UTF-8 string into a Uint8Array with the TextEncoder API and generating a binary string from it.

This test requires the TextEncoder/TextDecoder API.

Preparation HTML

<script>
/**
 * Encodes a string as UTF-8 and returns the bytes as a "binary" string
 * (one character with code 0-255 per encoded byte). The one-byte pieces are
 * collected in an array and joined once at the end.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * a single code point and emitted as one 4-byte sequence. An unpaired
 * surrogate is emitted in its 3-byte form.
 *
 * @param {string} string - Text to encode.
 * @returns {string} Binary string holding the UTF-8 bytes of `string`.
 */
function utf8encodeWithArray(string) {
  var result = [];

  for (var n = 0; n < string.length; n++) {

    var c = string.charCodeAt(n);

    // Merge a surrogate pair into the code point it represents so that it is
    // written as a single 4-byte sequence rather than two 3-byte sequences.
    if (c >= 0xD800 && c <= 0xDBFF && n + 1 < string.length) {
      var low = string.charCodeAt(n + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
        n++; // the low surrogate has been consumed
      }
    }

    if (c < 128) {
      // 1 byte: 0xxxxxxx
      result.push(String.fromCharCode(c));
    } else if (c < 2048) {
      // 2 bytes: 110xxxxx 10xxxxxx
      result.push(String.fromCharCode((c >> 6) | 192));
      result.push(String.fromCharCode((c & 63) | 128));
    } else if (c < 65536) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      result.push(String.fromCharCode((c >> 12) | 224));
      result.push(String.fromCharCode(((c >> 6) & 63) | 128));
      result.push(String.fromCharCode((c & 63) | 128));
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      result.push(String.fromCharCode((c >> 18) | 240));
      result.push(String.fromCharCode(((c >> 12) & 63) | 128));
      result.push(String.fromCharCode(((c >> 6) & 63) | 128));
      result.push(String.fromCharCode((c & 63) | 128));
    }

  }

  return result.join("");
}
/**
 * Encodes a string as UTF-8 and returns the bytes as a "binary" string
 * (one character with code 0-255 per encoded byte). The output is built by
 * appending to a single string.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * a single code point and emitted as one 4-byte sequence. An unpaired
 * surrogate is emitted in its 3-byte form.
 *
 * @param {string} string - Text to encode.
 * @returns {string} Binary string holding the UTF-8 bytes of `string`.
 */
function utf8encodeWithString(string) {
  var result = "";

  for (var n = 0; n < string.length; n++) {

    var c = string.charCodeAt(n);

    // Merge a surrogate pair into the code point it represents so that it is
    // written as a single 4-byte sequence rather than two 3-byte sequences.
    if (c >= 0xD800 && c <= 0xDBFF && n + 1 < string.length) {
      var low = string.charCodeAt(n + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
        n++; // the low surrogate has been consumed
      }
    }

    if (c < 128) {
      // 1 byte: 0xxxxxxx
      result += String.fromCharCode(c);
    } else if (c < 2048) {
      // 2 bytes: 110xxxxx 10xxxxxx
      result += String.fromCharCode((c >> 6) | 192);
      result += String.fromCharCode((c & 63) | 128);
    } else if (c < 65536) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      result += String.fromCharCode((c >> 12) | 224);
      result += String.fromCharCode(((c >> 6) & 63) | 128);
      result += String.fromCharCode((c & 63) | 128);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      result += String.fromCharCode((c >> 18) | 240);
      result += String.fromCharCode(((c >> 12) & 63) | 128);
      result += String.fromCharCode(((c >> 6) & 63) | 128);
      result += String.fromCharCode((c & 63) | 128);
    }

  }

  return result;
}

/**
 * Builds a string from an array-like of character codes, one character per
 * element.
 *
 * The codes are handed to String.fromCharCode.apply in chunks. If a call
 * throws (for example because the engine rejects that many arguments), the
 * chunk size is halved and the same offset is retried.
 *
 * @param {ArrayLike<number>} array - Typed array, plain array, or any object
 *   with a numeric `length` and indexed elements.
 * @returns {string} The concatenated characters.
 * @throws Rethrows the last error from String.fromCharCode.apply if the chunk
 *   size shrinks to 1 before all input has been converted.
 */
function arrayLikeToString(array) {
  var chunk = 65536;
  var result = [], len = array.length, k = 0;
  var lastError = null;
  // Typed arrays offer subarray (a view, no copy); anything else is sliced.
  var hasSubarray = typeof array.subarray === "function";

  while (k < len && chunk > 1) {
    var part = hasSubarray
      ? array.subarray(k, k + chunk)
      : Array.prototype.slice.call(array, k, k + chunk);
    try {
      result.push(String.fromCharCode.apply(null, part));
      k += chunk;
    } catch (e) {
      lastError = e;
      chunk = Math.floor(chunk / 2);
    }
  }

  // Surface the failure instead of silently returning a truncated string.
  if (k < len) {
    throw lastError;
  }
  return result.join("");
}


/**
 * Encodes a string with the TextEncoder API and converts the resulting byte
 * array into a string via arrayLikeToString.
 *
 * @param {string} string - Text to encode.
 * @returns {string} The value returned by arrayLikeToString for the encoded bytes.
 */
function textencode(string) {
  var encoder = new TextEncoder("utf-8");
  var bytes = encoder.encode(string);
  return arrayLikeToString(bytes);
}

// Benchmark fixture: a 1 MiB byte buffer whose values cycle through 0..254
// (i % 255 never yields 255), decoded as UTF-8 into the string that the test
// cases pass to each encoder.
var length = 1024 * 1024;
var u8 = new Uint8Array(length);
var i = 0;
while (i < length) {
  u8[i] = i % 255;
  i++;
}
var bigString = new TextDecoder("utf-8").decode(u8);
</script>

Test runner

Ready to run.

Testing in
Test	Ops/sec
utf8encodeWithArray
utf8encodeWithArray(bigString);
ready
utf8encodeWithString
utf8encodeWithString(bigString);
ready
textencode
textencode(bigString);
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.