UTF-8 encoding (v11)

Revision 11 of this benchmark created on


Preparation HTML

<script>

// Sample input shared by every benchmark case on this page. It mixes many
// scripts (Latin with diacritics, Cyrillic, Greek, Armenian, Georgian,
// Devanagari, Hebrew, Arabic, Japanese, Thai), so the encoders are exercised
// on characters that need more than one UTF-8 byte as well as plain ASCII.
var alt_text = "šťžľčěďňřůĺŠŤŽĽČĚĎŇŘŮĹłążęćńśźŁĄŻĘĆŃŚŹĂ㪺ŢţščžćđŠČŽĆŐőŰűÄäÖöÜüßабвгдеёжзийклмнопрстуфхцчшчьыъэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯЎўЄєҐґЂЉЊЋЏђљњћџ€ΜπορώναφάωσπασμέναγυαλιάχωρίςναπάθωτίποταÉggetetiðgleránþessaðmeiðamigMogęjeśćszkłoiminieszkodziPotsăănâncsticlășieanumărăneșteЯможуїстишклойвономенінепошкодитьԿրնամապակիուտելևինծիանհանգիստչըներ։მინასვჭამდაარამტკივაमैंकाँचखासकताहूँमुझेउससेकोईपीडानहींहोतीאנייכוללאכולזכוכיתוזהלאמזיקליאיךקעןעסןגלאָזאוןעסטוטמירנישטװײأناقادرعلىأكلالزجاجوهذالاؤلمني私はガラスを食べられます。それは私を傷つけません。ฉันกินกระจกได้แต่มันไม่ทำให้ฉันเจ็บ";

/**
 * Encode a JavaScript (UTF-16) string as UTF-8 and return it as a "binary
 * string": every character of the result has a code in 0..255 and stands for
 * one UTF-8 byte.
 *
 * A valid surrogate pair (high surrogate immediately followed by a low
 * surrogate) is combined into a single code point and emitted as one 4-byte
 * sequence. An unpaired surrogate is encoded on its own as three bytes, the
 * same way utf8_encode_2 treats it.
 *
 * @param {string} input - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function utf8_encode_1(input) {
  var output = "";
  for (var n = 0; n < input.length; n++) {
    var c = input.charCodeAt(n);

    // Combine a surrogate pair into the code point it represents. Encoding
    // the two halves separately would produce 6 bytes (CESU-8), not UTF-8.
    if (c >= 0xD800 && c <= 0xDBFF && n + 1 < input.length) {
      var low = input.charCodeAt(n + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        c = 0x10000 + ((c & 0x3FF) << 10) + (low & 0x3FF);
        n++; // the low surrogate has been consumed
      }
    }

    if (c < 128) {
      // 1 byte: 0xxxxxxx
      output += String.fromCharCode(c);
    } else if (c < 2048) {
      // 2 bytes: 110xxxxx 10xxxxxx
      output += String.fromCharCode((c >> 6) | 192);
      output += String.fromCharCode((c & 63) | 128);
    } else if (c < 65536) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      output += String.fromCharCode((c >> 12) | 224);
      output += String.fromCharCode(((c >> 6) & 63) | 128);
      output += String.fromCharCode((c & 63) | 128);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      output += String.fromCharCode((c >> 18) | 240);
      output += String.fromCharCode(((c >> 12) & 63) | 128);
      output += String.fromCharCode(((c >> 6) & 63) | 128);
      output += String.fromCharCode((c & 63) | 128);
    }
  }
  return output;
}

/**
 * Convert a UTF-16 string to UTF-8, returned as a string whose characters
 * each carry one byte value.
 *
 * A high surrogate directly followed by a low surrogate is merged into one
 * code point; any other surrogate is encoded by itself as three bytes.
 *
 * @param {string} input - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function utf8_encode_2(input) {
  var len = input.length;
  var encoded = "";

  for (var pos = 0; pos < len; pos++) {
    var cp = input.charCodeAt(pos);

    // Merge a UTF-16 surrogate pair into the code point it stands for.
    if (cp >= 0xD800 && cp <= 0xDBFF && pos + 1 < len) {
      var next = input.charCodeAt(pos + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        cp = 0x10000 + ((cp & 0x03FF) << 10) + (next & 0x03FF);
        pos++;
      }
    }

    // Emit the UTF-8 byte sequence for the code point.
    if (cp <= 0x7F) {
      encoded += String.fromCharCode(cp);
    } else if (cp <= 0x7FF) {
      encoded += String.fromCharCode(
        0xC0 | ((cp >>> 6) & 0x1F),
        0x80 | (cp & 0x3F)
      );
    } else if (cp <= 0xFFFF) {
      encoded += String.fromCharCode(
        0xE0 | ((cp >>> 12) & 0x0F),
        0x80 | ((cp >>> 6) & 0x3F),
        0x80 | (cp & 0x3F)
      );
    } else if (cp <= 0x1FFFFF) {
      encoded += String.fromCharCode(
        0xF0 | ((cp >>> 18) & 0x07),
        0x80 | ((cp >>> 12) & 0x3F),
        0x80 | ((cp >>> 6) & 0x3F),
        0x80 | (cp & 0x3F)
      );
    }
  }

  return encoded;
}

/**
 * Encode a JavaScript (UTF-16) string as UTF-8 by collecting the byte values
 * in an array and converting the array to a "binary string" (one character
 * per byte) at the end.
 *
 * A high surrogate is combined with the following unit only when that unit
 * really is a low surrogate. An unpaired surrogate is encoded on its own as
 * three bytes (matching utf8_encode_2) instead of being dropped or corrupting
 * the character after it.
 *
 * @param {string} str - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function utf8_encode_3(str) {
  var arr = [];
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);

    // Only merge when a low surrogate actually follows. charCodeAt() past
    // the end of the string returns NaN, which would otherwise poison the
    // arithmetic and make the character vanish from the output.
    if (0xD800 <= code && code <= 0xDBFF && i + 1 < str.length) {
      var low = str.charCodeAt(i + 1);
      if (0xDC00 <= low && low <= 0xDFFF) {
        code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
        i++; // the low surrogate has been consumed
      }
    }

    if (code <= 127) {
      arr.push(code);
    } else if (code <= 2047) {
      arr.push(
        (code >>> 6) + 0xC0,
        (code & 0x3F) | 0x80
      );
    } else if (code <= 65535) {
      arr.push(
        (code >>> 12) + 0xE0,
        (code >>> 6 & 0x3F) | 0x80,
        (code & 0x3F) | 0x80
      );
    } else {
      // Code points built from surrogate pairs: 0x10000..0x10FFFF.
      arr.push(
        (code >>> 18) + 0xF0,
        (code >>> 12 & 0x3F) | 0x80,
        (code >>> 6 & 0x3F) | 0x80,
        (code & 0x3F) | 0x80
      );
    }
  }

  // Function.prototype.apply passes every array element as a separate
  // argument, and engines cap the argument count; one call for a long input
  // throws RangeError. Converting in bounded chunks avoids that.
  var CHUNK = 0x8000;
  if (arr.length <= CHUNK) {
    return String.fromCharCode.apply(String, arr);
  }
  var parts = [];
  for (var start = 0; start < arr.length; start += CHUNK) {
    parts.push(String.fromCharCode.apply(String, arr.slice(start, start + CHUNK)));
  }
  return parts.join("");
}

/**
 * Encode a JavaScript (UTF-16) string as UTF-8, returned as a "binary string"
 * with one character (code 0..255) per UTF-8 byte. Bytes are assembled with
 * bit masks and appended to the result by string concatenation.
 *
 * A high surrogate is combined with the following unit only when that unit
 * really is a low surrogate. An unpaired surrogate is encoded on its own as
 * three bytes (matching utf8_encode_2) instead of being dropped or corrupting
 * the character after it.
 *
 * @param {string} input - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function utf8_encode_4(input) {
  var output = "";
  for (var i = 0; i < input.length; i++) {
    var x = input.charCodeAt(i);

    // Validate the low surrogate before merging: charCodeAt() past the end
    // returns NaN, and a non-surrogate follower would yield a bogus code
    // point while silently consuming the next character.
    if (0xD800 <= x && x <= 0xDBFF && i + 1 < input.length) {
      var low = input.charCodeAt(i + 1);
      if (0xDC00 <= low && low <= 0xDFFF) {
        x = ((x - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
        i++; // the low surrogate has been consumed
      }
    }

    if (x <= 0x7F) {
      output += String.fromCharCode(x);
    } else if (x <= 0x7FF) {
      output += String.fromCharCode(0xC0 | ((x >>> 6) & 0x1F));
      output += String.fromCharCode(0x80 | (x & 0x3F));
    } else if (x <= 0xFFFF) {
      output += String.fromCharCode(0xE0 | ((x >>> 12) & 0x0F));
      output += String.fromCharCode(0x80 | ((x >>> 6) & 0x3F));
      output += String.fromCharCode(0x80 | (x & 0x3F));
    } else {
      // Code points built from surrogate pairs: 0x10000..0x10FFFF.
      output += String.fromCharCode(0xF0 | ((x >>> 18) & 0x07));
      output += String.fromCharCode(0x80 | ((x >>> 12) & 0x3F));
      output += String.fromCharCode(0x80 | ((x >>> 6) & 0x3F));
      output += String.fromCharCode(0x80 | (x & 0x3F));
    }
  }
  return output;
}

/**
 * Encode a JavaScript (UTF-16) string as UTF-8, returned as a "binary string"
 * with one character (code 0..255) per UTF-8 byte. Lead bytes are formed by
 * adding the prefix to the shifted code point, and the result is built by
 * string concatenation.
 *
 * A high surrogate is combined with the following unit only when that unit
 * really is a low surrogate. An unpaired surrogate is encoded on its own as
 * three bytes (matching utf8_encode_2) instead of being dropped or corrupting
 * the character after it.
 *
 * @param {string} input - Text to encode.
 * @returns {string} One character per UTF-8 byte.
 */
function utf8_encode_5(input) {
  var output = "";
  for (var i = 0; i < input.length; i++) {
    var code = input.charCodeAt(i);

    // Validate the low surrogate before merging: charCodeAt() past the end
    // returns NaN, and a non-surrogate follower would yield a bogus code
    // point while silently consuming the next character.
    if (0xD800 <= code && code <= 0xDBFF && i + 1 < input.length) {
      var low = input.charCodeAt(i + 1);
      if (0xDC00 <= low && low <= 0xDFFF) {
        code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
        i++; // the low surrogate has been consumed
      }
    }

    if (code <= 127) {
      output += String.fromCharCode(code);
    } else if (code <= 2047) {
      output += String.fromCharCode((code >>> 6) + 0xC0);
      output += String.fromCharCode((code & 0x3F) | 0x80);
    } else if (code <= 65535) {
      output += String.fromCharCode((code >>> 12) + 0xE0);
      output += String.fromCharCode((code >>> 6 & 0x3F) | 0x80);
      output += String.fromCharCode((code & 0x3F) | 0x80);
    } else {
      // Code points built from surrogate pairs: 0x10000..0x10FFFF.
      output += String.fromCharCode((code >>> 18) + 0xF0);
      output += String.fromCharCode((code >>> 12 & 0x3F) | 0x80);
      output += String.fromCharCode((code >>> 6 & 0x3F) | 0x80);
      output += String.fromCharCode((code & 0x3F) | 0x80);
    }
  }
  return output;
}

</script>

Test runner

Ready to run.

Testing in
Test | Ops/sec
encodeURIComponent
unescape(encodeURIComponent(alt_text));
ready
method1
utf8_encode_1(alt_text);
ready
method2
utf8_encode_2(alt_text);
ready
method3
utf8_encode_3(alt_text);
ready
method4
utf8_encode_4(alt_text);
ready
method5
utf8_encode_5(alt_text);
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.