Tokenize performance comparison

Benchmark created on


Setup

// Shared inputs that every benchmark case passes to its tokenize() implementation.
var inputTestCases = [
  "Нажмите сюда",                        // plain text only
  "Нажмите [сюда]",                      // text followed by a bracketed part
  "$variable",                           // a single $-prefixed name
  "$click[Нажмите] $here пожалуйста",    // names, brackets and text mixed
  "[Нажми [сюда] пожалуйста]",           // nested brackets
  "",                                    // empty input
  "[]",                                  // empty bracket pair
  "[текст без закрывающей скобки",       // opening bracket with no closing one
  "Цена: \\$100",                        // backslash-escaped dollar sign
  "$variable1$variable2"                 // names interleaved with digits
];

Test runner

Ready to run.

Testing in
Test | Ops/sec
regexp way
/**
 * Splits a template string into tokens using a single global regular expression.
 *
 * Token shapes:
 *   { type: 'VAR', value }   - "$" followed by ASCII letters; value excludes the "$"
 *   { type: 'LBRACKET' }     - "["
 *   { type: 'RBRACKET' }     - "]"
 *   { type: 'TEXT', value }  - everything else; "\$" contributes a literal "$"
 *
 * Adjacent pieces of text (plain runs, escaped dollars, stray "\" or "$") are
 * merged into one TEXT token so the result matches the buffer-based tokenizer
 * this implementation is benchmarked against.
 *
 * @param {string} input - Text to tokenize.
 * @returns {Array<{type: string, value?: string}>} Tokens in input order.
 */
function tokenize(input) {
  // Created per call: a /g regex carries lastIndex state between exec() calls.
  const regex = /\\\$|(\$[a-zA-Z]+)|(\[)|(\])|([^[\]\\$]+)|(.)/g;
  const tokens = [];

  // Appends text to the previous token when that token is also TEXT.
  const pushText = (value) => {
    const last = tokens[tokens.length - 1];
    if (last !== undefined && last.type === 'TEXT') {
      last.value += value;
    } else {
      tokens.push({ type: 'TEXT', value });
    }
  };

  let match;
  while ((match = regex.exec(input)) !== null) {
    const [fullMatch, varMatch, lbracket, rbracket, text, other] = match;

    if (fullMatch === '\\$') {
      // Escaped dollar sign: keep only the "$".
      pushText('$');
    } else if (varMatch) {
      // Drop the leading "$" from the variable name.
      tokens.push({ type: 'VAR', value: varMatch.slice(1) });
    } else if (lbracket) {
      tokens.push({ type: 'LBRACKET' });
    } else if (rbracket) {
      tokens.push({ type: 'RBRACKET' });
    } else if (text) {
      pushText(text);
    } else if (other) {
      // A lone "\" or "$" that did not form an escape or a variable.
      pushText(other);
    }
  }

  return tokens;
}

// Timed body: runs tokenize() once per setup input; the returned tokens are discarded.
inputTestCases.forEach((str) => tokenize(str))
ready
buffer way
"use strict"; function tokenize(input) { const tokens = []; let i = 0; let buffer = ''; function flushText() { if (buffer) { tokens.push({ type: 'TEXT', value: buffer }); buffer = ''; } } while (i < input.length) { const char = input[i]; const nextChar = input[i + 1]; if (char === '\\' && nextChar === '$') { // Экранированный $ buffer += '$'; i += 2; } else if (char === '$' && /^[a-zA-Z]/.test(nextChar || '')) { flushText(); i++; // skip $ let varName = ''; while (i < input.length && /[a-zA-Z]/.test(input[i])) { varName += input[i]; i++; } tokens.push({ type: 'VAR', value: varName }); } else if (char === '[') { flushText(); tokens.push({ type: 'LBRACKET' }); i++; } else if (char === ']') { flushText(); tokens.push({ type: 'RBRACKET' }); i++; } else { buffer += char; i++; } } flushText(); return tokens; }

// Timed body: runs tokenize() once per setup input; the returned tokens are discarded.
inputTestCases.forEach((str) => tokenize(str))
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.