Levenshtein distance

Benchmark created on


Setup

var strA = "VB.NET is not a scripting language u r thinking of vbscript. Thats like saying javascript and java are the same thing.";
  var strB = "VB dotnet isn't a scripting language, you're thinking of VBScript. That's purdy much like sayin' Javascript and Java are one and the same.";
  var strC = "Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files.";
  var strD = "Permission is granted free of charge to anyone wanting a copy of this software and related documentation files.";
  
  function levenshtein_distance(a, b) {
  	if (a.length == 0) { return b.length; }
  	if (b.length == 0) { return a.length; }
  
  	var matrix = [], i, j;
  
  	// increment along the first column of each row
  	for (i = 0; i <= b.length; i++) {
  		matrix[i] = [i];
  	}
  
  	// increment each column in the first row
  	for (j = 0; j <= a.length; j++) {
  		matrix[0][j] = j;
  	}
  
  	// Fill in the rest of the matrix
  	for (i = 1; i <= b.length; i++) {
  		for (j = 1; j <= a.length; j++) {
  			if (b.charAt(i - 1) == a.charAt(j - 1)) {
  				matrix[i][j] = matrix[i - 1][j - 1];
  			} else {
  				matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, // substitution
  					Math.min(matrix[i][j - 1] + 1, // insertion
  						matrix[i - 1][j] + 1)); // deletion
  			}
  		}
  	}
  
  	return matrix[b.length][a.length];
  }
  
  function threeArgMin_levenshtein(a, b) {
  	if (a.length == 0) { return b.length; }
  	if (b.length == 0) { return a.length; }
  
  	var matrix = [], i, j;
  
  	for (i = 0; i <= b.length; i++) { matrix[i] = [i]; }
  	for (j = 0; j <= a.length; j++) { matrix[0][j] = j; }
  
  	// Fill in the rest of the matrix
  	for (i = 1; i <= b.length; i++) {
  		for (j = 1; j <= a.length; j++) {
  			if (b.charAt(i - 1) === a.charAt(j - 1)) {
  				matrix[i][j] = matrix[i - 1][j - 1];
  			} else {
  				matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j] + 1);
  			}
  		}
  	}
  	return matrix[b.length][a.length];
  }
  
  function cachedLen_levenshtein(a, b) {
  	var matrix = [], alen = a.length, blen = b.length, i, j;
  	if (alen === 0) { return blen; }
  	if (blen === 0) { return alen; }
  
  	for (i = 0; i <= blen; i++) { matrix[i] = [i]; }
  	for (j = 0; j <= alen; j++) { matrix[0][j] = j; }
  
  	// Fill in the rest of the matrix
  	for (i = 1; i <= blen; i++) {
  		for (j = 1; j <= alen; j++) {
  			if (b.charAt(i - 1) === a.charAt(j - 1)) {
  				matrix[i][j] = matrix[i - 1][j - 1];
  			} else {
  				matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, Math.min(matrix[i][j - 1] + 1, matrix[i - 1][j] + 1));
  			}
  		}
  	}
  	return matrix[blen][alen];
  }
  
  
  function kigiri_levenshtein(a, b) {
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  	let tmp, i, j, prev, val, row;
  	// swap to save some memory O(min(a,b)) instead of O(a)
  	if (a.length > b.length) {
  		tmp = a;
  		a = b;
  		b = tmp;
  	}
  
  	row = Array(a.length + 1);
  	// init the row
  	for (i = 0; i <= a.length; i++) {
  		row[i] = i;
  	}
  
  	// fill in the rest
  	for (i = 1; i <= b.length; i++) {
  		prev = i;
  		for (j = 1; j <= a.length; j++) {
  			if (b[i - 1] === a[j - 1]) {
  				val = row[j - 1]; // match
  			} else {
  				val = Math.min(row[j - 1] + 1, // substitution
  					Math.min(prev + 1,     // insertion
  						row[j] + 1));  // deletion
  			}
  			row[j - 1] = prev;
  			prev = val;
  		}
  		row[a.length] = prev;
  	}
  	return row[a.length];
  }
  
  function AndreiMackenzie_levenshtein(a, b) {
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  
  	var matrix = [];
  
  	// increment along the first column of each row
  	var i;
  	for (i = 0; i <= b.length; i++) {
  		matrix[i] = [i];
  	}
  
  	// increment each column in the first row
  	var j;
  	for (j = 0; j <= a.length; j++) {
  		matrix[0][j] = j;
  	}
  
  	// Fill in the rest of the matrix
  	for (i = 1; i <= b.length; i++) {
  		for (j = 1; j <= a.length; j++) {
  			if (b.charAt(i - 1) === a.charAt(j - 1)) {
  				matrix[i][j] = matrix[i - 1][j - 1];
  			} else {
  				matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, // substitution
  					Math.min(matrix[i][j - 1] + 1, // insertion
  						matrix[i - 1][j] + 1)); // deletion
  			}
  		}
  	}
  
  	return matrix[b.length][a.length];
  }
  
  
  function ziemba_levenshtein(a, b) {
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  	if (a.length > b.length) { var c = a; a = b; b = c; }
  
  	var i, j, prev, val, alen = a.length, blen = b.length, row = Array(alen + 1);
  	for (i = 0; i <= alen; i++) { row[i] = i; }
  
  	for (i = 1; i <= blen; i++) {
  		prev = i;
  		for (j = 1; j <= alen; j++) {
  			if (b[i - 1] === a[j - 1]) {
  				val = row[j - 1]; // match
  			} else {
  				val = Math.min(row[j - 1] + 1, Math.min(prev + 1, row[j] + 1));
  			}
  			row[j - 1] = prev;
  			prev = val;
  		}
  		row[alen] = prev;
  	}
  	return row[alen];
  }
  
  /* Sacrifices readability in order to use the fewest number of variables by reusing where possible */
  function dziemba_levenshtein(a, b) {
  	var tmp;
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  	if (a.length > b.length) { tmp = a; a = b; b = tmp; }
  
  	var i, j, res, alen = a.length, blen = b.length, row = Array(alen);
  	for (i = 0; i <= alen; i++) { row[i] = i; }
  
  	for (i = 1; i <= blen; i++) {
  		res = i;
  		for (j = 1; j <= alen; j++) {
  			tmp = row[j - 1];
  			row[j - 1] = res;
  			res = b[i - 1] === a[j - 1] ? tmp : Math.min(tmp + 1, Math.min(res + 1, row[j] + 1));
  		}
  	}
  	return res;
  }
  
  /**Uses the shortest syntax and latest es6 features because shorter & newer = better. Right? */
  function tiny_levenshtein(a, b) {
  	[a, b] = a.length > b.length ? [b, a] : [a, b];
  	if (a.length === 0) return b.length;
  	if (b.length === 0) return a.length;
  
  	var res, tmp, row = Array.from((function* (n = 0) { while (n <= a.length) yield n++; })());
  	for (var i = 1, blen = b.length; i <= blen; i++) {
  		res = i;
  		for (var j = 1, alen = a.length; j <= alen; j++) {
  			tmp = row[j - 1];
  			row[j - 1] = res;
  			res = b[i - 1] === a[j - 1] ? tmp : Math.min(row[j - 1], res, row[j]) + 1;
  		}
  	}
  	return res;
  }
  
  /*Returns a percentage indicating how close the strings matched. Is included because jsPerf doesn't let you remove cases...*/
  function findMatchPercent(a, b) {
  	var tmp;
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  	if (a.length > b.length) { tmp = a; a = b; b = tmp; }
  
  	var i, j, res, alen = a.length, blen = b.length, row = Array(alen);
  	for (i = 0; i <= alen; i++) { row[i] = i; }
  
  	for (i = 1; i <= blen; i++) {
  		res = i;
  		for (j = 1; j <= alen; j++) {
  			tmp = row[j - 1];
  			row[j - 1] = res;
  			res = b[i - 1] === a[j - 1] ? tmp : Math.min(tmp + 1, Math.min(res + 1, row[j] + 1));
  		}
  	}
  	return (blen - res) / blen;
  }
  
  function ziemba_levenshtein_gen(a, b) {
  	if (a.length === 0) { return b.length; }
  	if (b.length === 0) { return a.length; }
  	if (a.length > b.length) { var c = a; a = b; b = c; }
  
  	var i, j, prev, val, alen = a.length, blen = b.length;
  	var row = Array.from((function* (max, n = 0) { while (n <= max) { yield n++; } })(alen + 1));
  
  	for (i = 1; i <= blen; i++) {
  		prev = i;
  		for (j = 1; j <= alen; j++) {
  			if (b[i - 1] === a[j - 1]) {
  				val = row[j - 1]; // match
  			} else {
  				val = Math.min(row[j - 1] + 1, Math.min(prev + 1, row[j] + 1));
  			}
  			row[j - 1] = prev;
  			prev = val;
  		}
  		row[alen] = prev;
  	}
  	return row[alen];
  }
  
  function ziemba_levenshtein_destruct(strA, strB) {
  	var [a, b] = strA.length > strB.length ? [strB, strA] : [strA, strB];
  	var alen = a.length, blen = b.length;
  	if (alen === 0) { return b.length; }
  	if (blen === 0) { return a.length; }
  
  	var i, j, prev, val, row = Array(alen + 1);
  	for (i = 0; i <= alen; i++) { row[i] = i; }
  
  	for (i = 1; i <= blen; i++) {
  		prev = i;
  		for (j = 1; j <= alen; j++) {
  			if (b[i - 1] === a[j - 1]) {
  				val = row[j - 1]; // match
  			} else {
  				val = Math.min(row[j - 1] + 1, Math.min(prev + 1, row[j] + 1));
  			}
  			row[j - 1] = prev;
  			prev = val;
  		}
  		row[alen] = prev;
  	}
  	return row[alen];
  }

Test runner

Ready to run.

Testing in
TestOps/sec
Standard
levenshtein_distance(strA, strB);
levenshtein_distance(strC, strD);
ready
Standard w/ 3 arg Math.min
threeArgMin_levenshtein(strA, strB);
threeArgMin_levenshtein(strC, strD);
ready
Standard w/ cached Array.lengths
cachedLen_levenshtein(strA, strB);
cachedLen_levenshtein(strC, strD);
ready
AndreiMackenzie_levenshtein
AndreiMackenzie_levenshtein(strA, strB);
AndreiMackenzie_levenshtein(strC, strD);
ready
kigiri_levenshtein
kigiri_levenshtein(strA, strB);
kigiri_levenshtein(strC, strD);
ready
ziemba_levenshtein
ziemba_levenshtein(strA, strB);
ziemba_levenshtein(strC, strD);
ready
dziemba_levenshtein
/* Sacrifices readability in order to use the fewest number of variables by reusing where possible */
dziemba_levenshtein(strA, strB);
dziemba_levenshtein(strC, strD);
ready
ziemba_levenshtein /w generator
/* Curious about the impact of using an ES6 generator function. */
ziemba_levenshtein_gen(strA, strB);
ziemba_levenshtein_gen(strC, strD);
ready
ziemba_levenshtein /w destructuring assignment
/* Curious about the impact of using an ES6 destructuring assignment. */
ziemba_levenshtein_destruct(strA, strB);
ziemba_levenshtein_destruct(strC, strD);
ready
tiny_levenshtein
/**Uses the shortest syntax and latest es6 features because shorter & newer = better. Right? */
tiny_levenshtein(strA, strB);
tiny_levenshtein(strC, strD);
ready
findMatchPercent
/*Returns a percentage indicating how close the strings matched. Is included because jsPerf doesn't let you remove cases...*/
findMatchPercent(strA, strB);
findMatchPercent(strC, strD);
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.