Levenshtein Distance (v5)

Revision 5 of this benchmark created by TheSpanishInquisition on


Description

*** Reupload of Revision 4, Updated by TheSpanishInquisition ***

Accidentally forgot to add functional optimisation. Its about 5 times faster than prototype optimisation. I cannot figure out why.

*** Fork of Revision 2, Updated by TheSpanishInquisition ***

Changed test data. Each test is now executed against exactly the same data set in the name of fairness. Added fully optimised adaptations of James Westgate's implementation. Removed old Damerau–Levenshtein Distance test CompareLD2 (see this jsPerf for comparison of Damerau–Levenshtein Distance, this is for Levenshtein Distance only).

Wiki Rosetta Code

*** Original ***

Prompted by a Stack Overflow question, this will compare implementations of the Levenshtein distance algorithm.

By adding a limit and optimising comparison operators, I hope to further increase the performance of the Westgate implementation.

Setup

Math.randomInt = function(min, max) {
        return Math.floor(Math.random() * (max - min + 1)) + min;
    };
    
    // Generate random strings.
    window.dataSet = [];
    window.dataSetLength = 50;
    for (var i = 0; i < dataSetLength; i++) {
        dataSet[i] = Math.randomInt(999, 999999).toString();
    }
    
    // Original.
    window.CompareLD1 = function(min, split) {
      try {
        split = !("0")[0];
      } catch (i) {
        split = true;
      }
      return function(a, b) {
        if (a == b) return 0;
        if (!a.length || !b.length) return b.length || a.length;
        if (split) {
          a = a.split("");
          b = b.split("");
        }
        var len1 = a.length + 1,
            len2 = b.length + 1,
            I = 0,
            i = 0,
            d = [
            [0]
            ],
            c, j, J;
        while (++i < len2) d[0][i] = i;
        i = 0;
        while (++i < len1) {
          J = j = 0;
          c = a[I];
          d[i] = [i];
          while (++j < len2) {
            d[i][j] = min(d[I][j] + 1, d[i][J] + 1, d[I][J] + (c != b[J]));
            ++J;
          }++I;
        }
        return d[len1 - 1][len2 - 1];
      }
    }(Math.min, false);
    
    
    
    function minn(a, b, c) {
      if (a > b) a = b;
      if (a < c) return a;
      return c;
    }
    
    // Stoianovici's.
    window.CompareLD3 = function(S1, S2, limit) {
      if (S1 == null || S2 == null || typeof(S1) != "string" || typeof(S2) != "string") return 9999999;
      if (limit == null) limit = 9999999;
      if (Math.abs(S1.length - S2.length) > limit) return 9999999;
      S1 = S1.toLowerCase();
      S2 = S2.toLowerCase();
      if (S1 == S2) return 0;
      var n = S1.length;
      var m = S2.length;
      dist = new Array(new Array(m + 2), new Array(m + 2));
      current = 0;
      for (i = 0; i <= m + 1; i++) {
        dist[1][i] = i;
      }
      for (i = 1; i <= n; i++) {
        ok = 0;
        dist[current][0] = i;
        if (i - limit >= 1) dist[current][i - limit - 1] = 9999999;
        for (j = Math.max(i - limit, 1); j <= Math.min(i + limit, m); j++) {
          if (S1.substr(i - 1, 1) == S2.substr(j - 1, 1)) dist[current][j] = dist[1 - current][j - 1];
          else dist[current][j] = minn(dist[1 - current][j - 1], dist[1 - current][j], dist[current][j - 1]) + 1;
          if (dist[current][j] <= limit) ok = 1;
        }
        if (i + limit <= m) {
          dist[current][i + limit + 1] = 9999999;
        }
        if (!ok) return 9999999;
        current = 1 - current;
      }
      if (dist[1 - current][m] > limit) return 9999999;
      return dist[1 - current][m];
    };
    
    // Westgate's.
    window.CompareLD6 = function(s, t, lim) {
      if (!lim) lim = 20;
      if (Math.abs(s.length - t.length) > lim) return lim;
    
      var d = []; //2d matrix
      // Step 1
      var n = s.length,
          m = t.length;
    
      if (n === 0) return m;
      if (m === 0) return n;
    
      var i = n + 1;
      do {
        d[i] = [];
      } while (i--);
    
      // Step 2
      i = n + 1, j = m + 1;
      do {
        d[i][0] = i;
      } while (i--);
      do {
        d[0][j] = j;
      } while (j--);
    
      // Step 3
      for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);
    
        // Step 4
        for (var j = 1; j <= m; j++) {
    
          //Check the jagged ld total so far
          if (i === j && d[i][j] > 4) return n;
    
          var t_j = t.charAt(j - 1);
          var cost = (s_i === t_j) ? 0 : 1; // Step 5
          //Calculate the minimum
          var mi = d[i - 1][j] + 1;
          var b = d[i][j - 1] + 1;
          var c = d[i - 1][j - 1] + cost;
    
          if (b < mi) mi = b;
          if (c < mi) mi = c;
    
          d[i][j] = mi; // Step 6
        }
      }
    
      // Step 7
      return d[n][m];
    }
    
    
    
    // TheSpanishInquisition
    
    // Cache the matrix. Note this implementation is limited to
    // strings of 64 char or less. This could be altered to update
    // dynamically, or a larger value could be used.
    var matrix = [];
    for (var i = 0; i < 64; i++) {
        matrix[i] = [i];
        matrix[i].length = 64;
    }
    for (var i = 0; i < 64; i++) {
        matrix[0][i] = i;
    }
    
    // Functional implementation of Levenshtein Distance.
    String.levenshteinDistance = function(__this, that, limit) {
        var thisLength = __this.length, thatLength = that.length;
    
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
    
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = __this[i-1];
    
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
    
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
    
                        matrix[i][j] = min;     // Update matrix.
                }
        }
    
        return matrix[thisLength][thatLength];
    };
    
    // Prototype implementation of Levenshtein Distance.
    String.prototype.levenshteinDistance = function(that, limit) {
        var thisLength = this.length, thatLength = that.length;
    
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
    
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = this[i-1];
    
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
    
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
    
                        matrix[i][j] = min;     // Update matrix.
                }
        }
    
        return matrix[thisLength][thatLength];
    };

Test runner

Ready to run.

Testing in
TestOps/sec
Original [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD1(dataSet[x], dataSet[y]);
    }
}
ready
Westgate's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y]);
    }
}
ready
Stoianovici's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y]);
    }
}
ready
TheSpanishInquisition's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y]);
    }
}
ready
TheSpanishInquisition's [ prototype ] [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y]);
    }
}
ready
Westgate's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y], 5);
    }
}
ready
Stoianovici's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y], 5);
    }
}
 
ready
TheSpanishInquisition's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y], 5);
    }
}
ready
TheSpanishInquisition's [ prototype ] [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y], 5);
    }
}
ready
Westgate's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y], 10);
    }
}
ready
Stoianovici's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y], 10);
    }
}
 
ready
TheSpanishInquisition's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y], 10);
    }
}
ready
TheSpanishInquisition's [ prototype ] [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y], 10);
    }
}
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.