Levenshtein distance (v19)

Revision 19 of this benchmark created on


Description

Prompted by a [Stack Overflow question][http://stackoverflow.com/questions/11919065/sort-an-array-by-the-levenshtein-distance-with-best-performance-in-javascript], this will compare implementations of the Levenshtein distance algorithm.

Setup

// Screen names grabbed from Lady Gaga's twitter followers.
    // http://jsfiddle.net/Jordan/UMKzq/1/
    window.screenNames = ["@deeia2000", "@bryankristanto", "@G_EZ_2012", "@alemayehulakew", "@SandraVeneva", "@celebiyesim", "@dior_rf", "@Stiffalator", "@IzmanMahmudi", "@judith_16", "@jele1199", "@Belenisortiz", "@lablabchi", "@alhamwaleed96", "@Frankie_W88", "@Surinaamse_", "@kotikibarsiki", "@pradeepnkt", "@vanimahajan37", "@mayrarodrigue46", "@OpsMiguel", "@s_sxt", "@ALIMayri24", "@NOT_forFREE", "@MiamiBitchez", "@paul__wayne", "@shinnydun", "@teamo_minhavida", "@Male_Jayerr", "@soniandikha", "@smokedoutsammy", "@indahcynkkeluar", "@Rhea_Scherbert", "@kemassiabdelgan", "@FacescoutZa", "@minhthanhphan", "@LhinaEndarlina", "@F6ame_R", "@geo_luiza", "@tt_hwi", "@ikmngkr_ykh", "@vutruonggiang3", "@rachelconroyxx", "@maychelerryans1", "@bonu58", "@fikirreta", "@amitbhagat17", "@OhliviaEs", "@Zh_Indira", "@akbarr_maulana", "@ReyvellRogado", "@eMxcee", "@maina_annie", "@angelohipolito1", "@luckgas", "@erdigulec", "@RadicBane", "@7co8oki", "@egoses", "@shirleyhemingw1", "@NascimentoKim", "@sirma_h", "@HoneyDanaE", "@ayhanturk60", "@JohnWan64688411", "@kharis_novelia", "@finkbina", "@domele94", "@anayieee", "@basakras", "@LipeSecretos55", "@Nirvana_Kurt_Co", "@putrajelek123", "@JLS_lulu13", "@EnashYaomatu", "@JamieRaymond8", "@Aulia_Fitri5", "@nabilmstrieux", "@ChelMaeAlinsunu", "@AryoJr", "@GreeneChae", "@whatsupardee", "@LarisaMaric", "@DanaeFlanagan", "@ChixDmusicBand", "@masyanya1995_", "@LetiziaCipriani", "@HiaAde", "@JoannaKarai", "@ivonlidya", "@akanchanchan", "@MiaaGotSwagg", "@lorinsh", "@OlurotimiKush", "@tiapav23", "@ganesh02222", "@Rodrigox_18", "@kayhan_Dre", "@1johnsonranch", "@aldidharro", "@meliamurph", "@Himansh21495710", "@gsli05necip", "@bgtmr025", "@925Aoi", "@GalauPangeran", "@ShaynalOranaise", "@tnk_pnpn", "@rizzajoybriza", "@JoshuaJojack", "@ilyascoskun1", "@WanAriff3", "@blondinen0221", "@johnkarlsosa", "@Derbora18", "@TmarYaha", "@Cristiano_88_", "@chloe11235", "@AliARSAL2", "@The_Super_zizo", "@waiyanhtoo13445", "@ime1608", "@marissemendoza", "@Deea182", "@kraravind07", "@cih3315", "@gui_assunao", "@CodiAnnW", "@Perfect_Belinda", "@AchmadHisyam1", "@baldo_cristian", "@ulrikaberg1", "@yuichiro15sai", "@PatrickSlavin2", "@Jean_249", "@LYrelax", "@yoganfb", "@malcolmcroberts", "@KorgPumba", "@AllProsAndCONs", "@khalreaees2", "@laurencool2", "@akkitab3", "@loove1Dloove", "@joshua010499", "@ishtyle79", "@khalid55_55", "@Victori05028144", "@alk35319034", "@Morgane_Mllr", "@linwanhsi", "@nadine_mongare", "@serenacast", "@MustafaMuhamm14", "@EleElena4", "@salvalaz_20", "@khalunaamira", "@iDriver_Crazy", "@Xanni2", "@Afsal_music", "@gjaymie", "@RhenTacAn", "@olsdhol", "@ChiaraRapali", "@Sex_Couture", "@nikolaowczorz", "@Williamsss7", "@junaid26959651", "@EhkaluPoe", "@gene_mika09", "@Bieber_124", "@AngeAngel199", "@MBTek1", "@AllisonKadoche", "@LyubashaN", "@estaloco1", "@rizal_aripin", "@taffytops99", "@JanineFeldkamp", "@ChsnLucie", "@mandeep15734355", "@K13R4N86", "@ShLelo", "@NinaPhung", "@yazeedzuod", "@LesroyMC7", "@Romy1298", "@adolescentes_12", "@geraglezZ", "@rafyrodz0334", "@SilvB20", "@santo_nubia", "@suepur", "@thebigchalenger", "@PalominoLuz1", "@CriisloveMiley", "@ANJEANETTEFERNA", "@ashlyn_ya", "@pastoredoyak", "@HeyyitsMichael", "@ATayden", "@leahmil84388097", "@ArafahSyafiq", "@NetyRamadan", "@kne_tt", "@mario33216140", "@9108810", "@raniabella5", "@Metshiyatz", "@AyudiahW", "@LiekeSays", "@ThebeastNr", "@JospedroRamos", "@Ariane555Silva", "@mirekgrzadziela", "@BubbieBonkers", "@DIBOLL", "@AnicetoHo", "@Faaaaaan1D", "@pophina", "@lexiwalters", "@bahhwhuut", "@fratyalva", "@ANdriANggRo", "@lyndz_xoxo", "@LiziStyless", "@ShindinaO", "@MalAaron", "@j_conradie", "@niffynog87", "@ZyreenReen", "@surewaterdrilli", "@GretaPanarotto", "@yeliz323232", "@nico14210", "@TheOnlyStarAnni", "@do_amigos", "@PachecoHopper84", "@AsiaSzostak", "@missfading", "@LasagnaCatt", "@AnjuliAng", "@PiotrBanan", "@EmersonViegas2", "@SashkaMilenova", "@nurlela82722746", "@Meme_Nathalia", "@Petrus1991", "@dakala3", "@WilleJanina", "@kkjh87it6", "@JNeeeeeeee", "@Unwingedfairy", "@violet9803", "@thaliaortizz", "@JoeySawyers", "@Ishwar73907748", "@1DTurkishGirls", "@ghalib_legendz", "@AliceJ_McIntosh", "@vivienp_", "@hannahadamss_", "@josete2310", "@samuellozano12", "@tygapkhiangte", "@Aditiya_ramdn24", "@BratuAlecs", "@Nussy_Ns", "@RaZenGrg", "@lj_bataller", "@LezMeBe", "@keithleamilitan", "@marjanadeli", "@AsMinaDePollo", "@kinkyminxsykes", "@AldrienAldriene", "@GRasonable", "@dheavidd", "@RandomPplNews", "@Josh_McMurtry", "@Kindajezzy", "@anjalinatz", "@jellydavies", "@milkyleavy", "@yellaboi_", "@sourysoura", "@AmazingSelenita", "@Denisse836", "@deliane_silva", "@tpwhd64a", "@TheMrG88", "@LauraHoggarth", "@Sofia_klark", "@VrAj_15", "@MArwa_SAied", "@asep_rover20", "@AsLizeth", "@AnnaLeeman", "@abbie_scannell", "@Boum_Abbou", "@miamingos123", "@jean_lising", "@chang4sheng1", "@AngelaVilei", "@KSUGossipGirl_", "@calesaric", "@Alejandro55alle", "@aliciadlhg", "@analaura_rg", "@mithawidiyanty", "@elinchixx", "@xyzhasan1998", "@daisybirkenhead", "@injy1998", "@nisan2020", "@qierstenwilson8", "@Smeib", "@DRmahmoud_M", "@harriet1304", "@tahertarmizi2", "@candelatorres6", "@anamari_96", "@KYMANO14", "@quiones_angie", "@rerefadila", "@PolinaJ", "@unikamg", "@c4ptainpsycho", "@Giu_Jonectioner", "@evilefebrianti", "@kirstyjo_bruce", "@SizZilinqPwince", "@Teo_Dora1D", "@DontCallMePanii", "@Xxzxx0Edcrfgr", "@AnsharKurniawan", "@ivanfaruolo", "@_BasitAliRaza", "@NO_De_luza", "@HoranBackup", "@HandFullOfRain", "@SmileNunnin", "@juliana50182518", "@Alinca4", "@andrea_wh", "@valerie33550099", "@allie_m1", "@JosanFranz", "@JackyRomuga", "@mimonzi", "@RismanFansIqbal", "@pauly_arco", "@KakoParker", "@PetterElfving", "@iJelenaGoogle", "@Tweets_Nikita", "@bikersbrother99", "@LisaFerera", "@Dragosh_Man", "@aannstephen", "@JuniorUFCRamos", "@Lewiskelly012", "@them12345", "@ThemTweetsPink", "@irem_justin126", "@emilyfr59168189", "@AoUsanee", "@hobelisk", "@rebeccamcmahon_", "@vicentepacheco7", "@paaooollooo", "@Choi248109", "@Daniell63850634", "@DoaaAboud1", "@wwsohj", "@acap_z", "@FuatFuatarkn", "@Gezimkrosa", "@hannzu43", "@lukman44492601", "@Sonata86460024", "@Leonardoojames", "@benny_benny1999", "@sanchez_diego", "@agastyashindu", "@ElwinataShirazy", "@Eja_Muhammad", "@elephantqw", "@snglesound", "@TEAMNACSsige", "@indahSwann", "@saoapple0112", "@petervhulst", "@NapashaMir", "@RubnChaval", "@LucieTouss", "@lowry_g", "@real_campaign", "@JonazFerrera", "@ChloEmmerson"];
    
    Math.RandomInt = function(min, max) {
      return Math.floor(Math.random() * (max - min + 1)) + min;
    };
    
    window.randomScreenName = function() {
      return screenNames[Math.RandomInt(0, screenNames.length - 1)];
    }
    
    window.CompareLD1 = function(min, split) {
      try {
        split = !("0")[0];
      } catch (i) {
        split = true;
      }
      return function(a, b) {
        if (a == b) return 0;
        if (!a.length || !b.length) return b.length || a.length;
        if (split) {
          a = a.split("");
          b = b.split("");
        }
        var len1 = a.length + 1,
            len2 = b.length + 1,
            I = 0,
            i = 0,
            d = [
            [0]
            ],
            c, j, J;
        while (++i < len2) d[0][i] = i;
        i = 0;
        while (++i < len1) {
          J = j = 0;
          c = a[I];
          d[i] = [i];
          while (++j < len2) {
            d[i][j] = min(d[I][j] + 1, d[i][J] + 1, d[I][J] + (c != b[J]));
            ++J;
          }++I;
        }
        return d[len1 - 1][len2 - 1];
      }
    }(Math.min, false);
    
    
    window.CompareLD2 = function(s, t) {
      var d = [];
      var n = s.length;
      var m = t.length;
      if (n == 0) return m;
      if (m == 0) return n;
      for (var i = n; i >= 0; i--) d[i] = [];
      for (var i = n; i >= 0; i--) d[i][0] = i;
      for (var j = m; j >= 0; j--) d[0][j] = j;
      for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);
        for (var j = 1; j <= m; j++) {
          if (i == j && d[i][j] > 4) return n;
          var t_j = t.charAt(j - 1);
          var cost = (s_i == t_j) ? 0 : 1;
          var mi = d[i - 1][j] + 1;
          var b = d[i][j - 1] + 1;
          var c = d[i - 1][j - 1] + cost;
          if (b < mi) mi = b;
          if (c < mi) mi = c;
          d[i][j] = mi;
          if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
            d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
          }
        }
      }
      return d[n][m];
    };
    
    
    function minn(a, b, c) {
      if (a > b) a = b;
      if (a < c) return a;
      return c;
    }
    
    window.CompareLD3 = function(S1, S2, limit) {
      if (S1 == null || S2 == null || typeof(S1) != "string" || typeof(S2) != "string") return 9999999;
      if (limit == null) limit = 9999999;
      if (Math.abs(S1.length - S2.length) > limit) return 9999999;
      S1 = S1.toLowerCase();
      S2 = S2.toLowerCase();
      if (S1 == S2) return 0;
      var n = S1.length;
      var m = S2.length;
      dist = new Array(new Array(m + 2), new Array(m + 2));
      current = 0;
      for (i = 0; i <= m + 1; i++) {
        dist[1][i] = i;
      }
      for (i = 1; i <= n; i++) {
        ok = 0;
        dist[current][0] = i;
        if (i - limit >= 1) dist[current][i - limit - 1] = 9999999;
        for (j = Math.max(i - limit, 1); j <= Math.min(i + limit, m); j++) {
          if (S1.substr(i - 1, 1) == S2.substr(j - 1, 1)) dist[current][j] = dist[1 - current][j - 1];
          else dist[current][j] = minn(dist[1 - current][j - 1], dist[1 - current][j], dist[current][j - 1]) + 1;
          if (dist[current][j] <= limit) ok = 1;
        }
        if (i + limit <= m) {
          dist[current][i + limit + 1] = 9999999;
        }
        if (!ok) return 9999999;
        current = 1 - current;
      }
      if (dist[1 - current][m] > limit) return 9999999;
      return dist[1 - current][m];
    };
    
    window.CompareLD6 = function(s, t, lim) {
      if (lim == null) lim = 9999999;
      if (Math.abs(s.length - t.length) > lim) return 9999999;
    
      var d = []; //2d matrix
      // Step 1
      var n = s.length,
          m = t.length;
    
      if (n == 0) return m;
      if (m == 0) return n;
    
      var i = n + 1;
      do {
        d[i] = [];
      } while (i--);
    
      // Step 2
      i = n + 1, j = m + 1;
      do {
        d[i][0] = i;
      } while (i--);
      do {
        d[0][j] = j;
      } while (j--);
    
      // Step 3
      for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);
    
        // Step 4
        for (var j = 1; j <= m; j++) {
    
          //Check the jagged ld total so far
          if (i == j && d[i][j] > 4) return n;
    
          var t_j = t.charAt(j - 1);
          var cost = (s_i == t_j) ? 0 : 1; // Step 5
          //Calculate the minimum
          var mi = d[i - 1][j] + 1;
          var b = d[i][j - 1] + 1;
          var c = d[i - 1][j - 1] + cost;
    
          if (b < mi) mi = b;
          if (c < mi) mi = c;
    
          d[i][j] = mi; // Step 6
        }
      }
    
      // Step 7
      return d[n][m];
    }
    
    // from http://andrew.hedges.name/experiments/levenshtein/levenshtein.js
    window.CompareLD7 = function(a, b) {
      var cost;
      
      var m = a.length;
      var n = b.length;
      
      // make sure a.length >= b.length to use O(min(n,m)) space, whatever that is
      if (m < n) {
        var c=a;a=b;b=c;
        var o=m;m=n;n=o;
      }
      
      var r = new Array();
      r[0] = new Array();
      for (var c = 0; c < n+1; c++) {
        r[0][c] = c;
      }
      
      for (var i = 1; i < m+1; i++) {
        r[i] = new Array();
        r[i][0] = i;
        for (var j = 1; j < n+1; j++) {
          cost = (a.charAt(i-1) == b.charAt(j-1))? 0: 1;
          r[i][j] = minimator(r[i-1][j]+1,r[i][j-1]+1,r[i-1][j-1]+cost);
        }
      }
      
      return r[m][n];
    }
    
    // from http://andrew.hedges.name/experiments/levenshtein/levenshtein.js
    window.CompareLD7 = function(a, b) {
      var cost;
      
      var m = a.length;
      var n = b.length;
      
      // make sure a.length >= b.length to use O(min(n,m)) space, whatever that is
      if (m < n) {
        var c=a;a=b;b=c;
        var o=m;m=n;n=o;
      }
      
      var r = new Array();
      r[0] = new Array();
      for (var c = 0; c < n+1; c++) {
        r[0][c] = c;
      }
      
      for (var i = 1; i < m+1; i++) {
        r[i] = new Array();
        r[i][0] = i;
        for (var j = 1; j < n+1; j++) {
          cost = (a.charAt(i-1) == b.charAt(j-1))? 0: 1;
          r[i][j] = minimator(r[i-1][j]+1,r[i][j-1]+1,r[i-1][j-1]+cost);
        }
      }
      
      return r[m][n];
    }
    
    window.CompareLD8 = (function() {
            var row2 = [];
            return function(s1, s2) {
              if (s1 === s2) {
                return 0;
              } else {
                var s1_len = s1.length, s2_len = s2.length;
                if (s1_len && s2_len) {
                  var i1 = 0, i2 = 0, a, b, c, c2, row = row2;
                  while (i1 < s1_len)
                    row[i1] = ++i1;
                  while (i2 < s2_len) {
                    c2 = s2.charCodeAt(i2);
                    a = i2;
                    ++i2;
                    b = i2;
                    for (i1 = 0; i1 < s1_len; ++i1) {
                      c = a + (s1.charCodeAt(i1) !== c2 ? 1 : 0);
                      a = row[i1];
                      b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c);
                      row[i1] = b;
                    }
                  }
                  return b;
                } else {
                  return s1_len + s2_len;
                }
              }
            };
          })();
    
    window.CompareLD9 = function(a, b) {
      if (a == b) return 0;
    
      var aLen = a.length, bLen = b.length;
    
      if (!aLen) return bLen;
      if (!bLen) return aLen;
    
      var len = aLen + 1,
          v0 = new Array(len),
          v1 = new Array(len),
          c2, min, tmp,
          i = 0,
          j = 0;
    
      while(i < len) v0[i] = i++;
    
      while(j < bLen) {
        v1[0] = j + 1;
        c2 = b.charAt(j++);
        i = 0;
    
        while(i < aLen) {
          min = v0[i] - (a.charAt(i) == c2 ? 1 : 0);
          if (v1[i] < min) min = v1[i];
          if (v0[++i] < min) min = v0[i];
          v1[i] = min + 1;
        }
    
        tmp = v0; v0 = v1; v1 = tmp;
      }
      return v0[aLen];
    }
    
    // return the smallest of the three values passed in
    minimator = function(x,y,z) {
      if (x < y && x < z) return x;
      if (y < x && y < z) return y;
      return z;
    }

Test runner

Ready to run.

Testing in
TestOps/sec
Original implementation
CompareLD1(randomScreenName(), randomScreenName());
ready
Westgate's implementation
CompareLD2(randomScreenName(), randomScreenName());
ready
Stoianovici's implementation (no limit)
CompareLD3(randomScreenName(), randomScreenName());
ready
Stoianovici's implementation (limit of 10)
CompareLD3(randomScreenName(), randomScreenName(), 10);
ready
Stoianovici's implementation (limit of 5)
CompareLD3(randomScreenName(), randomScreenName(), 5);
ready
Westgate's implementation (sans Damerau)
CompareLD6(randomScreenName(), randomScreenName());
ready
Andrew Hedges implementation (http://andrew.hedges.name/experiments/levenshtein/levenshtein.js)
CompareLD7(randomScreenName(), randomScreenName());
ready
Marco de Wit's Levenshtein
CompareLD8(randomScreenName(), randomScreenName());
ready
Another implementation
CompareLD9(randomScreenName(), randomScreenName());
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.