Levenshtein Distance

JavaScript performance comparison

Revision 5 of this test case created by TheSpanishInquisition

Info

*** Reupload of Revision 4, Updated by TheSpanishInquisition ***

Accidentally forgot to add functional optimisation. Its about 5 times faster than prototype optimisation. I cannot figure out why.

*** Fork of Revision 2, Updated by TheSpanishInquisition ***

Changed test data. Each test is now executed against exactly the same data set in the name of fairness. Added fully optimised adaptations of James Westgate's implementation. Removed old Damerau–Levenshtein Distance test CompareLD2 (see this jsPerf for comparison of Damerau–Levenshtein Distance, this is for Levenshtein Distance only).

Wiki Rosetta Code

*** Original ***

Prompted by a Stack Overflow question, this will compare implementations of the Levenshtein distance algorithm.

By adding a limit and optimising comparison operators, I hope to further increase the performance of the Westgate implementation.

Preparation code

 
<script>
Benchmark.prototype.setup = function() {
    Math.randomInt = function(min, max) {
        return Math.floor(Math.random() * (max - min + 1)) + min;
    };
   
    // Generate random strings.
    window.dataSet = [];
    window.dataSetLength = 50;
    for (var i = 0; i < dataSetLength; i++) {
        dataSet[i] = Math.randomInt(999, 999999).toString();
    }
   
    // Original.
    window.CompareLD1 = function(min, split) {
      try {
        split = !("0")[0];
      } catch (i) {
        split = true;
      }
      return function(a, b) {
        if (a == b) return 0;
        if (!a.length || !b.length) return b.length || a.length;
        if (split) {
          a = a.split("");
          b = b.split("");
        }
        var len1 = a.length + 1,
            len2 = b.length + 1,
            I = 0,
            i = 0,
            d = [
            [0]
            ],
            c, j, J;
        while (++i < len2) d[0][i] = i;
        i = 0;
        while (++i < len1) {
          J = j = 0;
          c = a[I];
          d[i] = [i];
          while (++j < len2) {
            d[i][j] = min(d[I][j] + 1, d[i][J] + 1, d[I][J] + (c != b[J]));
            ++J;
          }++I;
        }
        return d[len1 - 1][len2 - 1];
      }
    }(Math.min, false);
   
   
   
    function minn(a, b, c) {
      if (a > b) a = b;
      if (a < c) return a;
      return c;
    }
   
    // Stoianovici's.
    window.CompareLD3 = function(S1, S2, limit) {
      if (S1 == null || S2 == null || typeof(S1) != "string" || typeof(S2) != "string") return 9999999;
      if (limit == null) limit = 9999999;
      if (Math.abs(S1.length - S2.length) > limit) return 9999999;
      S1 = S1.toLowerCase();
      S2 = S2.toLowerCase();
      if (S1 == S2) return 0;
      var n = S1.length;
      var m = S2.length;
      dist = new Array(new Array(m + 2), new Array(m + 2));
      current = 0;
      for (i = 0; i <= m + 1; i++) {
        dist[1][i] = i;
      }
      for (i = 1; i <= n; i++) {
        ok = 0;
        dist[current][0] = i;
        if (i - limit >= 1) dist[current][i - limit - 1] = 9999999;
        for (j = Math.max(i - limit, 1); j <= Math.min(i + limit, m); j++) {
          if (S1.substr(i - 1, 1) == S2.substr(j - 1, 1)) dist[current][j] = dist[1 - current][j - 1];
          else dist[current][j] = minn(dist[1 - current][j - 1], dist[1 - current][j], dist[current][j - 1]) + 1;
          if (dist[current][j] <= limit) ok = 1;
        }
        if (i + limit <= m) {
          dist[current][i + limit + 1] = 9999999;
        }
        if (!ok) return 9999999;
        current = 1 - current;
      }
      if (dist[1 - current][m] > limit) return 9999999;
      return dist[1 - current][m];
    };
   
    // Westgate's.
    window.CompareLD6 = function(s, t, lim) {
      if (!lim) lim = 20;
      if (Math.abs(s.length - t.length) > lim) return lim;
   
      var d = []; //2d matrix
      // Step 1
      var n = s.length,
          m = t.length;
   
      if (n === 0) return m;
      if (m === 0) return n;
   
      var i = n + 1;
      do {
        d[i] = [];
      } while (i--);
   
      // Step 2
      i = n + 1, j = m + 1;
      do {
        d[i][0] = i;
      } while (i--);
      do {
        d[0][j] = j;
      } while (j--);
   
      // Step 3
      for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);
   
        // Step 4
        for (var j = 1; j <= m; j++) {
   
          //Check the jagged ld total so far
          if (i === j && d[i][j] > 4) return n;
   
          var t_j = t.charAt(j - 1);
          var cost = (s_i === t_j) ? 0 : 1; // Step 5
          //Calculate the minimum
          var mi = d[i - 1][j] + 1;
          var b = d[i][j - 1] + 1;
          var c = d[i - 1][j - 1] + cost;
   
          if (b < mi) mi = b;
          if (c < mi) mi = c;
   
          d[i][j] = mi; // Step 6
        }
      }
   
      // Step 7
      return d[n][m];
    }
   
   
   
    // TheSpanishInquisition
   
    // Cache the matrix. Note this implementation is limited to
    // strings of 64 char or less. This could be altered to update
    // dynamically, or a larger value could be used.
    var matrix = [];
    for (var i = 0; i < 64; i++) {
        matrix[i] = [i];
        matrix[i].length = 64;
    }
    for (var i = 0; i < 64; i++) {
        matrix[0][i] = i;
    }
   
    // Functional implementation of Levenshtein Distance.
    String.levenshteinDistance = function(__this, that, limit) {
        var thisLength = __this.length, thatLength = that.length;
   
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
   
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = __this[i-1];
   
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
   
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
   
                        matrix[i][j] = min;     // Update matrix.
                }
        }
   
        return matrix[thisLength][thatLength];
    };
   
    // Prototype implementation of Levenshtein Distance.
    String.prototype.levenshteinDistance = function(that, limit) {
        var thisLength = this.length, thatLength = that.length;
   
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
   
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = this[i-1];
   
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
   
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
   
                        matrix[i][j] = min;     // Update matrix.
                }
        }
   
        return matrix[thisLength][thatLength];
    };
   
};
</script>

Test runner

Warning! For accurate results, please disable Firebug before running the tests. (Why?)

Java applet disabled.

Testing in unknown unknown
Test Ops/sec
Original [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD1(dataSet[x], dataSet[y]);
    }
}
pending…
Westgate's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y]);
    }
}
pending…
Stoianovici's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y]);
    }
}
pending…
TheSpanishInquisition's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y]);
    }
}
pending…
TheSpanishInquisition's [ prototype ] [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y]);
    }
}
pending…
Westgate's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y], 5);
    }
}
pending…
Stoianovici's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y], 5);
    }
}
 
pending…
TheSpanishInquisition's [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y], 5);
    }
}
pending…
TheSpanishInquisition's [ prototype ] [ limit of 5 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y], 5);
    }
}
pending…
Westgate's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD6(dataSet[x], dataSet[y], 10);
    }
}
pending…
Stoianovici's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD3(dataSet[x], dataSet[y], 10);
    }
}
 
pending…
TheSpanishInquisition's [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.levenshteinDistance(dataSet[x], dataSet[y], 10);
    }
}
pending…
TheSpanishInquisition's [ prototype ] [ limit of 10 ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        dataSet[x].levenshteinDistance(dataSet[y], 10);
    }
}
pending…

Compare results of other browsers

Revisions

You can edit these tests or add even more tests to this page by appending /edit to the URL. Here’s a list of current revisions for this page:

1 comment

TheSpanishInquisition (revision owner) commented :

Why is the prototype version so slow in comparison to the functional version? I don't know for sure.

I assume it is related specifically to String.prototype and how browsers handle strings internally. All attempts to get the speed of String.levenshteinDistance in String.prototype.levenshteinDistance have failed.

If you need a String.prototype method in your code, just wrap the functional version.

Add a comment