Damerau-Levenshtein Distance

JavaScript performance comparison

Test case created by TheSpanishInquisition and last updated

Info

Damerau-Levenshtein Distance is a small modification to Levenshtein Distance to include transpositions. In JavaScript the following line of code (as implemented in Westgate's CompareLD2) adds Damerau's transposition and can only be written in a couple of ways:

``if (i > 1 && j > 1 && s_i === t.charAt(j - 2) && s.charAt(i - 2) === t_j) {    d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);}``

This performance test compares Westgate's original Damerau-Levenshtein Distance and potential optimisations of the Damerau transposition.

Wiki

See this jsPerf for comparison of Levenshtein Distance, this is for Damerau–Levenshtein Distance only.

Preparation code

`` <script>  Benchmark.prototype.setup = function() {    Math.randomInt = function(min, max) {        return Math.floor(Math.random() * (max - min + 1)) + min;    };        // Generate random strings.    window.dataSet = [];    window.dataSetLength = 50;    for (var i = 0; i < dataSetLength; i++) {        dataSet[i] = Math.randomInt(999, 999999).toString();    }                // Westgate's    window.CompareLD2 = function(s, t) {      var d = [];      var n = s.length;      var m = t.length;      if (n === 0) return m;      if (m === 0) return n;      for (var i = n; i >= 0; i--) d[i] = [];      for (var i = n; i >= 0; i--) d[i][0] = i;      for (var j = m; j >= 0; j--) d[0][j] = j;      for (var i = 1; i <= n; i++) {        var s_i = s.charAt(i - 1);        for (var j = 1; j <= m; j++) {          if (i === j && d[i][j] > 4) return n;          var t_j = t.charAt(j - 1);          var cost = (s_i === t_j) ? 0 : 1;          var mi = d[i - 1][j] + 1;          var b = d[i][j - 1] + 1;          var c = d[i - 1][j - 1] + cost;          if (b < mi) mi = b;          if (c < mi) mi = c;          d[i][j] = mi;          if (i > 1 && j > 1 && s_i === t.charAt(j - 2) && s.charAt(i - 2) === t_j) {            d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);          }        }      }      return d[n][m];    };                // TheSpanishInquisition        // Cache the matrix. Note this implementation is limited to    // strings of 64 char or less. This could be altered to update    // dynamically, or a larger value could be used.    var matrix = [];    for (var i = 0; i < 64; i++) {        matrix[i] = [i];        matrix[i].length = 64;    }    for (var i = 0; i < 64; i++) {        matrix[0][i] = i;    }        // Functional implementation of Levenshtein Distance.    String.damerauLevenshteinDistance = function(__this, that, limit) {        var thisLength = __this.length, thatLength = that.length;            if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;        if (thisLength === 0) return thatLength;        if (thatLength === 0) return thisLength;            // Calculate matrix.        var this_i, that_j, cost, min, t;        for (i = 1; i <= thisLength; ++i) {                this_i = __this[i-1];                    // Step 4                for (j = 1; j <= thatLength; ++j) {                        // Check the jagged ld total so far                        if (i === j && matrix[i][j] > 4) return thisLength;                            that_j = that[j-1];                        cost = (this_i === that_j) ? 0 : 1; // Step 5                        // Calculate the minimum (much faster than Math.min(...)).                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.                                // Damerau transposition.                        if (i > 1 && j > 1 && this_i === that[j-2] && this[i-2] === that_j) {                                matrix[i][j] = Math.min(matrix[i    ][j    ],                                                        matrix[i - 2][j - 2] + cost);   // Transposition.                        }                            matrix[i][j] = min;     // Update matrix.                }        }            return matrix[thisLength][thatLength];    };        String.damerauLevenshteinDistance2 = function(__this, that, limit) {        var thisLength = __this.length, thatLength = that.length;            if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;        if (thisLength === 0) return thatLength;        if (thatLength === 0) return thisLength;            // Calculate matrix.        var this_i, that_j, cost, min, t;        for (i = 1; i <= thisLength; ++i) {                this_i = __this[i-1];                    // Step 4                for (j = 1; j <= thatLength; ++j) {                        // Check the jagged ld total so far                        if (i === j && matrix[i][j] > 4) return thisLength;                            that_j = that[j-1];                        cost = (this_i === that_j) ? 0 : 1; // Step 5                        // Calculate the minimum (much faster than Math.min(...)).                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.                                // Update matrix.                        matrix[i][j] = (i > 1 && j > 1 && this_i === that[j-2] && this[i-2] === that_j && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.                }        }            return matrix[thisLength][thatLength];    };  };</script>``

Test runner

Warning! For accurate results, please disable Firebug before running the tests. (Why?)

Java applet disabled.

Testing in unknown unknown
Test Ops/sec
Westgate's [ no limit ]
``for (var x = 0; x < dataSetLength; x++) {    for (var y = 0; y < dataSetLength; y++) {        CompareLD2(dataSet[x], dataSet[y]);    }}``
pending…
TheSpanishInquisition's [ no limit ]
``for (var x = 0; x < dataSetLength; x++) {    for (var y = 0; y < dataSetLength; y++) {        String.damerauLevenshteinDistance(dataSet[x], dataSet[y]);    }}``
pending…
TheSpanishInquisition's [ optimised damerau ] [ no limit ]
``for (var x = 0; x < dataSetLength; x++) {    for (var y = 0; y < dataSetLength; y++) {        String.damerauLevenshteinDistance2(dataSet[x], dataSet[y]);    }}``
pending…

Revisions

You can edit these tests or add even more tests to this page by appending `/edit` to the URL. Here’s a list of current revisions for this page:

• Revision 2: published
• Revision 3: published
• Revision 4: published
• Revision 5: published
• Revision 6: published

Jean-Pierre St-Yves commented :

For TheSpanishInquisition method, is it possible that there were a mixed-matched with the keyword "this" in the line:

matrix[i][j] = (i > 1 && j > 1 && thisi === that[j-2] &amp;&amp; this[i-2] === thatj && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.

matrix[i][j] = (i > 1 && j > 1 && this_i === that[j-2] && __this[i-2] === that_j && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.

Jean-Pierre St-Yves commented :

Is it possible that the code:

if (i === j && matrix[i][j] > 4) return thisLength;

Should be:

if (i === j && matrix[i][j] > limit) return thisLength;

Comment form temporarily disabled.