Damerau-Levenshtein Distance

JavaScript performance comparison

Test case created by TheSpanishInquisition and last updated

Info

Damerau-Levenshtein Distance is a small modification to Levenshtein Distance to include transpositions. In JavaScript the following line of code (as implemented in Westgate's CompareLD2) adds Damerau's transposition and can only be written in a couple of ways:

if (i > 1 && j > 1 && s_i === t.charAt(j - 2) && s.charAt(i - 2) === t_j) {
    d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}

This performance test compares Westgate's original Damerau-Levenshtein Distance and potential optimisations of the Damerau transposition.

Wiki

See this jsPerf for comparison of Levenshtein Distance, this is for Damerau–Levenshtein Distance only.

Preparation code

 
<script>
Benchmark.prototype.setup = function() {
    Math.randomInt = function(min, max) {
        return Math.floor(Math.random() * (max - min + 1)) + min;
    };
   
    // Generate random strings.
    window.dataSet = [];
    window.dataSetLength = 50;
    for (var i = 0; i < dataSetLength; i++) {
        dataSet[i] = Math.randomInt(999, 999999).toString();
    }
   
   
   
    // Westgate's
    window.CompareLD2 = function(s, t) {
      var d = [];
      var n = s.length;
      var m = t.length;
      if (n === 0) return m;
      if (m === 0) return n;
      for (var i = n; i >= 0; i--) d[i] = [];
      for (var i = n; i >= 0; i--) d[i][0] = i;
      for (var j = m; j >= 0; j--) d[0][j] = j;
      for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);
        for (var j = 1; j <= m; j++) {
          if (i === j && d[i][j] > 4) return n;
          var t_j = t.charAt(j - 1);
          var cost = (s_i === t_j) ? 0 : 1;
          var mi = d[i - 1][j] + 1;
          var b = d[i][j - 1] + 1;
          var c = d[i - 1][j - 1] + cost;
          if (b < mi) mi = b;
          if (c < mi) mi = c;
          d[i][j] = mi;
          if (i > 1 && j > 1 && s_i === t.charAt(j - 2) && s.charAt(i - 2) === t_j) {
            d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
          }
        }
      }
      return d[n][m];
    };
   
   
   
    // TheSpanishInquisition
   
    // Cache the matrix. Note this implementation is limited to
    // strings of 64 char or less. This could be altered to update
    // dynamically, or a larger value could be used.
    var matrix = [];
    for (var i = 0; i < 64; i++) {
        matrix[i] = [i];
        matrix[i].length = 64;
    }
    for (var i = 0; i < 64; i++) {
        matrix[0][i] = i;
    }
   
    // Functional implementation of Levenshtein Distance.
    String.damerauLevenshteinDistance = function(__this, that, limit) {
        var thisLength = __this.length, thatLength = that.length;
   
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
   
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = __this[i-1];
   
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
   
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
   
   
                        // Damerau transposition.
                        if (i > 1 && j > 1 && this_i === that[j-2] && this[i-2] === that_j) {
                                matrix[i][j] = Math.min(matrix[i    ][j    ],
                                                        matrix[i - 2][j - 2] + cost);   // Transposition.
                        }
   
                        matrix[i][j] = min;     // Update matrix.
                }
        }
   
        return matrix[thisLength][thatLength];
    };
   
    String.damerauLevenshteinDistance2 = function(__this, that, limit) {
        var thisLength = __this.length, thatLength = that.length;
   
        if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
        if (thisLength === 0) return thatLength;
        if (thatLength === 0) return thisLength;
   
        // Calculate matrix.
        var this_i, that_j, cost, min, t;
        for (i = 1; i <= thisLength; ++i) {
                this_i = __this[i-1];
   
                // Step 4
                for (j = 1; j <= thatLength; ++j) {
                        // Check the jagged ld total so far
                        if (i === j && matrix[i][j] > 4) return thisLength;
   
                        that_j = that[j-1];
                        cost = (this_i === that_j) ? 0 : 1; // Step 5
                        // Calculate the minimum (much faster than Math.min(...)).
                        min    = matrix[i - 1][j    ] + 1;                                              // Deletion.
                        if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
                        if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.
   
   
                        // Update matrix.
                        matrix[i][j] = (i > 1 && j > 1 && this_i === that[j-2] && this[i-2] === that_j && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.
                }
        }
   
        return matrix[thisLength][thatLength];
    };
};
</script>

Test runner

Warning! For accurate results, please disable Firebug before running the tests. (Why?)

Java applet disabled.

Testing in unknown unknown
Test Ops/sec
Westgate's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        CompareLD2(dataSet[x], dataSet[y]);
    }
}
pending…
TheSpanishInquisition's [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.damerauLevenshteinDistance(dataSet[x], dataSet[y]);
    }
}
pending…
TheSpanishInquisition's [ optimised damerau ] [ no limit ]
for (var x = 0; x < dataSetLength; x++) {
    for (var y = 0; y < dataSetLength; y++) {
        String.damerauLevenshteinDistance2(dataSet[x], dataSet[y]);
    }
}
pending…

Compare results of other browsers

Revisions

You can edit these tests or add even more tests to this page by appending /edit to the URL. Here’s a list of current revisions for this page:

2 comments

Jean-Pierre St-Yves commented :

For TheSpanishInquisition method, is it possible that there were a mixed-matched with the keyword "this" in the line:

matrix[i][j] = (i > 1 && j > 1 && thisi === that[j-2] &amp;&amp; this[i-2] === thatj && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.

that should read:

matrix[i][j] = (i > 1 && j > 1 && this_i === that[j-2] && __this[i-2] === that_j && (t = matrix[i-2][j-2]+cost) < min) ? t : min; // Transposition.

Jean-Pierre St-Yves commented :

Is it possible that the code:

if (i === j && matrix[i][j] > 4) return thisLength;

Should be:

if (i === j && matrix[i][j] > limit) return thisLength;

Add a comment