Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Compare Strings in JavaScript and Return % of Likeness

I am looking for a JavaScript function that can compare two strings and return the likelihood that they are alike. I have looked at Soundex, but it's not really great for multi-word strings or non-names. I am looking for a function like:

    function compare(strA, strB) { /* ... */ }          // compare("Apples", "apple") => some X percentage

The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?

Ultimately none of these served my purpose so I used this:

     /**
      * Decide whether a user's answer is close enough to any accepted answer.
      *
      * Each accepted answer is split into words. An answer word counts as matched
      * when at least one user word has the same SOUNDEX code or, when either word
      * cannot be SOUNDEX-coded (digits, punctuation, ...), when the user word
      * contains the answer word (case-insensitively). An answer is accepted when
      * more than half of its words are matched.
      *
      * @param {string} c - Comma-separated list of accepted answers.
      * @param {string} u - Raw answer typed by the user.
      * @param {string[]} [excludedWords] - Optional words to drop from the user's
      *     answer before comparing (forwarded to clean()).
      * @returns {boolean} true if the user's answer matches any accepted answer.
      */
     function compare(c, u, excludedWords) {
         var userWords = clean(u, excludedWords);
         // Code every user word once instead of once per answer word.
         var userCodes = userWords.map(function (word) {
             return soundex(word);
         });
         var answers = c.split(",");

         for (var a = 0; a < answers.length; a++) {
             var answerWords = answers[a].trim().split(/\s+/).filter(function (word) {
                 return word !== "";
             });
             // An empty accepted answer (e.g. "" or a trailing comma) must not
             // match: "anything".indexOf("") is always 0.
             if (answerWords.length === 0) {
                 continue;
             }
             var matched = 0;
             for (var x = 0; x < answerWords.length; x++) {
                 // Count each answer word at most once so the ratio stays in
                 // [0, 1] and repeating a word does not inflate the score.
                 if (answerWordMatches(answerWords[x], userWords, userCodes)) {
                     matched++;
                 }
             }
             if (matched / answerWords.length > 0.5) {
                 return true;
             }
         }
         return false;
     }

     // Helper for compare(): true if any user word matches the given answer word.
     function answerWordMatches(answerWord, userWords, userCodes) {
         var answerCode = soundex(answerWord);
         // User words are lowercased by clean(), so lowercase the answer word
         // too before the substring fallback.
         var answerLower = answerWord.toLowerCase();
         for (var i = 0; i < userWords.length; i++) {
             if (answerCode !== null && userCodes[i] !== null) {
                 if (answerCode === userCodes[i]) {
                     return true;
                 }
             } else if (userWords[i].indexOf(answerLower) > -1) {
                 return true;
             }
         }
         return false;
     }

     // Create an object listing the SOUNDEX value for each lowercase letter.
     // -1 indicates that the letter is not coded, but is used for coding
     //  0 indicates that the letter is omitted for modern census archives
     //    but acts like -1 for older census archives
     //  1 is for BFPV
     //  2 is for CGJKQSXZ
     //  3 is for DT
     //  4 is for L
     //  5 is for MN
     //  6 is for R
     function makesoundex() {
         this.a = this.e = this.i = this.o = this.u = this.y = -1;
         this.h = this.w = 0;
         this.b = this.f = this.p = this.v = 1;
         this.c = this.g = this.j = this.k = this.q = this.s = this.x = this.z = 2;
         this.d = this.t = 3;
         this.l = 4;
         this.m = this.n = 5;
         this.r = 6;
     }

     var sndx = new makesoundex();

     /**
      * Check that the input can be SOUNDEX-coded.
      *
      * @param {*} name - Candidate value.
      * @returns {boolean} true only for a non-empty string of ASCII letters.
      */
     function isSurname(name) {
         return typeof name === "string" && /^[A-Za-z]+$/.test(name);
     }

     /**
      * Collapse out directly adjacent sounds: within a run of neighbouring
      * letters that share a code, only the first letter is kept.
      * Assumes surname contains only lowercase letters.
      *
      * @param {string} surname
      * @returns {string} surname with directly adjacent same-code letters removed.
      */
     function collapse(surname) {
         var kept = surname.charAt(0);
         for (var i = 1; i < surname.length; i++) {
             if (sndx[surname.charAt(i)] !== sndx[surname.charAt(i - 1)]) {
                 kept += surname.charAt(i);
             }
         }
         return kept;
     }

     /**
      * Apply the newer National Archives rule: H and W (code 0) are dropped
      * everywhere except in the first position, which is always retained.
      * Assumes surname contains only lowercase letters.
      *
      * @param {string} surname
      * @returns {string} surname without non-leading H/W.
      */
     function omit(surname) {
         var kept = surname.charAt(0);
         for (var i = 1; i < surname.length; i++) {
             var letter = surname.charAt(i);
             // Code 0 is falsy; uncoded vowels are -1 and therefore kept.
             if (sndx[letter]) {
                 kept += letter;
             }
         }
         return kept;
     }

     /**
      * Output the coded sequence: the upper-cased first letter, a dash, then the
      * codes of the first three coded letters, right-padded with "0".
      *
      * @param {string} seq - Lowercase letters after omit()/collapse().
      * @returns {string} Code such as "R-163".
      */
     function output_sequence(seq) {
         var output = seq.charAt(0).toUpperCase() + "-"; // Retain first letter
         var rest = seq.substring(1);
         var count = 0;
         for (var i = 0; i < rest.length && count < 3; i++) {
             var code = sndx[rest.charAt(i)];
             if (code > 0) {
                 output += code;
                 count++;
             }
         }
         for (; count < 3; count++) {
             output += "0";
         }
         return output;
     }

     /**
      * Compute the SOUNDEX code for a word.
      *
      * @param {string} value - Word to encode.
      * @returns {?string} Code such as "A-261", or null when value is not a
      *     non-empty string of ASCII letters.
      */
     function soundex(value) {
         if (!isSurname(value)) {
             return null;
         }
         // H/W are removed first so that letters separated only by H or W are
         // treated as adjacent by collapse().
         var withoutHW = omit(value.toLowerCase());
         return output_sequence(collapse(withoutHW));
     }

     /**
      * Normalise a user's answer into a list of comparable words: commas are
      * removed, text is lowercased and split on whitespace, excluded words are
      * dropped, and every "$" and "&" is stripped from the remaining words.
      *
      * @param {string} u - Raw user answer.
      * @param {string[]} [excludedWords] - Words to leave out of the comparison
      *     (matched case-insensitively). Defaults to none.
      * @returns {string[]} Non-empty cleaned words, in their original order.
      */
     function clean(u, excludedWords) {
         var excluded = (excludedWords || []).map(function (word) {
             return String(word).toLowerCase();
         });
         var words = u.replace(/,/g, "").toLowerCase().split(/\s+/);
         var kept = [];
         for (var i = 0; i < words.length; i++) {
             var word = words[i];
             // A word is skipped when it equals ANY excluded word.
             if (word === "" || excluded.indexOf(word) !== -1) {
                 continue;
             }
             // Don't use & or $ in comparison
             word = word.replace(/[$&]/g, "");
             if (word !== "") {
                 kept.push(word);
             }
         }
         return kept;
     }
like image 776
Brad Ruderman Avatar asked May 06 '12 19:05

Brad Ruderman


People also ask

Is it safe to compare JavaScript strings?

Firstly, you are safe to compare strings that contain characters from the Basic Multilingual Plane (including the ASCII characters) using the regular comparison operators === and ==, or the utility function Object.is(). Both str1 and str2 contain ASCII characters, so you can safely compare them using comparison operators.

Can we use == to compare strings in JavaScript?

In JavaScript, strings can be compared based on their “value”, “character case”, “length”, or “alphabetical” order. To compare strings based on their values and character case, use the “Strict Equality Operator (===)”.

How do I compare two strings in JavaScript?

To compare two strings in JavaScript, use the localeCompare() method. The method returns 0 if both strings are equal, a negative number if string 1 is sorted before string 2, and a positive number if string 2 is sorted before string 1 (the exact values are not guaranteed to be -1 and 1).

How do I compare two strings in TypeScript if condition?

Use the strict equality operator (===) to check if two strings are equal in TypeScript, e.g. if (str1 === str2) {} . The strict equality operator returns true if the strings are equal, otherwise false is returned.


2 Answers

Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance

/**
 * Similarity of two strings as a ratio in [0, 1], based on edit distance.
 *
 * The result is (L - d) / L, where L is the length of the longer string and d
 * is editDistance() between the two. Two empty strings are fully similar.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} 1 for identical strings (ignoring case), 0 for strings
 *     with nothing in common.
 */
function similarity(s1, s2) {
  // editDistance() compares lowercased text. Lowercasing can change a string's
  // length (e.g. "\u0130" becomes two code units), so fold case BEFORE
  // measuring; otherwise the distance can exceed the divisor and the score
  // goes negative.
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var longer = a.length >= b.length ? a : b;
  var shorter = a.length >= b.length ? b : a;
  var longerLength = longer.length;
  if (longerLength === 0) {
    return 1.0;
  }
  return (longerLength - editDistance(longer, shorter)) / longerLength;
}

For calculating edit distance

/**
 * Case-insensitive Levenshtein distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other. Both inputs are lowercased before comparing.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Edit distance between the lowercased inputs.
 */
function editDistance(s1, s2) {
  var source = s1.toLowerCase();
  var target = s2.toLowerCase();

  // Distances from the empty prefix of `source` to every prefix of `target`.
  var previousRow = [];
  var col;
  for (col = 0; col <= target.length; col++) {
    previousRow.push(col);
  }

  for (var row = 1; row <= source.length; row++) {
    // Turning `row` characters into the empty prefix costs `row` deletions.
    var currentRow = [row];
    for (col = 1; col <= target.length; col++) {
      var diagonal = previousRow[col - 1];
      if (source.charAt(row - 1) === target.charAt(col - 1)) {
        currentRow.push(diagonal);
      } else {
        currentRow.push(
          Math.min(diagonal, currentRow[col - 1], previousRow[col]) + 1
        );
      }
    }
    previousRow = currentRow;
  }

  return previousRow[target.length];
}

Usage

// Example call: 2 edits over a longer length of 14 gives (14 - 2) / 14.
similarity('Stack Overflow','Stack Ovrflw') 

returns 0.8571428571428571


You can play with it below:

/**
 * Read both demo inputs (#lhsInput, #rhsInput) and show their similarity
 * score in #output.
 */
function checkSimilarity() {
  var str1 = document.getElementById("lhsInput").value;
  var str2 = document.getElementById("rhsInput").value;
  // The score is plain text, so write it as text rather than parsing it as HTML.
  document.getElementById("output").textContent = String(similarity(str1, str2));
}

/**
 * Similarity of two strings as a ratio in [0, 1], based on edit distance.
 *
 * The result is (L - d) / L, where L is the length of the longer string and d
 * is the edit distance between the two. Two empty strings are fully similar.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} 1 for identical strings (ignoring case), 0 for strings
 *     with nothing in common.
 */
function similarity(s1, s2) {
  // editDistance() compares lowercased text. Lowercasing can change a string's
  // length (e.g. "\u0130" becomes two code units), so fold case BEFORE
  // measuring; otherwise the distance can exceed the divisor and the score
  // goes negative.
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var longer = a.length >= b.length ? a : b;
  var shorter = a.length >= b.length ? b : a;
  var longerLength = longer.length;
  if (longerLength === 0) {
    return 1.0;
  }
  return (longerLength - editDistance(longer, shorter)) / longerLength;
}

/**
 * Case-insensitive Levenshtein distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Edit distance between the lowercased inputs.
 */
function editDistance(s1, s2) {
  var source = s1.toLowerCase();
  var target = s2.toLowerCase();

  // Distances from the empty prefix of `source` to every prefix of `target`.
  var previousRow = [];
  var col;
  for (col = 0; col <= target.length; col++) {
    previousRow.push(col);
  }

  for (var row = 1; row <= source.length; row++) {
    var currentRow = [row];
    for (col = 1; col <= target.length; col++) {
      var diagonal = previousRow[col - 1];
      if (source.charAt(row - 1) === target.charAt(col - 1)) {
        currentRow.push(diagonal);
      } else {
        currentRow.push(
          Math.min(diagonal, currentRow[col - 1], previousRow[col]) + 1
        );
      }
    }
    previousRow = currentRow;
  }

  return previousRow[target.length];
}
<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>  <div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>  <div>Match: <span id="output">No Input</span></div>
like image 135
overlord1234 Avatar answered Oct 21 '22 21:10

overlord1234


Using this library for string similarity worked like a charm for me!

Here's the Example -

// NOTE(review): `stringSimilarity` is not defined in this snippet; presumably it is the
// third-party library this answer refers to — confirm it is loaded before this line runs.
var similarity = stringSimilarity.compareTwoStrings("Apples","apple");    // => 0.88 
like image 28
Tushar Walzade Avatar answered Oct 21 '22 21:10

Tushar Walzade