I am looking for a JavaScript function that can compare two strings and return the likeliness that they are alike. I have looked at soundex but that's not really great for multi-word strings or non-names. I am looking for a function like:
function compare(strA,strB){ } compare("Apples","apple") = Some X Percentage.
The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?
Ultimately none of these served my purpose so I used this:
/**
 * Decide whether a user's answer loosely matches any accepted answer.
 *
 * `c` is a comma-separated list of accepted answers. Each accepted answer is
 * split into words and every (accepted word, user word) pair is scored:
 *   - when both words have a SOUNDEX code, the pair matches if the codes are equal;
 *   - otherwise the pair matches if the user word contains the accepted word
 *     (case-insensitively, because user words are lowercased by `clean`).
 * An accepted answer is considered matched when the number of matching pairs
 * divided by the number of accepted words is greater than 0.5.
 *
 * @param {string} c Comma-separated accepted answers (null/undefined is treated as "").
 * @param {string} u The user's answer (null/undefined is treated as "").
 * @returns {boolean} true if at least one accepted answer matches, false otherwise.
 */
function compare(c, u) {
  var answers = (c == null ? "" : String(c)).split(",");
  var userWords = clean(u);
  // Compute each user word's code once instead of once per inner-loop iteration.
  var userCodes = userWords.map(function (word) {
    return soundex(word);
  });

  for (var a = 0; a < answers.length; a++) {
    // Drop empty tokens: "".indexOf-style matching would otherwise let an empty
    // accepted word match every user word.
    var answerWords = answers[a].split(/\s+/).filter(function (word) {
      return word !== "";
    });
    if (answerWords.length === 0) {
      continue;
    }

    var hits = 0;
    for (var x = 0; x < answerWords.length; x++) {
      var answerCode = soundex(answerWords[x]);
      var answerLower = answerWords[x].toLowerCase();
      for (var y = 0; y < userWords.length; y++) {
        if (userCodes[y] !== null && answerCode !== null) {
          if (userCodes[y] === answerCode) {
            hits += 1;
          }
        } else if (userWords[y].indexOf(answerLower) > -1) {
          // At least one word is not purely alphabetic: fall back to substring match.
          hits += 1;
        }
      }
    }

    if (hits / answerWords.length > 0.5) {
      return true;
    }
  }
  return false;
}

// Create an object listing the SOUNDEX value for each lowercase letter.
//  -1 indicates that the letter is not coded, but is used for coding
//   0 indicates that the letter is omitted for modern census archives
//     but acts like -1 for older census archives
//   1 is for BFPV
//   2 is for CGJKQSXZ
//   3 is for DT
//   4 is for L
//   5 is for MN
//   6 is for R
function makesoundex() {
  this.a = -1;
  this.b = 1;
  this.c = 2;
  this.d = 3;
  this.e = -1;
  this.f = 1;
  this.g = 2;
  this.h = 0;
  this.i = -1;
  this.j = 2;
  this.k = 2;
  this.l = 4;
  this.m = 5;
  this.n = 5;
  this.o = -1;
  this.p = 1;
  this.q = 2;
  this.r = 6;
  this.s = 2;
  this.t = 3;
  this.u = -1;
  this.v = 1;
  this.w = 0;
  this.x = 2;
  this.y = -1;
  this.z = 2;
}

// Shared letter -> SOUNDEX code lookup table used by the helpers below.
var sndx = new makesoundex();

/**
 * Check that the input is valid for SOUNDEX coding.
 *
 * @param {*} name Candidate value.
 * @returns {boolean} true only for a non-empty string made solely of the
 *   ASCII letters a-z / A-Z.
 */
function isSurname(name) {
  return typeof name === "string" && /^[A-Za-z]+$/.test(name);
}

/**
 * Collapse out directly adjacent sounds: every letter whose code equals the
 * code of the letter immediately before it is removed.
 * Assumes `surname` contains only lowercase letters.
 *
 * @param {string} surname Lowercase word.
 * @returns {string} The word with adjacent same-coded letters collapsed.
 */
function collapse(surname) {
  var result = surname.charAt(0);
  for (var i = 1; i < surname.length; i++) {
    if (sndx[surname.charAt(i)] !== sndx[surname.charAt(i - 1)]) {
      result += surname.charAt(i);
    }
  }
  return result;
}

/**
 * Remove letters that are completely ignored under the newer National
 * Archives method (code 0, i.e. H and W), except in the first position.
 * Characters with no entry in the table are dropped as well.
 * Assumes `surname` contains only lowercase letters.
 *
 * @param {string} surname Lowercase word.
 * @returns {string} The word without ignorable letters after the first one.
 */
function omit(surname) {
  var result = surname.charAt(0);
  for (var i = 1; i < surname.length; i++) {
    var code = sndx[surname.charAt(i)];
    if (code !== 0 && code !== undefined) {
      result += surname.charAt(i);
    }
  }
  return result;
}

/**
 * Output the coded sequence: the uppercased first letter, a dash, then the
 * first three positive codes of the remaining letters, padded with "0".
 *
 * @param {string} seq Lowercase, already collapsed word.
 * @returns {string} Code in the form "R-163".
 */
function output_sequence(seq) {
  var output = seq.charAt(0).toUpperCase() + "-"; // Retain first letter, then a dash
  var count = 0;
  for (var i = 1; i < seq.length && count < 3; i++) {
    var code = sndx[seq.charAt(i)];
    if (code > 0) {
      output += code;
      count++;
    }
  }
  for (; count < 3; count++) {
    output += "0";
  }
  return output;
}

/**
 * Compute the SOUNDEX code for a word.
 *
 * @param {*} value Word to encode.
 * @returns {?string} Code such as "R-163", or null when `value` is not a
 *   non-empty string of ASCII letters.
 */
function soundex(value) {
  if (!isSurname(value)) {
    return null;
  }
  var withoutIgnored = omit(value.toLowerCase());
  return output_sequence(collapse(withoutIgnored));
}

/**
 * Normalize a user answer into a list of comparable words.
 *
 * Commas are removed, the text is lowercased and split on whitespace, every
 * "$" and "&" is stripped from each word, and words that end up empty or that
 * appear in the exclusion list are dropped.
 *
 * @param {string} u Raw user answer (null/undefined is treated as "").
 * @param {string[]} [excludedWords] Lowercase words to leave out of the
 *   comparison; defaults to the placeholder list below, which is meant to be
 *   replaced with real words.
 * @returns {string[]} Cleaned, lowercased words in their original order.
 */
function clean(u, excludedWords) {
  var excluded = excludedWords || ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
  var text = u == null ? "" : String(u);
  var rawWords = text.replace(/,/g, "").toLowerCase().split(/\s+/);
  var kept = [];
  for (var i = 0; i < rawWords.length; i++) {
    // Don't use & or $ in comparison
    var word = rawWords[i].replace(/[$&]/g, "");
    if (word !== "" && excluded.indexOf(word) === -1) {
      kept.push(word);
    }
  }
  return kept;
}
First, it is safe to compare strings that contain only characters from the Basic Multilingual Plane (including the ASCII characters) using the regular comparison operators === and ==, or the utility function Object.is(). Both str1 and str2 contain ASCII characters, so you can safely compare them using comparison operators.
In JavaScript, strings can be compared based on their “value”, “character case”, “length”, or “alphabetical order”. To compare strings based on both their values and their character case, use the “Strict Equality Operator (===)”.
To compare two strings in JavaScript, use the localeCompare() method. The method returns 0 if both strings are equal, a negative number (typically -1) if string 1 sorts before string 2, and a positive number (typically 1) if string 2 sorts before string 1.
Use the strict equality operator (===) to check if two strings are equal in TypeScript, e.g. if (str1 === str2) {} . The strict equality operator returns true if the strings are equal, otherwise false is returned.
Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance
/**
 * Score how alike two strings are, from 0 to 1.
 *
 * The score is (length of the longer string - edit distance) divided by the
 * length of the longer string; two empty strings score 1.0.
 *
 * @param {string} s1 First string.
 * @param {string} s2 Second string.
 * @returns {number} Similarity ratio.
 */
function similarity(s1, s2) {
  // Order the pair so the longer string drives the normalisation.
  var swapNeeded = s1.length < s2.length;
  var longer = swapNeeded ? s2 : s1;
  var shorter = swapNeeded ? s1 : s2;
  var maxLength = longer.length;

  if (maxLength === 0) {
    return 1.0;
  }
  return (maxLength - editDistance(longer, shorter)) / maxLength;
}
For calculating edit distance
/**
 * Compute the case-insensitive edit distance between two strings: the
 * number of single-character insertions, deletions, or substitutions needed
 * to turn one lowercased string into the other.
 *
 * @param {string} s1 First string.
 * @param {string} s2 Second string.
 * @returns {number} The edit distance.
 */
function editDistance(s1, s2) {
  var source = s1.toLowerCase();
  var target = s2.toLowerCase();
  var width = target.length;

  // row[col] holds the distance between the processed prefix of `source`
  // and the first `col` characters of `target`.
  var row = [];
  var col;
  for (col = 0; col <= width; col++) {
    row[col] = col;
  }

  for (var r = 1; r <= source.length; r++) {
    var diagonal = row[0]; // value for (r - 1, col - 1)
    row[0] = r;
    for (col = 1; col <= width; col++) {
      var above = row[col]; // value for (r - 1, col), about to be overwritten
      if (source.charAt(r - 1) === target.charAt(col - 1)) {
        row[col] = diagonal;
      } else {
        row[col] = Math.min(diagonal, row[col - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return row[width];
}
Usage
similarity('Stack Overflow','Stack Ovrflw')
returns 0.8571428571428571
You can play with it below:
/**
 * Read both text inputs from the page and display their similarity score
 * in the element with id "output".
 */
function checkSimilarity() {
  var left = document.getElementById("lhsInput").value;
  var right = document.getElementById("rhsInput").value;
  var target = document.getElementById("output");
  target.innerHTML = similarity(left, right);
}

/**
 * Score how alike two strings are, from 0 to 1: (length of the longer
 * string - edit distance) / length of the longer string. Two empty strings
 * score 1.0.
 *
 * @param {string} s1 First string.
 * @param {string} s2 Second string.
 * @returns {number} Similarity ratio.
 */
function similarity(s1, s2) {
  var firstIsShorter = s1.length < s2.length;
  var longer = firstIsShorter ? s2 : s1;
  var shorter = firstIsShorter ? s1 : s2;

  if (longer.length === 0) {
    return 1.0;
  }
  return (longer.length - editDistance(longer, shorter)) / longer.length;
}

/**
 * Compute the case-insensitive edit distance (insertions, deletions,
 * substitutions) between two strings.
 *
 * @param {string} s1 First string.
 * @param {string} s2 Second string.
 * @returns {number} The edit distance.
 */
function editDistance(s1, s2) {
  var left = s1.toLowerCase();
  var right = s2.toLowerCase();

  // Distances from the empty prefix of `left` to every prefix of `right`.
  var previous = [];
  for (var k = 0; k <= right.length; k++) {
    previous.push(k);
  }

  for (var i = 1; i <= left.length; i++) {
    var current = [i];
    for (var j = 1; j <= right.length; j++) {
      if (left.charAt(i - 1) === right.charAt(j - 1)) {
        current.push(previous[j - 1]);
      } else {
        current.push(Math.min(previous[j - 1], current[j - 1], previous[j]) + 1);
      }
    }
    previous = current;
  }

  return previous[right.length];
}
<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div> <div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div> <div>Match: <span id="output">No Input</span></div>
Using this library for string similarity worked like a charm for me!
Here's the Example -
// NOTE(review): `stringSimilarity` is not defined in this snippet; it is presumably the
// third-party library the answer refers to and must be loaded first. TODO confirm package/version.
var similarity = stringSimilarity.compareTwoStrings("Apples","apple"); // => 0.88
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With