What is an elegant way to take a javascript array, order by the frequency of the values, and then filter for uniques?
So,
["apples", "oranges", "oranges", "oranges", "bananas", "bananas", "oranges"]
becomes
["oranges, "bananas", "apples"]
The sort() method returns a reference to the original array, so mutating the returned array will mutate the original array as well.
The Fisher Yates Method The above example, array.sort(), is not accurate, it will favor some numbers over the others. The most popular correct method, is called the Fisher Yates shuffle, and was introduced in data science as early as 1938!
Compute the frequency of each item first.
{
apples: 1,
oranges: 4,
bananas: 2
}
Then create an array from this frequency object which will also remove the duplicates.
["apples", "oranges", "bananas"]
Now sort this array in descending order using the frequency map we created earlier.
function compareFrequency(a, b) {
return frequency[b] - frequency[a];
}
array.sort(compareFrequency);
Here's the entire source (using the newly introduced Array functions in ECMA 5) and combining the de-duplication and frequency map generation steps,
function sortByFrequency(array) {
var frequency = {};
array.forEach(function(value) { frequency[value] = 0; });
var uniques = array.filter(function(value) {
return ++frequency[value] == 1;
});
return uniques.sort(function(a, b) {
return frequency[b] - frequency[a];
});
}
Same as above using the regular array iteration.
function sortByFrequencyAndRemoveDuplicates(array) {
var frequency = {}, value;
// compute frequencies of each value
for(var i = 0; i < array.length; i++) {
value = array[i];
if(value in frequency) {
frequency[value]++;
}
else {
frequency[value] = 1;
}
}
// make array from the frequency object to de-duplicate
var uniques = [];
for(value in frequency) {
uniques.push(value);
}
// sort the uniques array in descending order by frequency
function compareFrequency(a, b) {
return frequency[b] - frequency[a];
}
return uniques.sort(compareFrequency);
}
// returns most frequent to least frequent
Array.prototype.byCount= function(){
var itm, a= [], L= this.length, o= {};
for(var i= 0; i<L; i++){
itm= this[i];
if(!itm) continue;
if(o[itm]== undefined) o[itm]= 1;
else ++o[itm];
}
for(var p in o) a[a.length]= p;
return a.sort(function(a, b){
return o[b]-o[a];
});
}
//test
var A= ["apples","oranges","oranges","oranges","bananas","bananas","oranges"];
A.byCount()
/* returned value: (Array) oranges,bananas,apples */
I was actually working on this at the same time - the solution I came up with is pretty much identical to Anurag's.
However I thought it might be worth sharing as I had a slightly different way of calculating the frequency of occurrences, using the ternary operator and checking if the value has been counted yet in a slightly different way.
function sortByFrequencyAndFilter(myArray)
{
var newArray = [];
var freq = {};
//Count Frequency of Occurances
var i=myArray.length-1;
for (var i;i>-1;i--)
{
var value = myArray[i];
freq[value]==null?freq[value]=1:freq[value]++;
}
//Create Array of Filtered Values
for (var value in freq)
{
newArray.push(value);
}
//Define Sort Function and Return Sorted Results
function compareFreq(a,b)
{
return freq[b]-freq[a];
}
return newArray.sort(compareFreq);
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With