I have an array with some duplicates. An efficient algorithm for sorting the array based on the duplicate count e.g.
['[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]']
=>
['[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]']
Because the counts are as followings [3, 2, 2, 1, 1]
I came up with:
const itemCounts = {}
const ordereditems = []
for (let i = 0; i < allitems.length; i++) {
let item = allitems[i];
itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
}
const tuples = []
for (let key in itemCounts) {
tuples.push([key, itemCounts[key]])
}
return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])
Which is about Θ(3 N + N log N)
?
Perhaps something faster with lodash?
Maybe keep a sorted priority queue as I do the counting?
Maybe use a radix sort of some kind?
This is my try, and first attempt to write a snippet :)
var allitems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];
function original_code(allitems) {
const itemCounts = {}
const ordereditems = []
for (let i = 0; i < allitems.length; i++) {
let item = allitems[i];
itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
}
const tuples = []
for (let key in itemCounts) {
tuples.push([key, itemCounts[key]])
}
return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])
}
function myTry(allitems) {
var arr;
const dic = {};
arr = [];
for (let i = 0; i < allitems.length; i++) {
let item = allitems[i];
if (!dic.hasOwnProperty(item)) {
dic[item] = 1;
arr.push(item);
} else
dic[item]++;
}
arr.sort((a, b) => dic[b] - dic[a]);
return arr;
}
//measure attempts
{ //original code
var t0 = performance.now();
var res;
for (let j = 0; j < 10000; j++) {
res = original_code(allitems);
}
var t1 = performance.now();
console.log("original " + (t1 - t0) + " milliseconds.");
console.log("original " + res);
}
{ //my try
var t0 = performance.now();
var res;
for (let j = 0; j < 10000; j++) {
res = myTry(allitems);
}
var t1 = performance.now();
console.log("myTry " + (t1 - t0) + " milliseconds.");
console.log("myTry " + res);
}
{ //my try
var t0 = performance.now();
var res;
for (let j = 0; j < 10000; j++) {
res = myTry(allitems);
}
var t1 = performance.now();
console.log("myTry2 " + (t1 - t0) + " milliseconds.");
console.log("myTry2 " + res);
}
{ //original code
var t0 = performance.now();
var res;
for (let j = 0; j < 10000; j++) {
res = original_code(allitems);
}
var t1 = performance.now();
console.log("original2 " + (t1 - t0) + " milliseconds.");
console.log("original2 " + res);
}
Edit
I've tried to make a more reliable measurement. If there is a better way, I'd appreciate if you tell me.
+ Changed the sorting.
Use array#reduce
to create a frequency object with word and frequency of this word in the array. Then based on the object value sort the values and then take out the all the words.
var arr = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'],
frequency = arr.reduce((r,val) => {
r[val] = r[val] || {val, count : 0};
r[val].count = r[val].count + 1;
return r;
},{});
var result = Object.values(frequency).sort((a,b) => {
return b.count - a.count;
}).map(({val}) => val);
console.log(result);
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With