Order array of email addresses by frequency counts

Question

I have an array with some duplicates. An efficient algorithm for sorting the array based on the duplicate count e.g.

['d@me.com', 'z@gmail.com', 'e@me.com', 'b@me.com', 'c@me.com', 'z@gmail.com', 'z@gmail.com', 'b@me.com', 'e@me.com']
=>
['z@gmail.com', 'e@me.com', 'b@me.com', 'd@me.com', 'c@me.com']

Because the counts are as followings [3, 2, 2, 1, 1]

I came up with:

const itemCounts = {}
const ordereditems = []
for (let i = 0; i < allitems.length; i++) {
  let item = allitems[i];
  itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
}
const tuples = []
for (let key in itemCounts) {
  tuples.push([key, itemCounts[key]])
}
return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])

Which is about Θ(3 N + N log N)?

Perhaps something faster with lodash?

Maybe keep a sorted priority queue as I do the counting?

Maybe use a radix sort of some kind?

Koray · Accepted Answer

This is my try, and first attempt to write a snippet :)

var allitems = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'];

function original_code(allitems) {
  const itemCounts = {}
  const ordereditems = []
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];
    itemCounts[item] = itemCounts[item] ? itemCounts[item] + 1 : 1
  }
  const tuples = []
  for (let key in itemCounts) {
    tuples.push([key, itemCounts[key]])
  }
  return tuples.sort((a, b) => a[1] < b[1]).map(x => x[0])
}

function myTry(allitems) {
  var arr;
  const dic = {};
  arr = [];
  for (let i = 0; i < allitems.length; i++) {
    let item = allitems[i];
    if (!dic.hasOwnProperty(item)) {
      dic[item] = 1;
      arr.push(item);
    } else
      dic[item]++;
  }
  arr.sort((a, b) => dic[b] - dic[a]);
  return arr;
}



//measure attempts
{ //original code
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = original_code(allitems);
  }
  var t1 = performance.now();
  console.log("original " + (t1 - t0) + " milliseconds.");
  console.log("original " + res);
}

{ //my try
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = myTry(allitems);
  }
  var t1 = performance.now();
  console.log("myTry " + (t1 - t0) + " milliseconds.");
  console.log("myTry " + res);
}



{ //my try
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = myTry(allitems);
  }
  var t1 = performance.now();
  console.log("myTry2 " + (t1 - t0) + " milliseconds.");
  console.log("myTry2 " + res);
}

{ //original code
  var t0 = performance.now();
  var res;
  for (let j = 0; j < 10000; j++) {
    res = original_code(allitems);
  }
  var t1 = performance.now();
  console.log("original2 " + (t1 - t0) + " milliseconds.");
  console.log("original2 " + res);
}

Edit
I've tried to make a more reliable measurement. If there is a better way, I'd appreciate if you tell me.

+ Changed the sorting.

Hassan Imam · Answer

Use array#reduce to create a frequency object with word and frequency of this word in the array. Then based on the object value sort the values and then take out the all the words.

var arr = ['d', 'a', 'e', 'b', 'c', 'a', 'a', 'b', 'e'],
    frequency = arr.reduce((r,val) => {
      r[val] = r[val] || {val, count : 0};
      r[val].count = r[val].count + 1;
      return r;
    },{});
var result = Object.values(frequency).sort((a,b) => {
  return b.count - a.count;
}).map(({val}) => val);
console.log(result);

Order array of email addresses by frequency counts

Tags:

javascript

algorithm

sorting

david_adler

2 Answers

Koray

Hassan Imam

Recent Activity

Donate For Us

Order array of email addresses by frequency counts

Tags:

javascript

algorithm

sorting

david_adler

2 Answers

Koray

Hassan Imam

Related questions

Recent Activity

Donate For Us