Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Faster way to find an occurrence of 2-Dimensional array of keywords with frequency value

Do I have a faster way to count all frequency of all elements in 2-Dimensional array? Like this sample:

var array = [["a", "b"]["c", "d"]["b", "d"]["c", "a", "b"], ["a", "b", "c", "d"];

My expected result would be an array of objects contain keyword and frequency value.
Like this,

result = [{ keyword: "a",
            frequency: 3
          }, {
            keyword: "b",
            frequency: 4
          }, ... ];

Here is my solution:

function generateData (records) {
  var data = [];
  for (var i = 0; i < records; ++i) {
      data.push(["a", "b", "c", "d", "e"]);
  }
  // some gap to insert data
  setTimeout(function () {
  }, Math.random() * 1000);
  return data;
}

function mine (data) {
  var result = [];
  data.forEach( function (keywords) {
      for (var i = 0, len = keywords.length; i < len; ++i) {
          var pos = result.map( function (x) {
              return x.keyword;
          }).indexOf(keywords[i]);

          if (pos == -1) {
              var newKeyword = {
                  keyword: keywords[i],
                  frequency: 1
              }
              result.push(newKeyword);
          } else { 
              result[pos].frequency += 1;
          }
      }
  });
  return result;
}

var dataset = generateData(50000);

var start = performance.now();
var result = mine(dataset);
var end = performance.now();

console.log(result);
console.log("Total time: " + (end - start) + " milliseconds.");

Does anyone have a faster way to solve this problem? Note: With 2-Dimensional of keywords array (around ~50,000 records).

like image 606
Parn Avatar asked Dec 07 '22 12:12

Parn


2 Answers

You can use .reduce() to get the desired frequency in the form of an object:

let data = [
  ["a", "b"],
  ["c", "d"],
  ["b", "d"],
  ["c", "a", "b"],
  ["a", "b", "c", "d"]
];

let result = [].concat(...data).reduce((r, c) => (r[c] = (r[c] || 0) + 1, r), {});

console.log(result);
like image 68
Mohammad Usman Avatar answered Dec 11 '22 09:12

Mohammad Usman


If this really is a bottleneck and squeezing speed out of the counting is worth code that's not quite as pretty as the functional solutions, you will have a hard time beating for loops in today's javascript engines. In my tests this is about twice as fast as using reduce():

var array = [["a", "b"],["c", "d"],["b", "d"],["c", "a", "b"], ["a", "b", "c", "d"]];

let counts = new Map()
for (let i = 0; i < array.length; i++){
    for (let j = 0; j < array[i].length; j++){
        let n = counts.get(array[i][j]) || 0
        counts.set(array[i][j], n + 1)
    }
}

JSperf Benchmark here

like image 40
Mark Avatar answered Dec 11 '22 10:12

Mark