Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

What is the most efficient way to implement GroupBy in Javascript?

I am trying to implement a GroupBy method with these parameters

// Desired signature for the grouping method. The body is intentionally empty:
// the comments below only illustrate the shape of each argument.
function GroupBy(keySelector, elementSelector, comparer)
{
    // keySelector = function(e) { return e.ID }
    // elementSelector = function(e) { return e.Name }
    // comparer = { Equals: function(a,b) { return a==b }, GetHashCode:... }
}

However I don't know an efficient way to implement it.

I created a jsPerf test with linq.js and a method I created which doesn't use a comparer and only works on flat types. (Output test here)

Other libraries such as underscore and Lo-Dash don't take a comparer parameter, so their implementations are irrelevant.

My key could be a class, so I need something to determine if TKey is the same in different instances.

So basically what I am trying to do is copy C# Linq GroupBy behavior documented here.

Sample input:

// Sample input: every element carries its own N object instance, so two keys
// with the same Value are still distinct objects.
var arrComplex =
[
    { N: { Value: 10 }, Name: "Foo" },
    { N: { Value: 10 }, Name: "Bar" },
    { N: { Value: 20 }, Name: "Foo" },
    { N: { Value: 20 }, Name: "Bar" }
];

Sample output (or something like this):

[
    {
       "Key": {"Value":10},
       "Elements":["Foo","Bar"]
    },
    {
        "Key": {"Value":20},
        "Elements":["Foo","Bar"]
    }
] 

Any ideas on how to implement it?

Bounty

For the bounty I would like you to take into consideration that:

  • The key could be an object
  • Two objects can be equal if some property is equal
  • It should be as fast as or faster than existing solutions
  • The result can be an array or object, doesn't matter as long as I can get elements grouped by a key

Well, I expect a complete answer.

like image 594
BrunoLM Avatar asked Dec 17 '13 11:12

BrunoLM


People also ask

How to do groupBy in JavaScript?

groupBy(dog => { return dog.breed; }); We can use the new groupBy function by calling it on the array instance, just like using a map, filter, or reduce function. groupBy takes a callback function which is called for each element in the array in ascending order.

How to group by using reduce in JavaScript?

The reduce method takes in an array and returns a single value, which is what we want for our groupBy method. And then we need to add the current value to this array and move on to the next iteration of the reduce by returning the object that we just created.

How to group array elements in JavaScript?

The group() method groups the elements of the calling array according to the string values returned by a provided testing function. The returned object has separate properties for each group, containing arrays with the elements in the group. This method should be used when group names can be represented by strings.

What is reduce function in JavaScript?

JavaScript Array reduce() The reduce() method executes a reducer function for each array element. The reduce() method returns a single value: the function's accumulated result. The reduce() method does not execute the function for empty array elements. The reduce() method does not change the original array.


1 Answers

I used your jsperf as a reference, for some of the finer points of script. I really, really liked your 'hash' code, so I totally stole it. Mine uses a different method to generate the string used to make the hash, which seems to be a little faster, which increases the performance, according to the 'browserscope' charts. I include in my test a 'too much recursion' proof of concept to show that it has recursion protection, like JSON.stringify and .toSource().

My jsfiddle shows that the code returns the format you need. My jsperf seems to indicate that it outperforms the posted solution. I also included the linq.js solution, but it performs pretty badly in Firefox for me. It works comparably in Safari, Chrome, and IE, but not faster than mine, except in IE. I even tried it on my phone, and still I have the same performance difference. I have personally tested it in the latest versions of all browsers in a side-by-side comparison with the posted solution, and mine outperforms it by around 40% across each of them. What are everyone's thoughts?

Here is my code:

// Flat sample data: the grouping key N is a plain number.
var arr = [[10, "Foo"], [10, "Bar"], [20, "Foo"], [20, "Bar"]].map(function (row) {
  return { N: row[0], Name: row[1] };
});

// Self-referencing object used to show that hashing survives a cycle.
var poc = { name: 'blah' };
poc.obj = poc;

// Complex sample data: every N is its own object instance, and all of them
// share the cyclic `poc` object.
var arrComplex = [[10, "Foo"], [10, "Bar"], [20, "Foo"], [20, "Bar"]].map(function (row) {
  return { N: { Value: row[0], TooMuchRecursionProofPOC: poc }, Name: row[1] };
});

// linq.js wrappers around both data sets.
var eArr = Enumerable.From(arr),
    eArrComplex = Enumerable.From(arrComplex);

/**
 * Installs a `GetHashCode()` method on Number.prototype and Object.prototype.
 *
 * - Numbers hash to their own value.
 * - Everything else hashes a string built by concatenating the string form of
 *   all own enumerable property values, descending into nested objects.
 *
 * The methods are defined as non-enumerable so they do not show up in
 * `for...in` loops over ordinary objects. Hashing never mutates the object
 * being hashed, so the same object always yields the same hash code.
 */
function setup_hashers() {
  // True for anything that can carry properties of its own.
  function isObjectLike(v) {
    return v !== null && (typeof v === 'object' || typeof v === 'function');
  }

  // Builds the string that gets hashed. `ancestors` holds the objects that are
  // currently being walked; a value already on that stack is a cycle and
  // contributes nothing, which keeps the walk finite.
  function tstr(value, ancestors) {
    var out = '', i, v;
    ancestors.push(value);
    for (i in value) {
      if (!Object.prototype.hasOwnProperty.call(value, i)) continue;
      v = value[i];
      if (isObjectLike(v)) {
        if (ancestors.indexOf(v) === -1) out += tstr(v, ancestors);
      } else {
        // String() also copes with null and undefined, which have no toString.
        out += String(v);
      }
    }
    ancestors.pop();
    return out;
  }

  // Defines GetHashCode without making it enumerable; it stays writable and
  // configurable so individual objects or later calls can still override it.
  function install(proto, fn) {
    Object.defineProperty(proto, 'GetHashCode', {
      value: fn,
      writable: true,
      configurable: true,
      enumerable: false
    });
  }

  install(Number.prototype, function () {
    return this.valueOf();
  });

  install(Object.prototype, function () {
    var s = isObjectLike(this) ? tstr(this, []) : String(this),
      h = 0;
    for (var i = 0; i < s.length; i++)
      h = ((h << 5) - h) + s.charCodeAt(i);

    return h;
  });
}

/**
 * Groups the items of `a` by key, using a caller-supplied comparer to decide
 * which keys are equal.
 *
 * @param {Array} a - Items to group.
 * @param {Function} [keyFunc] - Maps an item to its key. Defaults to identity.
 * @param {Function} [valFunc] - Maps an item to the value stored in the
 *   group. Defaults to identity.
 * @param {Object} [comp] - Comparer with `Equals(a, b)` and `Hash(key)`.
 *   The default compares with `==` and hashes with `key.GetHashCode()`, so it
 *   needs GetHashCode to be installed (see setup_hashers).
 * @param {boolean} [as_array] - When truthy, return an array of groups
 *   instead of an object.
 * @returns {Object|Array} Groups of the form `{ Key, Elements }`. In object
 *   form the first group seen for a hash is stored under that hash; further
 *   groups sharing the hash are stored under `hash + '-' + index`.
 */
function group_by(a, keyFunc, valFunc, comp, as_array) {
  if (!a.length) return as_array ? [] : {};

  keyFunc = keyFunc || function (e) { return e; };
  valFunc = valFunc || function (e) { return e; };
  comp = comp || {
    Equals: function (x, y) { return x == y; },
    Hash: function (e) { return e.GetHashCode(); }
  };

  var hashs = {};
  // hash -> every group created for that hash. Searching the whole bucket
  // keeps equal keys together even when different keys share a hash.
  var buckets = Object.create(null);
  var i, j, key, hash, bucket, group;

  for (i = 0; i < a.length; i++) {
    key = keyFunc(a[i]);
    hash = comp.Hash(key);
    bucket = buckets[hash] || (buckets[hash] = []);

    group = null;
    for (j = 0; j < bucket.length; j++) {
      if (comp.Equals(key, bucket[j].Key)) {
        group = bucket[j];
        break;
      }
    }

    if (!group) {
      group = { Key: key, Elements: [] };
      // A colliding key gets its own slot, suffixed with the item index.
      hashs[bucket.length ? hash + '-' + i : hash] = group;
      bucket.push(group);
    }

    group.Elements.push(valFunc(a[i]));
  }

  if (as_array) {
    return Object.keys(hashs).map(function (k) { return hashs[k]; });
  }

  return hashs;
}

/**
 * Baseline grouping without a comparer: keys are used directly as property
 * names, so they are compared by their string form.
 *
 * @param {Array} a - Items to group.
 * @param {Function} [keyFunc] - Maps an item to its key. Defaults to identity.
 * @param {Function} [valFunc] - Maps an item to the value stored in the
 *   group. Defaults to identity.
 * @returns {Array} Groups of the form `{ Key, Elements }`; empty input yields
 *   an empty array.
 */
function group_by_control(a, keyFunc, valFunc) {
  // This function always returns an array, so empty input returns one too.
  if (!a.length) return [];

  keyFunc = keyFunc || function (e) { return e; };
  valFunc = valFunc || function (e) { return e; };

  // No prototype, so keys such as "constructor" cannot hit inherited members.
  var hashs = Object.create(null);
  var i, key;
  for (i = 0; i < a.length; i++) {
    key = keyFunc(a[i]);
    if (!hashs[key]) hashs[key] = { Key: key, Elements: [] };
    hashs[key].Elements.push(valFunc(a[i]));
  }

  return Object.keys(hashs).map(function (k) { return hashs[k]; });
}

// Install the GetHashCode helpers before any comparer calls them.
setup_hashers();

// Run the three demo groupings and log each result.
(function () {
  function pickN(item) { return item.N; }
  function pickName(item) { return item.Name; }

  // Keys are equal when their Value matches; hashing goes through GetHashCode.
  var sameValue = {
    Equals: function (left, right) { return left.Value == right.Value; },
    Hash: function (key) { return key.GetHashCode(); }
  };

  // Baseline on the flat data.
  console.log(group_by_control(arr, pickN, pickName));

  // Comparer-based grouping, returned as an object.
  console.log(group_by(arrComplex, pickN, pickName, sameValue));

  // Same grouping, returned as an array.
  console.log(group_by(arrComplex, pickN, pickName, sameValue, true));
}());
like image 91
loushou Avatar answered Oct 19 '22 02:10

loushou