dc.js create dataTable with min max avg values for three columns

I'm attempting to create a d3/dc/crossfilter dataTable with the min, max and average values for three of the columns in the sample data. I've been struggling for hours but can't work out how to integrate the reduceAdd, reduceRemove and reduceInitial functions into the dataTable to create the three necessary rows.

Desired output will look something like this:

------------------------------------------
|  Value   |  Cars  |  Bikes  |  Trucks  |
------------------------------------------
|    Min   |   125  |   310   |    189   |
------------------------------------------
|    Max   |   230  |   445   |    290   |
------------------------------------------
|    Avg   |   178  |   385   |    245   |
------------------------------------------

I also can't see how to add the first (label) column. I know reduceInitial can return an array (e.g. ['min', 'max', 'avg']), but how do I reference the labels from it?

var myCSV = [	
{"shift":"1","date":"01/01/2016/08/00/00","car":"178","truck":"255","bike":"317","moto":"237"},
{"shift":"2","date":"01/01/2016/17/00/00","car":"125","truck":"189","bike":"445","moto":"273"},
{"shift":"3","date":"02/01/2016/08/00/00","car":"140","truck":"219","bike":"328","moto":"412"},
{"shift":"4","date":"02/01/2016/17/00/00","car":"222","truck":"290","bike":"432","moto":"378"},
{"shift":"5","date":"03/01/2016/08/00/00","car":"200","truck":"250","bike":"420","moto":"319"},
{"shift":"6","date":"03/01/2016/17/00/00","car":"230","truck":"220","bike":"310","moto":"413"},
{"shift":"7","date":"04/01/2016/08/00/00","car":"155","truck":"177","bike":"377","moto":"180"},
{"shift":"8","date":"04/01/2016/17/00/00","car":"179","truck":"203","bike":"405","moto":"222"},
{"shift":"9","date":"05/01/2016/08/00/00","car":"208","truck":"185","bike":"360","moto":"195"},
{"shift":"10","date":"05/01/2016/17/00/00","car":"150","truck":"290","bike":"315","moto":"280"},
{"shift":"11","date":"06/01/2016/08/00/00","car":"200","truck":"220","bike":"350","moto":"205"},
{"shift":"12","date":"06/01/2016/17/00/00","car":"230","truck":"170","bike":"390","moto":"400"},
];


dataTable = dc.dataTable('#dataTable');
lc1 = dc.lineChart("#line1");
lc2 = dc.lineChart("#line2");
lc3 = dc.lineChart("#line3");

var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S");

myCSV.forEach(function (d) {
	d.date = dateFormat.parse(d.date);
});

myCSV.forEach(function (d) {
	d['car'] = +d['car'];
	d['bike'] = +d['bike'];
	d['moto'] = +d['moto'];
});

//console.log(myCSV);

var facts = crossfilter(myCSV);
var dateDim = facts.dimension(function (d) {return d.date});

var carDim = facts.dimension(function (d) {return d['car']});
var dgCar = dateDim.group().reduceSum(function (d) {return d['car']});

var bikeDim = facts.dimension(function (d) {return d['bike']});
var dgBike = dateDim.group().reduceSum(function (d) {return d['bike']});

var motoDim = facts.dimension(function (d) {return d['moto']});
var dgMoto = dateDim.group().reduceSum(function (d) {return d['moto']});

var minDate = new Date ("2016-01-01T08:00:00.000Z");
var maxDate = new Date ("2016-01-03T17:00:00.000Z");	

var maxY = d3.max(myCSV, function(d) {return d['car']});

function reduceAdd(i,d){ return i+1; }
function reduceRemove(i,d){return i-1; }
function reduceInitial(){ return ['min','max','avg'];}


dataTable
  .width(jsTablWidth)
  .height(400)
  .dimension(dateDim)
  .group( function(d){return '';} )
  .columns([
    {
      label: 'Value',
      format: function(d) { return dateGroup1.reduce(reduceAdd,reduceRemove,reduceInitial); }
    },
    {
      label: tSel1.replace(/_/g, " "),
      format: function(d) { return /* avg cars? */; }
    },
    {
      label: tSel2.replace(/_/g, " "),
      format: function(d) { return /* avg bikes? */; }
    },
    {
      label: tSel3.replace(/_/g, " "),
      format: function(d) { return /* avg moto? */; }
    }
  ]);


dc.renderAll();
dc.redrawAll();
svg{height:280px;}
<script src="http://cdnjs.cloudflare.com/ajax/libs/d3/3.3.3/d3.min.js"></script>
<script src="http://cdnjs.cloudflare.com/ajax/libs/crossfilter/1.3.1/crossfilter.min.js"></script>
<script src="http://dc-js.github.io/dc.js/js/dc.js"></script>
<link href="http://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>

<svg id="dataTable"></svg>
<svg id="line1"></svg>
<svg id="line2"></svg>
<svg id="line3"></svg>
asked Feb 27 '17 by crashwap

2 Answers

Okay, hope you're okay with transposing the table across the diagonal, putting the modes of transportation as rows instead of columns. This solution is already pretty wacky without figuring that part out.

[screenshot: sample output]

There's really no way to calculate the min and max except to keep track of all the values. So we're going to use the reductions from the complex reductions example. These actually don't reduce at all, but maintain a sorted array of the filtered rows.

We need a unique key in order to maintain the sorted array (so that we remove the correct row). Luckily you have that in the shift field.

So here are those functions, or rather functions that generate reducers given a unique key accessor.

  function groupArrayAdd(keyfn) {
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.right(elements, keyfn(item));
          elements.splice(pos, 0, item);
          return elements;
      };
  }
  function groupArrayRemove(keyfn) {
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.left(elements, keyfn(item));
          if(keyfn(elements[pos])===keyfn(item))
              elements.splice(pos, 1);
          return elements;
      };
  }
  function groupArrayInit() {
      return [];
  }
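
For instance (a minimal sketch, not from the original answer), feeding a couple of rows through a generated reducer keeps the array sorted by the key:

  var add = groupArrayAdd(dc.pluck('shift'));
  var rows = groupArrayInit();
  add(rows, {shift: '2'});
  add(rows, {shift: '1'});
  // rows is now [{shift: '1'}, {shift: '2'}]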

Since these keep references to the entire rows, we only need one group; we'll use more specific accessors when we calculate the metrics below.

Here we want crossfilter.groupAll, which reduces everything to one bin. This is because the rows are not partitioned by any key; every row contributes to all modes of transport:

var filteredRows = facts.groupAll().reduce(
  groupArrayAdd(dc.pluck('shift')),
  groupArrayRemove(dc.pluck('shift')),
  groupArrayInit
);
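
As a quick sanity check (a hypothetical snippet, not part of the original answer): with nothing filtered, the reduction holds every source row, kept sorted by shift:

console.log(filteredRows.value().length); // 12 — all sample rows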

Now comes the most absurd part. We're going to create the fakest dimension object you ever saw. The important thing is that it's an object with a .bottom() method which dynamically calculates each of the rows:

var fakeDim = {
  bottom: function() {
    return [
      {key: 'Car', value: filteredRows.value(), acc: dc.pluck('car')},
      {key: 'Truck', value: filteredRows.value(), acc: dc.pluck('truck')},
      {key: 'Bike', value: filteredRows.value(), acc: dc.pluck('bike')},
      {key: 'Moto', value: filteredRows.value(), acc: dc.pluck('moto')}
    ];
  }
};

Except, wait, that doesn't look like it's doing any calculation at all, just fetching values? And what's that weird acc?

Well, we're producing exactly the source data we need for the table rows, and we'll use the format accessors below to do the actual calculation. We'll use the key for the "label column", keep the raw rows in the value member, and supply an accessor acc for computing the metrics.
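
As a side note, dc's dataTable fetches its rows via dimension.bottom() when its order is the default d3.ascending, which is why that one method is all the fake dimension needs. A quick hypothetical check:

console.log(fakeDim.bottom().map(function(d) { return d.key; }));
// ["Car", "Truck", "Bike", "Moto"]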

The data table definition looks like this:

dataTable
  .width(400)
  .height(400)
  .dimension(fakeDim)
  .group( function(d){return '';} )
  .columns([
    {
      label: 'Value',
      format: function(d) { 
        return d.key;
      }
    },
    {
      label: 'Min',
      format: function(d) {
        return d3.min(d.value, d.acc);
      }
    },
    {
      label: 'Max',
      format: function(d) {
        return d3.max(d.value, d.acc);
      }
    },
    {
      label: 'Avg',
      format: function(d) {
        return d3.mean(d.value, d.acc);
      }
    }
  ]);

Here's where all the metrics are finally calculated. We'll have all the rows available, and we have an accessor for each table row. d3-array has handy functions for computing the min, max, and average of an array. Boom, done.
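
For instance (a sketch with the sample data and no filters active), the pieces line up like this:

var rows = filteredRows.value();  // all 12 source rows
d3.min(rows, dc.pluck('car'));    // 125
d3.max(rows, dc.pluck('car'));    // 230
d3.mean(rows, dc.pluck('car'));   // 184.75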

I threw a stacked chart into this fiddle for testing. (I know stacking these values probably makes no sense, it just helps to be able to filter.)

http://jsfiddle.net/gordonwoodhull/g4xqvgvL/21/

Rotating the dataTable

The extra bounty on this reminded me that I never solved the table transposition problem, so I thought I'd take a look, because it's fun. I still think the bounty should go to @SergGr, but here is a solution to transpose the table, based on the categories, the dimension, and the column accessors/formatters.

First, we're going to need the list of categories, so let's structure the categories and field names a little better:

var categories = {  
  Car: 'car',
  Truck: 'truck',
  Bike: 'bike',
  Moto: 'moto'
};

Now the fake dimension can be simplified, because it's generated from this category map:

function fake_dimension(cats) {
  return {
    bottom: function() {
      return Object.keys(cats).map(function(k) {
        return {
          key: k,
          value: filteredRows.value(),
          acc: dc.pluck(cats[k])
        };
      });
    }
  };
}
var fakeDim = fake_dimension(categories);

We need to pull the column definitions out of the chart definition, because we're going to transform them:

var columns = [
  {
    label: 'Value',
    format: function(d) { 
      return d.key;
    }
  },
  {
    label: 'Min',
    format: function(d) {
      return d3.min(d.value, d.acc);
    }
  },
  {
    label: 'Max',
    format: function(d) {
      return d3.max(d.value, d.acc);
    }
  },
  {
    label: 'Avg',
    format: function(d) {
      return d3.mean(d.value, d.acc);
    }
  }
];

Finally, we can write the transposition function:

function transpose_datatable(cats, dim, cols) {
  var cols2 = d3.map(cols, function(col) { // 1
    return col.label;
  });
  return {
    dim: { // 2
      bottom: function() {
        var dall = d3.map(dim.bottom(Infinity), function(row) { // 3
          return row.key;
        });
        return cols.slice(1).map(function(col) { // 4
          var row = {
            label: col.label
          };
          Object.keys(cats).forEach(function(k) {
            row[k] = dall.get(k);
          });
          return row;
        });
      }
    },
    cols: [ // 5
      {
        label: cols[0].label,
        format: function(d) {
          return d.label;
        }
      }
    ].concat(Object.keys(cats).map(function(k) { // 6
      return {
        label: k,
        format: function(d) {
          return cols2.get(d.label).format(d[k]);
        }
      }
    }))
  };
}

var transposed = transpose_datatable(categories, fakeDim, columns);

  1. First, we're going to need a map of the original columns to their definitions, because these will become the rows. We can use a d3.map here, which acts like a well-behaved JavaScript object.
  2. We're going to create a new fake dimension and a new array of column definitions. The fake dimension only has a .bottom() method, just like the one above.
  3. The definition of .bottom() will need all the original data, indexed by key (category name). So we'll throw that into a d3.map object as well.
  4. Now we can build the fake dimension data. The first column was just the titles (which will now be column headers), so we'll skip that. The data for each row will be the new title (the former column label) plus a field for each category. The fields are populated with the rows from the original dimension.
  5. The new column definitions: the first reproduces the label column, and the rest are generated from the category names.
  6. The label for each column is now the category name, and .format() calls the original column's format, fetching the data using the category name.
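
Wiring the transposed pieces into the table is then just (a sketch, reusing the dataTable chart from the first part):

dataTable
  .dimension(transposed.dim)
  .columns(transposed.cols);
dc.renderAll();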

New screenshot:

[screenshot: rotated datatable]

answered by Gordon


This is another solution that produces results closer to the requested ones, although with much more code than Gordon's.

Intro

I agree with Gordon that there is no reasonable way to achieve what you want directly with crossfilter. Crossfilter is row-oriented, and you want to produce multiple rows based on columns. So the only way is to introduce some "fake" step. And a "fake" step implicitly means that the result will not be updated when the original datasource changes. I see no way to fix that, as crossfilter hides its implementation details (such as filterListeners, dataListeners, and removeDataListeners) well enough.

However, dc is implemented in such a way that by default all charts are redrawn after various events (because they are all in the same global group). Because of this, the "fake objects", if properly implemented, can be recalculated from the updated data as well.

Thus my code contains two implementations for min/max:

  • fast(er) but unsafe if you don't do any additional filter
  • slow(er) but safe in case you want additional filtering

Note that if you use the fast-but-unsafe implementation and do additional filtering, you'll get exceptions, and other features might break as well.

Code

All the code is available at https://jsfiddle.net/4kcu2ut1/1/. Let's separate it into logical blocks and see them one by one.

First come some helper methods and objects. Each Op object essentially contains the methods needed to pass to reduce, plus an optional getOutput for the cases where the accumulator contains more data than just the result, as with avgOp and the min/max "safe" ops.

var minOpFast = {
    add: function (acc, el) {
        return Math.min(acc, el);
    },
    remove: function (acc, el) {
        throw new Error("Not supported");
    },
    initial: function () {
        return Number.MAX_VALUE;
    }
};

var maxOpFast = {
    add: function (acc, el) {
        return Math.max(acc, el);
    },
    remove: function (acc, el) {
        throw new Error("Not supported");
    },
    initial: function () {
        // Number.MIN_VALUE is the smallest *positive* double and would break
        // for non-positive values; -Infinity is the safe identity for max
        return -Infinity;
    }
};


var binarySearch = function (arr, target) {
    var lo = 0;
    var hi = arr.length;
    while (lo < hi) {
        var mid = (lo + hi) >>> 1; // safe int division
        if (arr[mid] === target)
            return mid;
        else if (arr[mid] < target)
            lo = mid + 1;
        else
            hi = mid;
    }
    return lo;
};
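
A couple of examples of what it returns (the insertion point when the target is absent):

binarySearch([1, 3, 5], 3); // 1 — exact match
binarySearch([1, 3, 5], 4); // 2 — insertion point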

var minOpSafe = {
    add: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 0, el);
        return acc;
    },
    remove: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 1);
        return acc;
    },
    initial: function () {
        return [];
    },
    getOutput: function (acc) {
        return acc[0];
    }
};

var maxOpSafe = {
    add: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 0, el);
        return acc;
    },
    remove: function (acc, el) {
        var index = binarySearch(acc, el);
        acc.splice(index, 1);
        return acc;
    },
    initial: function () {
        return [];
    },
    getOutput: function (acc) {
        return acc[acc.length - 1];
    }
};

var avgOp = {
    add: function (acc, el) {
        acc.cnt += 1;
        acc.sum += el;
        acc.avg = acc.sum / acc.cnt;
        return acc;
    },
    remove: function (acc, el) {
        acc.cnt -= 1;
        acc.sum -= el;
        acc.avg = acc.sum / acc.cnt;
        return acc;
    },
    initial: function () {
        return {
            cnt: 0,
            sum: 0,
            avg: 0
        };
    },
    getOutput: function (acc) {
        return acc.avg;
    }
};
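
Each Op plugs straight into crossfilter's reduce. A minimal sketch (assuming facts is the crossfilter instance created further below), tracking the average of the car column alone:

var avgCars = facts.groupAll().reduce(
    function (acc, el) { return avgOp.add(acc, el.car); },
    function (acc, el) { return avgOp.remove(acc, el.car); },
    avgOp.initial
);
// avgOp.getOutput(avgCars.value()) -> 184.75 for the unfiltered sample data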

Then we prepare the source data and specify the transformation we want. aggregates is a list of operations from the previous step, additionally decorated with a key under which to store temporary data in the compound accumulator (it just has to be unique) and a label to show in the output. srcKeys contains the list of property names (all of which must be of the same shape) that will be processed by each operation from the aggregates list.

var myCSV = [
    {"shift": "1", "date": "01/01/2016/08/00/00", "car": "178", "truck": "255", "bike": "317", "moto": "237"},
    {"shift": "2", "date": "01/01/2016/17/00/00", "car": "125", "truck": "189", "bike": "445", "moto": "273"},
    {"shift": "3", "date": "02/01/2016/08/00/00", "car": "140", "truck": "219", "bike": "328", "moto": "412"},
    {"shift": "4", "date": "02/01/2016/17/00/00", "car": "222", "truck": "290", "bike": "432", "moto": "378"},
    {"shift": "5", "date": "03/01/2016/08/00/00", "car": "200", "truck": "250", "bike": "420", "moto": "319"},
    {"shift": "6", "date": "03/01/2016/17/00/00", "car": "230", "truck": "220", "bike": "310", "moto": "413"},
    {"shift": "7", "date": "04/01/2016/08/00/00", "car": "155", "truck": "177", "bike": "377", "moto": "180"},
    {"shift": "8", "date": "04/01/2016/17/00/00", "car": "179", "truck": "203", "bike": "405", "moto": "222"},
    {"shift": "9", "date": "05/01/2016/08/00/00", "car": "208", "truck": "185", "bike": "360", "moto": "195"},
    {"shift": "10", "date": "05/01/2016/17/00/00", "car": "150", "truck": "290", "bike": "315", "moto": "280"},
    {"shift": "11", "date": "06/01/2016/08/00/00", "car": "200", "truck": "220", "bike": "350", "moto": "205"},
    {"shift": "12", "date": "06/01/2016/17/00/00", "car": "230", "truck": "170", "bike": "390", "moto": "400"},
];

var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S");

myCSV.forEach(function (d) {
    d.date = dateFormat.parse(d.date);
    d['car'] = +d['car'];
    d['bike'] = +d['bike'];
    d['moto'] = +d['moto'];
    d['truck'] = +d['truck'];
    d.shift = +d.shift;
});

//console.table(myCSV);

var aggregates = [
    // not compatible with additional filtering
    /*{
        key: 'min',
        label: 'Min',
        agg: minOpFast
    },*/
    {
        key: 'minSafe',
        label: 'Min Safe',
        agg: minOpSafe
    },
    // not compatible with additional filtering
    /*{
        key: 'max',
        label: 'Max',
        agg: maxOpFast
    },*/
    {
        key: 'maxSafe',
        label: 'Max Safe',
        agg: maxOpSafe
    },
    {
        key: 'avg',
        agg: avgOp,
        label: 'Average'
    }
];

var srcKeys = ['car', 'bike', 'moto', 'truck'];

And now to the magic. buildTransposedAggregatesDimension is what does all the heavy work here. Essentially it does two steps:

  1. First, groupAll to get aggregated data for each combination in the cross product of all operations and all keys.

  2. Split the mega-object grouped into an array that can serve as the data source for another crossfilter.

Step #2 is where my "fake" is. It seems to me much less "fake" than Gordon's solution, as it doesn't rely on any internal details of crossfilter or dc (see the bottom method in Gordon's solution).

Also, the splitting at step #2 is where the data is actually transposed to meet your requirements. Obviously, the code can easily be modified to skip that and produce results laid out the same way as in Gordon's solution.

Note also that it is important that this additional step does no additional calculations and merely transforms already-computed values into the appropriate format. This is crucial for updates after filtering to work, because this way a table bound to the result of buildTransposedAggregatesDimension is still effectively bound to the original crossfilter datasource.

var buildTransposedAggregatesDimension = function (facts, keysList, aggsList) {
    // "grouped" is a single record with all aggregates for all keys computed
    var grouped = facts.groupAll()
            .reduce(
            function add(acc, el) {
                aggsList.forEach(function (agg) {
                    var innerAcc = acc[agg.key];
                    keysList.forEach(function (key) {
                        var v = el[key];
                        innerAcc[key] = agg.agg.add(innerAcc[key], v);
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            },
            function remove(acc, el) {
                aggsList.forEach(function (agg) {
                    var innerAcc = acc[agg.key];
                    keysList.forEach(function (key) {
                        var v = el[key];
                        innerAcc[key] = agg.agg.remove(innerAcc[key], v);
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            },
            function initial() {
                var acc = {};
                aggsList.forEach(function (agg) {
                    var innerAcc = {};
                    keysList.forEach(function (key) {
                        innerAcc[key] = agg.agg.initial();
                    });
                    acc[agg.key] = innerAcc;
                });
                return acc;
            }).value();

    // split grouped back to array with element for each aggregation function
    var groupedAsArr = [];
    aggsList.forEach(function (agg, index) {
        groupedAsArr.push({
            sortIndex: index, // preserve index in aggsList so we can sort by it later
            //agg: agg,
            key: agg.key,
            label: agg.label,
            valuesContainer: grouped[agg.key],
            getOutput: function (columnKey) {
                var aggregatedValueForKey = grouped[agg.key][columnKey];
                return agg.agg.getOutput !== undefined ?
                        agg.agg.getOutput(aggregatedValueForKey) :
                        aggregatedValueForKey;
            }
        })
    });
    return crossfilter(groupedAsArr).dimension(function (el) {   return el;    });
};
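
To make the shape concrete: with the sample data and no filters, the reduced grouped object would look roughly like this (a sketch, values abbreviated):

// grouped = {
//     minSafe: { car: [125, 140, ...], bike: [...], moto: [...], truck: [...] }, // sorted arrays
//     maxSafe: { car: [..., 222, 230], bike: [...], moto: [...], truck: [...] },
//     avg:     { car: { cnt: 12, sum: 2217, avg: 184.75 }, bike: { ... }, ... }
// }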

The small helper method buildColumns creates a column for each original key in srcKeys, plus an additional column for the operation labels.

var buildColumns = function (srcKeys) {
    var columns = [];
    columns.push({
        label: "Aggregate",
        format: function (el) {
            return el.label;
        }
    });
    srcKeys.forEach(function (key) {
        columns.push({
            label: key,
            format: function (el) {
                return el.getOutput(key);
            }
        });
    });
    return columns;
};
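
With the srcKeys above, this produces five columns:

buildColumns(srcKeys).map(function (c) { return c.label; });
// -> ["Aggregate", "car", "bike", "moto", "truck"]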

So now let's put it all together and create the table.

var facts = crossfilter(myCSV);
var aggregatedDimension = buildTransposedAggregatesDimension(facts, srcKeys, aggregates);
dataTable = dc.dataTable('#dataTable'); // put such a <table> in your HTML!

dataTable
        .width(500)
        .height(400)
        .dimension(aggregatedDimension)
        .group(function (d) { return ''; })
        .columns(buildColumns(srcKeys))
        .sortBy(function (el) { return el.sortIndex; })
        .order(d3.ascending);

//dataTable.render();
dc.renderAll();

There is also an additional piece of code, shamelessly stolen from Gordon, that adds a line chart for additional filtering; a sketch of it follows.
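
For completeness, here is roughly what that extra chart could look like (the #line1 element id and variable names are assumptions, not taken from the fiddle):

var dateDim = facts.dimension(dc.pluck('date'));
var carsByDate = dateDim.group().reduceSum(dc.pluck('car'));

dc.lineChart('#line1')
        .width(500)
        .height(150)
        .dimension(dateDim)
        .group(carsByDate)
        .x(d3.time.scale().domain(d3.extent(myCSV, dc.pluck('date'))));
// brushing on this chart filters facts, and the table redraws with
// recalculated aggregates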

answered by SergGr