Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Binning an array in javascript for a histogram

I have the array below in JavaScript, which I need to bin into 20 buckets. The data values are between 0 and 1, so the bin size would be 0.05. I feel like there should be a function out there that takes two arguments, an array and a bin size, but I cannot find one. I know that D3.js has some features that help build such an array, but I cannot figure out which function might help.

// Sample data set: 100 readings between 0 and 1.
// Note that every entry is a string, not a number.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
]
like image 754
NodeJS_dev Avatar asked May 25 '16 18:05

NodeJS_dev


4 Answers

The feature you want is the histogram layout. You can do something like this:

// Group the values into 20 equal-width bins using the d3 (v3) histogram layout.
// The explicit range pins the bins to [0, 1], i.e. a bin width of 0.05.
// Without it the layout only spans the smallest..largest value in the data,
// which produces narrower bins than the 0.05 buckets asked for.
var data = d3.layout.histogram()
    .bins(20)
    .range([0, 1])
    (arr);

This is just a general example; you'll have to adjust the values. Check the documentation: https://github.com/d3/d3/wiki/Histogram-Layout

like image 142
Gerardo Furtado Avatar answered Sep 22 '22 07:09

Gerardo Furtado


You could make the bins yourself using some JS:

// Sample data set: 100 readings between 0 and 1, stored as strings.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

/**
 * Count how many values fall into each equal-width bin of a numeric range.
 *
 * Bins are half-open, [minNum, maxNum), except the last one, which also
 * includes the upper end of the range. A value of exactly `rangeMin` is
 * therefore counted in the first bin and `rangeMax` in the last one.
 *
 * The number of bins is (rangeMax - rangeMin) / binSize rounded to the
 * nearest integer (at least 1), and the edges are spaced evenly across the
 * range. Each edge is computed from its bin index rather than by repeatedly
 * adding `binSize`: repeated addition accumulates floating-point error, which
 * shifts the edges and, for steps such as 0.1, yields one bin too many.
 *
 * @param {Array<number|string>} values - Data to bin. Each entry is converted
 *   with Number(); entries that are not numeric or lie outside the range are
 *   skipped.
 * @param {number} binSize - Desired bin width; must be greater than 0.
 * @param {number} [rangeMin=0] - Lower end of the range (inclusive).
 * @param {number} [rangeMax=1] - Upper end of the range (inclusive).
 * @returns {Array<{binNum: number, minNum: number, maxNum: number, count: number}>}
 *   One record per bin, ordered from lowest to highest.
 * @throws {RangeError} If binSize is not positive or rangeMax is not greater
 *   than rangeMin.
 */
function binValues(values, binSize, rangeMin, rangeMax) {
  var lower = rangeMin === undefined ? 0 : rangeMin;
  var upper = rangeMax === undefined ? 1 : rangeMax;
  if (!(binSize > 0) || !(upper > lower)) {
    throw new RangeError("binValues needs a positive bin size and rangeMax > rangeMin");
  }

  var span = upper - lower;
  var total = Math.max(1, Math.round(span / binSize));

  var result = [];
  for (var k = 0; k < total; k++) {
    result.push({
      binNum: k,
      minNum: lower + span * k / total,
      // Pin the final edge to the exact upper bound.
      maxNum: k === total - 1 ? upper : lower + span * (k + 1) / total,
      count: 0
    });
  }

  for (var i = 0; i < values.length; i++) {
    var num = Number(values[i]);
    // Written as a negated range test so that NaN is skipped as well.
    if (!(num >= lower && num <= upper)) {
      continue;
    }

    // Jump straight to the bin instead of scanning all of them; the clamp
    // keeps a value equal to the upper bound inside the last bin.
    var idx = Math.min(Math.floor((num - lower) / span * total), total - 1);

    // The division above can be off by one bin right at an edge; nudge the
    // index so it always agrees with the stored minNum/maxNum.
    while (idx > 0 && num < result[idx].minNum) {
      idx--;
    }
    while (idx < total - 1 && num >= result[idx].maxNum) {
      idx++;
    }

    result[idx].count++;
  }

  return result;
}

var interval = .05;
// Despite its name this is the upper end of the value range (the lower end
// is 0), not a bucket count. The name is kept so existing references to it
// still resolve.
var numOfBuckets = 1;

var bins = binValues(arr, interval, 0, numOfBuckets);
var binCount = bins.length;

https://jsbin.com/keropoyadu/edit?js,output

like image 20
Jack Fairfield Avatar answered Nov 16 '22 15:11

Jack Fairfield


With the release of D3.js v6 d3.layout.histogram has been superseded by d3.bin() which now belongs to the d3-array module.

To bin your array you create a histogram generator:

// Bin generator restricted to the full interval [0, 1]. It asks for 19
// thresholds, which splits the domain into 19 + 1 = 20 bins.
var histGenerator = d3.bin().domain([0, 1]).thresholds(19);

There are some more options available to configure your thresholds and thereby your bins, but this generator will do exactly what you asked for. You retrieve the computed bins as an array by calling the generator with your array of values:

// Calling the generator with the data returns the computed bins as an array.
var bins = histGenerator(arr);

Have a look at this working example:

// Sample data set: 100 readings between 0 and 1.
// Note that every entry is a string, not a number.
var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

// Create the bin generator, then configure it step by step.
var histGenerator = d3.bin();
histGenerator.domain([0, 1]);   // bin over the whole interval [0, 1]
histGenerator.thresholds(19);   // 19 thresholds => 19 + 1 bins

// Apply the generator to the data and print the resulting array of bins.
var bins = histGenerator(arr);
console.log(bins);
<script src="http://d3js.org/d3.v6.js"></script>
like image 14
altocumulus Avatar answered Nov 16 '22 15:11

altocumulus


The d3js library has a d3.layout.histogram() function that returns a histogram layout object for grouping data into bins. The layout object is both an object and a function. You can call methods on the layout object to set the desired behavior of the layout. You can then call the layout object to group the data into an array of bins. Each bin is an array of values. Each bin also has the additional properties x, dx, and y.

For example, the following code will group the data into 20 bins that cover the range from 0 to 1.

// Sample data set: 100 readings between 0 and 1, stored as strings.
var arr = ["0.362743", "0.357969", "0.356322", "0.355757", "0.358511", "0.357218", "0.356696", "0.354579", "0.828295", "0.391186", "0.378577", "0.39372", "0.396416", "0.395641", "0.37573", "0.379666", "0.377443", "0.391842", "0.402021", "0.377516", "0.38936", "0.38936", "0.400883", "0.393171", "0.374419", "0.400821", "0.380502", "0.396098", "0.388256", "0.398968", "0.392525", "0.401858", "0.387297", "0.376471", "0.378183", "0.379787", "0.382024", "0.387928", "0.395367", "0.391972", "0.381295", "0.391183", "0.383598", "0.386424", "0.384338", "0.401834", "0.406253", "0.392854", "0.399266", "0.400804", "0.391146", "0.395441", "0.396265", "0.397894", "0.384822", "0.385181", "0.395443", "0.400981", "0.401716", "0.406633", "0.406887", "0.40694", "0.391219", "0.387946", "0.398858", "0.402233", "0.388583", "0.389772", "0.397084", "0.711566", "0.954557", "0.524007", "0.672288", "0.668441", "0.421726", "0.549536", "0.932952", "0.397851", "0.395536", "0.354818", "0.374355", "0.375257", "0.362613", "0.391271", "0.379219", "0.363316", "0.866006", "0.862254", "0.864403", "0.861346", "0.845225", "0.784467", "0.801275", "0.638579", "0.847282", "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"];
// Build a histogram layout configured for 20 bins over the range 0..1 and
// immediately invoke it on the data to group the values into those bins.
var bins = d3.layout.histogram().bins(20).range([0, 1])(arr);

After the code runs...

bins[i] is an array of values in the ith bin
bins[i].x is the lower bounds of the ith bin
bins[i].dx is the width of the ith bin
bins[i].x + bins[i].dx is the upper bounds of the ith bin
bins[i].y is the number of values in the ith bin

The documentation for the histogram layout object is at...

https://github.com/d3/d3/wiki/Histogram-Layout

Note: By default, the layout object converts string values to number values. Thus, the layout function will work with your string values.

like image 6
Bobby Orndorff Avatar answered Nov 16 '22 16:11

Bobby Orndorff