Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

d3.js - group 2 data values in a stacked bar chart

Tags:

d3.js

I have the following csv data,

date,scanned,unscanned,compid,sbu
01/2014,10,90,101,f&r
02/2014,55,40,101,f&r
03/2014,45,23,101,f&r
04/2014,65,35,101,f&r
05/2014,100,20,101,f&r
06/2014,50,30,101,f&r
07/2014,10,90,101,f&r
08/2014,22,48,101,f&r
09/2014,0,100,101,f&r
10/2014,3,97,101,f&r
11/2014,22,60,101,f&r
12/2014,57,37,101,f&r
01/2014,30,100,101,ip
02/2014,130,10,101,ip

Is there a way to combine the data for Jan 2014 across both sbu values (f&r and ip) and show the combined values in a stacked bar? For example, if I check a checkbox to group, I need to show scanned as 30+10=40 and unscanned as 100+90=190 in a single stack for Jan 2014 on the x-axis.

My code to build the stack bar is as follows:

// Chart geometry, scales and formatters shared by the drawing code below.
// w/h are the outer SVG size. From its uses in this file (p[3] is the
// horizontal offset and p[2] the bottom offset of the plot group) p reads
// as padding in [top, right, bottom, left] order.
var w = 960,
    h = 500,
    p = [20, 50, 30, 20],

    // Each declarator ends with a comma so that x, y, z, parse and format
    // are all declared by this one `var` statement. A semicolon after x
    // would end the statement early and turn the remaining names into
    // implicit globals (a ReferenceError in strict mode).
    x = d3.time.scale().range([1, 80]),
    y = d3.scale.linear().range([0, h - p[0] - p[2]]),
    z = d3.scale.ordinal().range(["#819FF7", "#CB491A"]),
    parse = d3.time.format("%m/%Y").parse,   // "01/2014" -> Date
    format = d3.time.format("%b-%y");        // Date -> "Jan-14"

// Bottom axis driven by the x scale: one tick per month, labelled with the
// same "%b-%y" pattern as `format`.
var xAxis = d3.svg.axis()
    .scale(x)
    .orient("bottom")
    .ticks(d3.time.month, 1)
    //.ticks(12)
    .tickFormat(d3.time.format("%b-%y"));


    /*var yAxis = d3.svg.axis()
    .scale(y)
    .ticks(12)
    .orient("left");*/

// Root <svg> appended to #container. Everything is drawn inside an inner <g>
// whose origin is moved p[3] px from the left edge and p[2] px above the
// bottom edge of the SVG.
var svg = d3.select("#container").append("svg:svg")
    .attr("width", w)
    .attr("height", h)
  .append("svg:g")
    .attr("transform", "translate(" + p[3] + "," + (h - p[2]) + ")");

// Load scandata.csv and draw the stacked bars, axis, labels and rules from
// inside the callback. `scan` is the array of parsed CSV rows.
// NOTE(review): the callback's closing "});" is not part of this snippet.
d3.csv("scandata.csv", function(scan) {

  // Transpose the data into layers by cause.
  // One layer per column name ("scanned", "unscanned"); every point carries
  // the parsed date (x), the numeric column value (y), the compid (z) and the
  // column name (typescan). The stack layout provides the y0 baseline that is
  // read further down.
  var scantypes = d3.layout.stack()(["scanned", "unscanned"].map(function(scans) {
    return scan.map(function(d) {
      return {x: parse(d.date), y: +d[scans],z:d.compid,typescan:scans};
    });
  }));




  // Compute the x-domain (by date) and y-domain (by top).
  // NOTE(review): x is built with d3.time.scale(), whose domain is normally a
  // [start, end] pair; passing every date looks like a holdover from an
  // ordinal scale -- d3.extent(...) may be what is intended. Confirm.
  x.domain(scantypes [0].map(function(d) { return d.x; }));
  // The last layer sits on top of the stack, so y0 + y is the stack total.
  y.domain([0, d3.max(scantypes[scantypes .length - 1], function(d) { return d.y0 + d.y; })]);

  // Add a group for each scan.
  // Fill comes from the ordinal colour scale z, keyed by layer index; the
  // stroke is a darker shade of the same colour.
  var cause = svg.selectAll("g.scan")
      .data(scantypes)
    .enter().append("svg:g")
      .attr("class", "scan")
      .style("fill", function(d, i) { return z(i); })
      .style("stroke", function(d, i) { return d3.rgb(z(i)).darker(); });

  // Add a rect for each date.
  // .data(Object) hands each layer's own array to its nested rects.
  // NOTE(review): both branches of the if/else in the "x" callback return
  // x(d.x), so the test on i has no effect.
  // y values are negated because the parent <g> is translated to the bottom
  // of the SVG, so bars grow upwards from that origin.
  // The mouse handlers reference `tooltip`, which is declared with `var`
  // further down in this callback; they only run after it has been assigned.
  var rect = cause.selectAll("rect")
      .data(Object)
    .enter().append("svg:rect")
      .attr("id", function(d,i) { return i + " comp " + d.z;  })
      .attr("x", function(d,i) { 
                        if (i ==0) 
                        { 
                            return x(d.x) ;
                        } 
                        else 
                        {
                            return x(d.x);
                        }} )
      .attr("y", function(d) { return -y(d.y0) - y(d.y); })
      .attr("height", function(d) { return y(d.y); })
      .attr("width", 30)//x.rangeBand()/2
    .on("mouseover", function(d){

                   return tooltip.style("visibility", "visible")
                                   .text((d.y))//d.typescan + " -  " + 
                                   .style("left", (d3.event.pageX) + "px") 
                                   .style("top", (d3.event.pageY - 20) + "px");      ;})
      .on("mousemove", function(d){

                      return tooltip.style("visibility", "visible")
                                   .text((d.y)) //d.typescan + " -  " + 
                                   .style("left", (d3.event.pageX) + "px") 
                                   .style("top", (d3.event.pageY - 20) + "px");      ;})

      .on("mouseout", function(d){return tooltip.style("visibility", "hidden");}) 
      .on("click", function(d){});



  // Absolutely positioned <div> used as the hover tooltip. It starts out
  // visible with the text "Scanned vs UnScanned" and is moved/re-labelled by
  // the rect mouse handlers above.
  // NOTE(review): "Arial" alone is not a complete value for the CSS `font`
  // shorthand; font-family was probably intended. Confirm.
  var tooltip = d3.select("#container")
    .append("div")
    .style("position", "absolute")
    .style("z-index", "10")
    .style("visibility", "visible")
    .text("Scanned vs UnScanned")
    .style("font", "Arial")
      .style("color", "white")
    .style("font-size", "14px");

  //Add x-Axis
    svg.append("g")
    .attr("class", "x axis")
    //.attr("transform", function(d) { return "translate(0,80)"; })
    .call(xAxis)




  // Add a label per date.
  // NOTE(review): x.domain() returns the date values set above, so `d` here
  // is the date itself and d.x is undefined; x(d) looks like the intent.
  // NOTE(review): selectAll("text") also matches any <text> elements already
  // under svg (e.g. those created by the axis call above), which changes what
  // enter() yields. Confirm.
  var label = svg.selectAll("text")
      .data(x.domain())
    .enter().append("svg:text")
      .attr("x", function(d) { return x(d.x); })//x.rangeBand() / 4
      .attr("y", 6)
      .attr("text-anchor", "middle")
      .attr("dy", ".71em")
      .text(format);

  // Add y-axis rules.
  // One <g class="rule"> per y tick, shifted up by the scaled tick value.
  var rule = svg.selectAll("g.rule")
      .data(y.ticks(5))
    .enter().append("svg:g")
      .attr("class", "rule")
      .attr("transform", function(d) { return "translate(0," + -y(d) + ")"; });

  // Horizontal rule line: black for the zero tick, translucent white otherwise.
  rule.append("svg:line")
      .attr("x2", w - p[1] - p[3])
      .style("stroke", function(d) { return d ? "#fff" : "#000"; })
      .style("stroke-opacity", function(d) { return d ? .7 : null; });

  // Tick value label to the left of each rule, formatted as a grouped integer.
  // NOTE(review): "Arial 12px" is not a valid font-family value; the size
  // probably belongs in font-size. Confirm.
  rule.append("svg:text")
      .attr("x", -15)
      .style("font-family","Arial 12px")
      .attr("dy", ".25em")
      .text(d3.format(",d"));
like image 499
krishna_v Avatar asked Feb 03 '14 08:02

krishna_v


1 Answers

You seem to be confused about what the SVG should look like, and so don't know how to make it happen.

The bars in SVG are just rectangles. You need to tell them where they should be positioned (which is always defined by the top left corner of the bar) and how big they should be.

To get the bars to line up in a stacked graph, you need to figure out their position and size based on all the values for that stack.

I've created a very simplified example of a stacked bar chart using your data (just the scanned/unscanned data, I haven't separated things out by the sbu variable).

Here's the working example

Here's the code with comments:

// Minimal stacked bar chart: one <g> per CSV row, holding one rectangle for
// the "scanned" value and one for the "unscanned" value stacked on top of it.

// Both dimensions are declared in a single `var` statement; ending the first
// declarator with a semicolon would make `height` an implicit global.
var width = 400,
    height = 500;

var svg = d3.select("body").append("svg")
            .attr("width", width)
            .attr("height", height);

var xScale = d3.scale.ordinal()
               .rangeRoundBands([0,width], 0.1);
var yScale = d3.scale.linear()
               .range([height, 0]);
        //note the inverted range, so that small values
        //scale to the bottom of the SVG

var data = d3.csv.parse( d3.select("pre#data").text() );
//this just grabs the text from the preformatted block
//and parses it as if it was a csv file
//in your real code, you would use d3.csv(filename, callbackFunction) 
//and the rest would be inside your callback function:

xScale.domain( data.map(function(d){return d.date;}) );
//the xScale domain is the list of all categorical values.
//The map function grabs all the date values from the data
//and returns them as a new array.
//I'm not worrying about parsing dates, since
//strings work fine with an ordinal scale
//(although you'd want to parse them if you wanted to reformat them).

yScale.domain( [0,
                d3.max(data,
                       function(d){
                           return +d.scanned + +d.unscanned;
                       })
                ]);
//The yScale domain starts at 0 (since it's a bar chart)
//and goes to the maximum *total* value for each date.
//The d3.max function finds the maximum for an array
//based on the result returned by the function for each
//element of the array.  This function just finds the sum
//of the scanned and unscanned values 
//(after converting them from strings to numbers with "+").


var dateGroups = svg.selectAll("g") 
        //create an empty selection of groups
   .data(data); //join to the data, each row will get a group

dateGroups.enter().append("g")
    //create the actual <g> elements for each row of data
    .attr("class", "dateGroup"); 
    //give them a meaningful class

//Now, within each group create a rectangle 
//for each category (scanned and unscanned).
//If you had lots of categories, you'd want to 
//use a nested selection and a second data join.
//However, to do that you'd need to do a lot of 
//data manipulation to create an array of 
//separate data objects for each category.
//
//With only two categories, it's easier to just
//do each one separately, and let them inherit
//the data from the parent <g> element.

//For the bottom of the stack:
var bottomBars = dateGroups.append("rect")
    .attr("class", "data scanned");

bottomBars.attr("y", function(d){
        return yScale(+d.scanned);
    } )
        //y is the TOP of the rectangle
        //i.e., the position of this data value
        //on the scale
    .attr("height", function(d){
        return Math.abs( yScale(+d.scanned) - yScale(0) );
        //The height of the rectangle is the difference between
        //its data value and the zero line.
        //Note that the yScale value of zero is 
        //bigger than the yScale value of the data
        //because of the inverted scale, so we use
        //absolute value to always get a positive height.
    } );

//For the top of the stack:
//(named topBars rather than top: in a browser, a top-level `var top`
//collides with the read-only window.top property, so the selection
//would never be stored if this code runs as a plain global script)
var topBars = dateGroups.append("rect")
    .attr("class", "data unscanned");

topBars.attr("y", function(d){
        return yScale(+d.unscanned + +d.scanned);
    } )
        //y is the TOP of the rectangle
        //i.e., the position on the scale of 
        //the *total* of the two data categories
    .attr("height", function(d){
        return Math.abs( yScale(+d.unscanned) - yScale(0) );
        //The height of this bar is just based on 
        //its value.  However, this could also be 
        //written as
        //Math.abs(+yScale(+d.scanned + +d.unscanned) 
        //              - yScale(+d.scanned) )
        //i.e., as the difference between the total
        //(top of the bar) and the other category's 
        //value (bottom of the bar)
    } );

//The x value and width are the same for both bars
//so we can re-select all the rectangles and 
//set these attributes at the same time:
dateGroups.selectAll("rect.data")
    .attr("x", function(d){
        return xScale(d.date);
    })
    .attr("width", xScale.rangeBand() );
    //don't need a function for width,
    //since it doesn't depend on the data

Once you are sure you understand what is happening at every step of that program, then you can start to add extra features like axes or tooltips. You will also be in a good position to adapt the code to work with many categories, although in that case you will probably want to create a sub-array representing the data for each category, and use a nested selection to create the rectangles. That's the approach used by most stacked bar graph examples; they will hopefully be easier to understand after working with this very simplified version.

Edit

The above solution works if you know that you only have two values in each stack, with the data for both values from the same row of the data table. If you might have many bars in each stack, and/or if they come from multiple rows of the data table, you will want to use a nested selection to match the data to individual bars.

In order to use the nested selection approach, you first have to do some manipulation to your data. You need to get it into nested array format. The outer array has to represent each stack, and each stack data object has to include a sub-array representing each bar.

How you make the nested array depends on your original data format. When the values that you want to stack are in different rows, the d3.nest operator can group them together into sub-arrays. When the stacked values are different numbers from the same row of the data table, you have to use a forEach() function to loop through all the rows of your data and construct an array from each.

In your example, you want to do both, so we're going to combine a nesting operation with a forEach operation. At the same time, we're going to calculate the running totals for the stack: in order to position each bar correctly, we need to know not only its own count, but also the total count of all values under it in the stack.

Here's a working fiddle

The data manipulation code is

/* Group the flat CSV rows by their date string: one entry per date,
   with the matching rows collected in entry.values. */
var nestFunction = d3.nest().key(function (row) { return row.date; });
var nestedData = nestFunction.entries(data);

// Largest per-date total seen so far; used for setting the y domain.
var maxTotal = 0;

nestedData.forEach(function (stack) {
    // Each nest entry holds every row for one date. Turn it into a
    // stack description: the date plus one bar object per value, each
    // bar knowing the running total below it (y0) and above it (y1).

    // Keep the original date string; it could be parsed into a Date
    // object here instead if date formatting is needed later.
    stack.date = stack.key;

    // One bar per value, i.e. two bars for every original row.
    stack.bars = [];

    // Running total of everything already placed in this stack.
    var total = 0;

    stack.values.forEach(function (row) {
        // Every row contributes its scanned count first and its
        // unscanned count second, so the bars stack in that order.
        ["scanned", "unscanned"].forEach(function (type) {
            var count = +row[type];  // CSV values are strings
            var base = total;        // total *before* this bar
            total = base + count;    // total *after* this bar

            stack.bars.push({
                date: stack.date,
                type: type,
                count: count,
                compid: row.compid,
                sbu: row.sbu,
                y0: base,
                y1: total
            });
        });
    });

    // The final running total is the height of the whole stack.
    maxTotal = Math.max(maxTotal, total);
});

If you didn't want to stack certain types of bars together -- for example, if you wanted to keep the values from different compids in different stacks -- then you would include that parameter as a second key to the nesting function. Values are only nested together if they match on all the nesting keys. Of course, then you'd also have to amend your x-scale to separate out the stacks by both keys. Look up examples of grouped bar charts for how to do that.

Once you have the properly nested data, you join the outer array (the array of nest objects) to <g> elements representing each stack, and then create a nested selection of rectangles within each group and join the inner array (the bar data) to it:

// Outer join: one <g class="dateGroup"> per nested entry (i.e. per date).
var dateGroups = svg.selectAll("g").data(nestedData);

dateGroups.enter()
    .append("g")
    .attr("class", "dateGroup");

// Inner join: inside every date group, one <rect> per entry of that group's
// "bars" array. A nested selection is used because the number of bars can
// differ from date to date.
var bars = dateGroups.selectAll("rect")
    .data(function (group) {
        // `group` is the datum bound to the parent <g>
        return group.bars;
    });

bars.enter().append("rect");

bars
    // Horizontal placement is shared by every bar of a date.
    .attr("x", function (bar) {
        return xScale(bar.date);
    })
    // The band width does not depend on the data, so no function is needed.
    .attr("width", xScale.rangeBand())
    // Vertical placement: y is the TOP of the rectangle, i.e. the scaled
    // running total including this bar and everything beneath it.
    .attr("y", function (bar) {
        return yScale(bar.y1);
    })
    // Height is the scaled distance between the total after this bar (y1)
    // and the total before it (y0). On a linear scale this equals
    // Math.abs( yScale(bar.count) - yScale(0) ). The absolute value keeps
    // the height positive even though the scale's range is inverted.
    .attr("height", function (bar) {
        var topEdge = yScale(bar.y1);
        var bottomEdge = yScale(bar.y0);
        return Math.abs(topEdge - bottomEdge);
    })
    // One CSS class per categorical value so the bars can be styled from a
    // stylesheet. Runs of non-word characters (anything other than letters,
    // digits and underscore) are removed from the values first.
    .attr("class", function (bar) {
        var clean = function (text) {
            return text.replace(/\W+/g, "");
        };

        var classes = [];
        classes.push("data");
        classes.push("type-" + clean(bar.type));
        classes.push("compid-" + clean(bar.compid));
        classes.push("sbu-" + clean(bar.sbu));

        return classes.join(" "); // class list is space-separated
    })
    // A <title> child serves as the tooltip.
    .append("title")
        .text(function (bar) {
            return bar.sbu + ", " + bar.type + ":" + bar.count;
        });
like image 192
AmeliaBR Avatar answered Nov 09 '22 10:11

AmeliaBR