Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

d3 accessing nested data in grouped bar chart

I'm building a grouped bar chart by nesting a .csv file. The chart will also be viewable as a line chart, so I want a nesting structure that suits the line object. My original .csv looks like this:

Month,Actual,Forecast,Budget
Jul-14,200000,-,74073.86651
Aug-14,198426.57,-,155530.2499
Sep-14,290681.62,-,220881.4631
Oct-14,362974.9,-,314506.6437
Nov-14,397662.09,-,382407.67
Dec-14,512434.27,-,442192.1932
Jan-15,511470.25,511470.25,495847.6137
Feb-15,-,536472.5467,520849.9105
Mar-15,-,612579.9047,596957.2684
Apr-15,-,680936.5086,465313.8723
May-15,-,755526.7173,739904.081
Jun-15,-,811512.772,895890.1357

and my nesting is like this:

  // Load the CSV and reshape it from one row per month (wide format) into
  // one series object per value column.
  d3.csv("data/net.csv", function(error, data) {
    if (error) throw error;

            // Column names taken from the first row, minus "Month": these become the series names.
            var headers = d3.keys(data[0]).filter(function(head) {
            return head != "Month";
          });

                  // Add a parsed copy of the Month string to every row as `month`
                  // (this mutates the row objects in place).
                  data.forEach(function(d) {
                    d.month = parseDate(d.Month);
          });
            // One {name, values} object per header; `values` holds one
            // {date, rate} entry per CSV row, in row order.
            var categories = headers.map(function(name) { 

              return {
                name: name, // "name": the csv headers except month
                values: data.map(function(d) { 
                  return {
                    date: d.month, 
                    // Unary + coerces the CSV string to a number; the "-"
                    // placeholder cells in the file become NaN.
                    rate: +(d[name]),
                    };
                }),
              };

            });

The code to build my chart is:

  // One <g class="barGroup"> per CSV row (i.e. per month), shifted to that
  // month's band on the x scale.
  var bars = svg.selectAll(".barGroup")
        .data(data) // Bind the raw CSV rows and append a new svg group element for each
        .enter()
        .append("g")
        .attr("class", "barGroup")   
        .attr("transform", function (d) { return "translate(" + xScale(d.month) + ",0)"; });

  // Inside every month group, bind the `categories` array: one rect per category.
  // The datum `d` in the accessors below is therefore a category object
  // ({name, values}); it has no `rate` property, so d.rate is undefined in the
  // "y" and "height" accessors.
  // The "x" accessor puts categories 0 and 1 at x = 0 and any later category
  // at half a band.
  bars.selectAll("rect")
        .data(categories)
        .enter()
        .append("rect")
        .attr("width", barWidth)
        .attr("x", function (d, i) { if (i < 2) {return 0;} else {return xScale.rangeBand() / 2;}})
        .attr("y", function (d) { return yScale(d.rate); })
        .attr("height", function (d) { return h - yScale(d.rate); })
        .attr("class", function (d) { return lineClass(d.name); });

The g elements are fine and the individual bars are being mapped to them, with the x value and class applied correctly.

My problem comes in accessing the data for 'rate' for the height and y value of the bars. In the form above it gives a NaN. I've also tried using the category data to append g elements and then appending the rects with:

  .data(function(d) { return d.values })

This allows me to access the rate data, but maps all 36 bars to each of the rangeBands.

It also works fine in a flatter data structure, but I can't seem to use it when it's nested two levels down, despite looking through a great many examples and SO questions.

How do I access the rate data?

In response to Cyril's request, here's the full code:

    // Chart geometry: the plot area is the styled size of #bill minus the margins.
    // style() returns a string (typically something like "600px"); parseInt reads
    // the leading number, and the radix is passed explicitly so the value is
    // always parsed as decimal.
    var margin = {top: 20, right: 18, bottom: 80, left: 50},
        w = parseInt(d3.select("#bill").style("width"), 10) - margin.left - margin.right,
        h = parseInt(d3.select("#bill").style("height"), 10) - margin.top - margin.bottom;

    // Multi-resolution tick format: each entry pairs a format string with a
    // test function; the last entry always matches and acts as the fallback.
    var customTimeFormat = d3.time.format.multi([
      [".%L", function(d) { return d.getMilliseconds(); }],
      [":%S", function(d) { return d.getSeconds(); }],
      ["%I:%M", function(d) { return d.getMinutes(); }],
      ["%I %p", function(d) { return d.getHours(); }],
      ["%a %d", function(d) { return d.getDay() && d.getDate() != 1; }],
      ["%b %d", function(d) { return d.getDate() != 1; }],
      ["%b", function(d) { return d.getMonth(); }],
      ["%Y", function() { return true; }]
    ]);


    // Parses the CSV's "Jul-14" style month strings into Date objects.
    var parseDate = d3.time.format("%b-%y").parse;

    // Formats a Date for the tooltip, e.g. "Jul 2014".
    var displayDate = d3.time.format("%b %Y");

    // Band scale for the months; its domain is set once the data has loaded.
    var xScale = d3.scale.ordinal()
        .rangeRoundBands([0, w], .1);

    // NOTE(review): xScale1 is not referenced anywhere in the listing shown here.
    var xScale1 = d3.scale.linear()
          .domain([0, 2]);

    // Value scale; range is inverted because SVG y grows downwards.
    // NOTE(review): .nice() is applied before any domain is set, so it presumably
    // does not round the domain assigned later in the data callback; confirm.
    var yScale = d3.scale.linear()
         .range([h, 0])
         .nice();

    var xAxis = d3.svg.axis()
        .scale(xScale)
        .tickFormat(customTimeFormat)
        .orient("bottom");

    // Inner ticks of length -w extend across the plot and act as grid lines.
    var yAxis = d3.svg.axis()
        .scale(yScale)
        .orient("left")
        .innerTickSize(-w)
        .outerTickSize(0);

    // Size the outer svg to include the margins, then draw inside a group
    // translated by the left/top margin.
    var svg = d3.select("#svgCont")
        .attr("width", w + margin.left + margin.right)
        .attr("height", h + margin.top + margin.bottom)
        .append("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    // Thousands-separated, zero-decimal number format for the tooltip value.
    var thous = d3.format(",.0f");

    // Maps series names to CSS class names. No domain is given here, so the
    // mapping depends on the order in which names are first passed to the scale.
    var lineClass = d3.scale.ordinal().range(["actual", "forecast", "budget"]);  

    // Tooltip showing the date and value of the hovered datum (expects a
    // datum with `date` and `rate` properties).
    var tip = d3.tip()
      .attr('class', 'd3-tip')
      .offset([-10, 0])
      .html(function(d) {
        // The closing </p> keeps the generated markup well-formed.
        return "<p id='date'>" + displayDate(d.date) + "</p><p id='value'>$" + thous(d.rate) + "</p>";
      });

    // Load the CSV, reshape it into one series per value column, set the scale
    // domains, then draw the axes, grouped bars, legend and tooltip handlers.
    d3.csv("data/net.csv", function(error, data) {
      if (error) throw error;

              // Column names taken from the first row, minus "Month": these become the series names.
              var headers = d3.keys(data[0]).filter(function(head) {
              return head != "Month";
            });

                    // Add a parsed Date to every row as `month` (mutates the rows in place).
                    data.forEach(function(d) {
                      d.month = parseDate(d.Month);
            });
              // One {name, values} object per header; `values` holds one
              // {date, rate} entry per CSV row, in row order.
              var categories = headers.map(function(name) { 

                return {
                  name: name, 
                  values: data.map(function(d) {
                    return {
                      date: d.month, 
                      // Unary + coerces the CSV string to a number; the "-"
                      // placeholder cells in the file become NaN.
                      rate: +(d[name]),
                      };
                  }),
                };

              });

    // Smallest rate across every series (minimum of the per-series minima).
    var min = d3.min(categories, function(d) {
                        return d3.min(d.values, function(d) {
                            return d.rate;
                        });
                    });



    // Largest rate across every series (maximum of the per-series maxima).
    var max = d3.max(categories, function(d) {
                        return d3.max(d.values, function(d) {
                            return d.rate;
                        });
                    });

    // Pad the lower bound by 20% away from the data: further below zero for a
    // negative minimum, towards zero for a positive one.
    var minY = min < 0 ? min * 1.2 : min * 0.8;

                  // x domain: one band per month; y domain: padded minimum to 110% of the maximum.
                  xScale.domain(data.map(function(d) { return d.month; }));
                  yScale.domain([minY, (max * 1.1)]);

    // With more than two series each bar gets half a band, otherwise the full band.
    var barWidth = headers.length > 2 ? xScale.rangeBand() / 2 : xScale.rangeBand() ;

    svg.call(tip);

    // x axis, moved to the bottom edge of the plot area.
    svg.append("g")
        .attr("class", "x axis")
        .attr("transform", "translate(0," + h + ")")
        .call(xAxis);

    svg.append("g")
          .attr("class", "y axis")
          .call(yAxis);

    // One <g class="barGroup"> per CSV row (i.e. per month), shifted to that
    // month's band on the x scale.
    var bars = svg.selectAll(".barGroup")
          .data(data) 
          .enter()
          .append("g")
          .attr("class", "barGroup")   
          .attr("transform", function (d) { return "translate(" + xScale(d.month) + ",0)"; });

    // Inside every month group, bind the `categories` array: one rect per category.
    // The datum `d` in the accessors below is therefore a category object
    // ({name, values}); it has no `rate` property, so d.rate is undefined in the
    // "y" and "height" accessors.
    // The "x" accessor puts categories 0 and 1 at x = 0 and any later category
    // at half a band.
    bars.selectAll("rect")
          .data(categories)
          .enter()
          .append("rect")
          .attr("width", barWidth)
          .attr("x", function (d, i) { if (i < 2) {return 0;} else {return xScale.rangeBand() / 2;}})
          .attr("y", function (d) { return yScale(d.rate); })
          .attr("height", function (d) { return h - yScale(d.rate); })
          .attr("class", function (d) { return lineClass(d.name) + " bar"; });


    // Legend: one group per series name, each with a sample line and a text
    // label stacked 14px apart below the plot area.
    var legend = svg.selectAll(".legend")
          .data(headers) 
          .enter()
          .append("g")
          .attr("class", "legend");

    legend.append("line")
          .attr("class", function(d) { return lineClass(d); })
          .attr("x1", 0)
          .attr("x2", 40)
          .attr("y1", function(d, i) { return (h + 30) + (i *14); })
          .attr("y2", function(d, i) { return (h + 30) + (i *14); });

    legend.append("text")
        .attr("x", 50)
        .attr("y", function(d, i) { return (h + 32) + (i *14); })
        .text(function(d) { return d; });

    // Show the tooltip while the pointer is over a bar and hide it on leave.
    svg.selectAll(".bar")
       .on('mouseover', tip.show)
       .on('mouseout', tip.hide);

    });

Update 18 Feb '16.

It seems I haven't explained what I was trying to do sufficiently well. The line and bar versions of the chart will be seen separately, i.e. users can see either one according to input to a select element. Also note that I don't have control over how the data comes in initially.

I have a version of exactly how it should work here.

This question was raised when I was still working through it, but I never solved the issue – I used a workaround of doing two separate nests of the data.

like image 437
tgerard Avatar asked Dec 16 '15 02:12

tgerard


2 Answers

Link to jsfiddle: https://jsfiddle.net/sladav/rLh4qwyf/1/

I think the root of the issue is that you want to use two variables that do not explicitly exist in your original data set: (1) Category and (2) Rate.

Your data is formatted in a wide format in that each category gets its own variable and the value for rate exists at the crossroads of month and one of the given categories. I think the way you're nesting ultimately is or at least should address this, but it is unclear to me if or where something gets lost in translation. Conceptually, I think it makes more sense to start with an organization that matches what you are trying to accomplish. I reformatted the original data and approached it again - on a conceptual level the nesting seems straightforward and simple...

NEW COLUMNS:

  • Month: Time Variable; mapped to X axis
  • Category: Categorical values [Actual, Forecast, Budget]; used to group/color
  • Rate: Numerical value; mapped to Y axis

Reorganized CSV (dropped NULLs):

Month,Category,Rate
Jul-14,Actual,200000
Aug-14,Actual,198426.57
Sep-14,Actual,290681.62
Oct-14,Actual,362974.9
Nov-14,Actual,397662.09
Dec-14,Actual,512434.27
Jan-15,Actual,511470.25
Jan-15,Forecast,511470.25
Feb-15,Forecast,536472.5467
Mar-15,Forecast,612579.9047
Apr-15,Forecast,680936.5086
May-15,Forecast,755526.7173
Jun-15,Forecast,811512.772
Jul-14,Budget,74073.86651
Aug-14,Budget,155530.2499
Sep-14,Budget,220881.4631
Oct-14,Budget,314506.6437
Nov-14,Budget,382407.67
Dec-14,Budget,442192.1932
Jan-15,Budget,495847.6137
Feb-15,Budget,520849.9105
Mar-15,Budget,596957.2684
Apr-15,Budget,465313.8723
May-15,Budget,739904.081
Jun-15,Budget,895890.1357

With your newly formatted data, you start by using d3.nest to GROUP your data explicitly with the CATEGORY variable. Now your data exists in two tiers. The first tier has three groups (one for each category). The second tier contains the RATE data for each line/set of bars. You have to nest your data selections as well - the first layer is used to draw the lines, the second layer for the bars.

Nesting your data:

// Group the long-format rows by their Category column. The later snippets read
// each resulting entry through d.key (the category) and d.values (its rows).
var nestedData = d3.nest()
      .key(function(d) { return d.Category;})
      .entries(data)

Create svg groups for your grouped, 1st-tier data:

// Append one <g class="g-category"> inside .plot-space for each nested entry,
// so every category owns its own group element.
d3.select(".plot-space").selectAll(".g-category")
    .data(nestedData)
    .enter().append("g")
    .attr("class", "g-category")

Use this data to add your lines/paths:

// Add one path per category group. The path geometry is built from that
// group's values array and the stroke colour from its key.
// `lineFunction` and `color` are not defined in this snippet.
d3.selectAll(".g-category").append("path")
    .attr("class", "line")
    .attr("d", function(d){ return lineFunction(d.values);})
    .style("stroke", function(d) {return color(d.key);})

Finally, "step into" 2nd-tier to add bars/rect:

// Step into the second tier: within each category group, bind that category's
// rows (d.values) and append one rect per row.
// The selector matches the class given to the appended rects ("bar"), so
// running this again joins against the existing bars instead of appending
// duplicates.
// `x`, `y`, `height` and `color` are not defined in this snippet.
d3.selectAll(".g-category").selectAll(".bar")
     .data(function(d) {return d.values;})
     .enter().append("rect")
        .attr("class", "bar")
        .attr("x", function(d) {return x(d.Month);})
        .attr("y", function(d) {return y(d.Rate);})
        .attr("width", 20)
        .attr("height", function(d) {return height - y(d.Rate)})
        .attr("fill", function(d) {return color(d.Category)});

This is a straightforward approach (to me at least), in that you take it one category at a time, using the grouped data to draw a line, then individual data points to draw the bars.

LAZY EDIT:

To get category bars side by side

Create ordinal scale mapping category to [1,nCategories]. Use this to dynamically offset bars with something like

translate( newScale(category)*barWidth )

To show either bars or lines (not both)

Create a function that selects bars/lines and transitions/toggles their visibility/opacity. Run when your drop-down input changes and with the drop-down input as input to the function.

like image 70
Steve Ladavich Avatar answered Nov 17 '22 11:11

Steve Ladavich


The problem, I believe, is that you are binding the categories array to the bars selection, like this:

bars.selectAll("rect").data(categories)

As far as I can see (without a running demo), categories is an array with only three values (one for each category: Actual, Forecast and Budget).

You have to go one step 'deeper' in your nested data structure.

To draw a set of bars for each category you would need to iterate over categories and bind the values array that contains the actual values to the selection.

Something like:

  // Draw one rect per category inside every month group.
  // `categories` is a plain array, so it is iterated with Array.prototype.forEach
  // (`.each` exists on d3 selections, not on arrays).
  categories.forEach(function (category) {
    var klass = category.name;
    // "rect.<class>" (no space) matches rects that carry the class; with a
    // space it would look for descendants of rects instead.
    bars.selectAll("rect." + klass)
        // `bars` holds one group per CSV row and category.values was built
        // from the same rows in the same order, so group i gets only the i-th
        // value. Binding the whole array would put every month's bar in every
        // group. "-" placeholder cells (NaN rates) produce no rect.
        .data(function (d, i) {
          var value = category.values[i];
          return isNaN(value.rate) ? [] : [value];
        })
        .enter()
        .append("rect")
        .attr("class", klass)
        .attr("width", barWidth)
        .attr("x", function (d, i) { /* omitted */})
        .attr("y", function (d) { return yScale(d.rate); })
        .attr("height", function (d) { return h - yScale(d.rate); });
  });

---- Edit

Instead of the above code, think about drawing the bars just like you do with the lines. Like this:

// Group by category first (one <g> per series, like the lines), then step into
// each category's values to draw its rects.
// Note that `bars` ends up holding the rect selection, because the chain
// continues past the groups.
var bars = svg.selectAll(".barGroup")
    .data(categories)
    .enter()
    .append("g")
    .attr("class", function (d) { return lineClass(d.name) + "Bar barGroup"; })
    // Categories 0 and 1 share the left half of each band; later categories
    // are shifted right by half a band.
    .attr("transform", function (d, i) {
        var x = i > 1 ? xScale.rangeBand() / 2 : 0;
        return "translate(" + x + ",0)";
    })
    .selectAll('rect')
    // Skip the "-" placeholder cells: they were coerced to NaN when the
    // categories were built and would otherwise create rects with NaN
    // "y" and "height" attributes.
    .data(function (d) {
        return d.values.filter(function (v) { return !isNaN(v.rate); });
    })
    .enter()
    .append("rect")
    .attr("class", "bar")
    .attr("width", barWidth)
    .attr("x", function (d, i) { return xScale(d.date); })
    .attr("y", function (d, i) { return yScale(d.rate); })
    .attr("height", function (d) { return h - yScale(d.rate); });
like image 30
Doktorn Avatar answered Nov 17 '22 10:11

Doktorn