I searched for some help on building linear regression and found some examples here:
nonlinear regression function
and also some js libraries that should cover this, but unfortunately I wasn't able to make them work properly:
simple-statistics.js and this one: regression.js
With regression.js I was able to get the m and b values for the line, so I could use y = m*x + b to plot the line that follows the linear regression of my graph. However, I couldn't apply those values to the line generator. The code I tried is the following:
d3.csv("typeStatsTom.csv", function (error, dataset) {
//Here I plot other stuff, setup the x & y scale correctly etc.
//Then to plot the line:
var data = [x.domain(), y.domain()];
var result = regression('linear', data);
console.log(result)
console.log(result.equation[0]);
var linereg = d3.svg.line()
.x(function (d) { return x(d.Ascendenti); })
.y(function (d) { return y((result.equation[0] * d.Ascendenti) + result.equation[1]); });
var reglinepath = svg.append("path")
.attr("class", "line")
.attr("d", linereg(dataset))
.attr("fill", "none")
.attr("stroke", "#386cb0")
.attr("stroke-width", 1 + "px");
The values of result are the following in the console:
Object
equation: Array[2]
0: 1.8909425770308126
1: 0.042557422969139225
length: 2
__proto__: Array[0]
points: Array[2]
string: "y = 1.89x + 0.04"
__proto__: Object
From what I can tell from the console, I should have set up the x and y values correctly, but the path in the resulting SVG is not visible (although it is drawn), so I don't know what else to try.
Any help is really appreciated; even a solution involving the simple-statistics.js library would be helpful!
Thanks!
I made it work using the following code found here:
/**
 * Least-squares fit of a straight line, y = slope * x + intercept.
 *
 * Note the argument order: the dependent values come first.
 *
 * @param {number[]} y - Dependent values; its length decides how many points are read.
 * @param {number[]} x - Independent values, read at the same indices as y.
 * @returns {{slope: number, intercept: number, r2: number}} The fitted slope and
 *   intercept, plus the squared correlation coefficient of x and y. All three are
 *   NaN when the sums make a denominator zero (e.g. empty input).
 */
function linearRegression(y, x) {
  var n = y.length;
  var sumX = 0;
  var sumY = 0;
  var sumXY = 0;
  var sumXX = 0;
  var sumYY = 0;

  // Single pass accumulating the five sums the closed-form solution needs.
  for (var k = 0; k < n; k += 1) {
    var xk = x[k];
    var yk = y[k];
    sumX += xk;
    sumY += yk;
    sumXY += xk * yk;
    sumXX += xk * xk;
    sumYY += yk * yk;
  }

  // Shared terms of the slope and correlation formulas (each scaled by n).
  var crossTerm = n * sumXY - sumX * sumY;
  var spreadX = n * sumXX - sumX * sumX;
  var spreadY = n * sumYY - sumY * sumY;

  var slope = crossTerm / spreadX;

  return {
    slope: slope,
    intercept: (sumY - slope * sumX) / n,
    r2: Math.pow(crossTerm / Math.sqrt(spreadX * spreadY), 2)
  };
}
// Pull the two columns out of the loaded rows, converting each field with parseFloat.
var yval = dataset.map(function (d) { return parseFloat(d.xHeight); });
var xval = dataset.map(function (d) { return parseFloat(d.Ascendenti); });
// linearRegression takes the y values first, then the x values.
var lr = linearRegression(yval,xval);
// lr now holds the fit:
//   lr.slope     - slope of the fitted line
//   lr.intercept - value of the fitted line at x = 0
//   lr.r2        - r2 value returned alongside the fit
console.log(lr);
And then plotting a line with:
// Draw the fitted line as one SVG segment from x = 0 to the largest x value.
// d3.max compares values in natural order, so on string fields (what d3.csv
// produces unless the rows were coerced earlier) "9" would beat "10". Parse to
// numbers first; parseFloat leaves values that are already numeric unchanged.
// NOTE(review): the regression above uses d.Ascendenti as its x values; confirm
// that OvershootingSuperiore is really the column that should bound the line.
var max = d3.max(dataset, function (d) { return parseFloat(d.OvershootingSuperiore); });
var myLine = svg.append("svg:line")
  .attr("x1", x(0))
  .attr("y1", y(lr.intercept)) // the fitted line's value at x = 0 is the intercept
  .attr("x2", x(max))
  .attr("y2", y((max * lr.slope) + lr.intercept))
  .style("stroke", "black");
Using the code I found here
It looks to me like your path is getting drawn, just way off the screen.
Perhaps the regression is calculated incorrectly? The problem may be on line 202:
var data = [x.domain(), y.domain()];
var result = regression('linear', data);
If the raw data looks like [[1, 500], [2, 300]], this will find the linear regression of [[1, 2], [300, 500]], which probably isn't what you want.
I'm guessing what you'd like to do is compute the regression with the entire set of data points rather than with the graph's bounds. Then rather than charting this line for every data value, you want to just plot the endpoints.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With