I have developed Simple Linear regression function in SQL Server from here (https://ask.sqlservercentral.com/questions/96778/can-this-linear-regression-algorithm-for-sql-serve.html) to calculate Alpha,Beta and some extra values like Upper 95% and Lower 95%. The Simple Linear regression takes the argument as X and y.
Now I am in need of perform Multiple Linear regression SQL Server, which takes arguments y and X1,X2,X3,.....Xn
Hence the Output will be as follows:
Coefficients Standard Error t Stat P-value Lower 95% Upper 95%
+-------------------------------------------------------------------------------------------+
Intercept -23.94650812 19.85250194 -1.20622117 0.351059563 -109.3649298
X Variable 1 0.201064291 0.119759437 1.678901439 0.235179 -0.314218977
X Variable 2 -0.014046021 0.037366638 -0.375897368 0.743119791 -0.174821687
X Variable 3 0.502074905 0.295848189 1.697069389 0.231776287 -0.770857111
X Variable 4 0.068238344 0.219256527 0.311226057 0.785072958 -0.875146351
Anyone can please suggest me a good way to achieve this.
I would look at using CLR integration to take advantage of an existing .NET library supporting Linear Regression, for example Math.NET Numerics. Using a CLR stored procedure you would be able to read the data out of a table, transform it to the .NET libraries matrix type, run the regression, then either write the results back to a table or return a row set directly.
But just for fun here is Linear Least Squares solved via Orthogonal Decomposition using Householder reflections in SQL. (Warning will run slowly on any significant amount of data.)
-- Create a type to repsent a 2D Matrix
CREATE TYPE dbo.Matrix AS TABLE (i int, j int, Aij float, PRIMARY KEY (i, j))
GO
-- Function to perform QR factorisation ie A -> QR
CREATE FUNCTION dbo.QRDecomposition (
@matrix dbo.Matrix READONLY
)
RETURNS @result TABLE (matrix char(1), i int, j int, Aij float)
AS
BEGIN
DECLARE @m int, @n int, @i int, @j int, @a float
SELECT @m = MAX(i), @n = MAX(j)
FROM @matrix
SET @i = 1
SET @j = 1
DECLARE @R dbo.Matrix
DECLARE @Qj dbo.Matrix
DECLARE @Q dbo.Matrix
-- Generate a @m by @m Identity Matrix to transform to Q, add more numbers for m > 1000
;WITH e1(n) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b),
e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2),
numbers(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY n) FROM e3)
INSERT INTO @Q (i, j, Aij)
SELECT i.n, j.n, CASE WHEN i.n = j.n THEN 1 ELSE 0 END
FROM numbers i
CROSS JOIN numbers j
WHERE i.n <= @m AND j.n <= @m
-- Copy input matrix to be transformed to R
INSERT @R (i, j, Aij)
SELECT i, j, Aij
FROM @matrix
-- Loop performing Householder reflections
WHILE @j < @n OR (@j = @n AND @m > @n) BEGIN
SELECT @a = SQRT(SUM(Aij * Aij))
FROM @R
WHERE j = @j
AND i >= @i
SELECT @a = -SIGN(Aij) * @a
FROM @R
WHERE j = @j AND i = @j + (@j - 1)
;WITH u (i, j, Aij) AS (
SELECT i, 1, u.ui
FROM (
SELECT i, CASE WHEN i = j THEN Aij + @a ELSE Aij END AS ui
FROM @R
WHERE j = @j
AND i >= @i
) u
)
INSERT @Qj (i, j, Aij)
SELECT i, j, CASE WHEN i = j THEN 1 - 2 * Aij ELSE - 2 * Aij END as Aij
FROM (
SELECT u.i, ut.i AS j, u.Aij * ut.Aij / (SELECT SUM(Aij * Aij) FROM u) AS Aij
FROM u u
CROSS JOIN u ut
) vvt
-- Apply inverse Householder reflection to Q
UPDATE Qj
SET Aij = [Qj+1].Aij
FROM @Q Qj
INNER JOIN (
SELECT Q.i, QjT.j, SUM(QjT.Aij * Q.Aij) AS Aij
FROM @Q Q
INNER JOIN (
SELECT i AS j, j AS i, Aij
FROM @Qj
) QjT ON QjT.i = Q.j
GROUP BY Q.i, QjT.j
) [Qj+1] ON [Qj+1].i = Qj.i AND [Qj+1].j = Qj.j
-- Apply Householder reflections to R
UPDATE Rj
SET Aij = [Rj+1].Aij
FROM @R Rj
INNER JOIN (
SELECT Qj.i, R.j, SUM(Qj.Aij * R.Aij) AS Aij
FROM @Qj Qj
INNER JOIN @R R ON R.i = Qj.j
GROUP BY Qj.i, R.j
) [Rj+1] ON [Rj+1].i = Rj.i AND [Rj+1].j = Rj.j
-- Prepare Qj for next Householder reflection
UPDATE @Qj
SET Aij = CASE WHEN i = j THEN 1 ELSE 0 END
WHERE i <= @j OR j <= @j
DELETE FROM @Qj WHERE i > @j AND j > @j
SET @j = @j + 1
SET @i = @i + 1
END
-- Output Q
INSERT @result (matrix, i, j, Aij)
SELECT 'Q', i, j, Aij
FROM @Q
-- Output R
INSERT @result (matrix, i, j, Aij)
SELECT 'R', i, j, Aij
FROM @R
RETURN
END
GO
-- Function to perform linear regression
CREATE FUNCTION dbo.MatrixLeastSquareRegression (
@X dbo.Matrix READONLY
, @y dbo.Matrix READONLY
)
RETURNS @b TABLE (i int, j int, Aij float)
AS
BEGIN
DECLARE @QR TABLE (matrix char(1), i int, j int, Aij float)
INSERT @QR(matrix, i, j, Aij)
SELECT matrix, i, j, Aij
FROM dbo.QRDecomposition(@X)
DECLARE @Qty dbo.Matrix
-- @Qty = Q'y
INSERT INTO @Qty(i, j, Aij)
SELECT a.j, b.j, SUM(a.Aij * b.Aij)
FROM @QR a
INNER JOIN @y b ON b.i = a.i
WHERE a.matrix = 'Q'
GROUP BY a.j, b.j
DECLARE @m int, @n int, @i int, @j int, @a float
SELECT @m = MAX(j)
FROM @QR R
WHERE R.matrix = 'R'
SET @i = @m
-- Solve Rb = Q'y via back substitution
WHILE @i > 0 BEGIN
INSERT @b (i, j, Aij)
SELECT R.i, 1, ( y.Aij - ISNULL(sumKnown.Aij, 0) ) / R.Aij
FROM @QR R
INNER JOIN @Qty y ON y.i = R.i
LEFT JOIN (
SELECT SUM(R.Aij * ISNULL(b.Aij, 0)) AS Aij
FROM @QR R
INNER JOIN @b b ON b.i = R.j
WHERE R.matrix = 'R'
AND R.i = @i
) sumKnown ON 1 = 1
WHERE R.matrix = 'R'
AND R.i = @i
AND R.j = @i
SET @i = @i - 1
END
RETURN
END
GO
Here is a test script/example of usage:
DECLARE @TestData TABLE (i int IDENTITY(1, 1), X1 float, X2 float, X3 float, X4 float, y float)
DECLARE @c float
DECLARE @b1 float
DECLARE @b2 float
DECLARE @b3 float
DECLARE @b4 float
-- bs are the target coefficiants
SET @c = RAND()
SET @b1 = 2 * RAND()
SET @b2 = 3 * RAND()
SET @b3 = 4 * RAND()
SET @b4 = 5 * RAND()
-- Generate some test data, calcualte y from c + Xb plus some noise: y = c + Xb + e
-- Note: Using RAND() for e is not nomrally ditributed noise as linear regression assumes, this will mess with the estimate of c
DECLARE @k int = 1
WHILE @k < 50 BEGIN
INSERT @TestData(X1, X2, X3, X4, y)
SELECT x1, x2, x3, x4, @c + x1 * @b1 + x2 * @b2 + x3 * @b3 + x4 * @b4 + 0.2 * RAND()
FROM (
SELECT RAND() AS x1, RAND() AS x2, RAND() AS x3, RAND() AS x4
) X
SET @k = @k + 1
END
-- Put our data into dbo.Matrix types
DECLARE @X dbo.Matrix
INSERT @X (i, j, Aij)
-- Extra column for constant
SELECT i, 1, 1
FROM @TestData
UNION
SELECT i, 2, X1
FROM @TestData
UNION
SELECT i, 3, X2
FROM @TestData
UNION
SELECT i, 4, X3
FROM @TestData
UNION
SELECT i, 5, X4
FROM @TestData
DECLARE @y dbo.Matrix
INSERT @y (i, j, Aij)
SELECT i, 1, y
FROM @TestData
-- Estimates for coefficient values
DECLARE @bhat dbo.Matrix
INSERT @bhat (i, j, Aij)
SELECT i, j, Aij
FROM dbo.MatrixLeastSquareRegression(@X, @y)
SELECT CASE i
WHEN 1 THEN @c
WHEN 2 THEN @b1
WHEN 3 THEN @b2
WHEN 4 THEN @b3
WHEN 5 THEN @b4
END AS b
, Aij AS best
FROM @bhat
SELECT y.Aij AS y, Xb.Aij AS yest
FROM (
SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij
FROM @X x
INNER JOIN @bhat bh ON bh.i = x.j
GROUP BY x.i
) Xb
INNER JOIN @y y ON y.i = Xb.i
SELECT SUM(SQUARE(y.Aij - Xb.Aij)) / COUNT(*) AS [Variance]
FROM (
SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij
FROM @X x
INNER JOIN @bhat bh ON bh.i = x.j
GROUP BY x.i
) Xb
INNER JOIN @y y ON y.i = Xb.i
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With