I need to join multiple tables in SQL Server with a common column dates but I want to avoid repeating the values from the different tables when merge.
drop table if exists #d, #t1, #t2
create table #d (DataDate date)
create table #t1 (DataDate date, Value1 float, Value2 float)
create table #t2 (DataDate date, Value3 float, Value4 float)
insert into #d values ('20181201'),('20181202'),('20181203')
insert into #t1 values
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
insert into #t2 values
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
select #d.DataDate,#t1.Value1,#t1.Value2,#t2.Value3,#t2.Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
Actual Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 4.14 2.18
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 3.16 1.195 4.1 2.195
Desired Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 NULL NULL
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 NULL NULL 4.1 2.195
Here is a proposed solution.
I have added a third table, just to demonstrate that this could be solved for N tables with a common column.
Prepare demo data:
/* Prepare demo objects */
DROP TABLE IF EXISTS #d, #t1, #t2
CREATE TABLE #d (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)
/* Insert demo data */
INSERT INTO #d VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
INSERT INTO #t2 VALUES
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
INSERT INTO #t3 VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.16, 1.195)
Proposed QUERY Solution:
SELECT
COALESCE(d.DataDate, t1.datadate, t2.datadate, t3.datadate) AS DataDate
, t1.Value1
, t1.Value2
, t2.Value3
, t2.Value4
, t3.Value5
, t3.Value6
FROM
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #d) AS d
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
FROM #t1) AS t1
ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t2) AS t2
ON (t2.DataDate = d.DataDate AND t2.rn = d.rn)
OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN
(SELECT
*
, ROW_NUMBER() OVER (PARTITION BY datadate ORDER BY (SELECT NULL)) AS rn
FROM #t3) AS t3
ON (t3.DataDate = d.DataDate AND t3.rn = d.rn)
OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;
Demo fiddle is posted on db<>fiddle here
Results:
DataDate | Value1 | Value2 | Value3 | Value4 | Value5 | Value6 :------------------ | -----: | -----: | -----: | -----: | -----: | -----: 01/12/2018 00:00:00 | 3.14 | 1.18 | 4.14 | 2.18 | 3.14 | 1.18 01/12/2018 00:00:00 | 3.135 | 1.185 | null | null | 3.135 | 1.185 02/12/2018 00:00:00 | 3.15 | 1.19 | null | null | 3.16 | 1.195 03/12/2018 00:00:00 | 3.16 | 1.195 | 4.15 | 2.19 | null | null 03/12/2018 00:00:00 | null | null | 4.1 | 2.195 | null | null
Note (optional):
You can greately improve performance by introducing indexes.
As a demo, I have added CLUSTERED INDEXES on DateData column and the preformance increase is significant.
/* Add to improve performance */
CREATE CLUSTERED INDEX CI_DataDate ON #d (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t1 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t2 (DataDate);
CREATE CLUSTERED INDEX CI_DataDate ON #t3 (DataDate);
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With