I have a table consisting of 10 million rows where I am trying to find who was the first/last maintainer of some machines (id) depending on some dates and also depending on what status the machine had. My query uses six joins, is there any other preferred option? EDIT: The original table has index, trying to optimise the query replacing the joins - if its possible? SQL Fiddle with example:
SQL Fiddle
EDIT (added additional information below):
Example table:
CREATE TABLE vendor_info (
id INT,
datestamp INT,
statuz INT,
maintainer VARCHAR(25));
INSERT INTO vendor_info VALUES (1, 20180101, 0, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180101, 0, 'Eric');
INSERT INTO vendor_info VALUES (3, 20180101, 1, 'David');
INSERT INTO vendor_info VALUES (1, 20180201, 1, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180201, 0, 'Jay');
INSERT INTO vendor_info VALUES (3, 20180201, 1, 'Jay');
INSERT INTO vendor_info VALUES (1, 20180301, 1, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180301, 1, 'David');
INSERT INTO vendor_info VALUES (3, 20180301, 1, 'Eric');
Query and desired output:
SELECT
id
, MIN(datestamp) AS min_datestamp
, MAX(datestamp) AS max_datestamp
, MAX(case when statuz = 0 then datestamp end) AS max_s0_date
, MAX(case when statuz = 1 then datestamp end) AS max_s1_date
, MIN(case when statuz = 0 then datestamp end) AS min_s0_date
, MIN(case when statuz = 1 then datestamp end) AS min_s1_date
INTO vendor_dates
FROM vendor_info
GROUP BY id;
SELECT
vd.id
, v1.maintainer AS first_maintainer
, v2.maintainer AS last_maintainer
, v3.maintainer AS last_s0_maintainer
, v4.maintainer AS last_s1_maintainer
, v5.maintainer AS first_s0_maintainer
, v6.maintainer AS first_s1_maintainer
FROM vendor_dates vd
LEFT JOIN vendor_info v1 ON vd.id = v1.id AND vd.min_datestamp = v1.datestamp
LEFT JOIN vendor_info v2 ON vd.id = v2.id AND vd.max_datestamp = v2.datestamp
LEFT JOIN vendor_info v3 ON vd.id = v3.id AND vd.max_s0_date = v3.datestamp
LEFT JOIN vendor_info v4 ON vd.id = v4.id AND vd.max_s1_date = v4.datestamp
LEFT JOIN vendor_info v5 ON vd.id = v5.id AND vd.min_s0_date = v5.datestamp
LEFT JOIN vendor_info v6 ON vd.id = v6.id AND vd.min_s1_date = v6.datestamp;
Adding an index to vendor_info reduces duration of your 2nd query from over 300ms to under 30ms average over repeated runs
PRIMARY KEY CLUSTERED (id, datestamp)
Changing the 2 step process into a CTE reduces total duration even more to well under 15ms average over repeated runs.
The CTE method lets the query optimiser use the new primary key
CREATE TABLE vendor_info (
id INT,
datestamp INT,
statuz INT,
maintainer VARCHAR(25)
PRIMARY KEY CLUSTERED (id, datestamp)
);
INSERT INTO vendor_info VALUES (1, 20180101, 0, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180101, 0, 'Eric');
INSERT INTO vendor_info VALUES (3, 20180101, 1, 'David');
INSERT INTO vendor_info VALUES (1, 20180201, 1, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180201, 0, 'Jay');
INSERT INTO vendor_info VALUES (3, 20180201, 1, 'Jay');
INSERT INTO vendor_info VALUES (1, 20180301, 1, 'Jay');
INSERT INTO vendor_info VALUES (2, 20180301, 1, 'David');
INSERT INTO vendor_info VALUES (3, 20180301, 1, 'Eric');
WITH vendor_dates AS
(SELECT
id
, MIN(datestamp) AS min_datestamp
, MAX(datestamp) AS max_datestamp
, MAX(case when statuz = 0 then datestamp end) AS max_s0_date
, MAX(case when statuz = 1 then datestamp end) AS max_s1_date
, MIN(case when statuz = 0 then datestamp end) AS min_s0_date
, MIN(case when statuz = 1 then datestamp end) AS min_s1_date
FROM vendor_info
GROUP BY id
)
SELECT
vd.id
, v1.maintainer AS first_maintainer
, v2.maintainer AS last_maintainer
, v3.maintainer AS last_s0_maintainer
, v4.maintainer AS last_s1_maintainer
, v5.maintainer AS first_s0_maintainer
, v6.maintainer AS first_s1_maintainer
FROM vendor_dates vd
LEFT JOIN vendor_info v1 ON vd.id = v1.id AND vd.min_datestamp = v1.datestamp
LEFT JOIN vendor_info v2 ON vd.id = v2.id AND vd.max_datestamp = v2.datestamp
LEFT JOIN vendor_info v3 ON vd.id = v3.id AND vd.max_s0_date = v3.datestamp
LEFT JOIN vendor_info v4 ON vd.id = v4.id AND vd.max_s1_date = v4.datestamp
LEFT JOIN vendor_info v5 ON vd.id = v5.id AND vd.min_s0_date = v5.datestamp
LEFT JOIN vendor_info v6 ON vd.id = v6.id AND vd.min_s1_date = v6.datestamp;
Check the following query.
WITH
a AS (
SELECT
id, datestamp, maintainer, statuz,
MIN(datestamp) OVER(PARTITION BY id) AS fm,
MAX(datestamp) OVER(PARTITION BY id) AS lm,
MIN(datestamp) OVER(PARTITION BY id, statuz) AS fZm,
MAX(datestamp) OVER(PARTITION BY id, statuz) AS lZm
FROM vendor_info
)
SELECT
id,
MIN(IIF(datestamp = fm, maintainer, NULL)) AS first_maintainer,
MAX(IIF(datestamp = lm, maintainer, NULL)) AS last_maintainer,
MAX(IIF(datestamp = lZm AND statuz = 0, maintainer, NULL)) AS last_s0_maintainer,
MAX(IIF(datestamp = lZm AND statuz = 1, maintainer, NULL)) AS last_s1_maintainer,
MIN(IIF(datestamp = fZm AND statuz = 0, maintainer, NULL)) AS first_s0_maintainer,
MIN(IIF(datestamp = fZm AND statuz = 1, maintainer, NULL)) AS first_s1_maintainer
FROM a
GROUP BY id;
It can be tested on SQL Fiddle.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With