I have this data in myTable
:
Date Status PersonID
-----------------------------------------
2018/01/01 2 2015 ┐ 2
2018/01/02 2 2015 ┘
2018/01/05 2 2015 ┐
2018/01/06 2 2015 3
2018/01/07 2 2015 ┘
2018/01/11 2 2015 - 1
2018/01/01 2 1018 - 1
2018/01/03 2 1018 - 1
2018/01/05 2 1018 ┐ 2
2018/01/06 2 1018 ┘
2018/01/08 2 1018 ┐ 2
2018/01/09 2 1018 ┘
2018/01/03 2 1625 ┐
2018/01/04 2 1625 4
2018/01/05 2 1625
2018/01/06 2 1625 ┘
2018/01/17 2 1625 - 1
2018/01/29 2 1625 - 1
-----------------------------------
and I need to count consecutive duplicate values like this:
This is the result I need:
count personid
-----------------
2 2015
3 2015
1 2015
1 1018
1 1018
2 1018
2 1018
4 1625
1 1625
1 1625
I am using SQL Server 2016 - please help
This is a 'Gaps and Islands' problem, you can try like following.
;with cte
as (select *,
dateadd(day, -row_number()
over (partition by status, personid
order by [date] ), [date]) AS grp
FROM @table
)
,cte1
AS (select *,row_number() over(partition by personid, grp,status order by [date]) rn,
count(*) over(partition by personid, grp) ct
from cte
)
select ct as count, personid
from cte1
where rn=1
Online Demo
Note: You might not get the rows in same sequence as you don't have any column which can be used for ordering the way you showed in the desired output.
This type of problem is known as 'Gaps and Islands'. You are either identifying consecutive data sets (Islands) or range of values between two islands (Gaps). There are many different ways to achieve the results that also performs well with large data sets. You can refer the below well written articles for that.
https://www.itprotoday.com/sql-server/solving-gaps-and-islands-enhanced-window-functions
https://www.red-gate.com/simple-talk/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/
https://www.sqlshack.com/data-boundaries-finding-gaps-islands-and-more/
Here is an attempt to your question.
CREATE TABLE #test
(
dt DATETIME
,Status INT
,PersonID INT
)
INSERT INTO #Test (dt, Status, PersonID) VALUES
('2018/01/01', 2, 2015),
('2018/01/02', 2, 2015),
('2018/01/05', 2, 2015),
('2018/01/06', 2, 2015),
('2018/01/07', 2, 2015),
('2018/01/11', 2, 2015),
('2018/01/01', 2, 1018),
('2018/01/03', 2, 1018),
('2018/01/05', 2, 1018),
('2018/01/06', 2, 1018),
('2018/01/08', 2, 1018),
('2018/01/09', 2, 1018),
('2018/01/03', 2, 1625),
('2018/01/04', 2, 1625),
('2018/01/05', 2, 1625),
('2018/01/06', 2, 1625),
('2018/01/17', 2, 1625),
('2018/01/29', 2, 1625)
;with cte_dt_from
AS
(
SELECT PersonID, MIN(Dt) dt_from_start
FROM #Test
GROUP BY PersonID
),
cte_offset_num
AS
(
SELECT T1.PersonID, T1.dt, DATEDIFF(DAY, T2.dt_from_start, T1.dt) dt_offset
FROM #test T1
INNER JOIN cte_dt_from T2 ON T2.PersonID = T1.PersonID
),
cte_starting_point
AS
(
SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
FROM cte_offset_num AS A
WHERE NOT EXISTS (
SELECT *
FROM cte_offset_num AS B
WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset - 1)
)
,
cte_ending_point
AS
(
SELECT A.PersonID, A.dt_offset, ROW_NUMBER() OVER(PARTITION BY A.PersonID ORDER BY A.dt_offset) AS rownum
FROM cte_offset_num AS A
WHERE NOT EXISTS (
SELECT *
FROM cte_offset_num AS B
WHERE B.PersonID = A.PersonID AND B.dt_offset = A.dt_offset + 1)
)
SELECT (E.dt_offset - S.dt_offset) + 1 AS [count], S.PersonID
FROM cte_starting_point AS S
JOIN cte_ending_point AS E ON E.PersonID = S.PersonID AND E.rownum = S.rownum
ORDER BY S.PersonID;
DROP TABLE #Test;
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With