Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

T-SQL | String "Manipulation" and Aggregation

I have the following scenario.

SOURCE TABLE 1

CREATE TABLE #Table1 
(
     Div varchar(10), 
     Dept varchar(10), 
     States varchar(10)
)

INSERT INTO #Table1
   SELECT 'Div1','Dept1','CA,NV,TX'
   UNION ALL
   SELECT 'Div2','Dept2','MI,OH,IN'
   UNION ALL
   SELECT 'Div3','Dept2','NY,NJ,PA'
   UNION ALL
   SELECT 'Div4','Dept1',NULL

SOURCE TABLE 2

CREATE TABLE #Table2 
(
    Div varchar(10), 
    Dept varchar(10), 
    States varchar(10)
)

INSERT INTO #Table2
   SELECT 'Div1','Dept1','CA'
   UNION ALL
   SELECT 'Div1','Dept1','NV, TX'
   UNION ALL
   SELECT 'Div1','Dept1','TX, CA'
   UNION ALL
   SELECT 'Div1','Dept1','CA, NV'
   UNION ALL
   SELECT 'Div2','Dept2','MI, OH'
   UNION ALL
   SELECT 'Div2','Dept2','MI, IN'
   UNION ALL
   SELECT 'Div2','Dept2','OH'
   UNION ALL
   SELECT 'Div3','Dept2','NY, NJ, PA'

DESIRED OUTPUT

CREATE TABLE #Table3 
(
    Div varchar(10), 
    Dept varchar(10), 
    States varchar(50)
)

INSERT INTO #Table3
SELECT 'Div1','Dept1','CA - (3), NV - (2), TX - (2)'
UNION ALL
SELECT 'Div2','Dept2','MI - (2), OH - (2), IN - (1)'
UNION ALL
SELECT 'Div3','Dept2','NY - (1), NJ - (1), PA - (1)'
UNION ALL
SELECT 'Div4','Dept1',NULL

SELECT * FROM #Table1
SELECT * FROM #Table2
SELECT * FROM #Table3

DROP TABLE #Table1
DROP TABLE #Table2
DROP TABLE #Table3

SQLFIDDLE

Goal: Based on #Table1 and #Table2, join both table on Div and Dept fields and then aggregate the counts for distinct states in States field and create an output where you have Div, Dept, and States with unique count of each one of those states printed next to the state.

I am not sure how to achieve this. I am trying LIKE but can't quite figure out how to make it dynamic. I will continue trying to see if I can figure out. Thought I would post this question here and see if I can get some assist.

Thank you

UPDATE:

Desired Output

Div     Dept    States
Div1    Dept1   CA - (3), NV - (2), TX - (2)
Div2    Dept2   MI - (2), OH - (2), IN - (1)
Div3    Dept2   NY - (1), NJ - (1), PA - (1)
Div4    Dept1   NULL
like image 963
007 Avatar asked Oct 03 '14 16:10

007


2 Answers

Your requirements are very nasty but as developers we have to work with what we've got. Here's a solution using Common Table Expression (CTE) extensively:

;WITH
    CTE1 AS
    (
        SELECT      Div, Dept,
                    REPLACE(States,' ','') + ',' AS States
        FROM        Table2
    ),
    CTE2 AS
    (
        SELECT      c1.Div, c1.Dept,
                    LEFT(c1.States,CHARINDEX(',', c1.States)-1)                 AS IndividualState,
                    RIGHT(c1.States,LEN(c1.States)-CHARINDEX(',', c1.States))   AS RemainingStates
        FROM        CTE1    c1
        UNION ALL
        SELECT      c2.Div, c2.Dept,
                    LEFT(c2.RemainingStates,CHARINDEX(',', c2.RemainingStates)-1),
                    RIGHT(c2.RemainingStates,LEN(c2.RemainingStates) - CHARINDEX(',', c2.RemainingStates))
        FROM        CTE2    c2
        WHERE       LEN(c2.RemainingStates) > 0
    ),
    CTE3 AS
    (
        SELECT      Div, Dept,
                    IndividualState,
                    COUNT(*)            AS StateCount
        FROM        CTE2
        GROUP BY    Div, Dept, IndividualState
    ),
    CTE4 AS
    (
        SELECT      t1.Div, t1.Dept,
                    (
                        SELECT  c3.IndividualState + ' - (' + CONVERT(varchar(10),c3.StateCount) + '), ' 
                        FROM    CTE3 c3
                        WHERE   c3.Div = t1.Div AND c3.Dept = t1.Dept
                        FOR XML PATH('')
                    )       AS States
        FROM        Table1  t1
    )

SELECT  Div, Dept,
        LEFT(States, LEN(States) - 1) AS States
FROM    CTE4

Explanation

  1. CTE1 cleans up the data in Table2: remove spaces, add a comma to the end
  2. CTE2 does the normalization
  3. CTE3 does the counting
  4. CTE4 does the final assembly, putting CA | 3 into CA - (3), ...

The last SELECT remove the trailing comma for neater output.

To better understand each step, you can replace the final SELECT with SELECT * FROM CTE1, SELECT * FROM CTE2, etc.

like image 163
Code Different Avatar answered Sep 30 '22 06:09

Code Different


Ok, so first of all, you'll need to split the concatenated values in #Temp1 and #Temp2. There are various methods for doing so, I'll use the numbers table one that is described in this awesome blog post from Aaron Bertrand. So, we'll need a numbers table, which can be done this way:

;WITH n AS
(
    SELECT  x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
)
SELECT Number = x
INTO #Numbers
FROM n
WHERE x BETWEEN 1 AND 8000;

Then, you'll need to actually do the splitting and then a group concatenation method for your result:

;WITH T1 AS
(
    SELECT *
    FROM #Table1 T
    OUTER APPLY (SELECT Item = SUBSTRING(T.States, Number,
                                         CHARINDEX(',',T.States + ',', Number) - 
                                         Number)
                 FROM #Numbers
                 WHERE Number <= CONVERT(INT, LEN(T.States))
                 AND SUBSTRING(',' + T.States, Number, LEN(',')) = ',') N
), T2 AS
(
    SELECT *
    FROM #Table2 T
    OUTER APPLY (SELECT Item = SUBSTRING(T.States, Number,
                                         CHARINDEX(', ',T.States + ', ', Number) - 
                                         Number)
                 FROM #Numbers
                 WHERE Number <= CONVERT(INT, LEN(T.States))
                 AND SUBSTRING(', ' + T.States, Number, LEN(', ')) = ', ') N
), T3 AS
(
    SELECT T1.Div, T1.Dept, T1.Item, COUNT(*) N
    FROM T1 
    LEFT JOIN T2
        ON T1.Div = T2.Div
        AND T1.Dept = T2.Dept
        AND T1.Item = T2.Item
    GROUP BY T1.Div, T1.Dept, T1.Item
)
SELECT  A.Div, 
        A.Dept, 
        States = STUFF((SELECT  ',' + CONVERT(VARCHAR(20), Item) + 
                                ' - (' + CAST(N AS VARCHAR(4)) + ')'
                        FROM T3 
                        WHERE Div = A.Div
                        AND Dept = A.Dept
                    FOR XML PATH(''), TYPE).value('.[1]','nvarchar(max)'),1,1,'')
FROM T3 A
ORDER BY Div, Dept, Item

The results are:

╔══════╦═══════╦════════════════════════════╗
║ Div  ║ Dept  ║           States           ║
╠══════╬═══════╬════════════════════════════╣
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div4 ║ Dept1 ║ NULL                       ║
╚══════╩═══════╩════════════════════════════╝
like image 30
Lamak Avatar answered Sep 30 '22 04:09

Lamak