Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Updating row columns from a comma-separated column from the same table

I have the following table definition

CREATE TABLE _Table 
(
    [Pat] NVARCHAR(8), 
    [Codes] NVARCHAR(50), 
    [C1] NVARCHAR(6), 
    [C2] NVARCHAR(6), 
    [C3] NVARCHAR(6), 
    [C4] NVARCHAR(6), 
    [C5] NVARCHAR(6)
);
GO

INSERT INTO _Table ([Pat], [Codes], [C1], [C2], [C3], [C4], [C5])
VALUES
    ('Pat1', 'U212,Y973,Y982', null, null, null, null, null),
    ('Pat2', 'M653', null, null, null, null, null), 
    ('Pat3', 'U212,Y973,Y983,Z924,Z926', null, null, null, null, null);
GO  

SQL Fiddle here.

Now, I would like to split the codes for each row and populate the Cn columns so we end up with

Pat     Codes                       C1      C2      C3      C4      C5
Pat1    'U212,Y973,Y982'            U212    Y973    Y982    NULL    NULL
Pat2    'M653'                      M653    NULL    NULL    NULL    NULL
Pat3    'U212,Y973,Y983,Z924,Z926'  U212    Y973    Y983    Z924    Z926

I am looking at dynamic SQL but is there a better way...

I have started down the CTE route, but I am weak here. I am essentially looping, removing the first comma separated code and using left to get that code and selected it as C1.

;WITH tmp([Pat], [Codes], [C1], [C2], [C3], [C4], [C5]) AS
(
    SELECT
        Pat,
        STUFF(Codes, 1, CHARINDEX(',', Codes + ','), ''), 
        LEFT(Codes, CHARINDEX(',', Codes + ',') - 1), 
        [C2], 
        [C3], 
        [C4], 
        [C5]
    FROM _Table 
    UNION all
    SELECT
        Pat,
        STUFF(Codes, 1, CHARINDEX(',', Codes + ','), ''), 
        LEFT(Codes, CHARINDEX(',', Codes + ',') - 1), 
        [C2], 
        [C3], 
        [C4], 
        [C5]
    FROM _Table 
    WHERE
        Codes > ''
)
SELECT Pat, Codes,  [C1], [C2], [C3], [C4], [C5] 
FROM tmp 
ORDER BY Pat

This works for one code, but how do I do all 5? Note, in practice this could increase to N codes.

like image 289
MoonKnight Avatar asked May 17 '18 15:05

MoonKnight


2 Answers

If I understand the requirement correctly this is extremely simple. No splitting or other type of function, Dynamic SQL, recursive CTEs, PIVOTING or any other skulduggery is necessary.

To perform the "split" you can use CROSS APPLY like so:

SELECT 
  Pat,
  Codes,
  C1 = SUBSTRING(Codes,1,ISNULL(d1.d-1,8000)),
  C2 = SUBSTRING(Codes,d1.d+1, d2.d-d1.d-1),
  C3 = SUBSTRING(Codes,d2.d+1, d3.d-d2.d-1),
  C4 = SUBSTRING(Codes,d3.d+1, d4.d-d3.d-1),
  C5 = SUBSTRING(Codes,d4.d+1, 8000)
FROM _Table
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes),0)))        d1(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d1.d+1),0))) d2(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d2.d+1),0))) d3(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d3.d+1),0))) d4(d);

Returns

Pat      Codes                         C1    C2     C3    C4    C5    
-------- ----------------------------- ----- ------ ----- ----- ------
Pat1     U212,Y973,Y982                U212  Y973   NULL  NULL  NULL
Pat2     M653                          M653  NULL   NULL  NULL  NULL
Pat3     U212,Y973,Y983,Z924,Z926      U212  Y973   Y983  Z924  Z926

Note the super-simple and utltra-efficient execution plan:

enter image description here

You can simplify this even further if the codes are always four character like so:

SELECT
  Pat,
  Codes,
  C1 = NULLIF(SUBSTRING(Codes,1,4),''),
  C2 = NULLIF(SUBSTRING(Codes,6,4),''),
  C3 = NULLIF(SUBSTRING(Codes,11,4),''),
  C4 = NULLIF(SUBSTRING(Codes,16,4),''),
  C2 = NULLIF(SUBSTRING(Codes,21,4),'')
FROM _Table;

To perform the update you would do this for first solution:

UPDATE _Table
SET 
  C1 = SUBSTRING(Codes,1,ISNULL(d1.d-1,8000)),
  C2 = SUBSTRING(Codes,d1.d+1, d2.d-d1.d-1),
  C3 = SUBSTRING(Codes,d2.d+1, d3.d-d2.d-1),
  C4 = SUBSTRING(Codes,d3.d+1, d4.d-d3.d-1),
  C5 = SUBSTRING(Codes,d4.d+1, 8000)
FROM _Table
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes),0)))        d1(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d1.d+1),0))) d2(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d2.d+1),0))) d3(d)
CROSS APPLY (VALUES (NULLIF(CHARINDEX(',',Codes,d3.d+1),0))) d4(d);

If, again, the codes are only four characters long then the update is so easy it feels like cheating:

UPDATE _Table
SET C1 = NULLIF(SUBSTRING(Codes,1,4),''),
    C2 = NULLIF(SUBSTRING(Codes,6,4),''),
    C3 = NULLIF(SUBSTRING(Codes,11,4),''),
    C4 = NULLIF(SUBSTRING(Codes,16,4),''),
    C5 = NULLIF(SUBSTRING(Codes,21,4),'');
like image 164
Alan Burstein Avatar answered Sep 30 '22 20:09

Alan Burstein


I have started down the CTE route, but I am weak here. I am essentially looping, removing the first comma separated code and using left to get that code and selected it as C1.

There are many possible' CSV splitters in SQL Server. I just want to extend your idea with cte (recursive version combined with conditional aggregation):

;WITH tmp([Pat], [Codes],x, lvl) AS
(
  SELECT
   Pat,
   Codes = CAST(STUFF(Codes,1,CHARINDEX(',', Codes + ','), '')AS NVARCHAR(MAX)), 
   x = CAST(LEFT(Codes, CHARINDEX(',', Codes + ',') - 1) AS NVARCHAR(MAX)),
   lvl = 1
  FROM _Table
  -- WHERE c1 IS NULL AND c2 IS NULL AND ...
  -- to avoid recalculating the same rows if run many times
  UNION ALL
  SELECT  Pat,
    Codes = STUFF(Codes,1,CHARINDEX(',', Codes + ','), ''), 
    x= LEFT(Codes, CHARINDEX(',', Codes + ',') - 1),
    lvl = lvl+1
  FROM tmp
  WHERE Codes > ''
)
SELECT Pat
    ,c1 = MAX(IIF(lvl=1, x, NULL))
    ,c2 = MAX(IIF(lvl=2, x, NULL))
    ,c3 = MAX(IIF(lvl=3, x, NULL))
    ,c4 = MAX(IIF(lvl=4, x, NULL))
    ,c5 = MAX(IIF(lvl=5, x, NULL))
    -- add more if necessary
FROM tmp 
GROUP BY Pat
-- OPTION (MAXRECURSION 0);

DBFiddle Demo


And UPDATE:

;WITH tmp([Pat], [Codes],x, lvl) AS
(
    SELECT
        Pat,
        Codes=CAST(STUFF(Codes,1,CHARINDEX(',',Codes+','),'')AS NVARCHAR(MAX)), 
        x = CAST(LEFT(Codes, CHARINDEX(',', Codes + ',') - 1) AS NVARCHAR(MAX)),
        lvl = 1
    FROM _Table
    UNION ALL
    SELECT  Pat,
        Codes = STUFF(Codes, 1, CHARINDEX(',', Codes + ','), ''), 
        x= LEFT(Codes, CHARINDEX(',', Codes + ',') - 1),
        lvl = lvl+1
    FROM tmp
    WHERE Codes > ''
), cte2 AS (
SELECT Pat
    ,c1 = MAX(IIF(lvl=1, x, NULL))
    ,c2 = MAX(IIF(lvl=2, x, NULL))
    ,c3 = MAX(IIF(lvl=3, x, NULL))
    ,c4 = MAX(IIF(lvl=4, x, NULL))
    ,c5 = MAX(IIF(lvl=5, x, NULL))
FROM tmp 
GROUP BY Pat
)
UPDATE _Table
SET c1 = c.c1
   ,c2 = c.c2
   ,c3 = c.c3
   ,c4 = c.c4
   ,c5 = c.c5
FROM _Table t
JOIN cte2 c
  ON t.Pat = c.Pat
 OPTION (MAXRECURSION 0);

DBFiddle Demo

Output:

╔══════╦══════════════════════════╦══════╦══════╦══════╦══════╦══════╗
║ Pat  ║          Codes           ║  C1  ║  C2  ║  C3  ║  C4  ║  C5  ║
╠══════╬══════════════════════════╬══════╬══════╬══════╬══════╬══════╣
║ Pat1 ║ U212,Y973,Y982           ║ U212 ║ Y973 ║ Y982 ║ null ║ null ║
║ Pat2 ║ M653                     ║ M653 ║ null ║ null ║ null ║ null ║
║ Pat3 ║ U212,Y973,Y983,Z924,Z926 ║ U212 ║ Y973 ║ Y983 ║ Z924 ║ Z926 ║
╚══════╩══════════════════════════╩══════╩══════╩══════╩══════╩══════╝

Final thought: The correct way is to normalize schema.

like image 26
Lukasz Szozda Avatar answered Sep 30 '22 19:09

Lukasz Szozda