Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

parsing data out of a single data point

I have a nasty data set:

+----+------------------------------------------------------------------------------+
| PK |                                 Medications                                  |
+----+------------------------------------------------------------------------------+
|  1 | NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil                  |
|  2 | celexa, lortab, lyrica, ambien, xanax                                        |
|  3 | adipex                                                                       |
|  4 | opana, roxicodone                                                            |
|  5 | adderall                                                                     |
|  6 | hydrocodone/apap                                                             |
|  7 | NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien            |
|  8 | prozac                                                                       |
|  9 | flexeril                                                                     |
| 10 | soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol |
+----+------------------------------------------------------------------------------+

Please keep in mind that this is just 2 columns.

What I would like to return is simply a 1 column list of distinct medications across the entire dataset:

NAPROXEN
 neurontin
 DOCUSATE
 HYDROCODONE
 BACLOFEN
 advil
celexa
 lortab
 lyrica
 ambien
 xanax
adipex
opana

What is the best way to go about this?

Thank you so much for your guidance.

like image 722
Alex Gordon Avatar asked Jan 15 '23 21:01

Alex Gordon


2 Answers

DECLARE @Medications TABLE
(
     PK BIGINT PRIMARY KEY IDENTITY(1,1)
    ,Medications NVARCHAR(4000)
)

/* Populate the table with example records*/
INSERT INTO @Medications(Medications) 
VALUES ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
      ,('celexa, lortab, lyrica, ambien, xanax   ')
      ,('adipex')
      ,('opana roxicodone')
      ,('adderall')
      ,('hydrocodone/apap')
      ,('NEXIUM,METOPROLOL,lipitor,VERAPAMIL,ASPIRIN,WARFARIN,ambien')
      ,('prozac')
      ,('flexeril')
      ,('soma,LITHIUM,MULTI-VITAMIN,fentanyl patch,percocet,PROPANOLOL,tegretol');

Option 1

select distinct Medication = ltrim(rtrim(O.splitdata ))
from
(
select *,
cast('<X>'+replace(F.Medications,',','</X><X>')+'</X>' as XML) as xmlfilter
from @Medications F
)F1
cross apply
( 
 select fdata.D.value('.','varchar(50)') as splitdata 
 from f1.xmlfilter.nodes('X') as fdata(D)) O
where O.splitdata <> ''
group by O.splitdata

Option 2

SELECT DISTINCT display_term
FROM @Medications
CROSS APPLY sys.dm_fts_parser('"' + Medications + '"', 1033, 0,0)
where display_term NOT LIKE 'nn%'

Option 3

SELECT  DISTINCT
    LTRIM(RTRIM(SUBSTRING(Medications, Number ,CHARINDEX(',', Medications + ',', Number ) - Number))) AS Medication 
FROM @Medications 
JOIN master..spt_values ON Number <= DATALENGTH(Medications) + 1  AND type='P'
AND SUBSTRING(',' + Medications, Number , 1) = ','  
like image 160
Niladri Biswas Avatar answered Jan 21 '23 15:01

Niladri Biswas


This might be the solution you are looking for:

;WITH SourceDate(PK,MedicationsXML) AS
(
    SELECT PK
          ,CAST(N'<r><![CDATA[' + REPLACE(Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
    FROM @Medications
)
SELECT DISTINCT Medicament
FROM SourceDate
CROSS APPLY (SELECT DISTINCT RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(250)'))) AS Medicament FROM MedicationsXML.nodes('//r') Tbl(Col)) AS List

And this is full working example (I add duplicated records to show that it is returning only the distinct value:

SET NOCOUNT ON
GO

    /* Declare table that holds the data*/
    DECLARE @Medications TABLE
    (
         PK BIGINT PRIMARY KEY IDENTITY(1,1)
        ,Medications NVARCHAR(4000)
    )

    /* Populate the table with exmaple records*/
    INSERT INTO @Medications(Medications)
    VALUES ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
          ,('celexa, lortab, lyrica, ambien, xanax   ')
          ,('adipex')
          ,('opana, roxicodone')
          ,('adderall')
          ,('hydrocodone/apap')
          ,('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien')
          ,('prozac')
          ,('flexeril')
          ,('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol')
          ,('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
          ,('celexa, lortab, lyrica, ambien, xanax   ')
          ,('adipex')
          ,('opana, roxicodone')
          ,('adderall')
          ,('hydrocodone/apap')
          ,('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien')
          ,('prozac')
          ,('flexeril')
          ,('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol')

    ;WITH SourceDate(PK,MedicationsXML) AS
    (
        SELECT PK
              ,CAST(N'<r><![CDATA[' + REPLACE(Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
        FROM @Medications
    )
    SELECT DISTINCT Medicament
    FROM SourceDate
    CROSS APPLY (SELECT DISTINCT RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(250)'))) AS Medicament FROM MedicationsXML.nodes('//r') Tbl(Col)) AS List


SET NOCOUNT OFF
GO

Feel free to ask any questions.

like image 27
gotqn Avatar answered Jan 21 '23 13:01

gotqn