Here is the DDL --
create table tbl1 (
id number,
value varchar2(50)
);
insert into tbl1 values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 values (2, 'AA, UT, SX');
insert into tbl1 values (3, 'UT, SK, SX, ZF');
Notice, here value is comma separated string.
But, we need result like following-
ID VALUE
-------------
1 AA
1 UT
1 BT
1 SK
1 SX
2 AA
2 UT
2 SX
3 UT
3 SK
3 SX
3 ZF
How do we write SQL for this?
I agree that this is a really bad design. Try this if you can't change that design:
select distinct id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
order by id, level;
OUPUT
id value level
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Credits to this
To remove duplicates in a more elegant and efficient way (credits to @mathguy)
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and PRIOR id = id
and PRIOR SYS_GUID() is not null
order by id, level;
If you want an "ANSIer" approach go with a CTE:
with t (id,res,val,lev) as (
select id, trim(regexp_substr(value,'[^,]+', 1, 1 )) res, value as val, 1 as lev
from tbl1
where regexp_substr(value, '[^,]+', 1, 1) is not null
union all
select id, trim(regexp_substr(val,'[^,]+', 1, lev+1) ) res, val, lev+1 as lev
from t
where regexp_substr(val, '[^,]+', 1, lev+1) is not null
)
select id, res,lev
from t
order by id, lev;
OUTPUT
id val lev
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Another recursive approach by MT0 but without regex:
WITH t ( id, value, start_pos, end_pos ) AS
( SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
UNION ALL
SELECT id,
value,
end_pos + 1,
INSTR( value, ',', end_pos + 1 )
FROM t
WHERE end_pos > 0
)
SELECT id,
SUBSTR( value, start_pos, DECODE( end_pos, 0, LENGTH( value ) + 1, end_pos ) - start_pos ) AS value
FROM t
ORDER BY id,
start_pos;
I've tried 3 approaches with a 30000 rows dataset and 118104 rows returned and got the following average results:
@Mathguy has also tested with a bigger dataset:
In all cases the recursive query (I only tested the one with regular substr and instr) does better, by a factor of 2 to 5. Here are the combinations of # of strings / tokens per string and CTAS execution times for hierarchical vs. recursive, hierarchical first. All times in seconds
This will get the values without requiring you to remove duplicates or having to use a hack of including SYS_GUID()
or DBMS_RANDOM.VALUE()
in the CONNECT BY
:
SELECT t.id,
v.COLUMN_VALUE AS value
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v
Update:
Returning the index of the element in the list:
Option 1 - Return a UDT:
CREATE TYPE string_pair IS OBJECT( lvl INT, value VARCHAR2(4000) );
/
CREATE TYPE string_pair_table IS TABLE OF string_pair;
/
SELECT t.id,
v.*
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT string_pair( level, TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS string_pair_table
)
) v;
Option 2 - Use ROW_NUMBER()
:
SELECT t.id,
v.COLUMN_VALUE AS value,
ROW_NUMBER() OVER ( PARTITION BY id ORDER BY ROWNUM ) AS lvl
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v;
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With