Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Deduplicate Oracle xmlagg list

Using Oracle 11.2 i'm trying to select 2 grouped lists from the tables below, here's my code:

CREATE  TABLE tmp_main AS (
  SELECT rownum col1, 'txt_' || to_char(rownum) Col2 FROM dual 
    CONNECT BY rownum<=2
);

CREATE TABLE tmp_keys AS (
  SELECT DECODE(rownum,1,1,2,1,3,1,4,2,5,2,6,2) col1, 'key_' || to_char(rownum) key1 , rownum seq FROM dual 
     CONNECT BY rownum<=6
);

CREATE TABLE tmp_line AS (
  SELECT DECODE(rownum,1,1,2,1,3,1,4,1,5,2,6,2,7,2,8,2) col1, 'line_' || DECODE(rownum,2,1,3,1,4,2,5,3,7,3,8,4) line1 , rownum seq   FROM dual 
     CONNECT BY rownum<=8
);

update tmp_line set line1=null where line1='line_';
update tmp_keys set seq=null where col1=1;

tmp_keys.seq can be null so I need to order by seq first then key1 This is what i've tried:

SELECT  m.col1,m.col2,
RTRIM(XMLAGG(XMLELEMENT(E,k.key1 , ',').EXTRACT('//text()') ORDER BY k.seq,k.key1 ).GetClobVal(),',') as key_list ,
RTRIM(XMLAGG(XMLELEMENT(E,l.line1 || ',').EXTRACT('//text()') ORDER BY l.seq ).GetClobVal(),',')  line_list
FROM tmp_main m
JOIN tmp_keys k
ON m.col1=k.col1
JOIN tmp_line l
ON m.col1=l.col1
group by m.col1,col2;

Which gives:

col1 col2   key_list                                                                    line_list
1   txt_1   key_1,key_1,key_1,key_1,key_2,key_2,key_2,key_2,key_3,key_3,key_3,key_3     ,,,line_1,line_1,line_1,line_1,line_1,line_1,line_2,line_2,line_2
2   txt_2   key_4,key_4,key_4,key_4,key_5,key_5,key_5,key_5,key_6,key_6,key_6,key_6     line_3,line_3,line_3,,,,line_3,line_3,line_3,line_4,line_4,line_4

ie duplicates.

What I would like is:

col1 col2   key_list                    line_list
1   txt_1   key_1,key_2,key_3           ,line_1,line_1,line_2
2   txt_2   key_3,key_4,key_5           line_3,,line_3,line_4

ie null line1 values are kept.

Caveats:

  • The real query is much bigger so 1 table scan would be nice as speed is important.
  • The 2 lists could be > 4000 chars, so no listagg or similar functions allowed (which is why i've used xmlagg and GetClobVal())

Any help appreciated

like image 428
DS. Avatar asked Jun 18 '26 04:06

DS.


1 Answers

First, assign a row_number for the key and line values, starting at 1 for each key or line seq.

So you'll partition by the final grouping values and the relevant seq. Then sort by the key/line values:

row_number() over ( 
  partition by m.col1,col2,k.seq
  order by key1
) rnk, 
row_number() over ( 
  partition by m.col1,col2,l.seq
  order by line1
) rnl

Then aggregate only those rows where this row number = 1:

with rws as (
  select 
    m.*, line1, key1,
    l.seq seql,k.seq seqk,
    row_number() over ( 
      partition by m.col1,col2,k.seq
      order by key1
    ) rnk, 
    row_number() over ( 
      partition by m.col1,col2,l.seq
      order by line1
    ) rnl
  from tmp_main m
  join tmp_keys k
  on   m.col1=k.col1
  join tmp_line l
  on   m.col1=l.col1
)
  select 
    col1,col2,
    rtrim(
      xmlagg(
        xmlelement(
          e,
          case when rnk = 1 then key1 || ',' end 
        ).extract('//text()') 
        order by seqk
      ).getclobval(),','
    ) key_list ,
    rtrim(
      xmlagg(
        xmlelement(
          e,
          case when rnl = 1 then line1 || ',' end 
        ).extract('//text()') 
        order by seql 
      ).getclobval(),','
    ) line_list
  from   rws
  group  by col1,col2;

COL1    COL2     KEY_LIST             LINE_LIST               
      1 txt_1    key_1,key_2,key_3    ,line_1,line_1,line_2    
      2 txt_2    key_4,key_5,key_6    line_3,,line_3,line_4 
like image 173
Chris Saxon Avatar answered Jun 20 '26 18:06

Chris Saxon