Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Count sequential matching words in two strings oracle

I want a query that returns the number of sequential match of words in two strings example:

Table

Id  column1               column2     result   
1   'foo bar live'        'foo bar'       2  
2   'foo live tele'       'foo tele'      1  
3   'bar foo live'        'foo bar live'  0 

to get total number of occurrence I am using:

select id, column1,column2,
extractvalue(dbms_xmlgen.getxmltype('select cardinality (
  sys.dbms_debug_vc2coll(''' || replace(lower(column1), ' ', ''',''' ) || ''') multiset intersect
  sys.dbms_debug_vc2coll('''||replace(lower(column2), ' ', ''',''' )||'''))  x from dual'), '//text()') cnt
from table.

Can anyone please suggest a query on similar lines for sequential match also as I want number of sequential matches and number of occurrences shown together.

like image 540
user2342436 Avatar asked Oct 09 '13 11:10

user2342436


2 Answers

Personally, in this situation, I would choose PL/SQL code over plain SQL. Something like:

Package specification:

create or replace package PKG is
  function NumOfSeqWords(
    p_str1 in varchar2,
    p_str2 in varchar2
  ) return number;
end;

Package body:

create or replace package body PKG is
  function NumOfSeqWords(
    p_str1 in varchar2,
    p_str2 in varchar2
  ) return number
  is
    l_str1     varchar2(4000) := p_str1;
    l_str2     varchar2(4000) := p_str2;
    l_res      number  default 0;
    l_del_pos1 number;
    l_del_pos2 number;
    l_word1    varchar2(1000);
    l_word2    varchar2(1000);
  begin
    loop
      l_del_pos1 := instr(l_str1, ' ');
      l_del_pos2 := instr(l_str2, ' ');
      case l_del_pos1
        when 0 
        then l_word1 := l_str1;
             l_str1 := ''; 
        else l_word1 := substr(l_str1, 1, l_del_pos1 - 1);
      end case;
      case l_del_pos2
        when 0 
        then l_word2 := l_str2;
             l_str2 := ''; 
        else l_word2 := substr(l_str2, 1, l_del_pos2 - 1);
      end case;
      exit when (l_word1 <> l_word2) or 
                ((l_word1 is null) or (l_word2 is null));

      l_res := l_res + 1;
      l_str1 := substr(l_str1, l_del_pos1 + 1);
      l_str2 := substr(l_str2, l_del_pos2 + 1);
    end loop;
    return l_res;
  end;
end;

Test case:

 with t1(Id1, col1, col2) as(
   select 1, 'foo bar live'  ,'foo bar'     from dual union all
   select 2, 'foo live tele' ,'foo tele'    from dual union all
   select 3, 'bar foo live'  ,'foo bar live'from dual
  )
  select id1
       , col1
       , col2
       , pkg.NumOfSeqWords(col1, col2) as res
    from t1
  ;

Result:

       ID1 COL1          COL2                RES
---------- ------------- ------------ ----------
         1 foo bar live  foo bar               2
         2 foo live tele foo tele              1
         3 bar foo live  foo bar live          0
like image 69
Nick Krasnov Avatar answered Nov 14 '22 07:11

Nick Krasnov


Why to give up on the query approach. I know it's a bit complicated and I hope someone can work on it to improve it, but working on this during my spare time I was able to survive a an afternoon of calls...

Here on SQLFidlle

SELECT Table1.id,
       Table1.column1,
       Table1.column2,
       max(nvl(t.l,0)) RESULT
FROM (
  SELECT  id,
          column1,
          column2,
          LEVEL l,
          decode(LEVEL,
                     1,
                 substr(column1, 1, instr(column1,' ', 1, LEVEL) -1),
                 substr(column1, 1, (instr(column1,' ', 1, LEVEL )))
                 )  sub1,
          decode(LEVEL,
                     1,
                 substr(column2, 1, instr(column2,' ', 1, LEVEL) -1),
                 substr(column2, 1, (instr(column2,' ', 1, LEVEL )))
                 )  sub2

     FROM (SELECT id,
                  column1 || ' ' column1,
                  column2 || ' ' column2
             FROM Table1)
    WHERE  decode(LEVEL,
                      1,
                  substr(column1, 1, instr(column1,' ', 1, LEVEL) -1),
                  substr(column1, 1, (instr(column1,' ', 1, LEVEL )))
                 )  =
           decode(LEVEL,
                      1,
                  substr(column2, 1, instr(column2,' ', 1, LEVEL) -1),
                  substr(column2, 1, (instr(column2,' ', 1, LEVEL )))
                 )
  START WITH column1 IS NOT NULL
  CONNECT BY instr(column1,' ', 1, LEVEL) > 0
  ) t
RIGHT OUTER JOIN Table1 ON trim(t.column1) = Table1.column1
                       AND trim(t.column2) = Table1.column2
                       AND t.id = Table1.id
GROUP BY  Table1.id,
          Table1.column1,
          Table1.column2
ORDER BY  max(nvl(t.l,0)) DESC
like image 23
mucio Avatar answered Nov 14 '22 06:11

mucio