Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Mark rows within comments

I have a table like this:

name line text
aaa 1 some text
aaa 2 /* some text */
aaa 3 some text
aaa 4 /*
aaa 5 some text
aaa 6 some text
aaa 7 */
aaa 8 some text
aaa 9 /*
aaa 10 * some text
aaa 11 * some text
aaa 12 */
aaa 13 some text
with t as
(
    select 'aaa' name,  1 line, 'some text' text    from dual union all
    select 'aaa',       2,      '/* some text */'   from dual union all
    select 'aaa',       3,      'some text'         from dual union all
    select 'aaa',       4,      '/*'                from dual union all
    select 'aaa',       5,      'some text'         from dual union all
    select 'aaa',       6,      'some text'         from dual union all
    select 'aaa',       7,      '*/'                from dual union all
    select 'aaa',       8,      'some text'         from dual union all
    select 'aaa',       9,      '/*'                from dual union all
    select 'aaa',      10,      '* some text'       from dual union all
    select 'aaa',      11,      '* some text'       from dual union all
    select 'aaa',      12,      '*/'                from dual union all
    select 'aaa',      13,      'some text'         from dual
)
select t.*
from t
order by name, line

How can I calculate a value for a new column where lines within line or block comments would be marked as 1 and the rest as 0?

Like this:

name line text is_comment
aaa 1 some text 0
aaa 2 /* some text */ 1
aaa 3 some text 0
aaa 4 /* 1
aaa 5 some text 1
aaa 6 some text 1
aaa 7 */ 1
aaa 8 some text 0
aaa 9 /* 1
aaa 10 * some text 1
aaa 11 * some text 1
aaa 12 */ 1
aaa 13 some text 0
like image 976
Zurab-D Avatar asked Dec 21 '25 22:12

Zurab-D


2 Answers

So its easy to find the row the comment starts or ends on, using the relevant like pattern.

Then to find rows which are in-between the opening and closing comment we can count how many opening comments we have seen to-date and how many closing comments we have seen to-date, and if its 1 we have recently opening a comment but not yet closed it.

  case
  -- We always count the opening comment
  when text like '/*%'
  -- We always count the closing comment
  or text like '%*/'
  -- And we can detect in-between rows when the difference
  -- between the number of opening comments and the number of closing comments = 1
  or sum(case
    when text like '/*%'
    then 1
    else 0
    end) over (partition by name order by line)
    - sum(case
    when text like '%*/'
    then 1
    else 0
    end) over (partition by name order by line) = 1
  then 1
  else 0
  end as is_comment

db<>fiddle

like image 158
Dale K Avatar answered Dec 24 '25 19:12

Dale K


If you can have /*/ within your text then looking for /* and, independently, looking for */ is going to give the wrong answer.

A way to deal with that edge-case is to aggregate your text into a single string and then use a regular expression to match /\*.*?\*/ which looks for the complete comment; then you can find the start-and-end of each comment and line check whether each line overlaps with one-or-more comments.

WITH line_bounds (name, line, text, line_start, line_end) AS (
  SELECT name,
         line,
         text,
         SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
           - LENGTH(text),
         SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
  FROM   table_name
),
comment_bounds (name, comment_start, comment_end) AS (
  SELECT name,
         comment_start,
         comment_end
  FROM   (
           SELECT name,
                  LISTAGG(text, CHR(10)) WITHIN GROUP (ORDER BY line) AS full_text
           FROM   table_name
           GROUP BY name
         )
         CROSS JOIN LATERAL (
           SELECT REGEXP_INSTR(full_text, '/\*.*?\*/', 1, LEVEL, 0, 'n')
                    AS comment_start,
                  REGEXP_INSTR(full_text, '/\*.*?\*/', 1, LEVEL, 1, 'n')
                    AS comment_end
           FROM   DUAL
           CONNECT BY LEVEL <= REGEXP_COUNT(full_text, '/\*.*?\*/', 1, 'n')
         )
)
SELECT name,
       line,
       text,
       ( SELECT COUNT(*)
         FROM   comment_bounds c
         WHERE  c.name = l.name
         AND    c.comment_end > l.line_start
         AND    c.comment_start < l.line_end ) AS is_comment
FROM   line_bounds l

Or, using simple string functions and a recursive query (rather than slower regular expressions):

WITH line_bounds (name, line, text, line_start, line_end) AS (
  SELECT name,
         line,
         text,
         SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
           - LENGTH(text),
         SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
  FROM   table_name
),
full_text (name, full_text) AS (
  SELECT name,
         LISTAGG(text, CHR(10)) WITHIN GROUP (ORDER BY line)
  FROM   table_name
  GROUP BY name
),
comment_bounds (name, full_text, comment_start, comment_end) AS (
  SELECT name,
         full_text,
         INSTR(full_text, '/*', 1),
         INSTR(full_text, '*/', INSTR(full_text, '/*', 1) + 2) + 2
  FROM   full_text
  WHERE  INSTR(full_text, '/*', 1) > 0
  AND    INSTR(full_text, '*/', INSTR(full_text, '/*', 1) + 2) > 0
UNION ALL
  SELECT name,
         full_text,
         INSTR(full_text, '/*', comment_end),
         INSTR(full_text, '*/', INSTR(full_text, '/*', comment_end) + 2) + 2
  FROM   comment_bounds
  WHERE  INSTR(full_text, '/*', comment_end) > 0
  AND    INSTR(full_text, '*/', INSTR(full_text, '/*', comment_end) + 2) > 0
)
CYCLE name, comment_start, comment_end SET is_cycle TO 1 DEFAULT 0
SELECT name,
       line,
       text,
       ( SELECT COUNT(*)
         FROM   comment_bounds c
         WHERE  c.name = l.name
         AND    c.comment_end > l.line_start
         AND    c.comment_start < l.line_end ) AS is_comment
FROM   line_bounds l

Which, for the sample data:

CREATE TABLE table_name (name, line, text) AS
    select 'aaa',  1, 'some text'       from dual union all
    select 'aaa',  2, '/* some text */' from dual union all
    select 'aaa',  3, 'some text'       from dual union all
    select 'aaa',  4, '/*'              from dual union all
    select 'aaa',  5, 'some text'       from dual union all
    select 'aaa',  6, 'some text'       from dual union all
    select 'aaa',  7, '*/'              from dual union all
    select 'aaa',  8, 'some text'       from dual union all
    select 'aaa',  9, '/*'              from dual union all
    select 'aaa', 10, '* some text'     from dual union all
    select 'aaa', 11, '* some text'     from dual union all
    select 'aaa', 12, '*/'              from dual union all
    select 'aaa', 13, 'some text'       from dual union all
    select 'bbb',  1, '/*/'             from dual union all
    select 'bbb',  2, 'comment'         from dual union all
    select 'bbb',  3, '/*/'             from dual union all
    select 'bbb',  4, 'not comment'     from dual union all
    select 'bbb',  5, '/*/ comment /*/' from dual union all
    select 'bbb',  6, 'not comment'     from dual union all
    select 'bbb',  7, '/* c1 */ nc1 /* c2 */' from dual union all
    select 'ccc',  1, '*/ no comments'  from dual union all
    select 'ccc',  2, '*/ no comments'  from dual union all
    select 'ddd',  1, '/*/* comment'    from dual union all
    select 'ddd',  2, '/*/*'            from dual union all
    select 'ddd',  3, 'not comment */'  from dual union all
    select 'ddd',  4, 'not comment */* comment /* comment /*/ not comment' from dual union all
    select 'ddd',  5, '/*/*/*/*/'       from dual

Both output:

NAME LINE TEXT IS_COMMENT
aaa 1 some text 0
aaa 2 /* some text */ 1
aaa 3 some text 0
aaa 4 /* 1
aaa 5 some text 1
aaa 6 some text 1
aaa 7 */ 1
aaa 8 some text 0
aaa 9 /* 1
aaa 10 * some text 1
aaa 11 * some text 1
aaa 12 */ 1
aaa 13 some text 0
bbb 1 /*/ 1
bbb 2 comment 1
bbb 3 /*/ 1
bbb 4 not comment 0
bbb 5 /*/ comment /*/ 1
bbb 6 not comment 0
bbb 7 /* c1 */ nc1 /* c2 */ 2
ccc 1 */ no comments 0
ccc 2 */ no comments 0
ddd 1 /*/* comment 1
ddd 2 /*/* 1
ddd 3 not comment */ 0
ddd 4 not comment */* comment /* comment /*/ not comment 1
ddd 5 /*/*/*/*/ 1

fiddle

like image 37
MT0 Avatar answered Dec 24 '25 19:12

MT0



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!