I have a table like this:
| name | line | text |
|---|---|---|
| aaa | 1 | some text |
| aaa | 2 | /* some text */ |
| aaa | 3 | some text |
| aaa | 4 | /* |
| aaa | 5 | some text |
| aaa | 6 | some text |
| aaa | 7 | */ |
| aaa | 8 | some text |
| aaa | 9 | /* |
| aaa | 10 | * some text |
| aaa | 11 | * some text |
| aaa | 12 | */ |
| aaa | 13 | some text |
with t as
(
select 'aaa' name, 1 line, 'some text' text from dual union all
select 'aaa', 2, '/* some text */' from dual union all
select 'aaa', 3, 'some text' from dual union all
select 'aaa', 4, '/*' from dual union all
select 'aaa', 5, 'some text' from dual union all
select 'aaa', 6, 'some text' from dual union all
select 'aaa', 7, '*/' from dual union all
select 'aaa', 8, 'some text' from dual union all
select 'aaa', 9, '/*' from dual union all
select 'aaa', 10, '* some text' from dual union all
select 'aaa', 11, '* some text' from dual union all
select 'aaa', 12, '*/' from dual union all
select 'aaa', 13, 'some text' from dual
)
select t.*
from t
order by name, line
How can I calculate a value for a new column where lines within line or block comments would be marked as 1 and the rest as 0?
Like this:
| name | line | text | is_comment |
|---|---|---|---|
| aaa | 1 | some text | 0 |
| aaa | 2 | /* some text */ | 1 |
| aaa | 3 | some text | 0 |
| aaa | 4 | /* | 1 |
| aaa | 5 | some text | 1 |
| aaa | 6 | some text | 1 |
| aaa | 7 | */ | 1 |
| aaa | 8 | some text | 0 |
| aaa | 9 | /* | 1 |
| aaa | 10 | * some text | 1 |
| aaa | 11 | * some text | 1 |
| aaa | 12 | */ | 1 |
| aaa | 13 | some text | 0 |
So its easy to find the row the comment starts or ends on, using the relevant like pattern.
Then to find rows which are in-between the opening and closing comment we can count how many opening comments we have seen to-date and how many closing comments we have seen to-date, and if its 1 we have recently opening a comment but not yet closed it.
case
-- We always count the opening comment
when text like '/*%'
-- We always count the closing comment
or text like '%*/'
-- And we can detect in-between rows when the difference
-- between the number of opening comments and the number of closing comments = 1
or sum(case
when text like '/*%'
then 1
else 0
end) over (partition by name order by line)
- sum(case
when text like '%*/'
then 1
else 0
end) over (partition by name order by line) = 1
then 1
else 0
end as is_comment
db<>fiddle
If you can have /*/ within your text then looking for /* and, independently, looking for */ is going to give the wrong answer.
A way to deal with that edge-case is to aggregate your text into a single string and then use a regular expression to match /\*.*?\*/ which looks for the complete comment; then you can find the start-and-end of each comment and line check whether each line overlaps with one-or-more comments.
WITH line_bounds (name, line, text, line_start, line_end) AS (
SELECT name,
line,
text,
SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
- LENGTH(text),
SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
FROM table_name
),
comment_bounds (name, comment_start, comment_end) AS (
SELECT name,
comment_start,
comment_end
FROM (
SELECT name,
LISTAGG(text, CHR(10)) WITHIN GROUP (ORDER BY line) AS full_text
FROM table_name
GROUP BY name
)
CROSS JOIN LATERAL (
SELECT REGEXP_INSTR(full_text, '/\*.*?\*/', 1, LEVEL, 0, 'n')
AS comment_start,
REGEXP_INSTR(full_text, '/\*.*?\*/', 1, LEVEL, 1, 'n')
AS comment_end
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(full_text, '/\*.*?\*/', 1, 'n')
)
)
SELECT name,
line,
text,
( SELECT COUNT(*)
FROM comment_bounds c
WHERE c.name = l.name
AND c.comment_end > l.line_start
AND c.comment_start < l.line_end ) AS is_comment
FROM line_bounds l
Or, using simple string functions and a recursive query (rather than slower regular expressions):
WITH line_bounds (name, line, text, line_start, line_end) AS (
SELECT name,
line,
text,
SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
- LENGTH(text),
SUM(LENGTH(text) + 1) OVER (PARTITION BY name ORDER BY line)
FROM table_name
),
full_text (name, full_text) AS (
SELECT name,
LISTAGG(text, CHR(10)) WITHIN GROUP (ORDER BY line)
FROM table_name
GROUP BY name
),
comment_bounds (name, full_text, comment_start, comment_end) AS (
SELECT name,
full_text,
INSTR(full_text, '/*', 1),
INSTR(full_text, '*/', INSTR(full_text, '/*', 1) + 2) + 2
FROM full_text
WHERE INSTR(full_text, '/*', 1) > 0
AND INSTR(full_text, '*/', INSTR(full_text, '/*', 1) + 2) > 0
UNION ALL
SELECT name,
full_text,
INSTR(full_text, '/*', comment_end),
INSTR(full_text, '*/', INSTR(full_text, '/*', comment_end) + 2) + 2
FROM comment_bounds
WHERE INSTR(full_text, '/*', comment_end) > 0
AND INSTR(full_text, '*/', INSTR(full_text, '/*', comment_end) + 2) > 0
)
CYCLE name, comment_start, comment_end SET is_cycle TO 1 DEFAULT 0
SELECT name,
line,
text,
( SELECT COUNT(*)
FROM comment_bounds c
WHERE c.name = l.name
AND c.comment_end > l.line_start
AND c.comment_start < l.line_end ) AS is_comment
FROM line_bounds l
Which, for the sample data:
CREATE TABLE table_name (name, line, text) AS
select 'aaa', 1, 'some text' from dual union all
select 'aaa', 2, '/* some text */' from dual union all
select 'aaa', 3, 'some text' from dual union all
select 'aaa', 4, '/*' from dual union all
select 'aaa', 5, 'some text' from dual union all
select 'aaa', 6, 'some text' from dual union all
select 'aaa', 7, '*/' from dual union all
select 'aaa', 8, 'some text' from dual union all
select 'aaa', 9, '/*' from dual union all
select 'aaa', 10, '* some text' from dual union all
select 'aaa', 11, '* some text' from dual union all
select 'aaa', 12, '*/' from dual union all
select 'aaa', 13, 'some text' from dual union all
select 'bbb', 1, '/*/' from dual union all
select 'bbb', 2, 'comment' from dual union all
select 'bbb', 3, '/*/' from dual union all
select 'bbb', 4, 'not comment' from dual union all
select 'bbb', 5, '/*/ comment /*/' from dual union all
select 'bbb', 6, 'not comment' from dual union all
select 'bbb', 7, '/* c1 */ nc1 /* c2 */' from dual union all
select 'ccc', 1, '*/ no comments' from dual union all
select 'ccc', 2, '*/ no comments' from dual union all
select 'ddd', 1, '/*/* comment' from dual union all
select 'ddd', 2, '/*/*' from dual union all
select 'ddd', 3, 'not comment */' from dual union all
select 'ddd', 4, 'not comment */* comment /* comment /*/ not comment' from dual union all
select 'ddd', 5, '/*/*/*/*/' from dual
Both output:
| NAME | LINE | TEXT | IS_COMMENT |
|---|---|---|---|
| aaa | 1 | some text | 0 |
| aaa | 2 | /* some text */ | 1 |
| aaa | 3 | some text | 0 |
| aaa | 4 | /* | 1 |
| aaa | 5 | some text | 1 |
| aaa | 6 | some text | 1 |
| aaa | 7 | */ | 1 |
| aaa | 8 | some text | 0 |
| aaa | 9 | /* | 1 |
| aaa | 10 | * some text | 1 |
| aaa | 11 | * some text | 1 |
| aaa | 12 | */ | 1 |
| aaa | 13 | some text | 0 |
| bbb | 1 | /*/ | 1 |
| bbb | 2 | comment | 1 |
| bbb | 3 | /*/ | 1 |
| bbb | 4 | not comment | 0 |
| bbb | 5 | /*/ comment /*/ | 1 |
| bbb | 6 | not comment | 0 |
| bbb | 7 | /* c1 */ nc1 /* c2 */ | 2 |
| ccc | 1 | */ no comments | 0 |
| ccc | 2 | */ no comments | 0 |
| ddd | 1 | /*/* comment | 1 |
| ddd | 2 | /*/* | 1 |
| ddd | 3 | not comment */ | 0 |
| ddd | 4 | not comment */* comment /* comment /*/ not comment | 1 |
| ddd | 5 | /*/*/*/*/ | 1 |
fiddle
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With