This should be easy, but I'm just missing something. I have the following:
IF OBJECT_ID('LAST_NM') IS NOT NULL
DROP TABLE LAST_NM
CREATE TABLE LAST_NM (
ID int NOT NULL IDENTITY(1,1),
LAST_NM_ORIGINAL varchar(255) NOT NULL,
LAST_NM_1 varchar(255)NULL,
LAST_NM_2 varchar(255)NULL,
LAST_NM_3 varchar(255)NULL,
LAST_NM_4 varchar(255)NULL,
PRIMARY KEY (ID)
);
INSERT INTO LAST_NM
(LAST_NM_ORIGINAL, LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)
VALUES
('SMITH', 'HARRIS', NULL, 'HARRIS', NULL),
('JONES', 'FUTURE', 'FUTURE', 'FUTURE', 'FUTURE'),
('SMITH', 'ALPHA', 'ALPHA', 'ALPHA', NULL),
('SMITH', 'BETA', 'BETA', 'GEORGE', NULL),
('SMITH', 'SMITH', NULL, 'SMITH', NULL),
('DOPE', NULL, NULL, NULL, 'CURLS')
What I want to do is SELECT
from this table where:
IS NOT NULL
LAST_NM_ORIGINAL
I've tried playing with CASE
and SWITCH
and I got a messy version working if I hard code the heck out of statements and union them like this:
SELECT * FROM (
SELECT ID, LAST_NM_ORIGINAL, LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4
FROM LAST_NM
WHERE LAST_NM_1 IS NOT NULL
AND LAST_NM_2 IS NOT NULL
AND LAST_NM_3 IS NOT NULL
AND LAST_NM_4 IS NOT NULL
AND LAST_NM_1 = LAST_NM_2
AND LAST_NM_1 = LAST_NM_3
AND LAST_NM_3 = LAST_NM_4
AND LAST_NM_1 <> LAST_NM_ORIGINAL
UNION
SELECT ID, LAST_NM_ORIGINAL, LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4
FROM LAST_NM
WHERE LAST_NM_1 IS NOT NULL
AND LAST_NM_2 IS NOT NULL
AND LAST_NM_3 IS NOT NULL
AND LAST_NM_4 IS NULL
AND LAST_NM_1 = LAST_NM_2
AND LAST_NM_1 = LAST_NM_3
AND LAST_NM_1 <> LAST_NM_ORIGINAL
/*
WRITE OUT EACH POSSIBLE WAY AND UNION ALL OF THEM
.
.
.
*/
UNION
SELECT ID, LAST_NM_ORIGINAL, LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4
FROM LAST_NM
WHERE LAST_NM_1 IS NULL
AND LAST_NM_2 IS NULL
AND LAST_NM_3 IS NULL
AND LAST_NM_4 IS NOT NULL
AND LAST_NM_4 <> LAST_NM_ORIGINAL
) AS RESULT_SET
To summarize, I want to select out the rows if the LAST_NM_#
is not NULL and is the same as all the other NOT NULL LAST_NM_#
and is different than the LAST_NM_ORIGINAL
So in my example, I should get back rows 1, 2, 3, and 6. But not rows 4 (the 'new' names don't agree) or row 5 (the new names are the same as the old one).
There has to be a better way than just writing each of those out and UNIONING them.. right?
SQL has the is [not] null predicate to test if a particular value is null . With is [not] distinct from SQL also provides a comparison operator that treats two null values as the same. Note that you have to use the negated form with not to arrive at similar logic to the equals ( = ) operator.
The value NULL does not equal zero (0), nor does it equal a space (' '). Because the NULL value cannot be equal or unequal to any value, you cannot perform any comparison on this value by using operators such as '=' or '<>'.
You can't compare with NULL. You need is. null to test if something is a reference to the NULL object. @CarlesMitjans the variable is not always NULL, normally it has another integer value.
Here it is using UNPIVOT
;WITH NAMES
AS (
SELECT DISTINCT ID
,LAST_NM_ORIGINAL
,LAST_NM_NEW
FROM (
SELECT ID
,LAST_NM_ORIGINAL
,LAST_NM_1
,LAST_NM_2
,LAST_NM_3
,LAST_NM_4
FROM LAST_NM
) AS X
UNPIVOT(LAST_NM_NEW FOR LAST_NM_NEWS IN (
LAST_NM_1
,LAST_NM_2
,LAST_NM_3
,LAST_NM_4
)) AS Y
)
SELECT ID
,LAST_NM_ORIGINAL
,LAST_NM_NEW
FROM NAMES
WHERE ID IN (
SELECT ID
FROM NAMES
GROUP BY ID
HAVING COUNT(ID) = 1
)
AND LAST_NM_ORIGINAL <> LAST_NM_NEW
This question immediately screams UNPIVOT to me.
You can see the link or just google for syntax and examples, and then use it to get a derived table that looks like this:
ID NM_Orig NM_Number NM_Value
1 Smith 1 Harris
1 Smith 2 NULL
1 Smith 3 Harris
1 Smith 4 NULL
2 Jones 1 Future
etc...
From that derived table you would query to get IDs WHERE NM_Value is NOT NULL AND NM_Value <> NM_Orig AND WHERE NOT EXISTS
a correlated ROW With a NON-NULL NM_Value
that is different from the correlated NM_Value
.
Here's another way to do it.
SELECT
*
FROM
LAST_NM
WHERE
(LAST_NM_1 IS NULL OR LAST_NM_1 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_2 IS NULL OR LAST_NM_2 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_3 IS NULL OR LAST_NM_3 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_4 IS NULL OR LAST_NM_4 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_1 IS NULL OR LAST_NM_1 <> LAST_NM_ORIGINAL) AND
(LAST_NM_2 IS NULL OR LAST_NM_2 <> LAST_NM_ORIGINAL) AND
(LAST_NM_3 IS NULL OR LAST_NM_3 <> LAST_NM_ORIGINAL) AND
(LAST_NM_4 IS NULL OR LAST_NM_4 <> LAST_NM_ORIGINAL)
Edit. Can be shortened to the query below since you want at least one of the LAST_NM_# to not be null:
SELECT
*
FROM
LAST_NM
WHERE
(LAST_NM_1 IS NULL OR LAST_NM_1 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_2 IS NULL OR LAST_NM_2 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_3 IS NULL OR LAST_NM_3 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_4 IS NULL OR LAST_NM_4 = COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4)) AND
(LAST_NM_ORIGINAL <> COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4))
Here is one way to do this.
select ID
, LAST_NM_ORIGINAL
, LAST_NM_1
, LAST_NM_2
, LAST_NM_3
, LAST_NM_4
from LAST_NM
where replace(isnull(LAST_NM_1, '') + isnull(LAST_NM_2, '') + isnull(LAST_NM_3, '') + isnull(LAST_NM_4, ''), LAST_NM_ORIGINAL, '') > ''
AND replace(isnull(LAST_NM_1, '') + isnull(LAST_NM_2, '') + isnull(LAST_NM_3, '') + isnull(LAST_NM_4, ''), coalesce(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4), '') = ''
Your original query does have some superfluous predicates. Any row meeting LAST_NM_1 = LAST_NM_2
is guaranteed to have both LAST_NM_1 NOT NULL
and LAST_NM_2 NOT NULL
for example.
But it can be made quite concise by using VALUES
to get the 4 columns in tabular form as below.
SELECT *
FROM LAST_NM
WHERE EXISTS (SELECT *
FROM (VALUES(LAST_NM_1),
(LAST_NM_2),
(LAST_NM_3),
(LAST_NM_4)) V(LAST_NM_N)
HAVING MAX(LAST_NM_N) = MIN(LAST_NM_N) /*exactly one NOT NULL value among the 4 columns*/
AND MAX(LAST_NM_N) <> LAST_NM_ORIGINAL)
Here is another way to do it
SELECT * FROM
#LAST_NM
WHERE
COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4) = COALESCE(LAST_NM_2, LAST_NM_3, LAST_NM_4,LAST_NM_1)
AND COALESCE(LAST_NM_2, LAST_NM_3, LAST_NM_4,LAST_NM_1) = COALESCE(LAST_NM_3, LAST_NM_4,LAST_NM_1,LAST_NM_2)
AND COALESCE(LAST_NM_3, LAST_NM_4,LAST_NM_1,LAST_NM_2) = COALESCE(LAST_NM_4,LAST_NM_1,LAST_NM_2,LAST_NM_3)
AND (LAST_NM_ORIGINAL <> COALESCE(LAST_NM_1, LAST_NM_2, LAST_NM_3, LAST_NM_4))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With