Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Problems understanding intermittent inconsistencies when loading data with SSIS package

The problem

During the passed few months the below described procedure has worked without any problems a vast majority of the times it has run (on 2008 r2). We have, however, three instances of incorrectly connected data. The question is, what is causing this and how do I remedy it?

DATA_PreImp

    sourceid    col01   col02   col03   col04   col...
    100001      John    Smith   
    100002      Calvin  Klein
    100003      Peter   Parker
    100004      Moe     Greene

Usually the rendered result is that the attribute is connected to the Items_Main correctly but sometimes (less than 1%) the order is scrambled so that the value of col01 is not connected to the same Items_Main as the value of the rest of the columns.

Any insights as to what is causing this would be most appreciated.

The data moving procedure

We have an SSIS package that transfers data from a flat table called DATA_PreImp to a structure consisting of three related tables (attribute based).

  • Items_Main should contains one row for each row in DATA_PreImp
  • Items_Featurevalues contains one row for each column value of a row in DATA_PreImp
  • Items_MainRel contains the connection between Items_Main and Items_FeatureValues

The first step in the SSIS package inserts the data from DATA_PreImp to Items_Main and inserts the generated identifier into the TARGET_ID column in the empty DATA_PreImpMappingTMP table.

insert into items_main(creationdate, status)
output inserted.itemid into DATA_PreImpMappingTMP(TARGET_ID)
select getdate(), '0' from data_preimp
order by sourceid asc;

The second step in the SSIS package fill the Items_MainRel table with TARGET_ID (Itemid originally) and an identifier for the feature (in this case a 5).

insert into items_mainrel(itemid, featureid)
output inserted.itemrelid into DATA_PreImpMapping2TMP(INDREL_ID)
select TARGET_ID, 5 from DATA_PreImpMappingTMP
order by TARGET_ID asc;

The third step is to fill the SOURCE_ID column in the DATA_PreImpMapping2TMP table with the SOURCE_ID from DATA_PreImp.

with cte as (select sourceid, row_number() over (order by sourceid asc) as row from data_preimp)
update m set m.SOURCE_ID = s.sourceid, m.FEAT_ID = 5
from DATA_PreImpMapping2TMP as m
join cte as s on s.row = m.ROW;

The last step is to fill the Items_FeatureValues table with data from DATA_PreImpMapping2TMP and DATA_PreImp.

insert into items_featurevalues(itemrelid, languageid, fnvarchar)
select DATA_PreImpMapping2TMP.INDREL_ID, 0, data_preimp.col01
from DATA_PreImpMapping2TMP
join data_preimp on (DATA_PreImpMapping2TMP.SOURCE_ID = data_preimp.sourceid)
where FEAT_ID = 5

Data table structure

Here is what is needed to create the scenario:

CREATE TABLE [dbo].[DATA_PreImp](
    [sourceid] [bigint] IDENTITY(1,1) NOT NULL,
    [col01] [nvarchar](500) NULL,
    [col02] [nvarchar](500) NULL,
    [col03] [nvarchar](500) NULL,
    [col04] [nvarchar](500) NULL,
    [col05] [nvarchar](500) NULL,
    [col06] [nvarchar](500) NULL,
    [col07] [nvarchar](500) NULL,
    [col08] [nvarchar](500) NULL,
    [col09] [nvarchar](500) NULL,
    [col10] [nvarchar](500) NULL,
 CONSTRAINT [PK_DATA_PreImp] PRIMARY KEY CLUSTERED 
(
    [sourceid] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]


CREATE TABLE [dbo].[DATA_PreImpMappingTMP](
    [ROW] [int] IDENTITY(1,1) NOT NULL,
    [TARGET_ID] [int] NULL,
PRIMARY KEY CLUSTERED 
(
    [ROW] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]


CREATE TABLE [dbo].[Items_Main](
    [Itemid] [int] IDENTITY(1,1) NOT NULL,
    [creationDate] [smalldatetime] NOT NULL,
    [status] [int] NOT NULL,
    [purchdate] [smalldatetime] NULL,
    [logindate] [smalldatetime] NULL,
 CONSTRAINT [PK_Items_Main] PRIMARY KEY CLUSTERED 
(
    [Itemid] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]


CREATE TABLE [dbo].[DATA_PreImpMapping2TMP](
    [ROW] [int] IDENTITY(1,1) NOT NULL,
    [SOURCE_ID] [int] NULL,
    [INDREL_ID] [int] NULL,
    [FEAT_ID] [int] NULL,
PRIMARY KEY CLUSTERED 
(
    [ROW] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]

CREATE TABLE [dbo].[Items_Features](
    [featureId] [int] IDENTITY(1,1) NOT NULL,
    [featureRef] [varchar](15) NOT NULL,
    [featureName] [varchar](50) NOT NULL,
    [creationDate] [smalldatetime] NOT NULL,
    [status] [int] NOT NULL,
    [fieldType] [varchar](50) NOT NULL,
    [featureType] [int] NOT NULL,
    [featureDesc] [varchar](500) NULL,
 CONSTRAINT [PK_Items_Features] PRIMARY KEY CLUSTERED 
(
    [featureId] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]) ON [PRIMARY]


CREATE TABLE [dbo].[Items_MainRel](
    [ItemRelId] [int] IDENTITY(1,1) NOT NULL,
    [Itemid] [int] NOT NULL,
    [featureId] [int] NOT NULL,
 CONSTRAINT [PK_Items_MainRel] PRIMARY KEY CLUSTERED 
(
    [ItemRelId] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]

GO

CREATE TABLE [dbo].[Items_FeatureValues](
    [valueId] [int] IDENTITY(1,1) NOT NULL,
    [ItemRelId] [int] NOT NULL,
    [languageId] [int] NOT NULL,
    [FnVarChar] [nvarchar](250) NULL,
    [FInt] [int] NULL,
    [FImage] [int] NULL,
    [FNText] [ntext] NULL,
    [FSmallDateTime] [smalldatetime] NULL,
 CONSTRAINT [PK_Items_FeatureValues] PRIMARY KEY CLUSTERED 
(
    [valueId] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

GO    

ALTER TABLE [dbo].[Items_MainRel]  WITH CHECK ADD  CONSTRAINT [FK_Items_MainRel_Items_Features] FOREIGN KEY([featureId])
REFERENCES [dbo].[Items_Features] ([featureId])
GO

ALTER TABLE [dbo].[Items_MainRel] CHECK CONSTRAINT [FK_Items_MainRel_Items_Features]
GO

ALTER TABLE [dbo].[Items_MainRel]  WITH CHECK ADD  CONSTRAINT [FK_Items_MainRel_Items_Main] FOREIGN KEY([Itemid])
REFERENCES [dbo].[Items_Main] ([Itemid])
GO

ALTER TABLE [dbo].[Items_MainRel] CHECK CONSTRAINT [FK_Items_MainRel_Items_Main]
GO


ALTER TABLE [dbo].[Items_FeatureValues]  WITH CHECK ADD  CONSTRAINT [FK_Items_FeatureValues_Items_MainRel] FOREIGN KEY([ItemRelId])
REFERENCES [dbo].[Items_MainRel] ([ItemRelId])
ON DELETE CASCADE
GO

ALTER TABLE [dbo].[Items_FeatureValues] CHECK CONSTRAINT [FK_Items_FeatureValues_Items_MainRel]
GO


INSERT INTO DATA_PreImp (col01,col02,col03,col04) 
VALUES('John', 'Smith', '1964', 'NewYork'),
        ('Calvin', 'Klein', '1960', 'Washington D. C.'),
        ('Peter', 'Parker', '1974', 'Losangles'),
        ('Moe', 'Greene', '1928', 'Lasvegas')


INSERT INTO Items_Features (featureRef, featureName, creationDate, [status], fieldType, featureType, featureDesc)
VALUES ('firstname','First_Name', GETDATE(), 0, 'FnVarChar', 3, 'FirstName'),
    ('lastname','Last_Name', GETDATE(), 0, 'FnVarChar', 3, 'LastName'),
    ('Birth','Birth', GETDATE(), 0, 'FnVarChar', 3, 'Birth'),
    ('City','City', GETDATE(), 0, 'FnVarChar', 3, 'City')  
like image 231
ahdaniels Avatar asked Mar 07 '13 15:03

ahdaniels


1 Answers

The problem was the computed column CUST_CD. After a lot of researching, it seems that the BULK INSERT does not like complex computed types (see Using SQL Server spatial types in SSIS data load). The solution is to removed the computed column and just make it a varchar(20) NULL. Then I created a new Execute SQL Task that updates any NULL rows with the computed value.

like image 90
Kumar Ashish Avatar answered Nov 15 '22 04:11

Kumar Ashish