I have several SSIS (.dtsx) files and I need to rewrite their functionality to C#.
They are configured to simply take data from one db1.table1 and via some custom mappings transfer the data to db2.table2 (the column names don't exactly match).
Is there some documentation on SSIS xml format? Is there something I should be aware of when parsing the XML format manually to retrieve the source-target table name and column names?
Following code is designed for SSIS packages PackageFormatVersion=3
It is not pretty but it's ok for one way XML transformation.
Parsing source
private static Mapping<ColumnMapping> ParseSourceComponent(XElement source)
{
var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;
var nonErrorOutput = source.XPathSelectElement("outputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));
var outputColumns = nonErrorOutput.XPathSelectElement("outputColumns").Elements().Select(x =>
new ColumnMapping
{
Id = (int)x.Attribute("id"),
Name = (string)x.Attribute("name")
}).ToList();
return new Mapping<ColumnMapping>
{
TableName = NormalizeTableNames(table),
Columns = outputColumns
};
}
static readonly Regex tableNameRegex = new Regex("\\[dbo\\]\\.\\[(.*)\\]");
private static string NormalizeTableNames(string rawTableName)
{
var matches = tableNameRegex.Match(rawTableName);
if (matches.Success)
return matches.Groups[1].Value;
return rawTableName;
}
Parsing destination
private static Mapping<InputColumnMapping> ParseDestinationComponent(string ssisName,XElement source)
{
var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;
var nonErrorOutput = source.XPathSelectElement("inputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));
var inputColumns = nonErrorOutput.XPathSelectElement("inputColumns").Elements().Select(x =>
new
{
lineageId = (int)x.Attribute("lineageId"),
externalMetadataColumnId = (int)x.Attribute("externalMetadataColumnId")
}).ToList();
var externalMetadataColumns = nonErrorOutput.XPathSelectElement("externalMetadataColumns").Elements().Select(x =>
new InputColumnMapping
{
Id = (int)x.Attribute("id"),
Name = (string)x.Attribute("name")
}).ToList();
foreach (var externalMetadataColumn in externalMetadataColumns.ToList())
{
var inputMapping =
inputColumns.FirstOrDefault(x => x.externalMetadataColumnId == externalMetadataColumn.Id);
if (inputMapping == null)
{
Console.WriteLine("{0} | destination external column {1} with id {2} was not found in input mappings", ssisName, externalMetadataColumn.Name, externalMetadataColumn.Id);
externalMetadataColumns.Remove(externalMetadataColumn);
continue;
}
externalMetadataColumn.MappsToId = inputMapping.lineageId;
}
return new Mapping<InputColumnMapping>
{
TableName = NormalizeTableNames(table),
Columns = externalMetadataColumns
};
}
Processing the whole .dtsx file
private static RemoteMappingFile ParseDtsx(string ssisName)
{
var xml = XDocument.Load(@"ssis/"+ssisName);
if (xml.Root == null)
{
throw new Exception("Root is null");
}
var mappings = new List<RemoteMapping>();
XNamespace ns = "www.microsoft.com/SqlServer/Dts";
XmlNamespaceManager man = new XmlNamespaceManager(new NameTable());
man.AddNamespace("DTS", "www.microsoft.com/SqlServer/Dts");
var executables = xml.Root.Descendants(ns + "Executable").Select(x => x).ToList();
foreach (var executable in executables)
{
var components = executable.Descendants(ns + "ObjectData").First().XPathSelectElement("pipeline/components").Elements().ToList();
if (components.Count != 2)
{
Console.WriteLine("{0} | WARN - 2 components expected. Found {1} with names: {2}", ssisName, components.Count, string.Join(",",components.Select(x=>((string)x.Attribute("name"))).ToList()));
}
var source = components.First(x => ((string)x.Attribute("name")).Contains("Source"));
var destination = components.First(x => ((string)x.Attribute("name")).Contains("Destination"));
var sourceMapping = ParseSourceComponent(source);
var destinationMapping = ParseDestinationComponent(ssisName,destination);
var remoteMapping = new RemoteMapping
{
TableNames = new Column { Source = sourceMapping.TableName, Destination = destinationMapping.TableName },
Columns = new List<Column>()
};
foreach (var sourceItem in sourceMapping.Columns)
{
var foundMatchingDestinationColumn =
destinationMapping.Columns.FirstOrDefault(x => x.MappsToId == sourceItem.Id);
if (foundMatchingDestinationColumn == null)
{
Console.WriteLine("{0} | input mapping {1} with id {2} was not found in destination mappings",
ssisName, sourceItem.Name, sourceItem.Id);
continue;
}
remoteMapping.Columns.Add(new Column
{
Destination = foundMatchingDestinationColumn.Name,
Source = sourceItem.Name
});
}
mappings.Add(remoteMapping);
}
return new RemoteMappingFile
{
RemoteMappings = mappings,
SSISName = ssisName
};
}
Needed data structures
public class ColumnMapping
{
public int Id { get; set; }
public string Name { get; set; }
}
public class InputColumnMapping : ColumnMapping
{
public int MappsToId { get; set; }
}
public class Mapping<T> where T : ColumnMapping
{
[XmlAttribute]
public string TableName { get; set; }
public List<T> Columns { get; set; }
}
public class RemoteMapping
{
public Column TableNames { get; set; }
public List<Column> Columns { get; set; }
}
public class Column
{
[XmlAttribute]
public string Source { get; set; }
[XmlAttribute]
public string Destination { get; set; }
}
public class RemoteMappingFile
{
[XmlAttribute]
public string SSISName { get; set; }
public List<RemoteMapping> RemoteMappings { get; set; }
}
public class MappingsXml
{
public List<RemoteMappingFile> Mappings { get; set; }
}
The main method takes all .dtsx files in ssis folder
internal class Program
{
private static void Main()
{
//var mappings = Directory.EnumerateFiles("ssis","*.dtsx").Select(x=>ParseDtsx(Path.GetFileName(x).ToString())).ToList();
var list = new MappingsXml
{
Mappings =
Directory.EnumerateFiles("ssis", "*.dtsx")
.Select(x => ParseDtsx((Path.GetFileName(x) ?? "").ToString()))
.ToList()
};
var xsSubmit = new XmlSerializer(typeof (MappingsXml));
using (var file = new StreamWriter(
@"AutoRemoteMappingXmls.xml"))
{
xsSubmit.Serialize(file, list);
}
}
}
Final output:
<?xml version="1.0" encoding="utf-8"?>
<MappingsXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<Mappings>
<RemoteMappingFile SSISName="ssis1.dtsx">
<RemoteMappings>
<RemoteMapping>
<TableNames Source="sourceTable1" Destination="destinationTable1" />
<Columns>
<Column Source="sourceColumn1" Destination="destinationColumn1" />
<Column Source="sourceColumn2" Destination="destinationColumn2" />
</Columns>
</RemoteMapping>
<RemoteMapping>
<TableNames Source="sourceTable2" Destination="destinationTable2" />
<Columns>
<Column Source="sourceColumn3" Destination="destinationColumn3" />
<Column Source="sourceColumn4" Destination="destinationColumn4" />
</Columns>
</RemoteMapping>
</RemoteMappings>
</RemoteMappingFile>
</Mappings>
</MappingsXml>
It also writes to console if:
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With