Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Parse SSIS .xml source to retrieve table mappings

I have several SSIS (.dtsx) files and I need to rewrite their functionality to C#.

They are configured to simply take data from one db1.table1 and via some custom mappings transfer the data to db2.table2 (the column names don't exactly match).

Is there some documentation on SSIS xml format? Is there something I should be aware of when parsing the XML format manually to retrieve the source-target table name and column names?

like image 265
Vojtech B Avatar asked Nov 28 '25 01:11

Vojtech B


1 Answers

Following code is designed for SSIS packages PackageFormatVersion=3

It is not pretty but it's ok for one way XML transformation.

Parsing source

private static Mapping<ColumnMapping> ParseSourceComponent(XElement source)
{
    var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;


    var nonErrorOutput = source.XPathSelectElement("outputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));

    var outputColumns = nonErrorOutput.XPathSelectElement("outputColumns").Elements().Select(x => 
        new ColumnMapping
        {
            Id = (int)x.Attribute("id"),
            Name = (string)x.Attribute("name")
        }).ToList();

    return new Mapping<ColumnMapping>
    {
        TableName = NormalizeTableNames(table),
        Columns = outputColumns
    };
}
static readonly Regex tableNameRegex = new Regex("\\[dbo\\]\\.\\[(.*)\\]");
private static string NormalizeTableNames(string rawTableName)
{
    var matches = tableNameRegex.Match(rawTableName);
    if (matches.Success) 
        return matches.Groups[1].Value;
    return rawTableName;

}

Parsing destination

private static Mapping<InputColumnMapping> ParseDestinationComponent(string ssisName,XElement source)
{
    var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;


    var nonErrorOutput = source.XPathSelectElement("inputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));

    var inputColumns = nonErrorOutput.XPathSelectElement("inputColumns").Elements().Select(x =>
        new
        {
            lineageId = (int)x.Attribute("lineageId"),
            externalMetadataColumnId = (int)x.Attribute("externalMetadataColumnId")
        }).ToList();

    var externalMetadataColumns = nonErrorOutput.XPathSelectElement("externalMetadataColumns").Elements().Select(x =>
        new InputColumnMapping
        {
            Id = (int)x.Attribute("id"),
            Name = (string)x.Attribute("name")
        }).ToList();
    foreach (var externalMetadataColumn in externalMetadataColumns.ToList())
    {
        var inputMapping =
            inputColumns.FirstOrDefault(x => x.externalMetadataColumnId == externalMetadataColumn.Id);
        if (inputMapping == null)
        {
            Console.WriteLine("{0} | destination external column {1} with id {2} was not found in input mappings", ssisName, externalMetadataColumn.Name, externalMetadataColumn.Id);
            externalMetadataColumns.Remove(externalMetadataColumn);
            continue;
        }
        externalMetadataColumn.MappsToId = inputMapping.lineageId;
    }
    return new Mapping<InputColumnMapping>
    {
        TableName = NormalizeTableNames(table),
        Columns = externalMetadataColumns
    };
}

Processing the whole .dtsx file

private static RemoteMappingFile ParseDtsx(string ssisName)
{
    var xml = XDocument.Load(@"ssis/"+ssisName);

    if (xml.Root == null)
    {
        throw new Exception("Root is null");
    }
    var mappings = new List<RemoteMapping>();

    XNamespace ns = "www.microsoft.com/SqlServer/Dts";
    XmlNamespaceManager man = new XmlNamespaceManager(new NameTable());
    man.AddNamespace("DTS", "www.microsoft.com/SqlServer/Dts");
    var executables = xml.Root.Descendants(ns + "Executable").Select(x => x).ToList();
    foreach (var executable in executables)
    {
        var components = executable.Descendants(ns + "ObjectData").First().XPathSelectElement("pipeline/components").Elements().ToList();
        if (components.Count != 2)
        {
            Console.WriteLine("{0} | WARN - 2 components expected. Found {1} with names: {2}", ssisName, components.Count, string.Join(",",components.Select(x=>((string)x.Attribute("name"))).ToList()));
        }
        var source = components.First(x => ((string)x.Attribute("name")).Contains("Source"));
        var destination = components.First(x => ((string)x.Attribute("name")).Contains("Destination"));
        var sourceMapping = ParseSourceComponent(source);
        var destinationMapping = ParseDestinationComponent(ssisName,destination);
        var remoteMapping = new RemoteMapping
        {
            TableNames = new Column { Source = sourceMapping.TableName, Destination = destinationMapping.TableName },
            Columns = new List<Column>()
        };
        foreach (var sourceItem in sourceMapping.Columns)
        {
            var foundMatchingDestinationColumn =
                destinationMapping.Columns.FirstOrDefault(x => x.MappsToId == sourceItem.Id);
            if (foundMatchingDestinationColumn == null)
            {
                Console.WriteLine("{0} | input mapping {1} with id {2} was not found in destination mappings",
                    ssisName, sourceItem.Name, sourceItem.Id);
                continue;
            }
            remoteMapping.Columns.Add(new Column
            {
                Destination = foundMatchingDestinationColumn.Name,
                Source = sourceItem.Name
            });
        }
        mappings.Add(remoteMapping);
    }

    return new RemoteMappingFile
    {
        RemoteMappings = mappings,
        SSISName = ssisName
    };
}

Needed data structures

public class ColumnMapping
{
    public int Id { get; set; }
    public string Name { get; set; }
}
public class InputColumnMapping : ColumnMapping
{
    public int MappsToId { get; set; }
}
public class Mapping<T> where T : ColumnMapping
{
    [XmlAttribute]
    public string TableName { get; set; }
    public List<T> Columns { get; set; }
}
public class RemoteMapping
{
    public Column TableNames { get; set; }
    public List<Column> Columns { get; set; }
}

public class Column
{
    [XmlAttribute]
    public string Source { get; set; }

[XmlAttribute]
    public string Destination { get; set; }
}
public class RemoteMappingFile
{
    [XmlAttribute]
    public string SSISName { get; set; }
    public List<RemoteMapping> RemoteMappings { get; set; }
}
public class MappingsXml
{
    public List<RemoteMappingFile> Mappings { get; set; }
}

The main method takes all .dtsx files in ssis folder

internal class Program
{
    private static void Main()
    {
        //var mappings = Directory.EnumerateFiles("ssis","*.dtsx").Select(x=>ParseDtsx(Path.GetFileName(x).ToString())).ToList();
        var list = new MappingsXml
        {
            Mappings =
                Directory.EnumerateFiles("ssis", "*.dtsx")
                    .Select(x => ParseDtsx((Path.GetFileName(x) ?? "").ToString()))
                    .ToList()
        };
        var xsSubmit = new XmlSerializer(typeof (MappingsXml));

        using (var file = new StreamWriter(
            @"AutoRemoteMappingXmls.xml"))
        {
            xsSubmit.Serialize(file, list);
        }
    }
}

Final output:

<?xml version="1.0" encoding="utf-8"?>
<MappingsXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <Mappings>
    <RemoteMappingFile SSISName="ssis1.dtsx">
      <RemoteMappings>
        <RemoteMapping>
          <TableNames Source="sourceTable1" Destination="destinationTable1" />
          <Columns>
            <Column Source="sourceColumn1" Destination="destinationColumn1" />
            <Column Source="sourceColumn2" Destination="destinationColumn2" />
          </Columns>
        </RemoteMapping>
        <RemoteMapping>
          <TableNames Source="sourceTable2" Destination="destinationTable2" />
          <Columns>
            <Column Source="sourceColumn3" Destination="destinationColumn3" />
            <Column Source="sourceColumn4" Destination="destinationColumn4" />
          </Columns>
        </RemoteMapping>
      </RemoteMappings>
    </RemoteMappingFile>
  </Mappings>
</MappingsXml>

It also writes to console if:

  1. There are more than 2 DTS:ObjectData/pipeline/components/component (We expect only "OLE DB Source" and "OLE DB Destination". Some time there are some data transformation components so there might be some additional work needed for that
  2. There is some source column that is not mapped to destination column
  3. There is some destination column that is not mapped to source column
  4. Source and destination table names don't match (not really a problem)
like image 191
Vojtech B Avatar answered Nov 30 '25 15:11

Vojtech B



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!