Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to compare the cell values of two data tables

If i have two data tables with same structure,the same primary key and the same number of columns .

How to compare their content and detect the cells which are not the same in the two data tables ?

ex:

TB_Offline

emp_num(key)  salary      ov    

 455           3000      67.891   
 677           5000      89.112    
 778           6000      12.672   

TB_Online

emp_num(key)  salary      ov  

 455           3000      67.891 
 677           5000      50.113 
 778           5500      12.672   

I want to get result like this(or some structure to show the differences) :

emp_num(key)| salary_off |salary_on|s1   | ov_off  |  ov_on  |s2         

   677      |   5000     |    5000 | 1   |  89.112 |  50.113 | 0    
   778      |   6000     |    5500 | 0   |  12.672 |  12.672 | 1 

NOTE:

455 doesn't exist in the result because it was the exact in all columns among the two datatables.
like image 389
Anyname Donotcare Avatar asked Mar 05 '16 17:03

Anyname Donotcare


1 Answers

Here's an implementation in a fairly generic method that compares two DataTables and returns another DataTable with the differences shown.

  • Dynamic 'key' column (only single column, not multiple).
  • Doesn't display rows that have the same data.
  • Handles NULLs in data.
  • Columns not in both tables.
  • T=object comparisons.

Populating the DataTables...


    /// Build data and test the underlying method.
    public void Main()
    {
        Dictionary columns = new Dictionary();
        columns.Add("emp_num", typeof(int));
        columns.Add("salary", typeof(int));
        columns.Add("ov", typeof(double));

        DataTable left = new DataTable();
        foreach(KeyValuePair column in columns)
        {
            left.Columns.Add(column.Key, column.Value);
        }
        left.Rows.Add(455, 3000, 67.891);
        left.Rows.Add(677, 5000, 89.112);
        left.Rows.Add(778, 6000, 12.672);
        left.Rows.Add(9001, 5500, 12.672);
        left.Rows.Add(4, null, 9.2);
        //left.Dump("Left");

        DataTable right = new DataTable();
        right.Columns.Add("outlier", typeof(string));
        foreach (KeyValuePair column in columns)
        {
            right.Columns.Add(column.Key, column.Value);
        }
        right.Columns.Add("float", typeof(float));
        right.Rows.Add(0, 455, 3000, 67.891, 5);
        right.Rows.Add(1, 677, 5000, 50.113, 5);
        right.Rows.Add(2, 778, 5500, 12.672, 6);
        right.Rows.Add(2, 9000, 5500, 12.672, 6);
        right.Rows.Add(3, 4, 10, 9.2, 7);
        //right.Dump("Right");


        // Compare.
        DataTable results = Compare(left, right, "emp_num");
        //results.Dump("Results"); // Fancy table output via LINQPad.

        // Get the comparison columns for display.
        List comparedColumns = new List();
        foreach (DataColumn column in results.Columns)
        {
            comparedColumns.Add(column.ColumnName);
        }

        // Display the comparison rows.
        Console.WriteLine(string.Join(", ", comparedColumns));
        foreach(DataRow row in results.Rows)
        {
            Console.WriteLine(string.Join(", ", row.ItemArray));
        }
    }

Fancy table output from LINQPad of the input.

Generic Method: DataTable Compare(DataTable, DataTable)


    /// Compares the values of each row in the provided DataTables and returns any rows that have a difference based on a provided 'key' column.
    /// the 'pre' data.
    /// the 'post' data.
    /// Name of the column to use for matching rows.
    /// New DataTable populated with difference rows only.
    public DataTable Compare(DataTable left, DataTable right, string keyColumn)
    {
        const string Pre = "_Pre";
        const string Post = "_Post";

        DataColumn leftKey = left.Columns.Contains(keyColumn) ? left.Columns[keyColumn] : null;
        DataColumn rightKey = right.Columns.Contains(keyColumn) ? right.Columns[keyColumn] : null;

        if (leftKey == null || rightKey == null)
        {
            return null;
        }

        // Get the matching columns between the two tables for doing comparisons.
        List comparisonColumns = new List();
        DataTable results = new DataTable();
        // Adding the key column to the front for sake of ease of viewing.
        results.Columns.Add(new DataColumn(leftKey.ColumnName, leftKey.DataType));
        foreach (DataColumn column in left.Columns)
        {
            if(column == leftKey)
            {
                continue;
            }

            // Remove any columns that are not present in the compare table.
            foreach (DataColumn compareColumn in right.Columns)
            {
                if (column.ColumnName == compareColumn.ColumnName && column.DataType == compareColumn.DataType)
                {
                    comparisonColumns.Add(column.ColumnName);
                    results.Columns.Add(new DataColumn(column.ColumnName + Pre, column.DataType));
                    results.Columns.Add(new DataColumn(column.ColumnName + Post, column.DataType));
                    break;
                }
            }
        }

        foreach (DataRow leftRow in left.Rows)
        {
            object key = leftRow.Field(leftKey);
            string filterExpression = string.Format("{0} = {1}", keyColumn, key);
            DataRow rightRow = right.Select(filterExpression).SingleOrDefault();
            // Need a row for a comparison to be valid.
            if (rightRow == null)
            {
                continue;
            }

            List comparison = new List();
            comparison.Add(key);
            bool isDiff = false;
            foreach (string comparisonColumn in comparisonColumns)
            {
                object pre = leftRow.ItemArray[left.Columns.IndexOf(comparisonColumn)];
                comparison.Add(pre);
                object post = rightRow.ItemArray[right.Columns.IndexOf(comparisonColumn)];
                comparison.Add(post);

                // Only need the row if the values differ in at least one column.
                isDiff |= (pre == null && post != null) || (pre != null && post == null) || (!pre.Equals(post));
            }
            if (isDiff)
            {
                results.Rows.Add(comparison.ToArray());
            }
        }

        return results;
    }

Output:

Fancy table output from LINQPad of the output.


    emp_num, salary_Pre, salary_Post, ov_Pre, ov_Post
    677, 5000, 5000, 89.112, 50.113
    778, 6000, 5500, 12.672, 12.672
    4, , 10, 9.2, 9.2

like image 53
Matt Kava Avatar answered Oct 30 '22 18:10

Matt Kava