Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

All Common Substrings Between Two Strings

Tags:

c#

I am working on C# to find all the common substrings between two strings. For instance, if the input is: S1= "need asssitance with email" S2= "email assistance needed"

The output should be- 'need assistance email'

The below code returns the longest common substring, but I want my code to return all the common substrings. Any help is much appreciated!

static void commonsubstrings()
    {
        input1 = "need asssitance with email";
        input2 = "email assistance needed"

        if (input2.Length > input1.Length)
        {
            swap = input1;
            input1 = input2;
            input2 = swap;
        }

        int k = 1;
        String temp;
        String longTemp = "";
        for (int i = 0; (i <= input1.Length); i++)
        {
            if ((i == input1.Length))
            {
                if (longest != null)
                {
                    k = longest.Length + 1;
                }
                else
                {
                    k = 1;
                }
                temp = input1.Substring(1, input1.Length - 1);
                if (temp.Equals(""))
                {
                    break;
                }
                if (k <= temp.Length)
                {
                    i = k - 1;
                    input1 = temp;
                    if ((longest != null) && (longest.Length > longTemp.Length))
                    {
                        longTemp = longest;
                    }
                }
            }
            holder1 = input1.Substring(0, k);

            for (int j = 0; (j < input2.Length) && (j + k <= input2.Length); j++)
            {
                check1 = input2.Substring(j, k);
                if (holder1.Equals(check1))
                {
                    longest = holder1;
                    break;
                }
            }
            k++;
        }

        Console.WriteLine(longest);
        Console.ReadLine(); 

}

like image 978
pk188 Avatar asked Jun 11 '13 22:06

pk188


People also ask

How do you find common characters in two strings?

Approach: Count the frequencies of all the characters from both strings. Now, for every character if the frequency of this character in string s1 is freq1 and in string s2 is freq2 then total valid pairs with this character will be min(freq1, freq2). The sum of this value for all the characters is the required answer.

How many Substrings can be formed?

The total number of substrings formed by string of length N is (N*(N+1))/2, initialise count as (N*(N+1))/2.

What are substrings of a string?

A substring is a subset or part of another string, or it is a contiguous sequence of characters within a string.


2 Answers

A different approach: you could use levenshtein distance to find the similarity of two words. If the distance is less than a specified value you could consider two strings as equal. Then you can use the levenshtein comparer for Enumerable.Intersect.

Then it's simple as:

string S1= "need asssitance with email" ;
string S2 = "email assistance needed";
string[] words1 = S1.Split();
string[] words2 = S2.Split();
var wordsIntersecting = words1.Intersect(words2, new LevenshteinComparer());
string output = string.Join(" ", wordsIntersecting);

output: need asssitance email

Here is the custom comparer:

class LevenshteinComparer : IEqualityComparer<string>
{
    public int MaxDistance { get; set; }
    private Levenshtein _Levenshtein = new Levenshtein();

    public LevenshteinComparer() : this(50) { }

    public LevenshteinComparer(int maxDistance)
    {
        this.MaxDistance = maxDistance;
    }

    public bool Equals(string x, string y)
    {
        int distance = _Levenshtein.iLD(x, y);
        return distance <= MaxDistance;
    }

    public int GetHashCode(string obj)
    {
        return 0; 
    }
}

and here is an implementation of the levenshtein algoritm:

public class Levenshtein
{
    ///*****************************
    /// Compute Levenshtein distance 
    /// Memory efficient version
    ///*****************************
    public int iLD(String sRow, String sCol)
    {
        int RowLen = sRow.Length;  // length of sRow
        int ColLen = sCol.Length;  // length of sCol
        int RowIdx;                // iterates through sRow
        int ColIdx;                // iterates through sCol
        char Row_i;                // ith character of sRow
        char Col_j;                // jth character of sCol
        int cost;                   // cost

        /// Test string length
        if (Math.Max(sRow.Length, sCol.Length) > Math.Pow(2, 31))
            throw (new Exception("\nMaximum string length in Levenshtein.iLD is " + Math.Pow(2, 31) + ".\nYours is " + Math.Max(sRow.Length, sCol.Length) + "."));

        // Step 1

        if (RowLen == 0)
        {
            return ColLen;
        }

        if (ColLen == 0)
        {
            return RowLen;
        }

        /// Create the two vectors
        int[] v0 = new int[RowLen + 1];
        int[] v1 = new int[RowLen + 1];
        int[] vTmp;



        /// Step 2
        /// Initialize the first vector
        for (RowIdx = 1; RowIdx <= RowLen; RowIdx++)
        {
            v0[RowIdx] = RowIdx;
        }

        // Step 3

        /// Fore each column
        for (ColIdx = 1; ColIdx <= ColLen; ColIdx++)
        {
            /// Set the 0'th element to the column number
            v1[0] = ColIdx;

            Col_j = sCol[ColIdx - 1];


            // Step 4

            /// Fore each row
            for (RowIdx = 1; RowIdx <= RowLen; RowIdx++)
            {
                Row_i = sRow[RowIdx - 1];


                // Step 5

                if (Row_i == Col_j)
                {
                    cost = 0;
                }
                else
                {
                    cost = 1;
                }

                // Step 6

                /// Find minimum
                int m_min = v0[RowIdx] + 1;
                int b = v1[RowIdx - 1] + 1;
                int c = v0[RowIdx - 1] + cost;

                if (b < m_min)
                {
                    m_min = b;
                }
                if (c < m_min)
                {
                    m_min = c;
                }

                v1[RowIdx] = m_min;
            }

            /// Swap the vectors
            vTmp = v0;
            v0 = v1;
            v1 = vTmp;

        }


        // Step 7

        /// Value between 0 - 100
        /// 0==perfect match 100==totaly different
        /// 
        /// The vectors where swaped one last time at the end of the last loop,
        /// that is why the result is now in v0 rather than in v1
        //System.Console.WriteLine("iDist=" + v0[RowLen]);
        int max = System.Math.Max(RowLen, ColLen);
        return ((100 * v0[RowLen]) / max);
    }





    ///*****************************
    /// Compute the min
    ///*****************************

    private int Minimum(int a, int b, int c)
    {
        int mi = a;

        if (b < mi)
        {
            mi = b;
        }
        if (c < mi)
        {
            mi = c;
        }

        return mi;
    }

    ///*****************************
    /// Compute Levenshtein distance         
    ///*****************************

    public int LD(String sNew, String sOld)
    {
        int[,] matrix;              // matrix
        int sNewLen = sNew.Length;  // length of sNew
        int sOldLen = sOld.Length;  // length of sOld
        int sNewIdx; // iterates through sNew
        int sOldIdx; // iterates through sOld
        char sNew_i; // ith character of sNew
        char sOld_j; // jth character of sOld
        int cost; // cost

        /// Test string length
        if (Math.Max(sNew.Length, sOld.Length) > Math.Pow(2, 31))
            throw (new Exception("\nMaximum string length in Levenshtein.LD is " + Math.Pow(2, 31) + ".\nYours is " + Math.Max(sNew.Length, sOld.Length) + "."));

        // Step 1

        if (sNewLen == 0)
        {
            return sOldLen;
        }

        if (sOldLen == 0)
        {
            return sNewLen;
        }

        matrix = new int[sNewLen + 1, sOldLen + 1];

        // Step 2

        for (sNewIdx = 0; sNewIdx <= sNewLen; sNewIdx++)
        {
            matrix[sNewIdx, 0] = sNewIdx;
        }

        for (sOldIdx = 0; sOldIdx <= sOldLen; sOldIdx++)
        {
            matrix[0, sOldIdx] = sOldIdx;
        }

        // Step 3

        for (sNewIdx = 1; sNewIdx <= sNewLen; sNewIdx++)
        {
            sNew_i = sNew[sNewIdx - 1];

            // Step 4

            for (sOldIdx = 1; sOldIdx <= sOldLen; sOldIdx++)
            {
                sOld_j = sOld[sOldIdx - 1];

                // Step 5

                if (sNew_i == sOld_j)
                {
                    cost = 0;
                }
                else
                {
                    cost = 1;
                }

                // Step 6

                matrix[sNewIdx, sOldIdx] = Minimum(matrix[sNewIdx - 1, sOldIdx] + 1, matrix[sNewIdx, sOldIdx - 1] + 1, matrix[sNewIdx - 1, sOldIdx - 1] + cost);

            }
        }

        // Step 7

        /// Value between 0 - 100
        /// 0==perfect match 100==totaly different
        System.Console.WriteLine("Dist=" + matrix[sNewLen, sOldLen]);
        int max = System.Math.Max(sNewLen, sOldLen);
        return (100 * matrix[sNewLen, sOldLen]) / max;
    }
}

Credits to the Levenshtein class to: http://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm

like image 113
Tim Schmelter Avatar answered Nov 15 '22 19:11

Tim Schmelter


    public static string [] CommonString(string left, string right)
    {
        List<string> result = new List<string>();
        string [] rightArray = right.Split();
        string [] leftArray = left.Split();

        result.AddRange(rightArray.Where(r => leftArray.Any(l => l.StartsWith(r))));

        // must check other way in case left array contains smaller words than right array
        result.AddRange(leftArray.Where(l => rightArray.Any(r => r.StartsWith(l))));

        return result.Distinct().ToArray();
    }
like image 42
nerdybeardo Avatar answered Nov 15 '22 20:11

nerdybeardo