Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Printing all alignments of two sequences with maximum score

Sequence Alignment is quite a standard problem and finds application in DNA or protein alignment in the field of Bioinformatics. I recently came across a different version of this problem.

Given two input strings(assume that strings are made up of only A,C,G,T), the problem basically was to find the maximum alignment score based on the following matrix --

   A  C  G  T  -
A  5 -1 -2 -1 -3  
C -1  5 -3 -2 -4
G -2 -3  5 -2 -2
T -1 -2 -2  5 -1
- -3 -4 -2 -1 Not Allowed

So,if A is aligned with -, we add -3 to the alignment score or if G is aligned with T, we add -2 to the score or if C is aligned with C, we add 5. So, for input strings AGTGATG and GTTAG, the maximum alignment score is 14 and one of the alignments with maximum score can be represented as

AGTGATG
-GTTA-G

The alignment score is calculated as follows : A- = -3, GG = 5, TT = 5, GT= -2, AA = 5, T-= -1 and GG = 5. Adding them up, -3+5+5-2+5-1+5 = 14 which is the maximum possible alignment score for this pair of strings.

I am able to code it up using dynamic programming and get the Alignment score matrix but I am facing problems in printing all possible alignments of the two strings with maximum alignment score. I tried to backtrack as we do in LCS but could not make it work. I am attaching my code.

static Dictionary<string, int> dict;

    static void Main(string[] args)
    {
        //This has been assumed that the strings contain only A,C,G,T and -(?)..caps

        Console.WriteLine("Enter first string : ");
        string a = Console.ReadLine();
        a = "-" + a;
        Console.WriteLine("Enter second string : ");
        string b = Console.ReadLine();
        b = "-" + b;
        int[,] SQ = new int[a.Length, b.Length];
        #region Create Dictionary
        dict = new Dictionary<string, int>();
        dict.Add("AA", 5);
        dict.Add("AC", -1);
        dict.Add("AG", -2);
        dict.Add("AT", -1);
        dict.Add("A-", -3);

        dict.Add("CA", -1);
        dict.Add("CC", 5);
        dict.Add("CG", -3);
        dict.Add("CT", -2);
        dict.Add("C-", -4);

        dict.Add("GA", -2);
        dict.Add("GC", -3);
        dict.Add("GG", 5);
        dict.Add("GT", -2);
        dict.Add("G-", -2);

        dict.Add("TA", -1);
        dict.Add("TC", -2);
        dict.Add("TG", -2);
        dict.Add("TT", 5);
        dict.Add("T-", -1);

        dict.Add("-A", -3);
        dict.Add("-C", -4);
        dict.Add("-G", -2);
        dict.Add("-T", -1);
        dict.Add("--", 0);
        #endregion Create Dictionary

        for (int i = 0; i < a.Length; i++)
        {
            for (int j = 0; j < b.Length; j++)
            {
                int key = 0, key1 = 0, key2 = 0;
                dict.TryGetValue(a[i].ToString() + b[j].ToString(), out key);
                dict.TryGetValue("-" + b[j].ToString(), out key1);
                dict.TryGetValue(a[i].ToString() + "-", out key2);
                if (i == 0)
                    SQ[i, j] = key1;
                else if (j == 0)
                    SQ[i, j] = key2;
                else
                    SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + key, Math.Max(SQ[i - 1, j] + key1, SQ[i, j - 1] + key2));
            }
        }
        for (int i = 0; i < a.Length; i++)
        {
            for (int j = 0; j < b.Length; j++)
            {
                Console.Write(SQ[i, j] + "   ");
            }
            Console.WriteLine();
        }

        Console.WriteLine("Alignment Score : " + SQ[a.Length - 1, b.Length - 1]);            
        printAllAlignmentsWithHighestAlignmentScore();
        Console.Read();
    }

Can somebody please help me in implementing printAllAlignmentsWithHighestAlignmentScore() function?

like image 533
discoverAnkit Avatar asked Jul 24 '14 09:07

discoverAnkit


1 Answers

Finally, I have the working code to do exactly what I wanted to do. The problem is actually a slight variation of Needleman–Wunsch algorithm

Code :

class Program
{
    static Dictionary<string, int> dict;
    static void printAllAlignments(int[,] SQ, string a, string b, int p, int q, string str1, string str2){
        if (p == 0 || q == 0){
            while (p == 0 && q != 0){
                str1 = "-" + str1;
                str2 = b[--q]+str2;
            }
            while (q == 0 && p != 0){
                str1 = a[--p]+str1;
                str2 = '-' + str2;
            }
            Console.WriteLine("\n"+str1+"\n"+str2+"\n");
            return;
        }

        if (SQ[p, q] == (dict[a[p - 1] + b[q - 1].ToString()] + SQ[p - 1, q - 1]))
            printAllAlignments(SQ, a, b, p - 1, q - 1, a[p-1]+str1, b[q-1]+str2);

        if (SQ[p, q] == (dict[a[p - 1]+ "-"] + SQ[p - 1, q]))
            printAllAlignments(SQ, a, b, p - 1, q, a[p-1]+str1, "-"+str2);

        if (SQ[p, q] == (dict["-" + b[q-1]] + SQ[p, q - 1]))            
            printAllAlignments(SQ, a, b, p, q - 1, "-"+str1, b[q-1]+str2);


    }
    static void Main(string[] args)
    {
        //This has been assumed that the strings contain only A,C,G,T and -(?)..caps

        Console.WriteLine("Enter first string : ");
        string a = Console.ReadLine();         
        Console.WriteLine("Enter second string : ");
        string b = Console.ReadLine();          
        int[,] SQ = new int[a.Length+1, b.Length+1];

        #region Create Dictionary
        dict = new Dictionary<string, int>();
        dict.Add("AA", 5);
        dict.Add("AC", -1);
        dict.Add("AG", -2);
        dict.Add("AT", -1);
        dict.Add("A-", -3);

        dict.Add("CA", -1);
        dict.Add("CC", 5);
        dict.Add("CG", -3);
        dict.Add("CT", -2);
        dict.Add("C-", -4);

        dict.Add("GA", -2);
        dict.Add("GC", -3);
        dict.Add("GG", 5);
        dict.Add("GT", -2);
        dict.Add("G-", -2);

        dict.Add("TA", -1);
        dict.Add("TC", -2);
        dict.Add("TG", -2);
        dict.Add("TT", 5);
        dict.Add("T-", -1);

        dict.Add("-A", -3);
        dict.Add("-C", -4);
        dict.Add("-G", -2);
        dict.Add("-T", -1);
        dict.Add("--", 0);
        #endregion Create Dictionary

        SQ[0, 0] = 0;            
        for (int i = 1; i <= a.Length; i++)            
            SQ[i, 0] = dict["-" + a[i - 1].ToString()] + SQ[i - 1, 0];

        for (int i = 1; i <= b.Length; i++)
            SQ[0, i] = dict[b[i - 1].ToString() + "-"] + SQ[0, i - 1];

        for (int i = 1; i <= a.Length; i++)
            for (int j = 1; j <= b.Length; j++)
                SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + dict[a[i-1].ToString() + b[j-1]], Math.Max(SQ[i - 1, j] + dict[a[i-1] + "-"], SQ[i, j - 1] + dict["-" + b[j-1]]));           


        Console.WriteLine("Max Alignment Score : " + SQ[a.Length, b.Length]);
        printAllAlignments(SQ, a, b, a.Length , b.Length,"","");
        Console.Read();
    }
}
like image 151
discoverAnkit Avatar answered Sep 30 '22 17:09

discoverAnkit