Sequence Alignment is quite a standard problem and finds application in DNA or protein alignment in the field of Bioinformatics. I recently came across a different version of this problem.
Given two input strings(assume that strings are made up of only A,C,G,T), the problem basically was to find the maximum alignment score based on the following matrix --
A C G T -
A 5 -1 -2 -1 -3
C -1 5 -3 -2 -4
G -2 -3 5 -2 -2
T -1 -2 -2 5 -1
- -3 -4 -2 -1 Not Allowed
So,if A is aligned with -, we add -3 to the alignment score or if G is aligned with T, we add -2 to the score or if C is aligned with C, we add 5. So, for input strings AGTGATG and GTTAG, the maximum alignment score is 14 and one of the alignments with maximum score can be represented as
AGTGATG
-GTTA-G
The alignment score is calculated as follows : A- = -3, GG = 5, TT = 5, GT= -2, AA = 5, T-= -1 and GG = 5. Adding them up, -3+5+5-2+5-1+5 = 14 which is the maximum possible alignment score for this pair of strings.
I am able to code it up using dynamic programming and get the Alignment score matrix but I am facing problems in printing all possible alignments of the two strings with maximum alignment score. I tried to backtrack as we do in LCS but could not make it work. I am attaching my code.
static Dictionary<string, int> dict;
static void Main(string[] args)
{
//This has been assumed that the strings contain only A,C,G,T and -(?)..caps
Console.WriteLine("Enter first string : ");
string a = Console.ReadLine();
a = "-" + a;
Console.WriteLine("Enter second string : ");
string b = Console.ReadLine();
b = "-" + b;
int[,] SQ = new int[a.Length, b.Length];
#region Create Dictionary
dict = new Dictionary<string, int>();
dict.Add("AA", 5);
dict.Add("AC", -1);
dict.Add("AG", -2);
dict.Add("AT", -1);
dict.Add("A-", -3);
dict.Add("CA", -1);
dict.Add("CC", 5);
dict.Add("CG", -3);
dict.Add("CT", -2);
dict.Add("C-", -4);
dict.Add("GA", -2);
dict.Add("GC", -3);
dict.Add("GG", 5);
dict.Add("GT", -2);
dict.Add("G-", -2);
dict.Add("TA", -1);
dict.Add("TC", -2);
dict.Add("TG", -2);
dict.Add("TT", 5);
dict.Add("T-", -1);
dict.Add("-A", -3);
dict.Add("-C", -4);
dict.Add("-G", -2);
dict.Add("-T", -1);
dict.Add("--", 0);
#endregion Create Dictionary
for (int i = 0; i < a.Length; i++)
{
for (int j = 0; j < b.Length; j++)
{
int key = 0, key1 = 0, key2 = 0;
dict.TryGetValue(a[i].ToString() + b[j].ToString(), out key);
dict.TryGetValue("-" + b[j].ToString(), out key1);
dict.TryGetValue(a[i].ToString() + "-", out key2);
if (i == 0)
SQ[i, j] = key1;
else if (j == 0)
SQ[i, j] = key2;
else
SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + key, Math.Max(SQ[i - 1, j] + key1, SQ[i, j - 1] + key2));
}
}
for (int i = 0; i < a.Length; i++)
{
for (int j = 0; j < b.Length; j++)
{
Console.Write(SQ[i, j] + " ");
}
Console.WriteLine();
}
Console.WriteLine("Alignment Score : " + SQ[a.Length - 1, b.Length - 1]);
printAllAlignmentsWithHighestAlignmentScore();
Console.Read();
}
Can somebody please help me in implementing printAllAlignmentsWithHighestAlignmentScore() function?
Finally, I have the working code to do exactly what I wanted to do. The problem is actually a slight variation of Needleman–Wunsch algorithm
Code :
class Program
{
static Dictionary<string, int> dict;
static void printAllAlignments(int[,] SQ, string a, string b, int p, int q, string str1, string str2){
if (p == 0 || q == 0){
while (p == 0 && q != 0){
str1 = "-" + str1;
str2 = b[--q]+str2;
}
while (q == 0 && p != 0){
str1 = a[--p]+str1;
str2 = '-' + str2;
}
Console.WriteLine("\n"+str1+"\n"+str2+"\n");
return;
}
if (SQ[p, q] == (dict[a[p - 1] + b[q - 1].ToString()] + SQ[p - 1, q - 1]))
printAllAlignments(SQ, a, b, p - 1, q - 1, a[p-1]+str1, b[q-1]+str2);
if (SQ[p, q] == (dict[a[p - 1]+ "-"] + SQ[p - 1, q]))
printAllAlignments(SQ, a, b, p - 1, q, a[p-1]+str1, "-"+str2);
if (SQ[p, q] == (dict["-" + b[q-1]] + SQ[p, q - 1]))
printAllAlignments(SQ, a, b, p, q - 1, "-"+str1, b[q-1]+str2);
}
static void Main(string[] args)
{
//This has been assumed that the strings contain only A,C,G,T and -(?)..caps
Console.WriteLine("Enter first string : ");
string a = Console.ReadLine();
Console.WriteLine("Enter second string : ");
string b = Console.ReadLine();
int[,] SQ = new int[a.Length+1, b.Length+1];
#region Create Dictionary
dict = new Dictionary<string, int>();
dict.Add("AA", 5);
dict.Add("AC", -1);
dict.Add("AG", -2);
dict.Add("AT", -1);
dict.Add("A-", -3);
dict.Add("CA", -1);
dict.Add("CC", 5);
dict.Add("CG", -3);
dict.Add("CT", -2);
dict.Add("C-", -4);
dict.Add("GA", -2);
dict.Add("GC", -3);
dict.Add("GG", 5);
dict.Add("GT", -2);
dict.Add("G-", -2);
dict.Add("TA", -1);
dict.Add("TC", -2);
dict.Add("TG", -2);
dict.Add("TT", 5);
dict.Add("T-", -1);
dict.Add("-A", -3);
dict.Add("-C", -4);
dict.Add("-G", -2);
dict.Add("-T", -1);
dict.Add("--", 0);
#endregion Create Dictionary
SQ[0, 0] = 0;
for (int i = 1; i <= a.Length; i++)
SQ[i, 0] = dict["-" + a[i - 1].ToString()] + SQ[i - 1, 0];
for (int i = 1; i <= b.Length; i++)
SQ[0, i] = dict[b[i - 1].ToString() + "-"] + SQ[0, i - 1];
for (int i = 1; i <= a.Length; i++)
for (int j = 1; j <= b.Length; j++)
SQ[i, j] = Math.Max(SQ[i - 1, j - 1] + dict[a[i-1].ToString() + b[j-1]], Math.Max(SQ[i - 1, j] + dict[a[i-1] + "-"], SQ[i, j - 1] + dict["-" + b[j-1]]));
Console.WriteLine("Max Alignment Score : " + SQ[a.Length, b.Length]);
printAllAlignments(SQ, a, b, a.Length , b.Length,"","");
Console.Read();
}
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With