I have a program that will tell me the distance between two strings, which is working just fine.
e.g.
word 1 = hello
word 2 = hi
has a cost of 5 to go from one to the other (substitution of i with e is 2, and there are 3 insertions).
Basically, an insert costs 1, delete costs 1, substitution is 2. Words can also be shuffled in the string to reduce cost.
I need a way to remember what operation is happening at what point so that I can show an alignment.
e.g.
wax
S M S(substitute move substitute, cost of 4)
and
Any ideas or tips?
import sys
from sys import stdout
def minEditDist(target, source):
# Length of the target strings set to variables
n = len(target)
m = len(source)
distance = [[0 for i in range(m+1)] for j in range(n+1)]
for i in range(1,n+1):
distance[i][0] = distance[i-1][0] + insertCost(target[i-1])
for j in range(1,m+1):
distance[0][j] = distance[0][j-1] + deleteCost(source[j-1])
for i in range(1,n+1):
for j in range(1,m+1):
distance[i][j] = min(distance[i-1][j]+1,
distance[i][j-1]+1,
distance[i-1][j-1]+subCost(source[j-1],target[i-1]))
# Return the minimum distance using all the table cells
return distance[i][j]
def subCost(x,y):
if x == y:
return 0
else:
return 2
def insertCost(x):
return 1
def deleteCost(x):
return 1
# User inputs the strings for comparison
# Commented out here because cloud9 won't take input like this
# word1 = raw_input("Enter A Word: ")
# word2 = raw_input("Enter The Second Word: ")
word1 = "wax"
word2 = "and"
word1x = word1
word2x = word2
# Reassign variables to words with stripped right side whitespace
word1x = word1x.strip()
word2x = word2x.strip()
if(len(word1) > len(word2)):
range_num = len(word1)
else:
range_num = len(word2)
# Display the minimum distance between the two specified strings
print "The minimum edit distance between S1 and S2 is: ", minEditDist(word1x,word2x), "!"
print (word1x)
print (word2x)
You can start with something like this.
I have added the proper data for "S".
path = []
def minEditDist(target, source):
# Length of the target strings set to variables
n = len(target)
m = len(source)
distance = [[0 for i in range(m+1)] for j in range(n+1)]
for i in range(1,n+1):
distance[i][0] = distance[i-1][0] + insertCost(target[i-1])
for j in range(1,m+1):
distance[0][j] = distance[0][j-1] + deleteCost(source[j-1])
for i in range(1,n+1):
for j in range(1,m+1):
sc = subCost(source[j-1],target[i-1])
distance[i][j] = min(distance[i-1][j]+1,
distance[i][j-1]+1,
distance[i-1][j-1]+sc)
if distance[i-1][j]+1 > distance[i-1][j-1]+sc and distance[i][j-1]+1 > distance[i-1][j-1]+sc:
path.append("S");
print path
# Return the minimum distance using all the table cells
return distance[i][j]
def subCost(x,y):
if x == y:
return 0
else:
return 2
def insertCost(x):
path.append("I")
return 1
def deleteCost(x):
path.append("D")
return 1
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With