Sequence : Edit Distance
The (Levenshtein) Edit Distance is the minimum number (cost) of single-character edit operations (deletions, insertions, and/or replacements) required to transform one string into the other and vice-versa. O(mn), where m, n = len(s1), len(s2) Dynamic programming: 1) dp[i][j] - edit distance between s1[0:i] and s2[0:j] 2) dp[0][j] = dp[i][0] = 0 3) if s1[i - 1] == s2[j - 1] : dp[i][j] = dp[i - 1][j - 1] 4) if s1[i - 1] != s2[j - 1] : dp[i][j] = 1 + min(dp[i][j - 1], dp[i - 1][j], dp[i - 1][j - 1]) 5) dp[m][n] - edit distance between s1 and s2
|
|
// -----------------------------------------------------------------------------
// Edit Distance C++
// -----------------------------------------------------------------------------
template<typename T>
int editDist(std::vector<T> &s1, std::vector<T> &s2)
{
int m = s1.size();
int n = s2.size();
std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1));
for (int i = 0; i <= m; i++) dp[i][0] = i;
for (int j = 0; j <= n; j++) dp[0][j] = j;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1] == s2[j - 1])
dp[i][j] = dp[i - 1][j - 1];
else
dp[i][j] = 1 + std::min(dp[i][j - 1],
std::min(dp[i - 1][j],
dp[i - 1][j - 1]));
}
}
return dp[m][n];
}
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
// Edit Distance C#
// -----------------------------------------------------------------------------
static int editDist<T>(T[] s1, T[] s2) where T : IEquatable<T>
{
int m = s1.Length;
int n = s2.Length;
int[][] dp = new int[m + 1][];
for (int i = 0; i < m + 1; i++) dp[i] = new int[n + 1];
for (int i = 0; i <= m; i++) dp[i][0] = i;
for (int j = 0; j <= n; j++) dp[0][j] = j;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (s1[i - 1].Equals(s2[j - 1]))
dp[i][j] = dp[i - 1][j - 1];
else
dp[i][j] = 1 + Math.Min(dp[i][j - 1],
Math.Min(dp[i - 1][j],
dp[i - 1][j - 1]));
}
}
return dp[m][n];
}
// -----------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Edit Distance Python
# ------------------------------------------------------------------------------
def editDist(s1, s2):
m, n = len(s1), len(s2)
dp = [[0 for j in range(n + 1)] for i in range(m + 1)]
for i in range(m + 1): dp[i][0] = i
for j in range(n + 1): dp[0][j] = j
for i in range(1, m + 1):
for j in range(1, n + 1):
if s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = 1 + min(dp[i][j - 1], dp[i - 1][j], dp[i - 1][j - 1])
return dp[m][n]
# ------------------------------------------------------------------------------
Different cost (delete, insert, replace)
// -----------------------------------------------------------------------------
// Edit Distance C++
// -----------------------------------------------------------------------------
template<typename T>
int editDist(std::vector<T> &s1, std::vector<T> &s2, int deleteCost,
int insertCost,
int replaceCost)
{
int m = s1.size();
int n = s2.size();
std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1));
for (int i = 1; i <= m; i++) dp[i][0] = dp[i - 1][0] + deleteCost;
for (int j = 1; j <= n; j++) dp[0][j] = dp[0][j - 1] + insertCost;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
int replCost = s1[i - 1] == s2[j - 1] ? 0 : replaceCost;
dp[i][j] = std::min(dp[i][j - 1] + insertCost,
std::min(dp[i - 1][j] + deleteCost,
dp[i - 1][j - 1] + replCost));
}
}
return dp[m][n];
}
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
// Edit Distance C#
// -----------------------------------------------------------------------------
static int editDist<T>(T[] s1, T[] s2, int deleteCost,
int insertCost,
int replaceCost) where T : IEquatable<T>
{
int m = s1.Length;
int n = s2.Length;
int[][] dp = new int[m + 1][];
for (int i = 0; i < m + 1; i++) dp[i] = new int[n + 1];
for (int i = 1; i <= m; i++) dp[i][0] = dp[i - 1][0] + deleteCost;
for (int j = 1; j <= n; j++) dp[0][j] = dp[0][j - 1] + insertCost;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
int replCost = s1[i - 1].Equals(s2[j - 1]) ? 0 : replaceCost;
dp[i][j] = Math.Min(dp[i][j - 1] + insertCost,
Math.Min(dp[i - 1][j] + deleteCost,
dp[i - 1][j - 1] + replCost));
}
}
return dp[m][n];
}
// -----------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Edit Distance Python
# ------------------------------------------------------------------------------
def editDist(s1, s2, deleteCost, insertCost, replaceCost):
m, n = len(s1), len(s2)
dp = [[0 for j in range(n + 1)] for i in range(m + 1)]
for i in range(1, m + 1): dp[i][0] = dp[i - 1][0] + deleteCost
for j in range(1, n + 1): dp[0][j] = dp[0][j - 1] + insertCost
for i in range(1, m + 1):
for j in range(1, n + 1):
replCost = 0 if s1[i - 1] == s2[j - 1] else replaceCost
dp[i][j] = min(dp[i][j - 1] + insertCost,
dp[i - 1][j] + deleteCost,
dp[i - 1][j - 1] + replCost)
return dp[m][n]
# ------------------------------------------------------------------------------