-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathedit-distance.c.tmpl
172 lines (153 loc) · 4.55 KB
/
edit-distance.c.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#linein
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
#include "config.h"
#include "text-fuzzy.h"
#include "edit-distance-[% stem %].h"
int [% function %] (text_fuzzy_t * tf)
{
const [% type %] * word1 = (const [% type %] *) tf->b.[% value %];
int len1 = tf->b.[% length %];
const [% type %] * word2 = (const [% type %] *) tf->text.[% value %];
int len2 = tf->text.[% length %];
/* Return value. */
int d;
/* X and Y coordinates in the matrix of strings. */
int i;
int j;
/* Unfeasible value; indicates no match. */
int large_value;
int max;
/* Matrix is the dynamic programming matrix. We economize on space
by having only two columns. */
#ifdef STACKALLOCOK
int matrix[2][len2 + 1];
#else
int * matrix[2];
#endif
tf->n_edit = 0;
max = tf->max_distance;
#ifndef STACKALLOCOK
for (i = 0; i < 2; i++) {
CALLOC (matrix[i], len2 + 1, int);
}
#endif
/*
Initialize the 0 row of "matrix".
0
1
2
3
*/
if (max != NO_MAX_DISTANCE) {
large_value = max + 1;
}
else {
if (len2 > len1) {
large_value = len2;
}
else {
large_value = len1;
}
}
for (j = 0; j <= len2; j++) {
matrix[0][j] = j;
}
/* Loop over column. */
for (i = 1; i <= len1; i++) {
[% type %] c1;
/* The first value to consider of the ith column. */
int min_j;
/* The last value to consider of the ith column. */
int max_j;
/* The smallest value of the matrix in the ith column. */
int col_min;
/* The next column of the matrix to fill in. */
int next;
/* The previously-filled-in column of the matrix. */
int prev;
c1 = word1[i-1];
min_j = 1;
max_j = len2;
/* If we have a maximum permitted distance, we can set the
minimum and maximum columns to inspect to be smaller than
the smallest and largest values respectively. */
if (max != NO_MAX_DISTANCE) {
if (i > max) {
min_j = i - max;
}
if (len2 > max + i) {
max_j = max + i;
}
}
col_min = INT_MAX;
next = i % 2;
if (next == 1) {
prev = 0;
}
else {
prev = 1;
}
matrix[next][0] = i;
/* Loop over rows. */
for (j = 1; j <= len2; j++) {
if (j < min_j || j > max_j) {
/* Put a large value in there. */
matrix[next][j] = large_value;
}
else {
[% type %] c2;
c2 = word2[j - 1];
if ([% compare_c1_c2 %]) {
/* The character at position i in word1 is the same as
the character at position j in word2. */
matrix[next][j] = matrix[prev][j - 1];
}
else {
/* The character at position i in word1 is not the
same as the character at position j in word2, so
work out what the minimum cost for getting to cell
i, j is. */
int delete;
int insert;
int substitute;
int minimum;
delete = matrix[prev][j] + [% delete_cost %];
insert = matrix[next][j-1] + [% insert_cost %];
substitute = matrix[prev][j-1] + [% substitute_cost %];
minimum = delete;
if (insert < minimum) {
minimum = insert;
}
if (substitute < minimum) {
minimum = substitute;
}
matrix[next][j] = minimum;
}
}
/* Find the minimum value in the ith column. */
if (matrix[next][j] < col_min) {
col_min = matrix[next][j];
}
}
if (max != NO_MAX_DISTANCE) {
if (col_min > max) {
/* All the elements of the ith column are greater than the
maximum, so no match less than or equal to max can be
found by looking at succeeding columns. */
d = large_value;
goto cleanup;
}
}
}
d = matrix[len1 % 2][len2];
cleanup:
#ifndef STACKALLOCOK
for (i = 0; i < 2; i++) {
FREE (matrix[i]);
}
#endif
return d;
}