2 * lcs.c : routines for creating an lcs
\r
4 * ====================================================================
\r
5 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
\r
7 * This software is licensed as described in the file COPYING, which
\r
8 * you should have received as part of this distribution. The terms
\r
9 * are also available at http://subversion.tigris.org/license-1.html.
\r
10 * If newer versions of this license are posted there, you may use a
\r
11 * newer version instead, at your option.
\r
13 * This software consists of voluntary contributions made by many
\r
14 * individuals. For exact contribution history, see the revision
\r
15 * history and logs, available at http://subversion.tigris.org/.
\r
16 * ====================================================================
\r
21 #include <apr_pools.h>
\r
22 #include <apr_general.h>
\r
28 * Calculate the Longest Common Subsequence between two datasources.
\r
29 * This function is what makes the diff code tick.
\r
31 * The LCS algorithm implemented here is described by Sun Wu,
\r
32 * Udi Manber and Gene Meyers in "An O(NP) Sequence Comparison Algorithm"
\r
36 typedef struct svn_diff__snake_t svn_diff__snake_t;
\r
38 struct svn_diff__snake_t
\r
41 svn_diff__lcs_t *lcs;
\r
42 svn_diff__position_t *position[2];
\r
45 static APR_INLINE void
\r
46 svn_diff__snake(apr_off_t k,
\r
47 svn_diff__snake_t *fp,
\r
49 svn_diff__lcs_t **freelist,
\r
52 svn_diff__position_t *start_position[2];
\r
53 svn_diff__position_t *position[2];
\r
54 svn_diff__lcs_t *lcs;
\r
55 svn_diff__lcs_t *previous_lcs;
\r
57 /* The previous entry at fp[k] is going to be replaced. See if we
\r
58 * can mark that lcs node for reuse, because the sequence up to this
\r
59 * point was a dead end.
\r
68 previous_lcs = lcs->next;
\r
69 lcs->next = *freelist;
\r
74 if (fp[k - 1].y + 1 > fp[k + 1].y)
\r
76 start_position[0] = fp[k - 1].position[0];
\r
77 start_position[1] = fp[k - 1].position[1]->next;
\r
79 previous_lcs = fp[k - 1].lcs;
\r
83 start_position[0] = fp[k + 1].position[0]->next;
\r
84 start_position[1] = fp[k + 1].position[1];
\r
86 previous_lcs = fp[k + 1].lcs;
\r
90 /* ### Optimization, skip all positions that don't have matchpoints
\r
91 * ### anyway. Beware of the sentinel, don't skip it!
\r
94 position[0] = start_position[0];
\r
95 position[1] = start_position[1];
\r
97 while (position[0]->node == position[1]->node)
\r
99 position[0] = position[0]->next;
\r
100 position[1] = position[1]->next;
\r
103 if (position[1] != start_position[1])
\r
108 *freelist = lcs->next;
\r
112 lcs = apr_palloc(pool, sizeof(*lcs));
\r
115 lcs->position[idx] = start_position[0];
\r
116 lcs->position[abs(1 - idx)] = start_position[1];
\r
117 lcs->length = position[1]->offset - start_position[1]->offset;
\r
118 lcs->next = previous_lcs;
\r
124 fp[k].lcs = previous_lcs;
\r
129 previous_lcs->refcount++;
\r
132 fp[k].position[0] = position[0];
\r
133 fp[k].position[1] = position[1];
\r
135 fp[k].y = position[1]->offset;
\r
139 static svn_diff__lcs_t *
\r
140 svn_diff__lcs_reverse(svn_diff__lcs_t *lcs)
\r
142 svn_diff__lcs_t *next;
\r
143 svn_diff__lcs_t *prev;
\r
146 while (lcs != NULL)
\r
159 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
\r
160 svn_diff__position_t *position_list2, /* pointer to tail (ring) */
\r
164 apr_off_t length[2];
\r
165 svn_diff__snake_t *fp;
\r
169 svn_diff__lcs_t *lcs, *lcs_freelist = NULL;
\r
171 svn_diff__position_t sentinel_position[2];
\r
173 /* Since EOF is always a sync point we tack on an EOF link
\r
174 * with sentinel positions
\r
176 lcs = apr_palloc(pool, sizeof(*lcs));
\r
177 lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
\r
178 lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
\r
179 lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
\r
180 lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
\r
185 if (position_list1 == NULL || position_list2 == NULL)
\r
188 /* Calculate length of both sequences to be compared */
\r
189 length[0] = position_list1->offset - position_list1->next->offset + 1;
\r
190 length[1] = position_list2->offset - position_list2->next->offset + 1;
\r
191 idx = length[0] > length[1] ? 1 : 0;
\r
193 /* strikerXXX: here we allocate the furthest point array, which is
\r
194 * strikerXXX: sized M + N + 3 (!)
\r
196 fp = apr_pcalloc(pool,
\r
197 sizeof(*fp) * (apr_size_t)(length[0] + length[1] + 3));
\r
198 fp += length[idx] + 1;
\r
200 sentinel_position[idx].next = position_list1->next;
\r
201 position_list1->next = &sentinel_position[idx];
\r
202 sentinel_position[idx].offset = position_list1->offset + 1;
\r
204 sentinel_position[abs(1 - idx)].next = position_list2->next;
\r
205 position_list2->next = &sentinel_position[abs(1 - idx)];
\r
206 sentinel_position[abs(1 - idx)].offset = position_list2->offset + 1;
\r
208 /* These are never dereferenced, only compared by value, so we
\r
209 * can safely fake these up and the void* cast is OK.
\r
211 sentinel_position[0].node = (void*)&sentinel_position[0];
\r
212 sentinel_position[1].node = (void*)&sentinel_position[1];
\r
214 d = length[abs(1 - idx)] - length[idx];
\r
216 /* k = -1 will be the first to be used to get previous
\r
217 * position information from, make sure it holds sane
\r
220 fp[-1].position[0] = sentinel_position[0].next;
\r
221 fp[-1].position[1] = &sentinel_position[1];
\r
227 for (k = -p; k < d; k++)
\r
229 svn_diff__snake(k, fp, idx, &lcs_freelist, pool);
\r
232 for (k = d + p; k >= d; k--)
\r
234 svn_diff__snake(k, fp, idx, &lcs_freelist, pool);
\r
239 while (fp[d].position[1] != &sentinel_position[1]);
\r
241 lcs->next = fp[d].lcs;
\r
242 lcs = svn_diff__lcs_reverse(lcs);
\r
244 position_list1->next = sentinel_position[idx].next;
\r
245 position_list2->next = sentinel_position[abs(1 - idx)].next;
\r