Index: subversion/libsvn_diff/lcs.c =================================================================== --- subversion/libsvn_diff/lcs.c (revision 1128318) +++ subversion/libsvn_diff/lcs.c (working copy) @@ -45,12 +45,14 @@ apr_off_t y; svn_diff__lcs_t *lcs; svn_diff__position_t *position[2]; + apr_int32_t uniquematchcount; }; static APR_INLINE void svn_diff__snake(apr_off_t k, svn_diff__snake_t *fp, int idx, + apr_uint32_t *token_counts[2], svn_diff__lcs_t **freelist, apr_pool_t *pool) { @@ -58,6 +60,7 @@ svn_diff__position_t *position[2]; svn_diff__lcs_t *lcs; svn_diff__lcs_t *previous_lcs; + apr_int32_t uniquematchcount; /* The previous entry at fp[k] is going to be replaced. See if we * can mark that lcs node for reuse, because the sequence up to this @@ -80,6 +83,7 @@ { start_position[0] = fp[k - 1].position[0]; start_position[1] = fp[k - 1].position[1]->next; + uniquematchcount = fp[k - 1].uniquematchcount; previous_lcs = fp[k - 1].lcs; } @@ -87,11 +91,17 @@ { start_position[0] = fp[k + 1].position[0]->next; start_position[1] = fp[k + 1].position[1]; + uniquematchcount = fp[k + 1].uniquematchcount; previous_lcs = fp[k + 1].lcs; } + if (previous_lcs) + { + previous_lcs->refcount++; + } + /* ### Optimization, skip all positions that don't have matchpoints * ### anyway. Beware of the sentinel, don't skip it! */ @@ -99,8 +109,12 @@ position[0] = start_position[0]; position[1] = start_position[1]; + while (1) + { while (position[0]->node == position[1]->node) { + if (token_counts[0][position[0]->node] == 1 && token_counts[1][position[0]->node] == 1) + uniquematchcount++; position[0] = position[0]->next; position[1] = position[1]->next; } @@ -122,22 +136,24 @@ lcs->length = position[1]->offset - start_position[1]->offset; lcs->next = previous_lcs; lcs->refcount = 1; - fp[k].lcs = lcs; + previous_lcs = lcs; + start_position[0] = position[0]; + start_position[1] = position[1]; } - else - { - fp[k].lcs = previous_lcs; + if (position[0]->node >= 0 && token_counts[1][position[0]->node] == 0) + start_position[0] = position[0] = position[0]->next; + else if (position[1]->node >= 0 && token_counts[0][position[1]->node] == 0) + start_position[1] = position[1] = position[1]->next; + else + break; } - if (previous_lcs) - { - previous_lcs->refcount++; - } - + fp[k].lcs = previous_lcs; fp[k].position[0] = position[0]; fp[k].position[1] = position[1]; fp[k].y = position[1]->offset; + fp[k].uniquematchcount = uniquematchcount; } @@ -185,18 +201,62 @@ } +void +clear_fp(svn_diff__snake_t *fp, + apr_off_t k, + apr_off_t min_index, + apr_off_t max_index, + svn_diff__lcs_t **lcs_freelist) +{ + svn_diff__snake_t best; + svn_diff__lcs_t *lcs, *prev_lcs; + + best = fp[k]; + best.lcs->refcount++; + for (k = min_index; k <= max_index; k++) + { + lcs = fp[k].lcs; + fp[k].y = 0; + fp[k].lcs = NULL; + fp[k].uniquematchcount = 0; + while (lcs) + { + lcs->refcount--; + if (lcs->refcount) + break; + + prev_lcs = lcs->next; + lcs->next = lcs_freelist; + lcs_freelist = lcs; + lcs = prev_lcs; + } + } + fp[0] = best; + fp[0].uniquematchcount = 0; +} + + svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + apr_uint32_t *token_counts_list1, /* array of counts */ + apr_uint32_t *token_counts_list2, /* array of counts */ + apr_uint32_t num_tokens, apr_off_t prefix_lines, apr_off_t suffix_lines, apr_pool_t *pool) { int idx; apr_off_t length[2]; + apr_uint32_t *token_counts[2]; + apr_uint32_t unique_count[2]; + apr_uint32_t token_index; svn_diff__snake_t *fp; apr_off_t d; apr_off_t k; + apr_off_t min_index; + apr_off_t max_index; + apr_off_t oldk; apr_off_t p = 0; svn_diff__lcs_t *lcs, *lcs_freelist = NULL; @@ -231,10 +291,19 @@ return lcs; } + unique_count[1] = unique_count[0] = 0; + for (token_index = 0; token_index < num_tokens; token_index++) + { + if (token_counts_list1[token_index] == 0) + unique_count[1] += token_counts_list2[token_index]; + if (token_counts_list2[token_index] == 0) + unique_count[0] += token_counts_list1[token_index]; + } + /* Calculate length of both sequences to be compared */ length[0] = position_list1->offset - position_list1->next->offset + 1; length[1] = position_list2->offset - position_list2->next->offset + 1; - idx = length[0] > length[1] ? 1 : 0; + idx = length[0] - unique_count[0] > length[1] - unique_count[1] ? 1 : 0; /* strikerXXX: here we allocate the furthest point array, which is * strikerXXX: sized M + N + 3 (!) @@ -246,18 +315,20 @@ sentinel_position[idx].next = position_list1->next; position_list1->next = &sentinel_position[idx]; sentinel_position[idx].offset = position_list1->offset + 1; + token_counts[idx] = token_counts_list1; sentinel_position[abs(1 - idx)].next = position_list2->next; position_list2->next = &sentinel_position[abs(1 - idx)]; sentinel_position[abs(1 - idx)].offset = position_list2->offset + 1; + token_counts[abs(1 - idx)] = token_counts_list2; - /* These are never dereferenced, only compared by value, so we - * can safely fake these up and the void* cast is OK. + /* Negative indices will not be used elsewhere */ - sentinel_position[0].node = (void*)&sentinel_position[0]; - sentinel_position[1].node = (void*)&sentinel_position[1]; + sentinel_position[0].node = -1; + sentinel_position[1].node = -2; - d = length[abs(1 - idx)] - length[idx]; + d = length[abs(1 - idx)] - unique_count[abs(1 - idx)] + - (length[idx] - unique_count[idx]); /* k = -1 will be the first to be used to get previous * position information from, make sure it holds sane @@ -269,17 +340,33 @@ p = 0; do { + min_index = -p + (d<0 ? d : 0); + max_index = p + (d>0 ? d : 0); /* Forward */ - for (k = -p; k < d; k++) + for (k = min_index; k < d; k++) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, idx, token_counts, &lcs_freelist, pool); + if ((d >= 0 && fp[k].uniquematchcount > k + 2 * p) || (d < 0 && fp[k].uniquematchcount > k + 2*(p - d))) + { + d -= k; + clear_fp(fp, k, min_index, max_index, &lcs_freelist); + p = 0; + k = 0; + max_index = (d>0 ? d : 0); + } } - for (k = d + p; k >= d; k--) + for (k = max_index; k >= d; k--) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, idx, token_counts, &lcs_freelist, pool); + if ((d <= 0 && fp[k].uniquematchcount > -k + 2 * p) || (d > 0 && fp[k].uniquematchcount > 2*(p + d) - k)) + { + d -= k; + clear_fp(fp, k, min_index, max_index, &lcs_freelist); + p = 0; + k = 0; + } } - p++; } while (fp[d].position[1] != &sentinel_position[1]);