Index: subversion/libsvn_diff/diff.c =================================================================== --- subversion/libsvn_diff/diff.c (revision 1104713) +++ subversion/libsvn_diff/diff.c (working copy) @@ -105,6 +105,10 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[2]; + apr_uint32_t num_tokens; + apr_uint32_t token_index; + svn_diff__position_t *current; + apr_uint32_t *token_counts[2]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified}; svn_diff__lcs_t *lcs; @@ -138,6 +142,8 @@ prefix_lines, subpool)); + num_tokens = svn_diff__get_node_count(tree); + /* The cool part is that we don't need the tokens anymore. * Allow the app to clean them up if it wants to. */ @@ -147,8 +153,35 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_destroy(treepool); + token_counts[0] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[1] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + token_counts[0][token_index] = token_counts[1][token_index] = 0; + + current = position_list[0]; + if (current != NULL) + { + do + { + token_counts[0][current->node]++; + current = current->next; + } + while (current != position_list[0]); + } + current = position_list[1]; + if (current != NULL) + { + do + { + token_counts[1][current->node]++; + current = current->next; + } + while (current != position_list[1]); + } + /* Get the lcs */ - lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, + lcs = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, suffix_lines, subpool); /* Produce the diff */ Index: subversion/libsvn_diff/diff.h =================================================================== --- subversion/libsvn_diff/diff.h (revision 1104713) +++ subversion/libsvn_diff/diff.h (working copy) @@ -62,7 +62,7 @@ struct svn_diff__position_t { svn_diff__position_t *next; - svn_diff__node_t *node; + apr_int32_t node; apr_off_t offset; }; @@ -91,12 +91,21 @@ svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + apr_uint32_t *token_counts_list1, /* array of counts */ + apr_uint32_t *token_counts_list2, /* array of counts */ + apr_uint32_t num_tokens, apr_off_t prefix_lines, apr_off_t suffix_lines, apr_pool_t *pool); /* + * Returns number of tokens in a tree + */ +apr_uint32_t +svn_diff__get_node_count(svn_diff__tree_t *tree); + +/* * Support functions to build a tree of token positions */ void @@ -128,6 +137,7 @@ svn_diff__resolve_conflict(svn_diff_t *hunk, svn_diff__position_t **position_list1, svn_diff__position_t **position_list2, + apr_uint32_t num_tokens, apr_pool_t *pool); Index: subversion/libsvn_diff/diff3.c =================================================================== --- subversion/libsvn_diff/diff3.c (revision 1104713) +++ subversion/libsvn_diff/diff3.c (working copy) @@ -38,208 +38,232 @@ svn_diff__resolve_conflict(svn_diff_t *hunk, svn_diff__position_t **position_list1, svn_diff__position_t **position_list2, + apr_uint32_t num_tokens, apr_pool_t *pool) { - apr_off_t modified_start = hunk->modified_start + 1; - apr_off_t latest_start = hunk->latest_start + 1; - apr_off_t common_length; - apr_off_t modified_length = hunk->modified_length; - apr_off_t latest_length = hunk->latest_length; - svn_diff__position_t *start_position[2]; - svn_diff__position_t *position[2]; - svn_diff__lcs_t *lcs = NULL; - svn_diff__lcs_t **lcs_ref = &lcs; - svn_diff_t **diff_ref = &hunk->resolved_diff; - apr_pool_t *subpool; + apr_off_t modified_start = hunk->modified_start + 1; + apr_off_t latest_start = hunk->latest_start + 1; + apr_off_t common_length; + apr_off_t modified_length = hunk->modified_length; + apr_off_t latest_length = hunk->latest_length; + svn_diff__position_t *start_position[2]; + svn_diff__position_t *position[2]; + apr_uint32_t token_index; + svn_diff__position_t *current; + apr_uint32_t *token_counts[2]; + svn_diff__lcs_t *lcs = NULL; + svn_diff__lcs_t **lcs_ref = &lcs; + svn_diff_t **diff_ref = &hunk->resolved_diff; + apr_pool_t *subpool; - /* First find the starting positions for the - * comparison - */ + /* First find the starting positions for the + * comparison + */ - start_position[0] = *position_list1; - start_position[1] = *position_list2; + start_position[0] = *position_list1; + start_position[1] = *position_list2; - while (start_position[0]->offset < modified_start) - start_position[0] = start_position[0]->next; + while (start_position[0]->offset < modified_start) + start_position[0] = start_position[0]->next; - while (start_position[1]->offset < latest_start) - start_position[1] = start_position[1]->next; + while (start_position[1]->offset < latest_start) + start_position[1] = start_position[1]->next; - position[0] = start_position[0]; - position[1] = start_position[1]; + position[0] = start_position[0]; + position[1] = start_position[1]; - common_length = modified_length < latest_length - ? modified_length : latest_length; + common_length = modified_length < latest_length + ? modified_length : latest_length; - while (common_length > 0 - && position[0]->node == position[1]->node) - { - position[0] = position[0]->next; - position[1] = position[1]->next; + while (common_length > 0 + && position[0]->node == position[1]->node) + { + position[0] = position[0]->next; + position[1] = position[1]->next; - common_length--; - } + common_length--; + } - if (common_length == 0 - && modified_length == latest_length) - { - hunk->type = svn_diff__type_diff_common; - hunk->resolved_diff = NULL; + if (common_length == 0 + && modified_length == latest_length) + { + hunk->type = svn_diff__type_diff_common; + hunk->resolved_diff = NULL; - *position_list1 = position[0]; - *position_list2 = position[1]; + *position_list1 = position[0]; + *position_list2 = position[1]; - return; - } + return; + } - hunk->type = svn_diff__type_conflict; + hunk->type = svn_diff__type_conflict; - /* ### If we have a conflict we can try to find the - * ### common parts in it by getting an lcs between - * ### modified (start to start + length) and - * ### latest (start to start + length). - * ### We use this lcs to create a simple diff. Only - * ### where there is a diff between the two, we have - * ### a conflict. - * ### This raises a problem; several common diffs and - * ### conflicts can occur within the same original - * ### block. This needs some thought. - * ### - * ### NB: We can use the node _pointers_ to identify - * ### different tokens - */ + /* ### If we have a conflict we can try to find the + * ### common parts in it by getting an lcs between + * ### modified (start to start + length) and + * ### latest (start to start + length). + * ### We use this lcs to create a simple diff. Only + * ### where there is a diff between the two, we have + * ### a conflict. + * ### This raises a problem; several common diffs and + * ### conflicts can occur within the same original + * ### block. This needs some thought. + * ### + * ### NB: We can use the node _pointers_ to identify + * ### different tokens + */ - subpool = svn_pool_create(pool); + subpool = svn_pool_create(pool); - /* Calculate how much of the two sequences was - * actually the same. - */ - common_length = (modified_length < latest_length - ? modified_length : latest_length) - - common_length; + /* Calculate how much of the two sequences was + * actually the same. + */ + common_length = (modified_length < latest_length + ? modified_length : latest_length) + - common_length; - /* If there were matching symbols at the start of - * both sequences, record that fact. - */ - if (common_length > 0) - { - lcs = apr_palloc(subpool, sizeof(*lcs)); - lcs->next = NULL; - lcs->position[0] = start_position[0]; - lcs->position[1] = start_position[1]; - lcs->length = common_length; + /* If there were matching symbols at the start of + * both sequences, record that fact. + */ + if (common_length > 0) + { + lcs = apr_palloc(subpool, sizeof(*lcs)); + lcs->next = NULL; + lcs->position[0] = start_position[0]; + lcs->position[1] = start_position[1]; + lcs->length = common_length; - lcs_ref = &lcs->next; - } + lcs_ref = &lcs->next; + } - modified_length -= common_length; - latest_length -= common_length; + modified_length -= common_length; + latest_length -= common_length; - modified_start = start_position[0]->offset; - latest_start = start_position[1]->offset; + modified_start = start_position[0]->offset; + latest_start = start_position[1]->offset; - start_position[0] = position[0]; - start_position[1] = position[1]; + start_position[0] = position[0]; + start_position[1] = position[1]; - /* Create a new ring for svn_diff__lcs to grok. - * We can safely do this given we don't need the - * positions we processed anymore. - */ - if (modified_length == 0) - { - *position_list1 = position[0]; - position[0] = NULL; - } - else - { - while (--modified_length) - position[0] = position[0]->next; + /* Create a new ring for svn_diff__lcs to grok. + * We can safely do this given we don't need the + * positions we processed anymore. + */ + if (modified_length == 0) + { + *position_list1 = position[0]; + position[0] = NULL; + } + else + { + while (--modified_length) + position[0] = position[0]->next; - *position_list1 = position[0]->next; - position[0]->next = start_position[0]; - } + *position_list1 = position[0]->next; + position[0]->next = start_position[0]; + } - if (latest_length == 0) - { - *position_list2 = position[1]; - position[1] = NULL; - } - else - { - while (--latest_length) - position[1] = position[1]->next; + if (latest_length == 0) + { + *position_list2 = position[1]; + position[1] = NULL; + } + else + { + while (--latest_length) + position[1] = position[1]->next; - *position_list2 = position[1]->next; - position[1]->next = start_position[1]; - } + *position_list2 = position[1]->next; + position[1]->next = start_position[1]; + } - *lcs_ref = svn_diff__lcs(position[0], position[1], 0, 0, - subpool); + token_counts[0] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[1] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + token_counts[0][token_index] = token_counts[1][token_index] = 0; - /* Fix up the EOF lcs element in case one of - * the two sequences was NULL. - */ - if ((*lcs_ref)->position[0]->offset == 1) - (*lcs_ref)->position[0] = *position_list1; + current = position[0]; + do + { + token_counts[0][current->node]++; + current = current->next; + } + while (current != position[0]); + current = position[1]; + do + { + token_counts[1][current->node]++; + current = current->next; + } + while (current != position[1]); - if ((*lcs_ref)->position[1]->offset == 1) - (*lcs_ref)->position[1] = *position_list2; + *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0], + token_counts[1], num_tokens, 0, 0, subpool); - /* Restore modified_length and latest_length */ - modified_length = hunk->modified_length; - latest_length = hunk->latest_length; + /* Fix up the EOF lcs element in case one of + * the two sequences was NULL. + */ + if ((*lcs_ref)->position[0]->offset == 1) + (*lcs_ref)->position[0] = *position_list1; - /* Produce the resolved diff */ - while (1) - { - if (modified_start < lcs->position[0]->offset - || latest_start < lcs->position[1]->offset) - { - (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + if ((*lcs_ref)->position[1]->offset == 1) + (*lcs_ref)->position[1] = *position_list2; - (*diff_ref)->type = svn_diff__type_conflict; - (*diff_ref)->original_start = hunk->original_start; - (*diff_ref)->original_length = hunk->original_length; - (*diff_ref)->modified_start = modified_start - 1; - (*diff_ref)->modified_length = lcs->position[0]->offset + /* Restore modified_length and latest_length */ + modified_length = hunk->modified_length; + latest_length = hunk->latest_length; + + /* Produce the resolved diff */ + while (1) + { + if (modified_start < lcs->position[0]->offset + || latest_start < lcs->position[1]->offset) + { + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_conflict; + (*diff_ref)->original_start = hunk->original_start; + (*diff_ref)->original_length = hunk->original_length; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = lcs->position[0]->offset - modified_start; - (*diff_ref)->latest_start = latest_start - 1; - (*diff_ref)->latest_length = lcs->position[1]->offset + (*diff_ref)->latest_start = latest_start - 1; + (*diff_ref)->latest_length = lcs->position[1]->offset - latest_start; - (*diff_ref)->resolved_diff = NULL; + (*diff_ref)->resolved_diff = NULL; - diff_ref = &(*diff_ref)->next; - } + diff_ref = &(*diff_ref)->next; + } - /* Detect the EOF */ - if (lcs->length == 0) - break; + /* Detect the EOF */ + if (lcs->length == 0) + break; - modified_start = lcs->position[0]->offset; - latest_start = lcs->position[1]->offset; + modified_start = lcs->position[0]->offset; + latest_start = lcs->position[1]->offset; - (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); - (*diff_ref)->type = svn_diff__type_diff_common; - (*diff_ref)->original_start = hunk->original_start; - (*diff_ref)->original_length = hunk->original_length; - (*diff_ref)->modified_start = modified_start - 1; - (*diff_ref)->modified_length = lcs->length; - (*diff_ref)->latest_start = latest_start - 1; - (*diff_ref)->latest_length = lcs->length; - (*diff_ref)->resolved_diff = NULL; + (*diff_ref)->type = svn_diff__type_diff_common; + (*diff_ref)->original_start = hunk->original_start; + (*diff_ref)->original_length = hunk->original_length; + (*diff_ref)->modified_start = modified_start - 1; + (*diff_ref)->modified_length = lcs->length; + (*diff_ref)->latest_start = latest_start - 1; + (*diff_ref)->latest_length = lcs->length; + (*diff_ref)->resolved_diff = NULL; - diff_ref = &(*diff_ref)->next; + diff_ref = &(*diff_ref)->next; - modified_start += lcs->length; - latest_start += lcs->length; + modified_start += lcs->length; + latest_start += lcs->length; - lcs = lcs->next; - } + lcs = lcs->next; + } - *diff_ref = NULL; + *diff_ref = NULL; - svn_pool_destroy(subpool); + svn_pool_destroy(subpool); } @@ -251,6 +275,10 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[3]; + apr_uint32_t num_tokens; + apr_uint32_t token_index; + svn_diff__position_t *current; + apr_uint32_t *token_counts[3]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified, svn_diff_datasource_latest}; @@ -292,6 +320,8 @@ prefix_lines, subpool)); + num_tokens = svn_diff__get_node_count(tree); + /* Get rid of the tokens, we don't need them to calc the diff */ if (vtable->token_discard_all != NULL) vtable->token_discard_all(diff_baton); @@ -299,10 +329,52 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_destroy(treepool); + token_counts[0] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[1] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[2] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + { + token_counts[0][token_index] = token_counts[1][token_index] = 0; + token_counts[2][token_index] = 0; + } + + current = position_list[0]; + if (current != NULL) + { + do + { + token_counts[0][current->node]++; + current = current->next; + } + while (current != position_list[0]); + } + current = position_list[1]; + if (current != NULL) + { + do + { + token_counts[1][current->node]++; + current = current->next; + } + while (current != position_list[1]); + } + current = position_list[2]; + if (current != NULL) + { + do + { + token_counts[2][current->node]++; + current = current->next; + } + while (current != position_list[2]); + } + /* Get the lcs for original-modified and original-latest */ - lcs_om = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, + lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, suffix_lines, subpool); - lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines, + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0], + token_counts[2], num_tokens, prefix_lines, suffix_lines, subpool); /* Produce a merged diff */ @@ -435,6 +507,7 @@ svn_diff__resolve_conflict(*diff_ref, &position_list[1], &position_list[2], + num_tokens, pool); } else if (is_modified) Index: subversion/libsvn_diff/diff4.c =================================================================== --- subversion/libsvn_diff/diff4.c (revision 1104713) +++ subversion/libsvn_diff/diff4.c (working copy) @@ -173,6 +173,10 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[4]; + apr_uint32_t num_tokens; + apr_uint32_t token_index; + svn_diff__position_t *current; + apr_uint32_t *token_counts[4]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified, svn_diff_datasource_latest, @@ -227,6 +231,8 @@ prefix_lines, subpool2)); + num_tokens = svn_diff__get_node_count(tree); + /* Get rid of the tokens, we don't need them to calc the diff */ if (vtable->token_discard_all != NULL) vtable->token_discard_all(diff_baton); @@ -234,8 +240,61 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_clear(subpool3); + token_counts[0] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[1] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[2] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + token_counts[3] = apr_palloc(subpool, num_tokens * sizeof(**token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + { + token_counts[0][token_index] = token_counts[1][token_index] = 0; + token_counts[2][token_index] = token_counts[3][token_index] = 0; + } + + current = position_list[0]; + if (current != NULL) + { + do + { + token_counts[0][current->node]++; + current = current->next; + } + while (current != position_list[0]); + } + current = position_list[1]; + if (current != NULL) + { + do + { + token_counts[1][current->node]++; + current = current->next; + } + while (current != position_list[1]); + } + current = position_list[2]; + if (current != NULL) + { + do + { + token_counts[2][current->node]++; + current = current->next; + } + while (current != position_list[2]); + } + current = position_list[3]; + if (current != NULL) + { + do + { + token_counts[3][current->node]++; + current = current->next; + } + while (current != position_list[3]); + } + /* Get the lcs for original - latest */ - lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines, + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], + token_counts[0], token_counts[2], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool); @@ -257,7 +316,9 @@ /* Get the lcs for common ancestor - original * Do reverse adjustements */ - lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], prefix_lines, + lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], + token_counts[3], token_counts[2], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); adjust_diff(diff_ol, diff_adjust); @@ -267,7 +328,9 @@ /* Get the lcs for modified - common ancestor * Do forward adjustments */ - lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], prefix_lines, + lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], + token_counts[1], token_counts[3], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); adjust_diff(diff_ol, diff_adjust); @@ -283,7 +346,7 @@ if (hunk->type == svn_diff__type_conflict) { svn_diff__resolve_conflict(hunk, &position_list[1], - &position_list[2], pool); + &position_list[2], num_tokens, pool); } } Index: subversion/libsvn_diff/lcs.c =================================================================== --- subversion/libsvn_diff/lcs.c (revision 1104713) +++ subversion/libsvn_diff/lcs.c (working copy) @@ -51,6 +51,7 @@ svn_diff__snake(apr_off_t k, svn_diff__snake_t *fp, int idx, + apr_uint32_t *token_counts[2], svn_diff__lcs_t **freelist, apr_pool_t *pool) { @@ -92,6 +93,11 @@ } + if (previous_lcs) + { + previous_lcs->refcount++; + } + /* ### Optimization, skip all positions that don't have matchpoints * ### anyway. Beware of the sentinel, don't skip it! */ @@ -99,41 +105,44 @@ position[0] = start_position[0]; position[1] = start_position[1]; - while (position[0]->node == position[1]->node) + while (1) { - position[0] = position[0]->next; - position[1] = position[1]->next; - } - - if (position[1] != start_position[1]) - { - lcs = *freelist; - if (lcs) + while (position[0]->node == position[1]->node) { - *freelist = lcs->next; + position[0] = position[0]->next; + position[1] = position[1]->next; } - else + + if (position[1] != start_position[1]) { - lcs = apr_palloc(pool, sizeof(*lcs)); + lcs = *freelist; + if (lcs) + { + *freelist = lcs->next; + } + else + { + lcs = apr_palloc(pool, sizeof(*lcs)); + } + + lcs->position[idx] = start_position[0]; + lcs->position[abs(1 - idx)] = start_position[1]; + lcs->length = position[1]->offset - start_position[1]->offset; + lcs->next = previous_lcs; + lcs->refcount = 1; + previous_lcs = lcs; + start_position[0] = position[0]; + start_position[1] = position[1]; } - - lcs->position[idx] = start_position[0]; - lcs->position[abs(1 - idx)] = start_position[1]; - lcs->length = position[1]->offset - start_position[1]->offset; - lcs->next = previous_lcs; - lcs->refcount = 1; - fp[k].lcs = lcs; + if (position[0]->node >= 0 && token_counts[1][position[0]->node] == 0) + start_position[0] = position[0] = position[0]->next; + else if (position[1]->node >= 0 && token_counts[0][position[1]->node] == 0) + start_position[1] = position[1] = position[1]->next; + else + break; } - else - { - fp[k].lcs = previous_lcs; - } - if (previous_lcs) - { - previous_lcs->refcount++; - } - + fp[k].lcs = previous_lcs; fp[k].position[0] = position[0]; fp[k].position[1] = position[1]; @@ -188,12 +197,18 @@ svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + apr_uint32_t *token_counts_list1, /* array of counts */ + apr_uint32_t *token_counts_list2, /* array of counts */ + apr_uint32_t num_tokens, apr_off_t prefix_lines, apr_off_t suffix_lines, apr_pool_t *pool) { int idx; apr_off_t length[2]; + apr_uint32_t *token_counts[2]; + apr_uint32_t unique_count[2]; + apr_uint32_t token_index; svn_diff__snake_t *fp; apr_off_t d; apr_off_t k; @@ -231,10 +246,19 @@ return lcs; } + unique_count[1] = unique_count[0] = 0; + for (token_index = 0; token_index < num_tokens; token_index++) + { + if (token_counts_list1[token_index] == 0) + unique_count[1] += token_counts_list2[token_index]; + if (token_counts_list2[token_index] == 0) + unique_count[0] += token_counts_list1[token_index]; + } + /* Calculate length of both sequences to be compared */ length[0] = position_list1->offset - position_list1->next->offset + 1; length[1] = position_list2->offset - position_list2->next->offset + 1; - idx = length[0] > length[1] ? 1 : 0; + idx = length[0] - unique_count[0] > length[1] - unique_count[1] ? 1 : 0; /* strikerXXX: here we allocate the furthest point array, which is * strikerXXX: sized M + N + 3 (!) @@ -246,18 +270,20 @@ sentinel_position[idx].next = position_list1->next; position_list1->next = &sentinel_position[idx]; sentinel_position[idx].offset = position_list1->offset + 1; + token_counts[idx] = token_counts_list1; sentinel_position[abs(1 - idx)].next = position_list2->next; position_list2->next = &sentinel_position[abs(1 - idx)]; sentinel_position[abs(1 - idx)].offset = position_list2->offset + 1; + token_counts[abs(1 - idx)] = token_counts_list2; - /* These are never dereferenced, only compared by value, so we - * can safely fake these up and the void* cast is OK. + /* Negative indices will not be used elsewhere */ - sentinel_position[0].node = (void*)&sentinel_position[0]; - sentinel_position[1].node = (void*)&sentinel_position[1]; + sentinel_position[0].node = -1; + sentinel_position[1].node = -2; - d = length[abs(1 - idx)] - length[idx]; + d = length[abs(1 - idx)] - unique_count[abs(1 - idx)] + - (length[idx] - unique_count[idx]); /* k = -1 will be the first to be used to get previous * position information from, make sure it holds sane @@ -272,12 +298,12 @@ /* Forward */ for (k = -p; k < d; k++) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, idx, token_counts, &lcs_freelist, pool); } for (k = d + p; k >= d; k--) { - svn_diff__snake(k, fp, idx, &lcs_freelist, pool); + svn_diff__snake(k, fp, idx, token_counts, &lcs_freelist, pool); } p++; Index: subversion/libsvn_diff/token.c =================================================================== --- subversion/libsvn_diff/token.c (revision 1104713) +++ subversion/libsvn_diff/token.c (working copy) @@ -46,6 +46,7 @@ svn_diff__node_t *right; apr_uint32_t hash; + apr_int32_t index; void *token; }; @@ -53,10 +54,20 @@ { svn_diff__node_t *root[SVN_DIFF__HASH_SIZE]; apr_pool_t *pool; + apr_uint32_t node_count; }; /* + * Returns number of tokens in a tree + */ +apr_uint32_t +svn_diff__get_node_count(svn_diff__tree_t *tree) +{ + return tree->node_count; +} + +/* * Support functions to build a tree of token positions */ @@ -65,6 +76,7 @@ { *tree = apr_pcalloc(pool, sizeof(**tree)); (*tree)->pool = pool; + (*tree)->node_count = 0; } @@ -122,6 +134,7 @@ new_node->right = NULL; new_node->hash = hash; new_node->token = token; + new_node->index = tree->node_count++; *node = *node_ref = new_node; @@ -168,7 +181,7 @@ /* Create a new position */ position = apr_palloc(pool, sizeof(*position)); position->next = NULL; - position->node = node; + position->node = node->index; position->offset = offset; *position_ref = position;