Index: subversion/libsvn_diff/diff.c =================================================================== --- subversion/libsvn_diff/diff.c (revision 1128966) +++ subversion/libsvn_diff/diff.c (working copy) @@ -33,7 +33,39 @@ #include "diff.h" +/* Returns an array with the counts for the tokens in + * the looped linked list given in loop_start. + * num_tokens equals the highest possible token index +1. + */ +svn_diff__token_index_t* +svn_diff__get_token_counts(svn_diff__position_t *loop_start, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool) +{ + svn_diff__token_index_t *token_counts; + svn_diff__token_index_t token_index; + svn_diff__position_t *current; + + token_counts = apr_palloc(pool, num_tokens * sizeof(*token_counts)); + for (token_index = 0; token_index < num_tokens; token_index++) + token_counts[token_index] = 0; + + current = loop_start; + if (current != NULL) + { + do + { + token_counts[current->token_index]++; + current = current->next; + } + while (current != loop_start); + } + + return token_counts; +} + + svn_diff_t * svn_diff__diff(svn_diff__lcs_t *lcs, apr_off_t original_start, apr_off_t modified_start, @@ -105,6 +137,8 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[2]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[2]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified}; svn_diff__lcs_t *lcs; @@ -138,6 +172,8 @@ prefix_lines, subpool)); + num_tokens = svn_diff__get_node_count(tree); + /* The cool part is that we don't need the tokens anymore. * Allow the app to clean them up if it wants to. */ @@ -147,8 +183,12 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_destroy(treepool); + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, subpool); + /* Get the lcs */ - lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, + lcs = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, suffix_lines, subpool); /* Produce the diff */ Index: subversion/libsvn_diff/diff.h =================================================================== --- subversion/libsvn_diff/diff.h (revision 1128966) +++ subversion/libsvn_diff/diff.h (working copy) @@ -59,10 +59,13 @@ svn_diff_t *resolved_diff; }; +/* Type used for token indices and counts of tokens. Must be signed. */ +typedef long int svn_diff__token_index_t; + struct svn_diff__position_t { svn_diff__position_t *next; - svn_diff__node_t *node; + svn_diff__token_index_t token_index; apr_off_t offset; }; @@ -99,12 +102,21 @@ svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + svn_diff__token_index_t *token_counts_list1, /* array of counts */ + svn_diff__token_index_t *token_counts_list2, /* array of counts */ + svn_diff__token_index_t num_tokens, /* length of count arrays */ apr_off_t prefix_lines, apr_off_t suffix_lines, apr_pool_t *pool); /* + * Returns number of tokens in a tree + */ +svn_diff__token_index_t +svn_diff__get_node_count(svn_diff__tree_t *tree); + +/* * Support functions to build a tree of token positions */ void @@ -124,6 +136,11 @@ apr_off_t prefix_lines, apr_pool_t *pool); +/* Count the number of each token in a looped linked list */ +svn_diff__token_index_t* +svn_diff__get_token_counts(svn_diff__position_t *loop_start, + svn_diff__token_index_t num_tokens, + apr_pool_t *pool); /* Morph a svn_lcs_t into a svn_diff_t. */ svn_diff_t * @@ -136,6 +153,7 @@ svn_diff__resolve_conflict(svn_diff_t *hunk, svn_diff__position_t **position_list1, svn_diff__position_t **position_list2, + svn_diff__token_index_t num_tokens, apr_pool_t *pool); Index: subversion/libsvn_diff/diff3.c =================================================================== --- subversion/libsvn_diff/diff3.c (revision 1128966) +++ subversion/libsvn_diff/diff3.c (working copy) @@ -38,6 +38,7 @@ svn_diff__resolve_conflict(svn_diff_t *hunk, svn_diff__position_t **position_list1, svn_diff__position_t **position_list2, + svn_diff__token_index_t num_tokens, apr_pool_t *pool) { apr_off_t modified_start = hunk->modified_start + 1; @@ -47,6 +48,7 @@ apr_off_t latest_length = hunk->latest_length; svn_diff__position_t *start_position[2]; svn_diff__position_t *position[2]; + svn_diff__token_index_t *token_counts[2]; svn_diff__lcs_t *lcs = NULL; svn_diff__lcs_t **lcs_ref = &lcs; svn_diff_t **diff_ref = &hunk->resolved_diff; @@ -72,7 +74,7 @@ ? modified_length : latest_length; while (common_length > 0 - && position[0]->node == position[1]->node) + && position[0]->token_index == position[1]->token_index) { position[0] = position[0]->next; position[1] = position[1]->next; @@ -173,9 +175,12 @@ position[1]->next = start_position[1]; } - *lcs_ref = svn_diff__lcs(position[0], position[1], 0, 0, - subpool); + token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens, subpool); + token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens, subpool); + *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0], + token_counts[1], num_tokens, 0, 0, subpool); + /* Fix up the EOF lcs element in case one of * the two sequences was NULL. */ @@ -251,6 +256,8 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[3]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[3]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified, svn_diff_datasource_latest}; @@ -292,6 +299,8 @@ prefix_lines, subpool)); + num_tokens = svn_diff__get_node_count(tree); + /* Get rid of the tokens, we don't need them to calc the diff */ if (vtable->token_discard_all != NULL) vtable->token_discard_all(diff_baton); @@ -299,10 +308,16 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_destroy(treepool); + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, subpool); + token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, subpool); + /* Get the lcs for original-modified and original-latest */ - lcs_om = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, + lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0], + token_counts[1], num_tokens, prefix_lines, suffix_lines, subpool); - lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines, + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0], + token_counts[2], num_tokens, prefix_lines, suffix_lines, subpool); /* Produce a merged diff */ @@ -435,6 +450,7 @@ svn_diff__resolve_conflict(*diff_ref, &position_list[1], &position_list[2], + num_tokens, pool); } else if (is_modified) Index: subversion/libsvn_diff/diff4.c =================================================================== --- subversion/libsvn_diff/diff4.c (revision 1128966) +++ subversion/libsvn_diff/diff4.c (working copy) @@ -173,6 +173,8 @@ { svn_diff__tree_t *tree; svn_diff__position_t *position_list[4]; + svn_diff__token_index_t num_tokens; + svn_diff__token_index_t *token_counts[4]; svn_diff_datasource_e datasource[] = {svn_diff_datasource_original, svn_diff_datasource_modified, svn_diff_datasource_latest, @@ -227,6 +229,8 @@ prefix_lines, subpool2)); + num_tokens = svn_diff__get_node_count(tree); + /* Get rid of the tokens, we don't need them to calc the diff */ if (vtable->token_discard_all != NULL) vtable->token_discard_all(diff_baton); @@ -234,8 +238,15 @@ /* We don't need the nodes in the tree either anymore, nor the tree itself */ svn_pool_clear(subpool3); + token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens, subpool); + token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens, subpool); + token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens, subpool); + token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens, subpool); + /* Get the lcs for original - latest */ - lcs_ol = svn_diff__lcs(position_list[0], position_list[2], prefix_lines, + lcs_ol = svn_diff__lcs(position_list[0], position_list[2], + token_counts[0], token_counts[2], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool); @@ -257,7 +268,9 @@ /* Get the lcs for common ancestor - original * Do reverse adjustements */ - lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], prefix_lines, + lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], + token_counts[3], token_counts[2], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); adjust_diff(diff_ol, diff_adjust); @@ -267,7 +280,9 @@ /* Get the lcs for modified - common ancestor * Do forward adjustments */ - lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], prefix_lines, + lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], + token_counts[1], token_counts[3], + num_tokens, prefix_lines, suffix_lines, subpool3); diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3); adjust_diff(diff_ol, diff_adjust); @@ -283,7 +298,7 @@ if (hunk->type == svn_diff__type_conflict) { svn_diff__resolve_conflict(hunk, &position_list[1], - &position_list[2], pool); + &position_list[2], num_tokens, pool); } } Index: subversion/libsvn_diff/lcs.c =================================================================== --- subversion/libsvn_diff/lcs.c (revision 1128966) +++ subversion/libsvn_diff/lcs.c (working copy) @@ -81,6 +81,7 @@ static APR_INLINE void svn_diff__snake(svn_diff__snake_t *fp_k, + svn_diff__token_index_t *token_counts[2], svn_diff__lcs_t **freelist, apr_pool_t *pool) { @@ -122,6 +123,11 @@ } + if (previous_lcs) + { + previous_lcs->refcount++; + } + /* ### Optimization, skip all positions that don't have matchpoints * ### anyway. Beware of the sentinel, don't skip it! */ @@ -129,8 +135,10 @@ position[0] = start_position[0]; position[1] = start_position[1]; - while (position[0]->node == position[1]->node) + while (1) { + while (position[0]->token_index == position[1]->token_index) + { position[0] = position[0]->next; position[1] = position[1]->next; } @@ -152,18 +160,20 @@ lcs->length = position[1]->offset - start_position[1]->offset; lcs->next = previous_lcs; lcs->refcount = 1; - fp_k[0].lcs = lcs; + previous_lcs = lcs; + start_position[0] = position[0]; + start_position[1] = position[1]; } - else - { - fp_k[0].lcs = previous_lcs; + /* Skip any and all tokens that only occur in one of the files */ + if (position[0]->token_index >= 0 && token_counts[1][position[0]->token_index] == 0) + start_position[0] = position[0] = position[0]->next; + else if (position[1]->token_index >= 0 && token_counts[0][position[1]->token_index] == 0) + start_position[1] = position[1] = position[1]->next; + else + break; } - if (previous_lcs) - { - previous_lcs->refcount++; - } - + fp_k[0].lcs = previous_lcs; fp_k[0].position[0] = position[0]; fp_k[0].position[1] = position[1]; @@ -218,11 +228,17 @@ svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ + svn_diff__token_index_t *token_counts_list1, /* array of counts */ + svn_diff__token_index_t *token_counts_list2, /* array of counts */ + svn_diff__token_index_t num_tokens, apr_off_t prefix_lines, apr_off_t suffix_lines, apr_pool_t *pool) { apr_off_t length[2]; + svn_diff__token_index_t *token_counts[2]; + svn_diff__token_index_t unique_count[2]; + svn_diff__token_index_t token_index; svn_diff__snake_t *fp; apr_off_t d; apr_off_t k; @@ -260,10 +276,23 @@ return lcs; } - /* Calculate lengths M and N of the sequences to be compared */ - length[0] = position_list1->offset - position_list1->next->offset + 1; - length[1] = position_list2->offset - position_list2->next->offset + 1; + unique_count[1] = unique_count[0] = 0; + for (token_index = 0; token_index < num_tokens; token_index++) + { + if (token_counts_list1[token_index] == 0) + unique_count[1] += token_counts_list2[token_index]; + if (token_counts_list2[token_index] == 0) + unique_count[0] += token_counts_list1[token_index]; + } + /* Calculate lengths M and N of the sequences to be compared. Do not + * count tokens unique to one file, as those are ignored in __snake. + */ + length[0] = position_list1->offset - position_list1->next->offset + 1 + - unique_count[0]; + length[1] = position_list2->offset - position_list2->next->offset + 1 + - unique_count[1]; + /* strikerXXX: here we allocate the furthest point array, which is * strikerXXX: sized M + N + 3 (!) */ @@ -281,16 +310,17 @@ sentinel_position[0].next = position_list1->next; position_list1->next = &sentinel_position[0]; sentinel_position[0].offset = position_list1->offset + 1; + token_counts[0] = token_counts_list1; sentinel_position[1].next = position_list2->next; position_list2->next = &sentinel_position[1]; sentinel_position[1].offset = position_list2->offset + 1; + token_counts[1] = token_counts_list2; - /* These are never dereferenced, only compared by value, so we - * can safely fake these up and the void* cast is OK. + /* Negative indices will not be used elsewhere */ - sentinel_position[0].node = (void*)&sentinel_position[0]; - sentinel_position[1].node = (void*)&sentinel_position[1]; + sentinel_position[0].token_index = -1; + sentinel_position[1].token_index = -2; /* position d = M - N corresponds to the initial state, where * we are at the beginning of both files. @@ -310,12 +340,12 @@ /* For k < 0, insertions are free */ for (k = (d < 0 ? d : 0) - p; k < 0; k++) { - svn_diff__snake(fp + k, &lcs_freelist, pool); + svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool); } /* for k > 0, deletions are free */ for (k = (d > 0 ? d : 0) + p; k >= 0; k--) { - svn_diff__snake(fp + k, &lcs_freelist, pool); + svn_diff__snake(fp + k, token_counts, &lcs_freelist, pool); } p++; Index: subversion/libsvn_diff/token.c =================================================================== --- subversion/libsvn_diff/token.c (revision 1128966) +++ subversion/libsvn_diff/token.c (working copy) @@ -46,6 +46,7 @@ svn_diff__node_t *right; apr_uint32_t hash; + svn_diff__token_index_t index; void *token; }; @@ -53,10 +54,20 @@ { svn_diff__node_t *root[SVN_DIFF__HASH_SIZE]; apr_pool_t *pool; + svn_diff__token_index_t node_count; }; /* + * Returns number of tokens in a tree + */ +svn_diff__token_index_t +svn_diff__get_node_count(svn_diff__tree_t *tree) +{ + return tree->node_count; +} + +/* * Support functions to build a tree of token positions */ @@ -65,6 +76,7 @@ { *tree = apr_pcalloc(pool, sizeof(**tree)); (*tree)->pool = pool; + (*tree)->node_count = 0; } @@ -122,6 +134,7 @@ new_node->right = NULL; new_node->hash = hash; new_node->token = token; + new_node->index = tree->node_count++; *node = *node_ref = new_node; @@ -168,7 +181,7 @@ /* Create a new position */ position = apr_palloc(pool, sizeof(*position)); position->next = NULL; - position->node = node; + position->token_index = node->index; position->offset = offset; *position_ref = position;