Index: subversion/include/svn_diff.h =================================================================== --- subversion/include/svn_diff.h (revision 1001548) +++ subversion/include/svn_diff.h (working copy) @@ -112,6 +112,11 @@ svn_error_t *(*datasource_open)(void *diff_baton, svn_diff_datasource_e datasource); + /** Open the two datasources of type @a datasource0 and @a datasource1 in a single call. An implementation may store in @a *prefix_lines the number of leading lines it found to be identical in both datasources; one that does no such scan leaves @a *prefix_lines untouched, so callers should set it to 0 beforehand. */ + svn_error_t *(*datasources_open)(void *diff_baton, apr_off_t *prefix_lines, + svn_diff_datasource_e datasource0, + svn_diff_datasource_e datasource1); + /** Close the datasource of type @a datasource. */ svn_error_t *(*datasource_close)(void *diff_baton, svn_diff_datasource_e datasource); @@ -124,6 +129,9 @@ void *diff_baton, svn_diff_datasource_e datasource); + /** Get the number of identical prefix lines recorded in the @a diff_baton (0 when no prefix was skipped). */ + apr_off_t (*get_prefix_lines)(void *diff_baton); + /** A function for ordering the tokens, resembling 'strcmp' in functionality. * @a compare should contain the return value of the comparison: * If @a ltoken and @a rtoken are "equal", return 0. 
If @a ltoken is Index: subversion/libsvn_diff/diff_memory.c =================================================================== --- subversion/libsvn_diff/diff_memory.c (revision 1001548) +++ subversion/libsvn_diff/diff_memory.c (working copy) @@ -95,7 +95,23 @@ return SVN_NO_ERROR; } +/* Implements svn_diff_fns_t::datasources_open */ +static svn_error_t * +datasources_open(void *baton, apr_off_t *prefix_lines, + svn_diff_datasource_e datasource0, + svn_diff_datasource_e datasource1) +{ + /* Do nothing: everything is already there and initialized to 0. Note that *PREFIX_LINES is not written here, so it keeps whatever value the caller gave it. */ + return SVN_NO_ERROR; +} +/* Implements svn_diff_fns_t::get_prefix_lines. The in-memory datasources_open() above does no prefix scanning, so this is always 0. */ +static apr_off_t +get_prefix_lines(void *baton) +{ + return 0; +} + /* Implements svn_diff_fns_t::datasource_close */ static svn_error_t * datasource_close(void *baton, svn_diff_datasource_e datasource) @@ -189,8 +205,10 @@ static const svn_diff_fns_t svn_diff__mem_vtable = { datasource_open, + datasources_open, datasource_close, datasource_get_next_token, + get_prefix_lines, token_compare, token_discard, token_discard_all Index: subversion/libsvn_diff/diff_file.c =================================================================== --- subversion/libsvn_diff/diff_file.c (revision 1001548) +++ subversion/libsvn_diff/diff_file.c (working copy) @@ -77,6 +77,10 @@ char *curp[4]; char *endp[4]; + apr_off_t prefix_lines; /* number of lines in the identical prefix; set by find_identical_prefix() */ + int suffix_start_chunk[4]; /* per datasource index: chunk in which the identical suffix starts; set by find_identical_suffix() */ + apr_off_t suffix_offset_in_chunk[4]; /* per datasource index: offset of the suffix start within that chunk */ + /* List of free tokens that may be reused. 
*/ svn_diff__file_token_t *tokens;
@@ -233,7 +237,384 @@
                     curp, length, 0, file_baton->pool);
 }
 
+/* Advance *CURP by one byte within datasource IDX of FILE_BATON.
+ *
+ * BUFFER holds chunk *CHUNK_NUMBER of the file and *ENDP points just past
+ * the last byte read into it.  When *CURP is on the last byte of a chunk
+ * other than LAST_CHUNK_NUMBER, the next chunk is read into BUFFER and
+ * *CURP, *ENDP and *CHUNK_NUMBER are updated to describe it.  In the last
+ * chunk *CURP stops at *ENDP, which signals end of file; calling this
+ * again at end of file leaves *CURP where it is. */
+static svn_error_t *
+increment_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, apr_off_t last_chunk_number, int idx)
+{
+  apr_off_t length;
+
+  if (*endp - *curp > 1)
+    {
+      /* Not yet on the last byte of this chunk: just step forward. */
+      (*curp)++;
+    }
+  else if (*chunk_number == last_chunk_number)
+    {
+      /* *curp == *endp with last chunk signals end of file.  Assign
+       * rather than increment, so that a call made when already at end
+       * of file cannot push *curp past *endp. */
+      *curp = *endp;
+    }
+  else
+    {
+      (*chunk_number)++;
+      length = *chunk_number == last_chunk_number ?
+        offset_in_chunk(file_baton->size[idx]) : CHUNK_SIZE;
+      SVN_ERR(read_chunk(file_baton->file[idx],
+                         file_baton->path[idx],
+                         buffer, length,
+                         chunk_to_offset(*chunk_number),
+                         file_baton->pool));
+      *endp = buffer + length;
+      *curp = buffer;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Move *CURP back by one byte within datasource IDX of FILE_BATON.
+ *
+ * When *CURP is at the start of BUFFER, the previous chunk (always
+ * CHUNK_SIZE bytes) is read into BUFFER and *CURP is placed on its last
+ * byte.  At the first byte of the file *CURP is left alone and
+ * *CHUNK_NUMBER becomes -1 to signal beginning of file.  Callers must
+ * stop there: a further call would decrement *CHUNK_NUMBER again and
+ * try to read that (nonexistent) chunk. */
+static svn_error_t *
+decrement_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, int idx)
+{
+  if (*curp == buffer)
+    {
+      if (*chunk_number == 0)
+        (*chunk_number)--; /* *chunk_number == -1 signals beginning of file */
+      else
+        {
+          (*chunk_number)--;
+          SVN_ERR(read_chunk(file_baton->file[idx],
+                             file_baton->path[idx],
+                             buffer, CHUNK_SIZE,
+                             chunk_to_offset(*chunk_number),
+                             file_baton->pool));
+          *endp = buffer + CHUNK_SIZE;
+          *curp = *endp - 1;
+        }
+    }
+  else
+    {
+      (*curp)--;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Find the identical prefix for idx0 and idx1, counting number of lines.
+ * After this function is finished, the buffers, chunks, curp's and endp's
+ * of the file_baton are set to point at the first byte after the prefix.
+ *
+ * Unless both files turn out to be entirely identical, the prefix is
+ * trimmed back so that it ends just after a newline (or is empty).  The
+ * number of newlines scanned is stored in FILE_BATON->prefix_lines, and
+ * *AT_LEAST_ONE_END_REACHED tells whether the scan ran into the end of
+ * either file.  Both datasources must have their first chunk loaded and
+ * must not be empty, since their first bytes are compared right away. */
+static svn_error_t *
+find_identical_prefix(svn_diff__file_baton_t *file_baton,
+                      svn_boolean_t *at_least_one_end_reached,
+                      int idx0, int idx1)
+{
+  apr_off_t last_chunk0, last_chunk1;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+  *at_least_one_end_reached = FALSE;
+
+  file_baton->prefix_lines = 0;
+  /* Test the end-of-file flag first: once it is set, at least one curp
+   * equals its endp and must not be dereferenced. */
+  while (!*at_least_one_end_reached
+         && *file_baton->curp[idx0] == *file_baton->curp[idx1])
+    {
+      /* ### This will only work for \n and \r\n, not for \r */
+      if (*file_baton->curp[idx0] == '\n')
+        file_baton->prefix_lines++;
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0],
+                                         file_baton->buffer[idx0],
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1],
+                                         file_baton->buffer[idx1],
+                                         last_chunk1, idx1));
+      *at_least_one_end_reached =
+        file_baton->curp[idx0] == file_baton->endp[idx0]
+        || file_baton->curp[idx1] == file_baton->endp[idx1];
+    }
+
+  /* If both files reached their end (i.e. are fully identical), we're done */
+  if (file_baton->curp[idx0] == file_baton->endp[idx0]
+        && file_baton->curp[idx1] == file_baton->endp[idx1])
+    return SVN_NO_ERROR;
+
+  /* Back up to the last newline */
+  do
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0],
+                                         file_baton->buffer[idx0],
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1],
+                                         file_baton->buffer[idx1],
+                                         idx1));
+    } while (*file_baton->curp[idx0] != '\n'
+             && file_baton->chunk[idx0] != -1
+             && file_baton->chunk[idx1] != -1);
+
+  /* slide one byte forward, to point past the \n */
+  if (file_baton->chunk[idx0] == -1)
+    file_baton->chunk[idx0] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx0],
+                                       &file_baton->endp[idx0],
+                                       &file_baton->chunk[idx0],
+                                       file_baton->buffer[idx0],
+                                       last_chunk0, idx0));
+  if (file_baton->chunk[idx1] == -1)
+    file_baton->chunk[idx1] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx1],
+                                       &file_baton->endp[idx1],
+                                       &file_baton->chunk[idx1],
+                                       file_baton->buffer[idx1],
+                                       last_chunk1, idx1));
+
+  return SVN_NO_ERROR;
+}
+
+/* Find the identical suffix for idx0 and idx1. Before this function is called
+ * the file_baton's curp's and chunks should be positioned right after the
+ * identical prefix (which is the case after find_identical_prefix),
+ * so we can determine where suffix scanning should ultimately stop.
+ *
+ * The result is stored, for both idx0 and idx1, as a chunk number in
+ * FILE_BATON->suffix_start_chunk[] and an offset into that chunk in
+ * FILE_BATON->suffix_offset_in_chunk[].  It lies just after a newline,
+ * or at end of file if the common tail contains no newline.  Scanning
+ * uses two buffers of its own, so the file_baton's buffers, curp's and
+ * chunks are left as they were.  Neither file may be empty. */
+static svn_error_t *
+find_identical_suffix(svn_diff__file_baton_t *file_baton,
+                      int idx0, int idx1)
+{
+  char *suffix_buffer0, *suffix_buffer1;
+  int suffix_chunk0, suffix_chunk1;
+  apr_off_t length0, length1;
+  apr_off_t last_chunk0, last_chunk1;
+  apr_off_t suffix_min_offset0;
+  apr_off_t suffix_min_chunk0;
+  char *curp0, *curp1;
+  char *endp0, *endp1;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+
+  /* position everything at the chunk holding the last byte, pointer to
+   * that byte.  Working from size - 1 (valid because neither file is
+   * empty) copes with sizes that are an exact multiple of CHUNK_SIZE:
+   * there size % CHUNK_SIZE is 0, which would leave nothing in the
+   * buffer and put the pointer in front of it. */
+  suffix_buffer0 = apr_palloc(file_baton->pool,
+    (apr_size_t) (file_baton->size[idx0] > CHUNK_SIZE ?
+                  CHUNK_SIZE : file_baton->size[idx0]));
+  suffix_chunk0 = (file_baton->size[idx0] - 1) / CHUNK_SIZE;
+  length0 = (file_baton->size[idx0] - 1) % CHUNK_SIZE + 1;
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     suffix_buffer0, length0,
+                     chunk_to_offset(suffix_chunk0),
+                     file_baton->pool));
+  endp0 = suffix_buffer0 + length0;
+  curp0 = endp0 - 1;
+
+  suffix_buffer1 = apr_palloc(file_baton->pool,
+    (apr_size_t) (file_baton->size[idx1] > CHUNK_SIZE ?
+                  CHUNK_SIZE : file_baton->size[idx1]));
+  suffix_chunk1 = (file_baton->size[idx1] - 1) / CHUNK_SIZE;
+  length1 = (file_baton->size[idx1] - 1) % CHUNK_SIZE + 1;
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     suffix_buffer1, length1,
+                     chunk_to_offset(suffix_chunk1),
+                     file_baton->pool));
+  endp1 = suffix_buffer1 + length1;
+  curp1 = endp1 - 1;
+
+  /* Get the chunk and pointer offset at which we should stop scanning
+   * backward for the identical suffix. This is just past the prefix.
+   * If file 0 is the longer one, file 1 runs into its prefix first, so
+   * the stopping point in file 0 lies further on by the difference in
+   * size.  Work with an absolute file offset and split it only at the
+   * end, so that the in-chunk offset always stays below CHUNK_SIZE. */
+  suffix_min_offset0 = (apr_off_t) file_baton->chunk[idx0] * CHUNK_SIZE
+    + (file_baton->curp[idx0] - file_baton->buffer[idx0]);
+  if (file_baton->size[idx0] > file_baton->size[idx1])
+    suffix_min_offset0 += file_baton->size[idx0] - file_baton->size[idx1];
+  suffix_min_chunk0 = suffix_min_offset0 / CHUNK_SIZE;
+  suffix_min_offset0 = suffix_min_offset0 % CHUNK_SIZE;
+
+  /* Scan backwards until mismatch or until we are where the prefix ended */
+  while (*curp0 == *curp1 && suffix_chunk0 != -1 && suffix_chunk1 != -1
+         && !(suffix_chunk0 == suffix_min_chunk0
+              && (curp0 - suffix_buffer0) == suffix_min_offset0))
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         idx1));
+    }
+
+  /* slide one byte forward, to point at the first byte of common suffix */
+  if (suffix_chunk0 == -1)
+    suffix_chunk0 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                       &suffix_chunk0, suffix_buffer0,
+                                       last_chunk0, idx0));
+  if (suffix_chunk1 == -1)
+    suffix_chunk1 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                       &suffix_chunk1, suffix_buffer1,
+                                       last_chunk1, idx1));
+
+  /* skip to just after a newline.  Check for end of file before looking
+   * at *curp0: at end of file it points just past the buffered data. */
+  while (curp0 != endp0 && curp1 != endp1 && *curp0 != '\n')
+    {
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         last_chunk1, idx1));
+    }
+
+  /* slide one more byte, to point past the \n (a pointer that is already
+   * at end of file is left where it is) */
+  SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                     &suffix_chunk0, suffix_buffer0,
+                                     last_chunk0, idx0));
+  SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                     &suffix_chunk1, suffix_buffer1,
+                                     last_chunk1, idx1));
+
+  file_baton->suffix_start_chunk[idx0] = suffix_chunk0;
+  file_baton->suffix_start_chunk[idx1] = suffix_chunk1;
+  file_baton->suffix_offset_in_chunk[idx0] = curp0 - suffix_buffer0;
+  file_baton->suffix_offset_in_chunk[idx1] = curp1 - suffix_buffer1;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns_t::datasources_open
+ *
+ * Opens both files and loads their first chunk, then looks for an
+ * identical prefix and, unless the prefix scan reached the end of a
+ * file, an identical suffix.  *PREFIX_LINES receives the number of
+ * lines in the prefix (0 if either file is empty). */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource0,
+                 svn_diff_datasource_e datasource1)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  int idx0, idx1;
+  apr_finfo_t finfo0, finfo1;
+  apr_off_t length0, length1;
+  svn_boolean_t at_least_one_end_reached;
+
+  /* Start from "no prefix", so that *PREFIX_LINES and get_prefix_lines()
+   * agree and are defined even on the early return further down. */
+  file_baton->prefix_lines = 0;
+  *prefix_lines = 0;
+
+  /* Open datasource0 and read first chunk */
+  idx0 = datasource_to_index(datasource0);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx0], file_baton->path[idx0],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo0, APR_FINFO_SIZE,
+                               file_baton->file[idx0], file_baton->pool));
+  file_baton->size[idx0] = finfo0.size;
+  length0 = (apr_off_t) (finfo0.size > CHUNK_SIZE ? CHUNK_SIZE : finfo0.size);
+  file_baton->buffer[idx0] = apr_palloc(file_baton->pool, (apr_size_t) length0);
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     file_baton->buffer[idx0], length0, 0, file_baton->pool));
+  file_baton->endp[idx0] = file_baton->buffer[idx0] + length0;
+  file_baton->curp[idx0] = file_baton->buffer[idx0];
+
+  /* Open datasource1 and read first chunk */
+  idx1 = datasource_to_index(datasource1);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx1], file_baton->path[idx1],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo1, APR_FINFO_SIZE,
+                               file_baton->file[idx1], file_baton->pool));
+  file_baton->size[idx1] = finfo1.size;
+  length1 = (apr_off_t) (finfo1.size > CHUNK_SIZE ? CHUNK_SIZE : finfo1.size);
+  file_baton->buffer[idx1] = apr_palloc(file_baton->pool, (apr_size_t) length1);
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     file_baton->buffer[idx1], length1, 0, file_baton->pool));
+  file_baton->endp[idx1] = file_baton->buffer[idx1] + length1;
+  file_baton->curp[idx1] = file_baton->buffer[idx1];
+
+  if (length0 == 0 || length1 == 0)
+    /* there will not be any identical prefix/suffix, so we're done */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_prefix(file_baton, &at_least_one_end_reached,
+                                idx0, idx1));
+  *prefix_lines = file_baton->prefix_lines;
+
+  if (at_least_one_end_reached)
+    /* at least one file consisted totally of identical prefix,
+     * so there will be no identical suffix. We're done. */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_suffix(file_baton, idx0, idx1));
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns_t::get_prefix_lines */
+static apr_off_t
+get_prefix_lines(void *baton)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+
+  return file_baton->prefix_lines;
+}
+
 
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -277,6 +658,11 @@
       return SVN_NO_ERROR;
     }
 
+  if (file_baton->suffix_start_chunk[idx] || file_baton->suffix_offset_in_chunk[idx])
+    if (file_baton->chunk[idx] == file_baton->suffix_start_chunk[idx]
+        && (curp - file_baton->buffer[idx]) == file_baton->suffix_offset_in_chunk[idx])
+      return SVN_NO_ERROR;
+
   /* Get a new token */
   file_token = file_baton->tokens;
   if (file_token)
@@ -526,8 +912,10 @@
 static const svn_diff_fns_t svn_diff__file_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
+  get_prefix_lines,
   token_compare,
   token_discard,
   token_discard_all
Index: subversion/libsvn_diff/diff.h =================================================================== --- subversion/libsvn_diff/diff.h (revision 1001548) +++ subversion/libsvn_diff/diff.h (working copy) @@ -111,6 +111,7 @@ void 
*diff_baton, const svn_diff_fns_t *vtable, svn_diff_datasource_e datasource, + svn_boolean_t datasource_opened, apr_pool_t *pool); Index: subversion/libsvn_diff/token.c =================================================================== --- subversion/libsvn_diff/token.c (revision 1001548) +++ subversion/libsvn_diff/token.c (working copy) @@ -139,6 +139,7 @@ void *diff_baton, const svn_diff_fns_t *vtable, svn_diff_datasource_e datasource, + svn_boolean_t datasource_opened, apr_pool_t *pool) { svn_diff__position_t *start_position; @@ -152,10 +153,11 @@ *position_list = NULL; - SVN_ERR(vtable->datasource_open(diff_baton, datasource)); + if (!datasource_opened) + SVN_ERR(vtable->datasource_open(diff_baton, datasource)); position_ref = &start_position; - offset = 0; + offset = vtable->get_prefix_lines(diff_baton); hash = 0; /* The callback fn doesn't need to touch it per se */ while (1) { Index: subversion/libsvn_diff/diff.c =================================================================== --- subversion/libsvn_diff/diff.c (revision 1001548) +++ subversion/libsvn_diff/diff.c (working copy) @@ -43,6 +43,22 @@ svn_diff_t *diff; svn_diff_t **diff_ref = &diff; + if (want_common && (original_start > 1)) + { + /* we have a prefix to skip */ + (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref)); + + (*diff_ref)->type = svn_diff__type_common; + (*diff_ref)->original_start = 0; + (*diff_ref)->original_length = original_start - 1; + (*diff_ref)->modified_start = 0; + (*diff_ref)->modified_length = modified_start - 1; + (*diff_ref)->latest_start = 0; + (*diff_ref)->latest_length = 0; + + diff_ref = &(*diff_ref)->next; + } + while (1) { if (original_start < lcs->position[0]->offset @@ -108,6 +124,7 @@ svn_diff__lcs_t *lcs; apr_pool_t *subpool; apr_pool_t *treepool; + apr_off_t prefix_lines = 0; *diff = NULL; @@ -116,17 +133,22 @@ svn_diff__tree_create(&tree, treepool); + SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, + svn_diff_datasource_original, 
svn_diff_datasource_modified)); + /* Insert the data into the tree */ SVN_ERR(svn_diff__get_tokens(&position_list[0], tree, diff_baton, vtable, svn_diff_datasource_original, + TRUE, subpool)); SVN_ERR(svn_diff__get_tokens(&position_list[1], tree, diff_baton, vtable, svn_diff_datasource_modified, + TRUE, subpool)); /* The cool part is that we don't need the tokens anymore. @@ -142,7 +164,7 @@ lcs = svn_diff__lcs(position_list[0], position_list[1], subpool); /* Produce the diff */ - *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool); + *diff = svn_diff__diff(lcs, prefix_lines + 1, prefix_lines + 1, TRUE, pool); /* Get rid of all the data we don't have a use for anymore */ svn_pool_destroy(subpool); Index: subversion/libsvn_diff/diff3.c =================================================================== --- subversion/libsvn_diff/diff3.c (revision 1001548) +++ subversion/libsvn_diff/diff3.c (working copy) @@ -267,18 +267,21 @@ tree, diff_baton, vtable, svn_diff_datasource_original, + FALSE, subpool)); SVN_ERR(svn_diff__get_tokens(&position_list[1], tree, diff_baton, vtable, svn_diff_datasource_modified, + FALSE, subpool)); SVN_ERR(svn_diff__get_tokens(&position_list[2], tree, diff_baton, vtable, svn_diff_datasource_latest, + FALSE, subpool)); /* Get rid of the tokens, we don't need them to calc the diff */ Index: subversion/libsvn_diff/diff4.c =================================================================== --- subversion/libsvn_diff/diff4.c (revision 1001548) +++ subversion/libsvn_diff/diff4.c (working copy) @@ -194,24 +194,28 @@ tree, diff_baton, vtable, svn_diff_datasource_original, + FALSE, subpool2)); SVN_ERR(svn_diff__get_tokens(&position_list[1], tree, diff_baton, vtable, svn_diff_datasource_modified, + FALSE, subpool)); SVN_ERR(svn_diff__get_tokens(&position_list[2], tree, diff_baton, vtable, svn_diff_datasource_latest, + FALSE, subpool)); SVN_ERR(svn_diff__get_tokens(&position_list[3], tree, diff_baton, vtable, svn_diff_datasource_ancestor, + FALSE, subpool2)); 
/* Get rid of the tokens, we don't need them to calc the diff */