*** This is just a patch for experimentation. ***

Implement something like the --ignore-all-space option in libsvn_diff.
Currently, this option is hard-coded, just to play with it.  Before the
lines are compared, all space characters are stripped.  This works for
diff and diff3, so don't use svn update with this patch :-)

* subversion/libsvn_diff/diff_file.c
  (svn_diff__file_baton_t): Add filter_baton pointers.
  (struct filter_baton): New struct.
  (filter): New function that does the very advanced string normalization.
  (svn_diff__file_datasource_get_next_token): Normalize chunk before
   computing hash code.
  (svn_diff__file_token_compare): Normalize the chunks before comparing.
  (svn_diff_file_diff, svn_diff_file_diff3): Initialize filter batons.
  (svn_diff3__file_output_common): Output the line from the "modified"
   file instead of the "original" file.

Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c	(revision 18321)
+++ subversion/libsvn_diff/diff_file.c	(working copy)
@@ -60,6 +60,9 @@
 
   svn_diff__file_token_t *tokens;
 
+  /* ### Filter function? Or maybe not that generality? */
+  void *filter_baton[4];
+
   apr_pool_t *pool;
 } svn_diff__file_baton_t;
 
@@ -242,6 +245,37 @@
 }
 
 
+/* Baton used by the filter function. */
+struct filter_baton {
+  svn_stringbuf_t *buf;
+};
+
+/* Normalize *LEN bytes pointed to by *BUF for comparison.
+   Currently, this removes all spaces.
+   *BUF and *LEN may be replaced with a new buffer/length pair.
+   BATON stores the state of the operation.
+   A call with *LEN set to 0 indicates the end of the chunk and
+   resets the state stored in BATON. */
+static void
+filter(char **buf, apr_off_t *len, void *baton)
+{
+  struct filter_baton *fb = baton;
+  char *p = *buf;
+
+  svn_stringbuf_setempty (fb->buf);
+
+  while (p < *buf + *len)
+    {
+      /* ### Note that I have a pending patent on this efficient code
+         snippet... */
+      if (*p != ' ')
+        svn_stringbuf_appendbytes (fb->buf, p, 1);
+      ++p;
+    }
+  *buf = fb->buf->data;
+  *len = fb->buf->len;
+}
+
 static svn_error_t *
 svn_diff__file_datasource_get_next_token(apr_uint32_t *hash, void **token,
                                          void *baton,
@@ -258,6 +292,9 @@
   apr_uint32_t h = 0;
   /* Did the last chunk end in a CR character? */
   svn_boolean_t had_cr = FALSE;
+  /* Filtered buf and length. */
+  char *buf;
+  apr_off_t len;
 
   *token = NULL;
 
@@ -314,7 +351,10 @@
 
       length = endp - curp;
       file_token->length += length;
-      h = svn_diff__adler32(h, curp, length);
+      buf = curp;
+      len = length;
+      filter(&buf, &len, file_baton->filter_baton[idx]);
+      h = svn_diff__adler32(h, buf, len);
 
       curp = endp = file_baton->buffer[idx];
       file_baton->chunk[idx]++;
@@ -340,7 +380,15 @@
 
   length = eol - curp;
   file_token->length += length;
-  *hash = svn_diff__adler32(h, curp, length);
+  buf = curp;
+  len = length;
+  filter(&buf, &len, file_baton->filter_baton[idx]);
+  h = svn_diff__adler32(h, buf, len);
+  /* Signal end of token to the filter, and let it finish its work. */
+  buf = NULL;
+  len = 0;
+  filter(&buf, &len, file_baton->filter_baton[idx]);
+  *hash = svn_diff__adler32(h, buf, len);
   file_baton->curp[idx] = eol;
 
   *token = file_token;
@@ -364,29 +412,37 @@
   apr_off_t offset[2];
   int idx[2];
   apr_off_t length[2];
-  apr_off_t total_length;
+  apr_off_t total_length[2];
   apr_off_t len;
   int i;
   int chunk[2];
 
-  if (file_token1->length < file_token2->length)
+#if 0
+  /* Some easy outs. */
+  if (! filter)
     {
-      *compare = -1;
-      return SVN_NO_ERROR;
-    }
+      apr_off_t tot_length;
+      if (file_token1->length < file_token2->length)
+        {
+          *compare = -1;
+          return SVN_NO_ERROR;
+        }
 
-  if (file_token1->length > file_token2->length)
-    {
-      *compare = 1;
-      return SVN_NO_ERROR;
-    }
+      if (file_token1->length > file_token2->length)
+        {
+          *compare = 1;
+          return SVN_NO_ERROR;
+        }
 
-  total_length = file_token1->length;
-  if (total_length == 0)
-    {
-      *compare = 0;
-      return SVN_NO_ERROR;
+      tot_length = file_token1->length;
+
+      if (tot_length == 0)
+        {
+          *compare = 0;
+          return SVN_NO_ERROR;
+        }
     }
+#endif
 
   idx[0] = svn_diff__file_datasource_to_index(file_token1->datasource);
   idx[1] = svn_diff__file_datasource_to_index(file_token2->datasource);
@@ -394,51 +450,92 @@
   offset[1] = file_token2->offset;
   chunk[0] = file_baton->chunk[idx[0]];
   chunk[1] = file_baton->chunk[idx[1]];
+  total_length[0] = file_token1->length;
+  total_length[1] = file_token2->length;
 
+  for (i = 0; i < 2; ++i)
+    {
+      if (offset_to_chunk(offset[i]) == chunk[i])
+        {
+          /* If the start of the token is in memory, the entire token is
+           * in memory.
+           */
+          bufp[i] = file_baton->buffer[idx[i]];
+          bufp[i] += offset_in_chunk(offset[i]);
+
+          length[i] = total_length[i];
+          filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+          total_length[i] = 0;
+        }
+      else
+        length[i] = 0;
+    }
+
   do
     {
       for (i = 0; i < 2; i++)
         {
-          if (offset_to_chunk(offset[i]) == chunk[i])
+          while (length[i] == 0 && total_length[i] > 0)
             {
-              /* If the start of the token is in memory, the entire token is
-               * in memory.
-               */
-              bufp[i] = file_baton->buffer[idx[i]];
-              bufp[i] += offset_in_chunk(offset[i]);
-
-              length[i] = total_length;
-            }
-          else
-            {
               /* Read a chunk from disk into a buffer */
               bufp[i] = buffer[i];
-              length[i] = total_length > COMPARE_CHUNK_SIZE ?
-                COMPARE_CHUNK_SIZE : total_length;
+              length[i] = total_length[i] > COMPARE_CHUNK_SIZE ?
+                COMPARE_CHUNK_SIZE : total_length[i];
 
               SVN_ERR(read_chunk(file_baton->file[idx[i]],
                                  file_baton->path[idx[i]],
                                  bufp[i], length[i], offset[i],
                                  file_baton->pool));
+              offset[i] += length[i];
+              total_length[i] -= length[i];
+              filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
             }
+
+          /* If we have no more data, let the filter know. */
+          if (length[i] == 0 && total_length[i] == 0)
+            {
+              filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+              total_length[i] = -1;
+            }
+
         }
 
       len = length[0] > length[1] ? length[1] : length[0];
 
-      offset[0] += len;
-      offset[1] += len;
-
+      if (len == 0)
+        {
+          *compare = 0;
+          break;
+        }
       /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
       *compare = memcmp(bufp[0], bufp[1], len);
       if (*compare != 0)
-        return SVN_NO_ERROR;
+        break;
 
-      total_length -= len;
+      length[0] -= len;
+      length[1] -= len;
     }
-  while(total_length > 0);
+  while(1);
 
-  *compare = 0;
+  if (*compare == 0)
+    {
+      if (length[0] > 0)
+        *compare = -1;
+      else if (length[1] > 0)
+        *compare = 1;
+    }
+
+  /* Clean up filters. Note that we don't waste time on reading the whole
+     chunk just to make the filter happy, but who cares? */
+  for (i = 0; i < 2; ++i)
+    {
+      if (total_length[i] >= 0)
+        {
+          length[i] = 0;
+          filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+        }
+    }
+
   return SVN_NO_ERROR;
 }
 
@@ -482,11 +579,18 @@
   apr_pool_t *pool)
 {
   svn_diff__file_baton_t baton;
+  struct filter_baton filter_baton[2];
+  int i;
 
   memset(&baton, 0, sizeof(baton));
   baton.path[0] = original;
   baton.path[1] = modified;
   baton.pool = svn_pool_create(pool);
+  for (i = 0; i < 2; ++i)
+    {
+      baton.filter_baton[i] = &filter_baton[i];
+      filter_baton[i].buf = svn_stringbuf_create("", pool);
+    }
 
   SVN_ERR(svn_diff_diff(diff, &baton, &svn_diff__file_vtable, pool));
 
@@ -502,12 +606,19 @@
   apr_pool_t *pool)
 {
   svn_diff__file_baton_t baton;
+  struct filter_baton fb[3];
+  int i;
 
   memset(&baton, 0, sizeof(baton));
   baton.path[0] = original;
   baton.path[1] = modified;
   baton.path[2] = latest;
   baton.pool = svn_pool_create(pool);
+  for (i = 0; i < 3; ++i)
+    {
+      baton.filter_baton[i] = &fb[i];
+      fb[i].buf = svn_stringbuf_create("", pool);
+    }
 
   SVN_ERR(svn_diff_diff3(diff, &baton, &svn_diff__file_vtable, pool));
 
@@ -1088,8 +1199,8 @@
                            apr_off_t modified_start, apr_off_t modified_length,
                            apr_off_t latest_start, apr_off_t latest_length)
 {
-  return svn_diff3__file_output_hunk(baton, 0,
-                                     original_start, original_length);
+  return svn_diff3__file_output_hunk(baton, 1,
+                                     modified_start, modified_length);
 }
 
 static svn_error_t *
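
For anyone who wants to play with the idea outside the library, here is a
minimal standalone sketch (not part of the patch) of the normalization the
filter performs: drop every space character from a line before comparing.
The strip_spaces helper and the sample lines are made up for illustration;
the patch itself streams chunked buffers through the filter baton rather
than whole NUL-terminated lines.

#include <stdio.h>
#include <string.h>

/* Copy SRC into DST, dropping all ' ' characters.  DST must have room
   for strlen(SRC) + 1 bytes.  Returns the normalized length. */
static size_t
strip_spaces(char *dst, const char *src)
{
  size_t len = 0;

  for (; *src; ++src)
    if (*src != ' ')
      dst[len++] = *src;
  dst[len] = '\0';

  return len;
}

int
main(void)
{
  char a[64], b[64];

  strip_spaces(a, "int  x = 1;");
  strip_spaces(b, "int x=1;");

  /* With --ignore-all-space semantics these two lines compare equal,
     so a diff would not report a change here. */
  printf("%s\n", strcmp(a, b) == 0 ? "equal" : "different");
  return 0;
}

Like the patch, this strips only ' ' (0x20); tabs and other whitespace
still count as differences.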