[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: [PATCH]: Optimize merge_file_trivial()

From: Philip Martin <philip.martin_at_wandisco.com>
Date: Fri, 15 Jun 2012 18:36:52 +0100

Markus Schaber <m.schaber_at_3s-software.com> writes:

> +svn_io_filesizes_three_different_p(svn_boolean_t *different_p12,
> + svn_boolean_t *different_p23,
> + svn_boolean_t *different_p13,
> + const char *file1,
> + const char *file2,
> + const char *file3,
> + apr_pool_t *pool)

Probably better to use scratch_pool, rather than pool, in new code.

> +{
> + apr_finfo_t finfo1, finfo2, finfo3;
> + apr_status_t status1, status2, status3;
> + const char *file1_apr, *file2_apr, *file3_apr;
> +
> + /* Not using svn_io_stat() because don't want to generate
> + svn_error_t objects for non-error conditions. */
> +
> + SVN_ERR(cstring_from_utf8(&file1_apr, file1, pool));
> + SVN_ERR(cstring_from_utf8(&file2_apr, file2, pool));
> + SVN_ERR(cstring_from_utf8(&file3_apr, file3, pool));
> +
> + /* Stat all three files */
> + status1 = apr_stat(&finfo1, file1_apr, APR_FINFO_MIN, pool);
> + status2 = apr_stat(&finfo2, file2_apr, APR_FINFO_MIN, pool);
> + status3 = apr_stat(&finfo3, file3_apr, APR_FINFO_MIN, pool);
> +
> + /* If we got an error stat'ing a file, it could be because the
> + file was removed... or who knows. Whatever the case, we
> + don't know if the filesizes are definitely different, so
> + assume that they're not. */
> + *different_p12 = !status1 && !status2 && finfo1.size != finfo2.size;
> + *different_p23 = !status2 && !status3 && finfo2.size != finfo3.size;
> + *different_p13 = !status1 && !status3 && finfo1.size != finfo3.size;
> +
> + return SVN_NO_ERROR;
> +}
> +
> +
> +svn_error_t *
> svn_io_file_checksum2(svn_checksum_t **checksum,
> const char *file,
> svn_checksum_kind_t kind,
> @@ -4072,6 +4109,136 @@
>
>
>
> +/* Do a byte-for-byte comparison of FILE1, FILE2 and FILE3. */
> +static svn_error_t *
> +contents_three_identical_p(svn_boolean_t *identical_p12,
> + svn_boolean_t *identical_p23,
> + svn_boolean_t *identical_p13,
> + const char *file1,
> + const char *file2,
> + const char *file3,
> + apr_pool_t *pool)

Probably better to use scratch_pool, rather than pool, in new code.

> +{
> + svn_error_t *err;
> + apr_size_t bytes_read1, bytes_read2, bytes_read3;
> + char *buf1 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
> + char *buf2 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
> + char *buf3 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
> + apr_file_t *file1_h;
> + apr_file_t *file2_h;
> + apr_file_t *file3_h;
> + svn_boolean_t eof1 = FALSE;
> + svn_boolean_t eof2 = FALSE;
> + svn_boolean_t eof3 = FALSE;
> + svn_boolean_t read_1, read_2, read_3;
> +
> + SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT,
> + pool));
> +
> + err = svn_io_file_open(&file2_h, file2, APR_READ, APR_OS_DEFAULT,
> + pool);
> +
> + if (err)
> + return svn_error_trace(
> + svn_error_compose_create(err,
> + svn_io_file_close(file1_h, pool)));
> +
> + err = svn_io_file_open(&file3_h, file3, APR_READ, APR_OS_DEFAULT,
> + pool);
> +
> + if (err)
> + return svn_error_trace(
> + svn_error_compose_create(
> + err,
> + svn_error_compose_create(svn_io_file_close(file1_h, pool),
> + svn_io_file_close(file2_h, pool))));
> +
> + /* assume TRUE, until disproved below */
> + *identical_p12 = *identical_p23 = *identical_p13 = TRUE;
> + /* We need to read as long as no error occurs, and as long as one of the
> + * flags could still change due to a read operation */
> + while (!err
> + && (*identical_p12 && !eof1 && !eof2)
> + || (*identical_p23 && !eof2 && !eof3)
> + || (*identical_p13 && !eof1 && !eof3))

../src/subversion/libsvn_subr/io.c: In function ‘contents_three_identical_p’:
../src/subversion/libsvn_subr/io.c:4163: warning: suggest parentheses around ‘&&’ within ‘||’

I think you want:

  while (!err
         && ((*identical_p12 && !eof1 && !eof2)
             || (*identical_p23 && !eof2 && !eof3)
             || (*identical_p13 && !eof1 && !eof3)))

> +struct test_file_definition_t test_file_definitions[] =
> + {
> + {"empty", "", 0},
> + {"single_a", "a", 1},
> + {"single_b", "b", 1},
> + {"hundred_a", "aaaaa", 100},
> + {"hundred_b", "bbbbb", 100},
> + {"hundred_b1", "baaaa", 100},
> + {"hundred_b2", "abaaa", 100},
> + {"hundred_b3", "aabaa", 100},
> + {"hundred_b4", "aaaba", 100},
> + {"hundred_b5", "aaaab", 100},
> + {"chunk_minus_one_a", "aaaaa", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_minus_one_b1", "baaaa", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_minus_one_b2", "abaaa", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_minus_one_b3", "aabaa", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_minus_one_b4", "aaaba", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_minus_one_b5", "aaaab", SVN__STREAM_CHUNK_SIZE - 1},
> + {"chunk_a", "aaaaa", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_b1", "baaaa", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_b2", "abaaa", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_b3", "aabaa", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_b4", "aaaba", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_b5", "aaaab", SVN__STREAM_CHUNK_SIZE},
> + {"chunk_plus_one_a", "aaaaa", SVN__STREAM_CHUNK_SIZE + 1},
> + {"chunk_plus_one_b1", "baaaa", SVN__STREAM_CHUNK_SIZE + 1},
> + {"chunk_plus_one_b2", "abaaa", SVN__STREAM_CHUNK_SIZE + 1},
> + {"chunk_plus_one_b3", "aabaa", SVN__STREAM_CHUNK_SIZE + 1},
> + {"chunk_plus_one_b4", "aaaba", SVN__STREAM_CHUNK_SIZE + 1},
> + {"chunk_plus_one_b5", "aaaab", SVN__STREAM_CHUNK_SIZE + 1},
> + {"twochunk_minus_one_a", "aaaaa", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_minus_one_b1", "baaaa", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_minus_one_b2", "abaaa", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_minus_one_b3", "aabaa", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_minus_one_b4", "aaaba", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_minus_one_b5", "aaaab", SVN__STREAM_CHUNK_SIZE*2 - 1},
> + {"twochunk_a", "aaaaa", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_b1", "baaaa", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_b2", "abaaa", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_b3", "aabaa", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_b4", "aaaba", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_b5", "aaaab", SVN__STREAM_CHUNK_SIZE*2},
> + {"twochunk_plus_one_a", "aaaaa", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {"twochunk_plus_one_b1", "baaaa", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {"twochunk_plus_one_b2", "abaaa", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {"twochunk_plus_one_b3", "aabaa", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {"twochunk_plus_one_b4", "aaaba", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {"twochunk_plus_one_b5", "aaaab", SVN__STREAM_CHUNK_SIZE*2 + 1},
> + {0},
> + };
> +
> +/* Function to prepare a single test file */
> +
> +static svn_error_t *
> +create_test_file(struct test_file_definition_t* definition, apr_pool_t *pool, apr_pool_t *scratch_pool)
> +{
> + apr_status_t status;
> + apr_file_t *file_h;
> + apr_off_t midpos = definition->size / 2;
> + svn_error_t *err = NULL;
> + int i;
> +
> + if (definition->size < 5)
> + SVN_ERR_ASSERT(strlen(definition->data) >= definition->size);
> + else
> + SVN_ERR_ASSERT(strlen(definition->data) >= 5);
> +
> + status = apr_filepath_merge(&definition->created_path,
> + TEST_DIR,
> + definition->name,
> + 0,
> + pool);

Do you need to do that? Could you simply relative paths as is done
subversion/tests/libsvn_diff/diff-diff3-test.c.

> +
> + if (status)
> + return svn_error_wrap_apr(status, "Can't merge filename '%s'",
> + definition->name);
> +
> + SVN_ERR(svn_io_file_open(&file_h,
> + definition->created_path,
> + APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_EXCL,
> + APR_REG,
> + scratch_pool));

You need APR_OS_DEFAULT here otherwise the files get created on Unix
without read permission.

> +
> + for (i=1; i <= definition->size; i += 1)
> + {
> + char c;
> + if (i == 1)
> + c = definition->data[0];
> + else if (i < midpos)
> + c = definition->data[1];
> + else if (i == midpos)
> + c = definition->data[2];
> + else if (i < definition->size)
> + c = definition->data[3];
> + else
> + c = definition->data[4];
> +
> + status = apr_file_putc(c, file_h);
> +
> + if (status)
> + break;
> + }
> +
> + if (status)
> + err = svn_error_wrap_apr(status, "Can't merge filename '%s'",
> + definition->name);
> +
> + return svn_error_compose_create(err, svn_io_file_close(file_h, scratch_pool));
> +}
> +
> +/* Function to prepare the whole set of on-disk files to be compared. */
> +static svn_error_t *
> +create_comparison_candidates(apr_pool_t *pool)
> +{
> + apr_finfo_t finfo;
> + apr_pool_t *iterpool = svn_pool_create(pool);
> + struct test_file_definition_t *candidate;
> +
> + /* If there's already a directory named io-test-temp, delete it.
> + Doing things this way means that repositories stick around after
> + a failure for postmortem analysis, but also that tests can be
> + re-run without cleaning out the repositories created by prior
> + runs. */
> + if (apr_stat(&finfo, TEST_DIR, APR_FINFO_TYPE, pool) == APR_SUCCESS)
> + {
> + if (finfo.filetype == APR_DIR)
> + SVN_ERR(svn_io_remove_dir2(TEST_DIR, TRUE, NULL, NULL, pool));
> + else
> + return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
> + "there is already a file named '%s'", TEST_DIR);
> + }
> +
> + SVN_ERR(svn_io_dir_make(TEST_DIR, APR_OS_DEFAULT, pool));

svn_io_check_path rather than apr_stat.

Call svn_test_add_dir_cleanup() to get --cleanup to remove the dir.

> +
> +
> + for (candidate = test_file_definitions; candidate->name != NULL; candidate += 1)
> + {
> + svn_pool_clear(iterpool);
> + SVN_ERR(create_test_file(candidate, pool, iterpool));
> + }

Destroy iterpool, and in the four tests.

> +
> + return SVN_NO_ERROR;
> +}

-- 
uberSVN: Apache Subversion Made Easy
http://www.uberSVN.com
Received on 2012-06-15 19:37:30 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.