> -----Original Message-----
> From: stsp_at_apache.org [mailto:stsp_at_apache.org]
> Sent: vrijdag 20 juni 2014 17:24
> To: commits_at_subversion.apache.org
> Subject: svn commit: r1604188 - in
> /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h
> transaction.c
>
> Author: stsp
> Date: Fri Jun 20 15:24:02 2014
> New Revision: 1604188
>
> URL: http://svn.apache.org/r1604188
> Log:
> Improve memory usage while fetching changed paths data from a
> transaction
> by eliminating a full in-memory copy of changed path data.
>
> * subversion/libsvn_fs_fs/low_level.c
> (svn_fs_fs__read_changes_incrementally): New FSFS-internal API which
> invokes a callback for each changed path in a transaction.
>
> * subversion/libsvn_fs_fs/low_level.h
> (svn_fs_fs__read_changes_incrementally): Declare.
>
> * subversion/libsvn_fs_fs/transaction.c
> (svn_fs_fs__txn_changes_fetch): Use
> svn_fs_fs__read_changes_incrementally(),
> instead of reading a changes_t list into memory and looping over that list.
>
> Modified:
> subversion/trunk/subversion/libsvn_fs_fs/low_level.c
> subversion/trunk/subversion/libsvn_fs_fs/low_level.h
> subversion/trunk/subversion/libsvn_fs_fs/transaction.c
>
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.c Fri Jun 20
> 15:24:02 2014
> @@ -365,6 +365,31 @@ svn_fs_fs__read_changes(apr_array_header
> return SVN_NO_ERROR;
> }
>
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> + svn_fs_fs__change_receiver_t
> + change_receiver,
> + void *change_receiver_baton,
> + apr_pool_t *pool)
> +{
> + change_t *change;
> + apr_pool_t *iterpool;
> +
> + iterpool = svn_pool_create(pool);
> + do
> + {
> + svn_pool_clear(iterpool);
> +
> + SVN_ERR(read_change(&change, stream, iterpool));
> + if (change)
> + SVN_ERR(change_receiver(change_receiver_baton, change, iterpool));
> + }
> + while (change);
> + svn_pool_destroy(iterpool);
> +
> + return SVN_NO_ERROR;
> +}
> +
> /* Write a single change entry, path PATH, change CHANGE, and copyfrom
> string COPYFROM, into the file specified by FILE. Only include the
> node kind field if INCLUDE_NODE_KIND is true. Only include the
>
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.h
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.h?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.h (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.h Fri Jun 20
> 15:24:02 2014
> @@ -68,6 +68,20 @@ svn_fs_fs__read_changes(apr_array_header
> svn_stream_t *stream,
> apr_pool_t *pool);
>
> +typedef svn_error_t *(*svn_fs_fs__change_receiver_t)(
> + void *baton,
> + change_t *change,
> + apr_pool_t *scratch_pool);
> +
> +/* Read all the changes from STREAM and invoke CHANGE_RECEIVER on
> each change.
> + Do all allocations in POOL. */
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> + svn_fs_fs__change_receiver_t
> + change_receiver,
> + void *change_receiver_baton,
> + apr_pool_t *scratch_pool);
> +
> /* Write the changed path info from CHANGES in filesystem FS to the
> output stream STREAM. You may call this function multiple time on
> the same stream but the last call should set TERMINATE_LIST to write
>
> Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/tra
> nsaction.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Fri Jun 20
> 15:24:02 2014
> @@ -777,84 +777,72 @@ fold_change(apr_hash_t *changed_paths,
> return SVN_NO_ERROR;
> }
>
> -/* Examine all the changed path entries in CHANGES and store them in
> +/* An implementation of svn_fs_fs__change_receiver_t.
> + Examine all the changed path entries in CHANGES and store them in
> *CHANGED_PATHS. Folding is done to remove redundant or unnecessary
> data. Do all allocations in POOL. */
> static svn_error_t *
> -process_changes(apr_hash_t *changed_paths,
> - apr_array_header_t *changes,
> - apr_pool_t *pool)
> -{
> - apr_pool_t *iterpool = svn_pool_create(pool);
> - int i;
> -
> - /* Read in the changes one by one, folding them into our local hash
> - as necessary. */
> -
> - for (i = 0; i < changes->nelts; ++i)
> - {
> - /* The ITERPOOL will be cleared at the end of this function
> - * since it is only used rarely and for a single hash iterator.
> - */
> - change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> -
> - SVN_ERR(fold_change(changed_paths, change));
> -
> - /* Now, if our change was a deletion or replacement, we have to
> - blow away any changes thus far on paths that are (or, were)
> - children of this path.
> - ### i won't bother with another iteration pool here -- at
> - most we talking about a few extra dups of paths into what
> - is already a temporary subpool.
> +process_changes(void *baton,
> + change_t *change,
> + apr_pool_t *scratch_pool)
> +{
> + apr_hash_t *changed_paths = baton;
> +
> + SVN_ERR(fold_change(changed_paths, change));
> +
> + /* Now, if our change was a deletion or replacement, we have to
> + blow away any changes thus far on paths that are (or, were)
> + children of this path.
> + ### i won't bother with another iteration pool here -- at
> + most we talking about a few extra dups of paths into what
> + is already a temporary subpool.
> + */
> +
> + if ((change->info.change_kind == svn_fs_path_change_delete)
> + || (change->info.change_kind == svn_fs_path_change_replace))
> + {
> + apr_hash_index_t *hi;
> + apr_pool_t *iterpool;
> +
> + /* a potential child path must contain at least 2 more chars
> + (the path separator plus at least one char for the name).
> + Also, we should not assume that all paths have been normalized
> + i.e. some might have trailing path separators.
> */
> + apr_ssize_t path_len = change->path.len;
> + apr_ssize_t min_child_len = path_len == 0
> + ? 1
> + : change->path.data[path_len-1] == '/'
> + ? path_len + 1
> + : path_len + 2;
> +
> + /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> + The number of changes to process may be >> 1000.
> + Therefore, keep the inner loop as tight as possible.
> + */
> + iterpool = svn_pool_create(scratch_pool);
> + for (hi = apr_hash_first(scratch_pool, changed_paths);
> + hi;
> + hi = apr_hash_next(hi))
> + {
> + /* KEY is the path. */
> + const void *path;
> + apr_ssize_t klen;
> + apr_hash_this(hi, &path, &klen, NULL);
>
> - if ((change->info.change_kind == svn_fs_path_change_delete)
> - || (change->info.change_kind == svn_fs_path_change_replace))
> - {
> - apr_hash_index_t *hi;
> -
> - /* a potential child path must contain at least 2 more chars
> - (the path separator plus at least one char for the name).
> - Also, we should not assume that all paths have been normalized
> - i.e. some might have trailing path separators.
> - */
> - apr_ssize_t path_len = change->path.len;
> - apr_ssize_t min_child_len = path_len == 0
> - ? 1
> - : change->path.data[path_len-1] == '/'
> - ? path_len + 1
> - : path_len + 2;
> -
> - /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> - The number of changes to process may be >> 1000.
> - Therefore, keep the inner loop as tight as possible.
> - */
> - for (hi = apr_hash_first(iterpool, changed_paths);
> - hi;
> - hi = apr_hash_next(hi))
> - {
> - /* KEY is the path. */
> - const void *path;
> - apr_ssize_t klen;
> - apr_hash_this(hi, &path, &klen, NULL);
> -
> - /* If we come across a child of our path, remove it.
> - Call svn_dirent_is_child only if there is a chance that
> - this is actually a sub-path.
> - */
> - if ( klen >= min_child_len
> - && svn_dirent_is_child(change->path.data, path, iterpool))
> - apr_hash_set(changed_paths, path, klen, NULL);
> - }
> -
> - /* Clear the per-iteration subpool. */
> svn_pool_clear(iterpool);
> +
> + /* If we come across a child of our path, remove it.
> + Call svn_dirent_is_child only if there is a chance that
> + this is actually a sub-path.
> + */
> + if ( klen >= min_child_len
> + && svn_dirent_is_child(change->path.data, path, iterpool))
> + apr_hash_set(changed_paths, path, klen, NULL);
This doesn't look like an on-disk path, so it should never use the dirent apis. It should use either a relpath or an fspath api.
And you can just pass a NULL pool to the is_child functions (except for the URI variant), so that might even completely avoid using an iterpool.
> }
> + svn_pool_destroy(iterpool);
> }
>
> - /* Destroy the per-iteration subpool. */
> - svn_pool_destroy(iterpool);
> -
> return SVN_NO_ERROR;
> }
>
> @@ -866,7 +854,6 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
> {
> apr_file_t *file;
> apr_hash_t *changed_paths = apr_hash_make(pool);
> - apr_array_header_t *changes;
> apr_pool_t *scratch_pool = svn_pool_create(pool);
>
> SVN_ERR(svn_io_file_open(&file,
> @@ -874,11 +861,11 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
> APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
> scratch_pool));
>
> - SVN_ERR(svn_fs_fs__read_changes(&changes,
> + SVN_ERR(svn_fs_fs__read_changes_incrementally(
> svn_stream_from_aprfile2(file, TRUE,
> scratch_pool),
> + process_changes, changed_paths,
> scratch_pool));
> - SVN_ERR(process_changes(changed_paths, changes, pool));
> svn_pool_destroy(scratch_pool);
If the incrementally / streamy function uses a proper iterpool, the subpool called scratch_pool might not be necessary in this block.
(Might still be necessary for usages outside the diff hunks)
Bert
>
> *changed_paths_p = changed_paths;
>
Received on 2014-06-20 22:10:03 CEST