[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: returning hash from FSFS changes cache?

From: Stefan Fuhrmann <stefan.fuhrmann_at_wandisco.com>
Date: Tue, 24 Jun 2014 00:28:35 +0200

On Mon, Jun 23, 2014 at 11:41 PM, Stefan Sperling <stsp_at_elego.de> wrote:

> The FSFS changes cache returns an array representation of changed
> path data which needs to be converted back to a hash for API reasons
> by svn_fs_fs__paths_changed(). No other function accesses the
> changes cache so returning a temporary array seems odd.
> Wouldn't it make sense for the cache to return hashes directly?
>
> Isn't it better to convert from hash to array during serialization,
> which ideally happens only once, rather than constructing an array
> and then a hash from array elements every time the cache is accessed?
>

The rationale is as follows - which may or may not be correct:

* constructing an array is extremely cheap
  (even in the ruby example, it uses only ~2MB, half of it due to resize;
  adding entries is trivial) and having it as an intermediate step almost
  no extra costs.
* not every item that gets put into cache will ever get retrieved.
  Adding data to the cache is therefore the hot path (with notable
exceptions).

Looking at the (de-)serializer code, there is still room for improvement.
The array could be used directly instead of creating a temporary copy.

-- Stefan^2.

> I'm not sure how to measure the effects of this.
>
> Index: subversion/libsvn_fs_fs/cached_data.c
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.c (working copy)
> @@ -2686,7 +2686,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
> }
>
> svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
> svn_fs_t *fs,
> svn_revnum_t rev,
> apr_pool_t *result_pool)
> @@ -3093,7 +3093,7 @@ auto_select_stream(svn_stream_t **stream,
> * FILE and wrapping FILE_STREAM. Use POOL for allocations.
> */
> static svn_error_t *
> -block_read_changes(apr_array_header_t **changes,
> +block_read_changes(apr_hash_t **changes,
> svn_fs_t *fs,
> svn_fs_fs__revision_file_t *rev_file,
> svn_fs_fs__p2l_entry_t *entry,
> @@ -3303,7 +3303,7 @@ block_read(void **result,
> break;
>
> case SVN_FS_FS__ITEM_TYPE_CHANGES:
> - SVN_ERR(block_read_changes((apr_array_header_t
> **)&item,
> + SVN_ERR(block_read_changes((apr_hash_t **)&item,
> fs, revision_file,
> entry, is_result,
> pool, iterpool));
> Index: subversion/libsvn_fs_fs/cached_data.h
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.h (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.h (working copy)
> @@ -150,7 +150,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist,
> * Allocate the result in POOL.
> */
> svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
> svn_fs_t *fs,
> svn_revnum_t rev,
> apr_pool_t *pool);
> Index: subversion/libsvn_fs_fs/low_level.c
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.c (working copy)
> @@ -381,7 +381,7 @@ read_change(change_t **change_p,
> }
>
> svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
> svn_stream_t *stream,
> apr_pool_t *result_pool,
> apr_pool_t *scratch_pool)
> @@ -389,15 +389,14 @@ svn_error_t *
> change_t *change;
> apr_pool_t *iterpool;
>
> - /* pre-allocate enough room for most change lists
> - (will be auto-expanded as necessary) */
> - *changes = apr_array_make(result_pool, 30, sizeof(change_t *));
> + *changes = apr_hash_make(result_pool);
>
> SVN_ERR(read_change(&change, stream, result_pool, scratch_pool));
> iterpool = svn_pool_create(scratch_pool);
> while (change)
> {
> - APR_ARRAY_PUSH(*changes, change_t*) = change;
> + apr_hash_set(*changes, change->path.data, change->path.len,
> + &change->info);
> SVN_ERR(read_change(&change, stream, result_pool, iterpool));
> svn_pool_clear(iterpool);
> }
> Index: subversion/libsvn_fs_fs/low_level.h
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.h (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.h (working copy)
> @@ -86,7 +86,7 @@ svn_fs_fs__unparse_footer(apr_off_t l2p_offset,
> /* Read all the changes from STREAM and store them in *CHANGES,
> allocated in RESULT_POOL. Do temporary allocations in SCRATCH_POOL. */
> svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
> svn_stream_t *stream,
> apr_pool_t *result_pool,
> apr_pool_t *scratch_pool);
> Index: subversion/libsvn_fs_fs/temp_serializer.c
> ===================================================================
> --- subversion/libsvn_fs_fs/temp_serializer.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/temp_serializer.c (working copy)
> @@ -1125,7 +1125,7 @@ deserialize_change(void *buffer, change_t **change
> }
>
> /* Auxiliary structure representing the content of a change_t array.
> - This structure is much easier to (de-)serialize than an APR array.
> + This structure is much easier to (de-)serialize than an APR hash.
> */
> typedef struct changes_data_t
> {
> @@ -1142,20 +1142,30 @@ svn_fs_fs__serialize_changes(void **data,
> void *in,
> apr_pool_t *pool)
> {
> - apr_array_header_t *array = in;
> + apr_hash_t *hash = in;
> changes_data_t changes;
> svn_temp_serializer__context_t *context;
> svn_stringbuf_t *serialized;
> int i;
> + apr_hash_index_t *hi;
>
> /* initialize our auxiliary data structure */
> - changes.count = array->nelts;
> + changes.count = apr_hash_count(hash);
> changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count);
>
> - /* populate it with the array elements */
> - for (i = 0; i < changes.count; ++i)
> - changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*);
> + /* populate it with the hash elements */
> + i = 0;
> + for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi), ++i)
> + {
> + const char *path = svn__apr_hash_index_key(hi);
> + svn_fs_path_change2_t *info = svn__apr_hash_index_val(hi);
>
> + changes.changes[i] = apr_palloc(pool, sizeof (change_t));
> + changes.changes[i]->path.data = path;
> + changes.changes[i]->path.len = strlen(path);
> + changes.changes[i]->info = *info;
> + }
> +
> /* serialize it and all its elements */
> context = svn_temp_serializer__init(&changes,
> sizeof(changes),
> @@ -1188,8 +1198,7 @@ svn_fs_fs__deserialize_changes(void **out,
> {
> int i;
> changes_data_t *changes = (changes_data_t *)data;
> - apr_array_header_t *array = apr_array_make(pool, changes->count,
> - sizeof(change_t *));
> + apr_hash_t *hash = apr_hash_make(pool);
>
> /* de-serialize our auxiliary data structure */
> svn_temp_deserializer__resolve(changes, (void**)&changes->changes);
> @@ -1197,13 +1206,17 @@ svn_fs_fs__deserialize_changes(void **out,
> /* de-serialize each entry and add it to the array */
> for (i = 0; i < changes->count; ++i)
> {
> + change_t *change;
> +
> deserialize_change(changes->changes,
> (change_t **)&changes->changes[i]);
> - APR_ARRAY_PUSH(array, change_t *) = changes->changes[i];
> + change = changes->changes[i];
> + apr_hash_set(hash, change->path.data, change->path.len,
> + &change->info);
> }
>
> /* done */
> - *out = array;
> + *out = hash;
>
> return SVN_NO_ERROR;
> }
> Index: subversion/libsvn_fs_fs/transaction.c
> ===================================================================
> --- subversion/libsvn_fs_fs/transaction.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/transaction.c (working copy)
> @@ -881,23 +881,8 @@ svn_fs_fs__paths_changed(apr_hash_t **changed_path
> svn_revnum_t rev,
> apr_pool_t *pool)
> {
> - apr_hash_t *changed_paths;
> - apr_array_header_t *changes;
> - int i;
> -
> - SVN_ERR(svn_fs_fs__get_changes(&changes, fs, rev, pool));
> -
> - changed_paths = svn_hash__make(pool);
> - for (i = 0; i < changes->nelts; ++i)
> - {
> - change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> - apr_hash_set(changed_paths, change->path.data, change->path.len,
> - &change->info);
> - }
> -
> - *changed_paths_p = changed_paths;
> -
> - return SVN_NO_ERROR;
> + return svn_error_trace(svn_fs_fs__get_changes(changed_paths_p, fs, rev,
> + pool));
> }
>
> /* Copy a revision node-rev SRC into the current transaction TXN_ID in
>
Received on 2014-06-24 00:29:01 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.