[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] Optimize svn log --limit

From: D.J. Heap <dj_at_shadyvale.net>
Date: 2005-05-11 02:45:20 CEST

This patch optimizes 'svn log --limit' using the method proposed by Greg
Hudson on the previous 'Quick Analysis' thread. For me, on a 10k revision
repo a 'svn log --limit 10 svn://localhost/svn-repo/trunk' went from around
10 seconds to less than a second. It passes ra_local tests on Window and
Linux; I would appreciate a thorough review.

However, I could not think of a way to optimize the forward-thru-history use
case very much since there is no reasonable way to read through history
forward. That is, 'svn log --limit 10 -r 0:HEAD
svn://localhost/svn-repo/trunk'.

Can someone else think of a way to optimize that case?

Log:
Optimize 'svn log --limit' performance by reading histories for
paths together as proposed by Greg Hudson.

* subversion/libsvn_repos/log.c
   (path_history_root): New helper function to get revision roots.
   (get_history): New helper to read history for a path.
   (check_history): New helper to see if history needs to be read for
   a path.
   (svn_repos_get_logs3): Reworked to read histories for the target paths
   in step and to pay attention to the limit parameter earlier.

Index: subversion/libsvn_repos/log.c
===================================================================
--- subversion/libsvn_repos/log.c (revision 14678)
+++ subversion/libsvn_repos/log.c (working copy)
@@ -187,22 +187,128 @@
   return SVN_NO_ERROR;
 }
 
+/* This is used by svn_repos_get_logs3 to get a revision
+ root for a path. */
+static svn_error_t *
+path_history_root (svn_fs_root_t **root,
+ svn_fs_t *fs,
+ const char* path,
+ svn_revnum_t rev,
+ svn_repos_authz_func_t authz_read_func,
+ void* authz_read_baton,
+ apr_pool_t* pool)
+{
+ /* Get a revision root for REV. */
+ SVN_ERR (svn_fs_revision_root (root, fs, rev, pool));
 
+ if (authz_read_func)
+ {
+ svn_boolean_t readable;
+ SVN_ERR (authz_read_func (&readable, *root, path,
+ authz_read_baton, pool));
+ if (! readable)
+ return svn_error_create (SVN_ERR_AUTHZ_UNREADABLE, NULL, NULL);
+ }
+ return SVN_NO_ERROR;
+}
 
-/* Implements svn_repos_history_func_t interface. Accumulate history
- revisions in the apr_array_header_t * which is the BATON. */
+/* This is used by svn_repos_get_logs3 to keep track of multiple
+ path history information while working through history. */
+struct path_info
+{
+ svn_fs_root_t *root;
+ const char *path;
+ svn_fs_history_t *hist;
+ apr_pool_t *newpool;
+ apr_pool_t *oldpool;
+ svn_revnum_t history_rev;
+};
+
+/* This helper to svn_repos_get_logs3 is used to get the path's
+ history. */
 static svn_error_t *
-history_to_revs_array (void *baton,
- const char *path,
- svn_revnum_t revision,
- apr_pool_t *pool)
+get_history (struct path_info *info,
+ svn_fs_t *fs,
+ svn_boolean_t cross_copies,
+ svn_repos_authz_func_t authz_read_func,
+ void *authz_read_baton,
+ svn_revnum_t start)
 {
- apr_array_header_t *revs_array = baton;
- APR_ARRAY_PUSH (revs_array, svn_revnum_t) = revision;
+ apr_pool_t* temppool;
+
+ SVN_ERR (svn_fs_history_prev (&info->hist, info->hist, cross_copies,
+ info->newpool));
+ if (! info->hist)
+ return SVN_NO_ERROR;
+
+ /* Fetch the location information for this history step. */
+ SVN_ERR (svn_fs_history_location (&info->path, &info->history_rev,
+ info->hist, info->newpool));
+
+ /* Is the history item readable? If not, done with path. */
+ if (authz_read_func)
+ {
+ svn_boolean_t readable;
+ svn_fs_root_t *history_root;
+ SVN_ERR (svn_fs_revision_root (&history_root, fs,
+ info->history_rev,
+ info->newpool));
+ SVN_ERR (authz_read_func (&readable, history_root,
+ info->path,
+ authz_read_baton,
+ info->newpool));
+ if (! readable)
+ info->hist = NULL;
+ }
+
+ /* Now we can clear the old pool. */
+ temppool = info->oldpool;
+ info->oldpool = info->newpool;
+ svn_pool_clear (temppool);
+ info->newpool = temppool;
+
+ /* If this history item predates our START revision then
+ don't fetch any more for this path. */
+ if (info->history_rev < start)
+ info->hist = NULL;
   return SVN_NO_ERROR;
 }
 
+/* This helper to svn_repos_get_logs3 is used to advance the path's
+ history to the next one *if* the revision it is at is equal to
+ or greater than current. The changed flag is only touched if
+ this path has history in the current rev. */
+static svn_error_t *
+check_history (svn_boolean_t *changed,
+ struct path_info *info,
+ svn_fs_t *fs,
+ svn_revnum_t current,
+ svn_boolean_t cross_copies,
+ svn_repos_authz_func_t authz_read_func,
+ void *authz_read_baton,
+ svn_revnum_t start)
+{
+ /* If we're already done with histories for this path,
+ don't try to fetch any more. */
+ if (! info->hist)
+ return SVN_NO_ERROR;
 
+ /* If the last rev we got for this path is less than current,
+ then just return and don't fetch history for this path.
+ The caller will get to this rev eventually or else reach
+ the limit. */
+ if (info->history_rev < current)
+ return SVN_NO_ERROR;
+
+ /* If the last rev we got for this path is equal to current
+ then set *changed to true and get the next history
+ rev where this path was changed. */
+ *changed = TRUE;
+ SVN_ERR (get_history (info, fs, cross_copies, authz_read_func,
+ authz_read_baton, start));
+ return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_repos_get_logs3 (svn_repos_t *repos,
                      const apr_array_header_t *paths,
@@ -221,7 +327,9 @@
   apr_pool_t *subpool = svn_pool_create (pool);
   svn_fs_t *fs = repos->fs;
   apr_array_header_t *revs = NULL;
- int count = 0;
+ svn_revnum_t hist_end = end;
+ svn_revnum_t hist_start = start;
+ int i;
 
   SVN_ERR (svn_fs_youngest_rev (&head, fs, pool));
 
@@ -241,6 +349,13 @@
       (SVN_ERR_FS_NO_SUCH_REVISION, 0,
        _("No such revision %ld"), end);
 
+ /* Get an ordered copy of the start and end revisions. */
+ if (start > end)
+ {
+ hist_start = end;
+ hist_end = start;
+ }
+
   /* If paths were specified, then we only really care about revisions
      in which those paths were changed. So we ask the filesystem for
      all the revisions in which any of the paths was changed.
@@ -258,107 +373,102 @@
            && (! svn_path_is_empty (APR_ARRAY_IDX (paths, 0, const char *))))
           || (paths->nelts > 1)))
     {
- /* If there is only one path, we'll just get its sorted changed
- revisions. Else, we'll be combining all our findings into a
- hash (to remove duplicates) and then generating a sorted
- array from that hash. */
- if (paths->nelts == 1)
+ svn_revnum_t current;
+ apr_array_header_t *histories;
+ svn_boolean_t any_histories_left = TRUE;
+
+ revs = apr_array_make (pool, limit ? limit : 10,
+ sizeof (svn_revnum_t));
+ histories = apr_array_make (subpool, paths->nelts,
+ sizeof(struct path_info *));
+ /* Create a history object for each path so we can walk through
+ them all at the same time until we have all changes or limit
+ is reached.
+
+ There is some pool fun going on due to the fact that we have
+ to hold on to the old pool with the history before we can
+ get the next history. */
+ for (i = 0; i < paths->nelts; i++)
         {
- /* Get the changed revisions for this path. */
- const char *this_path = APR_ARRAY_IDX (paths, 0, const char *);
- revs = apr_array_make (pool, 64, sizeof (svn_revnum_t));
- SVN_ERR (svn_repos_history2 (fs, this_path,
- history_to_revs_array, revs,
- authz_read_func, authz_read_baton,
- start, end,
- strict_node_history ? FALSE : TRUE,
- pool));
+ const char *this_path = APR_ARRAY_IDX (paths, i, const char *);
+ svn_fs_root_t *root;
+ struct path_info *info = apr_palloc (subpool,
+ sizeof(struct path_info));
+
+ SVN_ERR (path_history_root (&root, fs, this_path, hist_end,
+ authz_read_func, authz_read_baton,
+ pool));
+ info->root = root;
+ info->path = this_path;
+ info->oldpool = svn_pool_create (subpool);
+ info->newpool = svn_pool_create (subpool);
+ SVN_ERR (svn_fs_node_history (&info->hist, info->root, info->path,
+ info->oldpool));
+ SVN_ERR (get_history (info, fs,
+ strict_node_history ? FALSE : TRUE,
+ authz_read_func, authz_read_baton,
+ hist_start));
+ *((struct path_info **) apr_array_push (histories)) = info;
         }
- else
+
+ /* Loop through all the revisions in the range and add any
+ where a path was changed to the array. */
+ for (current = hist_end;
+ current >= hist_start && any_histories_left;
+ --current)
         {
- int i;
- apr_hash_t *all_revs = apr_hash_make (pool);
- apr_hash_index_t *hi;
-
- /* And the search is on... */
- for (i = 0; i < paths->nelts; i++)
+ svn_boolean_t changed = FALSE;
+ any_histories_left = FALSE;
+ for (i = 0; i < histories->nelts; i++)
             {
- const char *this_path = APR_ARRAY_IDX (paths, i, const char *);
- apr_array_header_t *changed_revs =
- apr_array_make (pool, 64, sizeof (svn_revnum_t));
- int j;
+ struct path_info *info = APR_ARRAY_IDX (histories, i,
+ struct path_info *);
 
- /* Get the changed revisions for this path, and add them to
- the hash (this will eliminate duplicates). */
- SVN_ERR (svn_repos_history2 (fs, this_path,
- history_to_revs_array, changed_revs,
- authz_read_func, authz_read_baton,
- start, end,
- strict_node_history ? FALSE : TRUE,
- pool));
- for (j = 0; j < changed_revs->nelts; j++)
- {
- /* We're re-using the memory allocated for the array
- here in order to avoid more allocations. */
- svn_revnum_t *chrev =
- (((svn_revnum_t *)(changed_revs)->elts) + j);
- apr_hash_set (all_revs, chrev, sizeof (chrev), (void *)1);
- }
+ /* Check history for this path in current rev. */
+ SVN_ERR (check_history (&changed, info, fs, current,
+ strict_node_history ? FALSE : TRUE,
+ authz_read_func, authz_read_baton,
+ hist_start));
+ if ( info->hist != NULL )
+ any_histories_left = TRUE;
             }
 
- /* Now that we have a hash of all the revisions in which any of
- our paths changed, we can convert that back into a sorted
- array. */
- revs = apr_array_make (pool, apr_hash_count (all_revs),
- sizeof (svn_revnum_t));
- for (hi = apr_hash_first (pool, all_revs);
- hi;
- hi = apr_hash_next (hi))
- {
- const void *key;
- svn_revnum_t revision;
-
- apr_hash_this (hi, &key, NULL, NULL);
- revision = *((const svn_revnum_t *)key);
- (*((svn_revnum_t *) apr_array_push (revs))) = revision;
- }
+ /* If any of the paths changed in this rev then add it. */
+ if (changed)
+ *(svn_revnum_t*) apr_array_push (revs) = current;
 
- /* Now sort the array */
- qsort ((revs)->elts, (revs)->nelts, (revs)->elt_size,
- svn_sort_compare_revisions);
+ /* See if we've got enough yet -- we can only stop early if
+ they wanted history in normal order and specified a limit. */
+ if (limit && start > end && revs->nelts >= limit)
+ break;
         }
-
- /* If no revisions were found for these entries, we have nothing
- to show. Just return now before we break a sweat. */
- if (! (revs && revs->nelts))
- return SVN_NO_ERROR;
     }
+ else
+ {
+ /* Build a rev list with all the revs in the range specified. */
+ int count = hist_end - hist_start + 1;
+ if ( limit && start > end && count > limit )
+ count = limit;
+ revs = apr_array_make (pool, count, sizeof (svn_revnum_t));
+ for (i = 0; i < count; ++i)
+ *(svn_revnum_t*) apr_array_push (revs) = hist_end - i;
+ }
 
- for (this_rev = start;
- ((start >= end) ? (this_rev >= end) : (this_rev <= end));
- ((start >= end) ? this_rev-- : this_rev++))
+ /* Work loop for processing the revisions we found. */
+ for (i = 0; i < revs->nelts; ++i)
     {
       svn_string_t *author, *date, *message;
       apr_hash_t *r_props, *changed_paths = NULL;
 
       svn_pool_clear (subpool);
 
- /* If we have a list of revs for use, check to make sure this is
- one of them. */
- if (revs)
- {
- int i, matched = 0;
- for (i = 0; ((i < revs->nelts) && (! matched)); i++)
- {
- if (this_rev == ((svn_revnum_t *)(revs->elts))[i])
- matched = 1;
- }
+ /* Reverse the order we read the list of revs if needed. */
+ if ( start > end )
+ this_rev = APR_ARRAY_IDX (revs, i, svn_revnum_t);
+ else
+ this_rev = APR_ARRAY_IDX (revs, revs->nelts - i - 1, svn_revnum_t);
 
- if (! matched)
- continue;
- }
-
- SVN_ERR (svn_fs_revision_proplist (&r_props, fs, this_rev, pool));
+ SVN_ERR (svn_fs_revision_proplist (&r_props, fs, this_rev, subpool));
       author = apr_hash_get (r_props, SVN_PROP_REVISION_AUTHOR,
                              APR_HASH_KEY_STRING);
       date = apr_hash_get (r_props, SVN_PROP_REVISION_DATE,
@@ -416,7 +526,9 @@
                             message ? message->data : NULL,
                             subpool));
 
- if (++count == limit)
+ /* We still need to check limit here in case range was specified in
+ forward time order and we couldn't optimize as well. */
+ if ( limit && i >= limit )
         break;
     }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Wed May 11 02:46:27 2005

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.