[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: [PATCH] Optimize svn log --limit

From: Peter N. Lundblad <peter_at_famlundblad.se>
Date: 2005-05-12 21:57:01 CEST

On Wed, 11 May 2005, D.J. Heap wrote:

> New version.
>
New complaints:-) Mostly style stuff, but one security problem.

> Index: subversion/libsvn_repos/log.c
> ===================================================================
> --- subversion/libsvn_repos/log.c (revision 14699)
> +++ subversion/libsvn_repos/log.c (working copy)

> +/* This helper to svn_repos_get_logs3 is used to get the path's
> + history. */
> static svn_error_t *
> -history_to_revs_array (void *baton,
> - const char *path,
> - svn_revnum_t revision,
> - apr_pool_t *pool)
> +get_history (struct path_info *info,
> + svn_fs_t *fs,
> + svn_boolean_t cross_copies,
> + svn_repos_authz_func_t authz_read_func,
> + void *authz_read_baton,
> + svn_revnum_t start)
> {
> - apr_array_header_t *revs_array = baton;
> - APR_ARRAY_PUSH (revs_array, svn_revnum_t) = revision;
> + apr_pool_t *temppool;
> +
> + SVN_ERR (svn_fs_history_prev (&info->hist, info->hist, cross_copies,
> + info->newpool));
> + if (! info->hist)
> + return SVN_NO_ERROR;
> +
> + /* Fetch the location information for this history step. */
> + SVN_ERR (svn_fs_history_location (&info->path, &info->history_rev,
> + info->hist, info->newpool));
> +
> + /* Is the history item readable? If not, done with path. */
> + if (authz_read_func)
> + {
> + svn_boolean_t readable;
> + svn_fs_root_t *history_root;
> + SVN_ERR (svn_fs_revision_root (&history_root, fs,
> + info->history_rev,
> + info->newpool));

Indentation.

> +/* Helper to find the next interesting history revision. */
> +static svn_revnum_t
> +next_history_rev ( apr_array_header_t *histories )

Spacing.

> +/* Helper function used by svn_repos_get_logs3 to send history
> + info to the caller's callback. */
> +static svn_error_t *
> +send_change_rev (svn_revnum_t rev,
> + svn_fs_t *fs,
> + svn_boolean_t discover_changed_paths,
> + svn_repos_authz_func_t authz_read_func,
> + void *authz_read_baton,
> + svn_log_message_receiver_t receiver,
> + void *receiver_baton,
> + apr_pool_t *pool)
> +{
> + svn_string_t *author, *date, *message;
> + apr_hash_t *r_props, *changed_paths = NULL;
> +
> + SVN_ERR (svn_fs_revision_proplist (&r_props, fs, rev, pool));
> + author = apr_hash_get (r_props, SVN_PROP_REVISION_AUTHOR,
> + APR_HASH_KEY_STRING);
> + date = apr_hash_get (r_props, SVN_PROP_REVISION_DATE,
> + APR_HASH_KEY_STRING);
> + message = apr_hash_get (r_props, SVN_PROP_REVISION_LOG,
> + APR_HASH_KEY_STRING);
> +
> + /* Discover changed paths if the user requested them
> + or if we need to check that they are readable. */
> + if ((rev > 0)
> + && (authz_read_func || discover_changed_paths))
> + {
> + svn_fs_root_t *newroot;
> + svn_error_t *patherr;
> +
> + SVN_ERR (svn_fs_revision_root (&newroot, fs, rev, pool));
> + patherr = detect_changed (&changed_paths,
> + newroot, fs,
> + authz_read_func, authz_read_baton,
> + pool);
> +
> + if (patherr
> + && patherr->apr_err == SVN_ERR_AUTHZ_UNREADABLE)
> + {
> + /* All changed-paths are unreadable, so clear all fields. */
> + svn_error_clear (patherr);
> + changed_paths = NULL;
> + author = NULL;
> + date = NULL;
> + message = NULL;
> + }
> + else if (patherr
> + && patherr->apr_err == SVN_ERR_AUTHZ_PARTIALLY_READABLE)
> + {
> + /* At least one changed-path was unreadable, so omit the
> + log message. (The unreadable paths are already
> + missing from the hash.) */

Indentation.

> + svn_error_clear (patherr);
> + message = NULL;
> + }
> + else if (patherr)
> + return patherr;
> +
> + /* It may be the case that an authz func was passed in, but
> + the user still doesn't want to see any changed-paths. */

Indentation.

> @@ -241,6 +445,16 @@
> (SVN_ERR_FS_NO_SUCH_REVISION, 0,
> _("No such revision %ld"), end);
>
> + /* Get an ordered copy of the start and end revisions and allocate
> + a revision list buffer if they want history in forward order. */
> + if (start > end)
> + {
> + hist_start = end;
> + hist_end = start;
> + }
> + else
> + revs = apr_array_make (pool, limit ? limit : 10, sizeof (svn_revnum_t));
> +

Ooops! Opens a DOS attack. Now one can quickly ask the server to
allocate an arbitrary amount of memory. Set this to a small constant
value instead. (YOu can get it to allocate much memory, but hopefully
we've fixed the schema before anyone has millions of revisions:-).

Maybe you should add a comment that it's important to use pool not
subpool here and why. It's easy for someone to "fix" this

...
> + /* Loop through all the revisions in the range and add any
> + where a path was changed to the array, or if they wanted
> + history in reverse order just send it to them right away. */
> + for (current = hist_end;
> + current >= hist_start && any_histories_left;
> + current = next_history_rev (histories) )

Spacing.

> + /* If any of the paths changed in this rev then add or send it. */
> + if (changed)
> + {

Indentation.

> + /* If they wanted it in reverse order we can send it completely
> + streamily right now. */
> + if (start > end)
> + {
> + SVN_ERR (send_change_rev (current, fs, discover_changed_paths,
> + authz_read_func, authz_read_baton,
> + receiver, receiver_baton, subpool));
> + if (limit && ++sent_count > limit)
> + break;
> + }
> + else
> + {
> + /* They wanted it in forward order, so we have to buffer up
> + a list of revs and process it later. */

Indentation.

> + *(svn_revnum_t*) apr_array_push (revs) = current;
> + }
> + }
> }
> -
> - /* If no revisions were found for these entries, we have nothing
> - to show. Just return now before we break a sweat. */
> - if (! (revs && revs->nelts))
> - return SVN_NO_ERROR;
> }
> -
> - for (this_rev = start;
> - ((start >= end) ? (this_rev >= end) : (this_rev <= end));
> - ((start >= end) ? this_rev-- : this_rev++))
> + else
> {
> - svn_string_t *author, *date, *message;
> - apr_hash_t *r_props, *changed_paths = NULL;
> -
> - svn_pool_clear (subpool);
> -
> - /* If we have a list of revs for use, check to make sure this is
> - one of them. */
> - if (revs)
> + /* They want history for the root path, so every rev has a change. */
> + int count = hist_end - hist_start + 1;
> + if (limit && start > end && count > limit)
> + count = limit;
> + for (i = 0; i < count; ++i)
> {
> - int i, matched = 0;
> - for (i = 0; ((i < revs->nelts) && (! matched)); i++)
> + /* If they wanted it in reverse order we can send it completely
> + streamily right now. */
> + if ( start > end )
> + SVN_ERR (send_change_rev (hist_end - i, fs,
> + discover_changed_paths,
> + authz_read_func, authz_read_baton,
> + receiver, receiver_baton, subpool));
> + else
> {
> - if (this_rev == ((svn_revnum_t *)(revs->elts))[i])
> - matched = 1;
> + /* They wanted it in forward order, so we have to buffer up
> + a list of revs and process it later. */
> + *(svn_revnum_t*) apr_array_push (revs) = hist_end - i;

Hmmm... Why not send the revisions right away in this case? We know
we'll send every revision, so we cnn just calculate where to start
from the limit parameter, can't we?

> }
> -
> - if (! matched)
> - continue;
> }
> + }
>
...
> + if (revs)
> + {
> + /* Work loop for processing the revisions we found since they wanted
> + history in forward order. */

And here we go nit-picking again:-) Indentation.

> + for (i = 0; i < revs->nelts; ++i)
> {
> - svn_fs_root_t *newroot;
> - svn_error_t *patherr;
> + svn_pool_clear (subpool);
>
> - SVN_ERR (svn_fs_revision_root (&newroot, fs, this_rev, subpool));
> - patherr = detect_changed (&changed_paths,
> - newroot, fs,
> + SVN_ERR (send_change_rev (APR_ARRAY_IDX (revs, revs->nelts - i - 1,
> + svn_revnum_t),
> + fs, discover_changed_paths,
> authz_read_func, authz_read_baton,
> - subpool);
> -
> - if (patherr
> - && patherr->apr_err == SVN_ERR_AUTHZ_UNREADABLE)
> - {
> - /* All changed-paths are unreadable, so clear all fields. */
> - svn_error_clear (patherr);
> - changed_paths = NULL;
> - author = NULL;
> - date = NULL;
> - message = NULL;
> - }
> - else if (patherr
> - && patherr->apr_err == SVN_ERR_AUTHZ_PARTIALLY_READABLE)
> - {
> - /* At least one changed-path was unreadable, so omit the
> - log message. (The unreadable paths are already
> - missing from the hash.) */
> - svn_error_clear (patherr);
> - message = NULL;
> - }
> - else if (patherr)
> - return patherr;
> -
> - /* It may be the case that an authz func was passed in, but
> - the user still doesn't want to see any changed-paths. */
> - if (! discover_changed_paths)
> - changed_paths = NULL;
> + receiver, receiver_baton, subpool));
> + if (limit && i >= limit)
> + break;
> }
> -
> - SVN_ERR ((*receiver) (receiver_baton,
> - changed_paths,
> - this_rev,
> - author ? author->data : NULL,
> - date ? date->data : NULL,
> - message ? message->data : NULL,
> - subpool));
> -
> - if (++count == limit)
> - break;
> }
>
> svn_pool_destroy (subpool);
>
>

Else I think it's great! I really love this streamyness!

Thanks,
//Peter

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu May 12 21:50:28 2005

This is an archived mail posted to the Subversion Dev mailing list.