On Fri, Feb 22, 2008 at 1:28 PM, <hwright_at_tigris.org> wrote:
> Author: hwright
> Date: Fri Feb 22 10:28:01 2008
> New Revision: 29542
>
> Log:
> Use a rangelist-based algorithm to find paths which belong to overlapping
> mergeinfos. This is both quicker, and removes the possibility of a subtle
> server DOS attack which rangelist_to_revs() could have caused (by sending
> some bogus mergeinfo and causing excessive memory usage.)
>
> * subversion/libsvn_repos/log.c
> (rangelist_to_revs, find_merge_sources, pathlists_are_equal): Remove.
> (array_pop_front, struct rangelist_path, compare_rangelist_paths): New.
> (combine_mergeinfo_path_lists): Improve the algorithm used to combine
> mergeinfo path lists. Instead of breaking out each mergeinfo into its
> constituent revisions, use a rangelist-based algorithm to build the list
> of paths which have overlapping rangelists in a mergeinfo.
>
> * subversion/tests/cmdline/log_tests.py
> (merge_sensitive_log_single_revision): Update expectation by removing bogus
> revision.
>
>
> Modified:
> trunk/subversion/libsvn_repos/log.c
> trunk/subversion/tests/cmdline/log_tests.py
>
> Modified: trunk/subversion/libsvn_repos/log.c
> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_repos/log.c?pathrev=29542&r1=29541&r2=29542
> ==============================================================================
> --- trunk/subversion/libsvn_repos/log.c (original)
> +++ trunk/subversion/libsvn_repos/log.c Fri Feb 22 10:28:01 2008
> @@ -848,92 +848,20 @@
> return SVN_NO_ERROR;
> }
>
> -/* Unpack a rangelist into a list of discrete revisions.
> - *
> - * Take an array of 'svn_merge_range_t's in RANGELIST, return an array
> - * of 'svn_revnum_t's in *REVS. If RANGELIST contains no elements, set
> - * *REVS to an empty array.
> - */
> -static svn_error_t *
> -rangelist_to_revs(apr_array_header_t **revs,
> - const apr_array_header_t *rangelist,
> - apr_pool_t *pool)
> -{
> - int i;
> -
> - *revs = apr_array_make(pool, rangelist->nelts, sizeof(svn_revnum_t));
> -
> - for (i = 0; i < rangelist->nelts; i++)
> - {
> - svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
> - svn_merge_range_t *);
> - svn_revnum_t rev = range->start + 1;
> -
> - while (rev <= range->end)
> - {
> - APR_ARRAY_PUSH(*revs, svn_revnum_t) = rev;
> - rev += 1;
> - }
> - }
> -
> - return SVN_NO_ERROR;
> +/* Remove and return the first item from ARR. */
> +static void *
> +array_pop_front(apr_array_header_t *arr)
> +{
> + void *item = arr->elts;
> +
> + if (apr_is_empty_array(arr))
> + return NULL;
> +
> + arr->elts += arr->elt_size;
> + arr->nelts -= 1;
> + return item;
> }
Hi Hyrum,
This function doesn't seem safe if we subsequently push new elements
onto ARR. For example, if ARR->NELTS == ARR->NALLOC and we pop the
first element, then push a new element onto ARR with apr_array_push():
APR_DECLARE(void *) apr_array_push(apr_array_header_t *arr)
{
if (arr->nelts == arr->nalloc) {
^^^
*** ARR->NELTS == ARR->NALLOC - 1, so we won't allocate a new element
in the array since it looks like we already have space ***
int new_size = (arr->nalloc <= 0) ? 1 : arr->nalloc * 2;
char *new_data;
new_data = apr_palloc(arr->pool, arr->elt_size * new_size);
memcpy(new_data, arr->elts, arr->nalloc * arr->elt_size);
memset(new_data + arr->nalloc * arr->elt_size, 0,
arr->elt_size * (new_size - arr->nalloc));
arr->elts = new_data;
arr->nalloc = new_size;
}
++arr->nelts;
return arr->elts + (arr->elt_size * (arr->nelts - 1));
^^^
**** Oops, we return a pointer outside of the allocated array no? ***
}
FWIW I don't see that this commit pushes anything onto the arrays it
front pops from, so it hasn't caused a problem yet.
The solution is probably as simple as decrementing ARR->NALLOC, though
I'm not sure what problems that might cause.
> -/* Set *MERGE_SOURCES to an array of 'char *' paths, where each path
> - is one that in MERGEINFO has REVISION as part of its rangelist.
> - If none, set *MERGE_SOURCES to an empty array. */
> -static svn_error_t *
> -find_merge_sources(apr_array_header_t **merge_sources,
> - svn_revnum_t revision,
> - svn_mergeinfo_t mergeinfo,
> - apr_pool_t *pool)
> -{
> - apr_hash_index_t *hi;
> -
> - *merge_sources = apr_array_make(pool, 0, sizeof(const char *));
> - for (hi = apr_hash_first(pool, mergeinfo); hi; hi = apr_hash_next(hi))
> - {
> - apr_array_header_t *rangelist;
> - const char *key;
> - int i;
> -
> - apr_hash_this(hi, (void*) &key, NULL, (void*) &rangelist);
> -
> - for (i = 0; i < rangelist->nelts; i++)
> - {
> - svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
> - svn_merge_range_t *);
> -
> - if (revision > range->start && revision <= range->end)
> - APR_ARRAY_PUSH(*merge_sources, const char *) = key;
> - }
> - }
> -
> - return SVN_NO_ERROR;
> -}
> -
> -/* Return TRUE is the paths in PATHLIST1 are the same as those in PATHLIST2,
> - FALSE otherwise. */
> -static svn_boolean_t
> -pathlists_are_equal(apr_array_header_t *pathlist1,
> - apr_array_header_t *pathlist2)
> -{
> - int i;
> -
> - if (pathlist1->nelts != pathlist2->nelts)
> - return FALSE;
> -
> - for (i = 0; i < pathlist1->nelts; i++)
> - {
> - const char *path1 = APR_ARRAY_IDX(pathlist1, i, const char *);
> - const char *path2 = APR_ARRAY_IDX(pathlist2, i, const char *);
> -
> - if (strcmp(path1, path2) != 0)
> - return FALSE;
> - }
> -
> - return TRUE;
> -}
> /* A struct which represents a single revision range, and the paths which
> have mergeinfo in that range. */
> struct path_list_range
> @@ -942,6 +870,39 @@
> svn_merge_range_t range;
> };
>
> +/* A struct which represents "inverse mergeinfo", that is, instead of having
> + a path->revision_range_list mapping, which is the way mergeinfo is commonly
> + represented, this struct enables a revision_range_list,path tuple, where
> + the paths can be accessed by revision. */
> +struct rangelist_path
> +{
> + apr_array_header_t *rangelist;
> + const char *path;
> +};
> +
> +/* Comparator function for combine_mergeinfo_path_lists(). Returns */
Looks like you meant to say something more here, "Returns..."
> +static int
> +compare_rangelist_paths(const void *a, const void *b)
> +{
> + struct rangelist_path *rpa = *((struct rangelist_path **) a);
> + struct rangelist_path *rpb = *((struct rangelist_path **) b);
> + svn_merge_range_t *mra = APR_ARRAY_IDX(rpa->rangelist, 0,
> + svn_merge_range_t *);
> + svn_merge_range_t *mrb = APR_ARRAY_IDX(rpb->rangelist, 0,
> + svn_merge_range_t *);
> +
> + if (mra->start < mrb->start)
> + return -1;
> + if (mra->start > mrb->start)
> + return 1;
> + if (mra->end < mrb->end)
> + return -1;
> + if (mra->end > mrb->end)
> + return 1;
> +
> + return 0;
> +}
> +
> /* From MERGEINFO, return in *COMBINED_LIST, allocated in POOL, a list of
> 'struct path_list_range's. This list represents the rangelists in
> MERGEINFO and each path which has mergeinfo in that range. */
> @@ -951,63 +912,135 @@
> apr_pool_t *pool)
> {
> apr_hash_index_t *hi;
> - apr_array_header_t *rangelist;
> - apr_array_header_t *revs;
> - apr_array_header_t *path_lists;
> - struct path_list_range *plr;
> + apr_array_header_t *rangelist_paths;
> + struct rangelist_path *first_rp;
> apr_pool_t *subpool = svn_pool_create(pool);
> - int i;
>
> - /* Get all the revisions in all the mergeinfo. */
> - rangelist = apr_array_make(subpool, 0, sizeof(svn_merge_range_t *));
> + /* Create a list of (revision range, path) tuples from MERGEINFO. */
> + rangelist_paths = apr_array_make(subpool, apr_hash_count(mergeinfo),
> + sizeof(struct rangelist_path *));
> for (hi = apr_hash_first(subpool, mergeinfo); hi;
> hi = apr_hash_next(hi))
> {
> - const char *path;
> - apr_array_header_t *changes;
> -
> - apr_hash_this(hi, (void *) &path, NULL, (void *) &changes);
> - SVN_ERR(svn_rangelist_merge(&rangelist, changes, subpool));
> + int i;
> + struct rangelist_path *rp = apr_palloc(subpool, sizeof(*rp));
> + apr_hash_this(hi, (void *) &rp->path, NULL,
> + (void *) &rp->rangelist);
> + APR_ARRAY_PUSH(rangelist_paths, struct rangelist_path *) = rp;
> +
> + /* We need to make local copies of the rangelist, since we will be
> + modifying it, below. */
> + rp->rangelist = svn_rangelist_dup(rp->rangelist, subpool);
> +
> + /* Make all of the rangelists inclusive, both start and end. */
> + for (i = 0; i < rp->rangelist->nelts; i++)
> + APR_ARRAY_IDX(rp->rangelist, i, svn_merge_range_t *)->start += 1;
> }
> - SVN_ERR(rangelist_to_revs(&revs, rangelist, subpool));
>
> - /* For each revision, find the mergeinfo path(s) it belongs to.
> - TODO: Figure out a clever algorithm to do this on a per-rangelist basis. */
> - path_lists = apr_array_make(subpool, revs->nelts,
> - sizeof(apr_array_header_t *));
> - for (i = 0; i < revs->nelts; i++)
> + /* Loop over the (revision range, path) tuples, chopping them into
> + (revision range, paths) tuples, and appending those to the output list. */
> + *combined_list = apr_array_make(pool, 0, sizeof(struct path_list_range *));
> + while (rangelist_paths->nelts > 1)
> {
> - svn_revnum_t rev = APR_ARRAY_IDX(revs, i, svn_revnum_t);
> - apr_array_header_t *paths;
> -
> - SVN_ERR(find_merge_sources(&paths, rev, mergeinfo, pool));
> - APR_ARRAY_PUSH(path_lists, apr_array_header_t *) = paths;
> - }
> + svn_revnum_t youngest, next_youngest, tail, youngest_end;
> + struct path_list_range *plr;
> + struct rangelist_path *rp;
> + int num_revs;
> + int i;
>
> - /* Condense the revision and pathlist lists back to rangelist notiation. */
> - *combined_list = apr_array_make(pool, 0, sizeof(struct path_list_range *));
> - plr = apr_palloc(pool, sizeof(*plr));
> - plr->range.start = APR_ARRAY_IDX(revs, 0, svn_revnum_t);
> - plr->paths = APR_ARRAY_IDX(path_lists, 0, apr_array_header_t *);
> + /* First, sort the list such that the start revision of the first
> + revision arrays are sorted. */
> + qsort(rangelist_paths->elts, rangelist_paths->nelts,
> + rangelist_paths->elt_size, compare_rangelist_paths);
> +
> + /* Next, find the number of revision ranges which start with the same
> + revision. */
> + rp = APR_ARRAY_IDX(rangelist_paths, 0, struct rangelist_path *);
> + youngest =
> + APR_ARRAY_IDX(rp->rangelist, 0, struct svn_merge_range_t *)->start;
> + next_youngest = youngest;
> + for (num_revs = 1; next_youngest == youngest; num_revs++)
> + {
> + if (num_revs == rangelist_paths->nelts)
> + {
> + num_revs += 1;
> + break;
> + }
> + rp = APR_ARRAY_IDX(rangelist_paths, num_revs,
> + struct rangelist_path *);
> + next_youngest =
> + APR_ARRAY_IDX(rp->rangelist, 0, struct svn_merge_range_t *)->start;
> + }
> + num_revs -= 1;
>
> - for (i = 1; i < revs->nelts; i++)
> - {
> - apr_array_header_t *cur_pathlist = APR_ARRAY_IDX(path_lists, i,
> - apr_array_header_t *);
> - apr_array_header_t *prev_pathlist = APR_ARRAY_IDX(path_lists, i - 1,
> - apr_array_header_t *);
> - if (! pathlists_are_equal(cur_pathlist, prev_pathlist))
> + /* The start of the new range will be YOUNGEST, and we now find the end
> + of the new range, which should be either one less than the next
> + earliest start of a rangelist, or the end of the first rangelist. */
> + youngest_end = APR_ARRAY_IDX(APR_ARRAY_IDX(rangelist_paths, 0,
> + struct rangelist_path *)->rangelist,
> + 0, svn_merge_range_t *)->end;
> + if ( (next_youngest == youngest) || (youngest_end < next_youngest) )
> + tail = youngest_end;
> + else
> + tail = next_youngest - 1;
> +
> + /* Insert the (earliest, tail) tuple into the output list, along with
> + a list of paths which match it. */
> + plr = apr_palloc(pool, sizeof(*plr));
> + plr->range.start = youngest;
> + plr->range.end = tail;
> + plr->paths = apr_array_make(pool, num_revs, sizeof(const char *));
> + for (i = 0; i < num_revs; i++)
> + APR_ARRAY_PUSH(plr->paths, const char *) =
> + APR_ARRAY_IDX(rangelist_paths, i, struct rangelist_path *)->path;
> + APR_ARRAY_PUSH(*combined_list, struct path_list_range *) = plr;
> +
> + /* Now, check to see which (rangelist path) combinations we can remove,
> + and do so. */
> + for (i = 0; i < num_revs; i++)
> {
> - plr->range.end = APR_ARRAY_IDX(revs, i - 1, svn_revnum_t);
> - APR_ARRAY_PUSH(*combined_list, struct path_list_range *) = plr;
> + rp = APR_ARRAY_IDX(rangelist_paths, i, struct rangelist_path *);
> + svn_merge_range_t *range = APR_ARRAY_IDX(rp->rangelist, 0,
> + svn_merge_range_t *);
>
> - plr = apr_palloc(pool, sizeof(*plr));
> - plr->range.start = APR_ARRAY_IDX(revs, i, svn_revnum_t);
> - plr->paths = cur_pathlist;
> + /* Set the start of the range to beyond the end of the range we
> + just built. If the range is now "inverted", we can get pop it
> + off the list. */
> + range->start = tail + 1;
> + if (range->start > range->end)
> + {
> + if (rp->rangelist->nelts == 1)
> + {
> + /* The range is the only on its list, so we should remove
> + the entire rangelist_path, adjusting our loop control
> + variables appropriately. */
> + array_pop_front(rangelist_paths);
> + i--;
> + num_revs--;
> + }
> + else
> + {
> + /* We have more than one range on the list, so just remove
> + the first one. */
> + array_pop_front(rp->rangelist);
> + }
> + }
> }
> }
> - plr->range.end = APR_ARRAY_IDX(revs, i - 1, svn_revnum_t);
> - APR_ARRAY_PUSH(*combined_list, struct path_list_range *) = plr;
> +
> + /* Finally, add the last remaining (revision range, path) to the output
> + list. */
> + first_rp = APR_ARRAY_IDX(rangelist_paths, 0, struct rangelist_path *);
> + while (first_rp->rangelist->nelts > 0)
> + {
> + struct path_list_range *plr = apr_palloc(pool, sizeof(*plr));
> +
> + plr->paths = apr_array_make(pool, 1, sizeof(const char *));
> + APR_ARRAY_PUSH(plr->paths, const char *) = first_rp->path;
> + plr->range = *APR_ARRAY_IDX(first_rp->rangelist, 0, svn_merge_range_t *);
> + array_pop_front(first_rp->rangelist);
> + APR_ARRAY_PUSH(*combined_list, struct path_list_range *) = plr;
> + }
>
> svn_pool_destroy(subpool);
>
>
> Modified: trunk/subversion/tests/cmdline/log_tests.py
> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/tests/cmdline/log_tests.py?pathrev=29542&r1=29541&r2=29542
> ==============================================================================
> --- trunk/subversion/tests/cmdline/log_tests.py (original)
> +++ trunk/subversion/tests/cmdline/log_tests.py Fri Feb 22 10:28:01 2008
> @@ -1115,7 +1115,7 @@
>
> log_chain = parse_log_output(output)
> expected_merges = {
> - 14: [], 13 : [14], 12 : [14], 11 : [14, 12], 7 : [14, 12],
> + 14: [], 13 : [14], 12 : [14], 11 : [14, 12],
> }
> check_merge_results(log_chain, expected_merges)
>
> @@ -1125,7 +1125,7 @@
> '-g', '-r12', BRANCH_B_path)
> log_chain = parse_log_output(output)
> expected_merges = {
> - 12: [], 11 : [12], 7 : [12],
> + 12: [], 11 : [12],
> }
> check_merge_results(log_chain, expected_merges)
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: svn-unsubscribe_at_subversion.tigris.org
> For additional commands, e-mail: svn-help_at_subversion.tigris.org
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe_at_subversion.tigris.org
For additional commands, e-mail: dev-help_at_subversion.tigris.org
Received on 2008-02-26 19:11:21 CET