I think there is a bug in how APR resizes hash tables.
Happy to give a reproduction recipe using Subversion, but that may not
even be necessary if one just follows this logic:
Resizes are triggered at the bottom of find_entry() in
apr/tables/apr_hash.c:
if (++ht->count > ht->max) {
ht->max = ht->max * 2 + 1;
resize_array(ht);
}
Note that the hash table's size (that is, its idea of its array's
length) is doubled *before* the array itself is reallocated. Stepping
into the above call to resize_array(ht) ...
static void resize_array(apr_hash_t *ht)
{
apr_hash_index_t *hi;
apr_hash_entry_t **new_array;
int i;
new_array = alloc_array(ht);
for (hi = apr_hash_first(ht); hi; hi = apr_hash_next(hi)) {
i = hi->this->hash & ht->max;
hi->this->next = new_array[i];
new_array[i] = hi->this;
}
ht->array = new_array;
}
Notice how apr_hash_first() and apr_hash_next() are used to loop over
the hash's array, re-assigning hash entries to particular buckets.
Unfortunately, the hash table still contains the old array -- the new
one doesn't get installed until after the loop, as the last statement
in resize_array(). But since ht->max has already been doubled in
find_entry(), the hash is *claiming* that its array is twice as large
as it actually is. So apr_hash_next() does not stop the iteration
soon enough -- it walks right past then end of the array and into the
pool, or whatever's there.
Anyway, this is what I think caused our apparently perfect hash table
to seg fault on a resize. (Immediately before the resize, I inspected
every single hash entry by hand from gdb, starting from the top-level
array entries. Thank goodness hashes first resize at 16 entries! :-))
Below is one patch to fix this; I'm sure there are others equally good
or better...
2000-10-27 Karl Fogel <kfogel@collab.net>
* apr_hash.c (alloc_array): take new_len argument.
(apr_make_hash): pass initial length to alloc_array.
(resize_array): pass new length to alloc_array, install both new
length and new array into table after re-bucketing is done.
*** apr_hash.c Fri Oct 27 16:21:01 2000
--- apr_hash.c Fri Oct 27 16:30:47 2000
***************
*** 122,130 ****
* Hash creation functions.
*/
! static apr_hash_entry_t **alloc_array(apr_hash_t *ht)
{
! return apr_pcalloc(ht->pool, sizeof(*ht->array) * (ht->max + 1));
}
APR_DECLARE(apr_hash_t *) apr_make_hash(apr_pool_t *pool)
--- 122,130 ----
* Hash creation functions.
*/
! static apr_hash_entry_t **alloc_array(apr_hash_t *ht, apr_size_t new_len)
{
! return apr_pcalloc(ht->pool, sizeof(*ht->array) * new_len);
}
APR_DECLARE(apr_hash_t *) apr_make_hash(apr_pool_t *pool)
***************
*** 134,140 ****
ht->pool = pool;
ht->count = 0;
ht->max = INITIAL_MAX;
! ht->array = alloc_array(ht);
return ht;
}
--- 134,140 ----
ht->pool = pool;
ht->count = 0;
ht->max = INITIAL_MAX;
! ht->array = alloc_array(ht, ht->max + 1);
return ht;
}
***************
*** 185,199 ****
{
apr_hash_index_t *hi;
apr_hash_entry_t **new_array;
int i;
! new_array = alloc_array(ht);
for (hi = apr_hash_first(ht); hi; hi = apr_hash_next(hi)) {
! i = hi->this->hash & ht->max;
hi->this->next = new_array[i];
new_array[i] = hi->this;
}
ht->array = new_array;
}
/*
--- 185,201 ----
{
apr_hash_index_t *hi;
apr_hash_entry_t **new_array;
+ apr_size_t new_max = ht->max * 2 + 1;
int i;
! new_array = alloc_array(ht, new_max);
for (hi = apr_hash_first(ht); hi; hi = apr_hash_next(hi)) {
! i = hi->this->hash & new_max;
hi->this->next = new_array[i];
new_array[i] = hi->this;
}
ht->array = new_array;
+ ht->max = new_max;
}
/*
***************
*** 273,282 ****
he->val = val;
*hep = he;
/* check that the collision rate isn't too high */
! if (++ht->count > ht->max) {
! ht->max = ht->max * 2 + 1;
resize_array(ht);
- }
return hep;
}
--- 275,282 ----
he->val = val;
*hep = he;
/* check that the collision rate isn't too high */
! if (++ht->count > ht->max)
resize_array(ht);
return hep;
}
Received on Sat Oct 21 14:36:13 2006