[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c

From: Stefan Fuhrmann <stefan.fuhrmann_at_wandisco.com>
Date: Thu, 6 Jun 2013 12:02:39 +0200

Hi Joe,

Thanks for the patch. Committed as r1490221.

-- Stefan^2.

On Tue, May 14, 2013 at 12:51 AM, Joe Swatosh <joe.swatosh_at_gmail.com> wrote:

> Small (possible?) compatibility improvements: suggest APR_UINT64_C()
> instead of ull suffix in subversion\libsvn_subr\cache-membuffer.c.
>
> --
> Joe
>
> On Tue, Apr 30, 2013 at 3:34 AM, <stefan2_at_apache.org> wrote:
> > Author: stefan2
> > Date: Tue Apr 30 10:34:26 2013
> > New Revision: 1477540
> >
> > URL: http://svn.apache.org/r1477540
> > Log:
> > Cherry-pick merge r458643 from branches/cache-server to /trunk
> > (this accidentally didn't get included into the r1476664 merge).
> >
> > Not intended for 1.8 back-port.
> >
> > Modified:
> > subversion/trunk/ (props changed)
> > subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
> >
> > Propchange: subversion/trunk/
> >
> ------------------------------------------------------------------------------
> > Merged /subversion/branches/cache-server:r1458643
> >
> > Modified: subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
> > URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cache-membuffer.c?rev=1477540&r1=1477539&r2=1477540&view=diff
> >
> ==============================================================================
> > --- subversion/trunk/subversion/libsvn_subr/cache-membuffer.c (original)
> > +++ subversion/trunk/subversion/libsvn_subr/cache-membuffer.c Tue Apr 30
> 10:34:26 2013
> > @@ -98,7 +98,7 @@
> > * comments in ensure_data_insertable_l2().
> > *
> > * To limit the entry size and management overhead, not the actual item
> keys
> > - * but only their MD5 checksums will not be stored. This is reasonably
> safe
> > + * but only their MD5-based hashes will be stored. This is reasonably
> safe
> > * to do since users have only limited control over the full keys, even
> if
> > * these contain folder paths. So, it is very hard to deliberately
> construct
> > * colliding keys. Random checksum collisions can be shown to be
> extremely
> > @@ -884,9 +884,12 @@ get_group_index(svn_membuffer_t **cache,
> > {
> > svn_membuffer_t *segment0 = *cache;
> >
> > - /* select the cache segment to use. they have all the same
> group_count */
> > - *cache = &segment0[key[0] & (segment0->segment_count -1)];
> > - return key[1] % segment0->group_count;
> > + /* select the cache segment to use. they have all the same
> group_count.
> > + * Since key may not be well-distributed, pre-fold it to a smaller but
> > + * "denser" ranger. The divisors are primes larger than the largest
> > + * counts. */
> > + *cache = &segment0[(key[1] % 2809637ull) & (segment0->segment_count -
> 1)];
> > + return (key[0] % 5030895599ull) % segment0->group_count;
>
> Perhaps:
>
> + *cache = &segment0[(key[1] % APR_UINT64_C(2809637)) &
> (segment0->segment_count - 1)];
> + return (key[0] % APR_UINT64_C(5030895599)) % segment0->group_count;
>
> ?
>
> > }
> >
> > /* Reduce the hit count of ENTRY and update the accumulated hit info
> > @@ -1007,6 +1010,8 @@ find_entry(svn_membuffer_t *cache,
> > */
> > if (group->used == GROUP_SIZE)
> > {
> > + static int count = 0;
> > +
> > /* every entry gets the same chance of being removed.
> > * Otherwise, we free the first entry, fill it and
> > * remove it again on the next occasion without considering
> > @@ -1026,6 +1031,7 @@ find_entry(svn_membuffer_t *cache,
> > let_entry_age(cache, entry);
> >
> > drop_entry(cache, entry);
> > + printf("%d\n", ++count);
> > }
> >
> > /* initialize entry for the new key
> > @@ -2042,6 +2048,22 @@ membuffer_cache_set_partial(svn_membuffe
> > * svn_cache__t instance.
> > */
> >
> > +/* Stores the combined key value for the given key. It will be used by
> > + * combine_key() to short-circuit expensive hash calculations.
> > + */
> > +typedef struct last_access_key_t
> > +{
> > + /* result of key combining */
> > + entry_key_t combined_key;
> > +
> > + /* length of the key (or APR_HASH_KEY_STRING if not used) */
> > + apr_size_t key_len;
> > +
> > + /* the original key. Only KEY_LEN bytes are valid. We use uint32 for
> > + * better compatibility with pseudo-md5 functions. */
> > + apr_uint32_t key[64];
> > +} last_access_key_t;
> > +
> > /* Internal cache structure (used in svn_cache__t.cache_internal)
> basically
> > * holding the additional parameters needed to call the respective
> membuffer
> > * functions.
> > @@ -2089,6 +2111,11 @@ typedef struct svn_membuffer_cache_t
> > */
> > int alloc_counter;
> >
> > + /* cache for the last key used.
> > + * Will be NULL for caches with short fix-sized keys.
> > + */
> > + last_access_key_t *last_access;
> > +
> > /* if enabled, this will serialize the access to this instance.
> > */
> > svn_mutex__t *mutex;
> > @@ -2106,46 +2133,127 @@ typedef struct svn_membuffer_cache_t
> > */
> > #define ALLOCATIONS_PER_POOL_CLEAR 10
> >
> > -
> > /* Basically calculate a hash value for KEY of length KEY_LEN, combine
> it
> > * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> > + * This could replace combine_key() entirely but we actually use it only
> > + * when the quick path failed.
> > */
> > static void
> > -combine_key(svn_membuffer_cache_t *cache,
> > - const void *key,
> > - apr_ssize_t key_len)
> > +combine_long_key(svn_membuffer_cache_t *cache,
> > + const void *key,
> > + apr_ssize_t key_len)
> > {
> > + assert(cache->last_access);
> > +
> > + /* handle variable-length keys */
> > if (key_len == APR_HASH_KEY_STRING)
> > key_len = strlen((const char *) key);
> >
> > - if (key_len < 16)
> > + /* same key as the last time? -> short-circuit */
> > + if ( key_len == cache->last_access->key_len
> > + && memcmp(key, cache->last_access->key, key_len) == 0)
> > {
> > - apr_uint32_t data[4] = { 0 };
> > - memcpy(data, key, key_len);
> > + memcpy(cache->combined_key, cache->last_access->combined_key,
> > + sizeof(cache->combined_key));
> > + }
> > + else if (key_len >= 64)
> > + {
> > + /* relatively long key. Use the generic, slow hash code for it */
> > + apr_md5((unsigned char*)cache->combined_key, key, key_len);
> > + cache->combined_key[0] ^= cache->prefix[0];
> > + cache->combined_key[1] ^= cache->prefix[1];
> >
> > - svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
> > + /* is the key short enough to cache the result? */
> > + if (key_len <= sizeof(cache->last_access->key))
> > + {
> > + memcpy(cache->last_access->combined_key, cache->combined_key,
> > + sizeof(cache->combined_key));
> > + cache->last_access->key_len = key_len;
> > + memcpy(cache->last_access->key, key, key_len);
> > + }
> > }
> > - else if (key_len < 32)
> > + else
> > {
> > - apr_uint32_t data[8] = { 0 };
> > - memcpy(data, key, key_len);
> > + /* shorter keys use efficient hash code and *do* cache the
> results */
> > + cache->last_access->key_len = key_len;
> > + if (key_len < 16)
> > + {
> > + memset(cache->last_access->key, 0, 16);
> > + memcpy(cache->last_access->key, key, key_len);
> > +
> > + svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key,
> > + cache->last_access->key);
> > + }
> > + else if (key_len < 32)
> > + {
> > + memset(cache->last_access->key, 0, 32);
> > + memcpy(cache->last_access->key, key, key_len);
> > +
> > + svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key,
> > + cache->last_access->key);
> > + }
> > + else
> > + {
> > + memset(cache->last_access->key, 0, 64);
> > + memcpy(cache->last_access->key, key, key_len);
> > +
> > + svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key,
> > + cache->last_access->key);
> > + }
> > +
> > + cache->combined_key[0] ^= cache->prefix[0];
> > + cache->combined_key[1] ^= cache->prefix[1];
> > +
> > + memcpy(cache->last_access->combined_key, cache->combined_key,
> > + sizeof(cache->combined_key));
> > + }
> > +}
> > +
> > +/* Basically calculate a hash value for KEY of length KEY_LEN, combine
> it
> > + * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> > + */
> > +static void
> > +combine_key(svn_membuffer_cache_t *cache,
> > + const void *key,
> > + apr_ssize_t key_len)
> > +{
> > + /* copy of *key, padded with 0 */
> > + apr_uint64_t data[2];
> >
> > - svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
> > + /* short, fixed-size keys are the most common case */
> > + if (key_len == 16)
> > + {
> > + data[0] = ((const apr_uint64_t *)key)[0];
> > + data[1] = ((const apr_uint64_t *)key)[1];
> > }
> > - else if (key_len < 64)
> > + else if (key_len == 8)
> > {
> > - apr_uint32_t data[16] = { 0 };
> > + data[0] = ((const apr_uint64_t *)key)[0];
> > + data[1] = 0;
> > + }
> > + else if (key_len != APR_HASH_KEY_STRING && key_len < 16)
> > + {
> > + data[0] = 0;
> > + data[1] = 0;
> > memcpy(data, key, key_len);
> > -
> > - svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
> > }
> > else
> > {
> > - apr_md5((unsigned char*)cache->combined_key, key, key_len);
> > + /* longer or variably sized keys */
> > + combine_long_key(cache, key, key_len);
> > + return;
> > }
> >
> > - cache->combined_key[0] ^= cache->prefix[0];
> > - cache->combined_key[1] ^= cache->prefix[1];
> > + /* scramble key DATA. All of this must be reversible to prevent key
> > + * collisions. So, we limit ourselves to xor and permutations. */
> > + data[1] = (data[1] << 27) | (data[1] >> 37);
> > + data[1] ^= data[0] & 0xffff;
> > + data[0] ^= data[1] & 0xffffffffffff0000ull;
>
> Likewise:
>
> + data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
>
> ?
>
> > + data[0] = (data[0] << 43) | (data[0] >> 21);
> > +
> > + /* combine with this cache's namespace */
> > + cache->combined_key[0] = data[0] ^ cache->prefix[0];
> > + cache->combined_key[1] = data[1] ^ cache->prefix[1];
> > }
> >
> > /* Implement svn_cache__vtable_t.get (not thread-safe)
> > @@ -2560,6 +2668,18 @@ svn_cache__create_membuffer_cache(svn_ca
> > pool));
> > memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));
> >
> > + /* fix-length keys of 16 bytes or under don't need a buffer because we
> > + * can use a very fast key combining algorithm. */
> > + if ((klen == APR_HASH_KEY_STRING) || klen > sizeof(entry_key_t))
> > + {
> > + cache->last_access = apr_pcalloc(pool,
> sizeof(*cache->last_access));
> > + cache->last_access->key_len = APR_HASH_KEY_STRING;
> > + }
> > + else
> > + {
> > + cache->last_access = NULL;
> > + }
> > +
> > #ifdef SVN_DEBUG_CACHE_MEMBUFFER
> >
> > /* Initialize cache debugging support.
> >
> >
>

-- 
*Join one of our free daily demo sessions on* *Scaling Subversion for the
Enterprise <http://www.wandisco.com/training/webinars>*
*
*
Received on 2013-06-06 12:03:12 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.