Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c
From: Joe Swatosh <joe.swatosh_at_gmail.com>
Date: Mon, 13 May 2013 15:51:18 -0700
Small (possible?) compatibility improvements: suggest APR_UINT64_C()
-- Joe On Tue, Apr 30, 2013 at 3:34 AM, <stefan2_at_apache.org> wrote: > Author: stefan2 > Date: Tue Apr 30 10:34:26 2013 > New Revision: 1477540 > > URL: http://svn.apache.org/r1477540 > Log: > Cherry-pick merge r458643 from branches/cache-server to /trunk > (this accidentally didn't get included into the r1476664 merge). > > Not intended for 1.8 back-port. > > Modified: > subversion/trunk/ (props changed) > subversion/trunk/subversion/libsvn_subr/cache-membuffer.c > > Propchange: subversion/trunk/ > ------------------------------------------------------------------------------ > Merged /subversion/branches/cache-server:r1458643 > > Modified: subversion/trunk/subversion/libsvn_subr/cache-membuffer.c > URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cache-membuffer.c?rev=1477540&r1=1477539&r2=1477540&view=diff > ============================================================================== > --- subversion/trunk/subversion/libsvn_subr/cache-membuffer.c (original) > +++ subversion/trunk/subversion/libsvn_subr/cache-membuffer.c Tue Apr 30 10:34:26 2013 > @@ -98,7 +98,7 @@ > * comments in ensure_data_insertable_l2(). > * > * To limit the entry size and management overhead, not the actual item keys > - * but only their MD5 checksums will not be stored. This is reasonably safe > + * but only their MD5-based hashes will be stored. This is reasonably safe > * to do since users have only limited control over the full keys, even if > * these contain folder paths. So, it is very hard to deliberately construct > * colliding keys. Random checksum collisions can be shown to be extremely > @@ -884,9 +884,12 @@ get_group_index(svn_membuffer_t **cache, > { > svn_membuffer_t *segment0 = *cache; > > - /* select the cache segment to use. they have all the same group_count */ > - *cache = &segment0[key[0] & (segment0->segment_count -1)]; > - return key[1] % segment0->group_count; > + /* select the cache segment to use. they have all the same group_count. > + * Since key may not be well-distributed, pre-fold it to a smaller but > + * "denser" ranger. The divisors are primes larger than the largest > + * counts. */ > + *cache = &segment0[(key[1] % 2809637ull) & (segment0->segment_count - 1)]; > + return (key[0] % 5030895599ull) % segment0->group_count; Perhaps: + *cache = &segment0[(key[1] % APR_UINT64_C(2809637)) & (segment0->segment_count - 1)]; + return (key[0] % APR_UINT64_C(5030895599)) % segment0->group_count; ? > } > > /* Reduce the hit count of ENTRY and update the accumulated hit info > @@ -1007,6 +1010,8 @@ find_entry(svn_membuffer_t *cache, > */ > if (group->used == GROUP_SIZE) > { > + static int count = 0; > + > /* every entry gets the same chance of being removed. > * Otherwise, we free the first entry, fill it and > * remove it again on the next occasion without considering > @@ -1026,6 +1031,7 @@ find_entry(svn_membuffer_t *cache, > let_entry_age(cache, entry); > > drop_entry(cache, entry); > + printf("%d\n", ++count); > } > > /* initialize entry for the new key > @@ -2042,6 +2048,22 @@ membuffer_cache_set_partial(svn_membuffe > * svn_cache__t instance. > */ > > +/* Stores the combined key value for the given key. It will be used by > + * combine_key() to short-circuit expensive hash calculations. > + */ > +typedef struct last_access_key_t > +{ > + /* result of key combining */ > + entry_key_t combined_key; > + > + /* length of the key (or APR_HASH_KEY_STRING if not used) */ > + apr_size_t key_len; > + > + /* the original key. Only KEY_LEN bytes are valid. We use uint32 for > + * better compatibility with pseudo-md5 functions. */ > + apr_uint32_t key[64]; > +} last_access_key_t; > + > /* Internal cache structure (used in svn_cache__t.cache_internal) basically > * holding the additional parameters needed to call the respective membuffer > * functions. > @@ -2089,6 +2111,11 @@ typedef struct svn_membuffer_cache_t > */ > int alloc_counter; > > + /* cache for the last key used. > + * Will be NULL for caches with short fix-sized keys. > + */ > + last_access_key_t *last_access; > + > /* if enabled, this will serialize the access to this instance. > */ > svn_mutex__t *mutex; > @@ -2106,46 +2133,127 @@ typedef struct svn_membuffer_cache_t > */ > #define ALLOCATIONS_PER_POOL_CLEAR 10 > > - > /* Basically calculate a hash value for KEY of length KEY_LEN, combine it > * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY. > + * This could replace combine_key() entirely but we actually use it only > + * when the quick path failed. > */ > static void > -combine_key(svn_membuffer_cache_t *cache, > - const void *key, > - apr_ssize_t key_len) > +combine_long_key(svn_membuffer_cache_t *cache, > + const void *key, > + apr_ssize_t key_len) > { > + assert(cache->last_access); > + > + /* handle variable-length keys */ > if (key_len == APR_HASH_KEY_STRING) > key_len = strlen((const char *) key); > > - if (key_len < 16) > + /* same key as the last time? -> short-circuit */ > + if ( key_len == cache->last_access->key_len > + && memcmp(key, cache->last_access->key, key_len) == 0) > { > - apr_uint32_t data[4] = { 0 }; > - memcpy(data, key, key_len); > + memcpy(cache->combined_key, cache->last_access->combined_key, > + sizeof(cache->combined_key)); > + } > + else if (key_len >= 64) > + { > + /* relatively long key. Use the generic, slow hash code for it */ > + apr_md5((unsigned char*)cache->combined_key, key, key_len); > + cache->combined_key[0] ^= cache->prefix[0]; > + cache->combined_key[1] ^= cache->prefix[1]; > > - svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data); > + /* is the key short enough to cache the result? */ > + if (key_len <= sizeof(cache->last_access->key)) > + { > + memcpy(cache->last_access->combined_key, cache->combined_key, > + sizeof(cache->combined_key)); > + cache->last_access->key_len = key_len; > + memcpy(cache->last_access->key, key, key_len); > + } > } > - else if (key_len < 32) > + else > { > - apr_uint32_t data[8] = { 0 }; > - memcpy(data, key, key_len); > + /* shorter keys use efficient hash code and *do* cache the results */ > + cache->last_access->key_len = key_len; > + if (key_len < 16) > + { > + memset(cache->last_access->key, 0, 16); > + memcpy(cache->last_access->key, key, key_len); > + > + svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, > + cache->last_access->key); > + } > + else if (key_len < 32) > + { > + memset(cache->last_access->key, 0, 32); > + memcpy(cache->last_access->key, key, key_len); > + > + svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, > + cache->last_access->key); > + } > + else > + { > + memset(cache->last_access->key, 0, 64); > + memcpy(cache->last_access->key, key, key_len); > + > + svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, > + cache->last_access->key); > + } > + > + cache->combined_key[0] ^= cache->prefix[0]; > + cache->combined_key[1] ^= cache->prefix[1]; > + > + memcpy(cache->last_access->combined_key, cache->combined_key, > + sizeof(cache->combined_key)); > + } > +} > + > +/* Basically calculate a hash value for KEY of length KEY_LEN, combine it > + * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY. > + */ > +static void > +combine_key(svn_membuffer_cache_t *cache, > + const void *key, > + apr_ssize_t key_len) > +{ > + /* copy of *key, padded with 0 */ > + apr_uint64_t data[2]; > > - svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data); > + /* short, fixed-size keys are the most common case */ > + if (key_len == 16) > + { > + data[0] = ((const apr_uint64_t *)key)[0]; > + data[1] = ((const apr_uint64_t *)key)[1]; > } > - else if (key_len < 64) > + else if (key_len == 8) > { > - apr_uint32_t data[16] = { 0 }; > + data[0] = ((const apr_uint64_t *)key)[0]; > + data[1] = 0; > + } > + else if (key_len != APR_HASH_KEY_STRING && key_len < 16) > + { > + data[0] = 0; > + data[1] = 0; > memcpy(data, key, key_len); > - > - svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data); > } > else > { > - apr_md5((unsigned char*)cache->combined_key, key, key_len); > + /* longer or variably sized keys */ > + combine_long_key(cache, key, key_len); > + return; > } > > - cache->combined_key[0] ^= cache->prefix[0]; > - cache->combined_key[1] ^= cache->prefix[1]; > + /* scramble key DATA. All of this must be reversible to prevent key > + * collisions. So, we limit ourselves to xor and permutations. */ > + data[1] = (data[1] << 27) | (data[1] >> 37); > + data[1] ^= data[0] & 0xffff; > + data[0] ^= data[1] & 0xffffffffffff0000ull; Likewise: + data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000); ? > + data[0] = (data[0] << 43) | (data[0] >> 21); > + > + /* combine with this cache's namespace */ > + cache->combined_key[0] = data[0] ^ cache->prefix[0]; > + cache->combined_key[1] = data[1] ^ cache->prefix[1]; > } > > /* Implement svn_cache__vtable_t.get (not thread-safe) > @@ -2560,6 +2668,18 @@ svn_cache__create_membuffer_cache(svn_ca > pool)); > memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix)); > > + /* fix-length keys of 16 bytes or under don't need a buffer because we > + * can use a very fast key combining algorithm. */ > + if ((klen == APR_HASH_KEY_STRING) || klen > sizeof(entry_key_t)) > + { > + cache->last_access = apr_pcalloc(pool, sizeof(*cache->last_access)); > + cache->last_access->key_len = APR_HASH_KEY_STRING; > + } > + else > + { > + cache->last_access = NULL; > + } > + > #ifdef SVN_DEBUG_CACHE_MEMBUFFER > > /* Initialize cache debugging support. > >Received on 2013-05-14 00:51:51 CEST |
This is an archived mail posted to the Subversion Dev mailing list.
This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.