[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c

From: Joe Swatosh <joe.swatosh_at_gmail.com>
Date: Mon, 13 May 2013 15:51:18 -0700

Small (possible?) compatibility improvements: suggest APR_UINT64_C()
instead of ull suffix in subversion\libsvn_subr\cache-membuffer.c.

--
Joe
On Tue, Apr 30, 2013 at 3:34 AM,  <stefan2_at_apache.org> wrote:
> Author: stefan2
> Date: Tue Apr 30 10:34:26 2013
> New Revision: 1477540
>
> URL: http://svn.apache.org/r1477540
> Log:
> Cherry-pick merge r458643 from branches/cache-server to /trunk
> (this accidentally didn't get included into the r1476664 merge).
>
> Not intended for 1.8 back-port.
>
> Modified:
>     subversion/trunk/   (props changed)
>     subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
>
> Propchange: subversion/trunk/
> ------------------------------------------------------------------------------
>   Merged /subversion/branches/cache-server:r1458643
>
> Modified: subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cache-membuffer.c?rev=1477540&r1=1477539&r2=1477540&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_subr/cache-membuffer.c (original)
> +++ subversion/trunk/subversion/libsvn_subr/cache-membuffer.c Tue Apr 30 10:34:26 2013
> @@ -98,7 +98,7 @@
>   * comments in ensure_data_insertable_l2().
>   *
>   * To limit the entry size and management overhead, not the actual item keys
> - * but only their MD5 checksums will not be stored. This is reasonably safe
> + * but only their MD5-based hashes will be stored. This is reasonably safe
>   * to do since users have only limited control over the full keys, even if
>   * these contain folder paths. So, it is very hard to deliberately construct
>   * colliding keys. Random checksum collisions can be shown to be extremely
> @@ -884,9 +884,12 @@ get_group_index(svn_membuffer_t **cache,
>  {
>    svn_membuffer_t *segment0 = *cache;
>
> -  /* select the cache segment to use. they have all the same group_count */
> -  *cache = &segment0[key[0] & (segment0->segment_count -1)];
> -  return key[1] % segment0->group_count;
> +  /* select the cache segment to use. they have all the same group_count.
> +   * Since key may not be well-distributed, pre-fold it to a smaller but
> +   * "denser" ranger.  The divisors are primes larger than the largest
> +   * counts. */
> +  *cache = &segment0[(key[1] % 2809637ull) & (segment0->segment_count - 1)];
> +  return (key[0] % 5030895599ull) % segment0->group_count;
Perhaps:
+  *cache = &segment0[(key[1] % APR_UINT64_C(2809637)) &
(segment0->segment_count - 1)];
+  return (key[0] % APR_UINT64_C(5030895599)) % segment0->group_count;
?
>  }
>
>  /* Reduce the hit count of ENTRY and update the accumulated hit info
> @@ -1007,6 +1010,8 @@ find_entry(svn_membuffer_t *cache,
>         */
>        if (group->used == GROUP_SIZE)
>          {
> +          static int count = 0;
> +
>            /* every entry gets the same chance of being removed.
>             * Otherwise, we free the first entry, fill it and
>             * remove it again on the next occasion without considering
> @@ -1026,6 +1031,7 @@ find_entry(svn_membuffer_t *cache,
>                let_entry_age(cache, entry);
>
>            drop_entry(cache, entry);
> +          printf("%d\n", ++count);
>          }
>
>        /* initialize entry for the new key
> @@ -2042,6 +2048,22 @@ membuffer_cache_set_partial(svn_membuffe
>   * svn_cache__t instance.
>   */
>
> +/* Stores the combined key value for the given key.  It will be used by
> + * combine_key() to short-circuit expensive hash calculations.
> + */
> +typedef struct last_access_key_t
> +{
> +  /* result of key combining */
> +  entry_key_t combined_key;
> +
> +  /* length of the key (or APR_HASH_KEY_STRING if not used) */
> +  apr_size_t key_len;
> +
> +  /* the original key.  Only KEY_LEN bytes are valid.  We use uint32 for
> +   * better compatibility with pseudo-md5 functions. */
> +  apr_uint32_t key[64];
> +} last_access_key_t;
> +
>  /* Internal cache structure (used in svn_cache__t.cache_internal) basically
>   * holding the additional parameters needed to call the respective membuffer
>   * functions.
> @@ -2089,6 +2111,11 @@ typedef struct svn_membuffer_cache_t
>     */
>    int alloc_counter;
>
> +  /* cache for the last key used.
> +   * Will be NULL for caches with short fix-sized keys.
> +   */
> +  last_access_key_t *last_access;
> +
>    /* if enabled, this will serialize the access to this instance.
>     */
>    svn_mutex__t *mutex;
> @@ -2106,46 +2133,127 @@ typedef struct svn_membuffer_cache_t
>   */
>  #define ALLOCATIONS_PER_POOL_CLEAR 10
>
> -
>  /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
>   * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> + * This could replace combine_key() entirely but we actually use it only
> + * when the quick path failed.
>   */
>  static void
> -combine_key(svn_membuffer_cache_t *cache,
> -            const void *key,
> -            apr_ssize_t key_len)
> +combine_long_key(svn_membuffer_cache_t *cache,
> +                 const void *key,
> +                 apr_ssize_t key_len)
>  {
> +  assert(cache->last_access);
> +
> +  /* handle variable-length keys */
>    if (key_len == APR_HASH_KEY_STRING)
>      key_len = strlen((const char *) key);
>
> -  if (key_len < 16)
> +  /* same key as the last time? -> short-circuit */
> +  if (   key_len == cache->last_access->key_len
> +      && memcmp(key, cache->last_access->key, key_len) == 0)
>      {
> -      apr_uint32_t data[4] = { 0 };
> -      memcpy(data, key, key_len);
> +      memcpy(cache->combined_key, cache->last_access->combined_key,
> +             sizeof(cache->combined_key));
> +    }
> +  else if (key_len >= 64)
> +    {
> +      /* relatively long key.  Use the generic, slow hash code for it */
> +      apr_md5((unsigned char*)cache->combined_key, key, key_len);
> +      cache->combined_key[0] ^= cache->prefix[0];
> +      cache->combined_key[1] ^= cache->prefix[1];
>
> -      svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
> +      /* is the key short enough to cache the result? */
> +      if (key_len <= sizeof(cache->last_access->key))
> +        {
> +          memcpy(cache->last_access->combined_key, cache->combined_key,
> +                 sizeof(cache->combined_key));
> +          cache->last_access->key_len = key_len;
> +          memcpy(cache->last_access->key, key, key_len);
> +        }
>      }
> -  else if (key_len < 32)
> +  else
>      {
> -      apr_uint32_t data[8] = { 0 };
> -      memcpy(data, key, key_len);
> +      /* shorter keys use efficient hash code and *do* cache the results */
> +      cache->last_access->key_len = key_len;
> +      if (key_len < 16)
> +        {
> +          memset(cache->last_access->key, 0, 16);
> +          memcpy(cache->last_access->key, key, key_len);
> +
> +          svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key,
> +                             cache->last_access->key);
> +        }
> +      else if (key_len < 32)
> +        {
> +          memset(cache->last_access->key, 0, 32);
> +          memcpy(cache->last_access->key, key, key_len);
> +
> +          svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key,
> +                             cache->last_access->key);
> +        }
> +      else
> +        {
> +          memset(cache->last_access->key, 0, 64);
> +          memcpy(cache->last_access->key, key, key_len);
> +
> +          svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key,
> +                             cache->last_access->key);
> +        }
> +
> +      cache->combined_key[0] ^= cache->prefix[0];
> +      cache->combined_key[1] ^= cache->prefix[1];
> +
> +      memcpy(cache->last_access->combined_key, cache->combined_key,
> +             sizeof(cache->combined_key));
> +    }
> +}
> +
> +/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
> + * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> + */
> +static void
> +combine_key(svn_membuffer_cache_t *cache,
> +            const void *key,
> +            apr_ssize_t key_len)
> +{
> +  /* copy of *key, padded with 0 */
> +  apr_uint64_t data[2];
>
> -      svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
> +  /* short, fixed-size keys are the most common case */
> +  if (key_len == 16)
> +    {
> +      data[0] = ((const apr_uint64_t *)key)[0];
> +      data[1] = ((const apr_uint64_t *)key)[1];
>      }
> -  else if (key_len < 64)
> +  else if (key_len == 8)
>      {
> -      apr_uint32_t data[16] = { 0 };
> +      data[0] = ((const apr_uint64_t *)key)[0];
> +      data[1] = 0;
> +    }
> +  else if (key_len != APR_HASH_KEY_STRING && key_len < 16)
> +    {
> +      data[0] = 0;
> +      data[1] = 0;
>        memcpy(data, key, key_len);
> -
> -      svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
>      }
>    else
>      {
> -      apr_md5((unsigned char*)cache->combined_key, key, key_len);
> +      /* longer or variably sized keys */
> +      combine_long_key(cache, key, key_len);
> +      return;
>      }
>
> -  cache->combined_key[0] ^= cache->prefix[0];
> -  cache->combined_key[1] ^= cache->prefix[1];
> +  /* scramble key DATA.  All of this must be reversible to prevent key
> +   * collisions.  So, we limit ourselves to xor and permutations. */
> +  data[1] = (data[1] << 27) | (data[1] >> 37);
> +  data[1] ^= data[0] & 0xffff;
> +  data[0] ^= data[1] & 0xffffffffffff0000ull;
Likewise:
+  data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
?
> +  data[0] = (data[0] << 43) | (data[0] >> 21);
> +
> +  /* combine with this cache's namespace */
> +  cache->combined_key[0] = data[0] ^ cache->prefix[0];
> +  cache->combined_key[1] = data[1] ^ cache->prefix[1];
>  }
>
>  /* Implement svn_cache__vtable_t.get (not thread-safe)
> @@ -2560,6 +2668,18 @@ svn_cache__create_membuffer_cache(svn_ca
>                         pool));
>    memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));
>
> +  /* fix-length keys of 16 bytes or under don't need a buffer because we
> +   * can use a very fast key combining algorithm. */
> +  if ((klen == APR_HASH_KEY_STRING) ||  klen > sizeof(entry_key_t))
> +    {
> +      cache->last_access = apr_pcalloc(pool, sizeof(*cache->last_access));
> +      cache->last_access->key_len = APR_HASH_KEY_STRING;
> +    }
> +  else
> +    {
> +      cache->last_access = NULL;
> +    }
> +
>  #ifdef SVN_DEBUG_CACHE_MEMBUFFER
>
>    /* Initialize cache debugging support.
>
>
Received on 2013-05-14 00:51:51 CEST

This is an archived mail posted to the Subversion Dev mailing list.