[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r1333326 - in /subversion/trunk/subversion: include/private/svn_hash_private.h libsvn_fs_fs/temp_serializer.c libsvn_subr/hash.c

From: Hyrum K Wright <hyrum.wright_at_wandisco.com>
Date: Thu, 3 May 2012 14:53:27 -0500

On Thu, May 3, 2012 at 2:16 AM, <stefan2_at_apache.org> wrote:
> Author: stefan2
> Date: Thu May  3 07:16:11 2012
> New Revision: 1333326
>
> URL: http://svn.apache.org/viewvc?rev=1333326&view=rev
> Log:
> Introduce private API functions that wrap apr_hash_make_custom
> and return hash tables that are 2 to 4 times faster than the APR default.
> Both yield repeatable results (each instance will store items in the same
> order if the keys are the same). The first, svn_hask__make will return
> a hash table that behaves like pre APR 1.4.6 default hashes.
>
> * subversion/include/private/svn_hash_private.h
>  (svn_hash__make, svn_hash__make_fast): new private API
> * subversion/libsvn_subr/hash.c
>  (svn_hash_from_cstring_keys): use new API
>  (hashfunc_compatible, LOWER_7BITS_SET, BIT_7_SET, READ_CHUNK,
>   hashfunc_fast): implement efficient hash functions
>  (svn_hash__make, svn_hash__make_fast): implement new private API
> * subversion/libsvn_fs_fs/temp_serializer.c
>  (deserialize_dir, svn_fs_fs__deserialize_properties): use new API
>
> Modified:
>    subversion/trunk/subversion/include/private/svn_hash_private.h
>    subversion/trunk/subversion/libsvn_fs_fs/temp_serializer.c
>    subversion/trunk/subversion/libsvn_subr/hash.c
>
> Modified: subversion/trunk/subversion/include/private/svn_hash_private.h
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_hash_private.h?rev=1333326&r1=1333325&r2=1333326&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/include/private/svn_hash_private.h (original)
> +++ subversion/trunk/subversion/include/private/svn_hash_private.h Thu May  3 07:16:11 2012
> @@ -102,6 +102,31 @@ svn_hash__get_bool(apr_hash_t *hash,
>
>  /** @} */
>
> +/**
> + * @defgroup svn_hash_create Create optimized APR hash tables
> + * @{
> + */
> +
> +/** Returns a hash table, allocated in @a pool, with the same ordering of
> + * elements as APR 1.4.5 or earlier (using apr_hashfunc_default) but uses
> + * a faster hash function implementation.
> + *
> + * @since New in 1.8.
> + */
> +apr_hash_t *
> +svn_hash__make(apr_pool_t *pool);
> +
> +/** Returns a hash table, allocated in @a pool, that is faster to modify
> + * and access then the ones returned by @ref svn_hash__make. The element
> + * order does not match any APR default.
> + *
> + * @since New in 1.8.
> + */
> +apr_hash_t *
> +svn_hash__make_fast(apr_pool_t *pool);
> +
> +/** @} */
> +
>  /** @} */
>
>  #ifdef __cplusplus
>
> Modified: subversion/trunk/subversion/libsvn_fs_fs/temp_serializer.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/temp_serializer.c?rev=1333326&r1=1333325&r2=1333326&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_fs_fs/temp_serializer.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/temp_serializer.c Thu May  3 07:16:11 2012
> @@ -30,6 +30,7 @@
>
>  #include "private/svn_fs_util.h"
>  #include "private/svn_temp_serializer.h"
> +#include "private/svn_hash_private.h"
>
>  #include "temp_serializer.h"
>
> @@ -359,7 +360,7 @@ serialize_dir(apr_hash_t *entries, apr_p
>  static apr_hash_t *
>  deserialize_dir(void *buffer, hash_data_t *hash_data, apr_pool_t *pool)
>  {
> -  apr_hash_t *result = apr_hash_make(pool);
> +  apr_hash_t *result = svn_hash__make(pool);
>   apr_size_t i;
>   apr_size_t count;
>   svn_fs_dirent_t *entry;
> @@ -678,7 +679,7 @@ svn_fs_fs__deserialize_properties(void *
>                                   apr_size_t data_len,
>                                   apr_pool_t *pool)
>  {
> -  apr_hash_t *hash = apr_hash_make(pool);
> +  apr_hash_t *hash = svn_hash__make(pool);
>   properties_data_t *properties = (properties_data_t *)data;
>   size_t i;
>
>
> Modified: subversion/trunk/subversion/libsvn_subr/hash.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/hash.c?rev=1333326&r1=1333325&r2=1333326&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_subr/hash.c (original)
> +++ subversion/trunk/subversion/libsvn_subr/hash.c Thu May  3 07:16:11 2012
> @@ -40,6 +40,7 @@
>  #include "svn_pools.h"
>
>  #include "private/svn_dep_compat.h"
> +#include "private/svn_string_private.h"
>  #include "private/svn_hash_private.h"
>
>  #include "svn_private_config.h"
> @@ -496,7 +497,7 @@ svn_hash_from_cstring_keys(apr_hash_t **
>                            apr_pool_t *pool)
>  {
>   int i;
> -  apr_hash_t *hash = apr_hash_make(pool);
> +  apr_hash_t *hash = svn_hash__make(pool);
>   for (i = 0; i < keys->nelts; i++)
>     {
>       const char *key =
> @@ -561,3 +562,127 @@ svn_hash__get_bool(apr_hash_t *hash, con
>   return default_value;
>  }
>
> +
> +
> +/*** Optimized hash functions ***/
> +
> +/* Optimized version of apr_hashfunc_default. It assumes that the CPU has
> + * 32-bit multiplications with high throughput of at least 1 operation
> + * every 3 cycles. Latency is not an issue. Another optimization is a
> + * mildly unrolled main loop.
> + */
> +static unsigned int
> +hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
> +{
> +    unsigned int hash = 0;
> +    const unsigned char *key = (const unsigned char *)char_key;
> +    const unsigned char *p;
> +    apr_ssize_t i;
> +
> +    if (*klen == APR_HASH_KEY_STRING)
> +      {
> +        for (p = key; ; p+=4)
> +          {
> +            unsigned int new_hash = hash * 33 * 33 * 33 * 33;
> +            if (!p[0]) break;
> +            new_hash += p[0] * 33 * 33 * 33;
> +            if (!p[1]) break;
> +            new_hash += p[1] * 33 * 33;
> +            if (!p[2]) break;
> +            new_hash += p[2] * 33;
> +            if (!p[3]) break;
> +            hash = new_hash + p[3];
> +          }
> +        for (; *p; p++)
> +            hash = hash * 33 + *p;
> +
> +        *klen = p - key;
> +      }
> +    else
> +      {
> +        for (p = key, i = *klen; i >= 4; i-=4, p+=4)
> +          {
> +            hash = hash * 33 * 33 * 33 * 33
> +                 + p[0] * 33 * 33 * 33
> +                 + p[1] * 33 * 33
> +                 + p[2] * 33
> +                 + p[3];
> +          }
> +        for (; i; i--, p++)
> +            hash = hash * 33 + *p;
> +      }
> +
> +    return hash;
> +}
> +
> +#define LOWER_7BITS_SET 0x7f7f7f7f
> +#define BIT_7_SET       0x80808080
> +
> +#if SVN_UNALIGNED_ACCESS_IS_OK
> +#  define READ_CHUNK(p)\
> +     *(const apr_uint32_t *)(p);
> +#else
> +#  define READ_CHUNK(p) \
> +     (   (apr_uint32_t)p[0]        \
> +      + ((apr_uint32_t)p[1] << 8)  \
> +      + ((apr_uint32_t)p[2] << 16) \
> +      + ((apr_uint32_t)p[3] << 24))
> +#endif
> +
> +/* Similar to the previous but operates on 4 bytes at once instead of the
> + * classic unroll. This is particularly fast when unaligned access is
> + * supported.
> + */
> +static unsigned int
> +hashfunc_fast(const char *char_key, apr_ssize_t *klen)
> +{
> +    unsigned int hash = 0;
> +    const unsigned char *key = (const unsigned char *)char_key;
> +    const unsigned char *p;
> +    apr_ssize_t i;
> +    apr_uint32_t chunk, test;
> +
> +    if (*klen == APR_HASH_KEY_STRING)
> +      {
> +        for (p = key; ; p += sizeof(chunk))
> +          {
> +            /* This is a variant of the well-known strlen test: */
> +            chunk = READ_CHUNK(p);
> +            test = chunk | ((chunk & LOWER_7BITS_SET) + LOWER_7BITS_SET);
> +            if ((test & BIT_7_SET) != BIT_7_SET)
> +              break;
> +
> +            hash = (hash + chunk) * 0xd1f3da69;
> +          }
> +        for (; i; i--, p++)

i is being used uninitialized here, giving potential bogus results.

-Hyrum

> +            hash = hash * 33 + *p;
> +
> +        *klen = p - key;
> +      }
> +    else
> +      {
> +        for ( p = key, i = *klen
> +            ; i >= sizeof(chunk)
> +            ; i -= sizeof(chunk), p += sizeof(chunk))
> +          {
> +            chunk = READ_CHUNK(p);
> +            hash = (hash + chunk) * 0xd1f3da69;
> +          }
> +        for (; i; i--, p++)
> +            hash = hash * 33 + *p;
> +      }
> +
> +    return hash;
> +}
> +
> +apr_hash_t *
> +svn_hash__make(apr_pool_t *pool)
> +{
> +  return apr_hash_make_custom(pool, hashfunc_compatible);
> +}
> +
> +apr_hash_t *
> +svn_hash__make_fast(apr_pool_t *pool)
> +{
> +  return apr_hash_make_custom(pool, hashfunc_fast);
> +}
>
>

-- 
uberSVN: Apache Subversion Made Easy
http://www.uberSVN.com/
Received on 2012-05-03 21:54:00 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.