[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] Further improve xdelta

From: Peter N. Lundblad <peter_at_famlundblad.se>
Date: 2006-03-23 23:14:46 CET

Hi,

Continuing my little love affair, I found a pretty simple way to
significantly improve the speed of the xdelta algorithm.

This patch introduces a custom hash table instead ofusing apr_hash_t.
I choose open addressing to improve locality. I credit much of the
speedup to the fact that GCC (4.0) optimizes this algorithm quite
aggressively. In my tests with a few large binary files, the speed
increases by about 18%. I think the speedup should be lower when the
compared data streams are more similar, but I haven't tested that.

Comments?

(NOte, I'm going to leave xdelta itself after this commit, since I
don't see a point in tweaking it forever.)

Thanks,
//Peter

[[[
Optimize the xdelta algorithm by introducing a custom hash table instead
of using the general-purpose apr_hash_t.

* subversion/libsvn_delta/xdelta.c
  (MATCH_BLOCKSIZE): Move earlier in the file.
  (struct block): Rename from match; store the adler sum instead of the
  length. Use apr_size_t for the position.
  (struct blocks): New struct.
  (add_block, find_block): New functions.
  (init_blocks_table): Rename from init_matches_table. Init the custom
  hash table.
  (find_match): Use custom hash table instead of apr_hash_t.
  (compute_delta): Use custom hash table. Move the check for a small
  target before initializing the table.
]]]

Index: subversion/libsvn_delta/xdelta.c
===================================================================
--- subversion/libsvn_delta/xdelta.c (revision 19001)
+++ subversion/libsvn_delta/xdelta.c (arbetskopia)
@@ -90,48 +90,105 @@
   return ad;
 }
 
-/* Match structure used in the xdelta matches hashtable. This contains the
- position and length of the initial match. */
+/* Size of the blocks we compute checksums for. This was chosen out of
+ thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
+#define MATCH_BLOCKSIZE 64
 
-struct match
+/* Information for a block of the delta source. The length of the
+ block is the smaller of MATCH_BLOCKSIZE and the difference between
+ the size of the source data and the position of this block. */
+struct block
 {
- apr_uint32_t pos;
- apr_uint32_t len;
+ apr_uint32_t adlersum;
+ apr_size_t pos;
 };
 
-/* Initialize the matches table from DATA. This goes through every
- block of BLOCKSIZE bytes in the source and checksums it, inserting the
- result into the MATCHES table. */
+/* A hash table, using open addressing, of the blocks of the source. */
+struct blocks
+{
+ /* The largest valid index of slots. */
+ apr_size_t max;
+ /* The vector of blocks. A pos value of (apr_size_t)-1 represents an unused
+ slot. */
+ struct block *slots;
+};
 
+
+/* Insert a block with the checksum ADLERSUM at position POS in the source data
+ into the table BLOCKS. Duplicates are ignored. */
 static void
-init_matches_table(const char *data,
+add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_size_t pos)
+{
+ apr_size_t h = adlersum & blocks->max;
+
+ /* This will terminate, since we know that we will not fill the table. */
+ while (blocks->slots[h].pos != (apr_size_t)-1)
+ {
+ /* No duplicates! */
+ if (blocks->slots[h].adlersum == adlersum)
+ return;
+ h = (h + 1) & blocks->max;
+ }
+ blocks->slots[h].adlersum = adlersum;
+ blocks->slots[h].pos = pos;
+}
+
+/* Find a block in BLOCKS with the checksum ADLERSUM, returning its position
+ in the source data. If there is no such block, return (apr_size_t)-1. */
+static apr_size_t
+find_block(const struct blocks *blocks, apr_uint32_t adlersum)
+{
+ apr_size_t h = adlersum & blocks->max;
+
+ while (blocks->slots[h].adlersum != adlersum
+ && blocks->slots[h].pos != (apr_size_t)-1)
+ h = (h + 1) & blocks->max;
+
+ return blocks->slots[h].pos;
+}
+
+/* Initialize the matches table from DATA of size DATALEN. This goes
+ through every block of MATCH_BLOCKSIZE bytes in the source and
+ checksums it, inserting the result into the BLOCKS table. */
+static void
+init_blocks_table(const char *data,
                    apr_size_t datalen,
- apr_size_t blocksize,
- apr_hash_t *matches,
+ struct blocks *blocks,
                    apr_pool_t *pool)
 {
- apr_size_t i = 0;
+ apr_size_t i;
   struct adler32 adler;
- for (i = 0; i < datalen; i += blocksize)
+ apr_size_t nblocks;
+ apr_size_t nslots = 1;
+
+ /* Be pesimistic about the block count. */
+ nblocks = datalen / MATCH_BLOCKSIZE + 1;
+ /* Find nearest larger power of two. */
+ while (nslots <= nblocks)
+ nslots *= 2;
+ /* Double the number of slots to avoid a too high load. */
+ nslots *= 2;
+ blocks->max = nslots - 1;
+ blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
+ for (i = 0; i < nslots; ++i)
     {
+ /* Avoid using an indeterminate value in the lookup. */
+ blocks->slots[i].adlersum = 0;
+ blocks->slots[i].pos = (apr_size_t)-1;
+ }
+
+ for (i = 0; i < datalen; i += MATCH_BLOCKSIZE)
+ {
       /* If this is the last piece, it may be blocksize large */
       apr_uint32_t step =
- ((i + blocksize) >= datalen) ? (datalen - i) : blocksize;
+ ((i + MATCH_BLOCKSIZE) >= datalen) ? (datalen - i) : MATCH_BLOCKSIZE;
       apr_uint32_t adlersum =
         adler32_sum(init_adler32(&adler, data + i, step));
- if (apr_hash_get(matches, &adlersum, sizeof(adlersum)) == NULL)
- {
- struct match *newmatch = apr_palloc(pool, sizeof *newmatch);
- apr_uint32_t *key = apr_palloc(pool, sizeof *key);
- newmatch->pos = i;
- newmatch->len = step;
- *key = adlersum;
- apr_hash_set(matches, key, sizeof(*key), newmatch);
- }
+ add_block(blocks, adlersum, i);
     }
 }
 
-/* Try to find a match for the target data B in MATCHES, and then
+/* Try to find a match for the target data B in BLOCKS, and then
    extend the match as long as data in A and B at the match position
    continues to match. We set the position in a we ended up in (in
    case we extended it backwards) in APOSP, the length of the match in
@@ -144,7 +201,7 @@
    lookup found a match, regardless of length. Return FALSE
    otherwise. */
 static svn_boolean_t
-find_match(apr_hash_t *matches,
+find_match(const struct blocks *blocks,
            const struct adler32 *rolling,
            const char *a,
            apr_size_t asize,
@@ -158,17 +215,16 @@
 {
   apr_uint32_t sum = adler32_sum(rolling);
   apr_size_t alen, badvance, apos;
- struct match *match;
   apr_size_t tpos, tlen;
 
+ tpos = find_block(blocks, sum);
+
   /* See if we have a match. */
- match = apr_hash_get(matches, &sum, sizeof(sum));
- if (match == NULL)
+ if (tpos == (apr_size_t)-1)
     return FALSE;
 
- /* See where our match started. */
- tpos = match->pos;
- tlen = match->len;
+ tlen = ((tpos + MATCH_BLOCKSIZE) >= asize)
+ ? (asize - tpos) : MATCH_BLOCKSIZE;
 
   /* Make sure it's not a false match. */
   if (memcmp(a + tpos, b + bpos, tlen) != 0)
@@ -204,11 +260,7 @@
   return TRUE;
 }
 
-/* Size of the blocks we compute checksums for. This was chosen out of
- thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
-#define MATCH_BLOCKSIZE 64
 
-
 /* Compute a delta from A to B using xdelta.
 
    The basic xdelta algorithm is as follows:
@@ -241,13 +293,10 @@
               apr_uint32_t bsize,
               apr_pool_t *pool)
 {
- apr_hash_t *matches = apr_hash_make(pool);
+ struct blocks blocks;
   struct adler32 rolling;
   apr_size_t sz, lo, hi, pending_insert_start = 0, pending_insert_len = 0;
 
- /* Initialize the matches table. */
- init_matches_table(a, asize, MATCH_BLOCKSIZE, matches, pool);
-
   /* If the size of the target is smaller than the match blocksize, just
      insert the entire target. */
   if (bsize < MATCH_BLOCKSIZE)
@@ -257,6 +306,9 @@
       return;
     }
 
+ /* Initialize the matches table. */
+ init_blocks_table(a, asize, &blocks, pool);
+
   /* Initialize our rolling checksum. */
   init_adler32(&rolling, b, MATCH_BLOCKSIZE);
   for (sz = bsize, lo = 0, hi = MATCH_BLOCKSIZE; lo < sz;)
@@ -267,7 +319,7 @@
       apr_size_t next;
       svn_boolean_t match;
 
- match = find_match(matches, &rolling, a, asize, b, bsize, lo, &apos,
+ match = find_match(&blocks, &rolling, a, asize, b, bsize, lo, &apos,
                          &alen, &badvance, &pending_insert_len);
 
       /* If we didn't find a real match, insert the byte at the target

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Mar 23 23:15:16 2006

This is an archived mail posted to the Subversion Dev mailing list.