On Sat, Jan 1, 2011 at 9:33 PM, <stefan2@apache.org> wrote:
> Author: stefan2
> Date: Sat Jan 1 20:33:22 2011
> New Revision: 1054286
>
> URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
> Log:
> XDelta calculation is a major part of svn_txdelta_send_txstream.
> Therefore, speed up the string matching code and the relatively
> expensive hash-code (adler32) calculations.
>
> * subversion/libsvn_delta/xdelta.c
> (init_adler32): init adler checksum with 0 instead of 1;
> use svn_adler32 for performance
> (adler32_out): the "-1" must be omitted now that we
> start at 0 instead of 1 for s1.
> (adler32_replace): special, faster implementation replacing
> an adler32_out(), adler32_in() sequence
>
> (match_length): new utility function
> (find_match): speed up the main loop by calling match_length()
> (compute_delta): optimize adler32 calculations
>
> Modified:
> subversion/trunk/subversion/libsvn_delta/xdelta.c
Hi Stefan,

This makes my (Windows 32-bit, Visual C++ Express 2008) build fail:

svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external symbol _svn__adler32 referenced in function _init_adler32
..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal error LNK1120: 1 unresolved externals

Johan
> Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
> +++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan 1 20:33:22 2011
> @@ -29,6 +29,8 @@
>
> #include "svn_delta.h"
> #include "delta.h"
> +
> +#include "private/svn_adler32.h"
>
> /* This is pseudo-adler32. It is adler32 without the prime modulus.
> The idea is borrowed from monotone, and is a translation of the C++
> @@ -39,6 +41,10 @@
> #define ADLER32_MASK 0x0000ffff
> #define ADLER32_CHAR_MASK 0x000000ff
>
> +/* Size of the blocks we compute checksums for. This was chosen out of
> + thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
> +#define MATCH_BLOCKSIZE 64
> +
> /* Structure to store the state of our adler32 checksum. */
> struct adler32
> {
> @@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
> {
> ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK;
> ad->s1 &= ADLER32_MASK;
> - ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1;
> + ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK));
> ad->s2 &= ADLER32_MASK;
> --ad->len;
> }
>
> +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
> + * This function may (and will) only be called for
> + * ad->len == MATCH_BLOCKSIZE.
> + */
> +static APR_INLINE void
> +adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
> +{
> + apr_uint32_t s1 = ad->s1;
> + apr_uint32_t s2 = ad->s2;
> +
> + s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK));
> +
> + s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK;
> + s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK;
> +
> + s2 += s1;
> +
> + ad->s1 = s1 & ADLER32_MASK;
> + ad->s2 = s2 & ADLER32_MASK;
> +}
> +
> /* Return the current adler32 checksum in the adler32 structure. */
>
> static APR_INLINE apr_uint32_t
> @@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
> static APR_INLINE struct adler32 *
> init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
> {
> - ad->s1 = 1;
> - ad->s2 = 0;
> - ad->len = 0;
> - while (datalen--)
> - adler32_in(ad, *(data++));
> + apr_uint32_t adler32 = svn__adler32(0, data, datalen);
> +
> + ad->s1 = adler32 & ADLER32_MASK;
> + ad->s2 = (adler32 >> 16) & ADLER32_MASK;
> + ad->len = datalen;
> +
> return ad;
> }
>
> -/* Size of the blocks we compute checksums for. This was chosen out of
> - thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
> -#define MATCH_BLOCKSIZE 64
> -
> /* Information for a block of the delta source. The length of the
> block is the smaller of MATCH_BLOCKSIZE and the difference between
> the size of the source data and the position of this block. */
> @@ -201,6 +225,35 @@ init_blocks_table(const char *data,
> }
> }
>
> +/* Return the lowest position at which A and B differ. If no difference
> + * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
> + */
> +static apr_size_t
> +match_length(const char *a, const char *b, apr_size_t max_len)
> +{
> + apr_size_t pos = 0;
> +
> +#if SVN_UNALIGNED_ACCESS_IS_OK
> +
> + /* Chunky operation is so much faster ...
> + *
> + * We can't make this work on architectures that require aligned access
> + * because A and B will probably have different alignment. So, skipping
> + * the first few chars until alignment is reached is not an option.
> + */
> + for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
> + if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
> + break;
> +
> +#endif
> +
> + for (; pos < max_len; ++pos)
> + if (a[pos] != b[pos])
> + break;
> +
> + return pos;
> +}
> +
> /* Try to find a match for the target data B in BLOCKS, and then
> extend the match as long as data in A and B at the match position
> continues to match. We set the position in a we ended up in (in
> @@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
> apr_uint32_t sum = adler32_sum(rolling);
> apr_size_t alen, badvance, apos;
> apr_size_t tpos, tlen;
> + apr_size_t delta, max_delta;
> + const char *aptr, *bptr;
>
> tpos = find_block(blocks, sum);
>
> @@ -246,14 +301,15 @@ find_match(const struct blocks *blocks,
> apos = tpos;
> alen = tlen;
> badvance = tlen;
> +
> /* Extend the match forward as far as possible */
> - while ((apos + alen < asize)
> - && (bpos + badvance < bsize)
> - && (a[apos + alen] == b[bpos + badvance]))
> - {
> - ++alen;
> - ++badvance;
> - }
> + max_delta = asize - apos - alen < bsize - bpos - badvance
> + ? asize - apos - alen
> + : bsize - bpos - badvance;
> + delta = match_length(a + apos + alen, b + bpos + badvance, max_delta);
> +
> + alen += delta;
> + badvance += delta;
>
> /* See if we can extend backwards into a previous insert hunk. */
> while (apos > 0
> @@ -329,7 +385,6 @@ compute_delta(svn_txdelta__ops_baton_t *
> apr_size_t apos = 0;
> apr_size_t alen = 1;
> apr_size_t badvance = 1;
> - apr_size_t next;
> svn_boolean_t match;
>
> match = find_match(&blocks, &rolling, a, asize, b, bsize, lo, &apos,
> @@ -354,14 +409,49 @@ compute_delta(svn_txdelta__ops_baton_t *
> svn_txdelta__insert_op(build_baton, svn_txdelta_source,
> apos, alen, NULL, pool);
> }
> - next = lo;
> - for (; next < lo + badvance; ++next)
> +
> + if (badvance == 1)
> + {
> + /* This seems to be the _vast_ majority case -- even if
> + * you sum BADVANCE up, this case still accounts for 2/3
> + * of all bytes being processed.
> + */
> + if (lo + MATCH_BLOCKSIZE < bsize)
> + adler32_replace(&rolling, b[lo], b[lo + MATCH_BLOCKSIZE]);
> + else
> + adler32_out(&rolling, b[lo]);
> +
> + lo++;
> + }
> + else if (badvance >= MATCH_BLOCKSIZE)
> {
> - adler32_out(&rolling, b[next]);
> - if (next + MATCH_BLOCKSIZE < bsize)
> - adler32_in(&rolling, b[next + MATCH_BLOCKSIZE]);
> + /* BADVANCE is often large enough that we can calculate the
> + * Adler32 sum directly instead of expensively updating the
> + * existing values.
> + */
> + apr_size_t remaining_block = lo + MATCH_BLOCKSIZE < bsize
> + ? MATCH_BLOCKSIZE
> + : bsize - (lo + MATCH_BLOCKSIZE);
> + init_adler32(&rolling,
> + b + lo + badvance - remaining_block,
> + remaining_block);
> + lo += badvance;
> + }
> + else
> + {
> + /* The very rare 3rd case
> + * (can only possibly happen close to end of the file).
> + */
> + apr_size_t next = lo;
> +
> + for (; next < lo + badvance; ++next)
> + if (next + MATCH_BLOCKSIZE < bsize)
> + adler32_replace(&rolling, b[next], b[next + MATCH_BLOCKSIZE]);
> + else
> + adler32_out(&rolling, b[next]);
> +
> + lo = next;
> }
> - lo = next;
> }
>
> /* If we still have an insert pending at the end, throw it in. */
>
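For anyone following the adler32 changes above without applying the patch: the key point is that, with s1 starting at 0, the 64-byte rolling window can be slid forward one byte in O(1). That is what adler32_replace does, and what the badvance == 1 case in compute_delta relies on. Below is a minimal standalone sketch (not the committed code, all names are mine) that performs the same update and checks it against a from-scratch computation:

/* Minimal sketch (not the committed code) of the rolling pseudo-adler32
 * update introduced above: no prime modulus, 16-bit masks, s1 starts
 * at 0, fixed 64-byte window.  Rolling the window by one byte must give
 * the same s1/s2 as recomputing them from scratch. */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define WINDOW 64               /* MATCH_BLOCKSIZE in the patch */
#define MASK   0x0000ffffu      /* ADLER32_MASK */

/* Compute s1/s2 for buf[0..len) from scratch (what init_adler32 does,
 * ignoring the svn__adler32 call). */
static void
from_scratch(const unsigned char *buf, size_t len,
             uint32_t *s1, uint32_t *s2)
{
  uint32_t a = 0, b = 0;
  size_t i;

  for (i = 0; i < len; i++)
    {
      a = (a + buf[i]) & MASK;
      b = (b + a) & MASK;
    }
  *s1 = a;
  *s2 = b;
}

/* O(1) roll: drop c_out (the oldest byte), append c_in; this is the
 * same arithmetic as adler32_replace in the patch. */
static void
roll(uint32_t *s1, uint32_t *s2, unsigned char c_out, unsigned char c_in)
{
  *s2 -= WINDOW * (uint32_t)c_out;
  *s1 -= c_out;
  *s1 += c_in;
  *s2 += *s1;

  *s1 &= MASK;
  *s2 &= MASK;
}

int main(void)
{
  unsigned char data[WINDOW + 1];
  uint32_t s1, s2, r1, r2;
  size_t i;

  for (i = 0; i < sizeof(data); i++)
    data[i] = (unsigned char)(i * 37 + 11);    /* arbitrary test bytes */

  from_scratch(data, WINDOW, &s1, &s2);        /* window data[0..63]   */
  roll(&s1, &s2, data[0], data[WINDOW]);       /* slide to data[1..64] */
  from_scratch(data + 1, WINDOW, &r1, &r2);    /* recompute to verify  */

  printf("rolled:     s1=%u s2=%u\n", s1, s2);
  printf("recomputed: s1=%u s2=%u\n", r1, r2);
  return (s1 == r1 && s2 == r2) ? 0 : 1;
}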
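Similarly, here is a tiny self-contained illustration of the match_length() idea: compare a machine word at a time where the platform allows it, then finish byte by byte. Again this is only a sketch, not the committed function; ALLOW_UNALIGNED_LOADS is my stand-in for SVN_UNALIGNED_ACCESS_IS_OK and is off by default:

/* Sketch of the match_length() approach from the patch. */
#include <stdio.h>
#include <string.h>
#include <stddef.h>

static size_t
match_length_sketch(const char *a, const char *b, size_t max_len)
{
  size_t pos = 0;

#ifdef ALLOW_UNALIGNED_LOADS
  /* Fast path: a and b will usually have different alignment, so this
   * is only safe on CPUs that tolerate unaligned word loads. */
  for (; pos + sizeof(size_t) <= max_len; pos += sizeof(size_t))
    if (*(const size_t *)(a + pos) != *(const size_t *)(b + pos))
      break;
#endif

  /* Tail (or the whole comparison on strict-alignment platforms). */
  for (; pos < max_len; ++pos)
    if (a[pos] != b[pos])
      break;

  return pos;
}

int main(void)
{
  const char *x = "the quick brown fox jumps over the lazy dog";
  const char *y = "the quick brown fox jumps over the lazy cat";

  printf("common prefix: %u bytes\n",
         (unsigned)match_length_sketch(x, y, strlen(x)));
  return 0;
}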
--
Johan