Index: subversion/libsvn_diff/token.c =================================================================== --- subversion/libsvn_diff/token.c (revision 12955) +++ subversion/libsvn_diff/token.c (working copy) @@ -32,7 +32,7 @@ * Prime number to use as the size of the hash table. This number was * not selected by testing of any kind and may need tweaking. */ -#define SVN_DIFF__HASH_SIZE 127 +#define SVN_DIFF__HASH_SIZE 4093 struct svn_diff__node_t { Index: subversion/libsvn_client/blame.c =================================================================== --- subversion/libsvn_client/blame.c (revision 12955) +++ subversion/libsvn_client/blame.c (working copy) @@ -399,6 +399,7 @@ const char *temp_dir; struct delta_baton *delta_baton; + fprintf (stderr, "Handling revision %d\n", revnum); /* Clear the current pool. */ svn_pool_clear (frb->currpool); Index: subversion/libsvn_delta/vdelta.c =================================================================== --- subversion/libsvn_delta/vdelta.c (revision 12955) +++ subversion/libsvn_delta/vdelta.c (working copy) @@ -20,10 +20,9 @@ #include #include /* for APR_INLINE */ - +#include #include "svn_delta.h" #include "delta.h" - /* ==================================================================== */ /* Hash table for vdelta hashing. @@ -302,7 +301,193 @@ } } +/* This is pseudo-adler32, and xdelta, as monotone does it. The + adler32 has no prime modulus. This is a direct translation of the + C++ code, hence the reason it looks a bit weird. */ +#define adler32_mask 0x0000ffff +#define adler32_char_mask 0x000000ff +struct adler32 +{ + apr_uint32_t s1; + apr_uint32_t s2; + apr_uint32_t len; + apr_uint32_t mask; +}; +static inline void +adler32_in (struct adler32 *ad, const char c) +{ + ad->s1 += ((apr_uint32_t) (c)) & adler32_char_mask; + ad->s1 &= adler32_mask; + ad->s2 += ad->s1; + ad->s2 &= adler32_mask; + ad->len++; +} +static inline void +adler32_out (struct adler32 *ad, const char c) +{ + ad->s1 -= ((apr_uint32_t) (c)) & adler32_char_mask; + ad->s1 &= adler32_mask; + ad->s2 -= (ad->len * (((apr_uint32_t) c) & adler32_char_mask)) + 1; + ad->s2 &= adler32_mask; + --ad->len; +} +static inline apr_uint32_t +adler32_sum (const struct adler32 *ad) +{ + return (ad->s2 << 16) | (ad->s1); +} +static inline struct adler32 * +init_adler32 (struct adler32 *ad, const char *data, apr_uint32_t datalen) +{ + ad->s1 = 1; + ad->s2 = 0; + ad->mask = adler32_mask; + ad->len = 0; + while (datalen--) + adler32_in (ad, *(data++)); + return ad; +} +struct match +{ + apr_uint32_t pos; + apr_uint32_t len; +}; +static void +init_matches_table (const char *data, + apr_uint32_t datalen, + apr_uint32_t blocksize, + apr_hash_t *matches, + apr_pool_t *pool) +{ + apr_uint32_t i = 0; + struct adler32 adler; + + for (i = 0; i < datalen; i += blocksize) + { + apr_uint32_t step = ((i + blocksize) >= datalen) ? (datalen - i) : blocksize; + apr_uint32_t adlersum = adler32_sum (init_adler32 (&adler, data + i, step)); + if (apr_hash_get (matches, &adlersum, sizeof (adlersum)) == NULL) + { + struct match *newmatch = apr_palloc (pool, sizeof (struct match)); + apr_uint32_t *key = apr_palloc (pool, sizeof (apr_uint32_t)); + newmatch->pos = i; + newmatch->len = step; + *key = adlersum; + apr_hash_set (matches, key, sizeof (*key), + newmatch); + } + } +} +static void +find_match (apr_hash_t *matches, + const struct adler32 *rolling, + const char *a, + apr_uint32_t asize, + const char *b, + apr_uint32_t bsize, + apr_uint32_t bpos, + apr_uint32_t *aposp, + apr_uint32_t *alenp, + apr_uint32_t *badvancep) +{ + apr_uint32_t sum = adler32_sum (rolling); + apr_uint32_t alen, badvance, apos; + struct match *match; + apr_uint32_t tpos, tlen; + match = apr_hash_get (matches, &sum, sizeof (sum)); + if (match == NULL) + return; + tpos = match->pos; + tlen = match->len; + + /* Make sure it's not a false match */ + if (memcmp (a + tpos, b + bpos, tlen) != 0) + return; + + apos = tpos; + alen = tlen; + badvance = tlen; + + /* Extend the match forward */ + while ((apos + alen >= 0) + && (bpos + badvance >= 0) + && (apos + alen < asize) + && (bpos + badvance < bsize) + && (a[apos + alen] == b[bpos + badvance])) + { + ++alen; + ++badvance; + } + // assert (memcmp (a + apos, b + bpos, alen) == 0); + *aposp = apos; + *alenp = alen; + *badvancep = badvance; +} + +#define MATCH_BLOCKSIZE 64 +static void +compute_delta (svn_txdelta__ops_baton_t *build_baton, + const char *a, + apr_uint32_t asize, + const char *b, + apr_uint32_t bsize, + apr_pool_t *pool) +{ + apr_hash_t *matches = apr_hash_make(pool); + struct adler32 rolling; + apr_uint32_t sz, lo, hi; + + init_matches_table (a, asize, MATCH_BLOCKSIZE, matches, pool); + if (asize == 0) + { + svn_txdelta__insert_op (build_baton, svn_txdelta_new, + 0, bsize, b, pool); + return; + } + init_adler32 (&rolling, b, MATCH_BLOCKSIZE); + for (sz = bsize, lo = 0, hi = MATCH_BLOCKSIZE; lo < sz;) + { + + apr_uint32_t apos = 0; + apr_uint32_t alen = 1; + apr_uint32_t badvance = 1; + apr_uint32_t next; + + find_match (matches, &rolling, a, asize, b, bsize, lo, &apos, &alen, &badvance); + if (alen < MATCH_BLOCKSIZE && + (apos + alen < asize)) + { + // assert (alen == 1); + // assert (lo >= 0); + // assert (lo < bsize); + svn_txdelta__insert_op (build_baton, svn_txdelta_new, + 0, 1, b + lo, pool); + } + else + { + svn_txdelta__insert_op (build_baton, svn_txdelta_source, + apos, alen, NULL, pool); + } + next = lo; + for (; next < lo + badvance; ++next) + { + + // assert (next >= 0); + // assert (next < bsize); + adler32_out (&rolling, b[next]); + if (next + MATCH_BLOCKSIZE < bsize) + adler32_in (&rolling, b[next + MATCH_BLOCKSIZE]); + } + lo = next; + hi = lo + MATCH_BLOCKSIZE; + } +} + +#undef NOOP +#undef VDELTA +#define XDELTA + void svn_txdelta__vdelta (svn_txdelta__ops_baton_t *build_baton, const char *data, @@ -310,12 +495,22 @@ apr_size_t target_len, apr_pool_t *pool) { +#if defined(XDELTA) + compute_delta (build_baton, + data, source_len, + data + source_len, target_len, + pool); + +#elif defined(NOOP) + + svn_txdelta__insert_op (build_baton, svn_txdelta_new, + 0, target_len, data + source_len, pool); +#elif defined(VDELTA) hash_table_t *table = create_hash_table (source_len + target_len, pool); - vdelta (build_baton, data, data, data + source_len, FALSE, table, pool); vdelta (build_baton, data, data + source_len, data + source_len + target_len, - TRUE, table, pool); - + TRUE, table, pool); +#endif #if 0 /* This bit of code calculates the hash load and the number of collisions. Please note that a the number