Re: [PATCH v4] speed up svn_txdelta_apply_instructions

From: Stefan Fuhrmann <stefanfuhrmann_at_alice-dsl.de>
Date: Wed, 26 May 2010 23:42:51 +0200

Julian Foad wrote:
> Hi Stefan.
>
> Just a quick note to say thanks for this, but there seems to be a
> problem with this version when compiling:
>
> text_delta.c:709: warning: comparison between pointer and integer
>
> and when running the test suite in a trunk head build:
>
> text_delta.c:693: svn_txdelta_apply_instructions: Assertion `left > 0'
> failed.
>
> - Julian
>
Oops. I had forgot to run a debug build as well.
Both assertions have been fixed now.

-- Stefan^2.

Index: subversion/libsvn_delta/text_delta.c
===================================================================
--- subversion/libsvn_delta/text_delta.c (revision 944296)
+++ subversion/libsvn_delta/text_delta.c (working copy)
@@ -564,61 +564,176 @@
   return SVN_NO_ERROR;
}

+/* memcpy() is hard to tune for a wide range of buffer lengths.
+ * Therefore, it is often tuned for high throughput on large
+ * buffers and relatively low latency for mid-sized buffers
+ * (tens of bytes). However, the overhead for very small buffers
+ * (<10 bytes) is still high. Even passing the parameters, for
+ * instance, may take as long as copying 3 bytes.
+ *
+ * Because short copy sequences seem to be a common case in
+ * non-packed FSFS repositories, we copy them directly. Larger
+ * buffer sizes aren't hurt measurably by the exta 'if' clause.
+ *
+ * As a further optimization, we return a pointer to the first
+ * byte after the copied target range.
+ */
+static APR_INLINE char*
+fast_memcpy(char *target, const char *source, apr_size_t len)
+{
+ if (len > 7)
+ {
+ memcpy(target, source, len);
+ target += len;
+ }
+ else
+ {
+ /* memcpy is not exactly fast for small block sizes.
+ * Since they are common, let's run optimized code for them.
+ */
+ const char *end = source + len;
+ for (; source != end; source++)
+ *(target++) = *source;
+ }

+ return target;
+}
+
+/* Unlike memmove() or memcpy(), create repeating patterns if
+ * source and target range overlap. Returns a pointer to the first
+ * byte after the copied target range.
+ */
+static APR_INLINE char*
+patterning_copy(char *target, const char *source, apr_size_t len)
+{
+ const char *end = source + len;
+
+ /* On the majority of machines (x86 / x64), unaligned access is
+ * much cheaper than repeated aligned access. Therefore, use
+ * chunky copies on these machines when feasible.
+ * For those machines, GCC, ICC and MSC will define one of the following:
+ */
+#if defined(_M_IX86) || defined(_M_X64) || defined(i386) || defined(__x86_64)
+
+ if (end + sizeof(apr_uint32_t) <= target)
+ {
+ /* Source and target are at least 4 bytes apart, so we
+ * can copy in 4-byte chunks.
+ */
+ for (; source + sizeof (apr_uint32_t) <= end;
+ source += sizeof (apr_uint32_t),
+ target += sizeof (apr_uint32_t))
+ *(apr_uint32_t*)(target) = *(apr_uint32_t*)(source);
+ }
+
+#endif
+
+ /* fall-through to byte-wise copy (either for the below-chunk-size
+ * tail or the whole copy)
+ */
+ for (; source != end; source++)
+ *(target++) = *source;
+
+ return target;
+}
+
+/* Fill the tbuf target buffer by applying the copy instructions
+ * in window to the source buffer sbuf. The target buffer length
+ * *tlen should match window->tview_len or be larger (assertion
+ * being triggered otherwise). It will receive the number of bytes
+ * actually copied to the target buffer.
+ *
+ * All copy source references - either to source, window new data
+ * or target buffer must be valid, i.e. must not exceed the
+ * respective buffer bounds. count_and_verify_instructions()
+ * should take care of this.
+ *
+ * In debug mode, any type of data corruption in window that leads
+ * to out-of-bound buffer access should be detected. In release
+ * mode, only the instruction list itself and the target buffer
+ * are guarded. */
void
svn_txdelta_apply_instructions(svn_txdelta_window_t *window,
                                const char *sbuf, char *tbuf,
                                apr_size_t *tlen)
{
- const svn_txdelta_op_t *op;
- apr_size_t i, j, tpos = 0;
+ /* The main loop is run quite often.
+ * Pre-calculate as much data as possible.
+ */
+ const svn_txdelta_op_t *op = window->ops;
+ const svn_txdelta_op_t *last_op = window->ops + window->num_ops;

- for (op = window->ops; op < window->ops + window->num_ops; op++)
+ /* target buffer range is limited by target buffer length
+ * as well as the window length.
+ */
+ apr_size_t left = *tlen < window->tview_len ? *tlen : window->tview_len;
+
+ /* where to copy to */
+ char *target = tbuf;
+
+ /* Process the window copy ops until we filled the target buffer. */
+ for (; op < last_op; ++op)
     {
- const apr_size_t buf_len = (op->length < *tlen - tpos
- ? op->length : *tlen - tpos);
+ /* under no circumstance will we write beyond the end
+ * of the target buffer. If there are "too many" operations,
+ * the last ones will be zero-sized copies. Our special
+ * copy routines are guaranteed not to segfault in that case.
+ */
+ const apr_size_t buf_len = op->length < left ? op->length : left;
+ left -= buf_len;

       /* Check some invariants common to all instructions. */
- assert(tpos + op->length <= window->tview_len);
+ assert(target - tbuf + op->length <= window->tview_len);

+ /* If that assertion is violated, the target buffer is much
+ * too small. Your window instructions are probably corrupt.
+ * This is the earliest spot where be can detect this issue
+ * but we may carry on executing the list anyways.
+ */
+ assert(buf_len > 0);
+
       switch (op->action_code)
         {
         case svn_txdelta_source:
           /* Copy from source area. */
           assert(op->offset + op->length <= window->sview_len);
- memcpy(tbuf + tpos, sbuf + op->offset, buf_len);
+ target = fast_memcpy(target, sbuf + op->offset, buf_len);
           break;

         case svn_txdelta_target:
- /* Copy from target area. Don't use memcpy() since its
- semantics aren't guaranteed for overlapping memory areas,
- and target copies are allowed to overlap to generate
- repeated data. */
- assert(op->offset < tpos);
- for (i = op->offset, j = tpos; i < op->offset + buf_len; i++)
- tbuf[j++] = tbuf[i];
+ /* Copy from target area. We can't use memcpy() or the like
+ * since we need a specific semantics for overlapping copies:
+ * they must result in repeating patterns.
+ * Note that most copyies won't have overlapping source and
+ * target ranges (they are just a result of self-compressed
+ * data) but a small percentage will.
+ */
+ assert(op->offset < target - tbuf);
+ target = patterning_copy(target, tbuf + op->offset, buf_len);
           break;

         case svn_txdelta_new:
           /* Copy from window new area. */
           assert(op->offset + op->length <= window->new_data->len);
- memcpy(tbuf + tpos,
- window->new_data->data + op->offset,
- buf_len);
+ target = fast_memcpy(target,
+ window->new_data->data + op->offset,
+ buf_len);
           break;

         default:
           assert(!"Invalid delta instruction code");
         }
-
- tpos += op->length;
- if (tpos >= *tlen)
- return; /* The buffer is full. */
     }

- /* Check that we produced the right amount of data. */
- assert(tpos == window->tview_len);
- *tlen = tpos;
+ /* Check that we produced the right amount of data.
+ * Otherwise, the target buffer has been too small or the sum of
+ * instruction lengths does not match the target window length.
+ * Both cases are probably caused by data corruption.
+ */
+ assert(target - tbuf == window->tview_len);
+
+ /* Return how much of the target buffer we actually filled. */
+ *tlen = target - tbuf;
}

/* This is a private interlibrary compatibility wrapper. */
Received on 2010-05-26 23:43:43 CEST

This message: [ Message body ]
Next message: Julian Foad: "Re: [PATCH v4] speed up svn_txdelta_apply_instructions"
Previous message: Chloe Sowers: "SVC Core Dump"
In reply to: Julian Foad: "Re: [PATCH v4] speed up svn_txdelta_apply_instructions"
Next in thread: Julian Foad: "Re: [PATCH v4] speed up svn_txdelta_apply_instructions"
Reply: Julian Foad: "Re: [PATCH v4] speed up svn_txdelta_apply_instructions"

Contemporary messages sorted: [ by date ] [ by thread ] [ by subject ] [ by author ] [ by messages with attachments ]