Hi devs,
further reducing my backlog of patches sitting in my
working copy, this and the next patch optimize code
locally - shaving off cycles here and there. The net
effect is a speedup of somewhere between 3 and 10 percent
for repository access (ls, export, etc.).
In this patch, I eliminated calls to memcpy for small
copies as they are particularly expensive in the MS CRT.
-- Stefan^2.
[[[
Eliminate memcpy from critical paths when reading
data from the repository.
* subversion/libsvn_delta/text_delta.c
(svn_txdelta_apply_instructions): replace memcpy
for small amounts of data; optimize target copies
that do not overlap; optimize 'buffer full' detection
* subversion/libsvn_subr/svn_string.c
(svn_stringbuf_appendbytes): replace memcpy
with specialized code when adding single chars.
]]]
Index: subversion/libsvn_delta/text_delta.c
===================================================================
--- subversion/libsvn_delta/text_delta.c (revision 937673)
+++ subversion/libsvn_delta/text_delta.c (working copy)
@@ -32,6 +32,7 @@
#include "svn_io.h"
#include "svn_pools.h"
#include "svn_checksum.h"
+#include "svn_private_config.h"
#include "delta.h"
@@ -570,23 +597,38 @@
const char *sbuf, char *tbuf,
apr_size_t *tlen)
{
- const svn_txdelta_op_t *op;
- apr_size_t i, j, tpos = 0;
+ const svn_txdelta_op_t *op, *last_op = window->ops + window->num_ops;
+ apr_size_t to_fill = *tlen > window->tview_len ? window->tview_len : *tlen;
+ apr_size_t left = to_fill;
+ const char* end, *source;
+ char *target = tbuf;
- for (op = window->ops; op < window->ops + window->num_ops; op++)
+ for (op = window->ops; left > 0; op++)
{
- const apr_size_t buf_len = (op->length < *tlen - tpos
- ? op->length : *tlen - tpos);
+ const apr_size_t buf_len = op->length > left ? left : op->length;
+ left -= buf_len;
/* Check some invariants common to all instructions. */
- assert(tpos + op->length <= window->tview_len);
+ assert(target - tbuf + op->length <= window->tview_len);
switch (op->action_code)
{
case svn_txdelta_source:
/* Copy from source area. */
assert(op->offset + op->length <= window->sview_len);
- memcpy(tbuf + tpos, sbuf + op->offset, buf_len);
+ if (buf_len > 7)
+ {
+ memcpy(target, sbuf + op->offset, buf_len);
+ target += buf_len;
+ }
+ else
+ {
+ /* memcpy is not exactly fast for small block sizes.
+ Since they are common, let's run optimized code for them. */
+ end = sbuf + op->offset + buf_len;
+ for (source = sbuf + op->offset; source != end; source++)
+ *(target++) = *source;
+ }
break;
case svn_txdelta_target:
@@ -594,31 +636,46 @@
semantics aren't guaranteed for overlapping memory areas,
and target copies are allowed to overlap to generate
repeated data. */
- assert(op->offset < tpos);
- for (i = op->offset, j = tpos; i < op->offset + buf_len; i++)
- tbuf[j++] = tbuf[i];
+
+ assert(op->offset < target - *tbuf);
+ source = tbuf + op->offset;
+ end = tbuf + op->offset + buf_len;
+
+ if (end <= target)
+ for (; source + sizeof (unsigned) <= end;
+ source += sizeof (unsigned), target += sizeof (unsigned))
+ *(unsigned*)(target) = *(unsigned*)(source);
+
+ for (; source != end; source++)
+ *(target++) = *source;
break;
case svn_txdelta_new:
/* Copy from window new area. */
assert(op->offset + op->length <= window->new_data->len);
- memcpy(tbuf + tpos,
- window->new_data->data + op->offset,
- buf_len);
+ if (buf_len > 7)
+ {
+ memcpy(target,
+ window->new_data->data + op->offset,
+ buf_len);
+ target += buf_len;
+ }
+ else
+ {
+ /* memcpy is not exactly fast for small block sizes.
+ Since they are common, let's run optimized code for them. */
+ end = window->new_data->data + op->offset + buf_len;
+ for (source = window->new_data->data + op->offset; source != end; source++)
+ *(target++) = *source;
+ }
break;
default:
assert(!"Invalid delta instruction code");
}
-
- tpos += op->length;
- if (tpos >= *tlen)
- return; /* The buffer is full. */
}
- /* Check that we produced the right amount of data. */
- assert(tpos == window->tview_len);
- *tlen = tpos;
+ *tlen = to_fill;
}
/* This is a private interlibrary compatibility wrapper. */
Index: subversion/libsvn_subr/svn_string.c
===================================================================
--- subversion/libsvn_subr/svn_string.c (revision 937673)
+++ subversion/libsvn_subr/svn_string.c (working copy)
@@ -391,20 +391,34 @@
apr_size_t total_len;
void *start_address;
- total_len = str->len + count; /* total size needed */
+ /* This function is frequently called by svn_stream_readline
+ adding one char at a time. Eliminate the 'evil' memcpy in
+ that case unless the buffer must be resized. */
- /* +1 for null terminator. */
- svn_stringbuf_ensure(str, (total_len + 1));
+ apr_size_t old_len = str->len;
+ if ((count == 1) && (str->blocksize > old_len + 1))
+ {
+ str->data[old_len] = *bytes;
+ str->data[old_len+1] = '\0';
+ str->len++;
+ }
+ else
+ {
+ total_len = old_len + count; /* total size needed */
- /* get address 1 byte beyond end of original bytestring */
- start_address = (str->data + str->len);
+ /* +1 for null terminator. */
+ svn_stringbuf_ensure(str, (total_len + 1));
- memcpy(start_address, bytes, count);
- str->len = total_len;
+ /* get address 1 byte beyond end of original bytestring */
+ start_address = (str->data + old_len);
- str->data[str->len] = '\0'; /* We don't know if this is binary
- data or not, but convention is
- to null-terminate. */
+ memcpy(start_address, bytes, count);
+ str->len = total_len;
+
+ str->data[str->len] = '\0'; /* We don't know if this is binary
+ data or not, but convention is
+ to null-terminate. */
+ }
}
Received on 2010-04-25 16:52:32 CEST