[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] Revised text delta applicator patch

From: Greg Hudson <ghudson_at_mit.edu>
Date: 2000-09-19 18:54:57 CEST

Here is a revised patch based on Branko's feedback and a CVS update.

Index: include/svn_delta.h
===================================================================
RCS file: /cvs/subversion/subversion/include/svn_delta.h,v
retrieving revision 1.95
diff -u -r1.95 svn_delta.h
--- include/svn_delta.h 2000/09/18 17:30:00 1.95
+++ include/svn_delta.h 2000/09/19 16:01:07
@@ -91,24 +91,31 @@
 
 
 /* An `svn_txdelta_window_t' object describes how to reconstruct a
- contiguous section of the target string. It contains a series of
- instructions which assemble the new target string text by pulling
- together substrings from:
- - the source file,
- - the target file text so far, and
- - a string of new data (accessible to this window only). */
+ contiguous section of the target string (the "target view") using a
+ specified contiguous region of the source string (the "source
+ view"). It contains a series of instructions which assemble the
+ new target string text by pulling together substrings from:
+ - the source view,
+ - the previously constructed portion of the target view,
+ - a string of new data contained within the window structure
+
+ The source view must always slide forward from one window to the
+ next; that is, neither the beginning nor the end of the source view
+ may move to the left as we read from a window stream. This
+ property allows us to apply deltas to non-seekable source streams
+ without making a full copy of the source stream. */
 
 /* A single text delta instruction. */
 typedef struct svn_txdelta_op_t {
   enum {
- /* Append the LEN bytes at OFFSET in the source string to the
+ /* Append the LEN bytes at OFFSET in the source view to the
        target. It must be the case that 0 <= OFFSET < OFFSET + LEN <=
- size of source string. */
+ size of source view. */
     svn_txdelta_source,
 
- /* Append the LEN bytes at OFFSET in the target file, to the
- target. It must be the case that 0 <= OFFSET < current size of
- the target string.
+ /* Append the LEN bytes at OFFSET in the target view, to the
+ target. It must be the case that 0 <= OFFSET < current
+ position in the target view.
 
        However! OFFSET + LEN may be *beyond* the end of the existing
        target data. "Where the heck does the text come from, then?"
@@ -136,6 +143,14 @@
 /* How to produce the next stretch of the target string. */
 typedef struct svn_txdelta_window_t {
 
+ /* The offset and length of the source view for this window. */
+ apr_off_t sview_offset;
+ apr_size_t sview_len;
+
+ /* The length of the target view for this window, i.e. the number of
+ * bytes which will be reconstructed by the instruction stream. */
+ apr_size_t tview_len;
+
   /* The number of instructions in this window. */
   int num_ops;
 
@@ -188,6 +203,17 @@
                                  svn_read_fn_t *target_fn,
                                  void *target_baton,
                                  apr_pool_t *pool);
+
+
+/* Apply DELTA to a source stream to produce a target stream. Call
+ * SOURCE_FN to gather as much source data as needed and TARGET_FN to
+ * output the generated target data. Do any necessary allocation in a
+ * sub-pool of DELTA's pool. */
+extern svn_error_t *svn_txdelta_apply (svn_txdelta_stream_t *delta,
+ svn_read_fn_t *source_fn,
+ void *source_baton,
+ svn_write_fn_t *target_fn,
+ void *target_baton);
 
 
 /* Free the delta stream STREAM. */
Index: include/svn_io.h
===================================================================
RCS file: /cvs/subversion/subversion/include/svn_io.h,v
retrieving revision 1.3
diff -u -r1.3 svn_io.h
--- include/svn_io.h 2000/09/09 16:11:27 1.3
+++ include/svn_io.h 2000/09/19 16:01:08
@@ -73,10 +73,24 @@
    BUFFER is a buffer to hold the data, and *LEN indicates how many
    bytes to read. Upon return, the function should set *LEN to the
    number of bytes actually read, or zero at the end of the data
- stream. */
+ stream. *LEN should only change when there is a read error or the
+ input stream ends before the full count of bytes can be read; the
+ generic read function is obligated to perform a full read when
+ possible.
+
+ Any necessary temporary allocation should be done in a sub-pool of
+ POOL. (If the read function needs to perform allocations which
+ last beyond the lifetime of the function, it must use a different
+ pool, e.g. one referenced through BATON.) */
 typedef svn_error_t *svn_read_fn_t (void *baton,
                                     char *buffer,
                                     apr_size_t *len,
                                     apr_pool_t *pool);
+
+/* Similar to svn_read_fn_t, but for writing. */
+typedef svn_error_t *svn_write_fn_t (void *baton,
+ const char *data,
+ apr_size_t *len,
+ apr_pool_t *pool);
 
 #endif /* SVN_IO_H */
Index: libsvn_delta/text_delta.c
===================================================================
RCS file: /cvs/subversion/subversion/libsvn_delta/text_delta.c,v
retrieving revision 1.1
diff -u -r1.1 text_delta.c
--- libsvn_delta/text_delta.c 2000/09/15 06:08:49 1.1
+++ libsvn_delta/text_delta.c 2000/09/19 16:01:10
@@ -50,9 +50,14 @@
 
 #include <assert.h>
 #include "svn_delta.h"
+#include "svn_io.h"
 #include "delta.h"
 
+#ifndef MAX
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
 
+
 
 /* Text delta stream descriotor. */
 
@@ -66,6 +71,7 @@
   /* Private data */
   apr_pool_t* pool; /* Pool to allocate stream data from. */
   svn_boolean_t more; /* TRUE if there are more data in the pool. */
+ apr_off_t pos; /* Position in source file. */
 };
 
 
@@ -80,6 +86,9 @@
   assert (pool != NULL);
 
   (*window) = apr_palloc (pool, sizeof (**window));
+ (*window)->sview_offset = 0;
+ (*window)->sview_len = 0;
+ (*window)->tview_len = 0;
   (*window)->num_ops = 0;
   (*window)->ops_size = 0;
   (*window)->ops = NULL;
@@ -165,6 +174,7 @@
   (*stream)->target_baton = target_baton;
   (*stream)->pool = subpool;
   (*stream)->more = TRUE;
+ (*stream)->pos = 0;
   return SVN_NO_ERROR;
 }
 
@@ -191,6 +201,7 @@
   else
     {
       svn_error_t *err = SVN_NO_ERROR;
+ apr_off_t source_offset = stream->pos;
       apr_size_t source_len = svn_txdelta__window_size;
       apr_size_t target_len = svn_txdelta__window_size;
       apr_pool_t *temp_pool = apr_make_sub_pool (stream->pool, NULL);
@@ -201,6 +212,7 @@
                          &source_len, NULL/*FIXME:*/);
       stream->target_fn (stream->target_baton, buffer + source_len,
                          &target_len, NULL/*FIXME:*/);
+ stream->pos += source_len;
 
       /* Forget everything if there's no target data. */
       *window = NULL;
@@ -214,9 +226,14 @@
       /* Create the delta window */
       err = svn_txdelta__init_window (window, stream);
       if (err == SVN_NO_ERROR)
- err = svn_txdelta__vdelta (*window, buffer,
- source_len, target_len,
- temp_pool);
+ {
+ (*window)->sview_offset = source_offset;
+ (*window)->sview_len = source_len;
+ (*window)->tview_len = target_len;
+ err = svn_txdelta__vdelta (*window, buffer,
+ source_len, target_len,
+ temp_pool);
+ }
 
       /* That's it. */
       apr_destroy_pool (temp_pool);
@@ -235,6 +252,156 @@
 {
   if (window)
     apr_destroy_pool (window->pool);
+}
+
+
+
+/* Reallocate a */
+static APR_INLINE void
+size_buffer (char **buf, apr_size_t *buf_size,
+ apr_off_t view_len, apr_pool_t *pool)
+{
+ if (view_len > *buf_size)
+ {
+ *buf_size *= 2;
+ if (*buf_size < view_len)
+ *buf_size = view_len;
+ *buf = apr_palloc (pool, *buf_size);
+ }
+}
+
+/* Apply WINDOW to a source view SBUF to produce a target view TBUF.
+ * SBUF is assumed to have window->sview_len bytes of data and TBUF is
+ * assumed to have room for window->tview_len bytes of output. This
+ * is purely a memory operation; nothing can go wrong as long as we
+ * have a valid window. */
+
+static void
+apply_window (svn_txdelta_window_t *window, const char *sbuf, char *tbuf)
+{
+ svn_txdelta_op_t *op;
+ apr_off_t i, tpos = 0;
+
+ for (op = window->ops; op < window->ops + window->num_ops; op++)
+ {
+ /* Check some invariants common to all instructions. */
+ assert (op->offset >= 0 && op->length >= 0);
+ assert (tpos + op->length <= window->tview_len);
+
+ switch (op->action_code)
+ {
+ case svn_txdelta_source:
+ /* Copy from source area. */
+ assert (op->offset + op->length <= window->sview_len);
+ memcpy (tbuf + tpos, sbuf + op->offset, op->length);
+ tpos += op->length;
+ break;
+
+ case svn_txdelta_target:
+ /* Copy from target area. Don't use memcpy() since its
+ semantics aren't guaranteed for overlapping memory areas,
+ and target copies are allowed to overlap to generate
+ repeated data. */
+ assert (op->offset < tpos);
+ for (i = op->offset; i < op->offset + op->length; i++)
+ tbuf[tpos++] = tbuf[i];
+ break;
+
+ case svn_txdelta_new:
+ /* Copy from window new area. */
+ assert (op->offset + op->length <= window->new->len);
+ memcpy (tbuf + tpos, window->new->data + op->offset, op->length);
+ tpos += op->length;
+ break;
+
+ default:
+ assert ("Invalid delta instruction code" == NULL);
+ }
+ }
+
+ /* Check that we produced the right amount of data. */
+ assert (tpos == window->tview_len);
+}
+
+
+/* Apply a text delta to an input stream. */
+
+svn_error_t *
+svn_txdelta_apply (svn_txdelta_stream_t *delta,
+ svn_read_fn_t *source_fn,
+ void *source_baton,
+ svn_write_fn_t *target_fn,
+ void *target_baton)
+{
+ apr_pool_t *temp_pool = apr_make_sub_pool (delta->pool, NULL);
+ char *sbuf = NULL, *tbuf = NULL;
+ apr_size_t sbuf_size = 0, tbuf_size = 0, len;
+ apr_off_t sbuf_offset = 0, sbuf_len = 0;
+ svn_error_t *err;
+ svn_txdelta_window_t *window = NULL, *oldwin;
+
+ while (1)
+ {
+ /* Get a new window. */
+ oldwin = window;
+ err = svn_txdelta_next_window (&window, delta);
+ if (err != SVN_NO_ERROR || window == NULL)
+ break;
+
+ /* Make sure the source view didn't slide backwards. */
+ assert (window->sview_offset >= sbuf_offset
+ && (window->sview_offset + window->sview_len
+ >= sbuf_offset + sbuf_len));
+
+ /* Make sure there's enough room in the target buffer. */
+ size_buffer (&tbuf, &tbuf_size, window->tview_len, temp_pool);
+
+ /* Prepare the source buffer for reading from the input stream. */
+ if (window->sview_offset != sbuf_offset || window->sview_len > sbuf_size)
+ {
+ char *old_sbuf = sbuf;
+
+ /* Make sure there's enough room. */
+ size_buffer (&sbuf, &sbuf_size, window->sview_len, temp_pool);
+
+ /* If the existing view overlaps with the new view, copy the
+ * overlap to the beginning of the new buffer. */
+ if (sbuf_offset + sbuf_len > window->sview_offset)
+ {
+ apr_size_t start = window->sview_offset - sbuf_offset;
+ memmove (sbuf, old_sbuf + start, sbuf_len - start);
+ sbuf_len -= start;
+ }
+ else
+ sbuf_len = 0;
+ sbuf_offset = window->sview_offset;
+ }
+
+ /* Read the remainder of the source view into the buffer. */
+ if (sbuf_len < window->sview_len)
+ {
+ len = window->sview_len - sbuf_len;
+ err = source_fn (source_baton, sbuf + sbuf_len, &len, temp_pool);
+ if (err == SVN_NO_ERROR && len != window->sview_len - sbuf_len)
+ err = svn_create_error (SVN_ERR_INCOMPLETE_DATA, 0,
+ "Delta source ended unexpectedly",
+ NULL, delta->pool);
+ sbuf_len = window->sview_len;
+ }
+
+ /* Apply the window instructions to the source view to generate
+ the target view. */
+ apply_window (window, sbuf, tbuf);
+
+ /* Write out the output. */
+ len = window->tview_len;
+ err = target_fn (target_baton, tbuf, &len, temp_pool);
+ if (err != SVN_NO_ERROR)
+ break;
+ }
+
+ apr_destroy_pool (temp_pool);
+ return err;
 }
 
 
Index: libsvn_delta/tests/.cvsignore
===================================================================
RCS file: /cvs/subversion/subversion/libsvn_delta/tests/.cvsignore,v
retrieving revision 1.4
diff -u -r1.4 .cvsignore
--- libsvn_delta/tests/.cvsignore 2000/09/15 06:15:34 1.4
+++ libsvn_delta/tests/.cvsignore 2000/09/19 16:01:11
@@ -5,3 +5,4 @@
 .libs
 deltaparse-test
 vdelta-test
+random-test
Index: libsvn_delta/tests/Makefile.am
===================================================================
RCS file: /cvs/subversion/subversion/libsvn_delta/tests/Makefile.am,v
retrieving revision 1.4
diff -u -r1.4 Makefile.am
--- libsvn_delta/tests/Makefile.am 2000/09/15 06:08:50 1.4
+++ libsvn_delta/tests/Makefile.am 2000/09/19 16:01:11
@@ -1,8 +1,9 @@
 ## Makefile.in is generated from this by automake.
 
-noinst_PROGRAMS = deltaparse-test vdelta-test
+noinst_PROGRAMS = deltaparse-test vdelta-test random-test
 deltaparse_test_SOURCES = deltaparse-test.c
 vdelta_test_SOURCES = vdelta-test.c
+random_test_SOURCES = random-test.c
 
 ## Flags needed when compiling:
 CFLAGS = -pthread
@@ -18,7 +19,15 @@
 vdelta_test_LDADD = ../libsvn_delta.la \
                     ../../libsvn_subr/libsvn_subr.la \
                     ../../libsvn_string/libsvn_string.la \
- ../../../apr/libapr.a
+ ../../../apr/libapr.a \
+ ../../../expat-lite/libexpat.la
+
+random_test_LDADD = ../libsvn_delta.la \
+ ../../libsvn_subr/libsvn_subr.la \
+ ../../libsvn_string/libsvn_string.la \
+ ../../../apr/libapr.a \
+ ../../../expat-lite/libexpat.la
+
 
 ## Make libtool be quiet
 LIBTOOL = @LIBTOOL@ --silent
Index: libsvn_delta/tests/random-test.c
===================================================================
--- /dev/null Tue May 5 16:32:27 1998
+++ libsvn_delta/tests/random-test.c Mon Sep 18 10:12:30 2000
@@ -0,0 +1,208 @@
+/*
+ * random-test.c: Test delta generation and application using random data.
+ *
+ * ================================================================
+ * Copyright (c) 2000 CollabNet. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement: "This product includes
+ * software developed by CollabNet (http://www.Collab.Net)."
+ * Alternately, this acknowlegement may appear in the software itself, if
+ * and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The hosted project names must not be used to endorse or promote
+ * products derived from this software without prior written
+ * permission. For written permission, please contact info@collab.net.
+ *
+ * 5. Products derived from this software may not use the "Tigris" name
+ * nor may "Tigris" appear in their names without prior written
+ * permission of CollabNet.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL COLLABNET OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of CollabNet.
+ */
+
+
+#include <stdio.h>
+#include <assert.h>
+#include "apr_general.h"
+#include "apr_getopt.h"
+#include "svn_delta.h"
+
+#define DEFAULT_MAXLEN (100 * 1024)
+
+/* Generate a temporary file containing random data. */
+static FILE *
+generate_random_file (int maxlen)
+{
+ int len, i;
+ FILE *fp;
+
+ fp = tmpfile ();
+ assert (fp != NULL);
+ len = rand () % maxlen;
+ for (i = 0; i < len; i++)
+ putc (rand () % 256, fp);
+ rewind (fp);
+ return fp;
+}
+
+static FILE *
+copy_tempfile (FILE *fp)
+{
+ FILE *newfp;
+ int c;
+
+ newfp = tmpfile ();
+ assert (newfp != NULL);
+ while ((c = getc (fp)) != EOF)
+ putc (c, newfp);
+ rewind (newfp);
+ return newfp;
+}
+
+/* NOTE: Does no error-checking. */
+static svn_error_t *
+read_from_file (void *baton, char *buffer, apr_size_t *len, apr_pool_t *pool)
+{
+ FILE *fp = baton;
+
+ if (!fp || feof (fp) || ferror (fp))
+ *len = 0;
+ else
+ *len = fread (buffer, 1, *len, fp);
+ return SVN_NO_ERROR;
+}
+
+/* NOTE: Does no error-checking. */
+static svn_error_t *
+write_to_file (void *baton, const char *data, apr_size_t *len,
+ apr_pool_t *pool)
+{
+ FILE *fp = baton;
+
+ *len = fwrite (data, 1, *len, fp);
+ return SVN_NO_ERROR;
+}
+
+int
+main (int argc, char **argv)
+{
+ FILE *source, *source_copy, *target, *target_regen;
+ apr_getopt_t *opt;
+ apr_pool_t *pool;
+ apr_status_t status;
+ char optch;
+ const char *optarg;
+ unsigned int seed;
+ int seed_set = 0, maxlen = DEFAULT_MAXLEN, c1, c2;
+ svn_txdelta_stream_t *stream;
+ svn_error_t *err;
+
+ apr_initialize();
+
+ /* Read options. */
+ pool = apr_make_sub_pool (NULL, NULL);
+ apr_initopt (&opt, NULL, argc, argv);
+ while ((status = apr_getopt (opt, "s:l:", &optch, &optarg)) == APR_SUCCESS)
+ {
+ switch (optch)
+ {
+ case 's':
+ seed = atoi (optarg);
+ seed_set = 1;
+ break;
+ case 'l':
+ maxlen = atoi (optarg);
+ break;
+ }
+ }
+ apr_destroy_pool (pool);
+ if (status != APR_EOF)
+ {
+ fprintf (stderr, "Usage: %s [-s seed]\n", argv[0]);
+ return 1;
+ }
+
+ if (!seed_set)
+ {
+ seed = (unsigned int) apr_now () ^ (unsigned int) getpid ();
+ printf ("Using seed %d\n", seed);
+ srand (seed);
+ }
+
+ /* Generate source and target files for the delta and its application. */
+ source = generate_random_file (maxlen);
+ target = generate_random_file (maxlen);
+ source_copy = copy_tempfile (source);
+ rewind (source);
+ target_regen = tmpfile ();
+
+ /* Create and simultaneously apply a delta between the source and target. */
+ pool = apr_make_sub_pool (NULL, NULL);
+ err = svn_txdelta (&stream,
+ read_from_file, source,
+ read_from_file, target,
+ pool);
+ if (err == SVN_NO_ERROR)
+ err = svn_txdelta_apply (stream,
+ read_from_file, source_copy,
+ write_to_file, target_regen);
+ if (err != SVN_NO_ERROR)
+ {
+ svn_handle_error (err, stderr);
+ exit (1);
+ }
+ apr_destroy_pool (pool);
+
+ /* Compare the two files. */
+ rewind (target);
+ rewind (target_regen);
+ while (1)
+ {
+ c1 = getc (target);
+ c2 = getc (target_regen);
+ if (c1 == EOF && c2 == EOF)
+ break;
+ if (c1 != c2)
+ {
+ printf ("Regenerated files differ; test failed.\n");
+ exit (1);
+ }
+ }
+ printf ("Test succeeded.\n");
+ exit (0);
+}
+
+
+
+/*
+ * local variables:
+ * eval: (load-file "../../svn-dev.el")
+ * end:
+ */
Received on Sat Oct 21 14:36:08 2006

This is an archived mail posted to the Subversion Dev mailing list.