[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: [PATCH] Compressed streams

From: Eric Dorland <eric.dorland_at_mail.mcgill.ca>
Date: 2003-03-27 22:48:00 CET

* Philip Martin (philip@codematters.co.uk) wrote:
> Eric Dorland <eric.dorland@mail.mcgill.ca> writes:
>
> > I think this code could be useful above and beyond my pet project of
> > compressing text-base. BTW, is there any chance of that sort of code
> > getting in before 1.0?
>
> I'd support it.

Great! :)
 
> > Index: subversion/include/svn_io.h
> > ===================================================================
> > +++ subversion/include/svn_io.h (working copy)
> > @@ -389,6 +389,19 @@
> > svn_stream_t *svn_stream_from_stringbuf (svn_stringbuf_t *str,
> > apr_pool_t *pool);
> >
> > +/** Wrap zlib compression functions around the stream @a stream.
> > + *
> > + * Return a stream that decompresses all data read and compresses all
> > + * data written. The stream @a stream is used to read and write all
> > + * compressed data. All compression data structures are allocated on
> > + * @a pool. If compression support is not compiled in then @c
> > + * svn_stream_compressed() returns @a stream unmodified. Make sure you
> > + * call @c svn_stream_close() on the stream returned by this function,
> > + * so that all data is flushed and cleaned up.
> > + */
> > +svn_stream_t *svn_stream_compressed (svn_stream_t *stream,
> > + apr_pool_t *pool);
> > +
> > /** Read from a generic stream. */
> > svn_error_t *svn_stream_read (svn_stream_t *stream, char *buffer,
> > apr_size_t *len);
> > Index: subversion/libsvn_subr/stream.c
> > ===================================================================
> > +++ subversion/libsvn_subr/stream.c (working copy)
> > @@ -16,6 +16,8 @@
> > * ====================================================================
> > */
> >
> > +#include "svn_private_config.h"
> > +
> > #include <assert.h>
> > #include <stdio.h>
> >
> > @@ -25,6 +27,11 @@
> > #include <apr_file_io.h>
> > #include <apr_errno.h>
> >
> > +#ifdef ZLIB
> > +#include <zlib.h>
> > +#endif
> > +
> > +#include "svn_pools.h"
> > #include "svn_io.h"
> > #include "svn_error.h"
> > #include "svn_string.h"
> > @@ -250,7 +257,279 @@
> > return stream;
> > }
> >
> > +
> > +/* Compressed stream support */
> >
> > +#ifdef ZLIB
>
> Is ZLIB local to the Subversion code? If it escapes to the application
> code we will need a more "obscure" name.

It's defined only in svn_private_config.h, so it's private to
subversion.
 
> > +
> > +#define ZBUFFER_SIZE 4096
>
> A number like this needs a comment, what units is it? Why 4096? If
> it's just a guess then say so!
>
> > +
> > +struct zbaton {
> > + z_stream *in;
> > + z_stream *out;
> > + svn_read_fn_t read;
> > + svn_write_fn_t write;
> > + svn_close_fn_t close;
> > + void *read_buffer;
> > + int read_flush;
> > + apr_pool_t *pool;
> > + void *subbaton;
> > +};
> > +
>
> The structure above, and all the static functions below should have a
> comment explaining what they are for, and what the parameters represent.

Good idea.
 
> > +static voidpf
> > +zalloc(voidpf opaque, uInt items, uInt size)
> > +{
> > + apr_pool_t *pool = opaque;
> > +
> > + return apr_palloc(pool, items * size);
> > +}
> > +
> > +static void
> > +zfree(voidpf opaque, voidpf address)
> > +{
> > + /* Empty, since we allocate on the pool */
> > +}
> > +
> > +static svn_error_t *
> > +zerr_to_svn_error (int zerr, const char *function, z_stream *stream)
> > +{
> > + apr_status_t status;
> > + char *message;
> > +
> > + if (zerr == Z_OK)
> > + return SVN_NO_ERROR;
> > +
> > + switch (zerr)
> > + {
> > + case Z_STREAM_ERROR:
> > + status = SVN_ERR_STREAM_MALFORMED_DATA;
> > + message = "stream error";
> > + break;
> > +
> > + case Z_MEM_ERROR:
> > + status = APR_ENOMEM;
> > + message = "out of memory";
> > + break;
> > +
> > + case Z_BUF_ERROR:
> > + status = APR_ENOMEM;
> > + message = "buffer error";
> > + break;
> > +
> > + case Z_VERSION_ERROR:
> > + status = SVN_ERR_STREAM_UNRECOGNIZED_DATA;
> > + message = "version error";
> > + break;
> > +
> > + case Z_DATA_ERROR:
> > + status = SVN_ERR_STREAM_MALFORMED_DATA;
> > + message = "corrupted data";
> > + break;
> > +
> > + default:
> > + status = SVN_ERR_STREAM_UNRECOGNIZED_DATA;
> > + message = "error";
> > + break;
> > + }
> > +
> > + if (stream->msg != NULL)
> > + return svn_error_createf (status, NULL, "zlib (%s): %s: %s", function,
> > + message, stream->msg);
> > + else
> > + return svn_error_createf (status, NULL, "zlib (%s): %s", function,
> > + message);
> > +}
> > +
> > +static svn_error_t *
> > +read_helper_gz (svn_read_fn_t read, void *baton, char *buffer,
> > + apr_size_t *len, int *zflush)
> > +{
> > + apr_size_t orig_len = *len;
> > +
> > + SVN_ERR ((*read) (baton, buffer, len));
> > +
> > + /* I wanted to use Z_FINISH here, but we need to know our buffer is
> > + big enough */
> > + *zflush = (*len) < orig_len ? Z_SYNC_FLUSH : Z_SYNC_FLUSH;
> > +
> > + return SVN_NO_ERROR;
> > +}
> > +
> > +static svn_error_t *
> > +read_handler_gz (void *baton, char *buffer, apr_size_t *len)
> > +{
> > + struct zbaton *btn = baton;
> > + int zerr;
> > +
> > + if (btn->in == NULL)
> > + {
> > + btn->in = apr_palloc (btn->pool, sizeof (z_stream));
> > + btn->in->zalloc = (alloc_func) zalloc;
> > + btn->in->zfree = (free_func) zfree;
>
> Are these casts necessary?

No, they're not.
 
> > + btn->in->opaque = (voidpf) btn->pool;
> > + btn->read_buffer = apr_palloc(btn->pool, ZBUFFER_SIZE);
> > + btn->in->next_in = btn->read_buffer;
> > + btn->in->avail_in = ZBUFFER_SIZE;
> > +
> > + SVN_ERR (read_helper_gz (btn->read, btn->subbaton, btn->read_buffer,
> > + &btn->in->avail_in, &btn->read_flush));
> > +
> > + zerr = inflateInit (btn->in);
> > + SVN_ERR (zerr_to_svn_error (zerr, "inflateInit", btn->in));
> > + }
> > +
> > + btn->in->next_out = buffer;
> > + btn->in->avail_out = *len;
> > +
> > + while (btn->in->avail_out > 0)
> > + {
> > + if (btn->in->avail_in <= 0)
> > + {
> > + btn->in->avail_in = ZBUFFER_SIZE;
> > + btn->in->next_in = btn->read_buffer;
> > + SVN_ERR (read_helper_gz (btn->read, btn->subbaton, btn->read_buffer,
> > + &btn->in->avail_in, &btn->read_flush));
> > + }
> > +
> > + zerr = inflate (btn->in, btn->read_flush);
> > + if (zerr == Z_STREAM_END)
> > + break;
> > + else if (zerr != Z_OK)
> > + return zerr_to_svn_error(zerr, "inflate", btn->in);
> > + }
> > +
> > + *len = *len - btn->in->avail_out;
>
> -=?

Duh, of course :)
 
> > + return SVN_NO_ERROR;
> > +}
> > +
> > +
> > +
> > +static svn_error_t *
> > +write_handler_gz (void *baton, const char *buffer, apr_size_t *len)
> > +{
> > + struct zbaton *btn = baton;
> > + apr_pool_t *subpool;
> > + void *write_buf;
> > + apr_size_t buf_size, write_len;
> > + int zerr;
> > +
> > + if (btn->out == NULL)
> > + {
> > + btn->out = apr_palloc (btn->pool, sizeof (z_stream));
> > + btn->out->zalloc = (alloc_func) zalloc;
> > + btn->out->zfree = (free_func) zfree;
> > + btn->out->opaque = (voidpf) btn->pool;
>
> Those casts again.
>
> > +
> > + zerr = deflateInit (btn->out, Z_DEFAULT_COMPRESSION);
> > + SVN_ERR (zerr_to_svn_error (zerr, "deflateInit", btn->out));
> > + }
> > +
> > + buf_size = *len + (*len / 1000) + 13;
>
> This magic algorithm needs some explanation.

Yes, adding a comment.
 
> > + subpool = svn_pool_create (btn->pool);
> > + write_buf = apr_palloc (subpool, buf_size);
> > +
> > + btn->out->next_in = (char *) buffer;
> > + btn->out->avail_in = *len;
> > +
> > + while (btn->out->avail_in > 0)
> > + {
> > + btn->out->next_out = write_buf;
> > + btn->out->avail_out = buf_size;
> > +
> > + zerr = deflate (btn->out, Z_NO_FLUSH);
> > + SVN_ERR (zerr_to_svn_error (zerr, "deflate", btn->out));
> > + write_len = buf_size - btn->out->avail_out;
> > + if (write_len > 0)
> > + SVN_ERR (btn->write (btn->subbaton, write_buf, &write_len));
> > + }
> > +
> > + svn_pool_destroy (subpool);
> > +
> > + return SVN_NO_ERROR;
> > +}
> > +
> > +static svn_error_t *
> > +close_handler_gz (void *baton)
> > +{
> > + struct zbaton *btn = baton;
> > + int zerr;
> > +
> > + if (btn->in != NULL)
> > + {
> > + zerr = inflateEnd(btn->in);
> > + SVN_ERR (zerr_to_svn_error (zerr, "inflateEnd", btn->in));
> > + }
> > +
> > + if (btn->out != NULL)
> > + {
> > + void *buf;
> > + apr_size_t write_len;
> > +
> > + buf = apr_palloc (btn->pool, ZBUFFER_SIZE);
> > +
> > + while (TRUE)
> > + {
> > + btn->out->next_out = buf;
> > + btn->out->avail_out = ZBUFFER_SIZE;
> > +
> > + zerr = deflate (btn->out, Z_FINISH);
> > + if (zerr != Z_STREAM_END && zerr != Z_OK)
> > + return zerr_to_svn_error (zerr, "deflate", btn->out);
> > + write_len = ZBUFFER_SIZE - btn->out->avail_out;
> > + if (write_len > 0)
> > + SVN_ERR (btn->write (btn->subbaton, buf, &write_len));
> > + if (zerr == Z_STREAM_END)
> > + break;
> > + }
> > +
> > + zerr = deflateEnd(btn->out);
> > + SVN_ERR (zerr_to_svn_error (zerr, "deflateEnd", btn->out));
> > + }
> > +
> > + if (btn->close != NULL)
> > + return btn->close (btn->subbaton);
> > + else
> > + return SVN_NO_ERROR;
> > +}
> > +
> > +#endif /* ZLIB */
> > +
> > +svn_stream_t *
> > +svn_stream_compressed (svn_stream_t *stream, apr_pool_t *pool)
> > +{
> > +#ifdef ZLIB
> > +
> > + struct svn_stream_t *zstream, *old_stream = (struct svn_stream_t *) stream;
>
> Is this cast necessary? Why use old_stream at all?

No the cast isn't, but we need to use old stream. stream is of type
svn_stream_t * which is opaque, so we need to make it a struct
svn_stream_t *.

> > + struct zbaton *baton;
> > +
> > + assert(stream != NULL);
> > +
> > + baton = apr_palloc (pool, sizeof (*baton));
> > + baton->in = baton->out = NULL;
> > + baton->read = old_stream->read_fn;
> > + baton->write = old_stream->write_fn;
> > + baton->close = old_stream->close_fn;
> > + baton->subbaton = old_stream->baton;
> > + baton->pool = pool;
> > + baton->read_buffer = NULL;
> > + baton->read_flush = Z_SYNC_FLUSH;
> > +
> > + zstream = svn_stream_create(baton, pool);
> > + svn_stream_set_read(zstream, read_handler_gz);
> > + svn_stream_set_write(zstream, write_handler_gz);
> > + svn_stream_set_close(zstream, close_handler_gz);
> > +
> > + return zstream;
> > +
> > +#else
> > +
> > + return stream; /* Just return the stream if
> > + compression support is not
> > + enabled */
>
> The comment is redundant. It repeats the documentation in the header
> and the code can be understood without it.

Ok.
 
> > +
> > +#endif /* ZLIB */
> > +}
> > +
> > Index: subversion/tests/libsvn_subr/stream-test.c
> > ===================================================================
> > +++ subversion/tests/libsvn_subr/stream-test.c (working copy)
> > @@ -20,6 +20,7 @@
> > #include <svn_pools.h>
> > #include <svn_io.h>
> > #include <apr_general.h>
> > +#include "svn_private_config.h"
> > #include "svn_test.h"
> >
> >
> > @@ -118,6 +119,89 @@
> > return SVN_NO_ERROR;
> > }
> >
> > +static svn_error_t *
> > +test_stream_compressed (const char **msg,
> > + svn_boolean_t msg_only,
> > + apr_pool_t *pool)
> > +{
> > + int i;
> > + apr_pool_t *subpool = svn_pool_create (pool);
> > +
> > +#define NUM_TEST_STRINGS 4
> > +#define TEST_BUF_SIZE 10
> > +
> > + static const char * const strings[NUM_TEST_STRINGS] = {
> > + /* 0 */
> > + "",
> > + /* 1 */
> > + "This is a string.",
> > + /* 2 */
> > + "This is, by comparison to the previous string, a much longer string.",
> > + /* 3 */
> > + "And if you thought that last string was long, you just wait until "
> > + "I'm finished here. I mean, how can a string really claim to be long "
> > + "when it fits on a single line of 80-columns? Give me a break. "
> > + "Now, I'm not saying that I'm the longest string out there--far from "
> > + "it--but I feel that it is safe to assume that I'm far longer than my "
> > + "peers. And that demands some amount of respect, wouldn't you say?"
>
> How does the size of these strings relate to the 4096 above? Do you
> need a test that has an input (or an output perhaps?) greater than
> 4096?

Good point. But the compressed representation of the string would need
to be more than 4096, not just the size of the string, since the it is
the read function that uses this buffer size. Any ideas how i could
generate that data?
 
> > + };
> > +
> > +
> > + *msg = "test compressed streams";
> > +
> > + if (msg_only)
> > + return SVN_NO_ERROR;
> > +
> > +#ifdef ZLIB
>
> Why is this protected? I thought that the presence of the compressed
> stream was supposed to be transparent to the application. Does the
> test fail if zlib is not enabled?

No, I just thought there was much point in testing it in if it's not
compiled in. But you're right, the transparency of it should be
exercised too.

> > +
> > + for (i = 0; i < NUM_TEST_STRINGS; i++)
> > + {
> > + svn_stream_t *stream;
> > + svn_stringbuf_t *origbuf, *inbuf, *outbuf;
> > + char buf[TEST_BUF_SIZE];
> > + apr_size_t len;
> > +
> > + origbuf = svn_stringbuf_create (strings[i], subpool);
> > + inbuf = svn_stringbuf_create ("", subpool);
> > + outbuf = svn_stringbuf_create ("", subpool);
> > +
> > + stream = svn_stream_compressed (svn_stream_from_stringbuf (outbuf,
> > + subpool),
> > + subpool);
> > + len = strlen (strings[i]);
> > + SVN_ERR (svn_stream_write (stream, strings[i], &len));
> > + SVN_ERR (svn_stream_close (stream));
> > +
> > + stream = svn_stream_compressed (svn_stream_from_stringbuf (outbuf,
> > + subpool),
> > + subpool);
> > + len = TEST_BUF_SIZE;
> > + while (len >= TEST_BUF_SIZE)
> > + {
>
> The indentation has gone astray here.

Oops.
 
> > + len = TEST_BUF_SIZE;
> > + SVN_ERR (svn_stream_read (stream, buf, &len));
> > + if (len > 0)
> > + svn_stringbuf_appendbytes (inbuf, buf, len);
> > + }
> > +
> > + if (! svn_stringbuf_compare (inbuf, origbuf))
> > + return svn_error_create (SVN_ERR_TEST_FAILED, NULL,
> > + "Got unexpected result.");
> > +
> > + SVN_ERR (svn_stream_close (stream));
> > +
> > + svn_pool_clear (subpool);
> > + }
> > +
> > +#endif /* ZLIB */
> > +
> > +#undef NUM_TEST_STRINGS
> > +#undef TEST_BUF_SIZE
> > +
> > + svn_pool_destroy (subpool);
> > + return SVN_NO_ERROR;
> > +}
> > +
> > @@ -125,5 +209,6 @@
> > {
> > SVN_TEST_NULL,
> > SVN_TEST_PASS (test_stream_from_string),
> > - SVN_TEST_NULL
> > + SVN_TEST_PASS (test_stream_compressed),
> > + SVN_TEST_NULL
> > };
> > Index: configure.in
> > ===================================================================
> > +++ configure.in (working copy)
> > @@ -309,6 +309,21 @@
> >
> > ])
> >
> > +AC_ARG_WITH(zlib,
> > +AC_HELP_STRING([--with-zlib], [enable zlib support]),
> > +[
> > +
> > + if test "$withval" = "yes" ; then
> > + AC_CHECK_HEADER(zlib.h, [
> > + AC_CHECK_LIB(z, inflate, [
> > + AC_DEFINE([ZLIB], [1], [Is zlib support enabled?])
> > + LIBS="$LIBS -lz"
> > + ])
>
> I'm not much of an autoconf guy, does this support --with-zlib=PREFIX?
> Do we need to?

This doesn't support --with-zlib=PREFIX. I basically stole this code
from neon, and this is how it does it. I doubt prefix is that
important, but what does everyone think?
 
> > + ])
> > + fi
> > +
> > +])
> > +
> > MOD_ACTIVATION="-a"
> > AC_ARG_ENABLE(mod-activation,
> > AC_HELP_STRING([--disable-mod-activation],

Attached is a new patch with fixes.

-- 
Eric Dorland <eric.dorland@mail.mcgill.ca>
ICQ: #61138586, Jabber: hooty@jabber.com
1024D/16D970C6 097C 4861 9934 27A0 8E1C  2B0A 61E9 8ECF 16D9 70C6
-----BEGIN GEEK CODE BLOCK-----
Version: 3.12
GCS d- s++: a-- C+++ UL+++ P++ L++ E++ W++ N+ o K- w+ 
O? M++ V-- PS+ PE Y+ PGP++ t++ 5++ X+ R tv++ b+++ DI+ D+ 
G e h! r- y+ 
------END GEEK CODE BLOCK------

  • application/pgp-signature attachment: stored
Received on Thu Mar 27 22:48:55 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.