[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: [PATCH] Compressed streams

From: Philip Martin <philip_at_codematters.co.uk>
Date: 2003-03-27 17:59:32 CET

Eric Dorland <eric.dorland@mail.mcgill.ca> writes:

> I think this code could be useful above and beyond my pet project of
> compressing text-base. BTW, is there any chance of that sort of code
> getting in before 1.0?

I'd support it.

> Index: subversion/include/svn_io.h
> ===================================================================
> --- subversion/include/svn_io.h (revision 5482)
> +++ subversion/include/svn_io.h (working copy)
> @@ -389,6 +389,19 @@
> svn_stream_t *svn_stream_from_stringbuf (svn_stringbuf_t *str,
> apr_pool_t *pool);
>
> +/** Wrap zlib compression functions around the stream @a stream.
> + *
> + * Return a stream that decompresses all data read and compresses all
> + * data written. The stream @a stream is used to read and write all
> + * compressed data. All compression data structures are allocated on
> + * @a pool. If compression support is not compiled in then @c
> + * svn_stream_compressed() returns @a stream unmodified. Make sure you
> + * call @c svn_stream_close() on the stream returned by this function,
> + * so that all data is flushed and cleaned up.
> + */
> +svn_stream_t *svn_stream_compressed (svn_stream_t *stream,
> + apr_pool_t *pool);
> +
> /** Read from a generic stream. */
> svn_error_t *svn_stream_read (svn_stream_t *stream, char *buffer,
> apr_size_t *len);
> Index: subversion/libsvn_subr/stream.c
> ===================================================================
> --- subversion/libsvn_subr/stream.c (revision 5482)
> +++ subversion/libsvn_subr/stream.c (working copy)
> @@ -16,6 +16,8 @@
> * ====================================================================
> */
>
> +#include "svn_private_config.h"
> +
> #include <assert.h>
> #include <stdio.h>
>
> @@ -25,6 +27,11 @@
> #include <apr_file_io.h>
> #include <apr_errno.h>
>
> +#ifdef ZLIB
> +#include <zlib.h>
> +#endif
> +
> +#include "svn_pools.h"
> #include "svn_io.h"
> #include "svn_error.h"
> #include "svn_string.h"
> @@ -250,7 +257,279 @@
> return stream;
> }
>
> +
> +/* Compressed stream support */
>
> +#ifdef ZLIB

Is ZLIB local to the Subversion code? If it escapes to the application
code we will need a more "obscure" name.

> +
> +#define ZBUFFER_SIZE 4096

A number like this needs a comment, what units is it? Why 4096? If
it's just a guess then say so!

> +
> +struct zbaton {
> + z_stream *in;
> + z_stream *out;
> + svn_read_fn_t read;
> + svn_write_fn_t write;
> + svn_close_fn_t close;
> + void *read_buffer;
> + int read_flush;
> + apr_pool_t *pool;
> + void *subbaton;
> +};
> +

The structure above, and all the static functions below should have a
comment explaining what they are for, and what the parameters represent.

> +static voidpf
> +zalloc(voidpf opaque, uInt items, uInt size)
> +{
> + apr_pool_t *pool = opaque;
> +
> + return apr_palloc(pool, items * size);
> +}
> +
> +static void
> +zfree(voidpf opaque, voidpf address)
> +{
> + /* Empty, since we allocate on the pool */
> +}
> +
> +static svn_error_t *
> +zerr_to_svn_error (int zerr, const char *function, z_stream *stream)
> +{
> + apr_status_t status;
> + char *message;
> +
> + if (zerr == Z_OK)
> + return SVN_NO_ERROR;
> +
> + switch (zerr)
> + {
> + case Z_STREAM_ERROR:
> + status = SVN_ERR_STREAM_MALFORMED_DATA;
> + message = "stream error";
> + break;
> +
> + case Z_MEM_ERROR:
> + status = APR_ENOMEM;
> + message = "out of memory";
> + break;
> +
> + case Z_BUF_ERROR:
> + status = APR_ENOMEM;
> + message = "buffer error";
> + break;
> +
> + case Z_VERSION_ERROR:
> + status = SVN_ERR_STREAM_UNRECOGNIZED_DATA;
> + message = "version error";
> + break;
> +
> + case Z_DATA_ERROR:
> + status = SVN_ERR_STREAM_MALFORMED_DATA;
> + message = "corrupted data";
> + break;
> +
> + default:
> + status = SVN_ERR_STREAM_UNRECOGNIZED_DATA;
> + message = "error";
> + break;
> + }
> +
> + if (stream->msg != NULL)
> + return svn_error_createf (status, NULL, "zlib (%s): %s: %s", function,
> + message, stream->msg);
> + else
> + return svn_error_createf (status, NULL, "zlib (%s): %s", function,
> + message);
> +}
> +
> +static svn_error_t *
> +read_helper_gz (svn_read_fn_t read, void *baton, char *buffer,
> + apr_size_t *len, int *zflush)
> +{
> + apr_size_t orig_len = *len;
> +
> + SVN_ERR ((*read) (baton, buffer, len));
> +
> + /* I wanted to use Z_FINISH here, but we need to know our buffer is
> + big enough */
> + *zflush = (*len) < orig_len ? Z_SYNC_FLUSH : Z_SYNC_FLUSH;
> +
> + return SVN_NO_ERROR;
> +}
> +
> +static svn_error_t *
> +read_handler_gz (void *baton, char *buffer, apr_size_t *len)
> +{
> + struct zbaton *btn = baton;
> + int zerr;
> +
> + if (btn->in == NULL)
> + {
> + btn->in = apr_palloc (btn->pool, sizeof (z_stream));
> + btn->in->zalloc = (alloc_func) zalloc;
> + btn->in->zfree = (free_func) zfree;

Are these casts necessary?

> + btn->in->opaque = (voidpf) btn->pool;
> + btn->read_buffer = apr_palloc(btn->pool, ZBUFFER_SIZE);
> + btn->in->next_in = btn->read_buffer;
> + btn->in->avail_in = ZBUFFER_SIZE;
> +
> + SVN_ERR (read_helper_gz (btn->read, btn->subbaton, btn->read_buffer,
> + &btn->in->avail_in, &btn->read_flush));
> +
> + zerr = inflateInit (btn->in);
> + SVN_ERR (zerr_to_svn_error (zerr, "inflateInit", btn->in));
> + }
> +
> + btn->in->next_out = buffer;
> + btn->in->avail_out = *len;
> +
> + while (btn->in->avail_out > 0)
> + {
> + if (btn->in->avail_in <= 0)
> + {
> + btn->in->avail_in = ZBUFFER_SIZE;
> + btn->in->next_in = btn->read_buffer;
> + SVN_ERR (read_helper_gz (btn->read, btn->subbaton, btn->read_buffer,
> + &btn->in->avail_in, &btn->read_flush));
> + }
> +
> + zerr = inflate (btn->in, btn->read_flush);
> + if (zerr == Z_STREAM_END)
> + break;
> + else if (zerr != Z_OK)
> + return zerr_to_svn_error(zerr, "inflate", btn->in);
> + }
> +
> + *len = *len - btn->in->avail_out;

-=?

> + return SVN_NO_ERROR;
> +}
> +
> +
> +
> +static svn_error_t *
> +write_handler_gz (void *baton, const char *buffer, apr_size_t *len)
> +{
> + struct zbaton *btn = baton;
> + apr_pool_t *subpool;
> + void *write_buf;
> + apr_size_t buf_size, write_len;
> + int zerr;
> +
> + if (btn->out == NULL)
> + {
> + btn->out = apr_palloc (btn->pool, sizeof (z_stream));
> + btn->out->zalloc = (alloc_func) zalloc;
> + btn->out->zfree = (free_func) zfree;
> + btn->out->opaque = (voidpf) btn->pool;

Those casts again.

> +
> + zerr = deflateInit (btn->out, Z_DEFAULT_COMPRESSION);
> + SVN_ERR (zerr_to_svn_error (zerr, "deflateInit", btn->out));
> + }
> +
> + buf_size = *len + (*len / 1000) + 13;

This magic algorithm needs some explanation.

> + subpool = svn_pool_create (btn->pool);
> + write_buf = apr_palloc (subpool, buf_size);
> +
> + btn->out->next_in = (char *) buffer;
> + btn->out->avail_in = *len;
> +
> + while (btn->out->avail_in > 0)
> + {
> + btn->out->next_out = write_buf;
> + btn->out->avail_out = buf_size;
> +
> + zerr = deflate (btn->out, Z_NO_FLUSH);
> + SVN_ERR (zerr_to_svn_error (zerr, "deflate", btn->out));
> + write_len = buf_size - btn->out->avail_out;
> + if (write_len > 0)
> + SVN_ERR (btn->write (btn->subbaton, write_buf, &write_len));
> + }
> +
> + svn_pool_destroy (subpool);
> +
> + return SVN_NO_ERROR;
> +}
> +
> +static svn_error_t *
> +close_handler_gz (void *baton)
> +{
> + struct zbaton *btn = baton;
> + int zerr;
> +
> + if (btn->in != NULL)
> + {
> + zerr = inflateEnd(btn->in);
> + SVN_ERR (zerr_to_svn_error (zerr, "inflateEnd", btn->in));
> + }
> +
> + if (btn->out != NULL)
> + {
> + void *buf;
> + apr_size_t write_len;
> +
> + buf = apr_palloc (btn->pool, ZBUFFER_SIZE);
> +
> + while (TRUE)
> + {
> + btn->out->next_out = buf;
> + btn->out->avail_out = ZBUFFER_SIZE;
> +
> + zerr = deflate (btn->out, Z_FINISH);
> + if (zerr != Z_STREAM_END && zerr != Z_OK)
> + return zerr_to_svn_error (zerr, "deflate", btn->out);
> + write_len = ZBUFFER_SIZE - btn->out->avail_out;
> + if (write_len > 0)
> + SVN_ERR (btn->write (btn->subbaton, buf, &write_len));
> + if (zerr == Z_STREAM_END)
> + break;
> + }
> +
> + zerr = deflateEnd(btn->out);
> + SVN_ERR (zerr_to_svn_error (zerr, "deflateEnd", btn->out));
> + }
> +
> + if (btn->close != NULL)
> + return btn->close (btn->subbaton);
> + else
> + return SVN_NO_ERROR;
> +}
> +
> +#endif /* ZLIB */
> +
> +svn_stream_t *
> +svn_stream_compressed (svn_stream_t *stream, apr_pool_t *pool)
> +{
> +#ifdef ZLIB
> +
> + struct svn_stream_t *zstream, *old_stream = (struct svn_stream_t *) stream;

Is this cast necessary? Why use old_stream at all?

> + struct zbaton *baton;
> +
> + assert(stream != NULL);
> +
> + baton = apr_palloc (pool, sizeof (*baton));
> + baton->in = baton->out = NULL;
> + baton->read = old_stream->read_fn;
> + baton->write = old_stream->write_fn;
> + baton->close = old_stream->close_fn;
> + baton->subbaton = old_stream->baton;
> + baton->pool = pool;
> + baton->read_buffer = NULL;
> + baton->read_flush = Z_SYNC_FLUSH;
> +
> + zstream = svn_stream_create(baton, pool);
> + svn_stream_set_read(zstream, read_handler_gz);
> + svn_stream_set_write(zstream, write_handler_gz);
> + svn_stream_set_close(zstream, close_handler_gz);
> +
> + return zstream;
> +
> +#else
> +
> + return stream; /* Just return the stream if
> + compression support is not
> + enabled */

The comment is redundant. It repeats the documentation in the header
and the code can be understood without it.

> +
> +#endif /* ZLIB */
> +}
> +
>
> /* Miscellaneous stream functions. */
> struct string_stream_baton
> Index: subversion/tests/libsvn_subr/stream-test.c
> ===================================================================
> --- subversion/tests/libsvn_subr/stream-test.c (revision 5482)
> +++ subversion/tests/libsvn_subr/stream-test.c (working copy)
> @@ -20,6 +20,7 @@
> #include <svn_pools.h>
> #include <svn_io.h>
> #include <apr_general.h>
> +#include "svn_private_config.h"
> #include "svn_test.h"
>
>
> @@ -118,6 +119,89 @@
> return SVN_NO_ERROR;
> }
>
> +static svn_error_t *
> +test_stream_compressed (const char **msg,
> + svn_boolean_t msg_only,
> + apr_pool_t *pool)
> +{
> + int i;
> + apr_pool_t *subpool = svn_pool_create (pool);
> +
> +#define NUM_TEST_STRINGS 4
> +#define TEST_BUF_SIZE 10
> +
> + static const char * const strings[NUM_TEST_STRINGS] = {
> + /* 0 */
> + "",
> + /* 1 */
> + "This is a string.",
> + /* 2 */
> + "This is, by comparison to the previous string, a much longer string.",
> + /* 3 */
> + "And if you thought that last string was long, you just wait until "
> + "I'm finished here. I mean, how can a string really claim to be long "
> + "when it fits on a single line of 80-columns? Give me a break. "
> + "Now, I'm not saying that I'm the longest string out there--far from "
> + "it--but I feel that it is safe to assume that I'm far longer than my "
> + "peers. And that demands some amount of respect, wouldn't you say?"

How does the size of these strings relate to the 4096 above? Do you
need a test that has an input (or an output perhaps?) greater than
4096?

> + };
> +
> +
> + *msg = "test compressed streams";
> +
> + if (msg_only)
> + return SVN_NO_ERROR;
> +
> +#ifdef ZLIB

Why is this protected? I thought that the presence of the compressed
stream was supposed to be transparent to the application. Does the
test fail if zlib is not enabled?

> +
> + for (i = 0; i < NUM_TEST_STRINGS; i++)
> + {
> + svn_stream_t *stream;
> + svn_stringbuf_t *origbuf, *inbuf, *outbuf;
> + char buf[TEST_BUF_SIZE];
> + apr_size_t len;
> +
> + origbuf = svn_stringbuf_create (strings[i], subpool);
> + inbuf = svn_stringbuf_create ("", subpool);
> + outbuf = svn_stringbuf_create ("", subpool);
> +
> + stream = svn_stream_compressed (svn_stream_from_stringbuf (outbuf,
> + subpool),
> + subpool);
> + len = strlen (strings[i]);
> + SVN_ERR (svn_stream_write (stream, strings[i], &len));
> + SVN_ERR (svn_stream_close (stream));
> +
> + stream = svn_stream_compressed (svn_stream_from_stringbuf (outbuf,
> + subpool),
> + subpool);
> + len = TEST_BUF_SIZE;
> + while (len >= TEST_BUF_SIZE)
> + {

The indentation has gone astray here.

> + len = TEST_BUF_SIZE;
> + SVN_ERR (svn_stream_read (stream, buf, &len));
> + if (len > 0)
> + svn_stringbuf_appendbytes (inbuf, buf, len);
> + }
> +
> + if (! svn_stringbuf_compare (inbuf, origbuf))
> + return svn_error_create (SVN_ERR_TEST_FAILED, NULL,
> + "Got unexpected result.");
> +
> + SVN_ERR (svn_stream_close (stream));
> +
> + svn_pool_clear (subpool);
> + }
> +
> +#endif /* ZLIB */
> +
> +#undef NUM_TEST_STRINGS
> +#undef TEST_BUF_SIZE
> +
> + svn_pool_destroy (subpool);
> + return SVN_NO_ERROR;
> +}
> +
>
> /* The test table. */
>
> @@ -125,5 +209,6 @@
> {
> SVN_TEST_NULL,
> SVN_TEST_PASS (test_stream_from_string),
> - SVN_TEST_NULL
> + SVN_TEST_PASS (test_stream_compressed),
> + SVN_TEST_NULL
> };
> Index: configure.in
> ===================================================================
> --- configure.in (revision 5482)
> +++ configure.in (working copy)
> @@ -309,6 +309,21 @@
>
> ])
>
> +AC_ARG_WITH(zlib,
> +AC_HELP_STRING([--with-zlib], [enable zlib support]),
> +[
> +
> + if test "$withval" = "yes" ; then
> + AC_CHECK_HEADER(zlib.h, [
> + AC_CHECK_LIB(z, inflate, [
> + AC_DEFINE([ZLIB], [1], [Is zlib support enabled?])
> + LIBS="$LIBS -lz"
> + ])

I'm not much of an autoconf guy, does this support --with-zlib=PREFIX?
Do we need to?

> + ])
> + fi
> +
> +])
> +
> MOD_ACTIVATION="-a"
> AC_ARG_ENABLE(mod-activation,
> AC_HELP_STRING([--disable-mod-activation],

-- 
Philip Martin
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Mar 27 18:00:46 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.