Issue #649 is the harder half of issue #689; together, they're my
front burner right now. I'm posting this patch for review because I
haven't been in the filesystem code for a while -- am especially
hoping cmpilato will have time to review it :-), but any eyeballs are
welcome.
It passes 'make check' except for 'strings-reps-test 3', which I'm
currently tweaking. I'm more looking for comments about the general
shape of the checksum code.
Thanks,
-K
---------------------------------------------------------------------
More work on issue #649: add checksum checking code, and some of the
checksum generating code, for filesystem representations.
* subversion/libsvn_fs/fs.h
(struct svn_fs__representation_t): Use an inline raw checksum,
instead of a pointer to base64-encoded data.
* subversion/libsvn_fs/util/fs_skels.c
(svn_fs__parse_representation_skel,
svn_fs__unparse_representation_skel): Adjust accordingly.
* subversion/libsvn_fs/reps-strings.c
(checksums_match): New helper function.
(make_fulltext_rep): Take checksum argument.
(svn_fs__get_mutable_rep): Compute a checksum for the new rep.
(svn_fs__rep_contents_clear): Store the empty string's checksum.
(svn_fs__rep_deltify): Migrate rep checksum from old to new rep.
(svn_fs__rep_undeltify): Compare checksums on the source data.
* subversion/tests/libsvn_fs/strings-reps-test.c
(read_rep): Adjust to pass when checksum is introduced.
Index: subversion/libsvn_fs/reps-strings.c
===================================================================
--- subversion/libsvn_fs/reps-strings.c (revision 4270)
+++ subversion/libsvn_fs/reps-strings.c (working copy)
@@ -56,13 +56,39 @@
}
-/* Return a `fulltext' representation which references the string
- STR_KEY, performing allocations in POOL. If TXN_ID is non-zero and
- non-NULL, make the representation mutable under that TXN_ID. If
- non-NULL, STR_KEY will be copied into an allocation of POOL. */
+/* Compare digests D1 and D2, each MD5_DIGESTSIZE bytes long. If
+ * neither is all zeros, and they do not match, then return false.
+ * Else return true.
+ */
+static svn_boolean_t
+checksums_match (unsigned const char d1[], unsigned const char d2[])
+{
+ auto unsigned char zeros[MD5_DIGESTSIZE] = { 0 };
+
+ if ((strncmp (d1, zeros, MD5_DIGESTSIZE) == 0)
+ || (strncmp (d2, zeros, MD5_DIGESTSIZE) == 0))
+ return TRUE;
+ else
+ return (strncmp (d1, d2, MD5_DIGESTSIZE) == 0);
+}
+
+
+/* Return a `fulltext' representation, allocated in POOL, which
+ * references the string STR_KEY.
+ *
+ * If TXN_ID is non-zero and non-NULL, make the representation mutable
+ * under that TXN_ID.
+ *
+ * If STR_KEY is non-null, copy it into an allocation from POOL.
+ *
+ * If CHECKSUM is non-null, use it as the checksum for the new rep;
+ * else initialize the rep with an all-zero (i.e., always successful)
+ * checksum.
+ */
static svn_fs__representation_t *
make_fulltext_rep (const char *str_key,
const char *txn_id,
+ const unsigned char *checksum,
apr_pool_t *pool)
{
@@ -70,6 +96,12 @@
if (txn_id && *txn_id)
rep->txn_id = apr_pstrdup (pool, txn_id);
rep->kind = svn_fs__rep_kind_fulltext;
+
+ if (checksum)
+ memcpy (rep->checksum, checksum, MD5_DIGESTSIZE);
+ else
+ memset (rep->checksum, 0, MD5_DIGESTSIZE);
+
rep->contents.fulltext.string_key
= str_key ? apr_pstrdup (pool, str_key) : NULL;
return rep;
@@ -540,6 +572,11 @@
apr_pool_t *subpool;
char *buf;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ apr_md5_init (&context);
+
SVN_ERR (svn_fs__rep_contents_size (&size, fs, rep_key, trail));
subpool = svn_pool_create (trail->pool);
@@ -554,12 +591,16 @@
SVN_ERR (rep_read_range (fs, rep_key, buf,
offset, &amount, trail));
+ apr_md5_update (&context, buf, amount);
SVN_ERR (svn_fs__bdb_string_append (fs, &new_str, amount, buf,
trail));
}
svn_pool_destroy (subpool);
- rep = make_fulltext_rep (new_str, txn_id, trail->pool);
+
+ apr_md5_final (digest, &context);
+
+ rep = make_fulltext_rep (new_str, txn_id, digest, trail->pool);
}
else /* unknown kind */
abort ();
@@ -567,8 +608,15 @@
else /* no key, so make a new, empty, mutable, fulltext rep */
{
const char *new_str = NULL;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ /* Checksum digest for the empty string. */
+ apr_md5_init (&context);
+ apr_md5_final (digest, &context);
+
SVN_ERR (svn_fs__bdb_string_append (fs, &new_str, 0, NULL, trail));
- rep = make_fulltext_rep (new_str, txn_id, trail->pool);
+ rep = make_fulltext_rep (new_str, txn_id, digest, trail->pool);
}
/* If we made it here, there's a new rep to store in the fs. */
@@ -1019,6 +1067,12 @@
{
svn_fs__representation_t *rep;
const char *str_key;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ /* Get a checksum digest for the empty string. */
+ apr_md5_init (&context);
+ apr_md5_final (digest, &context);
SVN_ERR (svn_fs__bdb_read_rep (&rep, fs, rep_key, trail));
@@ -1038,6 +1092,7 @@
/* Else, clear the string the rep has. */
SVN_ERR (svn_fs__bdb_string_clear (fs, str_key, trail));
+ memcpy (rep->checksum, digest, MD5_DIGESTSIZE);
}
else if (rep->kind == svn_fs__rep_kind_delta)
{
@@ -1052,7 +1107,7 @@
behind it, and replace it in the filesystem. */
str_key = NULL;
SVN_ERR (svn_fs__bdb_string_append (fs, &str_key, 0, NULL, trail));
- rep = make_fulltext_rep (str_key, txn_id, trail->pool);
+ rep = make_fulltext_rep (str_key, txn_id, digest, trail->pool);
SVN_ERR (svn_fs__bdb_write_rep (fs, rep_key, rep, trail));
/* Now delete those old strings. */
@@ -1231,6 +1286,9 @@
/* TARGET's original string keys */
apr_array_header_t *orig_str_keys;
+ /* The digest for the representation's fulltext contents. */
+ unsigned char rep_digest[MD5_DIGESTSIZE];
+
/* MD5 digest */
const unsigned char *digest;
@@ -1349,6 +1407,9 @@
SVN_ERR (delta_string_keys (&orig_str_keys, old_rep, pool));
else /* unknown kind */
abort ();
+
+ /* Save the checksum, since the new rep needs it. */
+ memcpy (rep_digest, old_rep->checksum, MD5_DIGESTSIZE);
}
/* Hook the new strings we wrote into the rest of the filesystem by
@@ -1361,7 +1422,10 @@
new_rep.kind = svn_fs__rep_kind_delta;
new_rep.txn_id = NULL;
- new_rep.checksum = NULL;
+
+ /* Migrate the old rep's checksum to the new rep. */
+ memcpy (new_rep.checksum, rep_digest, MD5_DIGESTSIZE);
+
chunks = apr_array_make (pool, windows->nelts, sizeof (chunk));
/* Loop through the windows we wrote, creating and adding new
@@ -1404,7 +1468,8 @@
const char *rep_key,
trail_t *trail)
{
- /* ### todo: Make this thing `delta'-aware! */
+ /* ### todo: Make this thing `delta'-aware! */
+ /* (Uh, what does that mean? -kfogel, 6 Jan 2003) */
svn_fs__representation_t *rep;
svn_stream_t *source_stream; /* stream to read the source */
svn_stream_t *target_stream; /* stream to write the fulltext */
@@ -1414,6 +1479,8 @@
apr_pool_t *subpool;
char *buf;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
/* Read the rep skel. */
SVN_ERR (svn_fs__bdb_read_rep (&rep, fs, rep_key, trail));
@@ -1439,6 +1506,7 @@
source_stream = svn_fs__rep_contents_read_stream (fs, rep_key, 0,
trail, trail->pool);
+ apr_md5_init (&context);
subpool = svn_pool_create (trail->pool);
buf = apr_palloc (subpool, SVN_STREAM_CHUNK_SIZE);
do
@@ -1447,6 +1515,7 @@
len = SVN_STREAM_CHUNK_SIZE;
SVN_ERR (svn_stream_read (source_stream, buf, &len));
+ apr_md5_update (&context, buf, len);
len_read = len;
SVN_ERR (svn_stream_write (target_stream, buf, &len));
if (len_read != len)
@@ -1457,10 +1526,17 @@
while (len);
svn_pool_destroy (subpool);
+ apr_md5_final (digest, &context);
+
+ if (! checksums_match (rep->checksum, digest))
+ svn_error_createf
+ (SVN_ERR_FS_CORRUPT, NULL,
+ "svn_fs__rep_undeltify: checksum mismatch");
+
/* Now `target_baton.key' has the key of the new string. We
should hook it into the representation. So we make a new rep,
write it out... */
- rep = make_fulltext_rep (target_baton.key, NULL, trail->pool);
+ rep = make_fulltext_rep (target_baton.key, NULL, digest, trail->pool);
SVN_ERR (svn_fs__bdb_write_rep (fs, rep_key, rep, trail));
/* ...then we delete our original strings. */
Index: subversion/libsvn_fs/fs.h
===================================================================
--- subversion/libsvn_fs/fs.h (revision 4270)
+++ subversion/libsvn_fs/fs.h (working copy)
@@ -215,9 +215,9 @@
regardless of how the rep stores the data under the hood. It is
independent of the storage (fulltext, delta, whatever).
- If NULL, then for compatibility behave as though the absent
- checksum matches the expected checksum. */
- const char *checksum;
+ If all the bytes are 0, then for compatibility behave as though
+ this checksum matches the expected checksum. */
+ unsigned char checksum[MD5_DIGESTSIZE];
/* kind-specific stuff */
union
Index: subversion/libsvn_fs/util/fs_skels.c
===================================================================
--- subversion/libsvn_fs/util/fs_skels.c (revision 4270)
+++ subversion/libsvn_fs/util/fs_skels.c (working copy)
@@ -161,7 +161,7 @@
optionally a CHECKSUM (which is a list form). */
header = skel->children;
header_len = svn_fs__list_length (header);
- if (! (((header_len == 2) /* 2 means checksum absent */
+ if (! (((header_len == 2) /* 2 means old repository, checksum absent */
&& (header->children->is_atom)
&& (header->children->next->is_atom))
|| ((header_len == 3) /* 3 means checksum present */
@@ -478,9 +478,16 @@
/* CHECKSUM */
if (header_skel->children->next->next)
- rep->checksum = apr_pstrmemdup (pool,
- header_skel->children->next->next->data,
- header_skel->children->next->next->len);
+ {
+ memcpy (rep->checksum,
+ header_skel->children->next->next->children->next->data,
+ MD5_DIGESTSIZE);
+ }
+ else
+ {
+ /* Older repository, no checksum, so manufacture an all-zero checksum */
+ memset (rep->checksum, 0, MD5_DIGESTSIZE);
+ }
/* KIND-SPECIFIC stuff */
if (rep->kind == svn_fs__rep_kind_fulltext)
@@ -888,17 +895,15 @@
those parts first. **/
/* CHECKSUM */
- if (rep->checksum)
- {
- skel_t *checksum_skel = svn_fs__make_empty_list (pool);
-
- svn_fs__prepend (svn_fs__mem_atom
- (rep->checksum,
- MD5_DIGESTSIZE / sizeof (*(rep->checksum)), pool),
- checksum_skel);
- svn_fs__prepend (svn_fs__str_atom ("md5", pool), checksum_skel);
- svn_fs__prepend (checksum_skel, header_skel);
- }
+ {
+ skel_t *checksum_skel = svn_fs__make_empty_list (pool);
+ svn_fs__prepend (svn_fs__mem_atom
+ (rep->checksum,
+ MD5_DIGESTSIZE / sizeof (*(rep->checksum)), pool),
+ checksum_skel);
+ svn_fs__prepend (svn_fs__str_atom ("md5", pool), checksum_skel);
+ svn_fs__prepend (checksum_skel, header_skel);
+ }
/* TXN */
if (rep->txn_id)
Index: subversion/tests/libsvn_fs/strings-reps-test.c
===================================================================
--- subversion/tests/libsvn_fs/strings-reps-test.c (revision 4270)
+++ subversion/tests/libsvn_fs/strings-reps-test.c (working copy)
@@ -178,8 +178,14 @@
struct rep_args new_args;
struct rep_args args;
struct rep_args read_args;
- const char *new_rep = "((fulltext 0 ) a83t2Z0)";
+
const char *rep = "((fulltext 0 ) kfogel31337)";
+ const char *new_rep_before = "((fulltext 0 ) a83t2Z0)";
+ /* The zeros in the checksum make it look like this afterwards.
+ (This test now also confirms the introduction of the checksum
+ placeholder; and eventually will confirm the checksum itself. */
+ const char *new_rep_after = "((fulltext 0 (md5 16 ";
+
svn_stringbuf_t *skel_data;
svn_fs_t *fs;
@@ -194,7 +200,8 @@
/* Set up transaction baton */
new_args.fs = fs;
- new_args.skel = svn_fs__parse_skel ((char *)new_rep, strlen (new_rep), pool);
+ new_args.skel = svn_fs__parse_skel ((char *)new_rep_before,
+ strlen (new_rep_before), pool);
new_args.key = NULL;
/* Write new rep to reps table. */
@@ -219,10 +226,10 @@
"error reading new representation");
skel_data = svn_fs__unparse_skel (read_args.skel, pool);
- if (strcmp (skel_data->data, new_rep))
+ if (strcmp (skel_data->data, new_rep_after))
return svn_error_createf (SVN_ERR_FS_GENERAL, NULL,
"representation corrupted (\"%s\" != \"%s\")",
- skel_data->data, new_rep);
+ skel_data->data, new_rep_after);
/* Set up transaction baton for re-writing reps. */
args.fs = new_args.fs;
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Mon Jan 6 23:36:24 2003