[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] for some of issue #649

From: Karl Fogel <kfogel_at_newton.ch.collab.net>
Date: 2003-01-06 22:51:29 CET

Issue #649 is the harder half of issue #689; together, they're my
front burner right now. I'm posting this patch for review because I
haven't been in the filesystem code for a while -- am especially
hoping cmpilato will have time to review it :-), but any eyeballs are
welcome.

It passes 'make check' except for 'strings-reps-test 3', which I'm
currently tweaking. I'm more looking for comments about the general
shape of the checksum code.

Thanks,
-K

---------------------------------------------------------------------
More work on issue #649: add checksum checking code, and some of the
checksum generating code, for filesystem representations.

* subversion/libsvn_fs/fs.h
  (struct svn_fs__representation_t): Use an inline raw checksum,
  instead of a pointer to base64-encoded data.

* subversion/libsvn_fs/util/fs_skels.c
  (svn_fs__parse_representation_skel,
   svn_fs__unparse_representation_skel): Adjust accordingly.

* subversion/libsvn_fs/reps-strings.c
  (checksums_match): New helper function.
  (make_fulltext_rep): Take checksum argument.
  (svn_fs__get_mutable_rep): Compute a checksum for the new rep.
  (svn_fs__rep_contents_clear): Store the empty string's checksum.
  (svn_fs__rep_deltify): Migrate rep checksum from old to new rep.
  (svn_fs__rep_undeltify): Compare checksums on the source data.

* subversion/tests/libsvn_fs/strings-reps-test.c
  (read_rep): Adjust to pass when checksum is introduced.

Index: subversion/libsvn_fs/reps-strings.c
===================================================================
--- subversion/libsvn_fs/reps-strings.c (revision 4270)
+++ subversion/libsvn_fs/reps-strings.c (working copy)
@@ -56,13 +56,39 @@
 }
 
 
-/* Return a `fulltext' representation which references the string
- STR_KEY, performing allocations in POOL. If TXN_ID is non-zero and
- non-NULL, make the representation mutable under that TXN_ID. If
- non-NULL, STR_KEY will be copied into an allocation of POOL. */
+/* Compare digests D1 and D2, each MD5_DIGESTSIZE bytes long. If
+ * neither is all zeros, and they do not match, then return false.
+ * Else return true.
+ */
+static svn_boolean_t
+checksums_match (unsigned const char d1[], unsigned const char d2[])
+{
+ auto unsigned char zeros[MD5_DIGESTSIZE] = { 0 };
+
+ if ((strncmp (d1, zeros, MD5_DIGESTSIZE) == 0)
+ || (strncmp (d2, zeros, MD5_DIGESTSIZE) == 0))
+ return TRUE;
+ else
+ return (strncmp (d1, d2, MD5_DIGESTSIZE) == 0);
+}
+
+
+/* Return a `fulltext' representation, allocated in POOL, which
+ * references the string STR_KEY.
+ *
+ * If TXN_ID is non-zero and non-NULL, make the representation mutable
+ * under that TXN_ID.
+ *
+ * If STR_KEY is non-null, copy it into an allocation from POOL.
+ *
+ * If CHECKSUM is non-null, use it as the checksum for the new rep;
+ * else initialize the rep with an all-zero (i.e., always successful)
+ * checksum.
+ */
 static svn_fs__representation_t *
 make_fulltext_rep (const char *str_key,
                    const char *txn_id,
+ const unsigned char *checksum,
                    apr_pool_t *pool)
 
 {
@@ -70,6 +96,12 @@
   if (txn_id && *txn_id)
     rep->txn_id = apr_pstrdup (pool, txn_id);
   rep->kind = svn_fs__rep_kind_fulltext;
+
+ if (checksum)
+ memcpy (rep->checksum, checksum, MD5_DIGESTSIZE);
+ else
+ memset (rep->checksum, 0, MD5_DIGESTSIZE);
+
   rep->contents.fulltext.string_key
     = str_key ? apr_pstrdup (pool, str_key) : NULL;
   return rep;
@@ -540,6 +572,11 @@
           apr_pool_t *subpool;
           char *buf;
 
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ apr_md5_init (&context);
+
           SVN_ERR (svn_fs__rep_contents_size (&size, fs, rep_key, trail));
 
           subpool = svn_pool_create (trail->pool);
@@ -554,12 +591,16 @@
 
               SVN_ERR (rep_read_range (fs, rep_key, buf,
                                        offset, &amount, trail));
+ apr_md5_update (&context, buf, amount);
               SVN_ERR (svn_fs__bdb_string_append (fs, &new_str, amount, buf,
                                                   trail));
             }
 
           svn_pool_destroy (subpool);
- rep = make_fulltext_rep (new_str, txn_id, trail->pool);
+
+ apr_md5_final (digest, &context);
+
+ rep = make_fulltext_rep (new_str, txn_id, digest, trail->pool);
         }
       else /* unknown kind */
         abort ();
@@ -567,8 +608,15 @@
   else /* no key, so make a new, empty, mutable, fulltext rep */
     {
       const char *new_str = NULL;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ /* Checksum digest for the empty string. */
+ apr_md5_init (&context);
+ apr_md5_final (digest, &context);
+
       SVN_ERR (svn_fs__bdb_string_append (fs, &new_str, 0, NULL, trail));
- rep = make_fulltext_rep (new_str, txn_id, trail->pool);
+ rep = make_fulltext_rep (new_str, txn_id, digest, trail->pool);
     }
 
   /* If we made it here, there's a new rep to store in the fs. */
@@ -1019,6 +1067,12 @@
 {
   svn_fs__representation_t *rep;
   const char *str_key;
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
+
+ /* Get a checksum digest for the empty string. */
+ apr_md5_init (&context);
+ apr_md5_final (digest, &context);
 
   SVN_ERR (svn_fs__bdb_read_rep (&rep, fs, rep_key, trail));
 
@@ -1038,6 +1092,7 @@
 
       /* Else, clear the string the rep has. */
       SVN_ERR (svn_fs__bdb_string_clear (fs, str_key, trail));
+ memcpy (rep->checksum, digest, MD5_DIGESTSIZE);
     }
   else if (rep->kind == svn_fs__rep_kind_delta)
     {
@@ -1052,7 +1107,7 @@
          behind it, and replace it in the filesystem. */
       str_key = NULL;
       SVN_ERR (svn_fs__bdb_string_append (fs, &str_key, 0, NULL, trail));
- rep = make_fulltext_rep (str_key, txn_id, trail->pool);
+ rep = make_fulltext_rep (str_key, txn_id, digest, trail->pool);
       SVN_ERR (svn_fs__bdb_write_rep (fs, rep_key, rep, trail));
 
       /* Now delete those old strings. */
@@ -1231,6 +1286,9 @@
   /* TARGET's original string keys */
   apr_array_header_t *orig_str_keys;
   
+ /* The digest for the representation's fulltext contents. */
+ unsigned char rep_digest[MD5_DIGESTSIZE];
+
   /* MD5 digest */
   const unsigned char *digest;
 
@@ -1349,6 +1407,9 @@
       SVN_ERR (delta_string_keys (&orig_str_keys, old_rep, pool));
     else /* unknown kind */
       abort ();
+
+ /* Save the checksum, since the new rep needs it. */
+ memcpy (rep_digest, old_rep->checksum, MD5_DIGESTSIZE);
   }
 
   /* Hook the new strings we wrote into the rest of the filesystem by
@@ -1361,7 +1422,10 @@
 
     new_rep.kind = svn_fs__rep_kind_delta;
     new_rep.txn_id = NULL;
- new_rep.checksum = NULL;
+
+ /* Migrate the old rep's checksum to the new rep. */
+ memcpy (new_rep.checksum, rep_digest, MD5_DIGESTSIZE);
+
     chunks = apr_array_make (pool, windows->nelts, sizeof (chunk));
 
     /* Loop through the windows we wrote, creating and adding new
@@ -1404,7 +1468,8 @@
                        const char *rep_key,
                        trail_t *trail)
 {
- /* ### todo: Make this thing `delta'-aware! */
+ /* ### todo: Make this thing `delta'-aware! */
+ /* (Uh, what does that mean? -kfogel, 6 Jan 2003) */
   svn_fs__representation_t *rep;
   svn_stream_t *source_stream; /* stream to read the source */
   svn_stream_t *target_stream; /* stream to write the fulltext */
@@ -1414,6 +1479,8 @@
   apr_pool_t *subpool;
   char *buf;
 
+ struct apr_md5_ctx_t context;
+ unsigned char digest[MD5_DIGESTSIZE];
 
   /* Read the rep skel. */
   SVN_ERR (svn_fs__bdb_read_rep (&rep, fs, rep_key, trail));
@@ -1439,6 +1506,7 @@
   source_stream = svn_fs__rep_contents_read_stream (fs, rep_key, 0,
                                                     trail, trail->pool);
 
+ apr_md5_init (&context);
   subpool = svn_pool_create (trail->pool);
   buf = apr_palloc (subpool, SVN_STREAM_CHUNK_SIZE);
   do
@@ -1447,6 +1515,7 @@
 
       len = SVN_STREAM_CHUNK_SIZE;
       SVN_ERR (svn_stream_read (source_stream, buf, &len));
+ apr_md5_update (&context, buf, len);
       len_read = len;
       SVN_ERR (svn_stream_write (target_stream, buf, &len));
       if (len_read != len)
@@ -1457,10 +1526,17 @@
   while (len);
   svn_pool_destroy (subpool);
 
+ apr_md5_final (digest, &context);
+
+ if (! checksums_match (rep->checksum, digest))
+ svn_error_createf
+ (SVN_ERR_FS_CORRUPT, NULL,
+ "svn_fs__rep_undeltify: checksum mismatch");
+
   /* Now `target_baton.key' has the key of the new string. We
      should hook it into the representation. So we make a new rep,
      write it out... */
- rep = make_fulltext_rep (target_baton.key, NULL, trail->pool);
+ rep = make_fulltext_rep (target_baton.key, NULL, digest, trail->pool);
   SVN_ERR (svn_fs__bdb_write_rep (fs, rep_key, rep, trail));
 
   /* ...then we delete our original strings. */
Index: subversion/libsvn_fs/fs.h
===================================================================
--- subversion/libsvn_fs/fs.h (revision 4270)
+++ subversion/libsvn_fs/fs.h (working copy)
@@ -215,9 +215,9 @@
      regardless of how the rep stores the data under the hood. It is
      independent of the storage (fulltext, delta, whatever).
 
- If NULL, then for compatibility behave as though the absent
- checksum matches the expected checksum. */
- const char *checksum;
+ If all the bytes are 0, then for compatibility behave as though
+ this checksum matches the expected checksum. */
+ unsigned char checksum[MD5_DIGESTSIZE];
 
   /* kind-specific stuff */
   union
Index: subversion/libsvn_fs/util/fs_skels.c
===================================================================
--- subversion/libsvn_fs/util/fs_skels.c (revision 4270)
+++ subversion/libsvn_fs/util/fs_skels.c (working copy)
@@ -161,7 +161,7 @@
      optionally a CHECKSUM (which is a list form). */
   header = skel->children;
   header_len = svn_fs__list_length (header);
- if (! (((header_len == 2) /* 2 means checksum absent */
+ if (! (((header_len == 2) /* 2 means old repository, checksum absent */
           && (header->children->is_atom)
           && (header->children->next->is_atom))
          || ((header_len == 3) /* 3 means checksum present */
@@ -478,9 +478,16 @@
   
   /* CHECKSUM */
   if (header_skel->children->next->next)
- rep->checksum = apr_pstrmemdup (pool,
- header_skel->children->next->next->data,
- header_skel->children->next->next->len);
+ {
+ memcpy (rep->checksum,
+ header_skel->children->next->next->children->next->data,
+ MD5_DIGESTSIZE);
+ }
+ else
+ {
+ /* Older repository, no checksum, so manufacture an all-zero checksum */
+ memset (rep->checksum, 0, MD5_DIGESTSIZE);
+ }
   
   /* KIND-SPECIFIC stuff */
   if (rep->kind == svn_fs__rep_kind_fulltext)
@@ -888,17 +895,15 @@
       those parts first. **/
   
   /* CHECKSUM */
- if (rep->checksum)
- {
- skel_t *checksum_skel = svn_fs__make_empty_list (pool);
-
- svn_fs__prepend (svn_fs__mem_atom
- (rep->checksum,
- MD5_DIGESTSIZE / sizeof (*(rep->checksum)), pool),
- checksum_skel);
- svn_fs__prepend (svn_fs__str_atom ("md5", pool), checksum_skel);
- svn_fs__prepend (checksum_skel, header_skel);
- }
+ {
+ skel_t *checksum_skel = svn_fs__make_empty_list (pool);
+ svn_fs__prepend (svn_fs__mem_atom
+ (rep->checksum,
+ MD5_DIGESTSIZE / sizeof (*(rep->checksum)), pool),
+ checksum_skel);
+ svn_fs__prepend (svn_fs__str_atom ("md5", pool), checksum_skel);
+ svn_fs__prepend (checksum_skel, header_skel);
+ }
   
   /* TXN */
   if (rep->txn_id)
Index: subversion/tests/libsvn_fs/strings-reps-test.c
===================================================================
--- subversion/tests/libsvn_fs/strings-reps-test.c (revision 4270)
+++ subversion/tests/libsvn_fs/strings-reps-test.c (working copy)
@@ -178,8 +178,14 @@
   struct rep_args new_args;
   struct rep_args args;
   struct rep_args read_args;
- const char *new_rep = "((fulltext 0 ) a83t2Z0)";
+
   const char *rep = "((fulltext 0 ) kfogel31337)";
+ const char *new_rep_before = "((fulltext 0 ) a83t2Z0)";
+ /* The zeros in the checksum make it look like this afterwards.
+ (This test now also confirms the introduction of the checksum
+ placeholder; and eventually will confirm the checksum itself. */
+ const char *new_rep_after = "((fulltext 0 (md5 16 ";
+
   svn_stringbuf_t *skel_data;
   svn_fs_t *fs;
 
@@ -194,7 +200,8 @@
 
   /* Set up transaction baton */
   new_args.fs = fs;
- new_args.skel = svn_fs__parse_skel ((char *)new_rep, strlen (new_rep), pool);
+ new_args.skel = svn_fs__parse_skel ((char *)new_rep_before,
+ strlen (new_rep_before), pool);
   new_args.key = NULL;
 
   /* Write new rep to reps table. */
@@ -219,10 +226,10 @@
                              "error reading new representation");
   
   skel_data = svn_fs__unparse_skel (read_args.skel, pool);
- if (strcmp (skel_data->data, new_rep))
+ if (strcmp (skel_data->data, new_rep_after))
     return svn_error_createf (SVN_ERR_FS_GENERAL, NULL,
                               "representation corrupted (\"%s\" != \"%s\")",
- skel_data->data, new_rep);
+ skel_data->data, new_rep_after);
   
   /* Set up transaction baton for re-writing reps. */
   args.fs = new_args.fs;

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Mon Jan 6 23:36:24 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.