[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

FSFS changed-paths handling - review

From: Julian Foad <julianfoad_at_btopenworld.com>
Date: Wed, 1 Oct 2014 16:35:27 +0100

I took a look through the FSFS code that deals with the changed-path lists, with a special interest in how it reports no-op changes. In the course of this code inspection I found a number of issues that want clarification and a few possible bugs. I have not tried to produce actual buggy behaviour from these observations.

Some of this code is new or changed for format 7, and some is old.

Comments, concerns and suggested code updates together in the form of a diff:

[[[
Index: subversion/include/svn_fs.h
===================================================================
--- subversion/include/svn_fs.h    (revision 1628514)
+++ subversion/include/svn_fs.h    (working copy)
@@ -1427,38 +1427,61 @@ typedef enum svn_fs_path_change_kind_t
 /** Change descriptor.
  *
  * @note Fields may be added to the end of this structure in future
  * versions.  Therefore, to preserve binary compatibility, users
  * should not directly allocate structures of this type.
  *
+ * @note The @c text_mod, @c prop_mod and @c mergeinfo_mod flags mean the
+ * text, properties and mergeinfo property (respectively) were "touched"
+ * by the commit API; this does not mean the new value is different from
+ * the old value.
+ *
  * @since New in 1.6. */
 typedef struct svn_fs_path_change2_t
 {
   /** node revision id of changed path */
   const svn_fs_id_t *node_rev_id;

   /** kind of change */
   svn_fs_path_change_kind_t change_kind;

-  /** were there text mods? */
+  /** was the text touched?
+   * For node_kind=dir: always false. For node_kind=file:
+   *   modify:      true iff text touched.
+   *   add (copy):  true iff text touched.
+   *   add (plain): always true.
+   *   delete:      always false.
+   *   replace:     as for the add/copy part of the replacement.
+   */
   svn_boolean_t text_mod;

-  /** were there property mods? */
+  /** were the properties touched?
+   *   modify:      true iff props touched.
+   *   add (copy):  true iff props touched.
+   *   add (plain): true iff props touched.
+   *   delete:      always false.
+   *   replace:     as for the add/copy part of the replacement.
+   */
   svn_boolean_t prop_mod;

   /** what node kind is the path?
       (Note: it is legal for this to be #svn_node_unknown.) */
   svn_node_kind_t node_kind;

   /** Copyfrom revision and path; this is only valid if copyfrom_known
    * is true. */
   svn_boolean_t copyfrom_known;
   svn_revnum_t copyfrom_rev;
   const char *copyfrom_path;

-  /** were there mergeinfo mods?
+  /** was the mergeinfo property touched?
+   *   modify:      } true iff svn:mergeinfo property add/del/mod
+   *   add (copy):  }          and fs format supports mergeinfo.
+   *   add (plain): }
+   *   delete:      always false.
+   *   replace:     as for the add/copy part of the replacement.
    * (Note: Pre-1.9 repositories will report #svn_tristate_unknown.)
    * @since New in 1.9. */
   svn_tristate_t mergeinfo_mod;
   /* NOTE! Please update svn_fs_path_change2_create() when adding new
      fields here. */
 } svn_fs_path_change2_t;
Index: subversion/libsvn_fs_fs/low_level.c
===================================================================
--- subversion/libsvn_fs_fs/low_level.c    (revision 1628514)
+++ subversion/libsvn_fs_fs/low_level.c    (working copy)
@@ -353,12 +353,15 @@ read_change(change_t **change_p,
       return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                               _("Invalid prop-mod flag in rev-file"));
     }

   /* Get the mergeinfo-mod flag if given.  Otherwise, the next thing
      is the path starting with a slash. */
+  /* ### Must initialize info->mergeinfo_mod to 'unknown' rather than 0,
+         else write_change_entry() would assume it's definitely false. */
+  /* info->mergeinfo_mod = svn_tristate_unknown; */
   if (*last_str != '/')
     {
       str = svn_cstring_tokenize(" ", &last_str);
       if (str == NULL)
         return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                 _("Invalid changes line in rev-file"));
@@ -470,9 +473,10 @@

-/* Write a single change entry, path PATH, change CHANGE, and copyfrom
-   string COPYFROM, into the file specified by FILE.  Only include the
+/* Write a single change entry, path PATH, change CHANGE, to STREAM.
+
+   Only include the
    node kind field if INCLUDE_NODE_KIND is true.  Only include the
    mergeinfo-mod field if INCLUDE_MERGEINFO_MODS is true.  All temporary
    allocations are in SCRATCH_POOL. */
 static svn_error_t *
 write_change_entry(svn_stream_t *stream,
                    const char *path,
@@ -563,18 +567,34 @@ svn_fs_fs__write_changes(svn_stream_t *s
   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
   fs_fs_data_t *ffd = fs->fsap_data;
   svn_boolean_t include_node_kinds =
       ffd->format >= SVN_FS_FS__MIN_KIND_IN_CHANGED_FORMAT;
   svn_boolean_t include_mergeinfo_mods =
       ffd->format >= SVN_FS_FS__MIN_MERGEINFO_IN_CHANGES_FORMAT;
+      /* ### Rename s/_CHANGES_/_CHANGED_/ to match MIN_KIND_...? */
   apr_array_header_t *sorted_changed_paths;
   int i;

   /* For the sake of the repository administrator sort the changes so
      that the final file is deterministic and repeatable, however the
      rest of the FSFS code doesn't require any particular order here. */
+
+  /* ### This sorting would not be effective if the caller writes changes
+         in multiple batches. This sorting would not be safe if CHANGES
+         included multiple changes for the same path.
+
+         While building up the txn we call this from svn_fs_fs__add_change(),
+         multiple changes per path overall, but only one change at a time.
+         While writing the final proto-rev file we call this from
+         write_final_changed_path_info() with all the changes at once, but
+         only one change per path.
+
+         Both of the current callers get safe and effective behaviour, but
+         consider sorting in write_final_changed_path_info() instead to
+         make all this clearer. (This internal API would have to change.)
+   */
   sorted_changed_paths = svn_sort__hash(changes,
                                         svn_sort_compare_items_lexically,
                                         scratch_pool);

   /* Write all items to disk in the new order. */
   for (i = 0; i < sorted_changed_paths->nelts; ++i)
Index: subversion/libsvn_fs_fs/transaction.c
===================================================================
--- subversion/libsvn_fs_fs/transaction.c    (revision 1628676)
+++ subversion/libsvn_fs_fs/transaction.c    (working copy)
@@ -624,20 +624,29 @@ replace_change
 /* Copy the contents of NEW_CHANGE into OLD_CHANGE assuming that both
-   belong to the same path.  Allocate copies in POOL.
+   belong to the same path.  Do not copy the change_kind field.
+
+   ### For semantic simplicity, I suggest making this copy the *whole*
+       contents, and renaming it to 'copy_change' or 'svn_fs_path_change2_dup'
+       if it also allocates a new object (caller adjustment needed). It
+       could call memcpy and then duplicate the sub-objects, simplifying
+       the implementation too.
+
+   Allocate copies in POOL.
  */
 static void
 replace_change(svn_fs_path_change2_t *old_change,
                const svn_fs_path_change2_t *new_change,
                apr_pool_t *pool)
 {
   old_change->node_kind = new_change->node_kind;
   old_change->node_rev_id = svn_fs_fs__id_copy(new_change->node_rev_id,
                                                pool);
   old_change->text_mod = new_change->text_mod;
   old_change->prop_mod = new_change->prop_mod;
   old_change->mergeinfo_mod = new_change->mergeinfo_mod;
+  /* ### What about copyfrom_known? (Is it *always* TRUE?) */
   if (new_change->copyfrom_rev == SVN_INVALID_REVNUM)
     {
       old_change->copyfrom_rev = SVN_INVALID_REVNUM;
       old_change->copyfrom_path = NULL;
     }
   else
@@ -714,18 +723,22 @@ fold_change(apr_hash_t *changed_paths,

         case svn_fs_path_change_delete:
           if (old_change->change_kind == svn_fs_path_change_add)
             {
               /* If the path was introduced in this transaction via an
                  add, and we are deleting it, just remove the path
-                 altogether. */
+                 altogether.  (The caller will delete any child paths.) */
               old_change = NULL;
             }
           else
             {
-              /* A deletion overrules all previous changes. */
+              /* A deletion overrules a previous change (modify/replace). */
+              /* ### Why not call replace_change(old_change, info, pool); ? */
+              /* ### What about node_rev_id? Would be wrong after a replace? */
+              /* ### What about node_kind? Could be wrong after a replace? */
+              /* ### What about copyfrom_known? (Is it *always* TRUE?) */
               old_change->change_kind = svn_fs_path_change_delete;
               old_change->text_mod = info->text_mod;
               old_change->prop_mod = info->prop_mod;
               old_change->mergeinfo_mod = info->mergeinfo_mod;
               old_change->copyfrom_rev = SVN_INVALID_REVNUM;
               old_change->copyfrom_path = NULL;
@@ -739,12 +752,15 @@ fold_change(apr_hash_t *changed_paths,
           replace_change(old_change, info, pool);
           old_change->change_kind = svn_fs_path_change_replace;
           break;

         case svn_fs_path_change_modify:
         default:
+          /* If the new change modifies some attribute of the node, set
+             the corresponding flag, whether it already was set or not.
+             Note: We do not reset a flag to FALSE if a change is undone. */
           if (info->text_mod)
             old_change->text_mod = TRUE;
           if (info->prop_mod)
             old_change->prop_mod = TRUE;
           if (info->mergeinfo_mod == svn_tristate_true)
             old_change->mergeinfo_mod = svn_tristate_true;
@@ -761,12 +777,13 @@ fold_change(apr_hash_t *changed_paths,
          structure from the internal one (in the hash's pool), and dup
          the path into the hash's pool, too. */
       new_change = apr_pmemdup(pool, info, sizeof(*new_change));
       new_change->node_rev_id = svn_fs_fs__id_copy(info->node_rev_id, pool);
       if (info->copyfrom_path)
         new_change->copyfrom_path = apr_pstrdup(pool, info->copyfrom_path);
+      /* ### Use copy_change() or svn_fs_path_change2_dup() instead. */

       /* Add this path.  The API makes no guarantees that this (new) key
         will not be retained.  Thus, we copy the key into the target pool
         to ensure a proper lifetime.  */
       apr_hash_set(changed_paths,
                    apr_pstrmemdup(pool, path->data, path->len), path->len,
@@ -1585,16 +1602,24 @@ svn_fs_fs__add_change(svn_fs_t *fs,
   change->text_mod = text_mod;
   change->prop_mod = prop_mod;
   change->mergeinfo_mod = mergeinfo_mod
                         ? svn_tristate_true
                         : svn_tristate_false;
   change->node_kind = node_kind;
+  /* ### copyfrom_known remains FALSE, but svn_fs_fs__write_changes()
+         ignores it. Should set TRUE for clarity, and also document
+         svn_fs_fs__write_changes() as ignoring it? */
   change->copyfrom_rev = copyfrom_rev;
   change->copyfrom_path = apr_pstrdup(pool, copyfrom_path);
+  /* ### COPYFROM_PATH may be NULL. apr_pstrdup < 1.5 does not promise
+         null-safety, although the implementation since 0.9.0 is null-safe.
+         For consistency, should conditionalize it. */

   svn_hash_sets(changes, path, change);
   SVN_ERR(svn_fs_fs__write_changes(svn_stream_from_aprfile2(file, TRUE, pool),
                                    fs, changes, FALSE, pool));
+  /* ### There is no call (in this context) that sets the 'terminate_list'
+         parameter true as required by the doc string. */

   return svn_io_file_close(file, pool);
 }

Index: subversion/libsvn_fs_fs/tree.c
===================================================================
--- subversion/libsvn_fs_fs/tree.c    (revision 1628514)
+++ subversion/libsvn_fs_fs/tree.c    (working copy)
@@ -1534,13 +1534,13 @@ fs_change_node_prop(svn_fs_root_t *root,
                     const svn_string_t *value,
                     apr_pool_t *pool)
 {
   parent_path_t *parent_path;
   apr_hash_t *proplist;
   const svn_fs_fs__id_part_t *txn_id;
-  svn_boolean_t modeinfo_mod = FALSE;
+  svn_boolean_t modeinfo_mod = FALSE; /* ### "mergeinfo_mod"? */

   if (! root->is_txn_root)
     return SVN_FS__NOT_TXN(root);
   txn_id = root_txn_id(root);

   path = svn_fs__canonicalize_abspath(path, pool);
]]]

- Julian
Received on 2014-10-01 17:42:40 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.