Holy cow!  I logged in tonight specifically to do just this... And
found it already done by Tobias.  Zounds.
Have I mentioned lately how much I *love* this project?
Thanks, bliss!
-Karl
bliss@tigris.org writes:
> Author: bliss
> Date: Sat Feb 14 06:56:23 2004
> New Revision: 8643
> 
> Modified:
>    trunk/tools/cvs2svn/cvs2svn.py
> Log:
> Clean up the cvs2svn.py database handling by creating a new class that
> looks like a regular hash (or anydbm), but does the marshalling
> implicitly.
> 
> * tools/cvs2svn/cvs2svn.py:
> 
>   (Database): New class.
> 
>   (RepositoryMirror.__init__, RepositoryMirror._stabilize_directory,
>   RepositoryMirror.probe_path, RepositoryMirror.change_path,
>   RepositoryMirror.delete_path, RepositoryMirror.is_prunable,
>   SymbolicNameTracker.__init__, SymbolicNameTracker.probe_path,
>   SymbolicNameTracker.bump_rev_count, SymbolicNameTracker.enroot_names,
>   SymbolicNameTracker.close_names, SymbolicNameTracker.cleanup_entries,
>   SymbolicNameTracker.copy_descend, SymbolicNameTracker.fill_name,
>   SymbolicNameTracker.finish, main): Use the new Database class and remove
>   the calls to marshal.loads and marshal.dumps.
> 
> 
> Modified: trunk/tools/cvs2svn/cvs2svn.py
> ==============================================================================
> --- trunk/tools/cvs2svn/cvs2svn.py	(original)
> +++ trunk/tools/cvs2svn/cvs2svn.py	Sat Feb 14 06:56:23 2004
> @@ -131,6 +131,37 @@
>  symbolic_name_re = re.compile('^[a-zA-Z].*$')
>  symbolic_name_transtbl = string.maketrans('/\\',',;')
>  
> +# A custom database used to store simple python objects. If a filename
> +# is passed, the anydbm database will be used, and all objects will be
> +# converted to strings using the marshal module. If filename is None,
> +# an in-memory hash will be used.
> +class Database:
> +  def __init__(self, filename=None):
> +    if filename:
> +      self.need_marshal = 1
> +      self.db = anydbm.open(filename, 'n')
> +    else:
> +      self.need_marshal = 0
> +      self.db = {}
> +
> +  def has_key(self, key):
> +    return self.db.has_key(key)
> +
> +  def __getitem__(self, key):
> +    if self.need_marshal:
> +      return marshal.loads(self.db[key])
> +    else:
> +      return self.db[key]
> +
> +  def __setitem__(self, key, value):
> +    if self.need_marshal:
> +      self.db[key] = marshal.dumps(value)
> +    else:
> +      self.db[key] = value
> +
> +  def __delitem__(self, key):
> +    del self.db[key]
> +
>  class CollectData(rcsparse.Sink):
>    def __init__(self, cvsroot, log_fname_base, default_branches_db):
>      self.cvsroot = cvsroot
> @@ -567,13 +598,13 @@
>    def __init__(self):
>      # This corresponds to the 'revisions' table in a Subversion fs.
>      self.revs_db_file = SVN_REVISIONS_DB
> -    self.revs_db = anydbm.open(self.revs_db_file, 'n')
> +    self.revs_db = Database(self.revs_db_file)
>  
>      # This corresponds to the 'nodes' table in a Subversion fs.  (We
>      # don't need a 'representations' or 'strings' table because we
>      # only track metadata, not file contents.)
>      self.nodes_db_file = NODES_DB
> -    self.nodes_db = anydbm.open(self.nodes_db_file, 'n')
> +    self.nodes_db = Database(self.nodes_db_file)
>  
>      # This tracks which symbolic names the current "head" of a given
>      # filepath could be the origin node for.  When the next commit on
> @@ -581,10 +612,10 @@
>      # originated in the previous version, and signal back to the
>      # caller that the file can no longer be the origin for those names.
>      #
> -    # The values are marshalled tuples, (tags, branches), where each
> -    # value is a list.
> +    # The values are tuples, (tags, branches), where each value is a
> +    # list.
>      self.symroots_db_file = SYMBOLIC_NAME_ROOTS_DB
> -    self.symroots_db = anydbm.open(self.symroots_db_file, 'n')
> +    self.symroots_db = Database(self.symroots_db_file)
>  
>      # When copying a directory (say, to create part of a branch), we
>      # pass change_path() a list of expected entries, so it can remove
> @@ -607,8 +638,8 @@
>      # Init a root directory with no entries at revision 0.
>      self.youngest = 0
>      youngest_key = gen_key()
> -    self.revs_db[str(self.youngest)] = marshal.dumps(youngest_key)
> -    self.nodes_db[youngest_key] = marshal.dumps({})
> +    self.revs_db[str(self.youngest)] = youngest_key
> +    self.nodes_db[youngest_key] = {}
>  
>    def new_revision(self):
>      """Stabilize the current revision, then start the next one.
> @@ -620,7 +651,7 @@
>  
>    def _stabilize_directory(self, key):
>      """Close the directory whose node key is KEY."""
> -    dir = marshal.loads(self.nodes_db[key])
> +    dir = self.nodes_db[key]
>      if dir.has_key(self.mutable_flag):
>        del dir[self.mutable_flag]
>        if dir.has_key(self.approved_entries):
> @@ -628,11 +659,11 @@
>        for entry_key in dir.keys():
>          if not entry_key[0] == '/':
>            self._stabilize_directory(dir[entry_key])
> -      self.nodes_db[key] = marshal.dumps(dir)
> +      self.nodes_db[key] = dir
>  
>    def stabilize_youngest(self):
>      """Stabilize the current revision by removing mutable flags."""
> -    root_key = marshal.loads(self.revs_db[str(self.youngest)])
> +    root_key = self.revs_db[str(self.youngest)]
>      self._stabilize_directory(root_key)
>  
>    def probe_path(self, path, revision=-1, debugging=None):
> @@ -647,8 +678,8 @@
>      if debugging:
>        print "PROBING path: '%s' in %d" % (path, revision)
>  
> -    parent_key = marshal.loads(self.revs_db[str(revision)])
> -    parent = marshal.loads(self.nodes_db[parent_key])
> +    parent_key = self.revs_db[str(revision)]
> +    parent = self.nodes_db[parent_key]
>      previous_component = "/"
>  
>      i = 1
> @@ -665,7 +696,7 @@
>          return None
>  
>        this_entry_key = parent[component]
> -      this_entry_val = marshal.loads(self.nodes_db[this_entry_key])
> +      this_entry_val = self.nodes_db[this_entry_key]
>        parent_key = this_entry_key
>        parent = this_entry_val
>        previous_component = component
> @@ -729,13 +760,13 @@
>  
>      deletions = []
>  
> -    parent_key = marshal.loads(self.revs_db[str(self.youngest)])
> -    parent = marshal.loads(self.nodes_db[parent_key])
> +    parent_key = self.revs_db[str(self.youngest)]
> +    parent = self.nodes_db[parent_key]
>      if not parent.has_key(self.mutable_flag):
>        parent_key = gen_key()
>        parent[self.mutable_flag] = 1
> -      self.nodes_db[parent_key] = marshal.dumps(parent)
> -      self.revs_db[str(self.youngest)] = marshal.dumps(parent_key)
> +      self.nodes_db[parent_key] = parent
> +      self.revs_db[str(self.youngest)] = parent_key
>  
>      for component in components[:-1]:
>        # parent is always mutable at the top of the loop
> @@ -752,8 +783,8 @@
>          # else
>          new_child_key = gen_key()
>          parent[component] = new_child_key
> -        self.nodes_db[new_child_key] = marshal.dumps(self.empty_mutable_thang)
> -        self.nodes_db[parent_key] = marshal.dumps(parent)
> +        self.nodes_db[new_child_key] = self.empty_mutable_thang
> +        self.nodes_db[parent_key] = parent
>          if intermediate_dir_func:
>            intermediate_dir_func(path_so_far)
>  
> @@ -764,13 +795,13 @@
>        # data structures, we could modify self.empty_mutable_thang,
>        # which must not happen.)
>        this_entry_key = parent[component]
> -      this_entry_val = marshal.loads(self.nodes_db[this_entry_key])
> +      this_entry_val = self.nodes_db[this_entry_key]
>        if not this_entry_val.has_key(self.mutable_flag):
>          this_entry_val[self.mutable_flag] = 1
>          this_entry_key = gen_key()
>          parent[component] = this_entry_key
> -        self.nodes_db[this_entry_key] = marshal.dumps(this_entry_val)
> -        self.nodes_db[parent_key] = marshal.dumps(parent)
> +        self.nodes_db[this_entry_key] = this_entry_val
> +        self.nodes_db[parent_key] = parent
>  
>        parent_key = this_entry_key
>        parent = this_entry_val
> @@ -779,7 +810,7 @@
>      # top of the above loop, parent is already mutable.
>      op = OP_ADD
>      if self.symroots_db.has_key(path):
> -      old_names = marshal.loads(self.symroots_db[path])
> +      old_names = self.symroots_db[path]
>      else:
>        old_names = [], []
>      last_component = components[-1]
> @@ -791,7 +822,7 @@
>          return Change(OP_NOOP, old_names[0], old_names[1], deletions)
>        # else
>        op = OP_CHANGE
> -      new_val = marshal.loads(self.nodes_db[parent[last_component]])
> +      new_val = self.nodes_db[parent[last_component]]
>      elif only_if_already_exists:
>        return Change(OP_NOOP, [], [], deletions)
>  
> @@ -817,10 +848,10 @@
>              new_approved_entries[ent] = 1
>        new_val[self.approved_entries] = new_approved_entries
>      parent[last_component] = leaf_key
> -    self.nodes_db[parent_key] = marshal.dumps(parent)
> -    self.symroots_db[path] = marshal.dumps((tags, branches))
> +    self.nodes_db[parent_key] = parent
> +    self.symroots_db[path] = (tags, branches)
>      new_val[self.mutable_flag] = 1
> -    self.nodes_db[leaf_key] = marshal.dumps(new_val)
> +    self.nodes_db[leaf_key] = new_val
>  
>      return Change(op, old_names[0], old_names[1], deletions, copyfrom_rev)
>  
> @@ -856,8 +887,8 @@
>      components = string.split(path, '/')
>      path_so_far = None
>  
> -    parent_key = marshal.loads(self.revs_db[str(self.youngest)])
> -    parent = marshal.loads(self.nodes_db[parent_key])
> +    parent_key = self.revs_db[str(self.youngest)]
> +    parent = self.nodes_db[parent_key]
>  
>      # As we walk down to find the dest, we remember each parent
>      # directory's name and db key, in reverse order: push each new key
> @@ -908,7 +939,7 @@
>  
>        # Otherwise continue downward, dropping breadcrumbs.
>        this_entry_key = parent[component]
> -      this_entry_val = marshal.loads(self.nodes_db[this_entry_key])
> +      this_entry_val = self.nodes_db[this_entry_key]
>        parent_key = this_entry_key
>        parent = this_entry_val
>        parent_chain.insert(0, (component, parent_key))
> @@ -919,7 +950,7 @@
>      if not parent.has_key(last_component):
>        return None, [], []
>      elif self.symroots_db.has_key(path):
> -      old_names = marshal.loads(self.symroots_db[path])
> +      old_names = self.symroots_db[path]
>        del self.symroots_db[path]
>  
>      # The target is present, so remove it and bubble up, making a new
> @@ -929,7 +960,7 @@
>      new_key = None
>      for parent_item in parent_chain:
>        pkey = parent_item[1]
> -      pval = marshal.loads(self.nodes_db[pkey])
> +      pval = self.nodes_db[pkey]
>  
>        # If we're pruning at all, and we're looking at a prunable thing
>        # (and that thing isn't one of our top-level directories --
> @@ -952,14 +983,14 @@
>  
>        prev_entry_name = parent_item[0]
>        if new_key:
> -        self.nodes_db[new_key] = marshal.dumps(pval)
> +        self.nodes_db[new_key] = pval
>  
>      if new_key is None:
>        new_key = gen_key()
> -      self.nodes_db[new_key] = marshal.dumps(self.empty_mutable_thang)
> +      self.nodes_db[new_key] = self.empty_mutable_thang
>  
>      # Install the new root entry.
> -    self.revs_db[str(self.youngest)] = marshal.dumps(new_key)
> +    self.revs_db[str(self.youngest)] = new_key
>  
>      # Sanity check -- this should be a "can't happen".
>      if pruned_count > len(components):
> @@ -1429,9 +1460,9 @@
>  
>    def __init__(self):
>      self.db_file = SYMBOLIC_NAMES_DB
> -    self.db = anydbm.open(self.db_file, 'n')
> +    self.db = Database(self.db_file)
>      self.root_key = gen_key()
> -    self.db[self.root_key] = marshal.dumps({})
> +    self.db[self.root_key] = {}
>  
>      # The keys for the opening and closing revision lists attached to
>      # each directory or file.  Includes "/" so as never to conflict
> @@ -1461,7 +1492,7 @@
>        print "PROBING SYMBOLIC NAME:\n", components
>  
>      parent_key = self.root_key
> -    parent = marshal.loads(self.db[parent_key])
> +    parent = self.db[parent_key]
>      last_component = "/"
>      i = 1
>      for component in components:
> @@ -1476,7 +1507,7 @@
>          sys.exit(1)
>  
>        this_entry_key = parent[component]
> -      this_entry_val = marshal.loads(self.db[this_entry_key])
> +      this_entry_val = self.db[this_entry_key]
>        parent_key = this_entry_key
>        parent = this_entry_val
>        last_component = component
> @@ -1515,7 +1546,7 @@
>  
>      The list is sorted by ascending revision both before and after."""
>  
> -    entry_val = marshal.loads(self.db[item_key])
> +    entry_val = self.db[item_key]
>      
>      if not entry_val.has_key(revlist_key):
>        entry_val[revlist_key] = [(rev, 1)]
> @@ -1535,7 +1566,7 @@
>          rev_counts.append((rev, 1))
>        entry_val[revlist_key] = rev_counts
>  
> -    self.db[item_key] = marshal.dumps(entry_val)
> +    self.db[item_key] = entry_val
>  
>    # The verb form of "root" is "root", but that would be misleading in
>    # this case; and the opposite of "uproot" is presumably "downroot",
> @@ -1556,15 +1587,15 @@
>        parent_key = self.root_key
>        for component in components:
>          self.bump_rev_count(parent_key, svn_rev, opening_key)
> -        parent = marshal.loads(self.db[parent_key])
> +        parent = self.db[parent_key]
>          if not parent.has_key(component):
>            new_child_key = gen_key()
>            parent[component] = new_child_key
> -          self.db[new_child_key] = marshal.dumps({})
> -          self.db[parent_key] = marshal.dumps(parent)
> +          self.db[new_child_key] = {}
> +          self.db[parent_key] = parent
>          # One way or another, parent now has an entry for component.
>          this_entry_key = parent[component]
> -        this_entry_val = marshal.loads(self.db[this_entry_key])
> +        this_entry_val = self.db[this_entry_key]
>          # Swaparoo.
>          parent_key = this_entry_key
>          parent = this_entry_val
> @@ -1599,7 +1630,7 @@
>        parent_key = self.root_key
>        for component in components:
>          self.bump_rev_count(parent_key, svn_rev, closing_key)
> -        parent = marshal.loads(self.db[parent_key])
> +        parent = self.db[parent_key]
>          # Check for a "can't happen".
>          if not parent.has_key(component):
>            sys.stderr.write("%s: in path '%s', value for parent key '%s' "
> @@ -1607,7 +1638,7 @@
>                             % (error_prefix, svn_path, parent_key, component))
>            sys.exit(1)
>          this_entry_key = parent[component]
> -        this_entry_val = marshal.loads(self.db[this_entry_key])
> +        this_entry_val = self.db[this_entry_key]
>          # Swaparoo.
>          parent_key = this_entry_key
>          parent = this_entry_val
> @@ -1702,7 +1733,7 @@
>        if key[0] == '/': # Skip flags
>          continue
>        entry = entries.get(key)
> -      val = marshal.loads(self.db[entry])
> +      val = self.db[entry]
>        scores = self.score_revisions(val.get(opening_key), val.get(closing_key))
>        best_rev = self.best_rev(scores, rev + 1)
>        if rev == best_rev:
> @@ -1732,7 +1763,7 @@
>      ### there's a clearer way to do it?
>  
>      key = parent[entry_name]
> -    val = marshal.loads(self.db[key])
> +    val = self.db[key]
>  
>      if is_tag:
>        opening_key = self.tags_opening_revs_key
> @@ -1777,7 +1808,7 @@
>            del val[opening_key]
>          if val.has_key(closing_key):
>            del val[closing_key]
> -        self.db[key] = marshal.dumps(val)
> +        self.db[key] = val
>  
>      for ent in val.keys():
>        if not ent[0] == '/':
> @@ -1825,7 +1856,7 @@
>      # must be adjusted to match.
>  
>      parent_key = self.root_key
> -    parent = marshal.loads(self.db[parent_key])
> +    parent = self.db[parent_key]
>  
>      # If there are no origin records, then we must've messed up earlier.
>      if not parent.has_key(name):
> @@ -1838,7 +1869,7 @@
>        sys.exit(1)
>  
>      parent_key = parent[name]
> -    parent = marshal.loads(self.db[parent_key])
> +    parent = self.db[parent_key]
>  
>      # All Subversion source paths under the branch start with one of
>      # three things:
> @@ -1876,7 +1907,7 @@
>                          is_tag, jit_new_rev)
>      if parent.has_key(ctx.branches_base):
>        branch_base_key = parent[ctx.branches_base]
> -      branch_base = marshal.loads(self.db[branch_base_key])
> +      branch_base = self.db[branch_base_key]
>        for this_source in branch_base.keys():
>          # We skip special names beginning with '/' for the usual
>          # reason.  We skip cases where (this_source == name) for a
> @@ -1924,7 +1955,7 @@
>      determine the source and destination paths in the Subversion
>      repository."""
>      parent_key = self.root_key
> -    parent = marshal.loads(self.db[parent_key])
> +    parent = self.db[parent_key]
>      # Do all branches first, then all tags.  We don't bother to check
>      # here whether a given name is a branch or a tag, or is done
>      # already; the fill_foo() methods will just do nothing if there's
> @@ -2494,7 +2525,7 @@
>    ctx.print_help = 0
>    ctx.skip_cleanup = 0
>    ctx.cvs_revnums = 0
> -  ctx.default_branches_db = anydbm.open(DEFAULT_BRANCHES_DB, 'n')
> +  ctx.default_branches_db = Database(DEFAULT_BRANCHES_DB)
>  
>    start_pass = 1
>  
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: svn-unsubscribe@subversion.tigris.org
> For additional commands, e-mail: svn-help@subversion.tigris.org
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Sun Feb 15 00:49:02 2004