I have collected some of the various changes to cvs2svn
posted on this list, and added just a couple of my own.
This is the result--cvs2svn that converts tags and branches.
It converted a rather small 1300 revision CVS repository
rather well, I believe, although it has been pointed out
that not all of the changes are necessary & that there are
still bugs in the branching/tagging portion of the script.
(Or at least, in the bits that I collected.)
It is improved a little from the various sources--it only
does branch/tag operations when there is something to do,
resulting in approximately half as many commits as the old one.
Also, for my own use, I use a 3 minute gap instead of 5
because most of my changes were using a local repository.
Diff against revision 3843 of:
http://svn.collab.net/repos/svn/trunk/tools/cvs2svn/cvs2svn.py
Note that the diff is a little dirty--some of the changes are
definitely spurious.
Laramie Leavitt.
--- cvs2svn.py.orig	2002-11-20 22:14:45.000000000 -0800
+++ cvs2svn.py	2002-11-20 22:15:02.000000000 -0800
@@ -1,9 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2.2
 #
 # cvs2svn: ...
 #
-import rcsparse
+# With a small number of experimental patches applied...
+
 import os
 import sys
 import sha
@@ -13,17 +14,22 @@
 import string
 import getopt
 import statcache
+import rcsparse
 from svn import fs, util, _delta, _repos
 ### these should go somewhere else. should have SWIG export them.
-svn_node_none = 0
-svn_node_file = 1
-svn_node_dir = 2
+svn_node_none    = 0
+svn_node_file    = 1
+svn_node_dir     = 2
 svn_node_unknown = 3
-trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
+trunk_rev  = re.compile('^[0-9]+\\.[0-9]+$')
+branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
+vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$') # XXX?
+
+
 DATAFILE = 'cvs2svn-data'
 REVS_SUFFIX = '.revs'
@@ -35,7 +41,7 @@
 SVNROOT = 'svnroot'
 ATTIC = os.sep + 'Attic'
-COMMIT_THRESHOLD = 5 * 60	# flush a commit if a 5 minute gap occurs
+COMMIT_THRESHOLD = 3 * 60        # flush a commit if a 3 minute gap occurs
 OP_DELETE = 'D'
 OP_CHANGE = 'C'
@@ -59,9 +65,54 @@
     # revision -> [timestamp, author, operation, old-timestamp]
     self.rev_data = { }
     self.prev = { }
+    self.branch_names = {}
+    self.taglist = {}
+    self.branchlist = { }
+
+  def set_branch_name(self, revision, name):
+    self.branch_names[revision] = name
+
+  def get_branch_name(self, revision):
+    brev = revision[:revision.rindex(".")];
+    if not self.branch_names.has_key(brev):
+      return None
+    return self.branch_names[brev]
+
+  def add_branch_point(self, revision, branch_name):
+    if not self.branchlist.has_key(revision):
+      self.branchlist[revision] = []
+    self.branchlist[revision].append(branch_name)
+
+  def add_cvs_branch(self, revision, branch_name):
+    last_dot = revision.rfind(".");
+    branch_rev = revision[:last_dot];
+    last2_dot = branch_rev.rfind(".");
+    branch_rev = branch_rev[:last2_dot] + revision[last_dot:];
+    self.set_branch_name(branch_rev, branch_name)
+    self.add_branch_point(branch_rev[:last2_dot], branch_name)
+
+  def get_tags(self, revision):
+    if self.taglist.has_key(revision):
+      return self.taglist[revision]
+    else:
+      return []
+  def get_branches(self, revision):
+    if self.branchlist.has_key(revision):
+      return self.branchlist[revision]
+    else:
+      return []
+
   def define_tag(self, name, revision):
     self.tags.write('%s %s %s\n' % (name, revision, self.fname))
+    if branch_tag.match(revision):
+      self.add_cvs_branch(revision, name)
+    elif vendor_tag.match(revision):
+      self.set_branch_name(revision, name)
+    else:
+      if not self.taglist.has_key(revision):
+        self.taglist[revision] = [];
+      self.taglist[revision].append(name)
   def define_revision(self, revision, timestamp, author, state,
                       branches, next):
@@ -108,8 +159,8 @@
           # shove the previous revision back in time (and any before it that
           # may need to shift).
           while t_p >= t_c:
-            self.rev_data[prev][0] = t_c - 1	# new timestamp
-            self.rev_data[prev][3] = t_p	# old timestamp
+            self.rev_data[prev][0] = t_c - 1        # new timestamp
+            self.rev_data[prev][3] = t_p        # old timestamp
             print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
                   % (relative_name(self.cvsroot, self.fname),
@@ -119,7 +170,7 @@
             prev = self.prev[current]
             if not prev:
               break
-            t_c = t_c - 1		# self.rev_data[current][0]
+            t_c = t_c - 1                # self.rev_data[current][0]
             t_p = self.rev_data[prev][0]
           # break from the for-loop
@@ -137,14 +188,19 @@
       # for this time and log message.
       self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
-    self.revs.write('%08lx %s %s %s %s\n' % (timestamp, digest,
-                                             op, revision, self.fname))
+    branch_name = self.get_branch_name(revision)
+
+    write_revs_line(self.revs,
+                    timestamp, digest, op, revision, self.fname, branch_name,
+                    self.get_tags(revision), self.get_branches(revision))
+
+
 def branch_path(ctx, branch_name = None):
-  if branch_name:
-     return ctx.branches_base + '/' + branch_name + '/'
-  else:
+  if branch_name == None:
      return ctx.trunk_base + '/'
+  else:
+     return ctx.branches_base + '/' + branch_name + '/'
 def relative_name(cvsroot, fname):
   l = len(cvsroot)
@@ -171,10 +227,11 @@
     p.parse(open(pathname), cd)
     stats[0] = stats[0] + 1
+
 class RevInfoParser(rcsparse.Sink):
   def __init__(self):
-    self.authors = { }	# revision -> author
-    self.logs = { }	# revision -> log message
+    self.authors = { }        # revision -> author
+    self.logs = { }        # revision -> log message
   def define_revision(self, revision, timestamp, author, state,
                       branches, next):
@@ -274,37 +331,83 @@
   def __init__(self):
     self.changes = [ ]
     self.deletes = [ ]
+    self.loghash = {}
+
     self.t_min = 1<<30
     self.t_max = 0
-  def add(self, t, op, file, rev):
+  def ready_to_process( self, tstamp, fname ):
+    if self.t_max + COMMIT_THRESHOLD < tstamp:
+      return 1
+    for x in self.changes:
+      if x[0] == fname:
+	return 1
+    for x in self.deletes:
+      if x[0] == fname:
+	return 1
+    return 0
+
+  def add(self, t, op, file, rev, branch_name, tags, branches):
     # record the time range of this commit
     if t < self.t_min:
       self.t_min = t
     if t > self.t_max:
       self.t_max = t
+    # Keep track of all files...
     if op == OP_CHANGE:
-      self.changes.append((file, rev))
+      self.changes.append((file, rev, branch_name, tags, branches))
     else:
       # OP_DELETE
-      self.deletes.append((file, rev))
+      self.deletes.append((file, rev, branch_name, tags, branches))
+
-  def get_metadata(self, pool):
+  def get_logmessages(self, mods, ctx, modifier):
+    # get the log messages for those mods (change or delete)
+    authorhash = {}
+    currlog = ''
+    lastlog = ''
+
+    rip = RevInfoParser()
+
+    for file, rev, br, tags, branches in mods:
+      repos_path = relative_name(ctx.cvsroot, file[:-2])
+      # now, fetch the author/log from the ,v file
+      rip.parse_cvs_file(file)
+      author = rip.authors[rev]
+      authorhash[author] = 1
+      currlog = rip.logs[rev].strip()
+
+      tmp = self.loghash.get(currlog, [])
+      # ugly hack
+      if 'M' == modifier and '1.1' == rev:
+        tmp.append('A %s (%s)' % (repos_path, rev))
+      else:
+        tmp.append('%s %s (%s)' % (modifier, repos_path, rev))
+      self.loghash[currlog] = tmp
+    return author
+
+  def get_metadata(self, pool, ctx):
     # by definition, the author and log message must be the same for all
     # items that went into this commit. therefore, just grab any item from
     # our record of changes/deletes.
     if self.changes:
-      file, rev = self.changes[0]
+      file, rev, br, tags, branches = self.changes[0]
     else:
-      # there better be one...
-      file, rev = self.deletes[0]
+       # there better be one...
+      file, rev, br, tags, branches = self.deletes[0]
+    log = ''
+
     # now, fetch the author/log from the ,v file
-    rip = RevInfoParser()
-    rip.parse_cvs_file(file)
-    author = rip.authors[rev]
-    log = rip.logs[rev]
+    if self.changes:
+      author = self.get_logmessages(self.changes, ctx, 'M')
+    if self.deletes:
+      author = self.get_logmessages(self.deletes, ctx, 'D')
+
+    for msg, files in self.loghash.items():
+      log += '\n'.join(files)
+      log += ":\n  " + msg + '\n'
     # format the date properly
     a_t = util.apr_time_ansi_put(self.t_max)[1]
@@ -312,21 +415,22 @@
     return author, log, date
+
   def commit(self, t_fs, ctx):
     # commit this transaction
     print 'committing: %s, over %d seconds' % (time.ctime(self.t_min),
                                                self.t_max - self.t_min)
-
+
     if ctx.dry_run:
-      for f, r in self.changes:
+      for f, r, br, tags, branches in self.changes:
         # compute a repository path. ensure we have a leading "/" and drop
         # the ,v from the file name
-        repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+        repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
         print '    changing %s : %s' % (r, repos_path)
-      for f, r in self.deletes:
+      for f, r, br, tags, branches in self.deletes:
         # compute a repository path. ensure we have a leading "/" and drop
         # the ,v from the file name
-        repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+        repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
         print '    deleting %s : %s' % (r, repos_path)
       print '    (skipped; dry run enabled)'
       return
@@ -334,8 +438,8 @@
     # create a pool for the entire commit
     c_pool = util.svn_pool_create(ctx.pool)
-    rev = fs.youngest_rev(t_fs, c_pool)
-    txn = fs.begin_txn(t_fs, rev, c_pool)
+    rev  = fs.youngest_rev(t_fs, c_pool)
+    txn  = fs.begin_txn(t_fs, rev, c_pool)
     root = fs.txn_root(txn, c_pool)
     lastcommit = (None, None)
@@ -343,10 +447,10 @@
     # create a pool for each file; it will be cleared on each iteration
     f_pool = util.svn_pool_create(c_pool)
-    for f, r in self.changes:
+    for f, r, br, tags, branches in self.changes:
       # compute a repository path. ensure we have a leading "/" and drop
       # the ,v from the file name
-      repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+      repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
       #print 'DEBUG:', repos_path
       print '    changing %s : %s' % (r, repos_path)
@@ -418,10 +522,10 @@
       # remember what we just did, for the next iteration
       lastcommit = (repos_path, r)
-    for f, r in self.deletes:
+    for f, r, br, tags, branches in self.deletes:
       # compute a repository path. ensure we have a leading "/" and drop
       # the ,v from the file name
-      repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+      repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
       print '    deleting %s : %s' % (r, repos_path)
@@ -436,7 +540,7 @@
       util.svn_pool_clear(f_pool)
     # get the metadata for this commit
-    author, log, date = self.get_metadata(c_pool)
+    author, log, date = self.get_metadata(c_pool, ctx )
     fs.change_txn_prop(txn, 'svn:author', author, c_pool)
     fs.change_txn_prop(txn, 'svn:log', log, c_pool)
@@ -446,6 +550,84 @@
     fs.change_rev_prop(t_fs, new_rev, 'svn:date', date, c_pool)
     ### how come conflicts is a newline?
+    if len( conflicts ) > 1:
+      print '    CONFLICTS:', conflicts
+    print '    new revision:', new_rev
+
+
+    # don't do tags if we don't need to.
+    dotags = 0
+    for f, r, br, tags, branches in self.changes:
+      dotags += len( tags )
+      dotags += len( branches )
+
+    if not dotags:
+      util.svn_pool_destroy(c_pool)
+      return
+
+    # make a new transaction for the tags
+    rev = fs.youngest_rev(t_fs, c_pool)
+    txn = fs.begin_txn(t_fs, rev, c_pool)
+    root = fs.txn_root(txn, c_pool)
+
+    for f, r, br, tags, branches in self.changes:
+      for tag in tags:
+        tag_path = ctx.tags_base + '/' + tag + '/' + relative_name(ctx.cvsroot, f[:-2])
+        repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
+
+        print "tagging", tag, "to", tag_path, "from", repos_path
+
+        t_root = fs.revision_root(t_fs, rev, f_pool);
+
+        ### hmm. need to clarify OS path separators vs FS path separators
+        dirname = os.path.dirname(tag_path)
+        if dirname != '/':
+          # get the components of the path (skipping the leading '/')
+          parts = string.split(dirname[1:], os.sep)
+          for i in range(1, len(parts) + 1):
+            # reassemble the pieces, adding a leading slash
+            parent_dir = '/' + string.join(parts[:i], '/')
+            if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
+              print '    making dir:', parent_dir
+              fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM BRANCH?
+
+        fs.copy(t_root, repos_path, root, tag_path, f_pool)
+
+        # clear the pool after each copy
+        util.svn_pool_clear(f_pool)
+
+    for f, r, br, tags, branches in self.changes:
+      for br2 in branches:
+        new_branch_path = branch_path(ctx, br2) + relative_name(ctx.cvsroot, f[:-2])
+        repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
+
+        print "branching", r, "to", new_branch_path, "from", repos_path
+
+        t_root = fs.revision_root(t_fs, rev, f_pool);
+
+        ### hmm. need to clarify OS path separators vs FS path separators
+        dirname = os.path.dirname(new_branch_path)
+        if dirname != '/':
+          # get the components of the path (skipping the leading '/')
+          parts = string.split(dirname[1:], os.sep)
+          for i in range(1, len(parts) + 1):
+            # reassemble the pieces, adding a leading slash
+            parent_dir = '/' + string.join(parts[:i], '/')
+            if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
+              print '    making dir:', parent_dir
+              fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM BRANCH?
+
+        fs.copy(t_root, repos_path, root, new_branch_path, f_pool)
+
+        # clear the pool after each copy
+        util.svn_pool_clear(f_pool)
+
+    for f, r, br, tags, branches in self.deletes:
+      for br2 in branches:
+        new_branch_path = branch_path(ctx, br2) + relative_name(ctx.cvsroot, f[:-2])
+        print "file:", f, "created on branch:", br2, "revision:", r
+
+    conflicts, new_rev = fs.commit_txn(txn)
     if conflicts != '\n':
       print '    CONFLICTS:', `conflicts`
     print '    new revision:', new_rev
@@ -486,15 +668,39 @@
       resync[digest] = [ [t1_l, t1_u, t2] ]
   return resync
-def parse_revs_line(line):
-  timestamp = int(line[:8], 16)
-  id = line[9:DIGEST_END_IDX]
-  op = line[DIGEST_END_IDX + 1]
-  idx = string.find(line, ' ', DIGEST_END_IDX + 3)
-  rev = line[DIGEST_END_IDX+3:idx]
-  fname = line[idx+1:-1]
+def write_revs_line(output,
+                    timestamp, digest, op, revision, fname,
+                    branch_name, tags, branches):
+  if not branch_name:
+    branch_name = "*"
+  output.write('%08lx %s %s %s %s' % (timestamp, digest,
+                                      op, revision, branch_name))
+  output.write(' %d ' % (len(tags)));
+  for tag in tags:
+    output.write('%s ' % (tag));
+  output.write('%d ' % (len(branches)));
+  for branch in branches:
+    output.write('%s ' % (branch));
+  output.write('%s\n' % fname);
-  return timestamp, id, op, rev, fname
+def parse_revs_line(line):
+  data = line.split(' ', 6)
+  ##print "DATA", repr(data)
+  timestamp = int(data[0], 16)
+  id = data[1]
+  op = data[2]
+  rev = data[3]
+  branch_name = data[4]
+  if branch_name == "*":
+    branch_name = None
+  ntags = int(data[5])
+  tags = data[6].split(' ', ntags + 1)
+  nbranches = int(tags[ntags])
+  branches = tags[ntags + 1].split(' ', nbranches + 1)
+  fname = branches[nbranches][:-1]
+  tags = tags[:ntags]
+  branches = branches[:nbranches]
+  return timestamp, id, op, rev, fname, branch_name, tags, branches
 def pass1(ctx):
@@ -519,7 +725,9 @@
   # process the revisions file, looking for items to clean up
   for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX):
-    timestamp, digest, op, rev, fname = parse_revs_line(line)
+    timestamp, digest, op, rev, fname, branch_name, tags, branches = \
+      parse_revs_line(line)
+
     if not resync.has_key(digest):
       output.write(line)
       continue
@@ -529,8 +737,9 @@
     for record in resync[digest]:
       if record[0] <= timestamp <= record[1]:
         # bingo! remap the time on this (record[2] is the new time).
-        output.write('%08lx %s %s %s %s\n'
-                     % (record[2], digest, op, rev, fname))
+        write_revs_line(output,
+                        record[2], digest, op, rev, fname, branch_name,
+                        tags, branches)
         print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
               % (relative_name(ctx.cvsroot, fname),
@@ -568,18 +777,13 @@
   count = 0
   for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
-    timestamp, id, op, rev, fname = parse_revs_line(line)
-
-    ### only handle changes on the trunk for now
-    if not trunk_rev.match(rev):
-      ### technically, the timestamp on this could/should cause a flush.
-      ### don't worry about it; the next item will handle it
-      continue
+    timestamp, id, op, rev, fname, branch_name, tags, branches = \
+               parse_revs_line(line)
     # scan for commits to process
     process = [ ]
     for scan_id, scan_c in commits.items():
-      if scan_c.t_max + COMMIT_THRESHOLD < timestamp:
+      if scan_c.ready_to_process( timestamp, fname ):
         process.append((scan_c.t_max, scan_c))
         del commits[scan_id]
@@ -588,13 +792,16 @@
     for t_max, c in process:
       c.commit(t_fs, ctx)
     count = count + len(process)
+    process = []
     # add this item into the set of commits we're assembling
     if commits.has_key(id):
       c = commits[id]
     else:
       c = commits[id] = Commit()
-    c.add(timestamp, op, fname, rev)
+    c.add(timestamp, op, fname, rev, branch_name, tags, branches)
+
+  print 'flushing final commits...'
   # if there are any pending commits left, then flush them
   if commits:
@@ -616,9 +823,11 @@
   pass4,
   ]
+
 class _ctx:
   pass
+
 def convert(pool, ctx, start_pass=1):
   "Convert a CVS repository to an SVN repository."
@@ -700,3 +909,4 @@
 if __name__ == '__main__':
   main()
+
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Nov 21 07:34:23 2002