Hi!
A new work-in-progress patch for cvs2svn.
ChangeLog (relative to rev 3516):
1) includes a bug fix that prevents separate commits to the same file
from being combined into one
2) remembers the branch for each file in the .*revs files
3) remembers the tags and branch points for each file in the .*revs files
4) a file-by-file copy is done to create each tag and branch.
I am now working on a new system that will optimize 3) and 4) and do
whole-tree copies when possible.
Mark
--- ../cvs2svn/cvs2svn/cvs2svn.py 2002-10-28 06:58:47.000000000 +0100
+++ cvs2svn.py 2002-10-28 07:05:23.000000000 +0100
@@ -24,6 +24,8 @@
trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
+branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
+vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
DATAFILE = 'cvs2svn-data'
REVS_SUFFIX = '.revs'
@@ -59,9 +61,54 @@
# revision -> [timestamp, author, operation, old-timestamp]
self.rev_data = { }
self.prev = { }
+ self.branch_names = {}
+ self.taglist = {}
+ self.branchlist = {}
+
+ def set_branch_name(self, revision, name):
+ self.branch_names[revision] = name
+
+ def get_branch_name(self, revision):
+ brev = revision[:revision.rindex(".")];
+ if not self.branch_names.has_key(brev):
+ return None
+ return self.branch_names[brev]
+
+ def add_branch_point(self, revision, branch_name):
+ if not self.branchlist.has_key(revision):
+ self.branchlist[revision] = []
+ self.branchlist[revision].append(branch_name)
+
+ def add_cvs_branch(self, revision, branch_name):
+ last_dot = revision.rfind(".");
+ branch_rev = revision[:last_dot];
+ last2_dot = branch_rev.rfind(".");
+ branch_rev = branch_rev[:last2_dot] + revision[last_dot:];
+ self.set_branch_name(branch_rev, branch_name)
+ self.add_branch_point(branch_rev[:last2_dot], branch_name)
+
+ def get_tags(self, revision):
+ if self.taglist.has_key(revision):
+ return self.taglist[revision]
+ else:
+ return []
+
+ def get_branches(self, revision):
+ if self.branchlist.has_key(revision):
+ return self.branchlist[revision]
+ else:
+ return []
def define_tag(self, name, revision):
self.tags.write('%s %s %s\n' % (name, revision, self.fname))
+ if branch_tag.match(revision):
+ self.add_cvs_branch(revision, name)
+ elif vendor_tag.match(revision):
+ self.set_branch_name(revision, name)
+ else:
+ if not self.taglist.has_key(revision):
+ self.taglist[revision] = [];
+ self.taglist[revision].append(name)
def define_revision(self, revision, timestamp, author, state,
branches, next):
@@ -137,14 +184,17 @@
# for this time and log message.
self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
- self.revs.write('%08lx %s %s %s %s\n' % (timestamp, digest,
- op, revision, self.fname))
+ branch_name = self.get_branch_name(revision)
+
+ write_revs_line(self.revs,
+ timestamp, digest, op, revision, self.fname, branch_name,
+ self.get_tags(revision), self.get_branches(revision))
def branch_path(ctx, branch_name = None):
- if branch_name:
- return ctx.branches_base + '/' + branch_name + '/'
- else:
+ if branch_name == None:
return ctx.trunk_base + '/'
+ else:
+ return ctx.branches_base + '/' + branch_name + '/'
def relative_name(cvsroot, fname):
l = len(cvsroot)
@@ -272,12 +322,16 @@
class Commit:
def __init__(self):
+ self.files = { }
self.changes = [ ]
self.deletes = [ ]
self.t_min = 1<<30
self.t_max = 0
- def add(self, t, op, file, rev):
+ def has_file(self, fname):
+ return self.files.has_key(fname)
+
+ def add(self, t, op, file, rev, branch_name, tags, branches):
# record the time range of this commit
if t < self.t_min:
self.t_min = t
@@ -285,20 +339,21 @@
self.t_max = t
if op == OP_CHANGE:
- self.changes.append((file, rev))
+ self.changes.append((file, rev, branch_name, tags, branches))
else:
# OP_DELETE
- self.deletes.append((file, rev))
+ self.deletes.append((file, rev, branch_name, tags, branches))
+ self.files[file] = 1
def get_metadata(self, pool):
# by definition, the author and log message must be the same for all
# items that went into this commit. therefore, just grab any item from
# our record of changes/deletes.
if self.changes:
- file, rev = self.changes[0]
+ file, rev, br, tags, branches = self.changes[0]
else:
# there better be one...
- file, rev = self.deletes[0]
+ file, rev, br, tags, branches = self.deletes[0]
# now, fetch the author/log from the ,v file
rip = RevInfoParser()
@@ -314,19 +369,22 @@
def commit(self, t_fs, ctx):
# commit this transaction
+ ##print "CHANGES: ", repr(self.changes)
+ ##print "DELETES: ", repr(self.deletes)
+
print 'committing: %s, over %d seconds' % (time.ctime(self.t_min),
self.t_max - self.t_min)
if ctx.dry_run:
- for f, r in self.changes:
+ for f, r, br, tags, branches in self.changes:
# compute a repository path. ensure we have a leading "/" and drop
# the ,v from the file name
- repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
print ' changing %s : %s' % (r, repos_path)
- for f, r in self.deletes:
+ for f, r, br, tags, branches in self.deletes:
# compute a repository path. ensure we have a leading "/" and drop
# the ,v from the file name
- repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
print ' deleting %s : %s' % (r, repos_path)
print ' (skipped; dry run enabled)'
return
@@ -343,10 +401,10 @@
# create a pool for each file; it will be cleared on each iteration
f_pool = util.svn_pool_create(c_pool)
- for f, r in self.changes:
+ for f, r, br, tags, branches in self.changes:
# compute a repository path. ensure we have a leading "/" and drop
# the ,v from the file name
- repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
#print 'DEBUG:', repos_path
print ' changing %s : %s' % (r, repos_path)
@@ -418,10 +476,10 @@
# remember what we just did, for the next iteration
lastcommit = (repos_path, r)
- for f, r in self.deletes:
+ for f, r, br, tags, branches in self.deletes:
# compute a repository path. ensure we have a leading "/" and drop
# the ,v from the file name
- repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
print ' deleting %s : %s' % (r, repos_path)
@@ -450,6 +508,73 @@
print ' CONFLICTS:', `conflicts`
print ' new revision:', new_rev
+ # make a new transaction for the tags
+ rev = fs.youngest_rev(t_fs, c_pool)
+ txn = fs.begin_txn(t_fs, rev, c_pool)
+ root = fs.txn_root(txn, c_pool)
+
+ for f, r, br, tags, branches in self.changes:
+ for tag in tags:
+ tag_path = ctx.tags_base + '/' + tag + '/' + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
+
+ print "tagging", tag, "to", tag_path, "from", repos_path
+
+ t_root = fs.revision_root(t_fs, rev, f_pool);
+
+ ### hmm. need to clarify OS path separators vs FS path separators
+ dirname = os.path.dirname(tag_path)
+ if dirname != '/':
+ # get the components of the path (skipping the leading '/')
+ parts = string.split(dirname[1:], os.sep)
+ for i in range(1, len(parts) + 1):
+ # reassemble the pieces, adding a leading slash
+ parent_dir = '/' + string.join(parts[:i], '/')
+ if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
+ print ' making dir:', parent_dir
+ fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM BRANCH?
+
+ fs.copy(t_root, repos_path, root, tag_path, f_pool)
+
+ # clear the pool after each copy
+ util.svn_pool_clear(f_pool)
+
+ for f, r, br, tags, branches in self.changes:
+ for br2 in branches:
+ new_branch_path = branch_path(ctx, br2) + relative_name(ctx.cvsroot, f[:-2])
+ repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
+
+ print "branching", r, "to", new_branch_path, "from", repos_path
+
+ t_root = fs.revision_root(t_fs, rev, f_pool);
+
+ ### hmm. need to clarify OS path separators vs FS path separators
+ dirname = os.path.dirname(new_branch_path)
+ if dirname != '/':
+ # get the components of the path (skipping the leading '/')
+ parts = string.split(dirname[1:], os.sep)
+ for i in range(1, len(parts) + 1):
+ # reassemble the pieces, adding a leading slash
+ parent_dir = '/' + string.join(parts[:i], '/')
+ if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
+ print ' making dir:', parent_dir
+ fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM BRANCH?
+
+ fs.copy(t_root, repos_path, root, new_branch_path, f_pool)
+
+ # clear the pool after each copy
+ util.svn_pool_clear(f_pool)
+
+ for f, r, br, tags, branches in self.deletes:
+ for br2 in branches:
+ new_branch_path = branch_path(ctx, br2) + relative_name(ctx.cvsroot, f[:-2])
+ print "file:", f, "created on branch:", br2, "revision:", r
+
+ conflicts, new_rev = fs.commit_txn(txn)
+ if conflicts != '\n':
+ print ' CONFLICTS:', `conflicts`
+ print ' new revision:', new_rev
+
# done with the commit and file pools
util.svn_pool_destroy(c_pool)
@@ -486,16 +611,46 @@
resync[digest] = [ [t1_l, t1_u, t2] ]
return resync
+def write_revs_line(output,
+ timestamp, digest, op, revision, fname,
+ branch_name, tags, branches):
+ if not branch_name:
+ branch_name = "*"
+ output.write('%08lx %s %s %s %s' % (timestamp, digest,
+ op, revision, branch_name))
+ output.write(' %d ' % (len(tags)));
+ for tag in tags:
+ output.write('%s ' % (tag));
+ output.write('%d ' % (len(branches)));
+ for branch in branches:
+ output.write('%s ' % (branch));
+ output.write('%s\n' % fname);
+
def parse_revs_line(line):
- timestamp = int(line[:8], 16)
- id = line[9:DIGEST_END_IDX]
- op = line[DIGEST_END_IDX + 1]
- idx = string.find(line, ' ', DIGEST_END_IDX + 3)
- rev = line[DIGEST_END_IDX+3:idx]
- fname = line[idx+1:-1]
+ data = line.split(' ', 6)
+ ##print "DATA", repr(data)
+ timestamp = int(data[0], 16)
+ id = data[1]
+ op = data[2]
+ rev = data[3]
+ branch_name = data[4]
+ if branch_name == "*":
+ branch_name = None
+
+ ntags = int(data[5])
+ tags = data[6].split(' ', ntags + 1)
+ nbranches = int(tags[ntags])
+ branches = tags[ntags + 1].split(' ', nbranches + 1)
+ fname = branches[nbranches][:-1]
+
+ ##print "TAGS: ", ntags, repr(tags);
+ ##print "BRANCHES: ", nbranches, repr(branches)
+ ##print "FNAME: ", repr(fname)
- return timestamp, id, op, rev, fname
+ tags = tags[:ntags]
+ branches = branches[:nbranches]
+ return timestamp, id, op, rev, fname, branch_name, tags, branches
def pass1(ctx):
cd = CollectData(ctx.cvsroot, DATAFILE)
@@ -519,7 +674,8 @@
# process the revisions file, looking for items to clean up
for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX):
- timestamp, digest, op, rev, fname = parse_revs_line(line)
+ timestamp, digest, op, rev, fname, branch_name, tags, branches = \
+ parse_revs_line(line)
if not resync.has_key(digest):
output.write(line)
continue
@@ -529,8 +685,8 @@
for record in resync[digest]:
if record[0] <= timestamp <= record[1]:
# bingo! remap the time on this (record[2] is the new time).
- output.write('%08lx %s %s %s %s\n'
- % (record[2], digest, op, rev, fname))
+ write_revs_line(output,
+ record[2], digest, op, rev, fname, branch_name, tags, branches)
print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
% (relative_name(ctx.cvsroot, fname),
@@ -568,18 +724,14 @@
count = 0
for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
- timestamp, id, op, rev, fname = parse_revs_line(line)
-
- ### only handle changes on the trunk for now
- if not trunk_rev.match(rev):
- ### technically, the timestamp on this could/should cause a flush.
- ### don't worry about it; the next item will handle it
- continue
+ timestamp, id, op, rev, fname, branch_name, tags, branches = \
+ parse_revs_line(line)
# scan for commits to process
process = [ ]
for scan_id, scan_c in commits.items():
- if scan_c.t_max + COMMIT_THRESHOLD < timestamp:
+ if scan_c.t_max + COMMIT_THRESHOLD < timestamp or \
+ scan_c.has_file(fname):
process.append((scan_c.t_max, scan_c))
del commits[scan_id]
@@ -594,7 +746,7 @@
c = commits[id]
else:
c = commits[id] = Commit()
- c.add(timestamp, op, fname, rev)
+ c.add(timestamp, op, fname, rev, branch_name, tags, branches)
# if there are any pending commits left, then flush them
if commits:
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Mon Oct 28 07:42:38 2002