[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: Changes to cvs2svn + [patch] cvs2svn.py

From: Karl Fogel <kfogel_at_newton.ch.collab.net>
Date: 2002-11-21 07:14:51 CET

"Laramie Leavitt" <lar@leavitt.us> writes:
> Diff against revision 3843 of:
> http://svn.collab.net/repos/svn/trunk/tools/cvs2svn/cvs2svn.py
>
> Note that the diff is a little dirty--some of the changes are
> definitely spurious.

Thanks! Can you write a log message for the change? (The HACKING
file gives more guidance on this.) It's harder to review without a
log message.

-K

> --- cvs2svn.py.orig 2002-11-20 22:14:45.000000000 -0800
> +++ cvs2svn.py 2002-11-20 22:15:02.000000000 -0800
> @@ -1,9 +1,10 @@
> -#!/usr/bin/env python
> +#!/usr/bin/env python2.2
> #
> # cvs2svn: ...
> #
>
> -import rcsparse
> +# With a small number of experimental patches applied...
> +
> import os
> import sys
> import sha
> @@ -13,17 +14,22 @@
> import string
> import getopt
> import statcache
> +import rcsparse
>
> from svn import fs, util, _delta, _repos
>
> ### these should go somewhere else. should have SWIG export them.
> -svn_node_none = 0
> -svn_node_file = 1
> -svn_node_dir = 2
> +svn_node_none = 0
> +svn_node_file = 1
> +svn_node_dir = 2
> svn_node_unknown = 3
>
>
> -trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
> +trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
> +branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
> +vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$') # XXX?
> +
> +
>
> DATAFILE = 'cvs2svn-data'
> REVS_SUFFIX = '.revs'
> @@ -35,7 +41,7 @@
> SVNROOT = 'svnroot'
> ATTIC = os.sep + 'Attic'
>
> -COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
> +COMMIT_THRESHOLD = 3 * 60 # flush a commit if a 3 minute gap occurs
>
> OP_DELETE = 'D'
> OP_CHANGE = 'C'
> @@ -59,9 +65,54 @@
> # revision -> [timestamp, author, operation, old-timestamp]
> self.rev_data = { }
> self.prev = { }
> + self.branch_names = {}
> + self.taglist = {}
> + self.branchlist = { }
> +
> + def set_branch_name(self, revision, name):
> + self.branch_names[revision] = name
> +
> + def get_branch_name(self, revision):
> + brev = revision[:revision.rindex(".")];
> + if not self.branch_names.has_key(brev):
> + return None
> + return self.branch_names[brev]
> +
> + def add_branch_point(self, revision, branch_name):
> + if not self.branchlist.has_key(revision):
> + self.branchlist[revision] = []
> + self.branchlist[revision].append(branch_name)
> +
> + def add_cvs_branch(self, revision, branch_name):
> + last_dot = revision.rfind(".");
> + branch_rev = revision[:last_dot];
> + last2_dot = branch_rev.rfind(".");
> + branch_rev = branch_rev[:last2_dot] + revision[last_dot:];
> + self.set_branch_name(branch_rev, branch_name)
> + self.add_branch_point(branch_rev[:last2_dot], branch_name)
> +
> + def get_tags(self, revision):
> + if self.taglist.has_key(revision):
> + return self.taglist[revision]
> + else:
> + return []
>
> + def get_branches(self, revision):
> + if self.branchlist.has_key(revision):
> + return self.branchlist[revision]
> + else:
> + return []
> +
> def define_tag(self, name, revision):
> self.tags.write('%s %s %s\n' % (name, revision, self.fname))
> + if branch_tag.match(revision):
> + self.add_cvs_branch(revision, name)
> + elif vendor_tag.match(revision):
> + self.set_branch_name(revision, name)
> + else:
> + if not self.taglist.has_key(revision):
> + self.taglist[revision] = [];
> + self.taglist[revision].append(name)
>
> def define_revision(self, revision, timestamp, author, state,
> branches, next):
> @@ -108,8 +159,8 @@
> # shove the previous revision back in time (and any before it that
> # may need to shift).
> while t_p >= t_c:
> - self.rev_data[prev][0] = t_c - 1 # new timestamp
> - self.rev_data[prev][3] = t_p # old timestamp
> + self.rev_data[prev][0] = t_c - 1 # new timestamp
> + self.rev_data[prev][3] = t_p # old timestamp
>
> print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
> % (relative_name(self.cvsroot, self.fname),
> @@ -119,7 +170,7 @@
> prev = self.prev[current]
> if not prev:
> break
> - t_c = t_c - 1 # self.rev_data[current][0]
> + t_c = t_c - 1 # self.rev_data[current][0]
> t_p = self.rev_data[prev][0]
>
> # break from the for-loop
> @@ -137,14 +188,19 @@
> # for this time and log message.
> self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
>
> - self.revs.write('%08lx %s %s %s %s\n' % (timestamp, digest,
> - op, revision, self.fname))
> + branch_name = self.get_branch_name(revision)
> +
> + write_revs_line(self.revs,
> + timestamp, digest, op, revision, self.fname,
> branch_name,
> + self.get_tags(revision), self.get_branches(revision))
> +
> +
>
> def branch_path(ctx, branch_name = None):
> - if branch_name:
> - return ctx.branches_base + '/' + branch_name + '/'
> - else:
> + if branch_name == None:
> return ctx.trunk_base + '/'
> + else:
> + return ctx.branches_base + '/' + branch_name + '/'
>
> def relative_name(cvsroot, fname):
> l = len(cvsroot)
> @@ -171,10 +227,11 @@
> p.parse(open(pathname), cd)
> stats[0] = stats[0] + 1
>
> +
> class RevInfoParser(rcsparse.Sink):
> def __init__(self):
> - self.authors = { } # revision -> author
> - self.logs = { } # revision -> log message
> + self.authors = { } # revision -> author
> + self.logs = { } # revision -> log message
>
> def define_revision(self, revision, timestamp, author, state,
> branches, next):
> @@ -274,37 +331,83 @@
> def __init__(self):
> self.changes = [ ]
> self.deletes = [ ]
> + self.loghash = {}
> +
> self.t_min = 1<<30
> self.t_max = 0
>
> - def add(self, t, op, file, rev):
> + def ready_to_process( self, tstamp, fname ):
> + if self.t_max + COMMIT_THRESHOLD < tstamp:
> + return 1
> + for x in self.changes:
> + if x[0] == fname:
> + return 1
> + for x in self.deletes:
> + if x[0] == fname:
> + return 1
> + return 0
> +
> + def add(self, t, op, file, rev, branch_name, tags, branches):
> # record the time range of this commit
> if t < self.t_min:
> self.t_min = t
> if t > self.t_max:
> self.t_max = t
>
> + # Keep track of all files...
> if op == OP_CHANGE:
> - self.changes.append((file, rev))
> + self.changes.append((file, rev, branch_name, tags, branches))
> else:
> # OP_DELETE
> - self.deletes.append((file, rev))
> + self.deletes.append((file, rev, branch_name, tags, branches))
> +
>
> - def get_metadata(self, pool):
> + def get_logmessages(self, mods, ctx, modifier):
> + # get the log messages for those mods (change or delete)
> + authorhash = {}
> + currlog = ''
> + lastlog = ''
> +
> + rip = RevInfoParser()
> +
> + for file, rev, br, tags, branches in mods:
> + repos_path = relative_name(ctx.cvsroot, file[:-2])
> + # now, fetch the author/log from the ,v file
> + rip.parse_cvs_file(file)
> + author = rip.authors[rev]
> + authorhash[author] = 1
> + currlog = rip.logs[rev].strip()
> +
> + tmp = self.loghash.get(currlog, [])
> + # ugly hack
> + if 'M' == modifier and '1.1' == rev:
> + tmp.append('A %s (%s)' % (repos_path, rev))
> + else:
> + tmp.append('%s %s (%s)' % (modifier, repos_path, rev))
> + self.loghash[currlog] = tmp
> + return author
> +
> + def get_metadata(self, pool, ctx):
> # by definition, the author and log message must be the same for all
> # items that went into this commit. therefore, just grab any item from
> # our record of changes/deletes.
> if self.changes:
> - file, rev = self.changes[0]
> + file, rev, br, tags, branches = self.changes[0]
> else:
> - # there better be one...
> - file, rev = self.deletes[0]
> + # there better be one...
> + file, rev, br, tags, branches = self.deletes[0]
>
> + log = ''
> +
> # now, fetch the author/log from the ,v file
> - rip = RevInfoParser()
> - rip.parse_cvs_file(file)
> - author = rip.authors[rev]
> - log = rip.logs[rev]
> + if self.changes:
> + author = self.get_logmessages(self.changes, ctx, 'M')
> + if self.deletes:
> + author = self.get_logmessages(self.deletes, ctx, 'D')
> +
> + for msg, files in self.loghash.items():
> + log += '\n'.join(files)
> + log += ":\n " + msg + '\n'
>
> # format the date properly
> a_t = util.apr_time_ansi_put(self.t_max)[1]
> @@ -312,21 +415,22 @@
>
> return author, log, date
>
> +
> def commit(self, t_fs, ctx):
> # commit this transaction
> print 'committing: %s, over %d seconds' % (time.ctime(self.t_min),
> self.t_max - self.t_min)
> -
> +
> if ctx.dry_run:
> - for f, r in self.changes:
> + for f, r, br, tags, branches in self.changes:
> # compute a repository path. ensure we have a leading "/" and drop
> # the ,v from the file name
> - repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot,
> f[:-2])
> print ' changing %s : %s' % (r, repos_path)
> - for f, r in self.deletes:
> + for f, r, br, tags, branches in self.deletes:
> # compute a repository path. ensure we have a leading "/" and drop
> # the ,v from the file name
> - repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot,
> f[:-2])
> print ' deleting %s : %s' % (r, repos_path)
> print ' (skipped; dry run enabled)'
> return
> @@ -334,8 +438,8 @@
> # create a pool for the entire commit
> c_pool = util.svn_pool_create(ctx.pool)
>
> - rev = fs.youngest_rev(t_fs, c_pool)
> - txn = fs.begin_txn(t_fs, rev, c_pool)
> + rev = fs.youngest_rev(t_fs, c_pool)
> + txn = fs.begin_txn(t_fs, rev, c_pool)
> root = fs.txn_root(txn, c_pool)
>
> lastcommit = (None, None)
> @@ -343,10 +447,10 @@
> # create a pool for each file; it will be cleared on each iteration
> f_pool = util.svn_pool_create(c_pool)
>
> - for f, r in self.changes:
> + for f, r, br, tags, branches in self.changes:
> # compute a repository path. ensure we have a leading "/" and drop
> # the ,v from the file name
> - repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
> #print 'DEBUG:', repos_path
>
> print ' changing %s : %s' % (r, repos_path)
> @@ -418,10 +522,10 @@
> # remember what we just did, for the next iteration
> lastcommit = (repos_path, r)
>
> - for f, r in self.deletes:
> + for f, r, br, tags, branches in self.deletes:
> # compute a repository path. ensure we have a leading "/" and drop
> # the ,v from the file name
> - repos_path = branch_path(ctx) + relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot, f[:-2])
>
> print ' deleting %s : %s' % (r, repos_path)
>
> @@ -436,7 +540,7 @@
> util.svn_pool_clear(f_pool)
>
> # get the metadata for this commit
> - author, log, date = self.get_metadata(c_pool)
> + author, log, date = self.get_metadata(c_pool, ctx )
> fs.change_txn_prop(txn, 'svn:author', author, c_pool)
> fs.change_txn_prop(txn, 'svn:log', log, c_pool)
>
> @@ -446,6 +550,84 @@
> fs.change_rev_prop(t_fs, new_rev, 'svn:date', date, c_pool)
>
> ### how come conflicts is a newline?
> + if len( conflicts ) > 1:
> + print ' CONFLICTS:', conflicts
> + print ' new revision:', new_rev
> +
> +
> + # don't do tags if we don't need to.
> + dotags = 0
> + for f, r, br, tags, branches in self.changes:
> + dotags += len( tags )
> + dotags += len( branches )
> +
> + if not dotags:
> + util.svn_pool_destroy(c_pool)
> + return
> +
> + # make a new transaction for the tags
> + rev = fs.youngest_rev(t_fs, c_pool)
> + txn = fs.begin_txn(t_fs, rev, c_pool)
> + root = fs.txn_root(txn, c_pool)
> +
> + for f, r, br, tags, branches in self.changes:
> + for tag in tags:
> + tag_path = ctx.tags_base + '/' + tag + '/' +
> relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot,
> f[:-2])
> +
> + print "tagging", tag, "to", tag_path, "from", repos_path
> +
> + t_root = fs.revision_root(t_fs, rev, f_pool);
> +
> + ### hmm. need to clarify OS path separators vs FS path separators
> + dirname = os.path.dirname(tag_path)
> + if dirname != '/':
> + # get the components of the path (skipping the leading '/')
> + parts = string.split(dirname[1:], os.sep)
> + for i in range(1, len(parts) + 1):
> + # reassemble the pieces, adding a leading slash
> + parent_dir = '/' + string.join(parts[:i], '/')
> + if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
> + print ' making dir:', parent_dir
> + fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM
> BRANCH?
> +
> + fs.copy(t_root, repos_path, root, tag_path, f_pool)
> +
> + # clear the pool after each copy
> + util.svn_pool_clear(f_pool)
> +
> + for f, r, br, tags, branches in self.changes:
> + for br2 in branches:
> + new_branch_path = branch_path(ctx, br2) +
> relative_name(ctx.cvsroot, f[:-2])
> + repos_path = branch_path(ctx, br) + relative_name(ctx.cvsroot,
> f[:-2])
> +
> + print "branching", r, "to", new_branch_path, "from", repos_path
> +
> + t_root = fs.revision_root(t_fs, rev, f_pool);
> +
> + ### hmm. need to clarify OS path separators vs FS path separators
> + dirname = os.path.dirname(new_branch_path)
> + if dirname != '/':
> + # get the components of the path (skipping the leading '/')
> + parts = string.split(dirname[1:], os.sep)
> + for i in range(1, len(parts) + 1):
> + # reassemble the pieces, adding a leading slash
> + parent_dir = '/' + string.join(parts[:i], '/')
> + if fs.check_path(root, parent_dir, f_pool) == svn_node_none:
> + print ' making dir:', parent_dir
> + fs.make_dir(root, parent_dir, f_pool) ### XXX COPY FROM
> BRANCH?
> +
> + fs.copy(t_root, repos_path, root, new_branch_path, f_pool)
> +
> + # clear the pool after each copy
> + util.svn_pool_clear(f_pool)
> +
> + for f, r, br, tags, branches in self.deletes:
> + for br2 in branches:
> + new_branch_path = branch_path(ctx, br2) +
> relative_name(ctx.cvsroot, f[:-2])
> + print "file:", f, "created on branch:", br2, "revision:", r
> +
> + conflicts, new_rev = fs.commit_txn(txn)
> if conflicts != '\n':
> print ' CONFLICTS:', `conflicts`
> print ' new revision:', new_rev
> @@ -486,15 +668,39 @@
> resync[digest] = [ [t1_l, t1_u, t2] ]
> return resync
>
> -def parse_revs_line(line):
> - timestamp = int(line[:8], 16)
> - id = line[9:DIGEST_END_IDX]
> - op = line[DIGEST_END_IDX + 1]
> - idx = string.find(line, ' ', DIGEST_END_IDX + 3)
> - rev = line[DIGEST_END_IDX+3:idx]
> - fname = line[idx+1:-1]
> +def write_revs_line(output,
> + timestamp, digest, op, revision, fname,
> + branch_name, tags, branches):
> + if not branch_name:
> + branch_name = "*"
> + output.write('%08lx %s %s %s %s' % (timestamp, digest,
> + op, revision, branch_name))
> + output.write(' %d ' % (len(tags)));
> + for tag in tags:
> + output.write('%s ' % (tag));
> + output.write('%d ' % (len(branches)));
> + for branch in branches:
> + output.write('%s ' % (branch));
> + output.write('%s\n' % fname);
>
> - return timestamp, id, op, rev, fname
> +def parse_revs_line(line):
> + data = line.split(' ', 6)
> + ##print "DATA", repr(data)
> + timestamp = int(data[0], 16)
> + id = data[1]
> + op = data[2]
> + rev = data[3]
> + branch_name = data[4]
> + if branch_name == "*":
> + branch_name = None
> + ntags = int(data[5])
> + tags = data[6].split(' ', ntags + 1)
> + nbranches = int(tags[ntags])
> + branches = tags[ntags + 1].split(' ', nbranches + 1)
> + fname = branches[nbranches][:-1]
> + tags = tags[:ntags]
> + branches = branches[:nbranches]
> + return timestamp, id, op, rev, fname, branch_name, tags, branches
>
>
> def pass1(ctx):
> @@ -519,7 +725,9 @@
>
> # process the revisions file, looking for items to clean up
> for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX):
> - timestamp, digest, op, rev, fname = parse_revs_line(line)
> + timestamp, digest, op, rev, fname, branch_name, tags, branches = \
> + parse_revs_line(line)
> +
> if not resync.has_key(digest):
> output.write(line)
> continue
> @@ -529,8 +737,9 @@
> for record in resync[digest]:
> if record[0] <= timestamp <= record[1]:
> # bingo! remap the time on this (record[2] is the new time).
> - output.write('%08lx %s %s %s %s\n'
> - % (record[2], digest, op, rev, fname))
> + write_revs_line(output,
> + record[2], digest, op, rev, fname, branch_name,
> + tags, branches)
>
> print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
> % (relative_name(ctx.cvsroot, fname),
> @@ -568,18 +777,13 @@
> count = 0
>
> for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
> - timestamp, id, op, rev, fname = parse_revs_line(line)
> -
> - ### only handle changes on the trunk for now
> - if not trunk_rev.match(rev):
> - ### technically, the timestamp on this could/should cause a flush.
> - ### don't worry about it; the next item will handle it
> - continue
> + timestamp, id, op, rev, fname, branch_name, tags, branches = \
> + parse_revs_line(line)
>
> # scan for commits to process
> process = [ ]
> for scan_id, scan_c in commits.items():
> - if scan_c.t_max + COMMIT_THRESHOLD < timestamp:
> + if scan_c.ready_to_process( timestamp, fname ):
> process.append((scan_c.t_max, scan_c))
> del commits[scan_id]
>
> @@ -588,13 +792,16 @@
> for t_max, c in process:
> c.commit(t_fs, ctx)
> count = count + len(process)
> + process = []
>
> # add this item into the set of commits we're assembling
> if commits.has_key(id):
> c = commits[id]
> else:
> c = commits[id] = Commit()
> - c.add(timestamp, op, fname, rev)
> + c.add(timestamp, op, fname, rev, branch_name, tags, branches)
> +
> + print 'flushing final commits...'
>
> # if there are any pending commits left, then flush them
> if commits:
> @@ -616,9 +823,11 @@
> pass4,
> ]
>
> +
> class _ctx:
> pass
>
> +
> def convert(pool, ctx, start_pass=1):
> "Convert a CVS repository to an SVN repository."
>
> @@ -700,3 +909,4 @@
>
> if __name__ == '__main__':
> main()
> +
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
> For additional commands, e-mail: dev-help@subversion.tigris.org

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Nov 21 07:50:39 2002

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.