[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH]: Revised cvs2svn.py patches

From: Daniel Berlin <dan_at_dberlin.org>
Date: 2002-02-08 07:25:11 CET

In order to make Greg's life easier in terms of extracting pieces of the
patch, I cut the cvs parser down to only store the author and revision
log, and removed the file format change to store the author (since we get
it elsewhere now).

The only functionality change from last time is that it also now sets the
date on the commits properly (since it gets overwritten when we commit
the transaction, we have to go and change it afterwards).

HTH,
Dan

PS I just noticed that bisect and sink are imported, please ignore them
(since sink is pointless now, and bisect is for something I haven't
submitted yet).

Index: ./cvs2svn.py
===================================================================
--- ./cvs2svn.py
+++ ./cvs2svn.py Thu Feb 7 21:38:23 2002
@@ -2,8 +2,10 @@
 #
 # cvs2svn: ...
 #
-
+import bisect
+import statcache
 import rcsparse
+import sink
 import os
 import sys
 import sha
@@ -12,8 +14,12 @@
 import fileinput
 import string
 import getopt
-
-
+import Cache
+from svn import fs, _util, _delta
+_util.apr_initialize()
+pool = _util.svn_pool_create(None)
+fspool = _util.svn_pool_create(None)
+logcache = Cache.Cache(size=50)
 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
 
 DATAFILE = 'cvs2svn-data'
@@ -26,7 +32,7 @@
 SVNROOT = 'svnroot'
 ATTIC = os.sep + 'Attic'
 
-COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
+COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
 
 OP_DELETE = 'D'
 OP_CHANGE = 'C'
@@ -99,8 +105,8 @@
           # shove the previous revision back in time (and any before it that
           # may need to shift).
           while t_p >= t_c:
- self.rev_data[prev][0] = t_c - 1 # new timestamp
- self.rev_data[prev][3] = t_p # old timestamp
+ self.rev_data[prev][0] = t_c - 1 # new timestamp
+ self.rev_data[prev][3] = t_p # old timestamp
 
             print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
                   % (relative_name(self.cvsroot, self.fname),
@@ -110,7 +116,7 @@
             prev = self.prev[current]
             if not prev:
               break
- t_c = t_c - 1 # self.rev_data[current][0]
+ t_c = t_c - 1 # self.rev_data[current][0]
             t_p = self.rev_data[prev][0]
 
           # break from the for-loop
@@ -129,7 +135,8 @@
       self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
 
     self.revs.write('%08lx %s %s %s %s\n' % (timestamp, digest,
- op, revision, self.fname))
+ op, revision,
+ self.fname))
 
 def relative_name(cvsroot, fname):
   l = len(cvsroot)
@@ -140,7 +147,7 @@
   return l
 
 def visit_file(arg, dirname, files):
- cd, p, stats = arg
+ cd, stats = arg
   for fname in files:
     if fname[-2:] != ',v':
       continue
@@ -151,15 +158,59 @@
       cd.set_fname(pathname)
     if verbose:
       print pathname
- p.parse(open(pathname), cd)
+ rcsparse.Parser().parse(open(pathname), cd)
     stats[0] = stats[0] + 1
 
+class RevInfoParser(rcsparse.Sink):
+
+ def __init__(self):
+ self.Reset()
+ def Reset(self):
+ self.revision_author = {}
+ self.revision_log = {}
+ def define_revision(self, revision, timestamp, author, state,
+ branches, next):
+ # save author
+ self.revision_author[revision] = author
+
+
+ # Construct associative arrays containing info about individual revisions.
+ #
+ # The following associative arrays are created, keyed by revision number:
+ # revision_log -- log message
+ def set_revision_info(self, revision, log, text):
+ self.revision_log[revision] = log
+
+ def parse_cvs_file(self, rcs_pathname, opt_rev = None, opt_m_timestamp = None):
+ # Args in: opt_rev - requested revision
+ # opt_m - time since modified
+ # Args out: revision_map
+ # timestamp
+ # revision_deltatext
+
+ # CheckHidden(rcs_pathname);
+ try:
+ rcsfile = open(rcs_pathname, 'r')
+ except:
+ try:
+ rcs_pathname = os.path.join(os.path.split(rcs_pathname)[0],
+ "Attic", os.path.split(rcs_pathname)[1])
+ rcsfile = open(rcs_pathname, 'r')
+ except:
+ raise RuntimeError, ('error: %s appeared to be under CVS control, '
+ + 'but the RCS file is inaccessible.') % rcs_pathname
+
+ rcsparse.Parser().parse(rcsfile, self)
+ rcsfile.close()
+
 class BuildRevision(rcsparse.Sink):
   def __init__(self, rev, get_metadata=0):
     self.rev = rev
     self.get_metadata = get_metadata
     self.result = None
-
+ self.prev_delta = {}
+ self.d_command = re.compile("^d(\d+)\s+(\d+)")
+ self.a_command = re.compile("^a(\d+)\s+(\d+)")
   def define_revision(self, revision, timestamp, author, state,
                       branches, next):
     for branch in branches:
@@ -177,7 +228,6 @@
       revision = self.prev_delta.get(revision)
     path.reverse()
     self.collect = path
-
   def set_revision_info(self, revision, log, text):
     if not self.collect:
       # nothing more to do
@@ -200,7 +250,7 @@
     else:
       adjust = 0
       diffs = string.split(text, '\n')
-
+ add_lines_remaining = 0
       for command in diffs:
         if add_lines_remaining > 0:
           # Insertion lines from a prior "a" command
@@ -223,6 +273,7 @@
             count = string.atoi(amatch.group(2))
             add_lines_remaining = count
           else:
+ print "Diff commands:%s Current: %s" % (diffs, command)
             raise RuntimeError, 'Error parsing diff commands'
 
 class Commit:
@@ -240,20 +291,117 @@
       self.t_max = t
 
     if op == OP_CHANGE:
- self.changes.append((file, rev))
+ self.changes.append((file[0:-2], rev))
     else:
       # OP_DELETE
- self.deletes.append((file, rev))
+ self.deletes.append((file[0:-2], rev))
 
   def commit(self):
     # commit this transaction
     print 'committing: %s, over %d seconds' % (time.ctime(self.t_min),
                                                self.t_max - self.t_min)
+ rev = fs.youngest_rev(fsob, pool)
+ txn = fs.begin_txn(fsob, rev, pool)
+
+ root = fs.txn_root(txn, pool)
+ lastcommit = (None, None)
     for f, r in self.changes:
       print ' changing %s : %s' % (r, f)
+ ps = os.path.split(f)[0]
+ ps = string.split(ps,os.sep)
+ for i in xrange(1, len(ps)+1):
+ if (fs.check_path(root, string.join(ps[0:i],os.sep), pool) == 0):
+ print "Making dir %s" % string.join(ps[0:i],os.sep)
+ fs.make_dir(root, string.join(ps[0:i],os.sep), pool)
+ repofilepath = f
+ if (fs.check_path(root, f, pool) == 0):
+ justmadefile = 1
+ fs.make_file(root, f, pool)
+ else:
+ justmadefile = 0
+ handler, baton = fs.apply_textdelta(root, f, pool)
+
+ f = f + ",v"
+
+ # See if we have a revision and author log for this file in the cache
+ # Otherwise, parse the file with the cvs parser and recache the
+ # log.
+ temptuple = logcache.get(f)
+ if temptuple is None:
+ cvp = RevInfoParser()
+ cvp.parse_cvs_file (f)
+ logcache[f] = (cvp.revision_log, cvp.revision_author)
+ revlog = cvp.revision_log
+ authorlog = cvp.revision_author
+ del cvp
+ else:
+ revlog, authorlog = temptuple
+
+ # Get the real file path to give to co
+ try:
+ statcache.stat (f)
+ except:
+ f = os.path.join(os.path.split(f)[0], "Attic", os.path.split(f)[1])
+ statcache.stat (f)
+
+ # If we just made the file, we can just send a string for the new file,
+ # rather than streaming it.
+ if justmadefile:
+ _delta.svn_txdelta_send_string(os.popen("co -q -p%s %s" %(r, f), "r", 102400).read(), handler, baton, pool)
+ else:
+ # Open the pipe to co
+ infile = os.popen("co -q -p%s %s" % (r, f), "r", 102400)
+
+ # Open a SVN stream for that pipe
+ stream2 = _util.svn_stream_from_stdio (infile, pool)
+
+ # Get the current file contents from the repo, or,
+ # if we have multiple CVS revisions to the same file
+ # being done in this single commit, then get the
+ # contents of the previous revision from co, or
+ # else the delta won't be correct because the contents
+ # in the repo won't have changed yet.
+ if repofilepath == lastcommit[0]:
+ infile2 = os.popen("co -q -p%s %s" % (lastcommit[1], f), "r", 102400)
+ stream1 = _util.svn_stream_from_stdio (infile2, pool)
+ else:
+ stream1 = fs.file_contents (root, repofilepath, pool)
+ txstream = _delta.svn_txdelta(stream1, stream2, pool)
+ _delta.svn_txdelta_send_txstream (txstream, handler, baton, pool)
+ _util.svn_stream_close (stream2)
+ infile.close()
+ if repofilepath == lastcommit[0]:
+ infile2.close()
+
+ # We might as well reset the properties on every change
+ # for right now
+ fs.change_txn_prop (txn, "svn:log", revlog[r], pool)
+ fs.change_txn_prop (txn, "svn:author", authorlog[r], pool)
+ fs.change_txn_prop (txn, "svn:date", _util.svn_time_to_nts(_util.apr_ansi_time_to_apr_time(self.t_max)[1], pool),pool)
+ lastcommit = (repofilepath, r)
+
     for f, r in self.deletes:
       print ' deleting %s : %s' % (r, f)
 
+ # If the file was initially added on a branch, the first mainline
+ # revision will be marked dead, and thus, attempts to delete it will
+ # fail, since it doesn't really exist.
+ if (r != "1.1"):
+ fs.delete(root, f, pool)
+
+ conflicts, new_rev = fs.commit_txn(txn)
+ newdate = _util.svn_time_to_nts(_util.apr_ansi_time_to_apr_time(self.t_max)[1], pool)
+ fs.change_rev_prop (fsob, new_rev, "svn:date", newdate, pool)
+
+ # If we don't clear the pool, we'll continually eat up memory.
+ # This pool only contains objects it's okay to delete. The fs object is
+ # in a different pool.
+ _util.svn_pool_clear (pool)
+
+ if conflicts:
+ print 'conflicts:', conflicts
+ print 'New revision:', new_rev
+
 def read_resync(fname):
   "Read the .resync file into memory."
 
@@ -300,9 +448,8 @@
 
 def pass1(ctx):
   cd = CollectData(ctx.cvsroot, DATAFILE)
- p = rcsparse.Parser()
   stats = [ 0 ]
- os.path.walk(ctx.cvsroot, visit_file, (cd, p, stats))
+ os.path.walk(ctx.cvsroot, visit_file, (cd, stats))
   if ctx.verbose:
     print 'processed', stats[0], 'files'
 
@@ -357,15 +504,16 @@
   # process the logfiles, creating the target
   commits = { }
   count = 0
-
   for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
     timestamp, id, op, rev, fname = parse_revs_line(line)
-
- if commits.has_key(id):
- c = commits[id]
- else:
- c = commits[id] = Commit()
- c.add(timestamp, op, fname, rev)
+ # Only handle trunk revision commits until branch handling is
+ # finished in the committer
+ if (trunk_rev.match(rev)):
+ if commits.has_key(id):
+ c = commits[id]
+ else:
+ c = commits[id] = Commit()
+ c.add(timestamp, op, fname, rev)
 
     # scan for commits to process
     process = [ ]
@@ -373,12 +521,23 @@
       if c.t_max + COMMIT_THRESHOLD < timestamp:
         process.append((c.t_max, c))
         del commits[id]
-
     process.sort()
     for t_max, c in process:
       c.commit()
     count = count + len(process)
 
+ # I have a repository with all commits occurring within the
+ # first 5 minutes. Thus, none of the commits will be processed
+ # since c.t_max + COMMIT_THRESHOLD is always > timestamp
+ # Check for this by seeing if some commits are still in the commits
+ # list, and if so, commit them
+ if (len(commits) != 0):
+ for id, c in commits.items():
+ process.append((c.t_max, c))
+ process.sort()
+ for t_max, c in process:
+ c.commit()
+ count = count + len(process)
   if ctx.verbose:
     print count, 'commits processed.'
 
@@ -417,11 +576,14 @@
     print ' total:', int(times[len(_passes)] - times[start_pass-1]), 'seconds'
 
 def usage():
- print 'USAGE: %s [-p pass] repository-path' % sys.argv[0]
+ print 'USAGE: %s [-p pass] [ -h db home ] repository-path' % sys.argv[0]
   sys.exit(1)
 
 def main():
- opts, args = getopt.getopt(sys.argv[1:], 'p:v')
+ global fsob
+ db_path = os.curdir
+ _util.apr_initialize()
+ opts, args = getopt.getopt(sys.argv[1:], 'p:h:v')
   if len(args) != 1:
     usage()
   verbose = 0
@@ -435,7 +597,18 @@
         sys.exit(1)
     elif opt == '-v':
       verbose = 1
+ elif opt == '-h':
+ home = value
+ db_path = os.path.join(home, 'db')
+ if not os.path.exists(db_path):
+ db_path = home
+ fsob = fs.new(fspool)
+ fs.open_berkeley(fsob, db_path)
   convert(args[0], start_pass=start_pass, verbose=verbose)
+ fs.close_fs(fsob)
+ _util.apr_terminate()
+
 
 if __name__ == '__main__':
   main()
+

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Sat Oct 21 14:37:05 2006

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.