
Diffs to cvs2svn.py

From: Daniel Berlin <dan_at_dberlin.org>
Date: 2002-02-03 04:10:42 CET

One new file (the LRU cache module, which isn't mine; I just noticed it
has no author name in it, so I'll add proper attribution later), and a diff.

The bindings diffs are coming up next.

For those interested in speed (which is, of course, probably the top
concern): passes 1, 2, and 3 could be sped up by a faster RCS
parser.

Pass 4 can be sped up by using the faster RCS parser to get at the
revision log messages (though this is only a constant-factor speedup,
since the CVS parser it's using currently ignores the deltatext
entirely, and the cache hit rate is very high unless you make
completely random changes to your repository :P), and probably more so by
not forking to co, which would mean rewriting a revision extractor in
C and SWIGging it, and/or using the reverse application I suggested so we don't get O(n^2)
behavior. Forking to co still gives us O(n^2) behavior; it just makes
the constant a whole lot lower. You'd have to reverse-apply the diff
commands to get O(n) behavior. I don't know whether the constant of doing
this in Python would be low enough to beat co; I'll investigate.
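For concreteness, here is a minimal sketch (in modern Python, with names of my own choosing) of the RCS "d"/"a" edit-command application that the patch's extract_revision method performs; reverse-applying these same deltas is what would yield the O(n) behavior discussed above:

```python
import re

D_CMD = re.compile(r'^d(\d+)\s(\d+)')  # delete <count> lines starting at line <start>
A_CMD = re.compile(r'^a(\d+)\s(\d+)')  # append <count> following lines after line <start>

def apply_rcs_delta(text, delta):
    """Apply one RCS edit script (d/a commands plus inserted lines) to a list of lines."""
    text = list(text)
    adjust = 0      # net line-count shift from edits applied so far
    to_add = 0      # insertion lines still pending from a prior "a" command
    insert_at = 0   # 1-based line number the pending "a" command refers to
    for command in delta:
        if to_add > 0:
            # This line is payload for the preceding "a" command, not a command itself.
            text.insert(insert_at + adjust, command)
            to_add -= 1
            adjust += 1
        elif (m := D_CMD.match(command)):
            start, count = int(m.group(1)), int(m.group(2))
            begin = start + adjust - 1          # convert 1-based to 0-based, shifted
            del text[begin:begin + count]
            adjust -= count
        elif (m := A_CMD.match(command)):
            insert_at, to_add = int(m.group(1)), int(m.group(2))
        else:
            raise ValueError('error parsing diff commands')
    return text
```

Note that line numbers in both commands refer to the revision being patched, which is why the running `adjust` offset is needed; a "d"/"a" pair at the same line acts as a replacement.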

This has been tested on some homemade repositories (which is how I found
the leftover commit bug), and on GCC's repository.

--Dan

Index: ./cvs2svn.py
===================================================================
--- ./cvs2svn.py
+++ ./cvs2svn.py Sat Feb 2 22:06:18 2002
@@ -2,7 +2,7 @@
 #
 # cvs2svn: ...
 #
-
+import statcache
 import rcsparse
 import os
 import sys
@@ -12,8 +12,13 @@
 import fileinput
 import string
 import getopt
+import Cache
+from svn import fs, _util, _delta
 
-
+_util.apr_initialize()
+pool = _util.svn_pool_create(None)
+fspool = _util.svn_pool_create(None)
+logcache = Cache.Cache(size=50)
 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
 
 DATAFILE = 'cvs2svn-data'
@@ -26,7 +31,7 @@
 SVNROOT = 'svnroot'
 ATTIC = os.sep + 'Attic'
 
-COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
+COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
 
 OP_DELETE = 'D'
 OP_CHANGE = 'C'
@@ -99,8 +104,8 @@
           # shove the previous revision back in time (and any before it that
           # may need to shift).
           while t_p >= t_c:
- self.rev_data[prev][0] = t_c - 1 # new timestamp
- self.rev_data[prev][3] = t_p # old timestamp
+ self.rev_data[prev][0] = t_c - 1 # new timestamp
+ self.rev_data[prev][3] = t_p # old timestamp
 
             print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
                   % (relative_name(self.cvsroot, self.fname),
@@ -110,7 +115,7 @@
             prev = self.prev[current]
             if not prev:
               break
- t_c = t_c - 1 # self.rev_data[current][0]
+ t_c = t_c - 1 # self.rev_data[current][0]
             t_p = self.rev_data[prev][0]
 
           # break from the for-loop
@@ -128,8 +133,10 @@
       # for this time and log message.
       self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
 
- self.revs.write('%08lx %s %s %s %s\n' % (timestamp, digest,
- op, revision, self.fname))
+ self.revs.write('%08lx %s %s %s %s %s\n' % (timestamp, digest,
+ op, revision,
+ string.strip(author),
+ self.fname))
 
 def relative_name(cvsroot, fname):
   l = len(cvsroot)
@@ -140,7 +147,7 @@
   return l
 
 def visit_file(arg, dirname, files):
- cd, p, stats = arg
+ cd, stats = arg
   for fname in files:
     if fname[-2:] != ',v':
       continue
@@ -151,15 +158,405 @@
       cd.set_fname(pathname)
     if verbose:
       print pathname
- p.parse(open(pathname), cd)
+ rcsparse.Parser().parse(open(pathname), cd)
     stats[0] = stats[0] + 1
 
+class CVSParser(rcsparse.Sink):
+ # Precompiled regular expressions
+ trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
+ last_branch = re.compile('(.*)\\.[0-9]+')
+ is_branch = re.compile('(.*)\\.0\\.([0-9]+)')
+ d_command = re.compile('^d(\d+)\\s(\\d+)')
+ a_command = re.compile('^a(\d+)\\s(\\d+)')
+
+ SECONDS_PER_DAY = 86400
+
+ def __init__(self):
+ self.Reset()
+ def Reset(self):
+ self.last_revision = {}
+ self.prev_revision = {}
+ self.revision_date = {}
+ self.revision_author = {}
+ self.revision_branches = {}
+ self.next_delta = {}
+ self.prev_delta = {}
+ self.tag_revision = {}
+ self.revision_symbolic_name = {}
+ self.timestamp = {}
+ self.revision_ctime = {}
+ self.revision_age = {}
+ self.revision_log = {}
+ self.revision_deltatext = {}
+# self.revision_map = []
+ self.lines_added = {}
+ self.lines_removed = {}
+
+ # Map a tag to a numerical revision number. The tag can be a symbolic
+ # branch tag, a symbolic revision tag, or an ordinary numerical
+ # revision number.
+ def map_tag_to_revision(self, tag_or_revision):
+ try:
+ revision = self.tag_revision[tag_or_revision]
+ match = self.is_branch.match(revision)
+ if match:
+ branch = match.group(1) + '.' + match.group(2)
+ if self.last_revision.get(branch):
+ return self.last_revision[branch]
+ else:
+ return match.group(1)
+ else:
+ return revision
+ except:
+ return ''
+
+ # Construct an ordered list of ancestor revisions to the given
+ # revision, starting with the immediate ancestor and going back
+ # to the primordial revision (1.1).
+ #
+ # Note: The generated path does not traverse the tree the same way
+ # that the individual revision deltas do. In particular,
+ # the path traverses the tree "backwards" on branches.
+ def ancestor_revisions(self, revision):
+ ancestors = []
+ revision = self.prev_revision.get(revision)
+ while revision:
+ ancestors.append(revision)
+ revision = self.prev_revision.get(revision)
+
+ return ancestors
+
+ # Split deltatext specified by rev to each line.
+ def deltatext_split(self, rev):
+ lines = string.split(self.revision_deltatext[rev], '\n')
+ if lines[-1] == '':
+ del lines[-1]
+ return lines
+
+ # Extract the given revision from the digested RCS file.
+ # (Essentially the equivalent of cvs up -rXXX)
+ def extract_revision(self, revision):
+ path = []
+ add_lines_remaining = 0
+ start_line = 0
+ count = 0
+ while revision:
+ path.append(revision)
+ revision = self.prev_delta.get(revision)
+ path.reverse()
+ path = path[1:] # Get rid of head revision
+
+ text = self.deltatext_split(self.head_revision)
+
+ # Iterate, applying deltas to previous revision
+ for revision in path:
+ adjust = 0
+ diffs = self.deltatext_split(revision)
+ self.lines_added[revision] = 0
+ self.lines_removed[revision] = 0
+ lines_added_now = 0
+ lines_removed_now = 0
+
+ for command in diffs:
+ dmatch = self.d_command.match(command)
+ amatch = self.a_command.match(command)
+ if add_lines_remaining > 0:
+ # Insertion lines from a prior "a" command
+ text.insert(start_line + adjust, command)
+ add_lines_remaining = add_lines_remaining - 1
+ adjust = adjust + 1
+ elif dmatch:
+ # "d" - Delete command
+ start_line = string.atoi(dmatch.group(1))
+ count = string.atoi(dmatch.group(2))
+ begin = start_line + adjust - 1
+ del text[begin:begin + count]
+ adjust = adjust - count
+ lines_removed_now = lines_removed_now + count
+ elif amatch:
+ # "a" - Add command
+ start_line = string.atoi(amatch.group(1))
+ count = string.atoi(amatch.group(2))
+ add_lines_remaining = count
+ lines_added_now = lines_added_now + count
+ else:
+ raise RuntimeError, 'Error parsing diff commands'
+
+ self.lines_added[revision] = self.lines_added[revision] + lines_added_now
+ self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now
+ return text
+
+ def set_head_revision(self, revision):
+ self.head_revision = revision
+
+ def set_principal_branch(self, branch_name):
+ self.principal_branch = branch_name
+
+ def define_tag(self, name, revision):
+ # Create an associate array that maps from tag name to
+ # revision number and vice-versa.
+ self.tag_revision[name] = revision
+
+ ### actually, this is a bit bogus... a rev can have multiple names
+ self.revision_symbolic_name[revision] = name
+
+ def set_comment(self, comment):
+ self.file_description = comment
+
+ def set_description(self, description):
+ self.rcs_file_description = description
+
+ # Construct dicts that represent the topology of the RCS tree
+ # and other arrays that contain info about individual revisions.
+ #
+ # The following dicts are created, keyed by revision number:
+ # self.revision_date -- e.g. "96.02.23.00.21.52"
+ # self.timestamp -- seconds since 12:00 AM, Jan 1, 1970 GMT
+ # self.revision_author -- e.g. "tom"
+ # self.revision_branches -- descendant branch revisions,separated by spaces
+ # e.g. "1.21.4.1 1.21.2.6.1"
+ # self.prev_revision -- revision number of previous *ancestor*
+ # in RCS tree.
+ # Traversal of this array occurs in the direction
+ # of the primordial (1.1) revision.
+ # self.prev_delta -- revision number of previous revision which
+ # forms the basis for the edit commands in
+ # this revision.
+ # This causes the tree to be traversed towards
+ # the trunk when on a branch, and towards the
+ # latest trunk revision when on the trunk.
+ # self.next_delta -- revision number of next "delta". Inverts
+ # prev_delta.
+ #
+ # Also creates self.last_revision, keyed by a branch revision number, which
+ # indicates the latest revision on a given branch,
+ # e.g. self.last_revision{"1.2.8"} == 1.2.8.5
+ def define_revision(self, revision, timestamp, author, state,
+ branches, next):
+ self.tag_revision[revision] = revision
+ branch = self.last_branch.match(revision).group(1)
+ self.last_revision[branch] = revision
+
+ #self.revision_date[revision] = date
+ self.timestamp[revision] = timestamp
+
+ # Pretty print the date string
+ ltime = time.localtime(self.timestamp[revision])
+ formatted_date = time.strftime("%d %b %Y %H:%M", ltime)
+ self.revision_ctime[revision] = formatted_date
+
+ # Save age
+ self.revision_age[revision] = ((time.time() - self.timestamp[revision])
+ / self.SECONDS_PER_DAY)
+
+ # save author
+ self.revision_author[revision] = author
+
+ # ignore the state
+
+ # process the branch information
+ branch_text = ''
+ for branch in branches:
+ self.prev_revision[branch] = revision
+ self.next_delta[revision] = branch
+ self.prev_delta[branch] = revision
+ branch_text = branch_text + branch + ' '
+ self.revision_branches[revision] = branch_text
+
+ # process the "next revision" information
+ if next:
+ self.next_delta[revision] = next
+ self.prev_delta[next] = revision
+ is_trunk_revision = self.trunk_rev.match(revision) is not None
+ if is_trunk_revision:
+ self.prev_revision[revision] = next
+ else:
+ self.prev_revision[next] = revision
+
+ # Construct associative arrays containing info about individual revisions.
+ #
+ # The following associative arrays are created, keyed by revision number:
+ # revision_log -- log message
+ # revision_deltatext -- Either the complete text of the revision,
+ # in the case of the head revision, or the
+ # encoded delta between this revision and another.
+ # The delta is either with respect to the successor
+ # revision if this revision is on the trunk or
+ # relative to its immediate predecessor if this
+ # revision is on a branch.
+ def set_revision_info(self, revision, log, text):
+ self.revision_log[revision] = log
+ self.revision_deltatext[revision] = text
+
+ def parse_cvs_file(self, rcs_pathname, opt_rev = None, opt_m_timestamp = None):
+ # Args in: opt_rev - requested revision
+ # opt_m - time since modified
+ # Args out: revision_map
+ # timestamp
+ # revision_deltatext
+
+ # CheckHidden(rcs_pathname);
+ try:
+ rcsfile = open(rcs_pathname, 'r')
+ except:
+ try:
+ rcs_pathname = os.path.join(os.path.split(rcs_pathname)[0],
+ "Attic", os.path.split(rcs_pathname)[1])
+ rcsfile = open(rcs_pathname, 'r')
+ except:
+ raise RuntimeError, ('error: %s appeared to be under CVS control, '
+ + 'but the RCS file is inaccessible.') % rcs_pathname
+
+ rcsparse.Parser().parse(rcsfile, self)
+ rcsfile.close()
+ if opt_rev in [None, '', 'HEAD']:
+ # Explicitly specified topmost revision in tree
+ revision = self.head_revision
+ else:
+ # Symbolic tag or specific revision number specified.
+ revision = self.map_tag_to_revision(opt_rev)
+ if revision == '':
+ raise RuntimeError, 'error: -r: No such revision: ' + opt_rev
+
+ # The primordial revision is not always 1.1! Go find it.
+ primordial = revision
+ while self.prev_revision.get(primordial):
+ primordial = self.prev_revision[primordial]
+
+ # Don't display file at all, if -m option is specified and no
+ # changes have been made in the specified file.
+ if opt_m_timestamp and self.timestamp[revision] < opt_m_timestamp:
+ return ''
+ # Don't deal with deltatext for now, we'll use a pipe to co,
+ # since it's faster.
+ a="""
+ # Figure out how many lines were in the primordial, i.e. version 1.1,
+ # check-in by moving backward in time from the head revision to the
+ # first revision.
+ line_count = 0
+ if self.revision_deltatext.get(self.head_revision):
+ tmp_array = self.deltatext_split(self.head_revision)
+ line_count = len(tmp_array)
+
+ skip = 0
+
+ rev = self.prev_revision.get(self.head_revision)
+ while rev:
+ diffs = self.deltatext_split(rev)
+ for command in diffs:
+ dmatch = self.d_command.match(command)
+ amatch = self.a_command.match(command)
+ if skip > 0:
+ # Skip insertion lines from a prior "a" command
+ skip = skip - 1
+ elif dmatch:
+ # "d" - Delete command
+ start_line = string.atoi(dmatch.group(1))
+ count = string.atoi(dmatch.group(2))
+ line_count = line_count - count
+ elif amatch:
+ # "a" - Add command
+ start_line = string.atoi(amatch.group(1))
+ count = string.atoi(amatch.group(2))
+ skip = count;
+ line_count = line_count + count
+ elif command == "@":
+ continue
+ else:
+ raise RuntimeError, 'error: illegal RCS file'
+
+ rev = self.prev_revision.get(rev)
+
+ # Now, play the delta edit commands *backwards* from the primordial
+ # revision forward, but rather than applying the deltas to the text of
+ # each revision, apply the changes to an array of revision numbers.
+ # This creates a "revision map" -- an array where each element
+ # represents a line of text in the given revision but contains only
+ # the revision number in which the line was introduced rather than
+ # the line text itself.
+ #
+ # Note: These are backward deltas for revisions on the trunk and
+ # forward deltas for branch revisions.
+
+ # Create initial revision map for primordial version.
+ self.revision_map = [primordial] * line_count
+
+ ancestors = [revision, ] + self.ancestor_revisions(revision)
+ ancestors = ancestors[:-1] # Remove "1.1"
+ last_revision = primordial
+ ancestors.reverse()
+ for revision in ancestors:
+ is_trunk_revision = self.trunk_rev.match(revision) is not None
+
+ if is_trunk_revision:
+ diffs = self.deltatext_split(last_revision)
+
+ # Revisions on the trunk specify deltas that transform a
+ # revision into an earlier revision, so invert the translation
+ # of the 'diff' commands.
+ for command in diffs:
+ if skip > 0:
+ skip = skip - 1
+ else:
+ dmatch = self.d_command.match(command)
+ amatch = self.a_command.match(command)
+ if dmatch:
+ start_line = string.atoi(dmatch.group(1))
+ count = string.atoi(dmatch.group(2))
+ temp = []
+ while count > 0:
+ temp.append(revision)
+ count = count - 1
+ self.revision_map = (self.revision_map[:start_line - 1] +
+ temp + self.revision_map[start_line - 1:])
+ elif amatch:
+ start_line = string.atoi(amatch.group(1))
+ count = string.atoi(amatch.group(2))
+ del self.revision_map[start_line:start_line + count]
+ skip = count
+ else:
+ raise RuntimeError, 'Error parsing diff commands'
+
+ else:
+ # Revisions on a branch are arranged backwards from those on
+ # the trunk. They specify deltas that transform a revision
+ # into a later revision.
+ adjust = 0
+ diffs = self.deltatext_split(revision)
+ for command in diffs:
+ if skip > 0:
+ skip = skip - 1
+ else:
+ dmatch = self.d_command.match(command)
+ amatch = self.a_command.match(command)
+ if dmatch:
+ start_line = string.atoi(dmatch.group(1))
+ count = string.atoi(dmatch.group(2))
+ del self.revision_map[start_line + adjust - 1:start_line + adjust - 1 + count]
+ adjust = adjust - count
+ elif amatch:
+ start_line = string.atoi(amatch.group(1))
+ count = string.atoi(amatch.group(2))
+ skip = count
+ temp = []
+ while count > 0:
+ temp.append(revision)
+ count = count - 1
+ self.revision_map = (self.revision_map[:start_line + adjust] +
+ temp + self.revision_map[start_line + adjust:])
+ adjust = adjust + skip
+ else:
+ raise RuntimeError, 'Error parsing diff commands'
+ last_revision = revision """
+ return revision
 class BuildRevision(rcsparse.Sink):
   def __init__(self, rev, get_metadata=0):
     self.rev = rev
     self.get_metadata = get_metadata
     self.result = None
-
+ self.prev_delta = {}
+ self.d_command = re.compile("^d(\d+)\s+(\d+)")
+ self.a_command = re.compile("^a(\d+)\s+(\d+)")
   def define_revision(self, revision, timestamp, author, state,
                       branches, next):
     for branch in branches:
@@ -177,7 +574,6 @@
       revision = self.prev_delta.get(revision)
     path.reverse()
     self.collect = path
-
   def set_revision_info(self, revision, log, text):
     if not self.collect:
       # nothing more to do
@@ -200,7 +596,7 @@
     else:
       adjust = 0
       diffs = string.split(text, '\n')
-
+ add_lines_remaining = 0
       for command in diffs:
         if add_lines_remaining > 0:
           # Insertion lines from a prior "a" command
@@ -223,6 +619,7 @@
             count = string.atoi(amatch.group(2))
             add_lines_remaining = count
           else:
+ print "Diff commands:%s Current: %s" % (diffs, command)
             raise RuntimeError, 'Error parsing diff commands'
 
 class Commit:
@@ -232,7 +629,7 @@
     self.t_min = 1<<30
     self.t_max = 0
 
- def add(self, t, op, file, rev):
+ def add(self, t, op, file, author, rev):
     # record the time range of this commit
     if t < self.t_min:
       self.t_min = t
@@ -240,20 +637,114 @@
       self.t_max = t
 
     if op == OP_CHANGE:
- self.changes.append((file, rev))
+ self.changes.append((file[0:-2], rev, author))
     else:
       # OP_DELETE
- self.deletes.append((file, rev))
+ self.deletes.append((file[0:-2], rev, author))
 
   def commit(self):
     # commit this transaction
     print 'committing: %s, over %d seconds' % (time.ctime(self.t_min),
                                                self.t_max - self.t_min)
- for f, r in self.changes:
+ rev = fs.youngest_rev(fsob, pool)
+ txn = fs.begin_txn(fsob, rev, pool)
+
+ root = fs.txn_root(txn, pool)
+ lastcommit = (None, None)
+ for f, r, author in self.changes:
       print ' changing %s : %s' % (r, f)
- for f, r in self.deletes:
+ ps = os.path.split(f)[0]
+ ps = string.split(ps,os.sep)
+ for i in xrange(1, len(ps)+1):
+ if (fs.check_path(root, string.join(ps[0:i],os.sep), pool) == 0):
+ print "Making dir %s" % string.join(ps[0:i],os.sep)
+ fs.make_dir(root, string.join(ps[0:i],os.sep), pool)
+ repofilepath = f
+ if (fs.check_path(root, f, pool) == 0):
+ justmadefile = 1
+ fs.make_file(root, f, pool)
+ else:
+ justmadefile = 0
+ handler, baton = fs.apply_textdelta(root, f, pool)
+
+ f = f + ",v"
+
+ # See if we have a revision log for this file in the cache
+ # Otherwise, parse the file with the cvs parser and recache the
+ # log.
+ revlog = logcache.get(f)
+
+ if revlog is None:
+ cvp = CVSParser()
+ cvp.parse_cvs_file (f)
+ logcache[f]=cvp.revision_log
+ revlog=cvp.revision_log
+ del cvp
+
+ # Get the real file path to give to co
+ try:
+ statcache.stat (f)
+ except:
+ f = os.path.join(os.path.split(f)[0], "Attic", os.path.split(f)[1])
+ statcache.stat (f)
+
+ # If we just made the file, we can just send a string for the new file,
+ # rather than streaming it.
+ if justmadefile:
+ _delta.svn_txdelta_send_string(os.popen("co -q -p%s %s" %(r, f), "r", 102400).read(), handler, baton, pool)
+ else:
+ # Open the pipe to co
+ infile = os.popen("co -q -p%s %s" % (r, f), "r", 102400)
+
+ # Open a SVN stream for that pipe
+ stream2 = _util.svn_stream_from_stdio (infile, pool)
+
+ # Get the current file contents from the repo, or,
+ # if we have multiple CVS revisions to the same file
+ # being done in this single commit, then get the
+ # contents of the previous revision from co, or
+ # else the delta won't be correct because the contents
+ # in the repo won't have changed yet.
+ if repofilepath == lastcommit[0]:
+ infile2 = os.popen("co -q -p%s %s" % (lastcommit[1], f), "r", 102400)
+ stream1 = _util.svn_stream_from_stdio (infile2, pool)
+ else:
+ stream1 = fs.file_contents (root, repofilepath, pool)
+ txstream = _delta.svn_txdelta(stream1, stream2, pool)
+ _delta.svn_txdelta_send_txstream (txstream, handler, baton, pool)
+ _util.svn_stream_close (stream2)
+ infile.close()
+ if repofilepath == lastcommit[0]:
+ infile2.close()
+
+ # We might as well reset the properties on every change
+ # for right now
+ fs.change_txn_prop (txn, "svn:log", revlog[r], pool)
+ fs.change_txn_prop (txn, "svn:author", author, pool)
+
+ lastcommit = (repofilepath, r)
+
+ for f, r, author in self.deletes:
       print ' deleting %s : %s' % (r, f)
 
+ # If the file was initially added on a branch, the first mainline
+ # revision will be marked dead, and thus, attempts to delete it will
+ # fail, since it doesn't really exist.
+ if (r != "1.1"):
+ fs.delete(root, f, pool)
+ fs.change_txn_prop (txn, "svn:author", author, pool)
+
+ conflicts, new_rev = fs.commit_txn(txn)
+
+ # If we don't clear the pool, we'll continually eat up memory.
+ # This pool only contains objects it's okay to delete. The fs object is
+ # in a different pool.
+ _util.svn_pool_clear (pool)
+
+ if conflicts:
+ print 'conflicts:', conflicts
+ print 'New revision:', new_rev
+
 def read_resync(fname):
   "Read the .resync file into memory."
 
@@ -293,16 +784,17 @@
   op = line[DIGEST_END_IDX + 1]
   idx = string.find(line, ' ', DIGEST_END_IDX + 3)
   rev = line[DIGEST_END_IDX+3:idx]
- fname = line[idx+1:-1]
+ idx2 = string.find(line, ' ', idx+1)
+ author = line[idx:idx2]
+ fname = line[idx2+1:-1]
 
- return timestamp, id, op, rev, fname
+ return timestamp, id, op, rev, author, fname
 
 
 def pass1(ctx):
   cd = CollectData(ctx.cvsroot, DATAFILE)
- p = rcsparse.Parser()
   stats = [ 0 ]
- os.path.walk(ctx.cvsroot, visit_file, (cd, p, stats))
+ os.path.walk(ctx.cvsroot, visit_file, (cd, stats))
   if ctx.verbose:
     print 'processed', stats[0], 'files'
 
@@ -320,7 +812,7 @@
 
   # process the revisions file, looking for items to clean up
   for line in fileinput.FileInput(ctx.log_fname_base + REVS_SUFFIX):
- timestamp, digest, op, rev, fname = parse_revs_line(line)
+ timestamp, digest, op, rev, author, fname = parse_revs_line(line)
     if not resync.has_key(digest):
       output.write(line)
       continue
@@ -330,8 +822,9 @@
     for record in resync[digest]:
       if record[0] <= timestamp <= record[1]:
         # bingo! remap the time on this (record[2] is the new time).
- output.write('%08lx %s %s %s %s\n'
- % (record[2], digest, op, rev, fname))
+ output.write('%08lx %s %s %s %s %s\n'
+ % (record[2], digest, op, rev,
+ string.strip(author), fname))
 
         print 'RESYNC: %s (%s) : old time="%s" new time="%s"' \
               % (relative_name(ctx.cvsroot, fname),
@@ -357,15 +850,16 @@
   # process the logfiles, creating the target
   commits = { }
   count = 0
-
   for line in fileinput.FileInput(ctx.log_fname_base + SORTED_REVS_SUFFIX):
- timestamp, id, op, rev, fname = parse_revs_line(line)
-
- if commits.has_key(id):
- c = commits[id]
- else:
- c = commits[id] = Commit()
- c.add(timestamp, op, fname, rev)
+ timestamp, id, op, rev, author, fname = parse_revs_line(line)
+ # Only handle trunk revision commits until branch handling is
+ # finished in the committer
+ if (trunk_rev.match(rev)):
+ if commits.has_key(id):
+ c = commits[id]
+ else:
+ c = commits[id] = Commit()
+ c.add(timestamp, op, fname, author, rev)
 
     # scan for commits to process
     process = [ ]
@@ -373,12 +867,23 @@
       if c.t_max + COMMIT_THRESHOLD < timestamp:
         process.append((c.t_max, c))
         del commits[id]
-
     process.sort()
     for t_max, c in process:
       c.commit()
     count = count + len(process)
 
+ # I have a repository with all commits occurring within the
+ # first 5 minutes. Thus, none of the commits will be processed
+ # since c.t_max + COMMIT_THRESHOLD is always > timestamp
+ # Check for this by seeing if some commits are still in the commits
+ # list, and if so, commit them
+ if (len(commits) != 0):
+ for id, c in commits.items():
+ process.append((c.t_max, c))
+ process.sort()
+ for t_max, c in process:
+ c.commit()
+ count = count + len(process)
   if ctx.verbose:
     print count, 'commits processed.'
 
@@ -417,11 +922,14 @@
     print ' total:', int(times[len(_passes)] - times[start_pass-1]), 'seconds'
 
 def usage():
- print 'USAGE: %s [-p pass] repository-path' % sys.argv[0]
+ print 'USAGE: %s [-p pass] [ -h db home ] repository-path' % sys.argv[0]
   sys.exit(1)
 
 def main():
- opts, args = getopt.getopt(sys.argv[1:], 'p:v')
+ global fsob
+ db_path = os.curdir
+ _util.apr_initialize()
+ opts, args = getopt.getopt(sys.argv[1:], 'p:h:v')
   if len(args) != 1:
     usage()
   verbose = 0
@@ -435,7 +943,17 @@
         sys.exit(1)
     elif opt == '-v':
       verbose = 1
+ elif opt == '-h':
+ home = value
+ db_path = os.path.join(home, 'db')
+ if not os.path.exists(db_path):
+ db_path = home
+ fsob = fs.new(fspool)
+ fs.open_berkeley(fsob, db_path)
   convert(args[0], start_pass=start_pass, verbose=verbose)
+ fs.close_fs(fsob)
+ _util.apr_terminate()
+
 
 if __name__ == '__main__':
   main()


Received on Sat Oct 21 14:37:03 2006

This is an archived mail posted to the Subversion Dev mailing list.
