The patch below fixes issue 1255. Karl asked me to extend the testsuite for
cvs2svn. I have added a shell script to the issue, but I'm having some
difficulty translating it into cross-platform, portable code within the test suite.
Maybe it's not possible.
Anyway: I'd like to commit this to trunk one of these days.
bye,
Erik.
Log:
[[[
Fix issue #1255: cvs2svn.py generates non-utf8 paths on repositories with
non-ascii characters in filenames.
* subversion/tools/cvs2svn/cvs2svn.py:
(Dumper): Use new utf8_path member for encoding path information written
to the dump file.
(Dumper.utf8_path): New. Encode path name to utf8 or stop cvs2svn.py
on error.
]]]
Index: tools/cvs2svn/cvs2svn.py
===================================================================
--- tools/cvs2svn/cvs2svn.py (revision 8044)
+++ tools/cvs2svn/cvs2svn.py (working copy)
@@ -862,6 +862,7 @@
self.target = ctx.target
self.dump_only = ctx.dump_only
self.dumpfile = None
+ self.path_encoding = ctx.encoding
# If all we're doing here is dumping, we can go ahead and
# initialize our single dumpfile. Else, if we're suppose to
@@ -978,8 +979,22 @@
"\n"
"PROPS-END\n"
"\n"
- "\n" % path)
+ "\n" % self.utf8_path(path))
+ def utf8_path(self, path):
+ """Return UTF-8 encoded 'path' based on self.path_encoding."""
+ try:
+ ### Log messages can be converted with 'replace' strategy.
+ ### We can't afford that here.
+ unicode_path = unicode(path, self.path_encoding, 'strict')
+ return unicode_path.encode('utf-8')
+
+ except UnicodeError:
+ print "Unable to convert a path '%s' to internal encoding." % path
+ print "Try rerunning with (for example) '--encoding=latin1'"
+ sys.exit(1)
+
+
def probe_path(self, path):
"""Return true if PATH exists in the youngest tree of the svn
repository, else return None. PATH does not start with '/'."""
@@ -1011,12 +1026,14 @@
'Node-copyfrom-rev: %d\n'
'Node-copyfrom-path: /%s\n'
'\n'
- % (svn_dst_path, change.copyfrom_rev, svn_src_path))
+ % (self.utf8_path(svn_dst_path),
+ change.copyfrom_rev,
+ self.utf8_path(svn_src_path)))
for ent in change.deleted_entries:
self.dumpfile.write('Node-path: %s\n'
'Node-action: delete\n'
- '\n' % (svn_dst_path + '/' + ent))
+ '\n' % (self.utf8_path(svn_dst_path + '/' + ent)))
def prune_entries(self, path, expected):
"""Delete any entries in PATH that are not in list EXPECTED.
@@ -1030,7 +1047,7 @@
for ent in change.deleted_entries:
self.dumpfile.write('Node-path: %s\n'
'Node-action: delete\n'
- '\n' % (path + '/' + ent))
+ '\n' % (self.utf8_path(path + '/' + ent)))
def add_or_change_path(self, cvs_path, svn_path, cvs_rev, rcs_file,
tags, branches):
@@ -1079,7 +1096,7 @@
'Node-action: %s\n'
'Prop-content-length: %d\n'
'Text-content-length: '
- % (svn_path, action, props_len))
+ % (self.utf8_path(svn_path), action, props_len))
pos = self.dumpfile.tell()
@@ -1144,11 +1161,10 @@
deleted_path, closed_tags, closed_branches \
= self.repos_mirror.delete_path(svn_path, tags,
branches, prune)
- if deleted_path:
- print ' (deleted %s)' % deleted_path
- self.dumpfile.write('Node-path: %s\n'
- 'Node-action: delete\n'
- '\n' % deleted_path)
+
+ self.dumpfile.write('Node-path: %s\n'
+ 'Node-action: delete\n'
+ '\n' % self.utf8_path(deleted_path))
return deleted_path, closed_tags, closed_branches
def close(self):
@@ -1844,7 +1860,7 @@
print " date: '%s'" % date
for rcs_file, cvs_rev, br, tags, branches in self.changes:
print " rev %s of '%s'" % (cvs_rev, rcs_file)
- print 'Try rerunning with (for example) \"--encoding=latin1\".'
+ print "Try rerunning with (for example) '--encoding=latin1'."
sys.exit(1)
# Tells whether we actually wrote anything to the dumpfile.
--
+++ GMX - die erste Adresse für Mail, Message, More +++
Neu: Preissenkung für MMS und FreeMMS! http://www.gmx.net
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Sat Dec 20 20:09:51 2003