On Fri, Jan 15, 2021 at 01:44:43AM +0900, Yasuhito FUTATSUKI wrote:
> On 2020/12/29 0:49, Stefan Sperling wrote:
> > On Tue, Dec 15, 2020 at 07:47:59PM +0900, Yasuhito FUTATSUKI wrote:
> >> As far as I read the code again, it seems this is already support
> >> locales other than C/C.UTF-8 if mailer.py is kicked with appropriate
> >> LC_CTYPE environment. The REPOS-PATH argment comes from sys.argv[2]
> >> already decoded in file system encoding (i.e. encoding corresponding
> >> to LC_CTYPE) and Python encodes it if access to file system. On the
> >> other hand, svn.repos.open() wrapper function accepts str and encode
> >> it to UTF-8 string, as the svn_repos_open() C API expects.
> >>
> >> Then I overlooked sys.argv[x] and cmd_args[x] in main() are
> >> already str.
> >>
> >> Perhaps it needs (although the last hunk is not nessesary):
> >
> > Yes, indeed. If python already encodes arguments to strings then this
> > patch looks correct to me. It also works as expected in my testing.
> I revised the patch and checked in UTF-8 locale. Now it seems it works
> with post-revprop-change, post-lock and post-unlock hooks.
>
> If this patch is OK, then I'll do:
>
> (1) Make it support Python 2 again, to merge it to 1.14.x.
> (2) Fix an issue to encode Unicode paths in Subject header.
> (3) Fix an issue of max line length in Subject header.
>
> Perhaps (2) and (3) will be resolved at the same time.
Yes, in my opinion this patch is OK.
The mailer.py test is still passing and your changes look fine to me.
Thank you! My experience with python2/3 conversion is limited, and your
help with this is much appreciated.
Regards,
Stefan
> Cheers,
> --
> Yasuhito FUTATSUKI <futatuki_at_yf.bsclub.org>
> Follow up to r1884427: mailer.py: Additional fix for Python 3 support
>
> * tools/hook-scripts/mailer/mailer.py
> ():
> - Don't include StringIO.
> - import locale for output class 'StandardOutput'.
> (main): Don't decode cmd_args[x], because they are already str.
> (OutputBase.write_binary): New method, as the abstract base class of
> output classes.
> (OutputBase.write): Tweak comment. This method has implementation now.
> (OutputBase.run): Use self.write_binary() to write content of buf.
> (StandardOutput.write): Override default write method to use default
> encoding for console output.
> (PropChange.generate): Use sys.stdin.buffer instead of sys.stdin to read
> from stdin as bytes.
> (Lock.__init__):
> - Use sys.stdin.buffer instead of sys.stdin to read from stdin as bytes.
> - Strip new line character from items of self.dirlist
> (DiffGenerator.__getitem__): Pass change.path as is to svn.fs.FileDiff.
> (TextCommitRenderer.render): Don't decode data.author. data.author is
> already bytes (or None) here.
> (Repository.__init__): Ensure self.author is str or None.
> ((entry routine)): Encode repository path as UTF-8 before canonicalize
> explicitly.
>
> Index: tools/hook-scripts/mailer/mailer.py
> ===================================================================
> --- tools/hook-scripts/mailer/mailer.py (revision 1885374)
> +++ tools/hook-scripts/mailer/mailer.py (working copy)
> @@ -50,10 +50,11 @@
> from urllib.parse import quote as urllib_parse_quote
> import time
> import subprocess
> -from io import BytesIO, StringIO
> +from io import BytesIO
> import smtplib
> import re
> import tempfile
> +import locale
>
> # Minimal version of Subversion's bindings required
> _MIN_SVN_VERSION = [1, 5, 0]
> @@ -88,10 +89,10 @@
> messenger = Commit(pool, cfg, repos)
> elif cmd == 'propchange' or cmd == 'propchange2':
> revision = int(cmd_args[0])
> - author = cmd_args[1].decode('utf-8')
> - propname = cmd_args[2].decode('utf-8')
> + author = cmd_args[1]
> + propname = cmd_args[2]
> if cmd == 'propchange2' and cmd_args[3]:
> - action = cmd_args[3].decode('utf-8')
> + action = cmd_args[3]
> else:
> action = 'A'
> repos = Repository(repos_dir, revision, pool)
> @@ -103,7 +104,7 @@
> })
> messenger = PropChange(pool, cfg, repos, author, propname, action)
> elif cmd == 'lock' or cmd == 'unlock':
> - author = cmd_args[0].decode('utf-8')
> + author = cmd_args[0]
> repos = Repository(repos_dir, 0, pool) ### any old revision will do
> # Override the repos revision author with the author of the lock/unlock
> repos.author = author
> @@ -169,9 +170,13 @@
> representation."""
> raise NotImplementedError
>
> + def write_binary(self, output):
> + """Override this method.
> + Append the binary data OUTPUT to the output representation."""
> + raise NotImplementedError
> +
> def write(self, output):
> - """Override this method.
> - Append the literal text string OUTPUT to the output representation."""
> + """Append the literal text string OUTPUT to the output representation."""
> return self.write_binary(output.encode('utf-8'))
>
> def run(self, cmd):
> @@ -184,7 +189,7 @@
>
> buf = pipe_ob.stdout.read(self._CHUNKSIZE)
> while buf:
> - self.write(buf)
> + self.write_binary(buf)
> buf = pipe_ob.stdout.read(self._CHUNKSIZE)
>
> # wait on the child so we don't end up with a billion zombies
> @@ -360,7 +365,12 @@
> def finish(self):
> pass
>
> + def write(self, output):
> + """Write text as *default* encoding string"""
> + return self.write_binary(output.encode(locale.getpreferredencoding(),
> + 'backslashreplace'))
>
> +
> class PipeOutput(MailedOutput):
> "Deliver a mail message to an MTA via a pipe."
>
> @@ -532,7 +542,7 @@
> elif self.action == 'M':
> self.output.write('Property diff:\n')
> tempfile1 = tempfile.NamedTemporaryFile()
> - tempfile1.write(sys.stdin.read())
> + tempfile1.write(sys.stdin.buffer.read())
> tempfile1.flush()
> tempfile2 = tempfile.NamedTemporaryFile()
> tempfile2.write(self.repos.get_rev_prop(self.propname))
> @@ -594,9 +604,8 @@
> or 'unlock_subject_prefix'))
>
> # read all the locked paths from STDIN and strip off the trailing newlines
> - self.dirlist = [x for x in sys.stdin.readlines()]
> - for i in range(len(self.dirlist)):
> - self.dirlist[i] = self.dirlist[i].decode('utf-8')
> + self.dirlist = [x.decode('utf-8').rstrip()
> + for x in sys.stdin.buffer.readlines()]
>
> # collect the set of groups and the unique sets of params for the options
> self.groups = { }
> @@ -910,7 +919,7 @@
> kind = 'C'
> if self.diffsels.copy:
> diff = svn.fs.FileDiff(None, None, self.repos.root_this,
> - change.path.decode('utf-8'), self.pool)
> + change.path, self.pool)
> label1 = '/dev/null\t00:00:00 1970\t' \
> '(empty, because file is newly added)'
> label2 = '%s\t%s\t(r%s, copy of r%s, %s)' \
> @@ -1092,7 +1101,7 @@
>
> w = self.output.write
>
> - w('Author: %s\nDate: %s\nNew Revision: %s\n' % (data.author.decode('utf-8'),
> + w('Author: %s\nDate: %s\nNew Revision: %s\n' % (data.author,
> data.date,
> data.rev))
>
> @@ -1221,6 +1230,8 @@
> self.root_this = self.get_root(rev)
>
> self.author = self.get_rev_prop(svn.core.SVN_PROP_REVISION_AUTHOR)
> + if self.author is not None:
> + self.author = self.author.decode('utf-8')
>
> def get_rev_prop(self, propname, rev = None):
> if not rev:
> @@ -1510,7 +1521,7 @@
> usage()
>
> cmd = sys.argv[1]
> - repos_dir = svn.core.svn_path_canonicalize(sys.argv[2])
> + repos_dir = svn.core.svn_path_canonicalize(sys.argv[2].encode('utf-8'))
> repos_dir = repos_dir.decode('utf-8')
> try:
> expected_args = cmd_list[cmd]
Received on 2021-01-14 23:30:31 CET