Nuutti Kotivuori <naked@iki.fi> writes:
> Argh! I almost shit my pants when I saw this. I was born into UTF-8
> late, so I caught the latest specification, which mentioned this
> explicitly and forbade its use - and I bought it, hook, line and sinker
> - swearing to crucify any parser which didn't error out on sequences
> like this and to do the same to anyone and her whole family if
> something generated sequences like it.
>
> When I think about it objectively - if it's entirely internal to
> Subversion and we control the decoders as well, then who cares, might
> as well do that. It's one alternative.
The other one would be to use 0xfe for non-path-separating slashes and
0xff for embedded NULs (or possibly 0xc0 and 0xc1, respectively), but
I think that would be even more ad-hoc. It would have the upside that
svn_path_internal_style() and svn_path_local_style() don't have to
change the length of the string though.
Here's a proof of concept patch, using 0xfe/0xff notation, which
should take care of the client side. (Maybe the server side as well,
I haven't checked). There still needs to be something in utf.c that
converts the special octets to '/' and NUL on UTF-8 decoding to get
printouts right (could perhaps have been avoided if the 0xc0 0xXX
representation had been used instead), but that's no biggie. Or should
printouts use local style?
// Marcus
Index: subversion/include/svn_path.h
===================================================================
--- subversion/include/svn_path.h
+++ subversion/include/svn_path.h Thu Aug 29 23:03:07 2002
@@ -44,7 +44,7 @@
void svn_path_internal_style (svn_stringbuf_t *path);
/* Convert PATH from the canonical internal style to the local
style. */
-void svn_path_local_style (svn_stringbuf_t *path);
+svn_error_t *svn_path_local_style (svn_stringbuf_t *path);
/* Join a base path (BASE) with a component (COMPONENT), allocated in POOL.
Index: subversion/libsvn_subr/path.c
===================================================================
--- subversion/libsvn_subr/path.c
+++ subversion/libsvn_subr/path.c Thu Aug 29 23:08:14 2002
@@ -56,9 +56,23 @@
{
/* Convert all local-style separators to the canonical ones. */
char *p;
- for (p = path->data; *p != '\0'; ++p)
+ apr_size_t c;
+ for (p = path->data, c = path->len; c--; ++p)
if (*p == SVN_PATH_LOCAL_SEPARATOR)
*p = SVN_PATH_SEPARATOR;
+ else if (*p == SVN_PATH_SEPARATOR)
+ *(unsigned char *)p = 0xfe;
+ else if (*p == 0)
+ *(unsigned char *)p = 0xff;
+ }
+ else
+ {
+ /* Only need to handle embedded NULs here */
+ char *p;
+ apr_size_t c;
+ for (p = path->data, c = path->len; c--; ++p)
+ if (*p == 0)
+ *(unsigned char *)p = 0xff;
}
svn_path_canonicalize (path);
@@ -66,9 +80,12 @@
}
-void
+svn_error_t *
svn_path_local_style (svn_stringbuf_t *path)
{
+ /* Danger Will Robinson! Upon return, path may contain embedded NULs.
+ Make sure to check for them before using result as cstring. */
+
svn_path_canonicalize (path);
/* FIXME: Should also remove trailing /.'s, if the style says so. */
@@ -76,10 +93,35 @@
{
/* Convert all canonical separators to the local-style ones. */
char *p;
- for (p = path->data; *p != '\0'; ++p)
+ apr_size_t c;
+ for (p = path->data, c = path->len; c--; ++p)
if (*p == SVN_PATH_SEPARATOR)
*p = SVN_PATH_LOCAL_SEPARATOR;
+ else if(*(unsigned char *)p == 0xfe)
+ *p = SVN_PATH_SEPARATOR;
+ else if(*(unsigned char *)p == 0xff)
+ *p = 0;
+ else if(*p == SVN_PATH_LOCAL_SEPARATOR)
+ return svn_error_createf(SVN_ERR_BAD_FILENAME, 0, NULL, path->pool,
+ "Can't use '%c' as regular path character.",
+ SVN_PATH_LOCAL_SEPARATOR);
}
+ else
+ {
+ /* Just convert embedded NULs and check for path separator as
+ regular path character. */
+ char *p;
+ apr_size_t c;
+ for (p = path->data, c = path->len; c--; ++p)
+ if(*(unsigned char *)p == 0xff)
+ *p = 0;
+ else if(*(unsigned char *)p == 0xfe)
+ return svn_error_createf(SVN_ERR_BAD_FILENAME, 0, NULL, path->pool,
+ "Can't use '%c' as regular path character.",
+ SVN_PATH_SEPARATOR);
+ }
+
+ return SVN_NO_ERROR;
}
@@ -890,6 +932,14 @@
svn_stringbuf_appendbytes (retstr, path + copied,
i - copied);
+ /* In case the offending character is in fact a placeholder for
'/' or NUL, we don't want to encode the placeholder, but rather
+ the escaped character itself. So let's extract it. */
+ if (c == 0xfe)
+ c = SVN_PATH_SEPARATOR;
+ else if(c == 0xff)
+ c = 0;
+
/* Now, sprintf() in our escaped character, making sure our
buffer is big enough to hold the '%' and two digits. We cast
the C to unsigned char here because the 'X' format character
@@ -953,6 +1003,10 @@
digitz[1] = path[++i];
digitz[2] = '\0';
c = (char)(strtol (digitz, NULL, 16));
+ if (c == 0)
+ c = 0xff;
+ else if (c == SVN_PATH_SEPARATOR)
+ c = 0xfe;
}
retstr->data[retstr->len++] = c;
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Aug 29 23:19:24 2002