Let's discuss about unicode compositions for filenames!
From: Hiroaki Nakamura <hnakamur_at_gmail.com>
Date: Sun, 29 Jan 2012 19:38:44 +0900
Hi folks!
I read the note about unicode compositions for filenames
First, for me, the short term solution (4) seems too difficult to implement.
It is simple. We convert all input paths into the 'normal' normal form (NFC),
I made a quick-and-dirty proof-of-concept patch for the further discussion.
If you run apache + mod_dav_svn with this patch,
This patch has following limitations right now but we can fix them.
To test this patch, please do the steps below.
(1) build and install utf8proc
wget http://www.public-software-group.org/pub/projects/utf8proc/v1.1.5/utf8proc-v1.1.5.tar.gz
(2) build Subversion 1.7.2 with this patch.
One thing I'd like to discuss is how we link to utf8proc.
The option (1) needs the utf8proc package to be created for each OS distribution
Am I on the right track?
-- )Hiroaki Nakamura) hnakamur_at_gmail.com ==== subversion-1.7.2-NFC.diff diff -ruN subversion-1.7.2.orig/subversion/include/svn_utf.h subversion-1.7.2/subversion/include/svn_utf.h --- subversion-1.7.2.orig/subversion/include/svn_utf.h 2009-11-17 04:07:17.000000000 +0900 +++ subversion-1.7.2/subversion/include/svn_utf.h 2012-01-29 11:54:20.150665621 +0900 @@ -220,6 +220,14 @@ const svn_string_t *src, apr_pool_t *pool); +/** Set @a *dest to a NFC canonicalized C string from string @a src; + * allocate @a *dest in @a pool. + */ +svn_error_t * +svn_utf_cstring_NFC(const char **dest, + const char *src, + apr_pool_t *pool); + #ifdef __cplusplus } #endif /* __cplusplus */ diff -ruN subversion-1.7.2.orig/subversion/libsvn_subr/path.c subversion-1.7.2/subversion/libsvn_subr/path.c --- subversion-1.7.2.orig/subversion/libsvn_subr/path.c 2011-01-18 06:45:39.000000000 +0900 +++ subversion-1.7.2/subversion/libsvn_subr/path.c 2012-01-29 18:01:06.900398904 +0900 @@ -1119,15 +1119,17 @@ const char *path_apr, apr_pool_t *pool) { + char *path_nfc; + SVN_ERR(svn_utf_cstring_NFC(&path_nfc, path_apr, pool)); svn_boolean_t path_is_utf8; SVN_ERR(get_path_encoding(&path_is_utf8, pool)); if (path_is_utf8) { - *path_utf8 = apr_pstrdup(pool, path_apr); + *path_utf8 = apr_pstrdup(pool, path_nfc); return SVN_NO_ERROR; } else - return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool); + return svn_utf_cstring_to_utf8(path_utf8, path_nfc, pool); } diff -ruN subversion-1.7.2.orig/subversion/libsvn_subr/utf.c subversion-1.7.2/subversion/libsvn_subr/utf.c --- subversion-1.7.2.orig/subversion/libsvn_subr/utf.c 2011-08-24 00:04:38.000000000 +0900 +++ subversion-1.7.2/subversion/libsvn_subr/utf.c 2012-01-29 17:55:33.643895922 +0900 @@ -42,6 +42,7 @@ #include "private/svn_utf_private.h" #include "private/svn_dep_compat.h" #include "private/svn_string_private.h" +#include "utf8proc.h" @@ -1029,3 +1030,58 @@ return err; } + +static ssize_t svn_utf_map( + const uint8_t *str, ssize_t len, uint8_t **dstptr, int options, + apr_pool_t *pool +) { + int32_t *buffer; + ssize_t result; + *dstptr = NULL; + /* We use svn_utf_map only for conversion from NFC/NFD to NFC. + * NFC is the most compact form of the two (NFC and NFD). + * So the result buffer length never exceeds the source string length. + * Therefore we first allocate the result buffer and run decompose only once. + */ + if (options & UTF8PROC_NULLTERM) + len = strlen(str); + buffer = apr_palloc(pool, len * sizeof(int32_t) + 1); + result = utf8proc_decompose(str, len, buffer, len, options); + if (result < 0) + return result; + if (result > len) + { + /* We never reach here when converting to NFC. */ + buffer = apr_palloc(pool, result * sizeof(int32_t) + 1); + result = utf8proc_decompose(str, len, buffer, result, options); + if (result < 0) + return result; + } + result = utf8proc_reencode(buffer, result, options); + if (result < 0) + return result; + /* We don't shrink the result buffer because: + * - the buffer will be short lived and freed at the end of the transaction. + * - APR does not have realloc and free API, so if we ever allocate another + * buffer, we use more memory. + */ + *dstptr = (uint8_t *)buffer; + return result; +} + +svn_error_t * +svn_utf_cstring_NFC(const char **dest, + const char *src, + apr_pool_t *pool) +{ + ssize_t ret = svn_utf_map(src, 0, dest, + UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_COMPOSE, + pool); + if (ret < 0) + { + /* TODO: implement error handling. */ + return SVN_NO_ERROR; + } + return SVN_NO_ERROR; +} diff -ruN subversion-1.7.2.orig/subversion/mod_dav_svn/repos.c subversion-1.7.2/subversion/mod_dav_svn/repos.c --- subversion-1.7.2.orig/subversion/mod_dav_svn/repos.c 2011-11-29 01:12:28.000000000 +0900 +++ subversion-1.7.2/subversion/mod_dav_svn/repos.c 2012-01-29 18:00:50.166648215 +0900 @@ -48,6 +48,7 @@ #include "mod_dav_svn.h" #include "svn_ra.h" /* for SVN_RA_CAPABILITY_* */ #include "svn_dirent_uri.h" +#include "svn_utf.h" #include "private/svn_log.h" #include "private/svn_fspath.h" @@ -2672,6 +2673,7 @@ dav_stream **stream) { svn_node_kind_t kind; + char *repos_path; dav_error *derr; svn_error_t *serr; @@ -2694,19 +2696,28 @@ } #endif + serr = svn_utf_cstring_NFC(&repos_path, resource->info->repos_path, + resource->pool); + if (serr != NULL) + { + return dav_svn__convert_err(serr, HTTP_INTERNAL_SERVER_ERROR, + "Could not canonicalize filename to NFC.", + resource->pool); + } + /* start building the stream structure */ *stream = apr_pcalloc(resource->pool, sizeof(**stream)); (*stream)->res = resource; derr = fs_check_path(&kind, resource->info->root.root, - resource->info->repos_path, resource->pool); + repos_path, resource->pool); if (derr != NULL) return derr; if (kind == svn_node_none) /* No existing file. */ { serr = svn_fs_make_file(resource->info->root.root, - resource->info->repos_path, + repos_path, resource->pool); if (serr != NULL) @@ -2730,7 +2741,7 @@ serr = svn_fs_node_prop(&mime_type, resource->info->root.root, - resource->info->repos_path, + repos_path, SVN_PROP_MIME_TYPE, resource->pool); @@ -2744,7 +2755,7 @@ if (!mime_type) { serr = svn_fs_change_node_prop(resource->info->root.root, - resource->info->repos_path, + repos_path, SVN_PROP_MIME_TYPE, svn_string_create (resource->info->r->content_type, @@ -2762,7 +2773,7 @@ serr = svn_fs_apply_textdelta(&(*stream)->delta_handler, &(*stream)->delta_baton, resource->info->root.root, - resource->info->repos_path, + repos_path, resource->info->base_checksum, resource->info->result_checksum, resource->pool);Received on 2012-01-29 11:39:20 CET |
This is an archived mail posted to the Subversion Dev mailing list.
This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.