Index: subversion/svnsync/sync.h =================================================================== --- subversion/svnsync/sync.h (revision 995839) +++ subversion/svnsync/sync.h (working copy) @@ -33,15 +33,15 @@ #include "svn_delta.h" -/* Normalize the line ending style of the values of properties in REV_PROPS - * that "need translation" (according to svn_prop_needs_translation(), - * currently all svn:* props) so that they contain only LF (\n) line endings. - * The number of properties that needed normalization is returned in - * *NORMALIZED_COUNT. +/* Translate the encoding and line ending style of the values of properties + * in REV_PROPS that "need translation" (according to + * svn_prop_needs_translation(), which is currently all svn:* props) so that + * they are encoded in UTF-8 and contain only LF (\n) line endings. No + * re-encoding is performed if ENCODING is NULL. */ svn_error_t * -svnsync_normalize_revprops(apr_hash_t *rev_props, - int *normalized_count, +svnsync_translate_revprops(apr_hash_t *rev_props, + const char *encoding, apr_pool_t *pool); @@ -52,18 +52,20 @@ * which the commit is being made. * * As the sync editor encounters property values, it might see the need to - * normalize them (to LF line endings). Each carried out normalization adds 1 - * to the *NORMALIZED_NODE_PROPS_COUNTER (for notification). + * translate them (re-encode and/or change to LF line endings). + * If PROP_ENCODING is NULL, then property values are presumed to be encoded + * in UTF-8 and are not re-encoded. Otherwise, the property values are + * presumed to be encoded in PROP_ENCODING, and are translated to UTF-8.
*/ svn_error_t * svnsync_get_sync_editor(const svn_delta_editor_t *wrapped_editor, void *wrapped_edit_baton, svn_revnum_t base_revision, const char *to_url, + const char *prop_encoding, svn_boolean_t quiet, const svn_delta_editor_t **editor, void **edit_baton, - int *normalized_node_props_counter, apr_pool_t *pool); Index: subversion/svnsync/main.c =================================================================== --- subversion/svnsync/main.c (revision 995839) +++ subversion/svnsync/main.c (working copy) @@ -61,6 +61,7 @@ svnsync_opt_sync_password, svnsync_opt_config_dir, svnsync_opt_config_options, + svnsync_opt_source_encoding, svnsync_opt_disable_locking, svnsync_opt_version, svnsync_opt_trust_server_cert, @@ -104,8 +105,8 @@ "the destination repository by any method other than 'svnsync'.\n" "In other words, the destination repository should be a read-only\n" "mirror of the source repository.\n"), - { SVNSYNC_OPTS_DEFAULT, 'q', svnsync_opt_allow_non_empty, - svnsync_opt_disable_locking } }, + { SVNSYNC_OPTS_DEFAULT, svnsync_opt_source_encoding, 'q', + svnsync_opt_allow_non_empty, svnsync_opt_disable_locking } }, { "synchronize", synchronize_cmd, { "sync" }, N_("usage: svnsync synchronize DEST_URL [SOURCE_URL]\n" "\n" @@ -117,7 +118,8 @@ "source URL. 
Specifying SOURCE_URL is recommended in particular\n" "if untrusted users/administrators may have write access to the\n" "DEST_URL repository.\n"), - { SVNSYNC_OPTS_DEFAULT, 'q', svnsync_opt_disable_locking } }, + { SVNSYNC_OPTS_DEFAULT, svnsync_opt_source_encoding, 'q', + svnsync_opt_disable_locking } }, { "copy-revprops", copy_revprops_cmd, { 0 }, N_("usage:\n" "\n" @@ -137,7 +139,8 @@ "DEST_URL repository.\n" "\n" "Form 2 is deprecated syntax, equivalent to specifying \"-rREV[:REV2]\".\n"), - { SVNSYNC_OPTS_DEFAULT, 'q', 'r', svnsync_opt_disable_locking } }, + { SVNSYNC_OPTS_DEFAULT, svnsync_opt_source_encoding, 'q', 'r', + svnsync_opt_disable_locking } }, { "info", info_cmd, { 0 }, N_("usage: svnsync info DEST_URL\n" "\n" @@ -200,6 +203,12 @@ "For example:\n" " " " servers:global:http-library=serf")}, + {"source-encoding", svnsync_opt_source_encoding, 1, + N_("convert translatable properties from encoding ARG\n" + " " + "to UTF-8. If not specified, then properties are\n" + " " + "presumed to be encoded in UTF-8.")}, {"disable-locking", svnsync_opt_disable_locking, 0, N_("Disable built-in locking. Use of this option can\n" " " @@ -227,6 +236,7 @@ const char *sync_password; const char *config_dir; apr_hash_t *config; + const char *source_encoding; svn_boolean_t disable_locking; svn_boolean_t quiet; svn_boolean_t allow_non_empty; @@ -352,6 +362,9 @@ svn_boolean_t quiet; svn_boolean_t allow_non_empty; const char *to_url; + + /* initialize, synchronize, and copy-revprops only */ + const char *source_encoding; /* initialize only */ const char *from_url; @@ -568,26 +581,7 @@ return SVN_NO_ERROR; } -/* Print a notification that NORMALIZED_REV_PROPS_COUNT rev-props and - * NORMALIZED_NODE_PROPS_COUNT node-props were normalized to LF line - * endings, if either of those numbers is non-zero. 
*/ -static svn_error_t * -log_properties_normalized(int normalized_rev_props_count, - int normalized_node_props_count, - apr_pool_t *pool) -{ - if (normalized_rev_props_count > 0 || normalized_node_props_count > 0) - SVN_ERR(svn_cmdline_printf(pool, - _("NOTE: Normalized %s* properties " - "to LF line endings (%d rev-props, " - "%d node-props).\n"), - SVN_PROP_PREFIX, - normalized_rev_props_count, - normalized_node_props_count)); - return SVN_NO_ERROR; -} - /* Copy all the revision properties, except for those that have the * "svn:sync-" prefix, from revision REV of the repository associated * with RA session FROM_SESSION, to the repository associated with RA @@ -597,16 +591,14 @@ * do not exist on the source revision will be removed. * * Make sure the values of svn:* revision properties use only LF (\n) - * lineending style, correcting their values as necessary. The number - * of properties that were normalized is returned in *NORMALIZED_COUNT. + * lineending style, correcting their values as necessary. */ static svn_error_t * copy_revprops(svn_ra_session_t *from_session, svn_ra_session_t *to_session, + subcommand_baton_t *sb, svn_revnum_t rev, svn_boolean_t sync, - svn_boolean_t quiet, - int *normalized_count, apr_pool_t *pool) { apr_pool_t *subpool = svn_pool_create(pool); @@ -621,9 +613,8 @@ /* Get the list of revision properties on REV of SOURCE. */ SVN_ERR(svn_ra_rev_proplist(from_session, rev, &rev_props, subpool)); - /* If necessary, normalize line ending style, and return the count - of changes in int *NORMALIZED_COUNT. */ - SVN_ERR(svnsync_normalize_revprops(rev_props, normalized_count, pool)); + /* If necessary, translate encoding and line ending style. */ + SVN_ERR(svnsync_translate_revprops(rev_props, sb->source_encoding, pool)); /* Copy all but the svn:svnsync properties. 
*/ SVN_ERR(write_revprops(&filtered_count, to_session, rev, rev_props, pool)); @@ -633,7 +624,7 @@ SVN_ERR(remove_props_not_in_source(to_session, rev, rev_props, existing_props, pool)); - if (! quiet) + if (! sb->quiet) SVN_ERR(log_properties_copied(filtered_count > 0, rev, pool)); svn_pool_destroy(subpool); @@ -664,6 +655,7 @@ b->quiet = opt_baton->quiet; b->allow_non_empty = opt_baton->allow_non_empty; b->to_url = to_url; + b->source_encoding = opt_baton->source_encoding; b->from_url = from_url; b->start_rev = start_rev; b->end_rev = end_rev; @@ -686,7 +678,6 @@ svn_string_t *from_url; svn_revnum_t latest, from_latest; const char *uuid, *root_url; - int normalized_rev_props_count; /* First, sanity check to see that we're copying into a brand new repos. If we aren't, and we aren't being asked to forcibly @@ -756,18 +747,14 @@ pool)); /* Copy all non-svnsync revprops from the LATEST rev in the source - repository into the destination, notifying about normalized - props, if any. When LATEST is 0, this serves the practical - purpose of initializing data that would otherwise be overlooked + repository into the destination. When LATEST is 0, this serves the + practical purpose of initializing data that would otherwise be overlooked by the sync process (which is going to begin with r1). When LATEST is not 0, this really serves merely aesthetic and informational purposes, keeping the output of this command consistent while allowing folks to see what the latest revision is. */ - SVN_ERR(copy_revprops(from_session, to_session, latest, FALSE, - baton->quiet, &normalized_rev_props_count, pool)); + SVN_ERR(copy_revprops(from_session, to_session, baton, latest, FALSE, pool)); - SVN_ERR(log_properties_normalized(normalized_rev_props_count, 0, pool)); - /* TODO: It would be nice if we could set the dest repos UUID to be equal to the UUID of the source repos, at least optionally. 
That way people could check out/log/diff using a local fast mirror, @@ -785,7 +772,7 @@ { const char *to_url, *from_url; svn_ra_session_t *to_session; - opt_baton_t *opt_baton = b; + opt_baton_t *opt_baton = (opt_baton_t*)b; apr_array_header_t *targets; subcommand_baton_t *baton; @@ -897,8 +884,6 @@ svn_ra_session_t *to_session; subcommand_baton_t *sb; svn_boolean_t has_commit_revprops_capability; - int normalized_rev_props_count; - int normalized_node_props_count; } replay_baton_t; /* Return a replay baton allocated from POOL and populated with @@ -986,7 +971,6 @@ replay_baton_t *rb = replay_baton; apr_hash_t *filtered; int filtered_count; - int normalized_count; /* We set this property so that if we error out for some reason we can later determine where we were in the process of @@ -1025,10 +1009,8 @@ apr_hash_set(filtered, SVN_PROP_REVISION_LOG, APR_HASH_KEY_STRING, svn_string_create("", pool)); - /* If necessary, normalize line ending style, and add the number - of changes to the overall count in the replay baton. */ - SVN_ERR(svnsync_normalize_revprops(filtered, &normalized_count, pool)); - rb->normalized_rev_props_count += normalized_count; + /* If necessary, translate encoding and line ending style. */ + SVN_ERR(svnsync_translate_revprops(filtered, rb->sb->source_encoding, pool)); SVN_ERR(svn_ra_get_commit_editor3(rb->to_session, &commit_editor, &commit_baton, @@ -1040,9 +1022,9 @@ over the RA interface, so we need an editor that's smart enough to filter those out for us. 
*/ SVN_ERR(svnsync_get_sync_editor(commit_editor, commit_baton, revision - 1, - rb->sb->to_url, rb->sb->quiet, - &sync_editor, &sync_baton, - &(rb->normalized_node_props_count), pool)); + rb->sb->to_url, rb->sb->source_encoding, + rb->sb->quiet, &sync_editor, &sync_baton, + pool)); SVN_ERR(svn_delta_get_cancellation_editor(check_cancel, NULL, sync_editor, sync_baton, @@ -1070,7 +1052,6 @@ replay_baton_t *rb = replay_baton; apr_hash_t *filtered, *existing_props; int filtered_count; - int normalized_count; SVN_ERR(editor->close_edit(edit_baton, pool)); @@ -1095,10 +1076,8 @@ : filter_exclude_log), subpool); - /* If necessary, normalize line ending style, and add the number - of changes to the overall count in the replay baton. */ - SVN_ERR(svnsync_normalize_revprops(filtered, &normalized_count, pool)); - rb->normalized_rev_props_count += normalized_count; + /* If necessary, translate encoding and line ending style. */ + SVN_ERR(svnsync_translate_revprops(filtered, rb->sb->source_encoding, pool)); SVN_ERR(write_revprops(&filtered_count, rb->to_session, revision, filtered, subpool)); @@ -1148,7 +1127,6 @@ svn_revnum_t to_latest, copying, last_merged; svn_revnum_t start_revision, end_revision; replay_baton_t *rb; - int normalized_rev_props_count = 0; SVN_ERR(open_source_session(&from_session, &last_merged_rev, baton->from_url, to_session, @@ -1200,10 +1178,8 @@ { if (copying > last_merged) { - SVN_ERR(copy_revprops(from_session, to_session, - to_latest, TRUE, baton->quiet, - &normalized_rev_props_count, - pool)); + SVN_ERR(copy_revprops(from_session, to_session, baton, + to_latest, TRUE, pool)); last_merged = copying; last_merged_rev = svn_string_create (apr_psprintf(pool, "%ld", last_merged), pool); @@ -1262,12 +1238,7 @@ 0, TRUE, replay_rev_started, replay_rev_finished, rb, pool)); - SVN_ERR(log_properties_normalized(rb->normalized_rev_props_count - + normalized_rev_props_count, - rb->normalized_node_props_count, - pool)); - return SVN_NO_ERROR; } @@ -1277,7 +1248,7 
@@ synchronize_cmd(apr_getopt_t *os, void *b, apr_pool_t *pool) { svn_ra_session_t *to_session; - opt_baton_t *opt_baton = b; + opt_baton_t *opt_baton = (opt_baton_t*)b; apr_array_header_t *targets; subcommand_baton_t *baton; const char *to_url, *from_url; @@ -1336,7 +1307,6 @@ svn_string_t *last_merged_rev; svn_revnum_t i; svn_revnum_t step = 1; - int normalized_rev_props_count = 0; SVN_ERR(open_source_session(&from_session, &last_merged_rev, baton->from_url, to_session, @@ -1365,16 +1335,10 @@ step = (baton->start_rev > baton->end_rev) ? -1 : 1; for (i = baton->start_rev; i != baton->end_rev + step; i = i + step) { - int normalized_count; SVN_ERR(check_cancel(NULL)); - SVN_ERR(copy_revprops(from_session, to_session, i, FALSE, - baton->quiet, &normalized_count, pool)); - normalized_rev_props_count += normalized_count; + SVN_ERR(copy_revprops(from_session, to_session, baton, i, FALSE, pool)); } - /* Notify about normalized props, if any. */ - SVN_ERR(log_properties_normalized(normalized_rev_props_count, 0, pool)); - return SVN_NO_ERROR; } @@ -1456,7 +1420,7 @@ copy_revprops_cmd(apr_getopt_t *os, void *b, apr_pool_t *pool) { svn_ra_session_t *to_session; - opt_baton_t *opt_baton = b; + opt_baton_t *opt_baton = (opt_baton_t*)b; apr_array_header_t *targets; subcommand_baton_t *baton; const char *to_url = NULL; @@ -1566,7 +1530,7 @@ info_cmd(apr_getopt_t *os, void *b, apr_pool_t * pool) { svn_ra_session_t *to_session; - opt_baton_t *opt_baton = b; + opt_baton_t *opt_baton = (opt_baton_t*)b; apr_array_header_t *targets; subcommand_baton_t *baton; const char *to_url; @@ -1625,9 +1589,9 @@ /* SUBCOMMAND: help */ static svn_error_t * -help_cmd(apr_getopt_t *os, void *baton, apr_pool_t *pool) +help_cmd(apr_getopt_t *os, void *b, apr_pool_t *pool) { - opt_baton_t *opt_baton = baton; + opt_baton_t *opt_baton = (opt_baton_t*)b; const char *header = _("general usage: svnsync SUBCOMMAND DEST_URL [ARGS & OPTIONS ...]\n" @@ -1671,6 +1635,7 @@ int opt_id, i; const char *username = 
NULL, *source_username = NULL, *sync_username = NULL; const char *password = NULL, *source_password = NULL, *sync_password = NULL; + const char *source_encoding = NULL; apr_array_header_t *config_options = NULL; apr_allocator_t *allocator; @@ -1795,6 +1760,10 @@ if (err) return svn_cmdline_handle_exit_error(err, pool, "svnsync: "); break; + + case svnsync_opt_source_encoding: + opt_err = svn_utf_cstring_to_utf8(&source_encoding, opt_arg, pool); + break; case svnsync_opt_disable_locking: opt_baton.disable_locking = TRUE; @@ -1982,6 +1951,8 @@ config = apr_hash_get(opt_baton.config, SVN_CONFIG_CATEGORY_CONFIG, APR_HASH_KEY_STRING); + + opt_baton.source_encoding = source_encoding; apr_signal(SIGINT, signal_handler); Index: subversion/svnsync/sync.c =================================================================== --- subversion/svnsync/sync.c (revision 995839) +++ subversion/svnsync/sync.c (working copy) @@ -45,29 +45,39 @@ #include -/* Normalize the line ending style of *STR, so that it contains only - * LF (\n) line endings. After return, *STR may point at a new - * svn_string_t* allocated from POOL. +/* Translate the encoding and line ending style of *STR, so that it contains + * only LF (\n) line endings and is encoded in UTF-8. After return, *STR may + * point at a new svn_string_t* allocated from POOL if *WAS_TRANSLATED. If + * ENCODING is NULL, then *STR is presumed to be encoded in UTF-8. * - * *WAS_NORMALIZED is set to TRUE when *STR needed to be normalized, + * *WAS_TRANSLATED is set to TRUE when *STR needed to be translated, * and to FALSE if *STR remains unchanged.
*/ static svn_error_t * -normalize_string(const svn_string_t **str, - svn_boolean_t *was_normalized, +translate_string(const svn_string_t **str, + svn_boolean_t *was_translated, + const char *encoding, apr_pool_t *pool) { - *was_normalized = FALSE; + *was_translated = FALSE; if (*str == NULL) return SVN_NO_ERROR; SVN_ERR_ASSERT((*str)->data != NULL); - /* Detect inconsistent line ending style simply by looking - for carriage return (\r) characters. */ - if (strchr((*str)->data, '\r') != NULL) + if (encoding) { + svn_string_t *new_str = NULL; + SVN_ERR(svn_subst_translate_string(&new_str, *str, encoding, pool)); + *str = new_str; + *was_translated = TRUE; + } + + /* Only detect inconsistent line ending style. This is accomplished by simply + looking for carriage return (\r) characters. */ + else if (strchr((*str)->data, '\r') != NULL) + { /* Found some. Normalize. */ const char* cstring = NULL; SVN_ERR(svn_subst_translate_cstring2((*str)->data, &cstring, @@ -75,26 +85,25 @@ NULL, FALSE, pool)); *str = svn_string_create(cstring, pool); - *was_normalized = TRUE; + *was_translated = TRUE; } return SVN_NO_ERROR; } -/* Normalize the line ending style of the values of properties in REV_PROPS - * that "need translation" (according to svn_prop_needs_translation(), - * currently all svn:* props) so that they contain only LF (\n) line endings. - * The number of properties that needed normalization is returned in - * *NORMALIZED_COUNT. +/* Translate the encoding and line ending style of the values of properties + * in REV_PROPS that "need translation" (according to + * svn_prop_needs_translation(), which is currently all svn:* props) so that + * they are encoded in UTF-8 and contain only LF (\n) line endings. No + * re-encoding is performed if ENCODING is NULL.
*/ svn_error_t * -svnsync_normalize_revprops(apr_hash_t *rev_props, - int *normalized_count, +svnsync_translate_revprops(apr_hash_t *rev_props, + const char *encoding, apr_pool_t *pool) { apr_hash_index_t *hi; - *normalized_count = 0; for (hi = apr_hash_first(pool, rev_props); hi; @@ -105,14 +114,12 @@ if (svn_prop_needs_translation(propname)) { - svn_boolean_t was_normalized; - SVN_ERR(normalize_string(&propval, &was_normalized, pool)); - if (was_normalized) + svn_boolean_t was_translated; + SVN_ERR(translate_string(&propval, &was_translated, encoding, pool)); + if (was_translated) { /* Replace the existing prop value. */ apr_hash_set(rev_props, propname, APR_HASH_KEY_STRING, propval); - /* And count this. */ - (*normalized_count)++; } } } @@ -141,6 +148,7 @@ const svn_delta_editor_t *wrapped_editor; void *wrapped_edit_baton; const char *to_url; /* URL we're copying into, for correct copyfrom URLs */ + const char *prop_encoding; svn_boolean_t called_open_root; svn_boolean_t got_textdeltas; svn_revnum_t base_revision; @@ -150,7 +158,6 @@ svn_boolean_t mergeinfo_stripped; /* Did we strip svn:mergeinfo? */ svn_boolean_t svnmerge_migrated; /* Did we convert svnmerge.py data? */ svn_boolean_t svnmerge_blocked; /* Was there any blocked svnmerge data? */ - int *normalized_node_props_counter; /* Where to count normalizations? */ } edit_baton_t; @@ -403,13 +410,12 @@ eb->svnmerge_blocked = TRUE; } - /* Normalize svn:* properties as necessary. */ + /* Translate svn:* properties as necessary. */ if (svn_prop_needs_translation(name)) { - svn_boolean_t was_normalized; - SVN_ERR(normalize_string(&value, &was_normalized, pool)); - if (was_normalized) - (*(eb->normalized_node_props_counter))++; + svn_boolean_t was_translated; + SVN_ERR(translate_string(&value, &was_translated, eb->prop_encoding, + pool)); } return eb->wrapped_editor->change_file_prop(fb->wrapped_node_baton, @@ -501,13 +507,11 @@ eb->svnmerge_blocked = TRUE; } - /* Normalize svn:* properties as necessary. 
*/ + /* Translate svn:* properties as necessary. */ if (svn_prop_needs_translation(name)) { - svn_boolean_t was_normalized; - SVN_ERR(normalize_string(&value, &was_normalized, pool)); - if (was_normalized) - (*(eb->normalized_node_props_counter))++; + svn_boolean_t was_translated; + SVN_ERR(translate_string(&value, &was_translated, eb->prop_encoding, pool)); } return eb->wrapped_editor->change_dir_prop(db->wrapped_node_baton, @@ -572,10 +576,10 @@ void *wrapped_edit_baton, svn_revnum_t base_revision, const char *to_url, + const char *prop_encoding, svn_boolean_t quiet, const svn_delta_editor_t **editor, void **edit_baton, - int *normalized_node_props_counter, apr_pool_t *pool) { svn_delta_editor_t *tree_editor = svn_delta_default_editor(pool); @@ -602,8 +606,8 @@ eb->wrapped_edit_baton = wrapped_edit_baton; eb->base_revision = base_revision; eb->to_url = to_url; + eb->prop_encoding = prop_encoding; eb->quiet = quiet; - eb->normalized_node_props_counter = normalized_node_props_counter; if (getenv("SVNSYNC_UNSUPPORTED_STRIP_MERGEINFO")) {