Index: subversion/include/svn_subst.h =================================================================== --- subversion/include/svn_subst.h (revision 1032431) +++ subversion/include/svn_subst.h (working copy) @@ -592,19 +592,46 @@ svn_subst_stream_detranslated(svn_stream_t **strea /* EOL conversion and character encodings */ +/** @deprecated Provided for backward compatibility with the 1.6 API. Callers + * should use svn_subst_translate_string2(). + * + * Similar to svn_subst_translate_string2(), except that the information about + * whether re-encoding or line-ending translation was performed is discarded. + */ +SVN_DEPRECATED +svn_error_t *svn_subst_translate_string(svn_string_t **new_value, + const svn_string_t *value, + const char *encoding, + apr_pool_t *pool); + /** Translate the string @a value from character encoding @a encoding to * UTF8, and also from its current line-ending style to LF line-endings. If * @a encoding is @c NULL, translate from the system-default encoding. * + * If @a translated_to_utf8 is not @c NULL, then + * @a *translated_to_utf8 is set to @c TRUE if re-encoding @a value + * to UTF-8 changed its content (that is, the UTF-8 result differs from the + * input); to @c FALSE otherwise. If @a translated_line_endings is not @c NULL, + * then @a *translated_line_endings is set to @c TRUE if at least one + * line ending was changed to LF; to @c FALSE otherwise. + * * Recognized line endings are LF, CR, CRLF. If @a value has inconsistent * line endings, return @c SVN_ERR_IO_INCONSISTENT_EOL. * - * Set @a *new_value to the translated string, allocated in @a pool. + * Set @a *new_value to the translated string, allocated in @a result_pool. + * + * @a scratch_pool is used for temporary allocations. + * + * @since New in 1.7. 
*/ -svn_error_t *svn_subst_translate_string(svn_string_t **new_value, - const svn_string_t *value, - const char *encoding, - apr_pool_t *pool); +svn_error_t * +svn_subst_translate_string2(svn_string_t **new_value, + svn_boolean_t *translated_to_utf8, + svn_boolean_t *translated_line_endings, + const svn_string_t *value, + const char *encoding, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); /** Translate the string @a value from UTF8 and LF line-endings into native * character encoding and native line-endings. If @a for_output is TRUE, Index: subversion/libsvn_subr/subst.c =================================================================== --- subversion/libsvn_subr/subst.c (revision 1032431) +++ subversion/libsvn_subr/subst.c (working copy) @@ -607,23 +607,129 @@ translate_keyword(char *buf, } +struct translation_baton; + +/* Baton for translate_chunk() to store its state in. */ +struct translation_baton +{ + const char *eol_str; + svn_boolean_t *translated_eol; + svn_error_t* (*translate_newline_fn)(const char *eol_str, + apr_size_t eol_str_len, + char *src_format, + apr_size_t *src_format_len, + const char *newline_buf, + apr_size_t newline_len, + svn_stream_t *dst, + struct translation_baton *b); + svn_boolean_t repair; + apr_hash_t *keywords; + svn_boolean_t expand; + + /* 'short boolean' array that encodes what character values + may trigger a translation action, hence are 'interesting' */ + char interesting[256]; + + /* Length of the string EOL_STR points to. 
*/ + apr_size_t eol_str_len; + + /* Buffer to cache any newline state between translation chunks */ + char newline_buf[2]; + + /* Offset (within newline_buf) of the first *unused* character */ + apr_size_t newline_off; + + /* Buffer to cache keyword-parsing state between translation chunks */ + char keyword_buf[SVN_KEYWORD_MAX_LEN]; + + /* Offset (within keyword-buf) to the first *unused* character */ + apr_size_t keyword_off; + + /* EOL style used in the chunk-source */ + char src_format[2]; + + /* Length of the EOL style string found in the chunk-source, + or zero if none encountered yet */ + apr_size_t src_format_len; + + /* If this is svn_tristate_false, translate_newline_fn will be called + for every newline in the file */ + svn_tristate_t nl_translation_skippable; +}; + + +/* Similar to translate_newline() except that b->translated_eol is NULL or it + was already set to TRUE, so the code that deals with b->translated_eol is + omitted. + + If any part of this is changed, translate_newline() must also be updated + as appropriate!!!!! */ +static svn_error_t * +translate_newline_alt(const char *eol_str, + apr_size_t eol_str_len, + char *src_format, + apr_size_t *src_format_len, + const char *newline_buf, + apr_size_t newline_len, + svn_stream_t *dst, + struct translation_baton *b) +{ + /* If we've seen a newline before, compare it with our cache to + check for consistency, else cache it for future comparisons. */ + if (*src_format_len) + { + /* Comparing with cache. If we are inconsistent and + we are NOT repairing the file, generate an error! */ + if ((! b->repair) && + ((*src_format_len != newline_len) || + (strncmp(src_format, newline_buf, newline_len)))) + return svn_error_create(SVN_ERR_IO_INCONSISTENT_EOL, NULL, NULL); + } + else + { + /* This is our first line ending, so cache it before + handling it. 
*/ + strncpy(src_format, newline_buf, newline_len); + *src_format_len = newline_len; + } + + /* Translate the newline */ + SVN_ERR(translate_write(dst, eol_str, eol_str_len)); + + return SVN_NO_ERROR; +} + + /* Translate the newline string NEWLINE_BUF (of length NEWLINE_LEN) to the newline string EOL_STR (of length EOL_STR_LEN), writing the result (which is always EOL_STR) to the stream DST. + + This function assumes that NEWLINE_BUF and EOL_STR are either "\n", "\r", or + "\r\n". Also check for consistency of the source newline strings across multiple calls, using SRC_FORMAT (length *SRC_FORMAT_LEN) as a cache of the first newline found. If the current newline is not the same - as SRC_FORMAT, look to the REPAIR parameter. If REPAIR is TRUE, + as SRC_FORMAT, look to b->repair. If b->repair is TRUE, ignore the inconsistency, else return an SVN_ERR_IO_INCONSISTENT_EOL error. If *SRC_FORMAT_LEN is 0, assume we are examining the first newline in the file, and copy it to {SRC_FORMAT, *SRC_FORMAT_LEN} to use for later consistency checks. - Note: all parameters are required even if REPAIR is TRUE. - ### We could require that REPAIR must not change across a sequence of + To use this variant, b->translated_eol must not be NULL. + + Sets *b->translated_eol to TRUE if the newline string that was written + (EOL_STR) is not the same as the newline string that was translated + (NEWLINE_BUF). + + Note: all parameters are required even if b->repair is TRUE. + ### We could require that b->repair must not change across a sequence of calls, and could then optimize by not using SRC_FORMAT at all if - REPAIR is TRUE. + b->repair is TRUE. + + + If any part of this is changed, translate_newline_alt() must also be updated + as appropriate!!!!! 
*/ static svn_error_t * translate_newline(const char *eol_str, @@ -633,15 +739,22 @@ translate_newline(const char *eol_str, const char *newline_buf, apr_size_t newline_len, svn_stream_t *dst, - svn_boolean_t repair) + struct translation_baton *b) { + assert((eol_str_len == 2 && eol_str[0] == '\r' && eol_str[1] == '\n') || + (eol_str_len == 1 && (eol_str[0] == '\n' || eol_str[0] == '\r'))); + assert((newline_len == 2 && newline_buf[0] == '\r' && + newline_buf[1] == '\n') || + (newline_len == 1 && (newline_buf[0] == '\n' || + newline_buf[0] == '\r'))); + /* If we've seen a newline before, compare it with our cache to check for consistency, else cache it for future comparisons. */ if (*src_format_len) { /* Comparing with cache. If we are inconsistent and we are NOT repairing the file, generate an error! */ - if ((! repair) && + if ((! b->repair) && ((*src_format_len != newline_len) || (strncmp(src_format, newline_buf, newline_len)))) return svn_error_create(SVN_ERR_IO_INCONSISTENT_EOL, NULL, NULL); @@ -653,8 +766,36 @@ translate_newline(const char *eol_str, strncpy(src_format, newline_buf, newline_len); *src_format_len = newline_len; } - /* Write the desired newline */ - return translate_write(dst, eol_str, eol_str_len); + + /* Translate the newline */ + SVN_ERR(translate_write(dst, eol_str, eol_str_len)); + + /* Set *b->translated_eol to TRUE if a different line ending string was + written out. */ + assert(b->translated_eol != NULL); + + /* We know that NEWLINE_BUF and EOL_STR are either "\n", "\r", or + "\r\n". If the length of NEWLINE_BUF (NEWLINE_LEN) is not the same + as the length of EOL_STR (EOL_STR_LEN), then NEWLINE_BUF and + EOL_STR are of course different. Otherwise the lengths are equal: + either both strings are "\r\n", or both are a single character. We + need only compare the first character to determine whether + NEWLINE_BUF and EOL_STR are the same in that case. 
*/ + if ((newline_len != eol_str_len) || (*newline_buf != *eol_str)) + { + *b->translated_eol = TRUE; + b->translate_newline_fn = &translate_newline_alt; /* Now that + *b->translated_eol has + been set to TRUE, + switch the + translate_newline + function that is used + to the alternate + which does not care + about it. */ + } + + return SVN_NO_ERROR; } @@ -765,46 +906,6 @@ svn_subst_keywords_differ2(apr_hash_t *a, return FALSE; } -/* Baton for translate_chunk() to store its state in. */ -struct translation_baton -{ - const char *eol_str; - svn_boolean_t repair; - apr_hash_t *keywords; - svn_boolean_t expand; - - /* 'short boolean' array that encodes what character values - may trigger a translation action, hence are 'interesting' */ - char interesting[256]; - - /* Length of the string EOL_STR points to. */ - apr_size_t eol_str_len; - - /* Buffer to cache any newline state between translation chunks */ - char newline_buf[2]; - - /* Offset (within newline_buf) of the first *unused* character */ - apr_size_t newline_off; - - /* Buffer to cache keyword-parsing state between translation chunks */ - char keyword_buf[SVN_KEYWORD_MAX_LEN]; - - /* Offset (within keyword-buf) to the first *unused* character */ - apr_size_t keyword_off; - - /* EOL style used in the chunk-source */ - char src_format[2]; - - /* Length of the EOL style string found in the chunk-source, - or zero if none encountered yet */ - apr_size_t src_format_len; - - /* If this is svn_tristate_false, translate_newline() will be called - for every newline in the file */ - svn_tristate_t nl_translation_skippable; -}; - - /* Allocate a baton for use with translate_chunk() in POOL and * initialize it for the first iteration. 
* @@ -813,6 +914,7 @@ svn_subst_keywords_differ2(apr_hash_t *a, */ static struct translation_baton * create_translation_baton(const char *eol_str, + svn_boolean_t *translated_eol, svn_boolean_t repair, apr_hash_t *keywords, svn_boolean_t expand, @@ -826,6 +928,9 @@ create_translation_baton(const char *eol_str, b->eol_str = eol_str; b->eol_str_len = eol_str ? strlen(eol_str) : 0; + b->translated_eol = translated_eol; + b->translate_newline_fn = (translated_eol ? &translate_newline + : &translate_newline_alt); b->repair = repair; b->keywords = keywords; b->expand = expand; @@ -921,10 +1026,11 @@ translate_chunk(svn_stream_t *dst, if (*p == '\n') b->newline_buf[b->newline_off++] = *p++; - SVN_ERR(translate_newline(b->eol_str, b->eol_str_len, - b->src_format, - &b->src_format_len, b->newline_buf, - b->newline_off, dst, b->repair)); + SVN_ERR(b->translate_newline_fn(b->eol_str, b->eol_str_len, + b->src_format, + &b->src_format_len, + b->newline_buf, b->newline_off, + dst, b)); b->newline_off = 0; } @@ -985,17 +1091,17 @@ translate_chunk(svn_stream_t *dst, continue; } - /* translate_newline will modify the baton for src_format_len==0 + /* translate_newline_fn will modify the baton for src_format_len==0 or may return an error if b->repair is FALSE. In all other cases, we can skip the newline translation as long as source EOL format and actual EOL format match. If there is a - mismatch, translate_newline will be called regardless of + mismatch, translate_newline_fn will be called regardless of nl_translation_skippable. 
*/ if (b->nl_translation_skippable == svn_tristate_unknown && b->src_format_len > 0) { - /* test whether translate_newline may return an error */ + /* test whether translate_newline_fn may return an error */ if (b->eol_str_len == b->src_format_len && strncmp(b->eol_str, b->src_format, b->eol_str_len) == 0) b->nl_translation_skippable = svn_tristate_true; @@ -1066,11 +1172,11 @@ translate_chunk(svn_stream_t *dst, case '\n': b->newline_buf[b->newline_off++] = *p++; - SVN_ERR(translate_newline(b->eol_str, b->eol_str_len, - b->src_format, - &b->src_format_len, - b->newline_buf, - b->newline_off, dst, b->repair)); + SVN_ERR(b->translate_newline_fn(b->eol_str, b->eol_str_len, + b->src_format, + &b->src_format_len, + b->newline_buf, + b->newline_off, dst, b)); b->newline_off = 0; break; @@ -1083,10 +1189,10 @@ translate_chunk(svn_stream_t *dst, { if (b->newline_off) { - SVN_ERR(translate_newline(b->eol_str, b->eol_str_len, - b->src_format, &b->src_format_len, - b->newline_buf, b->newline_off, - dst, b->repair)); + SVN_ERR(b->translate_newline_fn(b->eol_str, b->eol_str_len, + b->src_format, &b->src_format_len, + b->newline_buf, b->newline_off, + dst, b)); b->newline_off = 0; } @@ -1350,13 +1456,14 @@ svn_subst_read_specialfile(svn_stream_t **stream, } -svn_stream_t * -svn_subst_stream_translated(svn_stream_t *stream, - const char *eol_str, - svn_boolean_t repair, - apr_hash_t *keywords, - svn_boolean_t expand, - apr_pool_t *result_pool) +static svn_stream_t * +stream_translated(svn_stream_t *stream, + const char *eol_str, + svn_boolean_t *translated_eol, + svn_boolean_t repair, + apr_hash_t *keywords, + svn_boolean_t expand, + apr_pool_t *result_pool) { struct translated_stream_baton *baton = apr_palloc(result_pool, sizeof(*baton)); @@ -1398,9 +1505,11 @@ svn_subst_read_specialfile(svn_stream_t **stream, /* Setup the baton fields */ baton->stream = stream; baton->in_baton - = create_translation_baton(eol_str, repair, keywords, expand, result_pool); + = 
create_translation_baton(eol_str, translated_eol, repair, keywords, + expand, result_pool); baton->out_baton - = create_translation_baton(eol_str, repair, keywords, expand, result_pool); + = create_translation_baton(eol_str, translated_eol, repair, keywords, + expand, result_pool); baton->written = FALSE; baton->readbuf = svn_stringbuf_create("", result_pool); baton->readbuf_off = 0; @@ -1417,15 +1526,28 @@ svn_subst_read_specialfile(svn_stream_t **stream, return s; } +svn_stream_t * +svn_subst_stream_translated(svn_stream_t *stream, + const char *eol_str, + svn_boolean_t repair, + apr_hash_t *keywords, + svn_boolean_t expand, + apr_pool_t *result_pool) +{ + return stream_translated(stream, eol_str, NULL, repair, keywords, expand, + result_pool); +} -svn_error_t * -svn_subst_translate_cstring2(const char *src, - const char **dst, - const char *eol_str, - svn_boolean_t repair, - apr_hash_t *keywords, - svn_boolean_t expand, - apr_pool_t *pool) + +static svn_error_t * +translate_cstring(const char **dst, + svn_boolean_t *translated_eol, + const char *src, + const char *eol_str, + svn_boolean_t repair, + apr_hash_t *keywords, + svn_boolean_t expand, + apr_pool_t *pool) { svn_stringbuf_t *dst_stringbuf; svn_stream_t *dst_stream; @@ -1442,9 +1564,12 @@ svn_subst_read_specialfile(svn_stream_t **stream, dst_stringbuf = svn_stringbuf_create("", pool); dst_stream = svn_stream_from_stringbuf(dst_stringbuf, pool); + if (translated_eol) + *translated_eol = FALSE; + /* Another wrapper to translate the content. */ - dst_stream = svn_subst_stream_translated(dst_stream, eol_str, repair, - keywords, expand, pool); + dst_stream = stream_translated(dst_stream, eol_str, translated_eol, repair, + keywords, expand, pool); /* Jam the text into the destination stream (to translate it). 
*/ SVN_ERR(svn_stream_write(dst_stream, src, &len)); @@ -1456,6 +1581,19 @@ svn_subst_read_specialfile(svn_stream_t **stream, return SVN_NO_ERROR; } +svn_error_t * +svn_subst_translate_cstring2(const char *src, + const char **dst, + const char *eol_str, + svn_boolean_t repair, + apr_hash_t *keywords, + svn_boolean_t expand, + apr_pool_t *pool) +{ + return translate_cstring(dst, NULL, src, eol_str, repair, keywords, expand, + pool); +} + /* Given a special file at SRC, generate a textual representation of it in a normal file at DST. Perform all allocations in POOL. */ /* ### this should be folded into svn_subst_copy_and_translate3 */ @@ -1768,14 +1906,16 @@ svn_subst_stream_from_specialfile(svn_stream_t **s /*** String translation */ svn_error_t * -svn_subst_translate_string(svn_string_t **new_value, - const svn_string_t *value, - const char *encoding, - apr_pool_t *pool) +svn_subst_translate_string2(svn_string_t **new_value, + svn_boolean_t *translated_to_utf8, + svn_boolean_t *translated_line_endings, + const svn_string_t *value, + const char *encoding, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) { const char *val_utf8; const char *val_utf8_lf; - apr_pool_t *scratch_pool = svn_pool_create(pool); if (value == NULL) { @@ -1793,16 +1933,19 @@ svn_error_t * SVN_ERR(svn_utf_cstring_to_utf8(&val_utf8, value->data, scratch_pool)); } - SVN_ERR(svn_subst_translate_cstring2(val_utf8, - &val_utf8_lf, - "\n", /* translate to LF */ - FALSE, /* no repair */ - NULL, /* no keywords */ - FALSE, /* no expansion */ - scratch_pool)); + if (translated_to_utf8) + *translated_to_utf8 = (strcmp(value->data, val_utf8) != 0); - *new_value = svn_string_create(val_utf8_lf, pool); - svn_pool_destroy(scratch_pool); + SVN_ERR(translate_cstring(&val_utf8_lf, + translated_line_endings, + val_utf8, + "\n", /* translate to LF */ + FALSE, /* no repair */ + NULL, /* no keywords */ + FALSE, /* no expansion */ + scratch_pool)); + + *new_value = svn_string_create(val_utf8_lf, result_pool); 
return SVN_NO_ERROR; } Index: subversion/libsvn_subr/deprecated.c =================================================================== --- subversion/libsvn_subr/deprecated.c (revision 1032431) +++ subversion/libsvn_subr/deprecated.c (working copy) @@ -250,6 +250,19 @@ svn_subst_stream_translated_to_normal_form(svn_str } svn_error_t * +svn_subst_translate_string(svn_string_t **new_value, + const svn_string_t *value, + const char *encoding, + apr_pool_t *result_pool) +{ + apr_pool_t *scratch_pool = svn_pool_create(result_pool); + svn_error_t *res = svn_subst_translate_string2(new_value, NULL, NULL, value, + encoding, result_pool, scratch_pool); + svn_pool_destroy(scratch_pool); + return res; +} + +svn_error_t * svn_subst_stream_detranslated(svn_stream_t **stream_p, const char *src, svn_subst_eol_style_t eol_style,