Re: svn commit: r1480406 - in /subversion/trunk: ./ subversion/include/ subversion/include/private/ subversion/libsvn_subr/ subversion/libsvn_subr/utf8proc/ subversion/tests/ subversion/tests/libsvn_subr/

From: Joe Swatosh <joe.swatosh_at_gmail.com>
Date: Mon, 13 May 2013 18:53:35 -0700
Is SIZE_MAX C89? Should it be APR_SIZE_MAX in utf8proc.h (along with
including svn_dep_compat.h)?
--
Joe
On Wed, May 8, 2013 at 11:56 AM,  <brane_at_apache.org> wrote:
> Author: brane
> Date: Wed May  8 18:56:18 2013
> New Revision: 1480406
>
> URL: http://svn.apache.org/r1480406
> Log:
> Reintegrate wc-collate-path branch to trunk.
>
> * subversion/libsvn_subr/sqlite.c: Protect changes related to working copy
>   database functionality with #ifdef SVN_UNICODE_NORMALIZATION_FIXES.
>
> Added:
>     subversion/trunk/subversion/libsvn_subr/utf8proc/
>       - copied from r1480384, subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc/
>     subversion/trunk/subversion/libsvn_subr/utf8proc.c
>       - copied unchanged from r1480384, subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c
>     subversion/trunk/subversion/tests/diacritical.txt
>       - copied unchanged from r1480384, subversion/branches/wc-collate-path/subversion/tests/diacritical.txt
> Modified:
>     subversion/trunk/build.conf
>     subversion/trunk/subversion/include/private/svn_utf_private.h
>     subversion/trunk/subversion/include/svn_error_codes.h
>     subversion/trunk/subversion/libsvn_subr/sqlite.c
>     subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
>
> Modified: subversion/trunk/build.conf
> URL: http://svn.apache.org/viewvc/subversion/trunk/build.conf?rev=1480406&r1=1480405&r2=1480406&view=diff
> ==============================================================================
> --- subversion/trunk/build.conf (original)
> +++ subversion/trunk/build.conf Wed May  8 18:56:18 2013
> @@ -40,6 +40,9 @@ private-includes =
>          subversion/libsvn_delta/compose_delta.c
>          subversion/bindings/cxxhl/include/*.hpp
>          subversion/bindings/cxxhl/include/svncxxhl/*.hpp
> +        subversion/libsvn_subr/utf8proc/utf8proc.c
> +        subversion/libsvn_subr/utf8proc/utf8proc.h
> +        subversion/libsvn_subr/utf8proc/utf8proc_data.c
>  private-built-includes =
>          subversion/svn_private_config.h
>          subversion/libsvn_fs_fs/rep-cache-db.h
>
> Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1480406&r1=1480405&r2=1480406&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
> +++ subversion/trunk/subversion/include/private/svn_utf_private.h Wed May  8 18:56:18 2013
> @@ -21,7 +21,7 @@
>   * @endcopyright
>   *
>   * @file svn_utf_private.h
> - * @brief UTF validation routines
> + * @brief UTF validation and normalization routines
>   */
>
>  #ifndef SVN_UTF_PRIVATE_H
> @@ -31,6 +31,8 @@
>  #include <apr_pools.h>
>
>  #include "svn_types.h"
> +#include "svn_string.h"
> +#include "svn_string_private.h"
>
>  #ifdef __cplusplus
>  extern "C" {
> @@ -79,6 +81,57 @@ svn_utf__cstring_from_utf8_fuzzy(const c
>                                                 const char *,
>                                                 apr_pool_t *));
>
> +/* A constant used for many length parameters in the utf8proc wrappers
> + * to indicate that the length of a string is unknown. */
> +#define SVN_UTF__UNKNOWN_LENGTH ((apr_size_t) -1)
> +
> +
> +/* Compare two UTF-8 strings, ignoring normalization, using buffers
> + * BUF1 and BUF2 for temporary storage. If either of LEN1 or LEN2 is
> + * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
> + * null-terminated; otherwise, consider the string only up to the
> + * given length.
> + *
> + * Return compare value in *RESULT.
> + */
> +svn_error_t *
> +svn_utf__normcmp(const char *str1, apr_size_t len1,
> +                 const char *str2, apr_size_t len2,
> +                 svn_membuf_t *buf1, svn_membuf_t *buf2,
> +                 int *result);
> +
> +
> +/* Pattern matching similar to the SQLite LIKE and GLOB
> + * operators. PATTERN, KEY and ESCAPE must all point to UTF-8
> + * strings. Furthermore, ESCAPE, if provided, must be a character from
> + * the ASCII subset.
> + *
> + * If any of PATTERN_LEN, STRING_LEN or ESCAPE_LEN are
> + * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
> + * null-terminated; otherwise, consider the string only up to the
> + * given length.
> + *
> + * Use buffers PATTERN_BUF, STRING_BUF and TEMP_BUF for temporary storage.
> + *
> + * If SQL_LIKE is true, interpret PATTERN as a pattern used by the SQL
> + * LIKE operator and notice ESCAPE. Otherwise it's a Unix fileglob
> + * pattern, and ESCAPE must be NULL.
> + *
> + * Set *MATCH to the result of the comparison.
> +*/
> +svn_error_t *
> +svn_utf__glob(const char *pattern, apr_size_t pattern_len,
> +              const char *string, apr_size_t string_len,
> +              const char *escape, apr_size_t escape_len,
> +              svn_boolean_t sql_like,
> +              svn_membuf_t *pattern_buf,
> +              svn_membuf_t *string_buf,
> +              svn_membuf_t *temp_buf,
> +              svn_boolean_t *match);
> +
> +/* Return the version of the wrapped utf8proc library. */
> +const char *
> +svn_utf__utf8proc_version(void);
>
>  #ifdef __cplusplus
>  }
>
> Modified: subversion/trunk/subversion/include/svn_error_codes.h
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_error_codes.h?rev=1480406&r1=1480405&r2=1480406&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/include/svn_error_codes.h (original)
> +++ subversion/trunk/subversion/include/svn_error_codes.h Wed May  8 18:56:18 2013
> @@ -1330,6 +1330,16 @@ SVN_ERROR_START
>               SVN_ERR_MISC_CATEGORY_START + 38,
>               "Atomic data storage is corrupt")
>
> +  /** @since New in 1.8. */
> +  SVN_ERRDEF(SVN_ERR_UTF8PROC_ERROR,
> +             SVN_ERR_MISC_CATEGORY_START + 39,
> +             "utf8proc library error")
> +
> +  /** @since New in 1.8. */
> +  SVN_ERRDEF(SVN_ERR_UTF8_GLOB,
> +             SVN_ERR_MISC_CATEGORY_START + 40,
> +             "Bad arguments to SQL operators GLOB or LIKE")
> +
>    /* command-line client errors */
>
>    SVN_ERRDEF(SVN_ERR_CL_ARG_PARSING_ERROR,
>
> Modified: subversion/trunk/subversion/libsvn_subr/sqlite.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/sqlite.c?rev=1480406&r1=1480405&r2=1480406&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_subr/sqlite.c (original)
> +++ subversion/trunk/subversion/libsvn_subr/sqlite.c Wed May  8 18:56:18 2013
> @@ -38,6 +38,11 @@
>  #include "private/svn_skel.h"
>  #include "private/svn_token.h"
>
> +#ifdef SVN_UNICODE_NORMALIZATION_FIXES
> +#include "private/svn_utf_private.h"
> +#include "private/svn_string_private.h"
> +#endif /* SVN_UNICODE_NORMALIZATION_FIXES */
> +
>  #ifdef SQLITE3_DEBUG
>  #include "private/svn_debug.h"
>  #endif
> @@ -60,6 +65,13 @@ extern int (*const svn_sqlite3__api_conf
>  #error SQLite is too old -- version 3.7.12 is the minimum required version
>  #endif
>
> +#ifdef SVN_UNICODE_NORMALIZATION_FIXES
> +/* Limit the length of a GLOB or LIKE pattern. */
> +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
> +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
> +#endif
> +#endif /* SVN_UNICODE_NORMALIZATION_FIXES */
> +
>  const char *
>  svn_sqlite__compiled_version(void)
>  {
> @@ -104,6 +116,13 @@ struct svn_sqlite__db_t
>    int nbr_statements;
>    svn_sqlite__stmt_t **prepared_stmts;
>    apr_pool_t *state_pool;
> +
> +#ifdef SVN_UNICODE_NORMALIZATION_FIXES
> +  /* Buffers for SQLite extensions. */
> +  svn_membuf_t sqlext_buf1;
> +  svn_membuf_t sqlext_buf2;
> +  svn_membuf_t sqlext_buf3;
> +#endif /* SVN_UNICODE_NORMALIZATION_FIXES */
>  };
>
>  struct svn_sqlite__stmt_t
> @@ -873,6 +892,99 @@ close_apr(void *data)
>    return APR_SUCCESS;
>  }
>
> +#ifdef SVN_UNICODE_NORMALIZATION_FIXES
> +/* Unicode normalizing collation for WC paths */
> +static int
> +collate_ucs_nfd(void *baton,
> +                int len1, const void *key1,
> +                int len2, const void *key2)
> +{
> +  svn_sqlite__db_t *db = baton;
> +  int result;
> +
> +  if (svn_utf__normcmp(key1, len1, key2, len2,
> +                       &db->sqlext_buf1, &db->sqlext_buf2, &result))
> +    {
> +      /* There is really nothing we can do here if an error occurs
> +         during Unicode normalization, and attempting to recover could
> +         result in the wc.db index being corrupted. Presumably this
> +         can only happen if the index already contains invalid UTF-8
> +         strings, which should never happen in any case ... */
> +      SVN_ERR_MALFUNCTION_NO_RETURN();
> +    }
> +
> +  return result;
> +}
> +
> +static void
> +glob_like_ucs_nfd_common(sqlite3_context *context,
> +                         int argc, sqlite3_value **argv,
> +                         svn_boolean_t sql_like)
> +{
> +  svn_sqlite__db_t *const db = sqlite3_user_data(context);
> +
> +  const char *const pattern = (void*)sqlite3_value_text(argv[0]);
> +  const apr_size_t pattern_len = sqlite3_value_bytes(argv[0]);
> +  const char *const string = (void*)sqlite3_value_text(argv[1]);
> +  const apr_size_t string_len = sqlite3_value_bytes(argv[1]);
> +
> +  const char *escape = NULL;
> +  apr_size_t escape_len = 0;
> +
> +  svn_boolean_t match;
> +  svn_error_t *err;
> +
> +  if (pattern_len > SQLITE_MAX_LIKE_PATTERN_LENGTH)
> +    {
> +      sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
> +      return;
> +    }
> +
> +  if (argc == 3 && sql_like)
> +    {
> +      escape = (void*)sqlite3_value_text(argv[2]);
> +      escape_len = sqlite3_value_bytes(argv[2]);
> +    }
> +
> +  if (pattern && string)
> +    {
> +      err = svn_utf__glob(pattern, pattern_len, string, string_len,
> +                          escape, escape_len, sql_like,
> +                          &db->sqlext_buf1, &db->sqlext_buf2, &db->sqlext_buf3,
> +                          &match);
> +
> +      if (err)
> +        {
> +          const char *errmsg;
> +          svn_membuf__ensure(&db->sqlext_buf1, 512);
> +          errmsg = svn_err_best_message(err,
> +                                        db->sqlext_buf1.data,
> +                                        db->sqlext_buf1.size - 1);
> +          svn_error_clear(err);
> +          sqlite3_result_error(context, errmsg, -1);
> +          return;
> +        }
> +
> +      sqlite3_result_int(context, match);
> +    }
> +}
> +
> +/* Unicode normalizing implementation of GLOB */
> +static void
> +glob_ucs_nfd(sqlite3_context *context,
> +             int argc, sqlite3_value **argv)
> +{
> +  glob_like_ucs_nfd_common(context, argc, argv, FALSE);
> +}
> +
> +/* Unicode normalizing implementation of LIKE */
> +static void
> +like_ucs_nfd(sqlite3_context *context,
> +             int argc, sqlite3_value **argv)
> +{
> +  glob_like_ucs_nfd_common(context, argc, argv, TRUE);
> +}
> +#endif /* SVN_UNICODE_NORMALIZATION_FIXES */
>
>  svn_error_t *
>  svn_sqlite__open(svn_sqlite__db_t **db, const char *path,
> @@ -887,6 +999,28 @@ svn_sqlite__open(svn_sqlite__db_t **db,
>
>    SVN_ERR(internal_open(&(*db)->db3, path, mode, scratch_pool));
>
> +#ifdef SVN_UNICODE_NORMALIZATION_FIXES
> +  /* Create extension buffers with space for 200 UCS-4 characters. */
> +  svn_membuf__create(&(*db)->sqlext_buf1, 800, result_pool);
> +  svn_membuf__create(&(*db)->sqlext_buf2, 800, result_pool);
> +  svn_membuf__create(&(*db)->sqlext_buf3, 800, result_pool);
> +
> +  /* Register collation and LIKE and GLOB operator replacements. */
> +  SQLITE_ERR(sqlite3_create_collation((*db)->db3,
> +                                      "svn-ucs-nfd", SQLITE_UTF8,
> +                                      *db, collate_ucs_nfd),
> +             *db);
> +  SQLITE_ERR(sqlite3_create_function((*db)->db3, "glob", 2, SQLITE_UTF8,
> +                                     *db, glob_ucs_nfd, NULL, NULL),
> +             *db);
> +  SQLITE_ERR(sqlite3_create_function((*db)->db3, "like", 2, SQLITE_UTF8,
> +                                     *db, like_ucs_nfd, NULL, NULL),
> +             *db);
> +  SQLITE_ERR(sqlite3_create_function((*db)->db3, "like", 3, SQLITE_UTF8,
> +                                     *db, like_ucs_nfd, NULL, NULL),
> +             *db);
> +#endif /* SVN_UNICODE_NORMALIZATION_FIXES */
> +
>  #ifdef SQLITE3_DEBUG
>    sqlite3_trace((*db)->db3, sqlite_tracer, (*db)->db3);
>  #endif
>
> Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/utf-test.c?rev=1480406&r1=1480405&r2=1480406&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c (original)
> +++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Wed May  8 18:56:18 2013
> @@ -294,6 +294,324 @@ test_utf_cstring_from_utf8_ex2(apr_pool_
>    return SVN_NO_ERROR;
>  }
>
> +/* Test normalization-independent UTF-8 string comparison */
> +static svn_error_t *
> +test_utf_collated_compare(apr_pool_t *pool)
> +{
> +  /* Normalized: NFC */
> +  static const char nfc[] =
> +    "\xe1\xb9\xa8"              /* S with dot above and below */
> +    "\xc5\xaf"                  /* u with ring */
> +    "\xe1\xb8\x87"              /* b with macron below */
> +    "\xe1\xb9\xbd"              /* v with tilde */
> +    "\xe1\xb8\x9d"              /* e with breve and cedilla */
> +    "\xc8\x91"                  /* r with double grave */
> +    "\xc5\xa1"                  /* s with caron */
> +    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
> +    "\xe1\xbb\x9d"              /* o with grave and hook */
> +    "\xe1\xb9\x8b";             /* n with circumflex below */
> +
> +  /* Normalized: NFD */
> +  static const char nfd[] =
> +    "S\xcc\xa3\xcc\x87"         /* S with dot above and below */
> +    "u\xcc\x8a"                 /* u with ring */
> +    "b\xcc\xb1"                 /* b with macron below */
> +    "v\xcc\x83"                 /* v with tilde */
> +    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
> +    "r\xcc\x8f"                 /* r with double grave */
> +    "s\xcc\x8c"                 /* s with caron */
> +    "i\xcc\x88\xcc\x81"         /* i with diaeresis and acute */
> +    "o\xcc\x9b\xcc\x80"         /* o with grave and hook */
> +    "n\xcc\xad";                /* n with circumflex below */
> +
> +  /* Mixed, denormalized */
> +  static const char mixup[] =
> +    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
> +    "\xc5\xaf"                  /* u with ring */
> +    "b\xcc\xb1"                 /* b with macron below */
> +    "\xe1\xb9\xbd"              /* v with tilde */
> +    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
> +    "\xc8\x91"                  /* r with double grave */
> +    "s\xcc\x8c"                 /* s with caron */
> +    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
> +    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
> +    "\xe1\xb9\x8b";             /* n with circumflex below */
> +
> +  static const char longer[] =
> +    "\xe1\xb9\xa8"              /* S with dot above and below */
> +    "\xc5\xaf"                  /* u with ring */
> +    "\xe1\xb8\x87"              /* b with macron below */
> +    "\xe1\xb9\xbd"              /* v with tilde */
> +    "\xe1\xb8\x9d"              /* e with breve and cedilla */
> +    "\xc8\x91"                  /* r with double grave */
> +    "\xc5\xa1"                  /* s with caron */
> +    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
> +    "\xe1\xbb\x9d"              /* o with grave and hook */
> +    "\xe1\xb9\x8b"              /* n with circumflex below */
> +    "X";
> +
> +  static const char shorter[] =
> +    "\xe1\xb9\xa8"              /* S with dot above and below */
> +    "\xc5\xaf"                  /* u with ring */
> +    "\xe1\xb8\x87"              /* b with macron below */
> +    "\xe1\xb9\xbd"              /* v with tilde */
> +    "\xe1\xb8\x9d"              /* e with breve and cedilla */
> +    "\xc8\x91"                  /* r with double grave */
> +    "\xc5\xa1"                  /* s with caron */
> +    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
> +    "\xe1\xbb\x9d";             /* o with grave and hook */
> +
> +  static const char lowcase[] =
> +    "s\xcc\x87\xcc\xa3"         /* s with dot above and below */
> +    "\xc5\xaf"                  /* u with ring */
> +    "b\xcc\xb1"                 /* b with macron below */
> +    "\xe1\xb9\xbd"              /* v with tilde */
> +    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
> +    "\xc8\x91"                  /* r with double grave */
> +    "s\xcc\x8c"                 /* s with caron */
> +    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
> +    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
> +    "\xe1\xb9\x8b";             /* n with circumflex below */
> +
> +  static const struct utfcmp_test_t {
> +    const char *stra;
> +    char op;
> +    const char *strb;
> +    const char *taga;
> +    const char *tagb;
> +  } utfcmp_tests[] = {
> +    /* Empty key */
> +    {"",  '=', "",  "empty",    "empty"},
> +    {"",  '<', "a", "empty",    "nonempty"},
> +    {"a", '>', "",  "nonempty", "empty"},
> +
> +    /* Deterministic ordering */
> +    {"a", '<', "b", "a", "b"},
> +    {"b", '<', "c", "b", "c"},
> +    {"a", '<', "c", "a", "c"},
> +
> +    /* Normalized equality */
> +    {nfc,   '=', nfd,    "nfc",   "nfd"},
> +    {nfd,   '=', nfc,    "nfd",   "nfc"},
> +    {nfc,   '=', mixup,  "nfc",   "mixup"},
> +    {nfd,   '=', mixup,  "nfd",   "mixup"},
> +    {mixup, '=', nfd,    "mixup", "nfd"},
> +    {mixup, '=', nfc,    "mixup", "nfc"},
> +
> +    /* Key length */
> +    {nfc,     '<', longer,    "nfc",     "longer"},
> +    {longer,  '>', nfc,       "longer",  "nfc"},
> +    {nfd,     '>', shorter,   "nfd",     "shorter"},
> +    {shorter, '<', nfd,       "shorter", "nfd"},
> +    {mixup,   '<', lowcase,   "mixup",   "lowcase"},
> +    {lowcase, '>', mixup,     "lowcase",  "mixup"},
> +
> +    {NULL, 0, NULL, NULL, NULL}
> +  };
> +
> +
> +  svn_stringbuf_t *bufa = svn_stringbuf_create_empty(pool);
> +  svn_stringbuf_t *bufb = svn_stringbuf_create_empty(pool);
> +  const struct utfcmp_test_t *ut;
> +
> +  srand(111);
> +  for (ut = utfcmp_tests; ut->stra; ++ut)
> +    {
> +      const svn_boolean_t implicit_size = (rand() % 17) & 1;
> +      const apr_size_t lena = (implicit_size
> +                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->stra));
> +      const apr_size_t lenb = (implicit_size
> +                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->strb));
> +      int result;
> +
> +      SVN_ERR(svn_utf__normcmp(ut->stra, lena, ut->strb, lenb,
> +                               bufa, bufb, &result));
> +
> +      /* UCS-4 debugging dump of the decomposed strings
> +      {
> +        const apr_int32_t *const ucsbufa = (void*)bufa->data;
> +        const apr_int32_t *const ucsbufb = (void*)bufb->data;
> +        apr_size_t i;
> +
> +        printf("(%c)%7s %c %s\n", ut->op,
> +               ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
> +
> +        for (i = 0; i < bufa->len || i < bufb->len; ++i)
> +        {
> +          if (i < bufa->len && i < bufb->len)
> +            printf("    U+%04X   U+%04X\n", ucsbufa[i], ucsbufb[i]);
> +          else if (i < bufa->len)
> +            printf("    U+%04X\n", ucsbufa[i]);
> +          else
> +            printf("             U+%04X\n", ucsbufb[i]);
> +        }
> +      }
> +      */
> +
> +      if (('=' == ut->op && 0 != result)
> +          || ('<' == ut->op && 0 <= result)
> +          || ('>' == ut->op && 0 >= result))
> +        {
> +          return svn_error_createf
> +            (SVN_ERR_TEST_FAILED, NULL,
> +             "Ut->Op '%s' %c '%s' but '%s' %c '%s'",
> +             ut->taga, ut->op, ut->tagb,
> +             ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
> +        }
> +    }
> +
> +  return SVN_NO_ERROR;
> +}
> +
> +
> +
> +static svn_error_t *
> +test_utf_pattern_match(apr_pool_t *pool)
> +{
> +  static const struct glob_test_t {
> +    svn_boolean_t sql_like;
> +    svn_boolean_t matches;
> +    const char *pattern;
> +    const char *string;
> +    const char *escape;
> +  } glob_tests[] = {
> +#define LIKE_MATCH TRUE, TRUE
> +#define LIKE_FAIL  TRUE, FALSE
> +#define GLOB_MATCH FALSE, TRUE
> +#define GLOB_FAIL  FALSE, FALSE
> +
> +    {LIKE_FAIL,  "",     "test", NULL},
> +    {GLOB_FAIL,  "",     "test", NULL},
> +    {LIKE_FAIL,  "",     "%",    NULL},
> +    {GLOB_FAIL,  "",     "*",    NULL},
> +    {LIKE_FAIL,  "test", "%",    NULL},
> +    {GLOB_FAIL,  "test", "*",    NULL},
> +    {LIKE_MATCH, "test", "test", NULL},
> +    {GLOB_MATCH, "test", "test", NULL},
> +    {LIKE_MATCH, "t\xe1\xb8\x9dst", "te\xcc\xa7\xcc\x86st", NULL},
> +    {GLOB_MATCH, "te\xcc\xa7\xcc\x86st", "t\xe1\xb8\x9dst", NULL},
> +
> +    {LIKE_FAIL,  "test", "test", "\xe1\xb8\x9d"}, /* escape char not ascii */
> +
> +    {LIKE_MATCH, "te#st",    "test",   "#"},
> +    {LIKE_FAIL,  "te#st",    "test",   NULL},
> +    {GLOB_MATCH, "te\\st",   "test",   NULL},
> +    {LIKE_MATCH, "te##st",   "te#st",  "#"},
> +    {LIKE_FAIL,  "te##st",   "te#st",  NULL},
> +    {GLOB_MATCH, "te\\\\st", "te\\st", NULL},
> +    {GLOB_FAIL,  "te\\\\st", "te\\st", "\\"}, /* escape char with glob */
> +    {LIKE_FAIL,  "te#%t",    "te%t",   NULL},
> +    {LIKE_MATCH, "te#%t",    "te%t",   "#"},
> +    {GLOB_MATCH, "te\\*t",   "te*t",   NULL},
> +    {LIKE_FAIL,  "te#%t",    "test",   NULL},
> +    {GLOB_FAIL,  "te\\*t",   "test",   NULL},
> +    {LIKE_FAIL,  "te#_t",    "te_t",   NULL},
> +    {LIKE_MATCH, "te#_t",    "te_t",   "#"},
> +    {GLOB_MATCH, "te\\?t",   "te?t",   NULL},
> +    {LIKE_FAIL,  "te#_t",    "test",   NULL},
> +    {LIKE_FAIL,  "te#_t",    "test",   "#"},
> +    {GLOB_FAIL,  "te\\?t",   "test",   NULL},
> +
> +    {LIKE_MATCH, "_est",     "test",   NULL},
> +    {GLOB_MATCH, "?est",     "test",   NULL},
> +    {LIKE_MATCH, "te_t",     "test",   NULL},
> +    {GLOB_MATCH, "te?t",     "test",   NULL},
> +    {LIKE_MATCH, "tes_",     "test",   NULL},
> +    {GLOB_MATCH, "tes?",     "test",   NULL},
> +    {LIKE_FAIL,  "test_",    "test",   NULL},
> +    {GLOB_FAIL,  "test?",    "test",   NULL},
> +
> +    {LIKE_MATCH, "[s%n]",   "[subversion]", NULL},
> +    {GLOB_FAIL,  "[s*n]",   "[subversion]", NULL},
> +    {LIKE_MATCH, "#[s%n]",  "[subversion]", "#"},
> +    {GLOB_MATCH, "\\[s*n]", "[subversion]", NULL},
> +
> +    {GLOB_MATCH, ".[\\-\\t]", ".t",           NULL},
> +    {GLOB_MATCH, "test*?*[a-z]*", "testgoop", NULL},
> +    {GLOB_MATCH, "te[^x]t", "test",           NULL},
> +    {GLOB_MATCH, "te[^abc]t", "test",         NULL},
> +    {GLOB_MATCH, "te[^x]t", "test",           NULL},
> +    {GLOB_MATCH, "te[!x]t", "test",           NULL},
> +    {GLOB_FAIL,  "te[^x]t", "text",           NULL},
> +    {GLOB_FAIL,  "te[^\\x]t", "text",         NULL},
> +    {GLOB_FAIL,  "te[^x\\", "text",           NULL},
> +    {GLOB_FAIL,  "te[/]t", "text",            NULL},
> +    {GLOB_MATCH, "te[r-t]t", "test",          NULL},
> +    {GLOB_MATCH, "te[r-Tz]t", "tezt",         NULL},
> +    {GLOB_FAIL,  "te[R-T]t", "tent",          NULL},
> +/*  {GLOB_MATCH, "tes[]t]", "test",           NULL}, */
> +    {GLOB_MATCH, "tes[t-]", "test",           NULL},
> +    {GLOB_MATCH, "tes[t-]]", "test]",         NULL},
> +    {GLOB_FAIL,  "tes[t-]]", "test",          NULL},
> +    {GLOB_FAIL,  "tes[u-]", "test",           NULL},
> +    {GLOB_FAIL,  "tes[t-]", "tes[t-]",        NULL},
> +    {GLOB_MATCH, "test[/-/]", "test/",        NULL},
> +    {GLOB_MATCH, "test[\\/-/]", "test/",      NULL},
> +    {GLOB_MATCH, "test[/-\\/]", "test/",      NULL},
> +
> +#undef LIKE_MATCH
> +#undef LIKE_FAIL
> +#undef GLOB_MATCH
> +#undef GLOB_FAIL
> +
> +    {FALSE, FALSE, NULL, NULL, NULL}
> +  };
> +
> +  svn_stringbuf_t *bufa = svn_stringbuf_create_empty(pool);
> +  svn_stringbuf_t *bufb = svn_stringbuf_create_empty(pool);
> +  svn_stringbuf_t *bufc = svn_stringbuf_create_empty(pool);
> +  const struct glob_test_t *gt;
> +
> +  srand(79);
> +  for (gt = glob_tests; gt->pattern; ++gt)
> +    {
> +      const svn_boolean_t implicit_size = (rand() % 13) & 1;
> +      const apr_size_t lenptn = (implicit_size
> +                                 ? SVN_UTF__UNKNOWN_LENGTH
> +                                 : strlen(gt->pattern));
> +      const apr_size_t lenstr = (implicit_size
> +                                 ? SVN_UTF__UNKNOWN_LENGTH
> +                                 : strlen(gt->string));
> +      const apr_size_t lenesc = (implicit_size
> +                                 ? SVN_UTF__UNKNOWN_LENGTH
> +                                 : (gt->escape ? strlen(gt->escape) : 0));
> +      svn_boolean_t match;
> +      svn_error_t *err;
> +
> +
> +      err = svn_utf__glob(gt->pattern, lenptn,
> +                          gt->string, lenstr,
> +                          gt->escape, lenesc,
> +                          gt->sql_like, bufa, bufb, bufc, &match);
> +
> +      if (!gt->sql_like && gt->escape && !err)
> +        return svn_error_create
> +          (SVN_ERR_TEST_FAILED, err, "Failed to detect GLOB ESCAPE");
> +
> +      if ((err && gt->matches)
> +          || (!err && !match != !gt->matches))
> +        {
> +          if (gt->sql_like)
> +            return svn_error_createf
> +              (SVN_ERR_TEST_FAILED, err,
> +               "Wrong result: %s'%s' LIKE '%s'%s%s%s%s",
> +               (gt->matches ? "NOT " : ""), gt->string, gt->pattern,
> +               (gt->escape ? " ESCAPE " : ""), (gt->escape ? "'" : ""),
> +               (gt->escape ? gt->escape : ""), (gt->escape ? "'" : ""));
> +          else
> +            return svn_error_createf
> +              (SVN_ERR_TEST_FAILED, err, "Wrong result: %s%s GLOB %s",
> +               (gt->matches ? "NOT " : ""), gt->string, gt->pattern);
> +        }
> +
> +      if (err)
> +        svn_error_clear(err);
> +    }
> +
> +  return SVN_NO_ERROR;
> +}
> +
> +
>
>  /* The test table.  */
>
> @@ -308,5 +626,9 @@ struct svn_test_descriptor_t test_funcs[
>                     "test svn_utf_cstring_to_utf8_ex2"),
>      SVN_TEST_PASS2(test_utf_cstring_from_utf8_ex2,
>                     "test svn_utf_cstring_from_utf8_ex2"),
> +    SVN_TEST_PASS2(test_utf_collated_compare,
> +                   "test svn_utf__normcmp"),
> +    SVN_TEST_PASS2(test_utf_pattern_match,
> +                   "test svn_utf__match"),
>      SVN_TEST_NULL
>    };
>
>
Received on 2013-05-14 03:54:09 CEST
Contemporary messages sorted: [ by date ] [ by thread ] [ by subject ] [ by author ] [ by messages with attachments ]