[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r38202 - trunk/subversion/libsvn_wc

From: Hyrum K. Wright <hyrum_at_hyrumwright.org>
Date: Thu, 25 Jun 2009 23:19:43 -0500

On Jun 25, 2009, at 6:53 PM, Greg Stein wrote:

> Woah!
>
> Still catching up here, but I don't think you should be using LIKE in
> *any* SQL statement. That is a yellow flag right there, for more
> investigation. For the most part, it says "do a full table scan". Bad
> bad bad.

Agreed that full table scans are bad. Looking at the SQLite docs, a
LIKE does *not* guarantee a full table scan--the optimizer will turn
"bar like 'foo%'" into something like "bar < 'foozzzz' and bar >
'foo'" or some such, and then filter the table using the index. But
you are right in saying that LIKE clauses are not friendly, generally,
and should be avoided. If we can find a way to, I'd be happy to lose
this one as well.

> Looking at the code, I think what you really want is to examine
> parent_relpath, NOT do a LIKE clause.

The real problem here is getting the transitive closure on all the
nodes which have a given parent. So while using parent_relpath would
find all the immediate children, we'd need to continue to recurse to
nail every potential child node. That is, unless not grasping an
important bit of our repository information inheritance scheme.

-Hyrum

> On Thu, Jun 25, 2009 at 21:02, Hyrum K. Wright
> <hyrum_at_hyrumwright.org> wrote:
>>
>> Author: hwright
>> Date: Thu Jun 25 12:02:05 2009
>> New Revision: 38202
>>
>> Log:
>> Simplify a little bit of the relocate sql updating logic. Also,
>> properly
>> escape URLs being passed into a 'like' clause.
>>
>> * subversion/libsvn_wc/wc_db.c
>> (statement_keys, statements): Remove and refresh a few statements.
>> (LIKE_ESCAPE_CHAR): New.
>> (escape_sqlite_like): New.
>> (relocate_txn): Use the improved SQL statements, and be sure to
>> escape
>> URLs used in like clauses.
>>
>> Modified:
>> trunk/subversion/libsvn_wc/wc_db.c
>>
>> Modified: trunk/subversion/libsvn_wc/wc_db.c
>> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_wc/wc_db.c?pathrev=38202&r1=38201&r2=38202
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =====================================================================
>> --- trunk/subversion/libsvn_wc/wc_db.c Thu Jun 25 08:17:12
>> 2009 (r38201)
>> +++ trunk/subversion/libsvn_wc/wc_db.c Thu Jun 25 12:02:05
>> 2009 (r38202)
>> @@ -190,13 +190,15 @@ enum statement_keys {
>> STMT_SELECT_DELETION_INFO,
>> STMT_SELECT_PARENT_STUB_INFO,
>> STMT_DELETE_LOCK,
>> - STMT_UPDATE_BASE_REPO,
>> - STMT_UPDATE_BASE_CHILDREN_REPO,
>> - STMT_UPDATE_WORKING_COPYFROM_REPO,
>> - STMT_UPDATE_WORKING_CHILDREN_COPYFROM_REPO,
>> + STMT_UPDATE_BASE_RECURSIVE_REPO,
>> + STMT_UPDATE_WORKING_RECURSIVE_COPYFROM_REPO,
>> STMT_UPDATE_LOCK_REPOS_ID
>> };
>>
>> +/* This is a character used to escape itself and the globbing
>> character in
>> + globbing sql expressions below. See escape_sqlite_glob(). */
>> +#define LIKE_ESCAPE_CHAR "#"
>> +
>> static const char * const statements[] = {
>> /* STMT_SELECT_BASE_NODE */
>> "select wc_id, local_relpath, repos_id, repos_relpath, "
>> @@ -332,26 +334,23 @@ static const char * const statements[] =
>> "delete from lock "
>> "where repos_id = ?1 and repos_relpath = ?2;",
>>
>> - /* STMT_UPDATE_BASE_REPO */
>> - "update base_node set repos_id = ?3 "
>> - "where wc_id = ?1 and local_relpath = ?2;",
>> -
>> - /* STMT_UPDATE_BASE_CHILDREN_REPO */
>> - "update base_node set repos_id = ?3 "
>> - "where repos_id is not null and wc_id = ?1 and parent_relpath
>> like ?2;",
>> -
>> - /* STMT_UPDATE_WORKING_COPYFROM_REPO */
>> - "update working_node set copyfrom_repos_id = ?3 "
>> - "where copyfrom_repos_id is not null and wc_id = ?1 and
>> local_relpath = ?2;",
>> -
>> - /* STMT_UPDATE_WORKING_CHILDREN_COPYFROM_REPO */
>> - "update working_node set copyfrom_repos_id = ?3 "
>> - "where copyfrom_repos_id is not null and wc_id = ?1 "
>> - " and parent_relpath like ?2;",
>> + /* STMT_UPDATE_BASE_RECURSIVE_REPO */
>> + "update base_node set repos_id = ?4 "
>> + "where repos_id is not null and wc_id = ?1 and "
>> + " (local_relpath = ?2 or "
>> + " local_relpath like ?3 escape '" LIKE_ESCAPE_CHAR "');",
>> +
>> + /* STMT_UPDATE_WORKING_RECURSIVE_COPYFROM_REPO */
>> + "update working_node set copyfrom_repos_id = ?4 "
>> + "where copyfrom_repos_id is not null and wc_id = ?1 and "
>> + " (local_relpath = ?2 or "
>> + " local_relpath like ?3 escape '" LIKE_ESCAPE_CHAR "');",
>>
>> /* STMT_UPDATE_LOCK_REPOS_ID */
>> - "update lock set repos_id = ?3 "
>> - "where repos_id = ?1 and repos_relpath like ?2;",
>> + "update lock set repos_id = ?4 "
>> + "where repos_id = ?1 and "
>> + " (repos_relpath = ?2 or "
>> + " repos_relpath like ?3 escape '" LIKE_ESCAPE_CHAR "');",
>>
>> NULL
>> };
>> @@ -485,6 +484,42 @@ get_translated_size(svn_sqlite__stmt_t *
>> return svn_sqlite__column_int64(stmt, slot);
>> }
>>
>> +static const char *
>> +escape_sqlite_like(const char * const str, apr_pool_t *result_pool)
>> +{
>> + char *result;
>> + const char *old_ptr;
>> + char *new_ptr;
>> + int len = 0;
>> +
>> + /* Count the number of extra characters we'll need in the
>> escaped string.
>> + We could just use the worst case (double) value, but we'd
>> still need to
>> + iterate over the string to get it's length. So why not do
>> something
>> + useful why iterating over it, and save some memory at the
>> same time? */
>> + for (old_ptr = str; *old_ptr; ++old_ptr)
>> + {
>> + len++;
>> + if (*old_ptr == '%'
>> + || *old_ptr == '_'
>> + || *old_ptr == LIKE_ESCAPE_CHAR[0])
>> + len++;
>> + }
>> +
>> + result = apr_pcalloc(result_pool, len + 1);
>> +
>> + /* Now do the escaping. */
>> + for (old_ptr = str, new_ptr = result; *old_ptr; ++old_ptr, +
>> +new_ptr)
>> + {
>> + if (*old_ptr == '%'
>> + || *old_ptr == '_'
>> + || *old_ptr == LIKE_ESCAPE_CHAR[0])
>> + *(new_ptr++) = LIKE_ESCAPE_CHAR[0];
>> + *new_ptr = *old_ptr;
>> + }
>> +
>> + return result;
>> +}
>> +
>>
>> /* Construct a new wcroot_t. The WCROOT_ABSPATH and SDB parameters
>> must
>> have lifetime of at least RESULT_POOL. */
>> @@ -3083,6 +3118,7 @@ static svn_error_t *
>> relocate_txn(void *baton, svn_sqlite__db_t *sdb)
>> {
>> struct relocate_baton *rb = baton;
>> + const char *like_arg;
>> apr_pool_t *scratch_pool = rb->scratch_pool;
>> svn_sqlite__stmt_t *stmt;
>> apr_int64_t new_repos_id;
>> @@ -3098,51 +3134,41 @@ relocate_txn(void *baton, svn_sqlite__db
>> SVN_ERR(create_repos_id(&new_repos_id, rb->repos_root_url, rb-
>> >repos_uuid,
>> sdb, scratch_pool));
>>
>> - if (rb->have_base_node)
>> - {
>> - /* Update the BASE_NODE.repos_id. */
>> - SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
>> STMT_UPDATE_BASE_REPO));
>> - SVN_ERR(svn_sqlite__bindf(stmt, "isi", rb->wc_id, rb-
>> >local_relpath,
>> - new_repos_id));
>> -
>> - SVN_ERR(svn_sqlite__step_done(stmt));
>> - }
>> -
>> - /* Update a non-NULL WORKING_NODE.copyfrom_repos_id. */
>> - SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
>> -
>> STMT_UPDATE_WORKING_COPYFROM_REPO));
>> - SVN_ERR(svn_sqlite__bindf(stmt, "isi", rb->wc_id, rb-
>> >local_relpath,
>> - new_repos_id));
>> - SVN_ERR(svn_sqlite__step_done(stmt));
>> + if (rb->local_relpath[0] == 0)
>> + like_arg = "%";
>> + else
>> + like_arg = apr_psprintf(scratch_pool, "%s/%%",
>> + escape_sqlite_like(rb->local_relpath,
>> scratch_pool));
>>
>> - /* Update and child working nodes. */
>> + /* Update non-NULL WORKING_NODE.copyfrom_repos_id. */
>> SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
>> -
>> STMT_UPDATE_WORKING_CHILDREN_COPYFROM_REPO));
>> - SVN_ERR(svn_sqlite__bindf(stmt, "isi", rb->wc_id,
>> - apr_psprintf(scratch_pool, "%s/%%",
>> - rb->local_relpath),
>> - new_repos_id));
>> +
>> STMT_UPDATE_WORKING_RECURSIVE_COPYFROM_REPO));
>> + SVN_ERR(svn_sqlite__bindf(stmt, "issi", rb->wc_id, rb-
>> >local_relpath,
>> + like_arg, new_repos_id));
>> SVN_ERR(svn_sqlite__step_done(stmt));
>>
>> /* Do a bunch of stuff which is conditional on us actually having a
>> base_node in the first place. */
>> if (rb->have_base_node)
>> {
>> - /* Update any children which have non-NULL repos_id's */
>> + /* Update any BASE which have non-NULL repos_id's */
>> SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
>> -
>> STMT_UPDATE_BASE_CHILDREN_REPO));
>> - SVN_ERR(svn_sqlite__bindf(stmt, "isi", rb->wc_id, rb-
>> >local_relpath,
>> - new_repos_id));
>> +
>> STMT_UPDATE_BASE_RECURSIVE_REPO));
>> + SVN_ERR(svn_sqlite__bindf(stmt, "issi", rb->wc_id, rb-
>> >local_relpath,
>> + like_arg, new_repos_id));
>> SVN_ERR(svn_sqlite__step_done(stmt));
>>
>> /* Update any locks for the root or its children. */
>> + if (rb->repos_relpath[0] == 0)
>> + like_arg = "%";
>> + else
>> + like_arg = apr_psprintf(scratch_pool, "%s/%%",
>> + escape_sqlite_like(rb->repos_relpath,
>> scratch_pool));
>> +
>> SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
>> STMT_UPDATE_LOCK_REPOS_ID));
>> - SVN_ERR(svn_sqlite__bindf(stmt, "isi",
>> - rb->old_repos_id,
>> - apr_psprintf(scratch_pool, "%s/%%",
>> - rb->repos_relpath),
>> - new_repos_id));
>> + SVN_ERR(svn_sqlite__bindf(stmt, "issi", rb->old_repos_id,
>> + rb->repos_relpath, like_arg,
>> new_repos_id));
>> SVN_ERR(svn_sqlite__step_done(stmt));
>> }
>>
>> ------------------------------------------------------
>> http://subversion.tigris.org/ds/viewMessage.do?dsForumId=495&dsMessageId=2365439
>
> ------------------------------------------------------
> http://subversion.tigris.org/ds/viewMessage.do?dsForumId=462&dsMessageId=2365537

------------------------------------------------------
http://subversion.tigris.org/ds/viewMessage.do?dsForumId=462&dsMessageId=2365587
Received on 2009-06-26 06:20:02 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.