[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r952493 - in /subversion/trunk/subversion/libsvn_wc: wc-queries.sql wc_db.c

From: Greg Stein <gstein_at_gmail.com>
Date: Mon, 7 Jun 2010 20:53:55 -0400

Hey Paul,

Can you try this change on your large-file-count working copies? I
believe this should fix the performance problems you were seeing.

Cheers,
-g

On Mon, Jun 7, 2010 at 20:47, <gstein_at_apache.org> wrote:
> Author: gstein
> Date: Tue Jun  8 00:47:22 2010
> New Revision: 952493
>
> URL: http://svn.apache.org/viewvc?rev=952493&view=rev
> Log:
> The query that we used to fetch all children in BASE_NODE and WORKING_NODE
> used a UNION between two SELECT statements. The idea was to have SQLite
> remove all duplicates for us in a single query. Unfortunately, this caused
> SQLite to create an ephemeral (temporary) table and place the results of
> each query into that table. It created an index to remove dupliates. Then
> it returned the values in that ephemeral table. For large numbers of
> nodes, the construction of the table and its index becomes very costly.
>
> This change rebuilds gather_children() in wc_db.c to do the duplicate
> removal manually using a hash table. It does some simple scanning straight
> into an array when it knows duplicates cannot exist (one of BASE or
> WORKING is empty).
>
> The performance problem of svn_wc__db_read_children() was first observed
> in issue #3499. The actual performance improvement is untested so far, but
> I'm assuming pburba can pick up this change and try in his scenario.
>
> * subversion/libsvn_wc/wc_db.c:
>  (count_children): new helper to count the number of children of a given
>    PARENT_RELPATH within a specific table.
>  (add_children_to_hash): new helper to scan children, placing their names
>    into a hash table as keys (and the mapped values).
>  (union_children): new helper to scan both BASE_NODE and WORKING_NODE and
>    manually create a union of the resulting names using a hash table.
>  (single_table_children): new helper to return the children from a single
>    table.
>  (gather_children): rebuilt in terms of the above helpers
>
> * subversion/libsvn_wc/wc-queries.sql:
>  (STMT_COUNT_BASE_NODE_CHILDREN, STMT_COUNT_WORKING_NODE_CHILDREN): new
>    statements to count the number of children for a given parent_relpath
>  (STMT_SELECT_WORKING_CHILDREN): removed
>  (STMT_SELECT_WORKING_NODE_CHILDREN): like STMT_SELECT_BASE_NODE_CHILDREN,
>    this scan all children in WORKING_NODE for a given parent_relpath
>
> Modified:
>    subversion/trunk/subversion/libsvn_wc/wc-queries.sql
>    subversion/trunk/subversion/libsvn_wc/wc_db.c
>
> Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=952493&r1=952492&r2=952493&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original)
> +++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Tue Jun  8 00:47:22 2010
> @@ -87,16 +87,21 @@ INSERT OR IGNORE INTO WORKING_NODE (
>   wc_id, local_relpath, parent_relpath, presence, kind)
>  VALUES (?1, ?2, ?3, 'incomplete', 'unknown');
>
> +-- STMT_COUNT_BASE_NODE_CHILDREN
> +SELECT COUNT(*) FROM BASE_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
> +
> +-- STMT_COUNT_WORKING_NODE_CHILDREN
> +SELECT COUNT(*) FROM WORKING_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
> +
>  -- STMT_SELECT_BASE_NODE_CHILDREN
>  select local_relpath from base_node
>  where wc_id = ?1 and parent_relpath = ?2;
>
> --- STMT_SELECT_WORKING_CHILDREN
> -select local_relpath from base_node
> -where wc_id = ?1 and parent_relpath = ?2
> -union
> -select local_relpath from working_node
> -where wc_id = ?1 and parent_relpath = ?2;
> +-- STMT_SELECT_WORKING_NODE_CHILDREN
> +SELECT local_relpath FROM WORKING_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
>
>  -- STMT_SELECT_WORKING_IS_FILE
>  select kind == 'file' from working_node
>
> Modified: subversion/trunk/subversion/libsvn_wc/wc_db.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.c?rev=952493&r1=952492&r2=952493&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_wc/wc_db.c (original)
> +++ subversion/trunk/subversion/libsvn_wc/wc_db.c Tue Jun  8 00:47:22 2010
> @@ -813,40 +813,97 @@ insert_working_node(void *baton,
>  }
>
>
> -/* */
>  static svn_error_t *
> -gather_children(const apr_array_header_t **children,
> -                svn_boolean_t base_only,
> -                svn_wc__db_t *db,
> -                const char *local_abspath,
> -                apr_pool_t *result_pool,
> -                apr_pool_t *scratch_pool)
> +count_children(int *count,
> +               int stmt_idx,
> +               svn_sqlite__db_t *sdb,
> +               apr_int64_t wc_id,
> +               const char *parent_relpath)
> +{
> +  svn_sqlite__stmt_t *stmt;
> +
> +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> +  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
> +  SVN_ERR(svn_sqlite__step_row(stmt));
> +  *count = svn_sqlite__column_int(stmt, 0);
> +  return svn_error_return(svn_sqlite__reset(stmt));
> +}
> +
> +
> +/* Each name is allocated in RESULT_POOL and stored into CHILDREN as a key
> +   pointed to the same name.  */
> +static svn_error_t *
> +add_children_to_hash(apr_hash_t *children,
> +                     int stmt_idx,
> +                     svn_sqlite__db_t *sdb,
> +                     apr_int64_t wc_id,
> +                     const char *parent_relpath,
> +                     apr_pool_t *result_pool)
>  {
> -  svn_wc__db_pdh_t *pdh;
> -  const char *local_relpath;
>   svn_sqlite__stmt_t *stmt;
> -  apr_array_header_t *child_names;
>   svn_boolean_t have_row;
>
> -  SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
> +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> +  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
> +  SVN_ERR(svn_sqlite__step(&have_row, stmt));
> +  while (have_row)
> +    {
> +      const char *child_relpath = svn_sqlite__column_text(stmt, 0, NULL);
> +      const char *name = svn_relpath_basename(child_relpath, result_pool);
>
> -  SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
> -                              local_abspath, svn_sqlite__mode_readonly,
> -                              scratch_pool, scratch_pool));
> -  VERIFY_USABLE_PDH(pdh);
> +      apr_hash_set(children, name, APR_HASH_KEY_STRING, name);
>
> -  SVN_ERR(svn_sqlite__get_statement(&stmt, pdh->wcroot->sdb,
> -                                    base_only
> -                                      ? STMT_SELECT_BASE_NODE_CHILDREN
> -                                      : STMT_SELECT_WORKING_CHILDREN));
> -  SVN_ERR(svn_sqlite__bindf(stmt, "is", pdh->wcroot->wc_id, local_relpath));
> +      SVN_ERR(svn_sqlite__step(&have_row, stmt));
> +    }
> +
> +  return svn_sqlite__reset(stmt);
> +}
> +
> +
> +static svn_error_t *
> +union_children(const apr_array_header_t **children,
> +               svn_sqlite__db_t *sdb,
> +               apr_int64_t wc_id,
> +               const char *parent_relpath,
> +               apr_pool_t *result_pool,
> +               apr_pool_t *scratch_pool)
> +{
> +  /* ### it would be nice to pre-size this hash table.  */
> +  apr_hash_t *names = apr_hash_make(scratch_pool);
> +  apr_array_header_t *names_array;
> +
> +  /* All of the names get allocated in RESULT_POOL.  */
> +  SVN_ERR(add_children_to_hash(names, STMT_SELECT_BASE_NODE_CHILDREN,
> +                               sdb, wc_id, parent_relpath, result_pool));
> +  SVN_ERR(add_children_to_hash(names, STMT_SELECT_WORKING_NODE_CHILDREN,
> +                               sdb, wc_id, parent_relpath, result_pool));
> +
> +  SVN_ERR(svn_hash_keys(&names_array, names, result_pool));
> +  *children = names_array;
> +
> +  return SVN_NO_ERROR;
> +}
> +
> +
> +static svn_error_t *
> +single_table_children(const apr_array_header_t **children,
> +                      int stmt_idx,
> +                      int start_size,
> +                      svn_sqlite__db_t *sdb,
> +                      apr_int64_t wc_id,
> +                      const char *parent_relpath,
> +                      apr_pool_t *result_pool)
> +{
> +  svn_sqlite__stmt_t *stmt;
> +  apr_array_header_t *child_names;
> +  svn_boolean_t have_row;
> +
> +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> +  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
>
>   /* ### should test the node to ensure it is a directory */
>
> -  /* ### 10 is based on Subversion's average of 8.5 files per versioned
> -     ### directory in its repository. maybe use a different value? or
> -     ### count rows first?  */
> -  child_names = apr_array_make(result_pool, 10, sizeof(const char *));
> +  child_names = apr_array_make(result_pool, start_size, sizeof(const char *));
>
>   SVN_ERR(svn_sqlite__step(&have_row, stmt));
>   while (have_row)
> @@ -866,6 +923,76 @@ gather_children(const apr_array_header_t
>
>
>  /* */
> +static svn_error_t *
> +gather_children(const apr_array_header_t **children,
> +                svn_boolean_t base_only,
> +                svn_wc__db_t *db,
> +                const char *local_abspath,
> +                apr_pool_t *result_pool,
> +                apr_pool_t *scratch_pool)
> +{
> +  svn_wc__db_pdh_t *pdh;
> +  const char *local_relpath;
> +  int base_count;
> +  int working_count;
> +
> +  SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
> +
> +  SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
> +                                             local_abspath,
> +                                             svn_sqlite__mode_readonly,
> +                                             scratch_pool, scratch_pool));
> +  VERIFY_USABLE_PDH(pdh);
> +
> +  if (base_only)
> +    {
> +      /* 10 is based on Subversion's average of 8.7 files per versioned
> +         directory in its repository.
> +
> +         ### note "files". should redo count with subdirs included */
> +      return svn_error_return(single_table_children(
> +                                children, STMT_SELECT_BASE_NODE_CHILDREN,
> +                                10 /* start_size */,
> +                                pdh->wcroot->sdb, pdh->wcroot->wc_id,
> +                                local_relpath, result_pool));
> +    }
> +
> +  SVN_ERR(count_children(&base_count, STMT_COUNT_BASE_NODE_CHILDREN,
> +                         pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
> +  SVN_ERR(count_children(&working_count, STMT_COUNT_WORKING_NODE_CHILDREN,
> +                         pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
> +
> +  if (base_count == 0)
> +    {
> +      if (working_count == 0)
> +        {
> +          *children = apr_array_make(result_pool, 0, sizeof(const char *));
> +          return SVN_NO_ERROR;
> +        }
> +
> +      return svn_error_return(single_table_children(
> +                                children, STMT_SELECT_WORKING_NODE_CHILDREN,
> +                                working_count,
> +                                pdh->wcroot->sdb, pdh->wcroot->wc_id,
> +                                local_relpath, result_pool));
> +    }
> +  if (working_count == 0)
> +    return svn_error_return(single_table_children(
> +                              children, STMT_SELECT_BASE_NODE_CHILDREN,
> +                              base_count,
> +                              pdh->wcroot->sdb, pdh->wcroot->wc_id,
> +                              local_relpath, result_pool));
> +
> +  /* ### it would be nice to pass BASE_COUNT and WORKING_COUNT, but there is
> +     ### nothing union_children() can do with those.  */
> +  return svn_error_return(union_children(children,
> +                                         pdh->wcroot->sdb, pdh->wcroot->wc_id,
> +                                         local_relpath,
> +                                         result_pool, scratch_pool));
> +}
> +
> +
> +/* */
>  static void
>  flush_entries(const svn_wc__db_pdh_t *pdh)
>  {
>
>
>
Received on 2010-06-08 02:54:35 CEST

This is an archived mail posted to the Subversion Dev mailing list.