Hey Paul,
Can you try this change on your large-file-count working copies? I
believe this should fix the performance problems you were seeing.
Cheers,
-g
On Mon, Jun 7, 2010 at 20:47, <gstein_at_apache.org> wrote:
> Author: gstein
> Date: Tue Jun 8 00:47:22 2010
> New Revision: 952493
>
> URL: http://svn.apache.org/viewvc?rev=952493&view=rev
> Log:
> The query that we used to fetch all children in BASE_NODE and WORKING_NODE
> used a UNION between two SELECT statements. The idea was to have SQLite
> remove all duplicates for us in a single query. Unfortunately, this caused
> SQLite to create an ephemeral (temporary) table and place the results of
> each query into that table. It created an index to remove dupliates. Then
> it returned the values in that ephemeral table. For large numbers of
> nodes, the construction of the table and its index becomes very costly.
>
> This change rebuilds gather_children() in wc_db.c to do the duplicate
> removal manually using a hash table. It does some simple scanning straight
> into an array when it knows duplicates cannot exist (one of BASE or
> WORKING is empty).
>
> The performance problem of svn_wc__db_read_children() was first observed
> in issue #3499. The actual performance improvement is untested so far, but
> I'm assuming pburba can pick up this change and try in his scenario.
>
> * subversion/libsvn_wc/wc_db.c:
> (count_children): new helper to count the number of children of a given
> PARENT_RELPATH within a specific table.
> (add_children_to_hash): new helper to scan children, placing their names
> into a hash table as keys (and the mapped values).
> (union_children): new helper to scan both BASE_NODE and WORKING_NODE and
> manually create a union of the resulting names using a hash table.
> (single_table_children): new helper to return the children from a single
> table.
> (gather_children): rebuilt in terms of the above helpers
>
> * subversion/libsvn_wc/wc-queries.sql:
> (STMT_COUNT_BASE_NODE_CHILDREN, STMT_COUNT_WORKING_NODE_CHILDREN): new
> statements to count the number of children for a given parent_relpath
> (STMT_SELECT_WORKING_CHILDREN): removed
> (STMT_SELECT_WORKING_NODE_CHILDREN): like STMT_SELECT_BASE_NODE_CHILDREN,
> this scan all children in WORKING_NODE for a given parent_relpath
>
> Modified:
> subversion/trunk/subversion/libsvn_wc/wc-queries.sql
> subversion/trunk/subversion/libsvn_wc/wc_db.c
>
> Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=952493&r1=952492&r2=952493&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original)
> +++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Tue Jun 8 00:47:22 2010
> @@ -87,16 +87,21 @@ INSERT OR IGNORE INTO WORKING_NODE (
> wc_id, local_relpath, parent_relpath, presence, kind)
> VALUES (?1, ?2, ?3, 'incomplete', 'unknown');
>
> +-- STMT_COUNT_BASE_NODE_CHILDREN
> +SELECT COUNT(*) FROM BASE_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
> +
> +-- STMT_COUNT_WORKING_NODE_CHILDREN
> +SELECT COUNT(*) FROM WORKING_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
> +
> -- STMT_SELECT_BASE_NODE_CHILDREN
> select local_relpath from base_node
> where wc_id = ?1 and parent_relpath = ?2;
>
> --- STMT_SELECT_WORKING_CHILDREN
> -select local_relpath from base_node
> -where wc_id = ?1 and parent_relpath = ?2
> -union
> -select local_relpath from working_node
> -where wc_id = ?1 and parent_relpath = ?2;
> +-- STMT_SELECT_WORKING_NODE_CHILDREN
> +SELECT local_relpath FROM WORKING_NODE
> +WHERE wc_id = ?1 AND parent_relpath = ?2;
>
> -- STMT_SELECT_WORKING_IS_FILE
> select kind == 'file' from working_node
>
> Modified: subversion/trunk/subversion/libsvn_wc/wc_db.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.c?rev=952493&r1=952492&r2=952493&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_wc/wc_db.c (original)
> +++ subversion/trunk/subversion/libsvn_wc/wc_db.c Tue Jun 8 00:47:22 2010
> @@ -813,40 +813,97 @@ insert_working_node(void *baton,
> }
>
>
> -/* */
> static svn_error_t *
> -gather_children(const apr_array_header_t **children,
> - svn_boolean_t base_only,
> - svn_wc__db_t *db,
> - const char *local_abspath,
> - apr_pool_t *result_pool,
> - apr_pool_t *scratch_pool)
> +count_children(int *count,
> + int stmt_idx,
> + svn_sqlite__db_t *sdb,
> + apr_int64_t wc_id,
> + const char *parent_relpath)
> +{
> + svn_sqlite__stmt_t *stmt;
> +
> + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
> + SVN_ERR(svn_sqlite__step_row(stmt));
> + *count = svn_sqlite__column_int(stmt, 0);
> + return svn_error_return(svn_sqlite__reset(stmt));
> +}
> +
> +
> +/* Each name is allocated in RESULT_POOL and stored into CHILDREN as a key
> + pointed to the same name. */
> +static svn_error_t *
> +add_children_to_hash(apr_hash_t *children,
> + int stmt_idx,
> + svn_sqlite__db_t *sdb,
> + apr_int64_t wc_id,
> + const char *parent_relpath,
> + apr_pool_t *result_pool)
> {
> - svn_wc__db_pdh_t *pdh;
> - const char *local_relpath;
> svn_sqlite__stmt_t *stmt;
> - apr_array_header_t *child_names;
> svn_boolean_t have_row;
>
> - SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
> + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
> + SVN_ERR(svn_sqlite__step(&have_row, stmt));
> + while (have_row)
> + {
> + const char *child_relpath = svn_sqlite__column_text(stmt, 0, NULL);
> + const char *name = svn_relpath_basename(child_relpath, result_pool);
>
> - SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
> - local_abspath, svn_sqlite__mode_readonly,
> - scratch_pool, scratch_pool));
> - VERIFY_USABLE_PDH(pdh);
> + apr_hash_set(children, name, APR_HASH_KEY_STRING, name);
>
> - SVN_ERR(svn_sqlite__get_statement(&stmt, pdh->wcroot->sdb,
> - base_only
> - ? STMT_SELECT_BASE_NODE_CHILDREN
> - : STMT_SELECT_WORKING_CHILDREN));
> - SVN_ERR(svn_sqlite__bindf(stmt, "is", pdh->wcroot->wc_id, local_relpath));
> + SVN_ERR(svn_sqlite__step(&have_row, stmt));
> + }
> +
> + return svn_sqlite__reset(stmt);
> +}
> +
> +
> +static svn_error_t *
> +union_children(const apr_array_header_t **children,
> + svn_sqlite__db_t *sdb,
> + apr_int64_t wc_id,
> + const char *parent_relpath,
> + apr_pool_t *result_pool,
> + apr_pool_t *scratch_pool)
> +{
> + /* ### it would be nice to pre-size this hash table. */
> + apr_hash_t *names = apr_hash_make(scratch_pool);
> + apr_array_header_t *names_array;
> +
> + /* All of the names get allocated in RESULT_POOL. */
> + SVN_ERR(add_children_to_hash(names, STMT_SELECT_BASE_NODE_CHILDREN,
> + sdb, wc_id, parent_relpath, result_pool));
> + SVN_ERR(add_children_to_hash(names, STMT_SELECT_WORKING_NODE_CHILDREN,
> + sdb, wc_id, parent_relpath, result_pool));
> +
> + SVN_ERR(svn_hash_keys(&names_array, names, result_pool));
> + *children = names_array;
> +
> + return SVN_NO_ERROR;
> +}
> +
> +
> +static svn_error_t *
> +single_table_children(const apr_array_header_t **children,
> + int stmt_idx,
> + int start_size,
> + svn_sqlite__db_t *sdb,
> + apr_int64_t wc_id,
> + const char *parent_relpath,
> + apr_pool_t *result_pool)
> +{
> + svn_sqlite__stmt_t *stmt;
> + apr_array_header_t *child_names;
> + svn_boolean_t have_row;
> +
> + SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
> + SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
>
> /* ### should test the node to ensure it is a directory */
>
> - /* ### 10 is based on Subversion's average of 8.5 files per versioned
> - ### directory in its repository. maybe use a different value? or
> - ### count rows first? */
> - child_names = apr_array_make(result_pool, 10, sizeof(const char *));
> + child_names = apr_array_make(result_pool, start_size, sizeof(const char *));
>
> SVN_ERR(svn_sqlite__step(&have_row, stmt));
> while (have_row)
> @@ -866,6 +923,76 @@ gather_children(const apr_array_header_t
>
>
> /* */
> +static svn_error_t *
> +gather_children(const apr_array_header_t **children,
> + svn_boolean_t base_only,
> + svn_wc__db_t *db,
> + const char *local_abspath,
> + apr_pool_t *result_pool,
> + apr_pool_t *scratch_pool)
> +{
> + svn_wc__db_pdh_t *pdh;
> + const char *local_relpath;
> + int base_count;
> + int working_count;
> +
> + SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
> +
> + SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
> + local_abspath,
> + svn_sqlite__mode_readonly,
> + scratch_pool, scratch_pool));
> + VERIFY_USABLE_PDH(pdh);
> +
> + if (base_only)
> + {
> + /* 10 is based on Subversion's average of 8.7 files per versioned
> + directory in its repository.
> +
> + ### note "files". should redo count with subdirs included */
> + return svn_error_return(single_table_children(
> + children, STMT_SELECT_BASE_NODE_CHILDREN,
> + 10 /* start_size */,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id,
> + local_relpath, result_pool));
> + }
> +
> + SVN_ERR(count_children(&base_count, STMT_COUNT_BASE_NODE_CHILDREN,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
> + SVN_ERR(count_children(&working_count, STMT_COUNT_WORKING_NODE_CHILDREN,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
> +
> + if (base_count == 0)
> + {
> + if (working_count == 0)
> + {
> + *children = apr_array_make(result_pool, 0, sizeof(const char *));
> + return SVN_NO_ERROR;
> + }
> +
> + return svn_error_return(single_table_children(
> + children, STMT_SELECT_WORKING_NODE_CHILDREN,
> + working_count,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id,
> + local_relpath, result_pool));
> + }
> + if (working_count == 0)
> + return svn_error_return(single_table_children(
> + children, STMT_SELECT_BASE_NODE_CHILDREN,
> + base_count,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id,
> + local_relpath, result_pool));
> +
> + /* ### it would be nice to pass BASE_COUNT and WORKING_COUNT, but there is
> + ### nothing union_children() can do with those. */
> + return svn_error_return(union_children(children,
> + pdh->wcroot->sdb, pdh->wcroot->wc_id,
> + local_relpath,
> + result_pool, scratch_pool));
> +}
> +
> +
> +/* */
> static void
> flush_entries(const svn_wc__db_pdh_t *pdh)
> {
>
>
>
Received on 2010-06-08 02:54:35 CEST