Justin Erenkrantz <justin_at_erenkrantz.com> writes:
> When compiled with SVN_DEBUG and SQLITE3_DEBUG and 'svn st' against a
> svn trunk WC, a number of things pop out.
>
> We perform 28,062 SQL queries.
It's not just repeat queries that are the problem, we simply make too
many queries. This is mainly because the code is converted from 1.6
when we had in an in memory entry cache.
In 1.7 'svn status' is doing queries per-node, something like:
dir_status:
node_status
"select count(*) from base_node where parent_relpath = "
"select local_relpath, kind from base_node where parent_relpath = "
for each child
"select * from base_node where local_relpath = "
if kind is dir
recurse
else
node_status
Back in February I wrote a test program to investigate whether it was
possible for SQLite and single-db to be fast enough:
http://mail-archives.apache.org/mod_mbox/subversion-dev/201002.mbox/<87y6iq2ym4.fsf%40stat.home.lan>
http://svn.haxx.se/dev/archive-2010-02/0457.shtml
The status code in my test program was doing queries per-directory,
not per-node, something like:
dir_status:
node_status
"select * from base_node where parent_relpath = "
for each row
if kind is dir
push subdirs(name)
else
node_status
for each subdir
recurse
I've modified my test program to do per-node queries[1], making it
more like 1.7, and the performance is significantly worse. It's about
a factor of 7 slower, both in CPU and runtime, on the dataset I was
using. The test program goes from being a little faster than 1.6 to be
being significantly slower.
To use a per-directory query strategy we would probably have to cache
data in memory, although not to the same extent as in 1.6. We should
probably avoid having Subversion make status callbacks into the
application while a query is in progress, so we would accumulate all
the row data and complete the query before making any callbacks. Some
sort of private svn_wc__db_node_t to hold the results of the select
would probably be sufficient.
[1] client to query database
#include "svn_pools.h"
#include "svn_sqlite.h"
#include <stdio.h>
static svn_error_t *
status_query(svn_sqlite__db_t *sdb,
const char *local_relpath,
svn_boolean_t display,
apr_pool_t *pool)
{
svn_sqlite__stmt_t *stmt;
svn_boolean_t have_row;
const char *kind;
apr_pool_t *subpool;
apr_array_header_t *subdirs = apr_array_make(pool, 10, sizeof(const char *));
int i;
apr_array_header_t *files;
SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, 0));
SVN_ERR(svn_sqlite__bindf(stmt, "is", 1, local_relpath));
SVN_ERR(svn_sqlite__step(&have_row, stmt));
if (!have_row)
{
SVN_ERR(svn_sqlite__reset(stmt));
return SVN_NO_ERROR;
}
kind = svn_sqlite__column_text(stmt, 0, pool);
if (display)
printf("%s %s\n", local_relpath, kind);
SVN_ERR(svn_sqlite__reset(stmt));
if (!strcmp(kind, "dir"))
{
#ifdef PER_NODE
int num;
SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, 2));
SVN_ERR(svn_sqlite__bindf(stmt, "is", 1, local_relpath));
SVN_ERR(svn_sqlite__step(&have_row, stmt));
if (!have_row)
{
SVN_ERR(svn_sqlite__reset(stmt));
return SVN_NO_ERROR;
}
num = svn_sqlite__column_int64(stmt, 0);
SVN_ERR(svn_sqlite__reset(stmt));
files = apr_array_make(pool, num, sizeof(const char *));
#endif
SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, 1));
SVN_ERR(svn_sqlite__bindf(stmt, "is", 1, local_relpath));
SVN_ERR(svn_sqlite__step(&have_row, stmt));
while (have_row)
{
const char *child_relpath = svn_sqlite__column_text(stmt, 0, NULL);
kind = svn_sqlite__column_text(stmt, 1, NULL);
if (!strcmp(kind, "dir"))
APR_ARRAY_PUSH(subdirs, const char *)
= apr_pstrdup(pool, child_relpath);
#ifdef PER_NODE
else
APR_ARRAY_PUSH(files, const char *)
= apr_pstrdup(pool, child_relpath);
#else
else if (display)
printf("%s %s\n", child_relpath, kind);
#endif
SVN_ERR(svn_sqlite__step(&have_row, stmt));
}
SVN_ERR(svn_sqlite__reset(stmt));
}
subpool = svn_pool_create(pool);
#ifdef PER_NODE
for (i = 0; i < files->nelts; ++i)
{
const char *child_relpath = APR_ARRAY_IDX(files, i, const char*);
svn_pool_clear(subpool);
SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, 0));
SVN_ERR(svn_sqlite__bindf(stmt, "is", 1, child_relpath));
SVN_ERR(svn_sqlite__step(&have_row, stmt));
if (!have_row)
{
SVN_ERR(svn_sqlite__reset(stmt));
return SVN_NO_ERROR;
}
kind = svn_sqlite__column_text(stmt, 0, pool);
if (display)
printf("%s %s\n", child_relpath, kind);
SVN_ERR(svn_sqlite__reset(stmt));
}
#endif
for (i = 0; i < subdirs->nelts; ++i)
{
svn_pool_clear(subpool);
SVN_ERR(status_query(sdb, APR_ARRAY_IDX(subdirs, i, const char*), display,
subpool));
}
svn_pool_destroy(subpool);
return SVN_NO_ERROR;
}
int main()
{
apr_pool_t *pool;
svn_sqlite__db_t *sdb;
const char * const statements[] = {
"select kind from base_node" \
" where wc_id = ?1 and local_relpath = ?2;",
"select local_relpath, kind from base_node" \
" where wc_id = ?1 and parent_relpath = ?2;",
"select count(*) from base_node" \
" where wc_id = ?1 and parent_relpath = ?2;",
NULL
};
apr_initialize();
pool = svn_pool_create(NULL);
SVN_INT_ERR(svn_sqlite__open(&sdb, "wcx.db", svn_sqlite__mode_rwcreate,
statements, 0, NULL, pool, pool));
SVN_INT_ERR(status_query(sdb, "", FALSE, pool));
return EXIT_SUCCESS;
}
--
Philip
Received on 2010-09-06 12:17:52 CEST