Hi,
(my humble attempts to make some improvement can be read at the end)
I suffer from the slowness of svn rm since the upgrade to 1.7 from 1.6,
but I couldn't find the time to profile it until now.
My setup is: FreeBSD 9-STABLE/amd64 with zfs, eight fast cores, SAN, 32
GiB of RAM.
Versions:
svn, version 1.7.5 (r1336830)
sqlite: 3.7.12.1 2012-05-22 02:45:53
6d326d44fd1d626aae0e8456e5fa2049f1ce0789
The working copy currently consists of 1992210 entries (files and
directories), the wc.db database is 1.5 GiB (which can easily fit into
memory, and pre-heated, so sqlite won't have to read a bit from the disks).
Deleting a single file and committing the change take 25.43 seconds
combined:
# time svn rm test > /tmp/svn-rm
4.825u 6.640s 0:11.47 99.9% 223+2544k 4+95io 0pf+0w
# time svn commit -m 't' test > /tmp/svn-commit
5.380u 6.167s 0:13.96 82.6% 223+2544k 6+143io 0pf+0w
I've modified subversion/libsvn_subr/sqlite.c in svn_sqlite__step() to
measure time for each SQL operations and print it along with the
original statement:
svn_error_t *
svn_sqlite__step(svn_boolean_t *got_row, svn_sqlite__stmt_t *stmt)
{
clock_t start = clock();
long i;
int sqlite_result = sqlite3_step(stmt->s3stmt);
printf ( "%f, %s\n", ( (double)clock() - start ) /
CLOCKS_PER_SEC,sqlite3_sql(stmt->s3stmt) );
Here are the numerically sorted top lines from svn-rm and svn-commit
(I've included the last zero-time operations, so every more than zero
lines can be observed):
rm:
0.000000, SELECT root, uuid FROM repository WHERE id = ?1
0.007812, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = ''
OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND (changelist IS NULL OR NOT
EXISTS (SELECT 1 FROM nodes_current c WHERE
c.wc_id = ?1 AND c.local_relpath =
actual_node.local_relpath AND c.kind = 'file'))
0.007812, UPDATE actual_node SET properties = NULL, text_mod =
NULL, tree_conflict_data = NULL, conflict_old = NULL,
conflict_new = NULL, conflict_working = NULL, prop_reject =
NULL, older_checksum = NULL, left_checksum = NULL,
right_checksum = NULL WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') )
1.882812, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND op_depth >= ?3
9.539062, SELECT local_relpath FROM nodes WHERE wc_id = ?1 AND (?2 =
'' OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/'
AND (local_relpath) < (?2) || '0') ) AND op_depth = 0 AND presence =
'absent' LIMIT 1
commit:
0.000000, SELECT root, uuid FROM repository WHERE id = ?1
1.171875, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM nodes
LEFT JOIN lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path
= lock.repos_relpath WHERE wc_id = ?1 AND op_depth = 0 AND (?2 =
'' OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/'
AND (local_relpath) < (?2) || '0') )
2.007812, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND op_depth >= ?3
8.320312, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM nodes
LEFT JOIN lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path
= lock.repos_relpath WHERE wc_id = ?1 AND op_depth = 0 AND (?2 =
'' OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/'
AND (local_relpath) < (?2) || '0') )
It can be seen that in case of delete, two statements, in case of
commit, three statements take most of the time.
For the curious, I've repeated the above test with 100 files to see how
multiple files affect this (in my application I delete 1000s of files
regularly in one svn rm "transaction", this often takes days, literally).
So deleting 100 files takes 1353.219 seconds (so it has some benefits
compared to deleting the files one by one):
# time svn rm test* > /tmp/svn-rm
508.826u 692.233s 20:01.47 99.9% 223+2544k 7+8720io 0pf+0w
# time svn commit -m 't' > /tmp/svn-commit
523.866u 660.986s 19:47.65 99.7% 223+2544k 6+6574io 0pf+0w
and still printing statements, which has a bigger aggregated execution
time than 0:
rm:
0.000000, SELECT root, uuid FROM repository WHERE id = ?1
0.007812, DELETE FROM wc_lock WHERE wc_id = ?1 AND (?2 = '' OR
local_dir_relpath = ?2 OR ((local_dir_relpath) > (?2) || '/' AND
(local_dir_relpath) < (?2) || '0') ) AND NOT EXISTS (SELECT 1 FROM
nodes WHERE nodes.wc_id = ?1 AND
nodes.local_relpath = wc_lock.local_dir_relpath)
0.007812, SELECT 1 FROM nodes WHERE wc_id = ?1 AND local_relpath = ?2
LIMIT 1
0.007812, SELECT id, work FROM work_queue ORDER BY id LIMIT 1
0.007812, SELECT local_dir_relpath FROM wc_lock WHERE wc_id = ?1 AND
local_dir_relpath LIKE ?2 ESCAPE '#'
0.007812, SELECT prop_reject, changelist, conflict_old, conflict_new,
conflict_working, tree_conflict_data, properties FROM actual_node WHERE
wc_id = ?1 AND local_relpath = ?2
0.015624, SELECT local_relpath, def_local_relpath FROM externals WHERE
wc_id = ?1 AND (?2 = '' OR def_local_relpath = ?2 OR
((def_local_relpath) > (?2) || '/' AND (def_local_relpath) < (?2) || '0') )
0.015624, SELECT op_depth, nodes.repos_id, nodes.repos_path, presence,
kind, revision, checksum, translated_size, changed_revision,
changed_date, changed_author, depth, symlink_target, last_mod_time,
properties, lock_token, lock_owner, lock_comment, lock_date FROM nodes
LEFT OUTER JOIN lock ON nodes.repos_id = lock.repos_id AND
nodes.repos_path = lock.repos_relpath WHERE wc_id = ?1 AND local_relpath
= ?2 ORDER BY op_depth DESC
0.015624, SELECT presence, kind, def_local_relpath, repos_id,
def_repos_relpath, def_operational_revision, def_revision, presence FROM
externals WHERE wc_id = ?1 AND local_relpath = ?2 LIMIT 1
0.023436, INSERT INTO delete_list(local_relpath) SELECT local_relpath
FROM nodes n WHERE wc_id = ?1 AND (local_relpath = ?2 OR
((local_relpath) > (?2) || '/' AND (local_relpath) < (?2) || '0') )
AND op_depth >= ?3 AND presence NOT IN ('base-deleted', 'not-present',
'excluded', 'absent') AND op_depth = (SELECT MAX(op_depth) FROM nodes
s WHERE s.wc_id = n.wc_id AND s.local_relpath =
n.local_relpath)
0.031248, SELECT local_dir_relpath, locked_levels FROM wc_lock WHERE
wc_id = ?1 AND ((local_dir_relpath <= ?2 AND local_dir_relpath >=
?3) OR local_dir_relpath = '') ORDER BY local_dir_relpath DESC
0.03906, INSERT INTO nodes ( wc_id, local_relpath, op_depth,
parent_relpath, presence, kind) SELECT wc_id, local_relpath, ?4 ,
parent_relpath, 'base-deleted', kind FROM nodes WHERE wc_id =
?1 AND (local_relpath = ?2 OR ((local_relpath) > (?2) || '/'
AND (local_relpath) < (?2) || '0') ) AND op_depth = ?3 AND presence
NOT IN ('base-deleted', 'not-present', 'excluded', 'absent')
0.03906, INSERT INTO wc_lock (wc_id, local_dir_relpath, locked_levels)
VALUES (?1, ?2, ?3)
0.046872, SELECT op_depth, repos_id, repos_path, presence, kind,
revision, checksum, translated_size, changed_revision, changed_date,
changed_author, depth, symlink_target, last_mod_time, properties FROM
nodes WHERE wc_id = ?1 AND local_relpath = ?2 ORDER BY op_depth DESC
0.062496, DELETE FROM wc_lock WHERE wc_id = ?1 AND local_dir_relpath = ?2
0.398413, UPDATE actual_node SET properties = NULL, text_mod =
NULL, tree_conflict_data = NULL, conflict_old = NULL,
conflict_new = NULL, conflict_working = NULL, prop_reject =
NULL, older_checksum = NULL, left_checksum = NULL,
right_checksum = NULL WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') )
1.031218, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = ''
OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND (changelist IS NULL OR NOT
EXISTS (SELECT 1 FROM nodes_current c WHERE
c.wc_id = ?1 AND c.local_relpath =
actual_node.local_relpath AND c.kind = 'file'))
205.515629, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND op_depth >= ?3
993.296875, SELECT local_relpath FROM nodes WHERE wc_id = ?1 AND (?2 =
'' OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/'
AND (local_relpath) < (?2) || '0') ) AND op_depth = 0 AND presence =
'absent' LIMIT 1
commit:
0.000000, SELECT root, uuid FROM repository WHERE id = ?1
0.007812, DELETE FROM nodes WHERE wc_id = ?1 AND local_relpath = ?2
AND op_depth = (SELECT MIN(op_depth) FROM nodes WHERE
wc_id = ?1 AND local_relpath = ?2 AND op_depth > 0) AND presence =
'base-deleted'
0.007812, DELETE FROM wc_lock WHERE wc_id = ?1 AND local_dir_relpath = ?2
0.015624, INSERT OR REPLACE INTO nodes ( wc_id, local_relpath,
op_depth, parent_relpath, repos_id, repos_path, revision, presence,
depth, kind, changed_revision, changed_date, changed_author, checksum,
properties, translated_size, last_mod_time, dav_cache, symlink_target,
file_external) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11,
?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20)
0.015624, SELECT prop_reject, changelist, conflict_old, conflict_new,
conflict_working, tree_conflict_data, properties FROM actual_node WHERE
wc_id = ?1 AND local_relpath = ?2
0.015624, SELECT repos_id, repos_path, presence, kind, revision,
checksum, translated_size, changed_revision, changed_date,
changed_author, depth, symlink_target, last_mod_time, properties,
file_external IS NOT NULL FROM nodes WHERE wc_id = ?1 AND local_relpath
= ?2 AND op_depth = 0
0.023436, SELECT op_depth, repos_id, repos_path, presence, kind,
revision, checksum, translated_size, changed_revision, changed_date,
changed_author, depth, symlink_target, last_mod_time, properties FROM
nodes WHERE wc_id = ?1 AND local_relpath = ?2 ORDER BY op_depth DESC
0.031248, SELECT op_depth, nodes.repos_id, nodes.repos_path, presence,
kind, revision, checksum, translated_size, changed_revision,
changed_date, changed_author, depth, symlink_target, last_mod_time,
properties, lock_token, lock_owner, lock_comment, lock_date FROM nodes
LEFT OUTER JOIN lock ON nodes.repos_id = lock.repos_id AND
nodes.repos_path = lock.repos_relpath WHERE wc_id = ?1 AND local_relpath
= ?2 ORDER BY op_depth DESC
0.539034, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = ''
OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') )
205.703121, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR
local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND
(local_relpath) < (?2) || '0') ) AND op_depth >= ?3
978.265612, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM
nodes LEFT JOIN lock ON nodes.repos_id = lock.repos_id AND
nodes.repos_path = lock.repos_relpath WHERE wc_id = ?1 AND op_depth =
0 AND (?2 = '' OR local_relpath = ?2 OR ((local_relpath)
> (?2) || '/' AND (local_relpath) < (?2) || '0') )
Looking at these, it seems some indexes are missing.
Particularly:
nodes has an index of: CREATE INDEX I_NODES_PARENT ON NODES (wc_id,
parent_relpath, op_depth);
but the long running statement query the local_relpath instead of the
parent one.
Placing the following index on it makes svn rm run in 3.43 seconds
instead of 11.47 and commit in 4.29 seconds instead of 13.96 (deleting
only one file):
sqlite> CREATE INDEX I_NODES_LOCAL ON NODES (wc_id, local_relpath,
op_depth);
wc.db's size was increased to 1.6GiB (not much for the speedup).
There is another similar query, where presence is used, so I've placed
another one:
sqlite> CREATE INDEX I_NODES_LOCAL_PRESENCE ON NODES (wc_id,
local_relpath, op_depth, presence);
wc.db increased to 1.7 GiB
With these, removing 100 files took 4:46.21 seconds (was 20:01.47) and
committing 6:00.25 (was 19:47.65). Still sucks, but much better than before.
Any ideas about how could it be made even faster without ditching
sqlite? Deleting 100 files and committing the change shouldn't take 40
(10, with the above indexes) minutes, even with 2 million files... I guess.
Received on 2012-06-25 14:37:38 CEST