fsfs: Use the `WITHOUT ROWID` optimization for rep-cache.db in format 8. This optimization, introduced in SQLite 3.8.2, works well for tables that have non-integer primary keys, such as hash TEXT NOT NULL PRIMARY KEY in the rep-cache.db. (See the https://sqlite.org/withoutrowid.html article for additional details.) A quick experiment showed a reduction of the on-disk size of the database by ~1.75x. The lookups should also be faster, both due to the reduced database size and due to the lesser amount of internal bsearches. This should improve the times of new commits and `svnadmin load`, especially for large repositories that also have large rep-cache.db files. In order to maintain compatibility, since SQLite versions prior to 3.8.2 do not support this statement, we only start using it for fsfs format 8 repositories and simultaneously bump the minimal required SQLite version from 3.7.12 (May 2012) to 3.8.2 (December 2013). The last step ensures that all binaries compiled to support format 8 can work with the tables with this optimization. * subversion/libsvn_fs_fs/rep-cache-db.sql (STMT_CREATE_SCHEMA): Rename this ... (STMT_CREATE_SCHEMA_V1): ...to this. (STMT_CREATE_SCHEMA_V2): New, enables `WITHOUT ROWID` optimization. (STMT_GET_REP, STMT_SET_REP, STMT_GET_REPS_FOR_RANGE, STMT_GET_MAX_REV, STMT_DEL_REPS_YOUNGER_THAN_REV, STMT_LOCK_REP, STMT_UNLOCK_REP): Note that these statements work for both V1 and V2 schemas. * subversion/libsvn_fs_fs/fs.h (SVN_FS_FS__MIN_REP_CACHE_SCHEMA_V2_FORMAT): New. * subversion/libsvn_fs_fs/rep-cache.c (REP_CACHE_SCHEMA_FORMAT): Remove. (open_rep_cache): Select between creating a V1 or V2 schemas based on the format of the filesystem. * subversion/libsvn_subr/sqlite.c (): Bump minimum required SQLite version to 3.8.2. * subversion/tests/cmdline/svnadmin_tests.py (check_hotcopy_fsfs_fsx): Check if the Python's built-in SQLite version is enough to interpret the schema of rep-cache.db, and skip the check if it's not. Index: subversion/libsvn_fs_fs/fs.h =================================================================== --- subversion/libsvn_fs_fs/fs.h (revision 1816730) +++ subversion/libsvn_fs_fs/fs.h (working copy) @@ -196,6 +196,10 @@ extern "C" { */ #define SVN_FS_FS__MIN_REP_STRING_OPTIONAL_VALUES_FORMAT 8 + /* The minimum format number that supports V2 schema of the rep-cache.db + database. */ +#define SVN_FS_FS__MIN_REP_CACHE_SCHEMA_V2_FORMAT 8 + /* On most operating systems apr implements file locks per process, not per file. On Windows apr implements the locking as per file handle locks, so we don't have to add our own mutex for just in-process Index: subversion/libsvn_fs_fs/rep-cache-db.sql =================================================================== --- subversion/libsvn_fs_fs/rep-cache-db.sql (revision 1816730) +++ subversion/libsvn_fs_fs/rep-cache-db.sql (working copy) @@ -21,7 +21,7 @@ * ==================================================================== */ --- STMT_CREATE_SCHEMA +-- STMT_CREATE_SCHEMA_V1 /* A table mapping representation hashes to locations in a rev file. */ CREATE TABLE rep_cache ( hash TEXT NOT NULL PRIMARY KEY, @@ -33,26 +33,50 @@ CREATE TABLE rep_cache ( PRAGMA USER_VERSION = 1; +-- STMT_CREATE_SCHEMA_V2 +/* A table mapping representation hashes to locations in a rev file. + Same as in V1 schema, except that it uses the `WITHOUT ROWID` optimization: + https://sqlite.org/withoutrowid.html + Note that this optimization is only supported starting from SQLite version + 3.8.2 (2013-12-06). To keep compatibility with existing binaries, it is + only used for newer filesystem formats that were released together with + bumping the minimum required SQLite version. + */ +CREATE TABLE rep_cache ( + hash TEXT NOT NULL PRIMARY KEY, + revision INTEGER NOT NULL, + offset INTEGER NOT NULL, + size INTEGER NOT NULL, + expanded_size INTEGER NOT NULL + ) WITHOUT ROWID; + +PRAGMA USER_VERSION = 2; + -- STMT_GET_REP +/* Works for both V1 and V2 schemas. */ SELECT revision, offset, size, expanded_size FROM rep_cache WHERE hash = ?1 -- STMT_SET_REP +/* Works for both V1 and V2 schemas. */ INSERT OR FAIL INTO rep_cache (hash, revision, offset, size, expanded_size) VALUES (?1, ?2, ?3, ?4, ?5) -- STMT_GET_REPS_FOR_RANGE +/* Works for both V1 and V2 schemas. */ SELECT hash, revision, offset, size, expanded_size FROM rep_cache WHERE revision >= ?1 AND revision <= ?2 -- STMT_GET_MAX_REV +/* Works for both V1 and V2 schemas. */ SELECT MAX(revision) FROM rep_cache -- STMT_DEL_REPS_YOUNGER_THAN_REV +/* Works for both V1 and V2 schemas. */ DELETE FROM rep_cache WHERE revision > ?1 @@ -59,10 +83,13 @@ WHERE revision > ?1 /* An INSERT takes an SQLite reserved lock that prevents other writes but doesn't block reads. The incomplete transaction means that no permanent change is made to the database and the transaction is - removed when the database is closed. */ + removed when the database is closed. + + Works for both V1 and V2 schemas. */ -- STMT_LOCK_REP BEGIN TRANSACTION; INSERT INTO rep_cache VALUES ('dummy', 0, 0, 0, 0) -- STMT_UNLOCK_REP +/* Works for both V1 and V2 schemas. */ ROLLBACK TRANSACTION; Index: subversion/libsvn_fs_fs/rep-cache.c =================================================================== --- subversion/libsvn_fs_fs/rep-cache.c (revision 1816730) +++ subversion/libsvn_fs_fs/rep-cache.c (working copy) @@ -36,9 +36,6 @@ #include "rep-cache-db.h" -/* A few magic values */ -#define REP_CACHE_SCHEMA_FORMAT 1 - REP_CACHE_DB_SQL_DECLARE_STATEMENTS(statements); @@ -102,13 +99,17 @@ open_rep_cache(void *baton, SVN_SQLITE__ERR_CLOSE(svn_sqlite__read_schema_version(&version, sdb, pool), sdb); - if (version < REP_CACHE_SCHEMA_FORMAT) + /* If we have an uninitialized database, go ahead and create the schema. */ + if (version <= 0) { - /* Must be 0 -- an uninitialized (no schema) database. Create - the schema. Results in schema version of 1. */ - SVN_SQLITE__ERR_CLOSE(svn_sqlite__exec_statements(sdb, - STMT_CREATE_SCHEMA), - sdb); + int stmt; + + if (ffd->format >= SVN_FS_FS__MIN_REP_CACHE_SCHEMA_V2_FORMAT) + stmt = STMT_CREATE_SCHEMA_V2; + else + stmt = STMT_CREATE_SCHEMA_V1; + + SVN_SQLITE__ERR_CLOSE(svn_sqlite__exec_statements(sdb, stmt), sdb); } /* This is used as a flag that the database is available so don't Index: subversion/libsvn_subr/sqlite.c =================================================================== --- subversion/libsvn_subr/sqlite.c (revision 1816730) +++ subversion/libsvn_subr/sqlite.c (working copy) @@ -65,8 +65,8 @@ extern int (*const svn_sqlite3__api_config)(int, . # include #endif -#if !SQLITE_VERSION_AT_LEAST(3,7,12) -#error SQLite is too old -- version 3.7.12 is the minimum required version +#if !SQLITE_VERSION_AT_LEAST(3,8,2) +#error SQLite is too old -- version 3.8.2 is the minimum required version #endif #ifndef SQLITE_DETERMINISTIC Index: subversion/tests/cmdline/svnadmin_tests.py =================================================================== --- subversion/tests/cmdline/svnadmin_tests.py (revision 1816730) +++ subversion/tests/cmdline/svnadmin_tests.py (working copy) @@ -129,6 +129,15 @@ def check_hotcopy_fsfs_fsx(src, dst): if src_file == 'rep-cache.db': db1 = svntest.sqlite3.connect(src_path) db2 = svntest.sqlite3.connect(dst_path) + schema1 = db1.execute("pragma user_version").fetchone()[0] + schema2 = db2.execute("pragma user_version").fetchone()[0] + if schema1 != schema2: + raise svntest.Failure("rep-cache schema differs: '%s' vs. '%s'" + % (schema1, schema2)) + # Can't test newer rep-cache schemas with an old built-in SQLite. + if schema1 >= 2 and svntest.sqlite3.sqlite_version_info < (3, 8, 2): + continue + rows1 = [] rows2 = [] for row in db1.execute("select * from rep_cache order by hash"):