Greg Stein wrote:
> On Sat, Oct 18, 2008 at 6:12 AM, <hwright_at_tigris.org> wrote:
>> ...
>
>> +++ trunk/subversion/include/svn_error_codes.h Sat Oct 18 06:11:59 2008 (r33730)
>> @@ -673,6 +673,11 @@ SVN_ERROR_START
>> SVN_ERR_FS_CATEGORY_START + 47,
>> "Filesystem upgrade is not supported")
>>
>> + /** @since New in 1.6. */
>> + SVN_ERRDEF(SVN_ERR_FS_NO_SUCH_CHECKSUM_REP,
>> + SVN_ERR_FS_CATEGORY_START + 49,
>> + "Filesystem has no such checksum-representation index record")
>
> This should be + 48.
r33801.
>> ...
>
>> +++ trunk/subversion/include/svn_fs.h Sat Oct 18 06:11:59 2008 (r33730)
>> @@ -31,6 +31,7 @@
>> #include "svn_delta.h"
>> #include "svn_io.h"
>> #include "svn_mergeinfo.h"
>> +#include "svn_checksum.h"
>
> Oof. This shoulda been in there already since svn_checksum_t was in use.
Yeah, it was probably being pulled in by another header somewhere (such as
svn_io.h).
>> ...
>> +++ trunk/subversion/libsvn_fs_base/bdb/dbt.c Sat Oct 18 06:11:59 2008 (r33730)
>> @@ -18,6 +18,8 @@
>> #include <stdlib.h>
>> #include <string.h>
>> #include <apr_pools.h>
>> +#include <apr_md5.h>
>> +#include <apr_sha1.h>
>>
>> #define APU_WANT_DB
>> #include <apu_want.h>
>> @@ -153,3 +155,20 @@ svn_fs_base__str_to_dbt(DBT *dbt, const
>> svn_fs_base__set_dbt(dbt, str, strlen(str));
>> return dbt;
>> }
>> +
>> +DBT *
>> +svn_fs_base__checksum_to_dbt(DBT *dbt, svn_checksum_t *checksum)
>> +{
>> + switch (checksum->kind)
>> + {
>> + case svn_checksum_md5:
>> + svn_fs_base__set_dbt(dbt, checksum->digest, APR_MD5_DIGESTSIZE);
>> + break;
>> +
>> + case svn_checksum_sha1:
>> + svn_fs_base__set_dbt(dbt, checksum->digest, APR_SHA1_DIGESTSIZE);
>> + break;
>> + }
>> +
>> + return dbt;
>> +}
>
> Maybe export the DIGESTSIZE() macro out of libsvn_subr/checksum.c ??
Funny story: We already do! I've updated this function to use
svn_checksum_size() in r33801.
>> ...
>> +++ trunk/subversion/libsvn_fs_base/dag.c Sat Oct 18 06:11:59 2008 (r33730)
>> @@ -33,6 +33,7 @@
>> #include "reps-strings.h"
>> #include "revs-txns.h"
>> #include "id.h"
>> +#include "fsguid.h"
>>
>> #include "util/fs_skels.h"
>>
>> @@ -42,6 +43,7 @@
>> #include "bdb/copies-table.h"
>> #include "bdb/reps-table.h"
>> #include "bdb/strings-table.h"
>> +#include "bdb/checksum-reps-table.h"
>>
>> #include "private/svn_fs_util.h"
>> #include "../libsvn_fs/fs-loader.h"
>> @@ -576,9 +578,15 @@ svn_fs_base__dag_set_proplist(dag_node_t
>> trail_t *trail,
>> apr_pool_t *pool)
>> {
>> + svn_error_t *err;
>> node_revision_t *noderev;
>> - const char *rep_key, *mutable_rep_key;
>> + const char *rep_key, *mutable_rep_key, *dup_rep_key;
>> svn_fs_t *fs = svn_fs_base__dag_get_fs(node);
>> + svn_stream_t *wstream;
>> + apr_size_t len;
>> + skel_t *proplist_skel;
>> + svn_stringbuf_t *raw_proplist_buf;
>> + svn_checksum_t *checksum;
>>
>> /* Sanity check: this node better be mutable! */
>> if (! svn_fs_base__dag_check_mutable(node, txn_id))
>> @@ -595,6 +603,34 @@ svn_fs_base__dag_set_proplist(dag_node_t
>> trail, pool));
>> rep_key = noderev->prop_key;
>>
>> + /* Flatten the proplist into a string, and calculate its md5 hash. */
>> + SVN_ERR(svn_fs_base__unparse_proplist_skel(&proplist_skel,
>> + proplist, pool));
>> + raw_proplist_buf = svn_fs_base__unparse_skel(proplist_skel, pool);
>> + SVN_ERR(svn_checksum(&checksum, svn_checksum_sha1, raw_proplist_buf->data,
>> + raw_proplist_buf->len, pool));
>
> The comment is wrong.
r33801.
>> +
>> + /* If the resulting property list is exactly the same as another
>> + string in the database, just use the previously existing string
>> + and get outta here. */
>> + err = svn_fs_bdb__get_checksum_rep(&dup_rep_key, fs, checksum,
>> + trail, pool);
>> + if (! err)
>> + {
>> + if (noderev->prop_key)
>> + SVN_ERR(svn_fs_base__delete_rep_if_mutable(fs, noderev->prop_key,
>> + txn_id, trail, pool));
>> + noderev->prop_key = dup_rep_key;
>> + return svn_fs_bdb__put_node_revision(fs, node->id, noderev,
>> + trail, pool);
>> + }
>> + else if (err && (err->apr_err == SVN_ERR_FS_NO_SUCH_CHECKSUM_REP))
>> + {
>> + svn_error_clear(err);
>> + err = SVN_NO_ERROR;
>> + }
>> + SVN_ERR(err);
>
> This last part seems a bit easier to do:
>
> else if (err)
> {
> if (err->apr_err != ...)
> return err;
> svn_error_clear(err);
> }
>
> I mean, the SVN_ERR(err) is only useful for the else-block. The first
> if() means you don't need to check err again. etc... it is simply more
> checking than needed :-P
Good point, again in r33801.
>> ...
>> + if (! err)
>> + {
>> + useless_data_key = noderev->edit_key;
>> + err = svn_fs_base__reserve_fsguid(trail->fs, &data_key_uniquifier,
>> + trail, pool);
>> + }
>> + else if (err && (err->apr_err == SVN_ERR_FS_NO_SUCH_CHECKSUM_REP))
>> + {
>> + svn_error_clear(err);
>> + err = SVN_NO_ERROR;
>> + new_data_key = noderev->edit_key;
>> + }
>> + SVN_ERR(err);
>
> Same here.
>
>> ...
>> +++ trunk/subversion/libsvn_fs_base/fs.h Sat Oct 18 06:11:59 2008 (r33730)
>> @@ -41,6 +41,9 @@ extern "C" {
>> back-end's format. */
>> #define SVN_FS_BASE__FORMAT_NUMBER 4
>>
>> +/* Minimum format number that supports representation sharing */
>> +#define SVN_FS_BASE__MIN_REP_SHARING_FORMAT 4
>> +
>> /* Minimum format number that supports the 'miscellaneous' table */
>> #define SVN_FS_BASE__MIN_MISCELLANY_FORMAT 4
>
> Seems like these two MIN constants should be combined into one, for simplicity.
We thought about that over on the BDB side, but decided that overloading the
different features into one macro was a bit confusing. I'd like to keep it as is.
>> ...
>> +++ trunk/subversion/libsvn_fs_base/notes/structure Sat Oct 18 06:11:59 2008 (r33730)
>
> Maybe make FSGUID explicit?
>
> Also: where to note about a format upgrade that will include FSGUID?
> If you're going to migrate all key generation to FSGUID, then you'll
> want to start FSGUID at max(all-next-key-columns). I think if you
> start FSGUID at "0" like the patch is currently doing, you *might*
> close off the door to broader use. Haven't thought it through yet tho.
> But if true, then you're going to want to do some work to get that
> max() before we cut 1.6.
I'll defer this one to Mike.
>> ...
>> +++ trunk/subversion/libsvn_fs_fs/rep-cache.c Sat Oct 18 06:11:59 2008 (r33730, copy of r33729, branches/fs-rep-sharing/subversion/libsvn_fs_fs/rep-cache.c)
>> ...
>> +const char *upgrade_sql[] = { NULL,
>> + "pragma auto_vacuum = 1;"
>> + APR_EOL_STR
>> + "create table rep_cache (hash text not null, "
>> + " revision integer not null, "
>> + " offset integer not null, "
>> + " size integer not null, "
>> + " expanded_size integer not null, "
>> + " reuse_count integer not null); "
>> + APR_EOL_STR
>> + "create unique index i_hash on rep_cache(hash); "
>
> Shouldn't need this custom index if you just mark the hash column as
> the primary key (which would be nice for docco purposes anyways).
>
>> ...
>> +svn_error_t *
>> +svn_fs_fs__get_rep_reference(representation_t **rep,
>> + svn_fs_t *fs,
>> + svn_checksum_t *checksum,
>> + apr_pool_t *pool)
>> +{
>> + fs_fs_data_t *ffd = fs->fsap_data;
>> + svn_boolean_t have_row;
>> + svn_sqlite__stmt_t *stmt;
>> +
>> + SVN_ERR(svn_sqlite__prepare(&stmt, ffd->rep_cache,
>> + "select revision, offset, size, expanded_size from rep_cache "
>> + "where hash = :1", pool));
>
> I would recommend preparing this statement when you open the db. Then
> each time this function is called, you can just bind some new values,
> run the query, and finalize the statement. Definitely more efficient.
>
>> ...
>> +svn_error_t *
>> +svn_fs_fs__set_rep_reference(svn_fs_t *fs,
>> + representation_t *rep,
>> + svn_boolean_t reject_dup,
>> + apr_pool_t *pool)
>> +{
>> ...
>> + SVN_ERR(svn_sqlite__prepare(&stmt, ffd->rep_cache,
>> + "insert into rep_cache (hash, revision, offset, size, "
>> + "expanded_size, reuse_count) "
>> + "values (:1, :2, :3, :4, :5, 0);", pool));
>
> Likewise.
>
>> ...
>> +svn_error_t *
>> +svn_fs_fs__inc_rep_reuse(svn_fs_t *fs,
>> + representation_t *rep,
>> + apr_pool_t *pool)
>> +{
>> + fs_fs_data_t *ffd = fs->fsap_data;
>> + svn_boolean_t have_row;
>> + svn_sqlite__stmt_t *stmt;
>> +
>> + /* Fetch the current count. */
>> + SVN_ERR(svn_sqlite__prepare(&stmt, ffd->rep_cache,
>> + "select reuse_count from rep_cache where hash = :1", pool));
>> ...
>> + /* Update the reuse_count. */
>> + SVN_ERR(svn_sqlite__prepare(&stmt, ffd->rep_cache,
>> + "update rep_cache set reuse_count = :1 where hash = :2",
>> + pool));
>
> And two more :-)
Yeah, I can definitely work on fixing up the sqlite usage here. I'll do that in
a separate commit.
-Hyrum
Received on 2008-10-21 11:59:23 CEST