OK, sure, so this prevents "two files with the same brand-new
contents", but does this really prevent "two files with the same
contents, which already existed in a previous revision"? ie, both
files have the same rep as a *previously-cached* third rep.
--dave
On Fri, Dec 5, 2008 at 2:12 PM, Hyrum K. Wright
<hyrum_wright_at_mail.utexas.edu> wrote:
> Nope, and here's why:
> Representations aren't added to the rep cache until a commit is being finalized,
> because it's only at that point that we know for sure which revision those reps
> are in. So, we can't have any rep sharing going on between reps in the same
> revision, or txns which are occurring in parallel. Although this may lead to
> some little bit of sharing loss, it also means that the reps are different,
> eliminating the chance of false positives.
>
> -Hyrum
>
> David Glasser wrote:
>> Won't this break (ie, have a false positive in
>> svn_fs_fs__noderev_same_rep_key) if you have two reps in the same
>> revision with the same contents? (And, perhaps, a rep in a previous
>> revision with the same contents.)
>>
>> --dave
>>
>> On Fri, Dec 5, 2008 at 11:23 AM, Hyrum K. Wright <hyrum_at_hyrumwright.org> wrote:
>>> Author: hwright
>>> Date: Fri Dec 5 11:23:26 2008
>>> New Revision: 34588
>>>
>>> Log:
>>> Remove the rep-reuse count from FSFS, and instead use the original txn id as
>>> a uniquifier for node revisions. This removes a potential race condition
>>> with the reuse count, which was just a uniquifier, not a true reference count.
>>>
>>> [Note: this will probably break existing 1.6-era filesystems. Them's the
>>> breaks of running trunk!]
>>>
>>> * subversion/libsvn_fs_fs/fs_fs.c
>>> (read_rep_offsets): Parse the original txn id.
>>> (representation_string): Replace the reuse count with the orignal txn id.
>>> (svn_fs_fs__noderev_same_rep_key): Use the original txn id as a uniquifier
>>> when comparing representations.
>>> (svn_fs_fs__rep_copy): Copy the original txn id.
>>> (svn_fs_fs__set_entry, rep_write_contents_close): Set the original txn id.
>>>
>>> * subversion/libsvn_fs_fs/fs.h
>>> (representation_t): Remove reuse count member, and replace with the original
>>> txn id field.
>>>
>>> * subversion/libsvn_fs_fs/rep-cache.c
>>> (upgrade_sql): Remove reuse_count column from the rep_cache.
>>> (svn_fs_fs__set_rep_reference): Don't insert the reuse_count column.
>>> (svn_fs_fs__inc_rep_reuse): Remove.
>>>
>>> * subversion/libsvn_fs_fs/rep-cache.h
>>> (svn_fs_fs__inc_rep_reuse): Remove.
>>>
>>> Modified:
>>> trunk/subversion/libsvn_fs_fs/fs.h
>>> trunk/subversion/libsvn_fs_fs/fs_fs.c
>>> trunk/subversion/libsvn_fs_fs/rep-cache.c
>>> trunk/subversion/libsvn_fs_fs/rep-cache.h
>>>
>>> Modified: trunk/subversion/libsvn_fs_fs/fs.h
>>> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_fs_fs/fs.h?pathrev=34588&r1=34587&r2=34588
>>> ==============================================================================
>>> --- trunk/subversion/libsvn_fs_fs/fs.h Fri Dec 5 11:18:14 2008 (r34587)
>>> +++ trunk/subversion/libsvn_fs_fs/fs.h Fri Dec 5 11:23:26 2008 (r34588)
>>> @@ -295,10 +295,10 @@ typedef struct
>>> /* Is this representation a transaction? */
>>> const char *txn_id;
>>>
>>> - /* Is this representation reusing another one, and how much is that rep
>>> - being reused? */
>>> - apr_int64_t reuse_count;
>>> -
>>> + /* For rep-sharing, we need a way of uniquifying node-revs which share the
>>> + same represenation (see svn_fs_fs__noderev_same_rep_key() ). So, we
>>> + store the original txn of the node rev (not the rep!) here. */
>>> + const char *orig_txn_id;
>>> } representation_t;
>>>
>>>
>>>
>>> Modified: trunk/subversion/libsvn_fs_fs/fs_fs.c
>>> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_fs_fs/fs_fs.c?pathrev=34588&r1=34587&r2=34588
>>> ==============================================================================
>>> --- trunk/subversion/libsvn_fs_fs/fs_fs.c Fri Dec 5 11:18:14 2008 (r34587)
>>> +++ trunk/subversion/libsvn_fs_fs/fs_fs.c Fri Dec 5 11:23:26 2008 (r34588)
>>> @@ -1806,9 +1806,13 @@ read_rep_offsets(representation_t **rep_
>>> SVN_ERR(svn_checksum_parse_hex(&rep->sha1_checksum, svn_checksum_sha1, str,
>>> pool));
>>>
>>> - /* Read the reuse count. */
>>> + /* Read the original txn id. */
>>> str = apr_strtok(NULL, " ", &last_str);
>>> - rep->reuse_count = apr_atoi64(str);
>>> + if (str == NULL)
>>> + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
>>> + _("Malformed text rep offset line in node-rev"));
>>> +
>>> + rep->orig_txn_id = apr_pstrdup(pool, str);
>>>
>>> return SVN_NO_ERROR;
>>> }
>>> @@ -2035,7 +2039,7 @@ representation_string(representation_t *
>>> pool));
>>>
>>> return apr_psprintf(pool, "%ld %" APR_OFF_T_FMT " %" SVN_FILESIZE_T_FMT
>>> - " %" SVN_FILESIZE_T_FMT " %s %s %" APR_INT64_T_FMT,
>>> + " %" SVN_FILESIZE_T_FMT " %s %s %s",
>>> rep->revision, rep->offset, rep->size,
>>> rep->expanded_size,
>>> svn_checksum_to_cstring_display(rep->md5_checksum,
>>> @@ -2044,7 +2048,7 @@ representation_string(representation_t *
>>> svn_checksum_to_cstring_display(rep->sha1_checksum,
>>> pool) :
>>> "0000000000000000000000000000000000000000",
>>> - rep->reuse_count);
>>> + rep->orig_txn_id);
>>> }
>>>
>>>
>>> @@ -3428,10 +3432,7 @@ svn_fs_fs__noderev_same_rep_key(represen
>>> if (a->revision != b->revision)
>>> return FALSE;
>>>
>>> - if (a->reuse_count != b->reuse_count)
>>> - return FALSE;
>>> -
>>> - return TRUE;
>>> + return strcmp(a->orig_txn_id, b->orig_txn_id) == 0;
>>> }
>>>
>>> svn_error_t *
>>> @@ -3476,6 +3477,7 @@ svn_fs_fs__rep_copy(representation_t *re
>>> memcpy(rep_new, rep, sizeof(*rep_new));
>>> rep_new->md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
>>> rep_new->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool);
>>> + rep_new->orig_txn_id = apr_pstrdup(pool, rep->orig_txn_id);
>>>
>>> return rep_new;
>>> }
>>> @@ -4467,6 +4469,7 @@ svn_fs_fs__set_entry(svn_fs_t *fs,
>>> rep = apr_pcalloc(pool, sizeof(*rep));
>>> rep->revision = SVN_INVALID_REVNUM;
>>> rep->txn_id = txn_id;
>>> + rep->orig_txn_id = txn_id;
>>> parent_noderev->data_rep = rep;
>>> SVN_ERR(svn_fs_fs__put_node_revision(fs, parent_noderev->id,
>>> parent_noderev, FALSE, pool));
>>> @@ -4795,6 +4798,7 @@ rep_write_contents_close(void *baton)
>>> /* Fill in the rest of the representation field. */
>>> rep->expanded_size = b->rep_size;
>>> rep->txn_id = svn_fs_fs__id_txn_id(b->noderev->id);
>>> + rep->orig_txn_id = rep->txn_id;
>>> rep->revision = SVN_INVALID_REVNUM;
>>>
>>> /* Finalize the checksum. */
>>> @@ -4818,10 +4822,8 @@ rep_write_contents_close(void *baton)
>>>
>>> /* Use the old rep for this content. */
>>> old_rep->md5_checksum = rep->md5_checksum;
>>> + old_rep->orig_txn_id = rep->orig_txn_id;
>>> b->noderev->data_rep = old_rep;
>>> -
>>> - /* Get the reuse count and put it in the node-rev. */
>>> - SVN_ERR(svn_fs_fs__inc_rep_reuse(b->fs, b->noderev->data_rep, b->pool));
>>> }
>>> else
>>> {
>>>
>>> Modified: trunk/subversion/libsvn_fs_fs/rep-cache.c
>>> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_fs_fs/rep-cache.c?pathrev=34588&r1=34587&r2=34588
>>> ==============================================================================
>>> --- trunk/subversion/libsvn_fs_fs/rep-cache.c Fri Dec 5 11:18:14 2008 (r34587)
>>> +++ trunk/subversion/libsvn_fs_fs/rep-cache.c Fri Dec 5 11:23:26 2008 (r34588)
>>> @@ -35,8 +35,7 @@ static const char * const upgrade_sql[]
>>> " revision integer not null, "
>>> " offset integer not null, "
>>> " size integer not null, "
>>> - " expanded_size integer not null, "
>>> - " reuse_count integer not null); "
>>> + " expanded_size integer not null); "
>>> APR_EOL_STR
>>> };
>>>
>>> @@ -197,8 +196,8 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
>>> if (!ffd->rep_cache.set_rep_stmt)
>>> SVN_ERR(svn_sqlite__prepare(&ffd->rep_cache.set_rep_stmt, ffd->rep_cache.db,
>>> "insert into rep_cache (hash, revision, offset, size, "
>>> - "expanded_size, reuse_count) "
>>> - "values (:1, :2, :3, :4, :5, 0);", fs->pool));
>>> + "expanded_size) "
>>> + "values (:1, :2, :3, :4, :5);", fs->pool));
>>>
>>> SVN_ERR(svn_sqlite__bind_text(ffd->rep_cache.set_rep_stmt, 1,
>>> svn_checksum_to_cstring(rep->sha1_checksum,
>>> @@ -212,53 +211,3 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
>>> SVN_ERR(svn_sqlite__step(&have_row, ffd->rep_cache.set_rep_stmt));
>>> return svn_sqlite__reset(ffd->rep_cache.set_rep_stmt);
>>> }
>>> -
>>> -svn_error_t *
>>> -svn_fs_fs__inc_rep_reuse(svn_fs_t *fs,
>>> - representation_t *rep,
>>> - apr_pool_t *pool)
>>> -{
>>> - fs_fs_data_t *ffd = fs->fsap_data;
>>> - svn_boolean_t have_row;
>>> -
>>> - if (ffd->rep_cache.db == NULL)
>>> - return SVN_NO_ERROR;
>>> -
>>> - /* Fetch the current count. */
>>> - if (!ffd->rep_cache.inc_select_stmt)
>>> - SVN_ERR(svn_sqlite__prepare(&ffd->rep_cache.inc_select_stmt,
>>> - ffd->rep_cache.db,
>>> - "select reuse_count from rep_cache where hash = :1",
>>> - fs->pool));
>>> -
>>> - SVN_ERR(svn_sqlite__bind_text(ffd->rep_cache.inc_select_stmt, 1,
>>> - svn_checksum_to_cstring(rep->sha1_checksum,
>>> - pool)));
>>> - SVN_ERR(svn_sqlite__step(&have_row, ffd->rep_cache.inc_select_stmt));
>>> -
>>> - if (!have_row)
>>> - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
>>> - _("Representation for hash '%s' not found"),
>>> - svn_checksum_to_cstring_display(rep->sha1_checksum,
>>> - pool));
>>> -
>>> - rep->reuse_count =
>>> - svn_sqlite__column_int(ffd->rep_cache.inc_select_stmt, 0) + 1;
>>> - SVN_ERR(svn_sqlite__reset(ffd->rep_cache.inc_select_stmt));
>>> -
>>> - /* Update the reuse_count. */
>>> - if (!ffd->rep_cache.inc_update_stmt)
>>> - SVN_ERR(svn_sqlite__prepare(&ffd->rep_cache.inc_update_stmt,
>>> - ffd->rep_cache.db,
>>> - "update rep_cache set reuse_count = :1 where hash = :2",
>>> - fs->pool));
>>> -
>>> - SVN_ERR(svn_sqlite__bind_int64(ffd->rep_cache.inc_update_stmt, 1,
>>> - rep->reuse_count));
>>> - SVN_ERR(svn_sqlite__bind_text(ffd->rep_cache.inc_update_stmt, 2,
>>> - svn_checksum_to_cstring(rep->sha1_checksum,
>>> - pool)));
>>> - SVN_ERR(svn_sqlite__step_done(ffd->rep_cache.inc_update_stmt));
>>> -
>>> - return svn_sqlite__reset(ffd->rep_cache.inc_update_stmt);
>>> -}
>>>
>>> Modified: trunk/subversion/libsvn_fs_fs/rep-cache.h
>>> URL: http://svn.collab.net/viewvc/svn/trunk/subversion/libsvn_fs_fs/rep-cache.h?pathrev=34588&r1=34587&r2=34588
>>> ==============================================================================
>>> --- trunk/subversion/libsvn_fs_fs/rep-cache.h Fri Dec 5 11:18:14 2008 (r34587)
>>> +++ trunk/subversion/libsvn_fs_fs/rep-cache.h Fri Dec 5 11:23:26 2008 (r34588)
>>> @@ -57,15 +57,6 @@ svn_fs_fs__set_rep_reference(svn_fs_t *f
>>> svn_boolean_t reject_dup,
>>> apr_pool_t *pool);
>>>
>>> -/* Incremenent the usage count of the reference used by REP->CHECKSUM,
>>> - and return the new value in REP->REUSE_COUNT.
>>> -
>>> - If the rep cache database has not been opened, this may be a no op. */
>>> -svn_error_t *
>>> -svn_fs_fs__inc_rep_reuse(svn_fs_t *fs,
>>> - representation_t *rep,
>>> - apr_pool_t *pool);
>>> -
>>> #ifdef __cplusplus
>>> }
>>> #endif /* __cplusplus */
>>>
>>> ------------------------------------------------------
>>> http://subversion.tigris.org/ds/viewMessage.do?dsForumId=495&dsMessageId=980282
>>>
>>
>>
>>
>
>
>
--
David Glasser | glasser@davidglasser.net | http://www.davidglasser.net/
------------------------------------------------------
http://subversion.tigris.org/ds/viewMessage.do?dsForumId=462&dsMessageId=980402
Received on 2008-12-08 22:58:08 CET