Index: notes =================================================================== --- notes (revision 1623988) +++ notes (working copy) Property changes on: notes ___________________________________________________________________ Modified: svn:mergeinfo Reverse-merged /subversion/trunk/notes:r1603891-1620574 Index: subversion/include/svn_fs.h =================================================================== --- subversion/include/svn_fs.h (revision 1623988) +++ subversion/include/svn_fs.h (working copy) @@ -110,6 +110,12 @@ */ #define SVN_FS_CONFIG_FSFS_CACHE_NS "fsfs-cache-namespace" +/** Enable / disable the FSFS format 7 "block read" feature. + * + * @since New in 1.9. + */ +#define SVN_FS_CONFIG_FSFS_BLOCK_READ "fsfs-block-read" + /* Note to maintainers: if you add further SVN_FS_CONFIG_FSFS_CACHE_* knobs, update fs_fs.c:verify_as_revision_before_current_plus_plus(). */ Index: subversion/libsvn_fs_fs/cached_data.c =================================================================== --- subversion/libsvn_fs_fs/cached_data.c (revision 1623988) +++ subversion/libsvn_fs_fs/cached_data.c (working copy) @@ -46,6 +46,17 @@ #include "svn_private_config.h" +/* forward-declare. 
See implementation for the docstring */ +static svn_error_t * +block_read(void **result, + svn_fs_t *fs, + svn_revnum_t revision, + apr_uint64_t item_index, + svn_fs_fs__revision_file_t *revision_file, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + + /* Defined this to enable access logging via dgb__log_access #define SVN_FS_FS__LOG_ACCESS */ @@ -70,6 +81,7 @@ #ifdef SVN_FS_FS__LOG_ACCESS fs_fs_data_t *ffd = fs->fsap_data; apr_off_t end_offset = 0; + svn_fs_fs__p2l_entry_t *entry = NULL; static const char *types[] = {"", "frep ", "drep ", "fprop", "dprop", "node ", "chgs ", "rep "}; const char *description = ""; @@ -141,11 +153,36 @@ } } - /* reduced logging for format 6 and earlier */ - printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \ - " %s\n", - pack, (apr_uint64_t)(offset), type, revision, item_index, - description); + /* some info is only available in format7 repos */ + if (svn_fs_fs__use_log_addressing(fs, revision)) + { + /* reverse index lookup: get item description in ENTRY */ + SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision, + offset, scratch_pool)); + if (entry) + { + /* more details */ + end_offset = offset + entry->size; + type = types[entry->type]; + } + + /* line output */ + printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n", + pack, (long)(offset / ffd->block_size), + (long)(offset % ffd->block_size), + (long)(end_offset / ffd->block_size), + (long)(end_offset % ffd->block_size), + type, revision, item_index, description); + } + else + { + /* reduced logging for format 6 and earlier */ + printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \ + " %s\n", + pack, (apr_uint64_t)(offset), type, revision, item_index, + description); + } + #endif return SVN_NO_ERROR; @@ -240,6 +277,17 @@ id_str->data, fs->path); } +/* Return TRUE, if REVISION in FS is of a format that supports block-read + and the feature has been enabled. 
*/ +static svn_boolean_t +use_block_read(svn_fs_t *fs, + svn_revnum_t revision) +{ + fs_fs_data_t *ffd = fs->fsap_data; + return svn_fs_fs__use_log_addressing(fs, revision) + && ffd->use_block_read; +} + /* Get the node-revision for the node ID in FS. Set *NODEREV_P to the new node-revision structure, allocated in POOL. See svn_fs_fs__get_node_revision, which wraps this and adds another @@ -312,21 +360,35 @@ rev_item->number, scratch_pool)); - /* physical addressing mode reading, parsing and caching */ - SVN_ERR(svn_fs_fs__read_noderev(noderev_p, - revision_file->stream, - result_pool, - scratch_pool)); + if (use_block_read(fs, rev_item->revision)) + { + /* block-read will parse the whole block and will also return + the one noderev that we need right now. */ + SVN_ERR(block_read((void **)noderev_p, fs, + rev_item->revision, + rev_item->number, + revision_file, + result_pool, + scratch_pool)); + } + else + { + /* physical addressing mode reading, parsing and caching */ + SVN_ERR(svn_fs_fs__read_noderev(noderev_p, + revision_file->stream, + result_pool, + scratch_pool)); - /* Workaround issue #4031: is-fresh-txn-root in revision files. */ - (*noderev_p)->is_fresh_txn_root = FALSE; + /* Workaround issue #4031: is-fresh-txn-root in revision files. */ + (*noderev_p)->is_fresh_txn_root = FALSE; - /* The noderev is not in cache, yet. Add it, if caching has been enabled. */ - if (ffd->node_revision_cache) - SVN_ERR(svn_cache__set(ffd->node_revision_cache, - &key, - *noderev_p, - scratch_pool)); + /* The noderev is not in cache, yet. Add it, if caching has been enabled. 
*/ + if (ffd->node_revision_cache) + SVN_ERR(svn_cache__set(ffd->node_revision_cache, + &key, + *noderev_p, + scratch_pool)); + } SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); } @@ -488,33 +550,41 @@ apr_pool_t *scratch_pool) { fs_fs_data_t *ffd = fs->fsap_data; - svn_fs_fs__revision_file_t *revision_file; - apr_off_t root_offset; - svn_fs_id_t *root_id = NULL; - svn_boolean_t is_cached; - SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool)); - SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached, - ffd->rev_root_id_cache, &rev, result_pool)); - if (is_cached) - return SVN_NO_ERROR; + if (svn_fs_fs__use_log_addressing(fs, rev)) + { + *root_id_p = svn_fs_fs__id_create_root(rev, result_pool); + } + else + { + svn_fs_fs__revision_file_t *revision_file; + apr_off_t root_offset; + svn_fs_id_t *root_id = NULL; + svn_boolean_t is_cached; - SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, - scratch_pool, scratch_pool)); - SVN_ERR(get_root_changes_offset(&root_offset, NULL, - revision_file, fs, rev, - scratch_pool)); + SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached, + ffd->rev_root_id_cache, &rev, result_pool)); + if (is_cached) + return SVN_NO_ERROR; - SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev, - root_offset, result_pool)); + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, + scratch_pool, scratch_pool)); + SVN_ERR(get_root_changes_offset(&root_offset, NULL, + revision_file, fs, rev, + scratch_pool)); - SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); + SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev, + root_offset, result_pool)); - SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id, scratch_pool)); + SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); - *root_id_p = root_id; + SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id, + scratch_pool)); + *root_id_p = root_id; + } + return SVN_NO_ERROR; } @@ -770,9 +840,13 @@ /* populate the cache if 
appropriate */ if (! svn_fs_fs__id_txn_used(&rep->txn_id)) { - if (ffd->rep_header_cache) - SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh, - scratch_pool)); + if (use_block_read(fs, rep->revision)) + SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index, + rs->sfile->rfile, result_pool, scratch_pool)); + else + if (ffd->rep_header_cache) + SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh, + scratch_pool)); } } @@ -848,13 +922,48 @@ void **hint, apr_pool_t *scratch_pool) { - rep_state_t *rs; - svn_fs_fs__rep_header_t *rep_header; + if (svn_fs_fs__use_log_addressing(fs, rep->revision)) + { + apr_off_t offset; + svn_fs_fs__p2l_entry_t *entry; - /* ### Should this be using read_rep_line() directly? */ - SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint, - rep, fs, scratch_pool, scratch_pool)); + svn_fs_fs__revision_file_t *rev_file; + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision, + scratch_pool, scratch_pool)); + /* This will auto-retry if there was a background pack. */ + SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision, + NULL, rep->item_index, scratch_pool)); + + /* This may fail if there is a background pack operation (can't auto- + retry because the item offset lookup has to be redone as well). */ + SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, rep->revision, + offset, scratch_pool)); + + if ( entry == NULL + || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP + || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS) + return svn_error_createf(SVN_ERR_REPOS_CORRUPTED, NULL, + _("No representation found at offset %s " + "for item %s in revision %ld"), + apr_off_t_toa(scratch_pool, offset), + apr_psprintf(scratch_pool, + "%" APR_UINT64_T_FMT, + rep->item_index), + rep->revision); + + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + } + else + { + rep_state_t *rs; + svn_fs_fs__rep_header_t *rep_header; + + /* ### Should this be using read_rep_line() directly? 
*/ + SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint, + rep, fs, scratch_pool, scratch_pool)); + } + return SVN_NO_ERROR; } @@ -1352,6 +1461,25 @@ /* someone has to actually read the data from file. Open it */ SVN_ERR(auto_open_shared_file(rs->sfile)); + /* invoke the 'block-read' feature for non-txn data. + However, don't do that if we are in the middle of some representation, + because the block is unlikely to contain other data. */ + if ( rs->chunk_index == 0 + && SVN_IS_VALID_REVNUM(rs->revision) + && use_block_read(rs->sfile->fs, rs->revision) + && rs->raw_window_cache) + { + SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index, + rs->sfile->rfile, result_pool, scratch_pool)); + + /* reading the whole block probably also provided us with the + desired txdelta window */ + SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached, + result_pool, scratch_pool)); + if (is_cached) + return SVN_NO_ERROR; + } + /* data is still not cached -> we need to read it. Make sure we have all the necessary info. */ SVN_ERR(auto_set_start_offset(rs, scratch_pool)); @@ -2571,31 +2699,48 @@ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, scratch_pool, scratch_pool)); - /* physical addressing mode code path */ - SVN_ERR(get_root_changes_offset(NULL, &changes_offset, - revision_file, fs, rev, - scratch_pool)); + if (use_block_read(fs, rev)) + { + /* 'block-read' will also provide us with the desired data */ + SVN_ERR(block_read((void **)changes, fs, + rev, SVN_FS_FS__ITEM_INDEX_CHANGES, + revision_file, result_pool, scratch_pool)); + } + else + { + /* Addressing is very different for old formats + * (needs to read the revision trailer). 
*/ + if (svn_fs_fs__use_log_addressing(fs, rev)) + SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs, + revision_file, rev, NULL, + SVN_FS_FS__ITEM_INDEX_CHANGES, + scratch_pool)); + else + SVN_ERR(get_root_changes_offset(NULL, &changes_offset, + revision_file, fs, rev, + scratch_pool)); - /* Actual reading and parsing are the same, though. */ - SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset, - scratch_pool)); - SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream, - result_pool, scratch_pool)); + /* Actual reading and parsing are the same, though. */ + SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset, + scratch_pool)); + SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream, + result_pool, scratch_pool)); - /* cache for future reference */ + /* cache for future reference */ - if (ffd->changes_cache) - { - /* Guesstimate for the size of the in-cache representation. */ - apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts; + if (ffd->changes_cache) + { + /* Guesstimate for the size of the in-cache representation. */ + apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts; - /* Don't even serialize data that probably won't fit into the - * cache. This often implies that either CHANGES is very - * large, memory is scarce or both. Having a huge temporary - * copy would not be a good thing in either case. */ - if (svn_cache__is_cachable(ffd->changes_cache, estimated_size)) - SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes, - scratch_pool)); + /* Don't even serialize data that probably won't fit into the + * cache. This often implies that either CHANGES is very + * large, memory is scarce or both. Having a huge temporary + * copy would not be a good thing in either case. 
*/ + if (svn_cache__is_cachable(ffd->changes_cache, estimated_size)) + SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes, + scratch_pool)); + } } SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); @@ -2608,6 +2753,46 @@ return SVN_NO_ERROR; } +/* Initialize the representation read state RS for the given REP_HEADER and + * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS. + * Use RESULT_POOL for allocations. + */ +static svn_error_t * +init_rep_state(rep_state_t *rs, + svn_fs_fs__rep_header_t *rep_header, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *file, + svn_fs_fs__p2l_entry_t* entry, + apr_pool_t *result_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file)); + + /* this function does not apply to representation containers */ + SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP + && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS); + + shared_file->rfile = file; + shared_file->fs = fs; + shared_file->revision = entry->item.revision; + shared_file->pool = result_pool; + + rs->sfile = shared_file; + rs->revision = entry->item.revision; + rs->item_index = entry->item.number; + rs->header_size = rep_header->header_size; + rs->start = entry->offset + rs->header_size; + rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4; + rs->size = entry->size - rep_header->header_size - 7; + rs->ver = 1; + rs->chunk_index = 0; + rs->raw_window_cache = ffd->raw_window_cache; + rs->window_cache = ffd->txdelta_window_cache; + rs->combined_cache = ffd->combined_window_cache; + + return SVN_NO_ERROR; +} + /* Implement svn_cache__partial_getter_func_t for txdelta windows. * Instead of the whole window data, return only END_OFFSET member. */ @@ -2734,3 +2919,425 @@ svn_pool_destroy(iterpool); return SVN_NO_ERROR; } + +/* Read all txdelta / plain windows following REP_HEADER in FS as described + * 
Read the data from the already open FILE and the wrapping + * STREAM object. If MAX_OFFSET is not -1, don't read windows that start + * at or beyond that offset. Use SCRATCH_POOL for temporary allocations. + * If caching is not enabled, this is a no-op. + */ +static svn_error_t * +block_read_windows(svn_fs_fs__rep_header_t *rep_header, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t* entry, + apr_off_t max_offset, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + rep_state_t rs = { 0 }; + apr_off_t offset; + window_cache_key_t key = { 0 }; + + if ( (rep_header->type != svn_fs_fs__rep_plain + && (!ffd->txdelta_window_cache || !ffd->raw_window_cache)) + || (rep_header->type == svn_fs_fs__rep_plain + && !ffd->combined_window_cache)) + return SVN_NO_ERROR; + + SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, + result_pool)); + + /* RS->FILE may be shared between RS instances -> make sure we point + * to the right data. */ + offset = rs.start + rs.current; + if (rep_header->type == svn_fs_fs__rep_plain) + { + svn_stringbuf_t *plaintext; + svn_boolean_t is_cached; + + /* already in cache? */ + SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache, + get_window_key(&key, &rs), + scratch_pool)); + if (is_cached) + return SVN_NO_ERROR; + + /* for larger reps, the header may have crossed a block boundary. + * make sure we still read blocks properly aligned, i.e. don't use + * plain seek here. 
*/ + SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool)); + + plaintext = svn_stringbuf_create_ensure(rs.size, result_pool); + SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data, + rs.size, &plaintext->len, NULL, + result_pool)); + plaintext->data[plaintext->len] = 0; + rs.current += rs.size; + + SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool)); + } + else + { + SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool)); + } + + return SVN_NO_ERROR; +} + +/* Try to get the representation header identified by KEY from FS's cache. + * If it has not been cached, read it from the current position in STREAM + * and put it into the cache (if caching has been enabled for rep headers). + * Return the result in *REP_HEADER. Use POOL for allocations. + */ +static svn_error_t * +read_rep_header(svn_fs_fs__rep_header_t **rep_header, + svn_fs_t *fs, + svn_stream_t *stream, + pair_cache_key_t *key, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_boolean_t is_cached = FALSE; + + if (ffd->rep_header_cache) + { + SVN_ERR(svn_cache__get((void**)rep_header, &is_cached, + ffd->rep_header_cache, key, + result_pool)); + if (is_cached) + return SVN_NO_ERROR; + } + + SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool, + scratch_pool)); + + if (ffd->rep_header_cache) + SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, + scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Fetch the representation data (header, txdelta / plain windows) + * addressed by ENTRY->ITEM in FS and cache it if caches are enabled. + * Read the data from the already open FILE and the wrapping + * STREAM object. If MAX_OFFSET is not -1, don't read windows that start + * at or beyond that offset. + * Use SCRATCH_POOL for temporary allocations. 
+ */ +static svn_error_t * +block_read_contents(svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t* entry, + apr_off_t max_offset, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + pair_cache_key_t header_key = { 0 }; + svn_fs_fs__rep_header_t *rep_header; + + header_key.revision = (apr_int32_t)entry->item.revision; + header_key.second = entry->item.number; + + SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key, + result_pool, scratch_pool)); + SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset, + result_pool, scratch_pool)); + + return SVN_NO_ERROR; +} + +/* For the given REV_FILE in FS, in *STREAM return a stream covering the + * item specified by ENTRY. Also, verify the item's content by low-level + * checksum. Allocate the result in POOL. + */ +static svn_error_t * +read_item(svn_stream_t **stream, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t* entry, + apr_pool_t *pool) +{ + apr_uint32_t digest; + svn_checksum_t *expected, *actual; + apr_uint32_t plain_digest; + + /* Read item into string buffer. */ + svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool); + text->len = entry->size; + text->data[text->len] = 0; + SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len, + NULL, NULL, pool)); + + /* Return (construct, calculate) stream and checksum. */ + *stream = svn_stream_from_stringbuf(text, pool); + digest = svn__fnv1a_32x4(text->data, text->len); + + /* Checksums will match most of the time. */ + if (entry->fnv1_checksum == digest) + return SVN_NO_ERROR; + + /* Construct proper checksum objects from their digests to allow for + * nice error messages. 
*/ + plain_digest = htonl(entry->fnv1_checksum); + expected = svn_checksum__from_digest_fnv1a_32x4( + (const unsigned char *)&plain_digest, pool); + plain_digest = htonl(digest); + actual = svn_checksum__from_digest_fnv1a_32x4( + (const unsigned char *)&plain_digest, pool); + + /* Construct the full error message with all the info we have. */ + return svn_checksum_mismatch_err(expected, actual, pool, + _("Low-level checksum mismatch while reading\n" + "%s bytes of meta data at offset %s " + "for item %s in revision %ld"), + apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size), + apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset), + apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number), + entry->item.revision); +} + +/* If not already cached or if MUST_READ is set, read the changed paths + * list addressed by ENTRY in FS and return it in *CHANGES. Cache the + * result if caching is enabled. Read the data from the already open + * FILE and wrapping FILE_STREAM. Use POOL for allocations. + */ +static svn_error_t * +block_read_changes(apr_array_header_t **changes, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + svn_boolean_t must_read, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_stream_t *stream; + if (!must_read && !ffd->changes_cache) + return SVN_NO_ERROR; + + /* already in cache? 
*/ + if (!must_read && ffd->changes_cache) + { + svn_boolean_t is_cached; + SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, + &entry->item.revision, + scratch_pool)); + if (is_cached) + return SVN_NO_ERROR; + } + + SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); + + /* read changes from revision file */ + SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool, + scratch_pool)); + + /* cache for future reference */ + if (ffd->changes_cache) + SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision, + *changes, scratch_pool)); + + return SVN_NO_ERROR; +} + +/* If not already cached or if MUST_READ is set, read the node revision + * addressed by ENTRY in FS and return it in *NODEREV_P. Cache the + * result if caching is enabled. Read the data from the already open + * FILE and wrapping FILE_STREAM. Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +block_read_noderev(node_revision_t **noderev_p, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + svn_boolean_t must_read, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_stream_t *stream; + + pair_cache_key_t key = { 0 }; + key.revision = entry->item.revision; + key.second = entry->item.number; + + if (!must_read && !ffd->node_revision_cache) + return SVN_NO_ERROR; + + /* already in cache? */ + if (!must_read && ffd->node_revision_cache) + { + svn_boolean_t is_cached; + SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, + &key, scratch_pool)); + if (is_cached) + return SVN_NO_ERROR; + } + + SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); + + /* read node rev from revision file */ + SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream, + result_pool, scratch_pool)); + + /* Workaround issue #4031: is-fresh-txn-root in revision files. 
*/ + (*noderev_p)->is_fresh_txn_root = FALSE; + + if (ffd->node_revision_cache) + SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p, + scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS + * and put all data into cache. If necessary and depending on heuristics, + * neighboring blocks may also get read. The data is being read from + * already open REVISION_FILE, which must be the correct rev / pack file + * w.r.t. REVISION. + * + * For noderevs and changed path lists, the item fetched can be allocated + * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL. + */ +static svn_error_t * +block_read(void **result, + svn_fs_t *fs, + svn_revnum_t revision, + apr_uint64_t item_index, + svn_fs_fs__revision_file_t *revision_file, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_off_t offset, wanted_offset = 0; + apr_off_t block_start = 0; + apr_array_header_t *entries; + int run_count = 0; + int i; + apr_pool_t *iterpool; + + /* Block read is an optional feature. If the caller does not want anything + * specific we may not have to read anything. */ + if (!result) + return SVN_NO_ERROR; + + iterpool = svn_pool_create(scratch_pool); + + /* don't try this on transaction protorev files */ + SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision)); + + /* index lookup: find the OFFSET of the item we *must* read plus (in the + * "do-while" block) the list of items in the same block. */ + SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file, + revision, NULL, item_index, iterpool)); + + offset = wanted_offset; + + /* Heuristics: + * + * Read this block. If the last item crosses the block boundary, read + * the next block but stop there. Because cross-boundary items cause + * blocks to be read twice, this heuristics will limit this effect to + * approx. 
50% of blocks, probably less, while providing a sensible + * amount of read-ahead. + */ + do + { + /* fetch list of items in the block surrounding OFFSET */ + block_start = offset - (offset % ffd->block_size); + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file, + revision, block_start, + ffd->block_size, scratch_pool)); + + SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset, + iterpool)); + + /* read all items from the block */ + for (i = 0; i < entries->nelts; ++i) + { + svn_boolean_t is_result, is_wanted; + apr_pool_t *pool; + svn_fs_fs__p2l_entry_t* entry; + + svn_pool_clear(iterpool); + + /* skip empty sections */ + entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + continue; + + /* the item / container we were looking for? */ + is_wanted = entry->offset == wanted_offset + && entry->item.revision == revision + && entry->item.number == item_index; + is_result = result && is_wanted; + + /* select the pool that we want the item to be allocated in */ + pool = is_result ? result_pool : iterpool; + + /* handle all items that start within this block and are relatively + * small (i.e. < block size). Always read the item we need to return. + */ + if (is_result || ( entry->offset >= block_start + && entry->size < ffd->block_size)) + { + void *item = NULL; + SVN_ERR(svn_io_file_seek(revision_file->file, SEEK_SET, + &entry->offset, iterpool)); + switch (entry->type) + { + case SVN_FS_FS__ITEM_TYPE_FILE_REP: + case SVN_FS_FS__ITEM_TYPE_DIR_REP: + case SVN_FS_FS__ITEM_TYPE_FILE_PROPS: + case SVN_FS_FS__ITEM_TYPE_DIR_PROPS: + SVN_ERR(block_read_contents(fs, revision_file, entry, + is_wanted + ? 
-1 + : block_start + ffd->block_size, + pool, iterpool)); + break; + + case SVN_FS_FS__ITEM_TYPE_NODEREV: + if (ffd->node_revision_cache || is_result) + SVN_ERR(block_read_noderev((node_revision_t **)&item, + fs, revision_file, + entry, is_result, pool, + iterpool)); + break; + + case SVN_FS_FS__ITEM_TYPE_CHANGES: + SVN_ERR(block_read_changes((apr_array_header_t **)&item, + fs, revision_file, + entry, is_result, + pool, iterpool)); + break; + + default: + break; + } + + if (is_result) + *result = item; + + /* if we crossed a block boundary, read the remainder of + * the last block as well */ + offset = entry->offset + entry->size; + if (offset > block_start + ffd->block_size) + ++run_count; + } + } + + } + while(run_count++ == 1); /* can only be true once and only if a block + * boundary got crossed */ + + /* if the caller requested a result, we must have provided one by now */ + assert(!result || *result); + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} Index: subversion/libsvn_fs_fs/caching.c =================================================================== --- subversion/libsvn_fs_fs/caching.c (revision 1623988) +++ subversion/libsvn_fs_fs/caching.c (working copy) @@ -25,6 +25,7 @@ #include "id.h" #include "dag.h" #include "tree.h" +#include "index.h" #include "temp_serializer.h" #include "../libsvn_fs/fs-loader.h" @@ -639,6 +640,71 @@ ffd->combined_window_cache = NULL; } + if (ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT) + { + SVN_ERR(create_cache(&(ffd->l2p_header_cache), + NULL, + membuffer, + 64, 16, /* entry size varies but we must cover + a reasonable number of revisions (1k) */ + svn_fs_fs__serialize_l2p_header, + svn_fs_fs__deserialize_l2p_header, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "L2P_HEADER", + (char *)NULL), + SVN_CACHE__MEMBUFFER_HIGH_PRIORITY, + fs, + no_handler, + fs->pool, pool)); + SVN_ERR(create_cache(&(ffd->l2p_page_cache), + NULL, + membuffer, + 64, 16, /* entry size varies but we must cover + a 
reasonable number of revisions (1k) */ + svn_fs_fs__serialize_l2p_page, + svn_fs_fs__deserialize_l2p_page, + sizeof(svn_fs_fs__page_cache_key_t), + apr_pstrcat(pool, prefix, "L2P_PAGE", + (char *)NULL), + SVN_CACHE__MEMBUFFER_HIGH_PRIORITY, + fs, + no_handler, + fs->pool, pool)); + SVN_ERR(create_cache(&(ffd->p2l_header_cache), + NULL, + membuffer, + 4, 1, /* Large entries. Rarely used. */ + svn_fs_fs__serialize_p2l_header, + svn_fs_fs__deserialize_p2l_header, + sizeof(pair_cache_key_t), + apr_pstrcat(pool, prefix, "P2L_HEADER", + (char *)NULL), + SVN_CACHE__MEMBUFFER_HIGH_PRIORITY, + fs, + no_handler, + fs->pool, pool)); + SVN_ERR(create_cache(&(ffd->p2l_page_cache), + NULL, + membuffer, + 4, 16, /* Variably sized entries. Rarely used. */ + svn_fs_fs__serialize_p2l_page, + svn_fs_fs__deserialize_p2l_page, + sizeof(svn_fs_fs__page_cache_key_t), + apr_pstrcat(pool, prefix, "P2L_PAGE", + (char *)NULL), + SVN_CACHE__MEMBUFFER_HIGH_PRIORITY, + fs, + no_handler, + fs->pool, pool)); + } + else + { + ffd->l2p_header_cache = NULL; + ffd->l2p_page_cache = NULL; + ffd->p2l_header_cache = NULL; + ffd->p2l_page_cache = NULL; + } + return SVN_NO_ERROR; } Index: subversion/libsvn_fs_fs/fs.c =================================================================== --- subversion/libsvn_fs_fs/fs.c (revision 1623988) +++ subversion/libsvn_fs_fs/fs.c (working copy) @@ -265,6 +265,7 @@ initialize_fs_struct(svn_fs_t *fs) { fs_fs_data_t *ffd = apr_pcalloc(fs->pool, sizeof(*ffd)); + ffd->min_log_addressing_rev = SVN_INVALID_REVNUM; fs->vtable = &fs_vtable; fs->fsap_data = ffd; Index: subversion/libsvn_fs_fs/fs.h =================================================================== --- subversion/libsvn_fs_fs/fs.h (revision 1623988) +++ subversion/libsvn_fs_fs/fs.h (working copy) @@ -72,6 +72,10 @@ #define PATH_PACKED "pack" /* Packed revision data file */ #define PATH_EXT_PACKED_SHARD ".pack" /* Extension for packed shards */ +#define PATH_EXT_L2P_INDEX ".l2p" /* extension of the log- + to-phys 
index */ +#define PATH_EXT_P2L_INDEX ".p2l" /* extension of the phys- to-log index */ /* If you change this, look at tests/svn_test_fs.c(maybe_install_fsfs_conf) */ #define PATH_CONFIG "fsfs.conf" /* Configuration */ @@ -87,6 +91,9 @@ #define PATH_EXT_PROPS ".props" /* Extension for node props */ #define PATH_EXT_REV ".rev" /* Extension of protorev file */ #define PATH_EXT_REV_LOCK ".rev-lock" /* Extension of protorev lock file */ +#define PATH_TXN_ITEM_INDEX "itemidx" /* File containing the current item + index number */ +#define PATH_INDEX "index" /* name of index files w/o ext */ /* Names of files in legacy FS formats */ #define PATH_REV "rev" /* Proto rev file */ @@ -108,6 +115,8 @@ #define CONFIG_OPTION_COMPRESS_PACKED_REVPROPS "compress-packed-revprops" #define CONFIG_SECTION_IO "io" #define CONFIG_OPTION_BLOCK_SIZE "block-size" +#define CONFIG_OPTION_L2P_PAGE_SIZE "l2p-page-size" +#define CONFIG_OPTION_P2L_PAGE_SIZE "p2l-page-size" #define CONFIG_SECTION_DEBUG "debug" #define CONFIG_OPTION_PACK_AFTER_COMMIT "pack-after-commit" @@ -161,6 +170,9 @@ /* The minimum format number that supports packed revprops. */ #define SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT 6 +/* The minimum format number that supports logical addressing. */ +#define SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT 7 + /* Minimum format number that providing a separate lock file for pack ops */ #define SVN_FS_FS__MIN_PACK_LOCK_FORMAT 7 @@ -291,9 +303,26 @@ layouts) or zero (for linear layouts). */ int max_files_per_dir; + /* The first revision that uses logical addressing. SVN_INVALID_REVNUM + if there is no such revision (pre-f7 or non-sharded). May be a + future revision if the current shard started with physical addressing + and is not complete, yet. */ + svn_revnum_t min_log_addressing_rev; + /* Rev / pack file read granularity in bytes. 
*/ apr_int64_t block_size; + /* Capacity in entries of log-to-phys index pages */ + apr_int64_t l2p_page_size; + + /* Rev / pack file granularity (in bytes) covered by a single phys-to-log + * index page. */ + apr_int64_t p2l_page_size; + + /* If set, parse and cache *all* data of each block that we read + * (not just the one bit that we need, atm). */ + svn_boolean_t use_block_read; + /* The revision that was youngest, last time we checked. */ svn_revnum_t youngest_rev_cache; @@ -382,6 +411,23 @@ if the node has mergeinfo, "0" if it doesn't. */ svn_cache__t *mergeinfo_existence_cache; + /* Cache for l2p_header_t objects; the key is (revision, is-packed). + Will be NULL for pre-format7 repos */ + svn_cache__t *l2p_header_cache; + + /* Cache for l2p_page_t objects; the key is svn_fs_fs__page_cache_key_t. + Will be NULL for pre-format7 repos */ + svn_cache__t *l2p_page_cache; + + /* Cache for p2l_header_t objects; the key is (revision, is-packed). + Will be NULL for pre-format7 repos */ + svn_cache__t *p2l_header_cache; + + /* Cache for apr_array_header_t objects containing svn_fs_fs__p2l_entry_t + elements; the key is svn_fs_fs__page_cache_key_t. + Will be NULL for pre-format7 repos */ + svn_cache__t *p2l_page_cache; + /* TRUE while the we hold a lock on the write lock file. */ svn_boolean_t has_write_lock; Index: subversion/libsvn_fs_fs/fs_fs.c =================================================================== --- subversion/libsvn_fs_fs/fs_fs.c (revision 1623988) +++ subversion/libsvn_fs_fs/fs_fs.c (working copy) @@ -456,15 +456,19 @@ } /* Read the format number and maximum number of files per directory - from PATH and return them in *PFORMAT and *MAX_FILES_PER_DIR respectively. + from PATH and return them in *PFORMAT, *MAX_FILES_PER_DIR and + MIN_LOG_ADDRESSING_REV respectively. *MAX_FILES_PER_DIR is obtained from the 'layout' format option, and will be set to zero if a linear scheme should be used. 
+ *MIN_LOG_ADDRESSING_REV is obtained from the 'addressing' format option, + and will be set to SVN_INVALID_REVNUM for physical addressing. Use POOL for temporary allocation. */ static svn_error_t * read_format(int *pformat, int *max_files_per_dir, + svn_revnum_t *min_log_addressing_rev, const char *path, apr_pool_t *pool) { @@ -511,6 +515,7 @@ /* Set the default values for anything that can be set via an option. */ *max_files_per_dir = 0; + *min_log_addressing_rev = SVN_INVALID_REVNUM; /* Read any options. */ while (!eos) @@ -537,11 +542,41 @@ } } + if (*pformat >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT && + strncmp(buf->data, "addressing ", 11) == 0) + { + if (strcmp(buf->data + 11, "physical") == 0) + { + *min_log_addressing_rev = SVN_INVALID_REVNUM; + continue; + } + + if (strncmp(buf->data + 11, "logical ", 8) == 0) + { + int value; + + /* Check that the argument is numeric. */ + SVN_ERR(check_format_file_buffer_numeric(buf->data, 19, path, pool)); + SVN_ERR(svn_cstring_atoi(&value, buf->data + 19)); + *min_log_addressing_rev = value; + continue; + } + } + return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, _("'%s' contains invalid filesystem format option '%s'"), svn_dirent_local_style(path, pool), buf->data); } + /* Non-sharded repositories never use logical addressing. + * If the format file is inconsistent in that respect, something + * probably went wrong. 
+ */ + if (*min_log_addressing_rev != SVN_INVALID_REVNUM && !*max_files_per_dir) + return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, + _("'%s' specifies logical addressing for a non-sharded repository"), + svn_dirent_local_style(path, pool)); + return SVN_NO_ERROR; } @@ -573,6 +608,17 @@ svn_stringbuf_appendcstr(sb, "layout linear\n"); } + if (ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT) + { + if (ffd->min_log_addressing_rev == SVN_INVALID_REVNUM) + svn_stringbuf_appendcstr(sb, "addressing physical\n"); + else + svn_stringbuf_appendcstr(sb, + apr_psprintf(pool, + "addressing logical %ld\n", + ffd->min_log_addressing_rev)); + } + /* svn_io_write_version_file() does a load of magic to allow it to replace version files that already exist. We only need to do that when we're allowed to overwrite an existing file. */ @@ -682,8 +728,33 @@ ffd->compress_packed_revprops = FALSE; } + if (ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT) + { + SVN_ERR(svn_config_get_int64(config, &ffd->block_size, + CONFIG_SECTION_IO, + CONFIG_OPTION_BLOCK_SIZE, + 64)); + SVN_ERR(svn_config_get_int64(config, &ffd->l2p_page_size, + CONFIG_SECTION_IO, + CONFIG_OPTION_L2P_PAGE_SIZE, + 0x2000)); + SVN_ERR(svn_config_get_int64(config, &ffd->p2l_page_size, + CONFIG_SECTION_IO, + CONFIG_OPTION_P2L_PAGE_SIZE, + 0x400)); + + /* convert kBytes to bytes */ + ffd->block_size *= 0x400; + ffd->p2l_page_size *= 0x400; + /* L2P pages are in entries - not in (k)Bytes */ + } + else + { /* should be irrelevant but we initialize them anyway */ - ffd->block_size = 0x1000; + ffd->block_size = 0x1000; /* Matches default APR file buffer size. */ + ffd->l2p_page_size = 0x2000; /* Matches above default. */ + ffd->p2l_page_size = 0x100000; /* Matches above default in bytes. */ + } if (ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) { @@ -876,6 +947,34 @@ "### Can be changed at any time but must be a power of 2." NL "### block-size is given in kBytes and with a default of 64 kBytes." 
NL "# " CONFIG_OPTION_BLOCK_SIZE " = 64" NL +"###" NL +"### The log-to-phys index maps data item numbers to offsets within the" NL +"### rev or pack file. This index is organized in pages of a fixed maximum" NL +"### capacity. To access an item, the page table and the respective page" NL +"### must be read." NL +"### This parameter only affects revisions with thousands of changed paths." NL +"### If you have several extremely large revisions (~1 mio changes), think" NL +"### about increasing this setting. Reducing the value will rarely result" NL +"### in a net speedup." NL +"### This is an expert setting. Must be a power of 2." NL +"### l2p-page-size is 8192 entries by default." NL +"# " CONFIG_OPTION_L2P_PAGE_SIZE " = 8192" NL +"###" NL +"### The phys-to-log index maps positions within the rev or pack file to" NL +"### to data items, i.e. describes what piece of information is being" NL +"### stored at any particular offset. The index describes the rev file" NL +"### in chunks (pages) and keeps a global list of all those pages. Large" NL +"### pages mean a shorter page table but a larger per-page description of" NL +"### data items in it. The latency sweetspot depends on the change size" NL +"### distribution but covers a relatively wide range." NL +"### If the repository contains very large files, i.e. individual changes" NL +"### of tens of MB each, increasing the page size will shorten the index" NL +"### file at the expense of a slightly increased latency in sections with" NL +"### smaller changes." NL +"### For source code repositories, this should be about 16x the block-size." NL +"### Must be a power of 2." NL +"### p2l-page-size is given in kBytes and with a default of 1024 kBytes." 
NL +"# " CONFIG_OPTION_P2L_PAGE_SIZE " = 1024" NL ; #undef NL return svn_io_file_create(svn_dirent_join(fs->path, PATH_CONFIG, pool), @@ -888,9 +987,13 @@ read_global_config(svn_fs_t *fs) { fs_fs_data_t *ffd = fs->fsap_data; + ffd->use_block_read + = svn_hash__get_bool(fs->config, SVN_FS_CONFIG_FSFS_BLOCK_READ, FALSE); + /* Ignore the user-specified larger block size if we don't use block-read. Defaulting to 4k gives us the same access granularity in format 7 as in older formats. */ + if (!ffd->use_block_read) ffd->block_size = MIN(0x1000, ffd->block_size); return SVN_NO_ERROR; @@ -938,16 +1041,18 @@ { fs_fs_data_t *ffd = fs->fsap_data; int format, max_files_per_dir; + svn_revnum_t min_log_addressing_rev; fs->path = apr_pstrdup(fs->pool, path); /* Read the FS format number. */ - SVN_ERR(read_format(&format, &max_files_per_dir, + SVN_ERR(read_format(&format, &max_files_per_dir, &min_log_addressing_rev, path_format(fs, pool), pool)); /* Now we've got a format number no matter what. */ ffd->format = format; ffd->max_files_per_dir = max_files_per_dir; + ffd->min_log_addressing_rev = min_log_addressing_rev; /* Read in and cache the repository uuid. */ SVN_ERR(read_uuid(fs, pool)); @@ -998,12 +1103,13 @@ svn_fs_t *fs = upgrade_baton->fs; fs_fs_data_t *ffd = fs->fsap_data; int format, max_files_per_dir; + svn_revnum_t min_log_addressing_rev; const char *format_path = path_format(fs, pool); svn_node_kind_t kind; svn_boolean_t needs_revprop_shard_cleanup = FALSE; /* Read the FS format number and max-files-per-dir setting. */ - SVN_ERR(read_format(&format, &max_files_per_dir, + SVN_ERR(read_format(&format, &max_files_per_dir, &min_log_addressing_rev, format_path, pool)); /* If the config file does not exist, create one. 
*/ @@ -1072,6 +1178,14 @@ pool)); } + if ( format < SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT + && max_files_per_dir > 0) + { + min_log_addressing_rev + = (ffd->youngest_rev_cache / max_files_per_dir + 1) + * max_files_per_dir; + } + /* We will need the UUID info shortly ... Read it before the format bump as the UUID file still uses the old format. */ @@ -1081,6 +1195,7 @@ down will use the format from FS to create missing info. */ ffd->format = SVN_FS_FS__FORMAT_NUMBER; ffd->max_files_per_dir = max_files_per_dir; + ffd->min_log_addressing_rev = min_log_addressing_rev; /* Always add / bump the instance ID such that no form of caching accidentally uses outdated information. Keep the UUID. */ @@ -1398,11 +1513,75 @@ write_revision_zero(svn_fs_t *fs, apr_pool_t *scratch_pool) { - const char *path_revision_zero = svn_fs_fs__path_rev(fs, 0, scratch_pool); + /* Use an explicit sub-pool to have full control over temp file lifetimes. + * Since we have it, use it for everything else as well. */ + apr_pool_t *subpool = svn_pool_create(scratch_pool); + const char *path_revision_zero = svn_fs_fs__path_rev(fs, 0, subpool); apr_hash_t *proplist; svn_string_t date; /* Write out a rev file for revision 0. */ + if (svn_fs_fs__use_log_addressing(fs, 0)) + { + apr_array_header_t *index_entries; + svn_fs_fs__p2l_entry_t *entry; + svn_fs_fs__revision_file_t *rev_file; + const char *l2p_proto_index, *p2l_proto_index; + + /* Write a skeleton r0 with no indexes. */ + SVN_ERR(svn_io_file_create(path_revision_zero, + "PLAIN\nEND\nENDREP\n" + "id: 0.0.r0/2\n" + "type: dir\n" + "count: 0\n" + "text: 0 3 4 4 " + "2d2977d1c96f487abe4a1e202dd03b4e\n" + "cpath: /\n" + "\n\n", subpool)); + + /* Construct the index P2L contents: describe the 3 items we have. + Be sure to create them in on-disk order. 
*/ + index_entries = apr_array_make(subpool, 3, sizeof(entry)); + + entry = apr_pcalloc(subpool, sizeof(*entry)); + entry->offset = 0; + entry->size = 17; + entry->type = SVN_FS_FS__ITEM_TYPE_DIR_REP; + entry->item.revision = 0; + entry->item.number = SVN_FS_FS__ITEM_INDEX_FIRST_USER; + APR_ARRAY_PUSH(index_entries, svn_fs_fs__p2l_entry_t *) = entry; + + entry = apr_pcalloc(subpool, sizeof(*entry)); + entry->offset = 17; + entry->size = 89; + entry->type = SVN_FS_FS__ITEM_TYPE_NODEREV; + entry->item.revision = 0; + entry->item.number = SVN_FS_FS__ITEM_INDEX_ROOT_NODE; + APR_ARRAY_PUSH(index_entries, svn_fs_fs__p2l_entry_t *) = entry; + + entry = apr_pcalloc(subpool, sizeof(*entry)); + entry->offset = 106; + entry->size = 1; + entry->type = SVN_FS_FS__ITEM_TYPE_CHANGES; + entry->item.revision = 0; + entry->item.number = SVN_FS_FS__ITEM_INDEX_CHANGES; + APR_ARRAY_PUSH(index_entries, svn_fs_fs__p2l_entry_t *) = entry; + + /* Now re-open r0, create proto-index files from our entries and + rewrite the index section of r0. */ + SVN_ERR(svn_fs_fs__open_pack_or_rev_file_writable(&rev_file, fs, 0, + subpool, subpool)); + SVN_ERR(svn_fs_fs__p2l_index_from_p2l_entries(&p2l_proto_index, fs, + rev_file, index_entries, + subpool, subpool)); + SVN_ERR(svn_fs_fs__l2p_index_from_p2l_entries(&l2p_proto_index, fs, + index_entries, + subpool, subpool)); + SVN_ERR(svn_fs_fs__add_index_data(fs, rev_file->file, l2p_proto_index, + p2l_proto_index, 0, subpool)); + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + } + else SVN_ERR(svn_io_file_create(path_revision_zero, "PLAIN\nEND\nENDREP\n" "id: 0.0.r0/17\n" @@ -1411,17 +1590,18 @@ "text: 0 0 4 4 " "2d2977d1c96f487abe4a1e202dd03b4e\n" "cpath: /\n" - "\n\n17 107\n", scratch_pool)); + "\n\n17 107\n", subpool)); - SVN_ERR(svn_io_set_file_read_only(path_revision_zero, FALSE, scratch_pool)); + SVN_ERR(svn_io_set_file_read_only(path_revision_zero, FALSE, subpool)); /* Set a date on revision 0. 
*/ - date.data = svn_time_to_cstring(apr_time_now(), scratch_pool); + date.data = svn_time_to_cstring(apr_time_now(), subpool); date.len = strlen(date.data); - proplist = apr_hash_make(scratch_pool); + proplist = apr_hash_make(subpool); svn_hash_sets(proplist, SVN_PROP_REVISION_DATE, &date); - SVN_ERR(svn_fs_fs__set_revision_proplist(fs, 0, proplist, scratch_pool)); + SVN_ERR(svn_fs_fs__set_revision_proplist(fs, 0, proplist, subpool)); + svn_pool_destroy(subpool); return SVN_NO_ERROR; } @@ -1474,6 +1654,12 @@ if (format >= SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT) ffd->max_files_per_dir = SVN_FS_FS_DEFAULT_MAX_FILES_PER_DIR; + /* Select the addressing mode depending on the format. */ + if (format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT) + ffd->min_log_addressing_rev = 0; + else + ffd->min_log_addressing_rev = SVN_INVALID_REVNUM; + /* Create the revision data directories. */ if (ffd->max_files_per_dir) SVN_ERR(svn_io_make_dir_recursively(svn_fs_fs__path_rev_shard(fs, 0, Index: subversion/libsvn_fs_fs/hotcopy.c =================================================================== --- subversion/libsvn_fs_fs/hotcopy.c (revision 1623988) +++ subversion/libsvn_fs_fs/hotcopy.c (working copy) @@ -423,6 +423,15 @@ SVN_ERR(hotcopy_remove_file(dst_subdir_shard, apr_psprintf(iterpool, "%ld", rev), iterpool)); + if (remove_indexes && svn_fs_fs__use_log_addressing(dst_fs, rev)) + { + SVN_ERR(hotcopy_remove_file(dst_subdir_shard, + apr_psprintf(iterpool, "%ld.p2l", rev), + iterpool)); + SVN_ERR(hotcopy_remove_file(dst_subdir_shard, + apr_psprintf(iterpool, "%ld.l2p", rev), + iterpool)); + } } svn_pool_destroy(iterpool); @@ -627,7 +636,7 @@ if (pack_end_rev > dst_youngest) { SVN_ERR(svn_fs_fs__write_current(dst_fs, pack_end_rev, 0, 0, - iterpool)); + iterpool)); } /* When notifying about packed shards, make things simpler by either @@ -711,7 +720,7 @@ { SVN_ERR(svn_fs_fs__write_current(dst_fs, rev, 0, 0, iterpool)); - } + } } if (notify_func && !skipped) @@ -1038,6 +1047,7 @@ 
dst_fs->path = apr_pstrdup(pool, dst_path); dst_ffd->max_files_per_dir = src_ffd->max_files_per_dir; + dst_ffd->min_log_addressing_rev = src_ffd->min_log_addressing_rev; dst_ffd->format = src_ffd->format; /* Create the revision data directories. */ Index: subversion/libsvn_fs_fs/index.c =================================================================== --- subversion/libsvn_fs_fs/index.c (revision 1623988) +++ subversion/libsvn_fs_fs/index.c (working copy) @@ -40,7 +40,1487 @@ #include "../libsvn_fs/fs-loader.h" +/* maximum length of a uint64 in an 7/8b encoding */ +#define ENCODED_INT_LENGTH 10 + +/* Page tables in the log-to-phys index file exclusively contain entries + * of this type to describe position and size of a given page. + */ +typedef struct l2p_page_table_entry_t +{ + /* global offset on the page within the index file */ + apr_uint64_t offset; + + /* number of mapping entries in that page */ + apr_uint32_t entry_count; + + /* size of the page on disk (in the index file) */ + apr_uint32_t size; +} l2p_page_table_entry_t; + +/* Master run-time data structure of an log-to-phys index. It contains + * the page tables of every revision covered by that index - but not the + * pages themselves. + */ +typedef struct l2p_header_t +{ + /* first revision covered by this index */ + svn_revnum_t first_revision; + + /* number of revisions covered */ + apr_size_t revision_count; + + /* (max) number of entries per page */ + apr_uint32_t page_size; + + /* indexes into PAGE_TABLE that mark the first page of the respective + * revision. PAGE_TABLE_INDEX[REVISION_COUNT] points to the end of + * PAGE_TABLE. + */ + apr_size_t * page_table_index; + + /* Page table covering all pages in the index */ + l2p_page_table_entry_t * page_table; +} l2p_header_t; + +/* Run-time data structure containing a single log-to-phys index page. 
+ */ +typedef struct l2p_page_t +{ + /* number of entries in the OFFSETS array */ + apr_uint32_t entry_count; + + /* global file offsets (item index is the array index) within the + * packed or non-packed rev file. Offset will be -1 for unused / + * invalid item index values. */ + apr_uint64_t *offsets; +} l2p_page_t; + +/* All of the log-to-phys proto index file consist of entries of this type. + */ +typedef struct l2p_proto_entry_t +{ + /* phys offset + 1 of the data container. 0 for "new revision" entries. */ + apr_uint64_t offset; + + /* corresponding item index. 0 for "new revision" entries. */ + apr_uint64_t item_index; +} l2p_proto_entry_t; + +/* Master run-time data structure of an phys-to-log index. It contains + * an array with one offset value for each rev file cluster. + */ +typedef struct p2l_header_t +{ + /* first revision covered by the index (and rev file) */ + svn_revnum_t first_revision; + + /* number of bytes in the rev files covered by each p2l page */ + apr_uint64_t page_size; + + /* number of pages / clusters in that rev file */ + apr_size_t page_count; + + /* number of bytes in the rev file */ + apr_uint64_t file_size; + + /* offsets of the pages / cluster descriptions within the index file */ + apr_off_t *offsets; +} p2l_header_t; + +/* + * packed stream + * + * This is a utility object that will read files containing 7b/8b encoded + * unsigned integers. It decodes them in batches to minimize overhead + * and supports random access to random file locations. + */ + +/* How many numbers we will pre-fetch and buffer in a packed number stream. + */ +enum { MAX_NUMBER_PREFETCH = 64 }; + +/* Prefetched number entry in a packed number stream. + */ +typedef struct value_position_pair_t +{ + /* prefetched number */ + apr_uint64_t value; + + /* number of bytes read, *including* this number, since the buffer start */ + apr_size_t total_len; +} value_position_pair_t; + +/* State of a prefetching packed number stream. 
It will read compressed + * index data efficiently and present it as a series of non-packed uint64. + */ +struct svn_fs_fs__packed_number_stream_t +{ + /* underlying data file containing the packed values */ + apr_file_t *file; + + /* Offset within FILE at which the stream data starts + * (i.e. which offset will reported as offset 0 by packed_stream_offset). */ + apr_off_t stream_start; + + /* First offset within FILE after the stream data. + * Attempts to read beyond this will cause an "Unexpected End Of Stream" + * error. */ + apr_off_t stream_end; + + /* number of used entries in BUFFER (starting at index 0) */ + apr_size_t used; + + /* index of the next number to read from the BUFFER (0 .. USED). + * If CURRENT == USED, we need to read more data upon get() */ + apr_size_t current; + + /* offset in FILE from which the first entry in BUFFER has been read */ + apr_off_t start_offset; + + /* offset in FILE from which the next number has to be read */ + apr_off_t next_offset; + + /* read the file in chunks of this size */ + apr_size_t block_size; + + /* pool to be used for file ops etc. */ + apr_pool_t *pool; + + /* buffer for prefetched values */ + value_position_pair_t buffer[MAX_NUMBER_PREFETCH]; +}; + +/* Return an svn_error_t * object for error ERR on STREAM with the given + * MESSAGE string. The latter must have a placeholder for the index file + * name ("%s") and the current read offset (e.g. "0x%lx"). + */ +static svn_error_t * +stream_error_create(svn_fs_fs__packed_number_stream_t *stream, + apr_status_t err, + const char *message) +{ + const char *file_name; + apr_off_t offset = 0; + SVN_ERR(svn_io_file_name_get(&file_name, stream->file, + stream->pool)); + SVN_ERR(svn_io_file_seek(stream->file, SEEK_CUR, &offset, stream->pool)); + + return svn_error_createf(err, NULL, message, file_name, + (apr_uint64_t)offset); +} + +/* Read up to MAX_NUMBER_PREFETCH numbers from the STREAM->NEXT_OFFSET in + * STREAM->FILE and buffer them. 
+ * + * We don't want GCC and others to inline this (infrequently called) + * function into packed_stream_get() because it prevents the latter from + * being inlined itself. + */ +SVN__PREVENT_INLINE +static svn_error_t * +packed_stream_read(svn_fs_fs__packed_number_stream_t *stream) +{ + unsigned char buffer[MAX_NUMBER_PREFETCH]; + apr_size_t read = 0; + apr_size_t i; + value_position_pair_t *target; + apr_off_t block_start = 0; + apr_off_t block_left = 0; + apr_status_t err; + + /* all buffered data will have been read starting here */ + stream->start_offset = stream->next_offset; + + /* packed numbers are usually not aligned to MAX_NUMBER_PREFETCH blocks, + * i.e. the last number has been incomplete (and not buffered in stream) + * and need to be re-read. Therefore, always correct the file pointer. + */ + SVN_ERR(svn_io_file_aligned_seek(stream->file, stream->block_size, + &block_start, stream->next_offset, + stream->pool)); + + /* prefetch at least one number but, if feasible, don't cross block + * boundaries. This shall prevent jumping back and forth between two + * blocks because the extra data was not actually request _now_. + */ + read = sizeof(buffer); + block_left = stream->block_size - (stream->next_offset - block_start); + if (block_left >= 10 && block_left < read) + read = block_left; + + /* Don't read beyond the end of the file section that belongs to this + * index / stream. */ + read = MIN(read, stream->stream_end - stream->next_offset); + + err = apr_file_read(stream->file, buffer, &read); + if (err && !APR_STATUS_IS_EOF(err)) + return stream_error_create(stream, err, + _("Can't read index file '%s' at offset 0x%" APR_UINT64_T_HEX_FMT)); + + /* if the last number is incomplete, trim it from the buffer */ + while (read > 0 && buffer[read-1] >= 0x80) + --read; + + /* we call read() only if get() requires more data. So, there must be + * at least *one* further number. 
*/ + if SVN__PREDICT_FALSE(read == 0) + return stream_error_create(stream, err, + _("Unexpected end of index file %s at offset 0x%"APR_UINT64_T_HEX_FMT)); + + /* parse file buffer and expand into stream buffer */ + target = stream->buffer; + for (i = 0; i < read;) + { + if (buffer[i] < 0x80) + { + /* numbers < 128 are relatively frequent and particularly easy + * to decode. Give them special treatment. */ + target->value = buffer[i]; + ++i; + target->total_len = i; + ++target; + } + else + { + apr_uint64_t value = 0; + apr_uint64_t shift = 0; + while (buffer[i] >= 0x80) + { + value += ((apr_uint64_t)buffer[i] & 0x7f) << shift; + shift += 7; + ++i; + } + + target->value = value + ((apr_uint64_t)buffer[i] << shift); + ++i; + target->total_len = i; + ++target; + + /* let's catch corrupted data early. It would surely cause + * havoc further down the line. */ + if SVN__PREDICT_FALSE(shift > 8 * sizeof(value)) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Corrupt index: number too large")); + } + } + + /* update stream state */ + stream->used = target - stream->buffer; + stream->next_offset = stream->start_offset + i; + stream->current = 0; + + return SVN_NO_ERROR; +} + +/* Create and open a packed number stream reading from offsets START to + * END in FILE and return it in *STREAM. Access the file in chunks of + * BLOCK_SIZE bytes. Use POOL for allocations. 
+ */ +static svn_error_t * +packed_stream_open(svn_fs_fs__packed_number_stream_t **stream, + apr_file_t *file, + apr_off_t start, + apr_off_t end, + apr_size_t block_size, + apr_pool_t *pool) +{ + svn_fs_fs__packed_number_stream_t *result + = apr_palloc(pool, sizeof(*result)); + + result->pool = pool; + result->file = file; + result->stream_start = start; + result->stream_end = end; + + result->used = 0; + result->current = 0; + result->start_offset = result->stream_start; + result->next_offset = result->stream_start; + result->block_size = block_size; + + *stream = result; + + return SVN_NO_ERROR; +} + +/* + * The forced inline is required for performance reasons: This is a very + * hot code path (called for every item we read) but e.g. GCC would rather + * chose to inline packed_stream_read() here, preventing packed_stream_get + * from being inlined itself. + */ +SVN__FORCE_INLINE +static svn_error_t* +packed_stream_get(apr_uint64_t *value, + svn_fs_fs__packed_number_stream_t *stream) +{ + if (stream->current == stream->used) + SVN_ERR(packed_stream_read(stream)); + + *value = stream->buffer[stream->current].value; + ++stream->current; + + return SVN_NO_ERROR; +} + +/* Navigate STREAM to packed stream offset OFFSET. There will be no checks + * whether the given OFFSET is valid. + */ +static void +packed_stream_seek(svn_fs_fs__packed_number_stream_t *stream, + apr_off_t offset) +{ + apr_off_t file_offset = offset + stream->stream_start; + + if ( stream->used == 0 + || offset < stream->start_offset + || offset >= stream->next_offset) + { + /* outside buffered data. Next get() will read() from OFFSET. */ + stream->start_offset = file_offset; + stream->next_offset = file_offset; + stream->current = 0; + stream->used = 0; + } + else + { + /* Find the suitable location in the stream buffer. + * Since our buffer is small, it is efficient enough to simply scan + * it for the desired position. 
*/ + apr_size_t i; + for (i = 0; i < stream->used; ++i) + if (stream->buffer[i].total_len > file_offset - stream->start_offset) + break; + + stream->current = i; + } +} + +/* Return the packed stream offset of at which the next number in the stream + * can be found. + */ +static apr_off_t +packed_stream_offset(svn_fs_fs__packed_number_stream_t *stream) +{ + apr_off_t file_offset + = stream->current == 0 + ? stream->start_offset + : stream->buffer[stream->current-1].total_len + stream->start_offset; + + return file_offset - stream->stream_start; +} + +/* Encode VALUE as 7/8b into P and return the number of bytes written. + * This will be used when _writing_ packed data. packed_stream_* is for + * read operations only. + */ +static apr_size_t +encode_uint(unsigned char *p, apr_uint64_t value) +{ + unsigned char *start = p; + while (value >= 0x80) + { + *p = (unsigned char)((value % 0x80) + 0x80); + value /= 0x80; + ++p; + } + + *p = (unsigned char)(value % 0x80); + return (p - start) + 1; +} + +/* Encode VALUE as 7/8b into P and return the number of bytes written. + * This maps signed ints onto unsigned ones. + */ +static apr_size_t +encode_int(unsigned char *p, apr_int64_t value) +{ + return encode_uint(p, (apr_uint64_t)(value < 0 ? -1 - 2*value : 2*value)); +} + +/* Map unsigned VALUE back to signed integer. + */ +static apr_int64_t +decode_int(apr_uint64_t value) +{ + return (apr_int64_t)(value % 2 ? -1 - value / 2 : value / 2); +} + +/* + * log-to-phys index + */ + +/* Write ENTRY to log-to-phys PROTO_INDEX file and verify the results. + * Use POOL for allocations. 
+ */ +static svn_error_t * +write_entry_to_proto_index(apr_file_t *proto_index, + l2p_proto_entry_t entry, + apr_pool_t *pool) +{ + apr_size_t written = sizeof(entry); + + SVN_ERR(svn_io_file_write(proto_index, &entry, &written, pool)); + SVN_ERR_ASSERT(written == sizeof(entry)); + + return SVN_NO_ERROR; +} + +/* Write the log-2-phys index page description for the l2p_page_entry_t + * array ENTRIES, starting with element START up to but not including END. + * Write the resulting representation into BUFFER. Use POOL for temporary + * allocations. + */ +static svn_error_t * +encode_l2p_page(apr_array_header_t *entries, + int start, + int end, + svn_spillbuf_t *buffer, + apr_pool_t *pool) +{ + unsigned char encoded[ENCODED_INT_LENGTH]; + int i; + const apr_uint64_t *values = (const apr_uint64_t *)entries->elts; + apr_uint64_t last_value = 0; + + /* encode items */ + for (i = start; i < end; ++i) + { + apr_int64_t diff = values[i] - last_value; + last_value = values[i]; + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_int(encoded, diff), pool)); + } + + return SVN_NO_ERROR; +} + svn_error_t * +svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index, + const char *file_name, + apr_pool_t *pool) +{ + SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE + | APR_CREATE | APR_APPEND | APR_BUFFERED, + APR_OS_DEFAULT, pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index, + apr_pool_t *pool) +{ + l2p_proto_entry_t entry; + entry.offset = 0; + entry.item_index = 0; + + return svn_error_trace(write_entry_to_proto_index(proto_index, entry, + pool)); +} + +svn_error_t * +svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index, + apr_off_t offset, + apr_uint64_t item_index, + apr_pool_t *pool) +{ + l2p_proto_entry_t entry; + + /* make sure the conversion to uint64 works */ + SVN_ERR_ASSERT(offset >= -1); + + /* we support offset '-1' as a "not used" indication */ + 
entry.offset = (apr_uint64_t)offset + 1; + + /* make sure we can use item_index as an array index when building the + * final index file */ + SVN_ERR_ASSERT(item_index < UINT_MAX / 2); + entry.item_index = item_index; + + return svn_error_trace(write_entry_to_proto_index(proto_index, entry, + pool)); +} + +svn_error_t * +svn_fs_fs__l2p_index_append(svn_fs_t *fs, + apr_file_t *index_file, + const char *proto_file_name, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_file_t *proto_index = NULL; + int i; + apr_uint64_t entry; + svn_boolean_t eof = FALSE; + unsigned char encoded[ENCODED_INT_LENGTH]; + + int last_page_count = 0; /* total page count at the start of + the current revision */ + + /* temporary data structures that collect the data which will be moved + to the target file in a second step */ + apr_pool_t *local_pool = svn_pool_create(pool); + apr_pool_t *iterpool = svn_pool_create(local_pool); + apr_array_header_t *page_counts + = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); + apr_array_header_t *page_sizes + = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); + apr_array_header_t *entry_counts + = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); + + /* collect the item offsets and sub-item value for the current revision */ + apr_array_header_t *entries + = apr_array_make(local_pool, 256, sizeof(apr_uint64_t)); + + /* 64k blocks, spill after 16MB */ + svn_spillbuf_t *buffer + = svn_spillbuf__create(0x10000, 0x1000000, local_pool); + + /* Paranoia check that makes later casting to int32 safe. + * The current implementation is limited to 2G entries per page. 
*/ + if (ffd->l2p_page_size > APR_INT32_MAX) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL, + _("L2P index page size %s" + " exceeds current limit of 2G entries"), + apr_psprintf(local_pool, "%" APR_UINT64_T_FMT, + ffd->l2p_page_size)); + + /* start at the beginning of the source file */ + SVN_ERR(svn_io_file_open(&proto_index, proto_file_name, + APR_READ | APR_CREATE | APR_BUFFERED, + APR_OS_DEFAULT, pool)); + + /* process all entries until we fail due to EOF */ + for (entry = 0; !eof; ++entry) + { + l2p_proto_entry_t proto_entry; + apr_size_t read = 0; + + /* (attempt to) read the next entry from the source */ + SVN_ERR(svn_io_file_read_full2(proto_index, + &proto_entry, sizeof(proto_entry), + &read, &eof, local_pool)); + SVN_ERR_ASSERT(eof || read == sizeof(proto_entry)); + + /* handle new revision */ + if ((entry > 0 && proto_entry.offset == 0) || eof) + { + /* dump entries, grouped into pages */ + + int entry_count = 0; + for (i = 0; i < entries->nelts; i += entry_count) + { + /* 1 page with up to 8k entries */ + apr_size_t last_buffer_size = svn_spillbuf__get_size(buffer); + + svn_pool_clear(iterpool); + + entry_count = ffd->l2p_page_size < entries->nelts - i + ? 
(int)ffd->l2p_page_size + : entries->nelts - i; + SVN_ERR(encode_l2p_page(entries, i, i + entry_count, + buffer, iterpool)); + + APR_ARRAY_PUSH(entry_counts, apr_uint64_t) = entry_count; + APR_ARRAY_PUSH(page_sizes, apr_uint64_t) + = svn_spillbuf__get_size(buffer) - last_buffer_size; + } + + apr_array_clear(entries); + + /* store the number of pages in this revision */ + APR_ARRAY_PUSH(page_counts, apr_uint64_t) + = page_sizes->nelts - last_page_count; + + last_page_count = page_sizes->nelts; + } + else + { + int idx; + + /* store the mapping in our array */ + if (proto_entry.item_index > APR_INT32_MAX) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL, + _("Item index %s too large " + "in l2p proto index for revision %ld"), + apr_psprintf(local_pool, "%" APR_UINT64_T_FMT, + proto_entry.item_index), + revision + page_counts->nelts); + + idx = (int)proto_entry.item_index; + while (idx >= entries->nelts) + APR_ARRAY_PUSH(entries, apr_uint64_t) = 0; + + APR_ARRAY_IDX(entries, idx, apr_uint64_t) = proto_entry.offset; + } + } + + /* close the source file */ + SVN_ERR(svn_io_file_close(proto_index, local_pool)); + + /* Paranoia check that makes later casting to int32 safe. + * The current implementation is limited to 2G pages per index. 
*/ + if (page_counts->nelts > APR_INT32_MAX) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL, + _("L2P index page count %d" + " exceeds current limit of 2G pages"), + page_counts->nelts); + + /* write header info */ + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, revision), + NULL, local_pool)); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, ffd->l2p_page_size), + NULL, local_pool)); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, page_counts->nelts), + NULL, local_pool)); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, page_sizes->nelts), + NULL, local_pool)); + + /* write the revision table */ + for (i = 0; i < page_counts->nelts; ++i) + { + apr_uint64_t value = APR_ARRAY_IDX(page_counts, i, apr_uint64_t); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, value), + NULL, local_pool)); + } + + /* write the page table */ + for (i = 0; i < page_sizes->nelts; ++i) + { + apr_uint64_t value = APR_ARRAY_IDX(page_sizes, i, apr_uint64_t); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, value), + NULL, local_pool)); + value = APR_ARRAY_IDX(entry_counts, i, apr_uint64_t); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, value), + NULL, local_pool)); + } + + /* append page contents */ + SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool), + svn_stream_from_aprfile2(index_file, TRUE, + local_pool), + NULL, NULL, local_pool)); + + svn_pool_destroy(local_pool); + + return SVN_NO_ERROR; +} + +/* If REV_FILE->L2P_STREAM is NULL, create a new stream for the log-to-phys + * index for REVISION in FS and return it in REV_FILE. 
+ */ +static svn_error_t * +auto_open_l2p_index(svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision) +{ + if (rev_file->l2p_stream == NULL) + { + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); + SVN_ERR(packed_stream_open(&rev_file->l2p_stream, + rev_file->file, + rev_file->l2p_offset, + rev_file->p2l_offset, + ffd->block_size, + rev_file->pool)); + } + + return SVN_NO_ERROR; +} + +/* Read the header data structure of the log-to-phys index for REVISION + * in FS and return it in *HEADER. Use REV_FILE to access on-disk data. + * Use POOL for allocations. + */ +static svn_error_t * +get_l2p_header_body(l2p_header_t **header, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_uint64_t value; + int i; + apr_size_t page, page_count; + apr_off_t offset; + l2p_header_t *result = apr_pcalloc(pool, sizeof(*result)); + apr_size_t page_table_index; + + pair_cache_key_t key; + key.revision = rev_file->start_revision; + key.second = rev_file->is_packed; + + SVN_ERR(auto_open_l2p_index(rev_file, fs, revision)); + packed_stream_seek(rev_file->l2p_stream, 0); + + /* Read the table sizes. Check the data for plausibility and + * consistency with other bits. 
*/ + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + result->first_revision = (svn_revnum_t)value; + if (result->first_revision != rev_file->start_revision) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Index rev / pack file revision numbers do not match")); + + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + result->page_size = (apr_uint32_t)value; + if (!result->page_size || (result->page_size & (result->page_size - 1))) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("L2P index page size is not a power of two")); + + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + result->revision_count = (int)value; + if ( result->revision_count != 1 + && result->revision_count != ffd->max_files_per_dir) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Invalid number of revisions in L2P index")); + + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + page_count = (apr_size_t)value; + if (page_count < result->revision_count) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Fewer L2P index pages than revisions")); + if (page_count > (rev_file->p2l_offset - rev_file->l2p_offset) / 2) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("L2P index page count implausibly large")); + + if ( result->first_revision > revision + || result->first_revision + result->revision_count <= revision) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Corrupt L2P index for r%ld only covers r%ld:%ld"), + revision, result->first_revision, + result->first_revision + result->revision_count); + + /* allocate the page tables */ + result->page_table + = apr_pcalloc(pool, page_count * sizeof(*result->page_table)); + result->page_table_index + = apr_pcalloc(pool, (result->revision_count + 1) + * sizeof(*result->page_table_index)); + + /* read per-revision page table sizes (i.e. 
number of pages per rev) */ + page_table_index = 0; + result->page_table_index[0] = page_table_index; + + for (i = 0; i < result->revision_count; ++i) + { + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + if (value == 0) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Revision with no L2P index pages")); + + page_table_index += (apr_size_t)value; + if (page_table_index > page_count) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("L2P page table exceeded")); + + result->page_table_index[i+1] = page_table_index; + } + + if (page_table_index != page_count) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Revisions do not cover the full L2P index page table")); + + /* read actual page tables */ + for (page = 0; page < page_count; ++page) + { + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + if (value == 0) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Empty L2P index page")); + + result->page_table[page].size = (apr_uint32_t)value; + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + if (value > result->page_size) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Page exceeds L2P index page size")); + + result->page_table[page].entry_count = (apr_uint32_t)value; + } + + /* correct the page description offsets */ + offset = packed_stream_offset(rev_file->l2p_stream); + for (page = 0; page < page_count; ++page) + { + result->page_table[page].offset = offset; + offset += result->page_table[page].size; + } + + /* return and cache the header */ + *header = result; + SVN_ERR(svn_cache__set(ffd->l2p_header_cache, &key, result, pool)); + + return SVN_NO_ERROR; +} + +/* Data structure that describes which l2p page info shall be extracted + * from the cache and contains the fields that receive the result. 
+ */ +typedef struct l2p_page_info_baton_t +{ + /* input data: we want the page covering (REVISION,ITEM_INDEX) */ + svn_revnum_t revision; + apr_uint64_t item_index; + + /* out data */ + /* page location and size of the page within the l2p index file */ + l2p_page_table_entry_t entry; + + /* page number within the pages for REVISION (not l2p index global!) */ + apr_uint32_t page_no; + + /* offset of ITEM_INDEX within that page */ + apr_uint32_t page_offset; + + /* revision identifying the l2p index file, also the first rev in that */ + svn_revnum_t first_revision; +} l2p_page_info_baton_t; + + +/* Utility function that copies the info requested by BATON->REVISION and + * BATON->ITEM_INDEX and from HEADER and PAGE_TABLE into the output fields + * of *BATON. Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +l2p_page_info_copy(l2p_page_info_baton_t *baton, + const l2p_header_t *header, + const l2p_page_table_entry_t *page_table, + const apr_size_t *page_table_index, + apr_pool_t *scratch_pool) +{ + /* revision offset within the index file */ + apr_size_t rel_revision = baton->revision - header->first_revision; + if (rel_revision >= header->revision_count) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_REVISION , NULL, + _("Revision %ld not covered by item index"), + baton->revision); + + /* select the relevant page */ + if (baton->item_index < header->page_size) + { + /* most revs fit well into a single page */ + baton->page_offset = (apr_uint32_t)baton->item_index; + baton->page_no = 0; + baton->entry = page_table[page_table_index[rel_revision]]; + } + else + { + const l2p_page_table_entry_t *first_entry; + const l2p_page_table_entry_t *last_entry; + apr_uint64_t max_item_index; + + /* range of pages for this rev */ + first_entry = page_table + page_table_index[rel_revision]; + last_entry = page_table + page_table_index[rel_revision + 1]; + + /* do we hit a valid index page? 
*/ + max_item_index = (apr_uint64_t)header->page_size + * (last_entry - first_entry); + if (baton->item_index >= max_item_index) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL, + _("Item index %s exceeds l2p limit " + "of %s for revision %ld"), + apr_psprintf(scratch_pool, + "%" APR_UINT64_T_FMT, + baton->item_index), + apr_psprintf(scratch_pool, + "%" APR_UINT64_T_FMT, + max_item_index), + baton->revision); + + /* all pages are of the same size and full, except for the last one */ + baton->page_offset = (apr_uint32_t)(baton->item_index % header->page_size); + baton->page_no = (apr_uint32_t)(baton->item_index / header->page_size); + baton->entry = first_entry[baton->page_no]; + } + + baton->first_revision = header->first_revision; + + return SVN_NO_ERROR; +} + +/* Implement svn_cache__partial_getter_func_t: copy the data requested in + * l2p_page_info_baton_t *BATON from l2p_header_t *DATA into the output + * fields in *BATON. + */ +static svn_error_t * +l2p_page_info_access_func(void **out, + const void *data, + apr_size_t data_len, + void *baton, + apr_pool_t *result_pool) +{ + /* resolve all pointer values of in-cache data */ + const l2p_header_t *header = data; + const l2p_page_table_entry_t *page_table + = svn_temp_deserializer__ptr(header, + (const void *const *)&header->page_table); + const apr_size_t *page_table_index + = svn_temp_deserializer__ptr(header, + (const void *const *)&header->page_table_index); + + /* copy the info */ + return l2p_page_info_copy(baton, header, page_table, page_table_index, + result_pool); +} + +/* Get the page info requested in *BATON from FS and set the output fields + * in *BATON. Use REV_FILE for on-disk file access. + * Use POOL for allocations. 
+ */ +static svn_error_t * +get_l2p_page_info(l2p_page_info_baton_t *baton, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + l2p_header_t *result; + svn_boolean_t is_cached = FALSE; + void *dummy = NULL; + + /* try to find the info in the cache */ + pair_cache_key_t key; + key.revision = rev_file->start_revision; + key.second = rev_file->is_packed; + SVN_ERR(svn_cache__get_partial((void**)&dummy, &is_cached, + ffd->l2p_header_cache, &key, + l2p_page_info_access_func, baton, + pool)); + if (is_cached) + return SVN_NO_ERROR; + + /* read from disk, cache and copy the result */ + SVN_ERR(get_l2p_header_body(&result, rev_file, fs, baton->revision, pool)); + SVN_ERR(l2p_page_info_copy(baton, result, result->page_table, + result->page_table_index, pool)); + + return SVN_NO_ERROR; +} + +/* Data request structure used by l2p_page_table_access_func. + */ +typedef struct l2p_page_table_baton_t +{ + /* revision for which to read the page table */ + svn_revnum_t revision; + + /* page table entries (of type l2p_page_table_entry_t). + * Must be created by caller and will be filled by callee. */ + apr_array_header_t *pages; +} l2p_page_table_baton_t; + +/* Implement svn_cache__partial_getter_func_t: copy the data requested in + * l2p_page_baton_t *BATON from l2p_page_t *DATA into BATON->PAGES and *OUT. 
+ */
+static svn_error_t *
+l2p_page_table_access_func(void **out,
+                           const void *data,
+                           apr_size_t data_len,
+                           void *baton,
+                           apr_pool_t *result_pool)
+{
+  /* resolve in-cache pointers */
+  l2p_page_table_baton_t *table_baton = baton;
+  const l2p_header_t *header = (const l2p_header_t *)data;
+  const l2p_page_table_entry_t *page_table
+    = svn_temp_deserializer__ptr(header,
+                                 (const void *const *)&header->page_table);
+  const apr_size_t *page_table_index
+    = svn_temp_deserializer__ptr(header,
+                                 (const void *const *)&header->page_table_index);
+
+  /* copy the revision's page table into BATON */
+  apr_size_t rel_revision = table_baton->revision - header->first_revision;
+  if (rel_revision < header->revision_count)
+    {
+      const l2p_page_table_entry_t *entry
+        = page_table + page_table_index[rel_revision];
+      const l2p_page_table_entry_t *last_entry
+        = page_table + page_table_index[rel_revision + 1];
+
+      for (; entry < last_entry; ++entry)
+        APR_ARRAY_PUSH(table_baton->pages, l2p_page_table_entry_t)
+          = *entry;
+    }
+
+  /* set output as a courtesy to the caller */
+  *out = table_baton->pages;
+
+  return SVN_NO_ERROR;
+}
+
+/* Read the l2p index page table for REVISION in FS from cache and return
+ * it in PAGES.  The latter must be provided by the caller (and can be
+ * re-used); existing entries will be removed before writing the result.
+ * If the data cannot be found in the cache, the result will be empty
+ * (it never can be empty for a valid REVISION if the data is cached).
+ * Use the info from REV_FILE to determine pack / rev file properties.
+ * Use POOL for temporary allocations.
+ */ +static svn_error_t * +get_l2p_page_table(apr_array_header_t *pages, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_boolean_t is_cached = FALSE; + l2p_page_table_baton_t baton; + + pair_cache_key_t key; + key.revision = rev_file->start_revision; + key.second = rev_file->is_packed; + + apr_array_clear(pages); + baton.revision = revision; + baton.pages = pages; + SVN_ERR(svn_cache__get_partial((void**)&pages, &is_cached, + ffd->l2p_header_cache, &key, + l2p_page_table_access_func, &baton, pool)); + + return SVN_NO_ERROR; +} + +/* From the log-to-phys index file starting at START_REVISION in FS, read + * the mapping page identified by TABLE_ENTRY and return it in *PAGE. + * Use REV_FILE to access on-disk files. + * Use POOL for allocations. + */ +static svn_error_t * +get_l2p_page(l2p_page_t **page, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t start_revision, + l2p_page_table_entry_t *table_entry, + apr_pool_t *pool) +{ + apr_uint32_t i; + l2p_page_t *result = apr_pcalloc(pool, sizeof(*result)); + apr_uint64_t last_value = 0; + + /* open index file and select page */ + SVN_ERR(auto_open_l2p_index(rev_file, fs, start_revision)); + packed_stream_seek(rev_file->l2p_stream, table_entry->offset); + + /* initialize the page content */ + result->entry_count = table_entry->entry_count; + result->offsets = apr_pcalloc(pool, result->entry_count + * sizeof(*result->offsets)); + + /* read all page entries (offsets in rev file and container sub-items) */ + for (i = 0; i < result->entry_count; ++i) + { + apr_uint64_t value = 0; + SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); + last_value += decode_int(value); + result->offsets[i] = last_value - 1; + } + + *page = result; + + return SVN_NO_ERROR; +} + +/* Utility function. Read the l2p index pages for REVISION in FS from + * REV_FILE and put them into the cache. 
Skip page number EXCLUDED_PAGE_NO
+ * (use -1 for 'skip none') and pages outside the MIN_OFFSET, MAX_OFFSET
+ * range in the l2p index file.  The index is being identified by
+ * FIRST_REVISION.  PAGES is a scratch container provided by the caller.
+ * SCRATCH_POOL is used for temporary allocations.
+ *
+ * This function may be a no-op if the header cache lookup fails / misses.
+ */
+static svn_error_t *
+prefetch_l2p_pages(svn_boolean_t *end,
+                   svn_fs_t *fs,
+                   svn_fs_fs__revision_file_t *rev_file,
+                   svn_revnum_t first_revision,
+                   svn_revnum_t revision,
+                   apr_array_header_t *pages,
+                   int excluded_page_no,
+                   apr_off_t min_offset,
+                   apr_off_t max_offset,
+                   apr_pool_t *scratch_pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  int i;
+  apr_pool_t *iterpool;
+  svn_fs_fs__page_cache_key_t key = { 0 };
+
+  /* get the page table for REVISION from cache */
+  *end = FALSE;
+  SVN_ERR(get_l2p_page_table(pages, fs, rev_file, revision, scratch_pool));
+  if (pages->nelts == 0 || rev_file->l2p_stream == NULL)
+    {
+      /* not found -> we can't continue without hitting the disk again */
+      *end = TRUE;
+      return SVN_NO_ERROR;
+    }
+
+  /* prefetch pages individually until all are done or we found one in
+   * the cache */
+  iterpool = svn_pool_create(scratch_pool);
+  assert(revision <= APR_UINT32_MAX);
+  key.revision = (apr_uint32_t)revision;
+  key.is_packed = rev_file->is_packed;
+
+  for (i = 0; i < pages->nelts && !*end; ++i)
+    {
+      svn_boolean_t is_cached;
+
+      l2p_page_table_entry_t *entry
+        = &APR_ARRAY_IDX(pages, i, l2p_page_table_entry_t);
+      svn_pool_clear(iterpool);
+
+      if (i == excluded_page_no)
+        continue;
+
+      /* skip pages outside the specified index file range */
+      if (   entry->offset < min_offset
+          || entry->offset + entry->size > max_offset)
+        {
+          *end = TRUE;
+          continue;
+        }
+
+      /* page already in cache?
*/
+      key.page = i;
+      SVN_ERR(svn_cache__has_key(&is_cached, ffd->l2p_page_cache,
+                                 &key, iterpool));
+      if (!is_cached)
+        {
+          /* not in cache -> read from stream (data already buffered in APR)
+           * and cache the result */
+          l2p_page_t *page = NULL;
+          SVN_ERR(get_l2p_page(&page, rev_file, fs, first_revision, entry,
+                               iterpool));
+
+          SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page,
+                                 iterpool));
+        }
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Request data structure for l2p_entry_access_func.
+ */
+typedef struct l2p_entry_baton_t
+{
+  /* in data */
+  /* revision. Used for error messages only */
+  svn_revnum_t revision;
+
+  /* item index to look up. Used for error messages only */
+  apr_uint64_t item_index;
+
+  /* offset within the cached page */
+  apr_uint32_t page_offset;
+
+  /* out data */
+  /* absolute item or container offset in rev / pack file */
+  apr_uint64_t offset;
+} l2p_entry_baton_t;
+
+/* Return the rev / pack file offset of the item at BATON->PAGE_OFFSET in
+ * OFFSETS of PAGE and write it to *OFFSET.
+ */
+static svn_error_t *
+l2p_page_get_entry(l2p_entry_baton_t *baton,
+                   const l2p_page_t *page,
+                   const apr_uint64_t *offsets,
+                   apr_pool_t *scratch_pool)
+{
+  /* overflow check */
+  if (page->entry_count <= baton->page_offset)
+    return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL,
+                             _("Item index %s"
+                               " too large in revision %ld"),
+                             apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
+                                          baton->item_index),
+                             baton->revision);
+
+  /* return the result */
+  baton->offset = offsets[baton->page_offset];
+
+  return SVN_NO_ERROR;
+}
+
+/* Implement svn_cache__partial_getter_func_t: copy the data requested in
+ * l2p_entry_baton_t *BATON from l2p_page_t *DATA into BATON->OFFSET.
+ * *OUT remains unchanged.
+ */ +static svn_error_t * +l2p_entry_access_func(void **out, + const void *data, + apr_size_t data_len, + void *baton, + apr_pool_t *result_pool) +{ + /* resolve all in-cache pointers */ + const l2p_page_t *page = data; + const apr_uint64_t *offsets + = svn_temp_deserializer__ptr(page, (const void *const *)&page->offsets); + + /* return the requested data */ + return l2p_page_get_entry(baton, page, offsets, result_pool); +} + +/* Using the log-to-phys indexes in FS, find the absolute offset in the + * rev file for (REVISION, ITEM_INDEX) and return it in *OFFSET. + * Use POOL for allocations. + */ +static svn_error_t * +l2p_index_lookup(apr_off_t *offset, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_uint64_t item_index, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + l2p_page_info_baton_t info_baton; + l2p_entry_baton_t page_baton; + l2p_page_t *page = NULL; + svn_fs_fs__page_cache_key_t key = { 0 }; + svn_boolean_t is_cached = FALSE; + void *dummy = NULL; + + /* read index master data structure and extract the info required to + * access the l2p index page for (REVISION,ITEM_INDEX)*/ + info_baton.revision = revision; + info_baton.item_index = item_index; + SVN_ERR(get_l2p_page_info(&info_baton, rev_file, fs, pool)); + + /* try to find the page in the cache and get the OFFSET from it */ + page_baton.revision = revision; + page_baton.item_index = item_index; + page_baton.page_offset = info_baton.page_offset; + + assert(revision <= APR_UINT32_MAX); + key.revision = (apr_uint32_t)revision; + key.is_packed = svn_fs_fs__is_packed_rev(fs, revision); + key.page = info_baton.page_no; + + SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, + ffd->l2p_page_cache, &key, + l2p_entry_access_func, &page_baton, pool)); + + if (!is_cached) + { + /* we need to read the info from disk (might already be in the + * APR file buffer, though) */ + apr_array_header_t *pages; + svn_revnum_t prefetch_revision; + svn_revnum_t 
last_revision + = info_baton.first_revision + + (key.is_packed ? ffd->max_files_per_dir : 1); + svn_boolean_t end; + apr_off_t max_offset + = APR_ALIGN(info_baton.entry.offset + info_baton.entry.size, + ffd->block_size); + apr_off_t min_offset = max_offset - ffd->block_size; + + /* read the relevant page */ + SVN_ERR(get_l2p_page(&page, rev_file, fs, info_baton.first_revision, + &info_baton.entry, pool)); + + /* cache the page and extract the result we need */ + SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, pool)); + SVN_ERR(l2p_page_get_entry(&page_baton, page, page->offsets, pool)); + + if (ffd->use_block_read) + { + apr_pool_t *iterpool = svn_pool_create(pool); + + /* prefetch pages from following and preceding revisions */ + pages = apr_array_make(pool, 16, sizeof(l2p_page_table_entry_t)); + end = FALSE; + for (prefetch_revision = revision; + prefetch_revision < last_revision && !end; + ++prefetch_revision) + { + int excluded_page_no = prefetch_revision == revision + ? info_baton.page_no + : -1; + svn_pool_clear(iterpool); + + SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file, + info_baton.first_revision, + prefetch_revision, pages, + excluded_page_no, min_offset, + max_offset, iterpool)); + } + + end = FALSE; + for (prefetch_revision = revision-1; + prefetch_revision >= info_baton.first_revision && !end; + --prefetch_revision) + { + svn_pool_clear(iterpool); + + SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file, + info_baton.first_revision, + prefetch_revision, pages, -1, + min_offset, max_offset, iterpool)); + } + + svn_pool_destroy(iterpool); + } + } + + *offset = page_baton.offset; + + return SVN_NO_ERROR; +} + +/* Using the log-to-phys proto index in transaction TXN_ID in FS, find the + * absolute offset in the proto rev file for the given ITEM_INDEX and return + * it in *OFFSET. Use POOL for allocations. 
+ */ +static svn_error_t * +l2p_proto_index_lookup(apr_off_t *offset, + svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_uint64_t item_index, + apr_pool_t *pool) +{ + svn_boolean_t eof = FALSE; + apr_file_t *file = NULL; + SVN_ERR(svn_io_file_open(&file, + svn_fs_fs__path_l2p_proto_index(fs, txn_id, pool), + APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool)); + + /* process all entries until we fail due to EOF */ + *offset = -1; + while (!eof) + { + l2p_proto_entry_t entry; + apr_size_t read = 0; + + /* (attempt to) read the next entry from the source */ + SVN_ERR(svn_io_file_read_full2(file, &entry, sizeof(entry), + &read, &eof, pool)); + SVN_ERR_ASSERT(eof || read == sizeof(entry)); + + /* handle new revision */ + if (!eof && entry.item_index == item_index) + { + *offset = (apr_off_t)entry.offset - 1; + break; + } + } + + SVN_ERR(svn_io_file_close(file, pool)); + + return SVN_NO_ERROR; +} + +/* Read the log-to-phys header info of the index covering REVISION from FS + * and return it in *HEADER. REV_FILE provides the pack / rev status. + * Use POOL for allocations. 
+ */
+static svn_error_t *
+get_l2p_header(l2p_header_t **header,
+               svn_fs_fs__revision_file_t *rev_file,
+               svn_fs_t *fs,
+               svn_revnum_t revision,
+               apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_boolean_t is_cached = FALSE;
+
+  /* first, try cache lookup */
+  pair_cache_key_t key;
+  key.revision = rev_file->start_revision;
+  key.second = rev_file->is_packed;
+  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache,
+                         &key, pool));
+  if (is_cached)
+    return SVN_NO_ERROR;
+
+  /* read from disk and cache the result */
+  SVN_ERR(get_l2p_header_body(header, rev_file, fs, revision, pool));
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
+                           svn_fs_t *fs,
+                           svn_revnum_t start_rev,
+                           apr_size_t count,
+                           apr_pool_t *pool)
+{
+  l2p_header_t *header = NULL;
+  svn_revnum_t revision;
+  svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
+  svn_fs_fs__revision_file_t *rev_file;
+  apr_pool_t *header_pool = svn_pool_create(pool);
+
+  /* read index master data structure for the index covering START_REV */
+  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start_rev,
+                                           header_pool, header_pool));
+  SVN_ERR(get_l2p_header(&header, rev_file, fs, start_rev, header_pool));
+  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
+
+  /* Determine the length of the item index list for each rev.
+   * Read new index headers as required. */
+  *max_ids = apr_array_make(pool, (int)count, sizeof(apr_uint64_t));
+  for (revision = start_rev; revision < last_rev; ++revision)
+    {
+      apr_uint64_t full_page_count;
+      apr_uint64_t item_count;
+      apr_size_t first_page_index, last_page_index;
+
+      if (revision >= header->first_revision + header->revision_count)
+        {
+          /* need to read the next index. Clear up memory used for the
+           * previous one. Note that intermittent pack runs do not change
+           * the number of items in a revision, i.e. there is no consistency
+           * issue here.
*/ + svn_pool_clear(header_pool); + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, + header_pool, header_pool)); + SVN_ERR(get_l2p_header(&header, rev_file, fs, revision, + header_pool)); + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + } + + /* in a revision with N index pages, the first N-1 index pages are + * "full", i.e. contain HEADER->PAGE_SIZE entries */ + first_page_index + = header->page_table_index[revision - header->first_revision]; + last_page_index + = header->page_table_index[revision - header->first_revision + 1]; + full_page_count = last_page_index - first_page_index - 1; + item_count = full_page_count * header->page_size + + header->page_table[last_page_index - 1].entry_count; + + APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count; + } + + svn_pool_destroy(header_pool); + return SVN_NO_ERROR; +} + +svn_error_t * svn_fs_fs__item_offset(apr_off_t *absolute_position, svn_fs_t *fs, svn_fs_fs__revision_file_t *rev_file, @@ -52,9 +1532,25 @@ svn_error_t *err = SVN_NO_ERROR; if (txn_id) { + if (svn_fs_fs__use_log_addressing(fs, txn_id->revision + 1)) + { + /* the txn is going to produce a rev with logical addressing. + So, we need to get our info from the (proto) index file. 
*/ + SVN_ERR(l2p_proto_index_lookup(absolute_position, fs, txn_id, + item_index, pool)); + } + else + { /* for data in txns, item_index *is* the offset */ *absolute_position = item_index; } + } + else if (svn_fs_fs__use_log_addressing(fs, revision)) + { + /* ordinary index lookup */ + SVN_ERR(l2p_index_lookup(absolute_position, fs, rev_file, revision, + item_index, pool)); + } else if (rev_file->is_packed) { /* pack file with physical addressing */ @@ -71,3 +1567,1517 @@ return svn_error_trace(err); } + +/* + * phys-to-log index + */ +svn_error_t * +svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index, + const char *file_name, + apr_pool_t *pool) +{ + SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE + | APR_CREATE | APR_APPEND | APR_BUFFERED, + APR_OS_DEFAULT, pool)); + + return SVN_NO_ERROR; +} + + +svn_error_t * +svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + apr_size_t written = sizeof(*entry); + + SVN_ERR(svn_io_file_write_full(proto_index, entry, sizeof(*entry), + &written, pool)); + SVN_ERR_ASSERT(written == sizeof(*entry)); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset, + apr_file_t *proto_index, + apr_pool_t *pool) +{ + apr_off_t offset = 0; + + /* Empty index file? */ + SVN_ERR(svn_io_file_seek(proto_index, APR_END, &offset, pool)); + if (offset == 0) + { + *next_offset = 0; + } + else + { + /* At least one entry. Read last entry. */ + svn_fs_fs__p2l_entry_t entry; + offset -= sizeof(entry); + + SVN_ERR(svn_io_file_seek(proto_index, APR_SET, &offset, pool)); + SVN_ERR(svn_io_file_read_full2(proto_index, &entry, sizeof(entry), + NULL, NULL, pool)); + + /* Return next offset. 
*/ + *next_offset = entry.offset + entry.size; + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__p2l_index_append(svn_fs_t *fs, + apr_file_t *index_file, + const char *proto_file_name, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_uint64_t page_size = ffd->p2l_page_size; + apr_file_t *proto_index = NULL; + int i; + svn_boolean_t eof = FALSE; + unsigned char encoded[ENCODED_INT_LENGTH]; + svn_revnum_t last_revision = revision; + apr_uint64_t last_compound = 0; + + apr_uint64_t last_entry_end = 0; + apr_uint64_t last_page_end = 0; + apr_size_t last_buffer_size = 0; /* byte offset in the spill buffer at + the begin of the current revision */ + apr_uint64_t file_size = 0; + + /* temporary data structures that collect the data which will be moved + to the target file in a second step */ + apr_pool_t *local_pool = svn_pool_create(pool); + apr_array_header_t *table_sizes + = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); + + /* 64k blocks, spill after 16MB */ + svn_spillbuf_t *buffer + = svn_spillbuf__create(0x10000, 0x1000000, local_pool); + + /* for loop temps ... */ + apr_pool_t *iter_pool = svn_pool_create(pool); + + /* start at the beginning of the source file */ + SVN_ERR(svn_io_file_open(&proto_index, proto_file_name, + APR_READ | APR_CREATE | APR_BUFFERED, + APR_OS_DEFAULT, pool)); + + /* process all entries until we fail due to EOF */ + while (!eof) + { + svn_fs_fs__p2l_entry_t entry; + apr_size_t read = 0; + apr_uint64_t entry_end; + svn_boolean_t new_page = svn_spillbuf__get_size(buffer) == 0; + apr_uint64_t compound; + apr_int64_t rev_diff, compound_diff; + + svn_pool_clear(iter_pool); + + /* (attempt to) read the next entry from the source */ + SVN_ERR(svn_io_file_read_full2(proto_index, &entry, sizeof(entry), + &read, &eof, iter_pool)); + SVN_ERR_ASSERT(eof || read == sizeof(entry)); + + /* "unused" (and usually non-existent) section to cover the offsets + at the end the of the last page. 
*/ + if (eof) + { + file_size = last_entry_end; + + entry.offset = last_entry_end; + entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset; + entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED; + entry.fnv1_checksum = 0; + entry.item.revision = last_revision; + entry.item.number = 0; + } + else + { + /* fix-up items created when the txn's target rev was unknown */ + if (entry.item.revision == SVN_INVALID_REVNUM) + entry.item.revision = revision; + } + + /* end pages if entry is extending beyond their boundaries */ + entry_end = entry.offset + entry.size; + while (entry_end - last_page_end > page_size) + { + apr_uint64_t buffer_size = svn_spillbuf__get_size(buffer); + APR_ARRAY_PUSH(table_sizes, apr_uint64_t) + = buffer_size - last_buffer_size; + + last_buffer_size = buffer_size; + last_page_end += page_size; + new_page = TRUE; + } + + /* this entry starts a new table -> store its offset + (all following entries in the same table will store sizes only) */ + if (new_page) + { + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_uint(encoded, entry.offset), + iter_pool)); + last_revision = revision; + last_compound = 0; + } + + /* write simple item entry */ + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_uint(encoded, entry.size), + iter_pool)); + + rev_diff = entry.item.revision - last_revision; + last_revision = entry.item.revision; + + compound = entry.item.number * 8 + entry.type; + compound_diff = compound - last_compound; + last_compound = compound; + + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_int(encoded, compound_diff), + iter_pool)); + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_int(encoded, rev_diff), + iter_pool)); + SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, + encode_uint(encoded, entry.fnv1_checksum), + iter_pool)); + + last_entry_end = entry_end; + } + + /* close the source file */ + SVN_ERR(svn_io_file_close(proto_index, local_pool)); + + /* store 
length of last table */ + APR_ARRAY_PUSH(table_sizes, apr_uint64_t) + = svn_spillbuf__get_size(buffer) - last_buffer_size; + + /* write the start revision, file size and page size */ + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, revision), + NULL, local_pool)); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, file_size), + NULL, local_pool)); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, page_size), + NULL, local_pool)); + + /* write the page table (actually, the sizes of each page description) */ + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, table_sizes->nelts), + NULL, local_pool)); + for (i = 0; i < table_sizes->nelts; ++i) + { + apr_uint64_t value = APR_ARRAY_IDX(table_sizes, i, apr_uint64_t); + SVN_ERR(svn_io_file_write_full(index_file, encoded, + encode_uint(encoded, value), + NULL, local_pool)); + } + + /* append page contents */ + SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool), + svn_stream_from_aprfile2(index_file, TRUE, + local_pool), + NULL, NULL, local_pool)); + + svn_pool_destroy(iter_pool); + svn_pool_destroy(local_pool); + + return SVN_NO_ERROR; +} + +/* If REV_FILE->P2L_STREAM is NULL, create a new stream for the phys-to-log + * index for REVISION in FS using the rev / pack file provided by REV_FILE. + */ +static svn_error_t * +auto_open_p2l_index(svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision) +{ + if (rev_file->p2l_stream == NULL) + { + fs_fs_data_t *ffd = fs->fsap_data; + + SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); + SVN_ERR(packed_stream_open(&rev_file->p2l_stream, + rev_file->file, + rev_file->p2l_offset, + rev_file->footer_offset, + ffd->block_size, rev_file->pool)); + } + + return SVN_NO_ERROR; +} + + +/* Read the header data structure of the phys-to-log index for REVISION in + * FS and return it in *HEADER. Use REV_FILE to access on-disk data. 
+ * Use POOL for allocations. + */ +static svn_error_t * +get_p2l_header(p2l_header_t **header, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_uint64_t value; + apr_size_t i; + apr_off_t offset; + p2l_header_t *result; + svn_boolean_t is_cached = FALSE; + + /* look for the header data in our cache */ + pair_cache_key_t key; + key.revision = rev_file->start_revision; + key.second = rev_file->is_packed; + + SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->p2l_header_cache, + &key, pool)); + if (is_cached) + return SVN_NO_ERROR; + + /* not found -> must read it from disk. + * Open index file or position read pointer to the begin of the file */ + if (rev_file->p2l_stream == NULL) + SVN_ERR(auto_open_p2l_index(rev_file, fs, key.revision)); + else + packed_stream_seek(rev_file->p2l_stream, 0); + + /* allocate result data structure */ + result = apr_pcalloc(pool, sizeof(*result)); + + /* Read table sizes, check them for plausibility and allocate page array. 
*/ + SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); + result->first_revision = (svn_revnum_t)value; + if (result->first_revision != rev_file->start_revision) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Index rev / pack file revision numbers do not match")); + + SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); + result->file_size = value; + if (result->file_size != rev_file->l2p_offset) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Index offset and rev / pack file size do not match")); + + SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); + result->page_size = value; + if (!result->page_size || (result->page_size & (result->page_size - 1))) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("P2L index page size is not a power of two")); + + SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); + result->page_count = (apr_size_t)value; + if (result->page_count != (result->file_size - 1) / result->page_size + 1) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("P2L page count does not match rev / pack file size")); + + result->offsets + = apr_pcalloc(pool, (result->page_count + 1) * sizeof(*result->offsets)); + + /* read page sizes and derive page description offsets from them */ + result->offsets[0] = 0; + for (i = 0; i < result->page_count; ++i) + { + SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); + result->offsets[i+1] = result->offsets[i] + (apr_off_t)value; + } + + /* correct the offset values */ + offset = packed_stream_offset(rev_file->p2l_stream); + for (i = 0; i <= result->page_count; ++i) + result->offsets[i] += offset; + + /* cache the header data */ + SVN_ERR(svn_cache__set(ffd->p2l_header_cache, &key, result, pool)); + + /* return the result */ + *header = result; + + return SVN_NO_ERROR; +} + +/* Data structure that describes which p2l page info shall be extracted + * from the cache and contains the fields that receive 
the result. + */ +typedef struct p2l_page_info_baton_t +{ + /* input variables */ + /* revision identifying the index file */ + svn_revnum_t revision; + + /* offset within the page in rev / pack file */ + apr_off_t offset; + + /* output variables */ + /* page containing OFFSET */ + apr_size_t page_no; + + /* first revision in this p2l index */ + svn_revnum_t first_revision; + + /* offset within the p2l index file describing this page */ + apr_off_t start_offset; + + /* offset within the p2l index file describing the following page */ + apr_off_t next_offset; + + /* PAGE_NO * PAGE_SIZE (if <= OFFSET) */ + apr_off_t page_start; + + /* total number of pages indexed */ + apr_size_t page_count; + + /* size of each page in pack / rev file */ + apr_uint64_t page_size; +} p2l_page_info_baton_t; + +/* From HEADER and the list of all OFFSETS, fill BATON with the page info + * requested by BATON->OFFSET. + */ +static void +p2l_page_info_copy(p2l_page_info_baton_t *baton, + const p2l_header_t *header, + const apr_off_t *offsets) +{ + /* if the requested offset is out of bounds, return info for + * a zero-sized empty page right behind the last page. + */ + if (baton->offset / header->page_size < header->page_count) + { + baton->page_no = baton->offset / header->page_size; + baton->start_offset = offsets[baton->page_no]; + baton->next_offset = offsets[baton->page_no + 1]; + baton->page_size = header->page_size; + } + else + { + baton->page_no = header->page_count; + baton->start_offset = offsets[baton->page_no]; + baton->next_offset = offsets[baton->page_no]; + baton->page_size = 0; + } + + baton->first_revision = header->first_revision; + baton->page_start = (apr_off_t)(header->page_size * baton->page_no); + baton->page_count = header->page_count; +} + +/* Implement svn_cache__partial_getter_func_t: extract the p2l page info + * requested by BATON and return it in BATON. 
+ */
+static svn_error_t *
+p2l_page_info_func(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ /* all the pointers to cached data we need */
+ const p2l_header_t *header = data;
+ const apr_off_t *offsets
+ = svn_temp_deserializer__ptr(header,
+ (const void *const *)&header->offsets);
+
+ /* copy data from cache to BATON */
+ p2l_page_info_copy(baton, header, offsets);
+ return SVN_NO_ERROR;
+}
+
+/* Read the header data structure of the phys-to-log index for revision
+ * BATON->REVISION in FS. Return in *BATON all info relevant to read the
+ * index page for the rev / pack file offset BATON->OFFSET.
+ * Use REV_FILE to access on-disk data. Use POOL for allocations.
+ */
+static svn_error_t *
+get_p2l_page_info(p2l_page_info_baton_t *baton,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_t *fs,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ p2l_header_t *header;
+ svn_boolean_t is_cached = FALSE;
+ void *dummy = NULL;
+
+ /* look for the header data in our cache */
+ pair_cache_key_t key;
+ key.revision = rev_file->start_revision;
+ key.second = rev_file->is_packed;
+
+ SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, ffd->p2l_header_cache,
+ &key, p2l_page_info_func, baton, pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+
+ SVN_ERR(get_p2l_header(&header, rev_file, fs, baton->revision, pool));
+
+ /* copy the requested info into *BATON */
+ p2l_page_info_copy(baton, header, header->offsets);
+
+ return SVN_NO_ERROR;
+}
+
+/* Read a mapping entry from the phys-to-log index STREAM and append it to
+ * RESULT. *ITEM_OFFSET contains the phys offset for the entry and will
+ * be moved forward by the size of the entry. Use POOL for allocations. 
+ */ +static svn_error_t * +read_entry(svn_fs_fs__packed_number_stream_t *stream, + apr_off_t *item_offset, + svn_revnum_t *last_revision, + apr_uint64_t *last_compound, + apr_array_header_t *result, + apr_pool_t *pool) +{ + apr_uint64_t value; + + svn_fs_fs__p2l_entry_t entry; + + entry.offset = *item_offset; + SVN_ERR(packed_stream_get(&value, stream)); + entry.size = (apr_off_t)value; + + SVN_ERR(packed_stream_get(&value, stream)); + *last_compound += decode_int(value); + + entry.type = (int)(*last_compound & 7); + entry.item.number = *last_compound / 8; + + /* Verify item type. */ + if (entry.type > SVN_FS_FS__ITEM_TYPE_CHANGES) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Invalid item type in P2L index")); + if ( entry.type == SVN_FS_FS__ITEM_TYPE_CHANGES + && entry.item.number != SVN_FS_FS__ITEM_INDEX_CHANGES) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Changed path list must have item number 1")); + + SVN_ERR(packed_stream_get(&value, stream)); + *last_revision += (svn_revnum_t)decode_int(value); + entry.item.revision = *last_revision; + + SVN_ERR(packed_stream_get(&value, stream)); + entry.fnv1_checksum = (apr_uint32_t)value; + + /* Some of the index data for empty rev / pack file sections will not be + * used during normal operation. Thus, we have strict rules for the + * contents of those unused fields. */ + if (entry.type == SVN_FS_FS__ITEM_TYPE_UNUSED) + if ( entry.item.number != SVN_FS_FS__ITEM_INDEX_UNUSED + || entry.fnv1_checksum != 0) + return svn_error_create(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, NULL, + _("Empty regions must have item number 0 and checksum 0")); + + APR_ARRAY_PUSH(result, svn_fs_fs__p2l_entry_t) = entry; + *item_offset += entry.size; + + return SVN_NO_ERROR; +} + +/* Read the phys-to-log mappings for the cluster beginning at rev file + * offset PAGE_START from the index for START_REVISION in FS. 
The data
+ * can be found in the index page beginning at START_OFFSET with the next
+ * page beginning at NEXT_OFFSET. PAGE_SIZE is the P2L index page size.
+ * Return the relevant index entries in *ENTRIES. Use REV_FILE to access
+ * on-disk data. Use POOL for other allocations.
+ */
+static svn_error_t *
+get_p2l_page(apr_array_header_t **entries,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_t *fs,
+ svn_revnum_t start_revision,
+ apr_off_t start_offset,
+ apr_off_t next_offset,
+ apr_off_t page_start,
+ apr_uint64_t page_size,
+ apr_pool_t *pool)
+{
+ apr_uint64_t value;
+ apr_array_header_t *result
+ = apr_array_make(pool, 16, sizeof(svn_fs_fs__p2l_entry_t));
+ apr_off_t item_offset;
+ apr_off_t offset;
+ svn_revnum_t last_revision;
+ apr_uint64_t last_compound;
+
+ /* open index and navigate to page start */
+ SVN_ERR(auto_open_p2l_index(rev_file, fs, start_revision));
+ packed_stream_seek(rev_file->p2l_stream, start_offset);
+
+ /* read rev file offset of the first page entry (all page entries will
+ * only store their sizes). 
*/
+ SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
+ item_offset = (apr_off_t)value;
+
+ /* read all entries of this page */
+ last_revision = start_revision;
+ last_compound = 0;
+ do
+ {
+ SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset, &last_revision,
+ &last_compound, result, pool));
+ offset = packed_stream_offset(rev_file->p2l_stream);
+ }
+ while (offset < next_offset);
+
+ /* if we haven't covered the cluster end yet, we must read the first
+ * entry of the next page */
+ if (item_offset < page_start + page_size)
+ {
+ SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
+ item_offset = (apr_off_t)value;
+ last_revision = start_revision;
+ last_compound = 0;
+ SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset, &last_revision,
+ &last_compound, result, pool));
+ }
+
+ *entries = result;
+
+ return SVN_NO_ERROR;
+}
+
+/* If it cannot be found in FS's caches, read the p2l index page selected
+ * by BATON->OFFSET from REV_FILE. Don't read the page if it precedes
+ * MIN_OFFSET. Set *END to TRUE if the caller should stop prefetching.
+ *
+ * *BATON will be updated with the selected page's info and SCRATCH_POOL
+ * will be used for temporary allocations. If the data is already in the
+ * cache, decrease *LEAKING_BUCKET and increase it otherwise. With that
+ * pattern we will still read all pages from the block even if some of
+ * them survived in the cache. 
+ */ +static svn_error_t * +prefetch_p2l_page(svn_boolean_t *end, + int *leaking_bucket, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + p2l_page_info_baton_t *baton, + apr_off_t min_offset, + apr_pool_t *scratch_pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_boolean_t already_cached; + apr_array_header_t *page; + svn_fs_fs__page_cache_key_t key = { 0 }; + + /* fetch the page info */ + *end = FALSE; + baton->revision = baton->first_revision; + SVN_ERR(get_p2l_page_info(baton, rev_file, fs, scratch_pool)); + if (baton->start_offset < min_offset || !rev_file->p2l_stream) + { + /* page outside limits -> stop prefetching */ + *end = TRUE; + return SVN_NO_ERROR; + } + + /* do we have that page in our caches already? */ + assert(baton->first_revision <= APR_UINT32_MAX); + key.revision = (apr_uint32_t)baton->first_revision; + key.is_packed = svn_fs_fs__is_packed_rev(fs, baton->first_revision); + key.page = baton->page_no; + SVN_ERR(svn_cache__has_key(&already_cached, ffd->p2l_page_cache, + &key, scratch_pool)); + + /* yes, already cached */ + if (already_cached) + { + /* stop prefetching if most pages are already cached. */ + if (!--*leaking_bucket) + *end = TRUE; + + return SVN_NO_ERROR; + } + + ++*leaking_bucket; + + /* read from disk */ + SVN_ERR(get_p2l_page(&page, rev_file, fs, + baton->first_revision, + baton->start_offset, + baton->next_offset, + baton->page_start, + baton->page_size, + scratch_pool)); + + /* and put it into our cache */ + SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page, scratch_pool)); + + return SVN_NO_ERROR; +} + +/* Lookup & construct the baton and key information that we will need for + * a P2L page cache lookup. We want the page covering OFFSET in the rev / + * pack file containing REVSION in FS. Return the results in *PAGE_INFO_P + * and *KEY_P. Read data through REV_FILE. Use POOL for allocations. 
+ */ +static svn_error_t * +get_p2l_keys(p2l_page_info_baton_t *page_info_p, + svn_fs_fs__page_cache_key_t *key_p, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision, + apr_off_t offset, + apr_pool_t *pool) +{ + p2l_page_info_baton_t page_info; + + /* request info for the index pages that describes the pack / rev file + * contents at pack / rev file position OFFSET. */ + page_info.offset = offset; + page_info.revision = revision; + SVN_ERR(get_p2l_page_info(&page_info, rev_file, fs, pool)); + + /* if the offset refers to a non-existent page, bail out */ + if (page_info.page_count <= page_info.page_no) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_OVERFLOW , NULL, + _("Offset %s too large in revision %ld"), + apr_off_t_toa(pool, offset), revision); + + /* return results */ + if (page_info_p) + *page_info_p = page_info; + + /* construct cache key */ + if (key_p) + { + svn_fs_fs__page_cache_key_t key = { 0 }; + assert(page_info.first_revision <= APR_UINT32_MAX); + key.revision = (apr_uint32_t)page_info.first_revision; + key.is_packed = rev_file->is_packed; + key.page = page_info.page_no; + + *key_p = key; + } + + return SVN_NO_ERROR; +} + +/* qsort-compatible compare function that compares the OFFSET of the + * svn_fs_fs__p2l_entry_t in *LHS with the apr_off_t in *RHS. */ +static int +compare_start_p2l_entry(const void *lhs, + const void *rhs) +{ + const svn_fs_fs__p2l_entry_t *entry = lhs; + apr_off_t start = *(const apr_off_t*)rhs; + apr_off_t diff = entry->offset - start; + + /* restrict result to int */ + return diff < 0 ? -1 : (diff == 0 ? 0 : 1); +} + +/* From the PAGE_ENTRIES array of svn_fs_fs__p2l_entry_t, ordered + * by their OFFSET member, copy all elements overlapping the range + * [BLOCK_START, BLOCK_END) to ENTRIES. 
*/ +static void +append_p2l_entries(apr_array_header_t *entries, + apr_array_header_t *page_entries, + apr_off_t block_start, + apr_off_t block_end) +{ + const svn_fs_fs__p2l_entry_t *entry; + int idx = svn_sort__bsearch_lower_bound(page_entries, &block_start, + compare_start_p2l_entry); + + /* start at the first entry that overlaps with BLOCK_START */ + if (idx > 0) + { + entry = &APR_ARRAY_IDX(page_entries, idx - 1, svn_fs_fs__p2l_entry_t); + if (entry->offset + entry->size > block_start) + --idx; + } + + /* copy all entries covering the requested range */ + for ( ; idx < page_entries->nelts; ++idx) + { + entry = &APR_ARRAY_IDX(page_entries, idx, svn_fs_fs__p2l_entry_t); + if (entry->offset >= block_end) + break; + + APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t) = *entry; + } +} + +/* Auxilliary struct passed to p2l_entries_func selecting the relevant + * data range. */ +typedef struct p2l_entries_baton_t +{ + apr_off_t start; + apr_off_t end; +} p2l_entries_baton_t; + +/* Implement svn_cache__partial_getter_func_t: extract p2l entries from + * the page in DATA which overlap the p2l_entries_baton_t in BATON. + * The target array is already provided in *OUT. + */ +static svn_error_t * +p2l_entries_func(void **out, + const void *data, + apr_size_t data_len, + void *baton, + apr_pool_t *result_pool) +{ + apr_array_header_t *entries = *(apr_array_header_t **)out; + const apr_array_header_t *raw_page = data; + p2l_entries_baton_t *block = baton; + + /* Make PAGE a readable APR array. */ + apr_array_header_t page = *raw_page; + page.elts = (void *)svn_temp_deserializer__ptr(raw_page, + (const void * const *)&raw_page->elts); + + /* append relevant information to result */ + append_p2l_entries(entries, &page, block->start, block->end); + + return SVN_NO_ERROR; +} + + +/* Body of svn_fs_fs__p2l_index_lookup. However, do a single index page + * lookup and append the result to the ENTRIES array provided by the caller. + * Use successive calls to cover larger ranges. 
+ */ +static svn_error_t * +p2l_index_lookup(apr_array_header_t *entries, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_t *fs, + svn_revnum_t revision, + apr_off_t block_start, + apr_off_t block_end, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_fs_fs__page_cache_key_t key; + svn_boolean_t is_cached = FALSE; + p2l_page_info_baton_t page_info; + apr_array_header_t *local_result = entries; + + /* baton selecting the relevant entries from the one page we access */ + p2l_entries_baton_t block; + block.start = block_start; + block.end = block_end; + + /* if we requested an empty range, the result would be empty */ + SVN_ERR_ASSERT(block_start < block_end); + + /* look for the fist page of the range in our cache */ + SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, block_start, + pool)); + SVN_ERR(svn_cache__get_partial((void**)&local_result, &is_cached, + ffd->p2l_page_cache, &key, p2l_entries_func, + &block, pool)); + + if (!is_cached) + { + svn_boolean_t end; + apr_pool_t *iterpool = svn_pool_create(pool); + apr_off_t original_page_start = page_info.page_start; + int leaking_bucket = 4; + p2l_page_info_baton_t prefetch_info = page_info; + apr_array_header_t *page_entries; + + apr_off_t max_offset + = APR_ALIGN(page_info.next_offset, ffd->block_size); + apr_off_t min_offset + = APR_ALIGN(page_info.start_offset, ffd->block_size) - ffd->block_size; + + /* Since we read index data in larger chunks, we probably got more + * page data than we requested. Parse & cache that until either we + * encounter pages already cached or reach the end of the buffer. 
+ */ + + /* pre-fetch preceding pages */ + if (ffd->use_block_read) + { + end = FALSE; + prefetch_info.offset = original_page_start; + while (prefetch_info.offset >= prefetch_info.page_size && !end) + { + svn_pool_clear(iterpool); + + prefetch_info.offset -= prefetch_info.page_size; + SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file, + &prefetch_info, min_offset, + iterpool)); + } + } + + /* fetch page from disk and put it into the cache */ + SVN_ERR(get_p2l_page(&page_entries, rev_file, fs, + page_info.first_revision, + page_info.start_offset, + page_info.next_offset, + page_info.page_start, + page_info.page_size, iterpool)); + + SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page_entries, + iterpool)); + + /* append relevant information to result */ + append_p2l_entries(entries, page_entries, block_start, block_end); + + /* pre-fetch following pages */ + if (ffd->use_block_read) + { + end = FALSE; + leaking_bucket = 4; + prefetch_info = page_info; + prefetch_info.offset = original_page_start; + while ( prefetch_info.next_offset < max_offset + && prefetch_info.page_no + 1 < prefetch_info.page_count + && !end) + { + svn_pool_clear(iterpool); + + prefetch_info.offset += prefetch_info.page_size; + SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file, + &prefetch_info, min_offset, + iterpool)); + } + } + + svn_pool_destroy(iterpool); + } + + /* We access a valid page (otherwise, we had seen an error in the + * get_p2l_keys request). Hence, at least one entry must be found. */ + SVN_ERR_ASSERT(entries->nelts > 0); + + /* Add an "unused" entry if it extends beyond the end of the data file. + * Since the index page size might be smaller than the current data + * read block size, the trailing "unused" entry in this index may not + * fully cover the end of the last block. 
*/ + if (page_info.page_no + 1 >= page_info.page_count) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t); + + apr_off_t entry_end = entry->offset + entry->size; + if (entry_end < block_end) + { + if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + { + /* extend the terminal filler */ + entry->size = block_end - entry->offset; + } + else + { + /* No terminal filler. Add one. */ + entry = apr_array_push(entries); + entry->offset = entry_end; + entry->size = block_end - entry_end; + entry->type = SVN_FS_FS__ITEM_TYPE_UNUSED; + entry->fnv1_checksum = 0; + entry->item.revision = SVN_INVALID_REVNUM; + entry->item.number = SVN_FS_FS__ITEM_INDEX_UNUSED; + } + } + } + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_off_t block_start, + apr_off_t block_size, + apr_pool_t *pool) +{ + apr_off_t block_end = block_start + block_size; + + /* the receiving container */ + int last_count = 0; + apr_array_header_t *result = apr_array_make(pool, 16, + sizeof(svn_fs_fs__p2l_entry_t)); + + /* Fetch entries page-by-page. Since the p2l index is supposed to cover + * every single byte in the rev / pack file - even unused sections - + * every iteration must result in some progress. */ + while (block_start < block_end) + { + svn_fs_fs__p2l_entry_t *entry; + SVN_ERR(p2l_index_lookup(result, rev_file, fs, revision, block_start, + block_end, pool)); + SVN_ERR_ASSERT(result->nelts > 0); + + /* continue directly behind last item */ + entry = &APR_ARRAY_IDX(result, result->nelts-1, svn_fs_fs__p2l_entry_t); + block_start = entry->offset + entry->size; + + /* Some paranoia check. Successive iterations should never return + * duplicates but if it did, we might get into trouble later on. 
*/
+ if (last_count > 0 && last_count < result->nelts)
+ {
+ entry = &APR_ARRAY_IDX(result, last_count - 1,
+ svn_fs_fs__p2l_entry_t);
+ SVN_ERR_ASSERT(APR_ARRAY_IDX(result, last_count,
+ svn_fs_fs__p2l_entry_t).offset
+ >= entry->offset + entry->size);
+ }
+
+ last_count = result->nelts;
+ }
+
+ *entries = result;
+ return SVN_NO_ERROR;
+}
+
+/* compare_fn_t comparing a svn_fs_fs__p2l_entry_t at LHS with an offset
+ * RHS.
+ */
+static int
+compare_p2l_entry_offsets(const void *lhs, const void *rhs)
+{
+ const svn_fs_fs__p2l_entry_t *entry = (const svn_fs_fs__p2l_entry_t *)lhs;
+ apr_off_t offset = *(const apr_off_t *)rhs;
+
+ return entry->offset < offset ? -1 : (entry->offset == offset ? 0 : 1);
+}
+
+/* Cached data extraction utility. DATA is a P2L index page, e.g. an APR
+ * array of svn_fs_fs__p2l_entry_t elements. Return the entry for the item
+ * starting at OFFSET or NULL if that's not the start offset of any item.
+ */
+static svn_fs_fs__p2l_entry_t *
+get_p2l_entry_from_cached_page(const void *data,
+ apr_uint64_t offset,
+ apr_pool_t *pool)
+{
+ /* resolve all pointer values of in-cache data */
+ const apr_array_header_t *page = data;
+ apr_array_header_t *entries = apr_pmemdup(pool, page, sizeof(*page));
+ svn_fs_fs__p2l_entry_t *entry;
+
+ entries->elts = (char *)svn_temp_deserializer__ptr(page,
+ (const void *const *)&page->elts);
+
+ /* search for the offset we want */
+ entry = svn_sort__array_lookup(entries, &offset, NULL,
+ (int (*)(const void *, const void *))compare_p2l_entry_offsets);
+
+ /* return it, if it is a perfect match */
+ return entry ? apr_pmemdup(pool, entry, sizeof(*entry)) : NULL;
+}
+
+/* Implements svn_cache__partial_getter_func_t for P2L index pages, copying
+ * the entry for the apr_off_t at BATON into *OUT. *OUT will be NULL if
+ * there is no matching entry in the index page at DATA. 
+ */
+static svn_error_t *
+p2l_entry_lookup_func(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ svn_fs_fs__p2l_entry_t *entry
+ = get_p2l_entry_from_cached_page(data, *(apr_off_t *)baton, result_pool);
+
+ *out = entry && entry->offset == *(apr_off_t *)baton
+ ? apr_pmemdup(result_pool, entry, sizeof(*entry))
+ : NULL;
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry_p,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_revnum_t revision,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_fs_fs__page_cache_key_t key = { 0 };
+ svn_boolean_t is_cached = FALSE;
+ p2l_page_info_baton_t page_info;
+
+ *entry_p = NULL;
+
+ /* look for this info in our cache */
+ SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset,
+ pool));
+ SVN_ERR(svn_cache__get_partial((void**)entry_p, &is_cached,
+ ffd->p2l_page_cache, &key,
+ p2l_entry_lookup_func, &offset, pool));
+ if (!is_cached)
+ {
+ /* do a standard index lookup. This will automatically prefetch
+ * data to speed up future lookups. */
+ apr_array_header_t *entries = apr_array_make(pool, 1, sizeof(**entry_p));
+ SVN_ERR(p2l_index_lookup(entries, rev_file, fs, revision, offset,
+ offset + 1, pool));
+
+ /* Find the entry that we want. */
+ *entry_p = svn_sort__array_lookup(entries, &offset, NULL,
+ (int (*)(const void *, const void *))compare_p2l_entry_offsets);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT
+ * to the first offset not covered by this P2L index. 
+ */ +static svn_error_t * +p2l_get_max_offset_func(void **out, + const void *data, + apr_size_t data_len, + void *baton, + apr_pool_t *result_pool) +{ + const p2l_header_t *header = data; + apr_off_t max_offset = header->file_size; + *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset)); + + return SVN_NO_ERROR; +} + +/* Core functionality of to svn_fs_fs__p2l_get_max_offset with identical + * signature. */ +static svn_error_t * +p2l_get_max_offset(apr_off_t *offset, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + p2l_header_t *header; + svn_boolean_t is_cached = FALSE; + apr_off_t *offset_p; + + /* look for the header data in our cache */ + pair_cache_key_t key; + key.revision = rev_file->start_revision; + key.second = rev_file->is_packed; + + SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached, + ffd->p2l_header_cache, &key, + p2l_get_max_offset_func, NULL, pool)); + if (is_cached) + { + *offset = *offset_p; + return SVN_NO_ERROR; + } + + SVN_ERR(get_p2l_header(&header, rev_file, fs, revision, pool)); + *offset = header->file_size; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__p2l_get_max_offset(apr_off_t *offset, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_pool_t *pool) +{ + return svn_error_trace(p2l_get_max_offset(offset, fs, rev_file, revision, + pool)); +} + +/* Calculate the FNV1 checksum over the offset range in REV_FILE, covered by + * ENTRY. Store the result in ENTRY->FNV1_CHECKSUM. Use POOL for temporary + * allocations. */ +static svn_error_t * +calc_fnv1(svn_fs_fs__p2l_entry_t *entry, + svn_fs_fs__revision_file_t *rev_file, + apr_pool_t *pool) +{ + unsigned char buffer[4096]; + svn_checksum_t *checksum; + svn_checksum_ctx_t *context + = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool); + apr_off_t size = entry->size; + + /* Special rules apply to unused sections / items. 
The data must be a + * sequence of NUL bytes (not checked here) and the checksum is fixed to 0. + */ + if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + { + entry->fnv1_checksum = 0; + return SVN_NO_ERROR; + } + + /* Read the block and feed it to the checksum calculator. */ + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &entry->offset, pool)); + while (size > 0) + { + apr_size_t to_read = size > sizeof(buffer) + ? sizeof(buffer) + : (apr_size_t)size; + SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, to_read, NULL, + NULL, pool)); + SVN_ERR(svn_checksum_update(context, buffer, to_read)); + size -= to_read; + } + + /* Store final checksum in ENTRY. */ + SVN_ERR(svn_checksum_final(&checksum, context, pool)); + entry->fnv1_checksum = ntohl(*(const apr_uint32_t *)checksum->digest); + + return SVN_NO_ERROR; +} + +/* + * Index (re-)creation utilities. + */ + +svn_error_t * +svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + apr_array_header_t *entries, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + apr_file_t *proto_index; + + /* Use a subpool for immediate temp file cleanup at the end of this + * function. */ + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + int i; + + /* Create a proto-index file. */ + SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL, + svn_io_file_del_on_pool_cleanup, + result_pool, scratch_pool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_open(&proto_index, *protoname, + scratch_pool)); + + /* Write ENTRIES to proto-index file and calculate checksums as we go. */ + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t *); + svn_pool_clear(iterpool); + + SVN_ERR(calc_fnv1(entry, rev_file, iterpool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(proto_index, entry, + iterpool)); + } + + /* Convert proto-index into final index and move it into position. 
+ * Note that REV_FILE contains the start revision of the shard file if it + * has been packed while REVISION may be somewhere in the middle. For + * non-packed shards, they will have identical values. */ + SVN_ERR(svn_io_file_close(proto_index, iterpool)); + + /* Temp file cleanup. */ + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* A svn_sort__array compatible comparator function, sorting the + * svn_fs_fs__p2l_entry_t** given in LHS, RHS by revision. */ +static int +compare_p2l_entry_revision(const void *lhs, + const void *rhs) +{ + const svn_fs_fs__p2l_entry_t *lhs_entry + =*(const svn_fs_fs__p2l_entry_t **)lhs; + const svn_fs_fs__p2l_entry_t *rhs_entry + =*(const svn_fs_fs__p2l_entry_t **)rhs; + + if (lhs_entry->item.revision < rhs_entry->item.revision) + return -1; + + return lhs_entry->item.revision == rhs_entry->item.revision ? 0 : 1; +} + +svn_error_t * +svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname, + svn_fs_t *fs, + apr_array_header_t *entries, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + apr_file_t *proto_index; + + /* Use a subpool for immediate temp file cleanup at the end of this + * function. */ + apr_pool_t *iterpool = svn_pool_create(scratch_pool); + int i; + svn_revnum_t last_revision = SVN_INVALID_REVNUM; + svn_revnum_t revision = SVN_INVALID_REVNUM; + + /* L2P index must be written in revision order. + * Sort ENTRIES accordingly. */ + svn_sort__array(entries, compare_p2l_entry_revision); + + /* Find the first revision in the index + * (must exist since no truly empty revs are allowed). */ + for (i = 0; i < entries->nelts && !SVN_IS_VALID_REVNUM(revision); ++i) + revision = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *) + ->item.revision; + + /* Create the temporary proto-rev file. 
*/ + SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL, + svn_io_file_del_on_pool_cleanup, + result_pool, scratch_pool)); + SVN_ERR(svn_fs_fs__l2p_proto_index_open(&proto_index, *protoname, + scratch_pool)); + + /* Write all entries. */ + for (i = 0; i < entries->nelts; ++i) + { + const svn_fs_fs__p2l_entry_t *entry + = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *); + svn_pool_clear(iterpool); + + if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + continue; + + if (last_revision != entry->item.revision) + { + SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(proto_index, + scratch_pool)); + last_revision = entry->item.revision; + } + + SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(proto_index, + entry->offset, + entry->item.number, + iterpool)); + } + + /* Convert proto-index into final index and move it into position. */ + SVN_ERR(svn_io_file_close(proto_index, iterpool)); + + /* Temp file cleanup. */ + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + + +/* + * Standard (de-)serialization functions + */ + +svn_error_t * +svn_fs_fs__serialize_l2p_header(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool) +{ + l2p_header_t *header = in; + svn_temp_serializer__context_t *context; + svn_stringbuf_t *serialized; + apr_size_t page_count = header->page_table_index[header->revision_count]; + apr_size_t page_table_size = page_count * sizeof(*header->page_table); + apr_size_t index_size + = (header->revision_count + 1) * sizeof(*header->page_table_index); + apr_size_t data_size = sizeof(*header) + index_size + page_table_size; + + /* serialize header and all its elements */ + context = svn_temp_serializer__init(header, + sizeof(*header), + data_size + 32, + pool); + + /* page table index array */ + svn_temp_serializer__add_leaf(context, + (const void * const *)&header->page_table_index, + index_size); + + /* page table array */ + svn_temp_serializer__add_leaf(context, + (const void * const *)&header->page_table, + page_table_size); + + 
/* return the serialized result */ + serialized = svn_temp_serializer__get(context); + + *data = serialized->data; + *data_len = serialized->len; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__deserialize_l2p_header(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool) +{ + l2p_header_t *header = (l2p_header_t *)data; + + /* resolve the pointers in the struct */ + svn_temp_deserializer__resolve(header, (void**)&header->page_table_index); + svn_temp_deserializer__resolve(header, (void**)&header->page_table); + + /* done */ + *out = header; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__serialize_l2p_page(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool) +{ + l2p_page_t *page = in; + svn_temp_serializer__context_t *context; + svn_stringbuf_t *serialized; + apr_size_t of_table_size = page->entry_count * sizeof(*page->offsets); + + /* serialize struct and all its elements */ + context = svn_temp_serializer__init(page, + sizeof(*page), + of_table_size + sizeof(*page) + 32, + pool); + + /* offsets and sub_items arrays */ + svn_temp_serializer__add_leaf(context, + (const void * const *)&page->offsets, + of_table_size); + + /* return the serialized result */ + serialized = svn_temp_serializer__get(context); + + *data = serialized->data; + *data_len = serialized->len; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__deserialize_l2p_page(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool) +{ + l2p_page_t *page = data; + + /* resolve the pointers in the struct */ + svn_temp_deserializer__resolve(page, (void**)&page->offsets); + + /* done */ + *out = page; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__serialize_p2l_header(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool) +{ + p2l_header_t *header = in; + svn_temp_serializer__context_t *context; + svn_stringbuf_t *serialized; + apr_size_t table_size = (header->page_count + 1) * sizeof(*header->offsets); + + /* serialize 
header and all its elements */ + context = svn_temp_serializer__init(header, + sizeof(*header), + table_size + sizeof(*header) + 32, + pool); + + /* offsets array */ + svn_temp_serializer__add_leaf(context, + (const void * const *)&header->offsets, + table_size); + + /* return the serialized result */ + serialized = svn_temp_serializer__get(context); + + *data = serialized->data; + *data_len = serialized->len; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__deserialize_p2l_header(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool) +{ + p2l_header_t *header = data; + + /* resolve the only pointer in the struct */ + svn_temp_deserializer__resolve(header, (void**)&header->offsets); + + /* done */ + *out = header; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__serialize_p2l_page(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool) +{ + apr_array_header_t *page = in; + svn_temp_serializer__context_t *context; + svn_stringbuf_t *serialized; + apr_size_t table_size = page->elt_size * page->nelts; + + /* serialize array header and all its elements */ + context = svn_temp_serializer__init(page, + sizeof(*page), + table_size + sizeof(*page) + 32, + pool); + + /* items in the array */ + svn_temp_serializer__add_leaf(context, + (const void * const *)&page->elts, + table_size); + + /* return the serialized result */ + serialized = svn_temp_serializer__get(context); + + *data = serialized->data; + *data_len = serialized->len; + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__deserialize_p2l_page(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool) +{ + apr_array_header_t *page = (apr_array_header_t *)data; + + /* resolve the only pointer in the struct */ + svn_temp_deserializer__resolve(page, (void**)&page->elts); + + /* patch up members */ + page->pool = pool; + page->nalloc = page->nelts; + + /* done */ + *out = page; + + return SVN_NO_ERROR; +} Index: subversion/libsvn_fs_fs/index.h 
=================================================================== --- subversion/libsvn_fs_fs/index.h (revision 1623988) +++ subversion/libsvn_fs_fs/index.h (working copy) @@ -48,6 +48,146 @@ #define SVN_FS_FS__ITEM_TYPE_ANY_REP 7 /* item is any representation. Only used in pre-format7. */ +/* (user visible) entry in the phys-to-log index. It describes a section + * of some packed / non-packed rev file as containing a specific item. + * There must be no overlapping / conflicting entries. + */ +typedef struct svn_fs_fs__p2l_entry_t +{ + /* offset of the first byte that belongs to the item */ + apr_off_t offset; + + /* length of the item in bytes */ + apr_off_t size; + + /* type of the item (see SVN_FS_FS__ITEM_TYPE_*) defines */ + unsigned type; + + /* modified FNV-1a checksum. 0 if unknown checksum */ + apr_uint32_t fnv1_checksum; + + /* item in that block */ + svn_fs_fs__id_part_t item; +} svn_fs_fs__p2l_entry_t; + +/* Open / create a log-to-phys index file with the full file path name + * FILE_NAME. Return the open file in *PROTO_INDEX and use POOL for + * allocations. + */ +svn_error_t * +svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index, + const char *file_name, + apr_pool_t *pool); + +/* Call this function before adding entries for the next revision to the + * log-to-phys index file in PROTO_INDEX. Use POOL for allocations. + */ +svn_error_t * +svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index, + apr_pool_t *pool); + +/* Add a new mapping, ITEM_INDEX to the OFFSET, to log-to-phys index file + * in PROTO_INDEX. Please note that mappings may be added in any order + * but duplicate entries for the same ITEM_INDEX are not supported. + * Not all possible index values need to be used. OFFSET may be -1 to + * mark 'invalid' item indexes but that is already implied for all item + * indexes not explicitly given a mapping. + * + * Use POOL for allocations. 
+ */ +svn_error_t * +svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index, + apr_off_t offset, + apr_uint64_t item_index, + apr_pool_t *pool); + +/* Use the proto index file stored at PROTO_FILE_NAME, construct the final + * log-to-phys index and append it to INDEX_FILE. The first revision will + * be REVISION, entries to the next revision will be assigned to REVISION+1 + * and so forth. Use POOL for allocations. + */ +svn_error_t * +svn_fs_fs__l2p_index_append(svn_fs_t *fs, + apr_file_t *index_file, + const char *proto_file_name, + svn_revnum_t revision, + apr_pool_t *pool); + +/* Open / create a phys-to-log index file with the full file path name + * FILE_NAME. Return the open file in *PROTO_INDEX and use POOL for + * allocations. + */ +svn_error_t * +svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index, + const char *file_name, + apr_pool_t *pool); + +/* Add a new mapping ENTRY to the phys-to-log index file in PROTO_INDEX. + * The entries must be added in ascending offset order and must not leave + * intermittent ranges uncovered. The revision value in ENTRY may be + * SVN_INVALID_REVISION. Use POOL for allocations. + */ +svn_error_t * +svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool); + +/* Set *NEXT_OFFSET to the first offset behind the last entry in the + * phys-to-log proto index file PROTO_INDEX. This will be 0 for empty + * index files. Use POOL for temporary allocations. + */ +svn_error_t * +svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset, + apr_file_t *proto_index, + apr_pool_t *pool); + +/* Use the proto index file stored at PROTO_FILE_NAME, construct the final + * phys-to-log index and append it to INDEX_FILE. Entries without a valid + * revision will be assigned to the REVISION given here. + * Use POOL for allocations. 
+ */ +svn_error_t * +svn_fs_fs__p2l_index_append(svn_fs_t *fs, + apr_file_t *index_file, + const char *proto_file_name, + svn_revnum_t revision, + apr_pool_t *pool); + +/* Use the phys-to-log mapping files in FS to build a list of entries + * that (at least partly) overlap with the range given by BLOCK_START + * offset and BLOCK_SIZE in the rep / pack file containing REVISION. + * Return the array in *ENTRIES with svn_fs_fs__p2l_entry_t as elements. + * REV_FILE determines whether to access single rev or pack file data. + * If that is not available anymore (neither in cache nor on disk), + * return an error. Use POOL for allocations. + * + * Note that (only) the first and the last mapping may cross a cluster + * boundary. + */ +svn_error_t * +svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_off_t block_start, + apr_off_t block_size, + apr_pool_t *pool); + +/* Use the phys-to-log mapping files in FS to return the entry for the + * item starting at global OFFSET in the rep file containing REVISION in + * *ENTRY. Sets *ENTRY to NULL if no item starts at exactly that offset. + * REV_FILE determines whether to access single rev or pack file data. + * If that is not available anymore (neither in cache nor on disk), + * return an error. Use POOL for allocations. + */ +svn_error_t * +svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_off_t offset, + apr_pool_t *pool); + /* For ITEM_INDEX within REV in FS, return the position in the respective * rev or pack file in *ABSOLUTE_POSITION. If TXN_ID is not NULL, return * the file offset within that transaction and REV should be given as @@ -68,4 +208,155 @@ apr_uint64_t item_index, apr_pool_t *pool); +/* Use the log-to-phys indexes in FS to determine the maximum item indexes + * assigned to revision START_REV to START_REV + COUNT - 1. 
That is a + * close upper limit to the actual number of items in the respective revs. + * Return the results in *MAX_IDS, allocated in POOL. + */ +svn_error_t * +svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids, + svn_fs_t *fs, + svn_revnum_t start_rev, + apr_size_t count, + apr_pool_t *pool); + +/* In *OFFSET, return the last OFFSET in the pack / rev file containing. + * REV_FILE determines whether to access single rev or pack file data. + * If that is not available anymore (neither in cache nor on disk), re-open + * the rev / pack file and retry to open the index file. + * Use POOL for allocations. + */ +svn_error_t * +svn_fs_fs__p2l_get_max_offset(apr_off_t *offset, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_revnum_t revision, + apr_pool_t *pool); + +/* Index (re-)creation utilities. + */ + +/* For FS, create a new L2P auto-deleting proto index file in POOL and return + * its name in *PROTONAME. All entries to write are given in ENTRIES and + * entries are of type svn_fs_fs__p2l_entry_t* (sic!). The ENTRIES array + * will be reordered. Give the proto index file the lifetime of RESULT_POOL + * and use SCRATCH_POOL for temporary allocations. + */ +svn_error_t * +svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname, + svn_fs_t *fs, + apr_array_header_t *entries, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + +/* For FS, create a new P2L auto-deleting proto index file in POOL and return + * its name in *PROTONAME. All entries to write are given in ENTRIES and + * of type svn_fs_fs__p2l_entry_t*. The FVN1 checksums are not taken from + * ENTRIES but are begin calculated from the current contents of REV_FILE + * as we go. Give the proto index file the lifetime of RESULT_POOL and use + * SCRATCH_POOL for temporary allocations. 
+ */ +svn_error_t * +svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname, + svn_fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + apr_array_header_t *entries, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool); + +/* Serialization and caching interface + */ + +/* We use this key type to address individual pages from both index types. + */ +typedef struct svn_fs_fs__page_cache_key_t +{ + /* in l2p: this is the revision of the items being mapped + in p2l: this is the start revision identifying the pack / rev file */ + apr_uint32_t revision; + + /* if TRUE, this is the index to a pack file + */ + svn_boolean_t is_packed; + + /* in l2p: page number within the revision + * in p2l: page number with the rev / pack file + */ + apr_uint64_t page; +} svn_fs_fs__page_cache_key_t; + +/* + * Implements svn_cache__serialize_func_t for l2p_header_t objects. + */ +svn_error_t * +svn_fs_fs__serialize_l2p_header(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool); + +/* + * Implements svn_cache__deserialize_func_t for l2p_header_t objects. + */ +svn_error_t * +svn_fs_fs__deserialize_l2p_header(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + +/* + * Implements svn_cache__serialize_func_t for l2p_page_t objects. + */ +svn_error_t * +svn_fs_fs__serialize_l2p_page(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool); + +/* + * Implements svn_cache__deserialize_func_t for l2p_page_t objects. + */ +svn_error_t * +svn_fs_fs__deserialize_l2p_page(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + +/* + * Implements svn_cache__serialize_func_t for p2l_header_t objects. + */ +svn_error_t * +svn_fs_fs__serialize_p2l_header(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool); + +/* + * Implements svn_cache__deserialize_func_t for p2l_header_t objects. 
+ */ +svn_error_t * +svn_fs_fs__deserialize_p2l_header(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + +/* + * Implements svn_cache__serialize_func_t for apr_array_header_t objects + * with elements of type svn_fs_fs__p2l_entry_t. + */ +svn_error_t * +svn_fs_fs__serialize_p2l_page(void **data, + apr_size_t *data_len, + void *in, + apr_pool_t *pool); + +/* + * Implements svn_cache__deserialize_func_t for apr_array_header_t objects + * with elements of type svn_fs_fs__p2l_entry_t. + */ +svn_error_t * +svn_fs_fs__deserialize_p2l_page(void **out, + void *data, + apr_size_t data_len, + apr_pool_t *pool); + #endif Index: subversion/libsvn_fs_fs/pack.c =================================================================== --- subversion/libsvn_fs_fs/pack.c (revision 1623988) +++ subversion/libsvn_fs_fs/pack.c (working copy) @@ -35,6 +35,7 @@ #include "pack.h" #include "util.h" #include "id.h" +#include "index.h" #include "low_level.h" #include "revprops.h" #include "transaction.h" @@ -44,6 +45,577 @@ #include "svn_private_config.h" #include "temp_serializer.h" +/* Logical addressing packing logic: + * + * We pack files on a pack file basis (e.g. 1000 revs) without changing + * existing pack files nor the revision files outside the range to pack. + * + * First, we will scan the revision file indexes to determine the number + * of items to "place" (i.e. determine their optimal position within the + * future pack file). For each item, we will need a constant amount of + * memory to track it. A MAX_MEM parameter sets a limit to the number of + * items we may place in one go. That means, we may not be able to add + * all revisions at once. Instead, we will run the placement for a subset + * of revisions at a time. The very unlikely worst case will simply append + * all revision data with just a little reshuffling inside each revision. 
+ * + * In a second step, we read all revisions in the selected range, build + * the item tracking information and copy the items themselves from the + * revision files to temporary files. The latter serve as buckets for a + * very coarse bucket presort: Separate change lists, file properties, + * directory properties and noderevs + representations from one another. + * + * The third step will determine an optimized placement for the items in + * each of the 4 buckets separately. The first three will simply order + * their items by revision, starting with the newest once. Placing rep + * and noderev items is a more elaborate process documented in the code. + * + * In short, we store items in the following order: + * - changed paths lists + * - node property + * - directory properties + * - directory representations corresponding noderevs, lexical path order + * with special treatment of "trunk" and "branches" + * - same for file representations + * + * Step 4 copies the items from the temporary buckets into the final + * pack file and writes the temporary index files. + * + * Finally, after the last range of revisions, create the final indexes. + */ + +/* Maximum amount of memory we allocate for placement information during + * the pack process. + */ +#define DEFAULT_MAX_MEM (64 * 1024 * 1024) + +/* Data structure describing a node change at PATH, REVISION. + * We will sort these instances by PATH and NODE_ID such that we can combine + * similar nodes in the same reps container and store containers in path + * major order. 
+ */ +typedef struct path_order_t +{ + /* changed path */ + svn_prefix_string__t *path; + + /* node ID for this PATH in REVISION */ + svn_fs_fs__id_part_t node_id; + + /* when this change happened */ + svn_revnum_t revision; + + /* noderev predecessor count */ + int predecessor_count; + + /* this is a directory node */ + svn_boolean_t is_dir; + + /* length of the expanded representation content */ + apr_int64_t expanded_size; + + /* item ID of the noderev linked to the change. May be (0, 0). */ + svn_fs_fs__id_part_t noderev_id; + + /* item ID of the representation containing the new data. May be (0, 0). */ + svn_fs_fs__id_part_t rep_id; +} path_order_t; + +/* Represents a reference from item FROM to item TO. FROM may be a noderev + * or rep_id while TO is (currently) always a representation. We will sort + * them by TO which allows us to collect all dependent items. + */ +typedef struct reference_t +{ + svn_fs_fs__id_part_t to; + svn_fs_fs__id_part_t from; +} reference_t; + +/* This structure keeps track of all the temporary data and status that + * needs to be kept around during the creation of one pack file. After + * each revision range (in case we can't process all revs at once due to + * memory restrictions), parts of the data will get re-initialized. + */ +typedef struct pack_context_t +{ + /* file system that we operate on */ + svn_fs_t *fs; + + /* cancel function to invoke at regular intervals. 
May be NULL */ + svn_cancel_func_t cancel_func; + + /* baton to pass to CANCEL_FUNC */ + void *cancel_baton; + + /* first revision in the shard (and future pack file) */ + svn_revnum_t shard_rev; + + /* first revision in the range to process (>= SHARD_REV) */ + svn_revnum_t start_rev; + + /* first revision after the range to process (<= SHARD_END_REV) */ + svn_revnum_t end_rev; + + /* first revision after the current shard */ + svn_revnum_t shard_end_rev; + + /* log-to-phys proto index for the whole pack file */ + apr_file_t *proto_l2p_index; + + /* phys-to-log proto index for the whole pack file */ + apr_file_t *proto_p2l_index; + + /* full shard directory path (containing the unpacked revisions) */ + const char *shard_dir; + + /* full packed shard directory path (containing the pack file + indexes) */ + const char *pack_file_dir; + + /* full pack file path (including PACK_FILE_DIR) */ + const char *pack_file_path; + + /* current write position (i.e. file length) in the pack file */ + apr_off_t pack_offset; + + /* the pack file to ultimately write all data to */ + apr_file_t *pack_file; + + /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists. + * Will be filled in phase 2 and be cleared after each revision range. */ + apr_array_header_t *changes; + + /* temp file receiving all change list items (referenced by CHANGES). + * Will be filled in phase 2 and be cleared after each revision range. */ + apr_file_t *changes_file; + + /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties. + * Will be filled in phase 2 and be cleared after each revision range. */ + apr_array_header_t *file_props; + + /* temp file receiving all file prop items (referenced by FILE_PROPS). + * Will be filled in phase 2 and be cleared after each revision range.*/ + apr_file_t *file_props_file; + + /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties. + * Will be filled in phase 2 and be cleared after each revision range. 
*/ + apr_array_header_t *dir_props; + + /* temp file receiving all directory prop items (referenced by DIR_PROPS). + * Will be filled in phase 2 and be cleared after each revision range.*/ + apr_file_t *dir_props_file; + + /* container for all PATH members in PATH_ORDER. */ + svn_prefix_tree__t *paths; + + /* array of path_order_t *. Will be filled in phase 2 and be cleared + * after each revision range. Sorted by PATH, NODE_ID. */ + apr_array_header_t *path_order; + + /* array of reference_t* linking representations to their delta bases. + * Will be filled in phase 2 and be cleared after each revision range. + * It will be sorted by the FROM members (for rep->base rep lookup). */ + apr_array_header_t *references; + + /* array of svn_fs_fs__p2l_entry_t*. Will be filled in phase 2 and be + * cleared after each revision range. During phase 3, we will set items + * to NULL that we already processed. */ + apr_array_header_t *reps; + + /* array of int, marking for each revision, the which offset their items + * begin in REPS. Will be filled in phase 2 and be cleared after + * each revision range. */ + apr_array_header_t *rev_offsets; + + /* temp file receiving all items referenced by REPS. + * Will be filled in phase 2 and be cleared after each revision range.*/ + apr_file_t *reps_file; + + /* pool used for temporary data structures that will be cleaned up when + * the next range of revisions is being processed */ + apr_pool_t *info_pool; +} pack_context_t; + +/* Create and initialize a new pack context for packing shard SHARD_REV in + * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL + * and return the structure in *CONTEXT. + * + * Limit the number of items being copied per iteration to MAX_ITEMS. + * Set CANCEL_FUNC and CANCEL_BATON as well. 
+ */ +static svn_error_t * +initialize_pack_context(pack_context_t *context, + svn_fs_t *fs, + const char *pack_file_dir, + const char *shard_dir, + svn_revnum_t shard_rev, + int max_items, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + const char *temp_dir; + int max_revs = MIN(ffd->max_files_per_dir, max_items); + + SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT); + SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0); + + /* where we will place our various temp files */ + SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); + + /* store parameters */ + context->fs = fs; + context->cancel_func = cancel_func; + context->cancel_baton = cancel_baton; + + context->shard_rev = shard_rev; + context->start_rev = shard_rev; + context->end_rev = shard_rev; + context->shard_end_rev = shard_rev + ffd->max_files_per_dir; + + /* Create the new directory and pack file. */ + context->shard_dir = shard_dir; + context->pack_file_dir = pack_file_dir; + context->pack_file_path + = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); + SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path, + APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL + | APR_CREATE, APR_OS_DEFAULT, pool)); + + /* Proto index files */ + SVN_ERR(svn_fs_fs__l2p_proto_index_open( + &context->proto_l2p_index, + svn_dirent_join(pack_file_dir, + PATH_INDEX PATH_EXT_L2P_INDEX, + pool), + pool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_open( + &context->proto_p2l_index, + svn_dirent_join(pack_file_dir, + PATH_INDEX PATH_EXT_P2L_INDEX, + pool), + pool)); + + /* item buckets: one item info array and one temp file per bucket */ + context->changes = apr_array_make(pool, max_items, + sizeof(svn_fs_fs__p2l_entry_t *)); + SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, + svn_io_file_del_on_close, pool, pool)); + context->file_props = apr_array_make(pool, max_items, + sizeof(svn_fs_fs__p2l_entry_t *)); + 
SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, + svn_io_file_del_on_close, pool, pool)); + context->dir_props = apr_array_make(pool, max_items, + sizeof(svn_fs_fs__p2l_entry_t *)); + SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, + svn_io_file_del_on_close, pool, pool)); + + /* noderev and representation item bucket */ + context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int)); + context->path_order = apr_array_make(pool, max_items, + sizeof(path_order_t *)); + context->references = apr_array_make(pool, max_items, + sizeof(reference_t *)); + context->reps = apr_array_make(pool, max_items, + sizeof(svn_fs_fs__p2l_entry_t *)); + SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, + svn_io_file_del_on_close, pool, pool)); + + /* the pool used for temp structures */ + context->info_pool = svn_pool_create(pool); + context->paths = svn_prefix_tree__create(context->info_pool); + + return SVN_NO_ERROR; +} + +/* Clean up / free all revision range specific data and files in CONTEXT. + * Use POOL for temporary allocations. + */ +static svn_error_t * +reset_pack_context(pack_context_t *context, + apr_pool_t *pool) +{ + apr_array_clear(context->changes); + SVN_ERR(svn_io_file_trunc(context->changes_file, 0, pool)); + apr_array_clear(context->file_props); + SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, pool)); + apr_array_clear(context->dir_props); + SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, pool)); + + apr_array_clear(context->rev_offsets); + apr_array_clear(context->path_order); + apr_array_clear(context->references); + apr_array_clear(context->reps); + SVN_ERR(svn_io_file_trunc(context->reps_file, 0, pool)); + + svn_pool_clear(context->info_pool); + + return SVN_NO_ERROR; +} + +/* Call this after the last revision range. It will finalize all index files + * for CONTEXT and close any open files. Use POOL for temporary allocations. 
+ */ +static svn_error_t * +close_pack_context(pack_context_t *context, + apr_pool_t *pool) +{ + const char *proto_l2p_index_path; + const char *proto_p2l_index_path; + + /* need the file names for the actual index creation call further down */ + SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path, + context->proto_l2p_index, pool)); + SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path, + context->proto_p2l_index, pool)); + + /* finalize proto index files */ + SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool)); + SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool)); + + /* Append the actual index data to the pack file. */ + SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file, + proto_l2p_index_path, + proto_p2l_index_path, + context->shard_rev, + pool)); + + /* remove proto index files */ + SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool)); + SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool)); + + SVN_ERR(svn_io_file_close(context->pack_file, pool)); + + return SVN_NO_ERROR; +} + +/* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC + * from CONTEXT at regular intervals. Use POOL for allocations. + */ +static svn_error_t * +copy_file_data(pack_context_t *context, + apr_file_t *dest, + apr_file_t *source, + apr_off_t size, + apr_pool_t *pool) +{ + /* most non-representation items will be small. Minimize the buffer + * and infrastructure overhead in that case. */ + enum { STACK_BUFFER_SIZE = 1024 }; + + if (size < STACK_BUFFER_SIZE) + { + /* copy small data using a fixed-size buffer on stack */ + char buffer[STACK_BUFFER_SIZE]; + SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size, + NULL, NULL, pool)); + SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size, + NULL, pool)); + } + else + { + /* use streaming copies for larger data blocks. That may require + * the allocation of larger buffers and we should make sure that + * this extra memory is released asap. 
*/ + fs_fs_data_t *ffd = context->fs->fsap_data; + apr_pool_t *copypool = svn_pool_create(pool); + char *buffer = apr_palloc(copypool, ffd->block_size); + + while (size) + { + apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size)); + if (context->cancel_func) + SVN_ERR(context->cancel_func(context->cancel_baton)); + + SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy, + NULL, NULL, pool)); + SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy, + NULL, pool)); + + size -= to_copy; + } + + svn_pool_destroy(copypool); + } + + return SVN_NO_ERROR; +} + +/* Writes SIZE bytes, all 0, to DEST. Uses POOL for allocations. + */ +static svn_error_t * +write_null_bytes(apr_file_t *dest, + apr_off_t size, + apr_pool_t *pool) +{ + /* Have a collection of high-quality, easy to access NUL bytes handy. */ + enum { BUFFER_SIZE = 1024 }; + static const char buffer[BUFFER_SIZE] = { 0 }; + + /* copy SIZE of them into the file's buffer */ + while (size) + { + apr_size_t to_write = MIN(size, BUFFER_SIZE); + SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool)); + size -= to_write; + } + + return SVN_NO_ERROR; +} + +/* Copy the "simple" item (changed paths list or property representation) + * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add + * a copy of ENTRY to ENTRIES but with an updated offset value that points + * to the copy destination in TEMP_FILE. Use POOL for allocations. 
+ */ +static svn_error_t * +copy_item_to_temp(pack_context_t *context, + apr_array_header_t *entries, + apr_file_t *temp_file, + apr_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + svn_fs_fs__p2l_entry_t *new_entry + = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); + new_entry->offset = 0; + SVN_ERR(svn_io_file_seek(temp_file, SEEK_CUR, &new_entry->offset, pool)); + APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry; + + SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool)); + + return SVN_NO_ERROR; +} + +/* Return the offset within CONTEXT->REPS that corresponds to item + * ITEM_INDEX in REVISION. + */ +static int +get_item_array_index(pack_context_t *context, + svn_revnum_t revision, + apr_int64_t item_index) +{ + assert(revision >= context->start_rev); + return (int)item_index + APR_ARRAY_IDX(context->rev_offsets, + revision - context->start_rev, + int); +} + +/* Write INFO to the correct position in CONTEXT->REP_INFOS. The latter + * may need auto-expanding. Overwriting an array element is not allowed. + */ +static void +add_item_rep_mapping(pack_context_t *context, + svn_fs_fs__p2l_entry_t *entry) +{ + int idx; + + /* index of INFO */ + idx = get_item_array_index(context, + entry->item.revision, + entry->item.number); + + /* make sure the index exists in the array */ + while (context->reps->nelts <= idx) + APR_ARRAY_PUSH(context->reps, void *) = NULL; + + /* set the element. If there is already an entry, there are probably + * two items claiming to be the same -> bail out */ + assert(!APR_ARRAY_IDX(context->reps, idx, void *)); + APR_ARRAY_IDX(context->reps, idx, void *) = entry; +} + +/* Return the P2L entry from CONTEXT->REPS for the given ID. If there is + * none (or not anymore), return NULL. If RESET has been specified, set + * the array entry to NULL after returning the entry. 
+ */ +static svn_fs_fs__p2l_entry_t * +get_item(pack_context_t *context, + const svn_fs_fs__id_part_t *id, + svn_boolean_t reset) +{ + svn_fs_fs__p2l_entry_t *result = NULL; + if (id->number && id->revision >= context->start_rev) + { + int idx = get_item_array_index(context, id->revision, id->number); + if (context->reps->nelts > idx) + { + result = APR_ARRAY_IDX(context->reps, idx, void *); + if (result && reset) + APR_ARRAY_IDX(context->reps, idx, void *) = NULL; + } + } + + return result; +} + +/* Copy representation item identified by ENTRY from the current position + * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by + * our placement algorithm to CONTEXT. Use POOL for temporary allocations. + */ +static svn_error_t * +copy_rep_to_temp(pack_context_t *context, + apr_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + svn_fs_fs__rep_header_t *rep_header; + svn_stream_t *stream; + apr_off_t source_offset = entry->offset; + + /* create a copy of ENTRY, make it point to the copy destination and + * store it in CONTEXT */ + entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); + entry->offset = 0; + SVN_ERR(svn_io_file_seek(context->reps_file, SEEK_CUR, &entry->offset, + pool)); + add_item_rep_mapping(context, entry); + + /* read & parse the representation header */ + stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); + SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool)); + svn_stream_close(stream); + + /* if the representation is a delta against some other rep, link the two */ + if ( rep_header->type == svn_fs_fs__rep_delta + && rep_header->base_revision >= context->start_rev) + { + reference_t *reference = apr_pcalloc(context->info_pool, + sizeof(*reference)); + reference->from = entry->item; + reference->to.revision = rep_header->base_revision; + reference->to.number = rep_header->base_item_index; + APR_ARRAY_PUSH(context->references, reference_t *) = reference; + } + + /* copy the 
 whole rep (including header!) to our temp file */ + SVN_ERR(svn_io_file_seek(rev_file, SEEK_SET, &source_offset, pool)); + SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size, + pool)); + + return SVN_NO_ERROR; +} + +/* Directories first, dirs / files sorted by name in reverse lexical order. + * This maximizes the chance of two items being located close to one another + * in *all* pack files independent of their change order. It also groups + * multi-project repos nicely according to their sub-projects. The reverse + * order aspect gives "trunk" preference over "tags" and "branches", so + * trunk-related items are more likely to be contiguous. + */ +static int +compare_dir_entries_format7(const svn_sort__item_t *a, + const svn_sort__item_t *b) +{ + const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value; + const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value; + + if (lhs->kind != rhs->kind) + return lhs->kind == svn_node_dir ? -1 : 1; + + return strcmp(lhs->name, rhs->name); +} + /* Directories entries sorted by revision (decreasing - to max cache hits) * and offset (increasing - to max benefit from APR file buffering). */ @@ -78,7 +650,9 @@ { apr_array_header_t *ordered = svn_sort__hash(directory, - compare_dir_entries_format6, + svn_fs_fs__use_log_addressing(fs, revision) + ? compare_dir_entries_format7 + : compare_dir_entries_format6, pool); apr_array_header_t *result @@ -92,6 +666,914 @@ return result; } +/* Return a duplicate of the ORIGINAL path with special sub-strings + * (e.g. "trunk") modified in such a way that they have a lower lexicographic + * value than any other "normal" file name. + */ +static const char * +tweak_path_for_ordering(const char *original, + apr_pool_t *pool) +{ + /* We may add further special cases as needed. 
 */ + enum {SPECIAL_COUNT = 2}; + static const char *special[SPECIAL_COUNT] = {"trunk", "branch"}; + char *pos; + char *path = apr_pstrdup(pool, original); + int i; + + /* Replace the first char of any "special" sub-string we find by + * a control char, i.e. '\1' .. '\31'. In the rare event that this + * would clash with existing paths, no data will be lost but merely + * the node ordering will be sub-optimal. + */ + for (i = 0; i < SPECIAL_COUNT; ++i) + for (pos = strstr(path, special[i]); + pos; + pos = strstr(pos + 1, special[i])) + { + *pos = (char)(i + '\1'); + } + + return path; +} + +/* Copy node revision item identified by ENTRY from the current position + * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking info needed by + * our placement algorithm to CONTEXT. Use POOL for temporary allocations. + */ +static svn_error_t * +copy_node_to_temp(pack_context_t *context, + apr_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + path_order_t *path_order = apr_pcalloc(context->info_pool, + sizeof(*path_order)); + node_revision_t *noderev; + const char *sort_path; + svn_stream_t *stream; + apr_off_t source_offset = entry->offset; + + /* read & parse noderev */ + stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); + SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool)); + svn_stream_close(stream); + + /* create a copy of ENTRY, make it point to the copy destination and + * store it in CONTEXT */ + entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); + entry->offset = 0; + SVN_ERR(svn_io_file_seek(context->reps_file, SEEK_CUR, + &entry->offset, pool)); + add_item_rep_mapping(context, entry); + + /* copy the noderev to our temp file */ + SVN_ERR(svn_io_file_seek(rev_file, SEEK_SET, &source_offset, pool)); + SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size, + pool)); + + /* if the node has a data representation, make that the node's "base". 
+ * This will (often) cause the noderev to be placed right in front of + * its data representation. */ + + if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev) + { + path_order->rep_id.revision = noderev->data_rep->revision; + path_order->rep_id.number = noderev->data_rep->item_index; + path_order->expanded_size = noderev->data_rep->expanded_size + ? noderev->data_rep->expanded_size + : noderev->data_rep->size; + } + + /* Sort path is the key used for ordering noderevs and associated reps. + * It will not be stored in the final pack file. */ + sort_path = tweak_path_for_ordering(noderev->created_path, pool); + path_order->path = svn_prefix_string__create(context->paths, sort_path); + path_order->node_id = *svn_fs_fs__id_node_id(noderev->id); + path_order->revision = svn_fs_fs__id_rev(noderev->id); + path_order->predecessor_count = noderev->predecessor_count; + path_order->is_dir = noderev->kind == svn_node_dir; + path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id); + APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order; + + return SVN_NO_ERROR; +} + +/* implements compare_fn_t. Bring all directories in front of the files + and sort descendingly by PATH, NODE_ID and REVISION. + */ +static int +compare_path_order(const path_order_t * const * lhs_p, + const path_order_t * const * rhs_p) +{ + const path_order_t * lhs = *lhs_p; + const path_order_t * rhs = *rhs_p; + + /* cluster all directories */ + int diff = rhs->is_dir - lhs->is_dir; + if (diff) + return diff; + + /* lexicographic order on path and node (i.e. latest first) */ + diff = svn_prefix_string__compare(lhs->path, rhs->path); + if (diff) + return diff; + + /* reverse order on node (i.e. latest first) */ + diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id); + if (diff) + return diff; + + /* reverse order on revision (i.e. latest first) */ + if (lhs->revision != rhs->revision) + return lhs->revision < rhs->revision ? 
1 : -1; + + return 0; +} + +/* implements compare_fn_t. Sort ascendingly by FROM, TO. + */ +static int +compare_references(const reference_t * const * lhs_p, + const reference_t * const * rhs_p) +{ + const reference_t * lhs = *lhs_p; + const reference_t * rhs = *rhs_p; + + int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from); + return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to); +} + +/* implements compare_fn_t. Assume ascending order by FROM. + */ +static int +compare_ref_to_item(const reference_t * const * lhs_p, + const svn_fs_fs__id_part_t * rhs_p) +{ + return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p); +} + +/* implements compare_fn_t. Finds the DIR / FILE boundary. + */ +static int +compare_is_dir(const path_order_t * const * lhs_p, + const void *unused) +{ + return (*lhs_p)->is_dir ? -1 : 0; +} + +/* Look for the least significant bit set in VALUE and return the smallest + * number with the same property, i.e. the largest power of 2 that is a + * factor in VALUE. */ +static int +roundness(int value) +{ + return value ? value - (value & (value - 1)) : INT_MAX; +} + +/* Order a range of data collected in CONTEXT such that we can place them + * in the desired order. The input is taken from *PATH_ORDER, offsets FIRST + * to LAST and then written in the final order to the same range in *TEMP. + */ +static void +sort_reps_range(pack_context_t *context, + const path_order_t **path_order, + const path_order_t **temp, + int first, + int last) +{ + const svn_prefix_string__t *path; + int i, dest, best; + svn_fs_fs__id_part_t rep_id; + fs_fs_data_t *ffd = context->fs->fsap_data; + + /* The logic below would fail for empty ranges. */ + if (first == last) + return; + + /* Re-order noderevs like this: + * + * (1) Most likely to be referenced by future pack files, in path order. 
+ * (2) highest revision rep per path + dependency chain + * (3) Remaining reps in path, rev order + * + * We simply pick & choose from the existing path, rev order. + */ + dest = first; + path = path_order[first]->path; + best = first; + + /* (1) For each path, pick the "roundest" representation and put it in + * front of all other nodes in the pack file. The "roundest" rep is + * the one most likely to be referenced from future pack files, i.e. we + * concentrate those potential "foreign link targets" in one section of + * the pack file. + * + * And we only apply this to reps outside the linear deltification + * sections because references *into* linear deltification ranges are + * much less likely. + */ + for (i = first; i < last; ++i) + { + /* Investigated all nodes for the current path? */ + if (svn_prefix_string__compare(path, path_order[i]->path)) + { + /* next path */ + path = path_order[i]->path; + + /* Pick roundest non-linear deltified node. */ + if (roundness(path_order[best]->predecessor_count) + >= ffd->max_linear_deltification) + { + temp[dest++] = path_order[best]; + path_order[best] = NULL; + best = i; + } + } + + /* next entry */ + if ( roundness(path_order[best]->predecessor_count) + < roundness(path_order[i]->predecessor_count)) + best = i; + } + + /* Treat the last path the same as all others. */ + if (roundness(path_order[best]->predecessor_count) + >= ffd->max_linear_deltification) + { + temp[dest++] = path_order[best]; + path_order[best] = NULL; + } + + /* (2) For each (remaining) path, pick the nodes along the delta chain + * for the highest revision. Due to our ordering, this is the first + * node we encounter for any path. + * + * Most references that don't hit a delta base picked in (1), will + * access HEAD of the respective path. Keeping all its dependency chain + * in one place turns reconstruction into a linear scan of minimal length. 
+ */ + for (i = first; i < last; ++i) + if (path_order[i]) + { + /* This is the first path we still have to handle. */ + path = path_order[i]->path; + rep_id = path_order[i]->rep_id; + break; + } + + for (i = first; i < last; ++i) + if (path_order[i]) + { + /* New path? */ + if (svn_prefix_string__compare(path, path_order[i]->path)) + { + path = path_order[i]->path; + rep_id = path_order[i]->rep_id; + } + + /* Pick nodes along the deltification chain. Skip side-branches. */ + if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id)) + { + reference_t **reference; + + temp[dest++] = path_order[i]; + path_order[i] = NULL; + + reference = svn_sort__array_lookup(context->references, + &rep_id, NULL, + (int (*)(const void *, const void *))compare_ref_to_item); + if (reference) + rep_id = (*reference)->to; + } + } + + /* (3) All remaining nodes in path, rev order. Linear deltification + * makes HEAD delta chains from (2) cover all or most of their deltas + * in a given pack file. So, this is just a few remnants that we put + * at the end of the pack file. + */ + for (i = first; i < last; ++i) + if (path_order[i]) + temp[dest++] = path_order[i]; + + /* We now know the final ordering. */ + assert(dest == last); +} + +/* Order the data collected in CONTEXT such that we can place them in the + * desired order. + */ +static void +sort_reps(pack_context_t *context) +{ + apr_pool_t *temp_pool; + const path_order_t **temp, **path_order; + int i, count, dir_count; + + /* We will later assume that there is at least one node / path. + */ + if (context->path_order->nelts == 0) + { + assert(context->references->nelts == 0); + return; + } + + /* Sort containers by path and IDs, respectively. 
+ */ + svn_sort__array(context->path_order, + (int (*)(const void *, const void *))compare_path_order); + svn_sort__array(context->references, + (int (*)(const void *, const void *))compare_references); + + /* Directories are already in front; sort directories section and files + * section separately but use the same heuristics (see sub-function). + */ + temp_pool = svn_pool_create(context->info_pool); + count = context->path_order->nelts; + temp = apr_pcalloc(temp_pool, count * sizeof(*temp)); + path_order = (void *)context->path_order->elts; + + /* Find the boundary between DIR and FILE section. */ + dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL, + (int (*)(const void *, const void *))compare_is_dir); + + /* Sort those sub-sections separately. */ + sort_reps_range(context, path_order, temp, 0, dir_count); + sort_reps_range(context, path_order, temp, dir_count, count); + + /* We now know the final ordering. */ + for (i = 0; i < count; ++i) + path_order[i] = temp[i]; + + svn_pool_destroy(temp_pool); +} + +/* implements compare_fn_t. Place LHS before RHS, if the latter is older. + */ +static int +compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs, + const svn_fs_fs__p2l_entry_t * const * rhs) +{ + assert(*lhs != *rhs); + + if ((*lhs)->item.revision == (*rhs)->item.revision) + return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1; + + return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1; +} + +/* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age. Place the latest + * items first. + */ +static void +sort_items(apr_array_header_t *entries) +{ + svn_sort__array(entries, + (int (*)(const void *, const void *))compare_p2l_info); +} + +/* Return the remaining unused bytes in the current block in CONTEXT's + * pack file. 
 */ +static apr_ssize_t +get_block_left(pack_context_t *context) +{ + fs_fs_data_t *ffd = context->fs->fsap_data; + return ffd->block_size - (context->pack_offset % ffd->block_size); +} + +/* To prevent items from overlapping a block boundary, we will usually + * put them into the next block and top up the old one with NUL bytes. + * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does + * not fit into the current block and the padding is short enough. + * Use POOL for allocations. + */ +static svn_error_t * +auto_pad_block(pack_context_t *context, + apr_off_t to_add, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = context->fs->fsap_data; + + /* This is the maximum number of bytes "wasted" that way per block. + * Larger items will cross the block boundaries. */ + const apr_off_t max_padding = MAX(ffd->block_size / 50, 512); + + /* Is wasted space small enough to align the current item to the next + * block? */ + apr_off_t padding = get_block_left(context); + + if (padding < to_add && padding < max_padding) + { + /* Yes. Top up with NUL bytes and don't forget to create + * a P2L index entry marking this section as unused. */ + svn_fs_fs__p2l_entry_t null_entry; + + null_entry.offset = context->pack_offset; + null_entry.size = padding; + null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED; + null_entry.item.revision = SVN_INVALID_REVNUM; + null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED; + null_entry.fnv1_checksum = 0; + + SVN_ERR(write_null_bytes(context->pack_file, padding, pool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry( + context->proto_p2l_index, &null_entry, pool)); + context->pack_offset += padding; + } + + return SVN_NO_ERROR; +} + +/* Read the contents of ITEM, if not empty, from TEMP_FILE and write it + * to CONTEXT->PACK_FILE. Use POOL for allocations. 
+ */ +static svn_error_t * +store_item(pack_context_t *context, + apr_file_t *temp_file, + svn_fs_fs__p2l_entry_t *item, + apr_pool_t *pool) +{ + apr_off_t safety_margin; + + /* skip empty entries */ + if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + return SVN_NO_ERROR; + + /* If the next item does not fit into the current block, auto-pad it. + Take special care of textual noderevs since their parsers may + prefetch up to 80 bytes and we don't want them to cross block + boundaries. */ + safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV + ? SVN__LINE_CHUNK_SIZE + : 0; + SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool)); + + /* select the item in the source file and copy it into the target + * pack file */ + SVN_ERR(svn_io_file_seek(temp_file, SEEK_SET, &item->offset, pool)); + SVN_ERR(copy_file_data(context, context->pack_file, temp_file, + item->size, pool)); + + /* write index entry and update current position */ + item->offset = context->pack_offset; + context->pack_offset += item->size; + + SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index, + item, pool)); + + APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item; + + return SVN_NO_ERROR; +} + +/* Read the contents of the non-empty items in ITEMS from TEMP_FILE and + * write them to CONTEXT->PACK_FILE. Use POOL for allocations. + */ +static svn_error_t * +store_items(pack_context_t *context, + apr_file_t *temp_file, + apr_array_header_t *items, + apr_pool_t *pool) +{ + int i; + apr_pool_t *iterpool = svn_pool_create(pool); + + /* copy all items in strict order */ + for (i = 0; i < items->nelts; ++i) + { + svn_pool_clear(iterpool); + SVN_ERR(store_item(context, temp_file, + APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *), + iterpool)); + } + + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements + * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE. 
+ * Use POOL for temporary allocations. + */ +static svn_error_t * +copy_reps_from_temp(pack_context_t *context, + apr_file_t *temp_file, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + apr_array_header_t *path_order = context->path_order; + int i; + + /* copy items in path order. */ + for (i = 0; i < path_order->nelts; ++i) + { + path_order_t *current_path; + svn_fs_fs__p2l_entry_t *node_part; + svn_fs_fs__p2l_entry_t *rep_part; + + svn_pool_clear(iterpool); + + current_path = APR_ARRAY_IDX(path_order, i, path_order_t *); + node_part = get_item(context, ¤t_path->noderev_id, TRUE); + rep_part = get_item(context, ¤t_path->rep_id, TRUE); + + if (node_part) + SVN_ERR(store_item(context, temp_file, node_part, iterpool)); + if (rep_part) + SVN_ERR(store_item(context, temp_file, rep_part, iterpool)); + } + + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* implements compare_fn_t. Place LHS before RHS, if the latter belongs to + * a newer revision. + */ +static int +compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p, + const svn_fs_fs__p2l_entry_t * const * rhs_p) +{ + const svn_fs_fs__p2l_entry_t * lhs = *lhs_p; + const svn_fs_fs__p2l_entry_t * rhs = *rhs_p; + + if (lhs->item.revision == rhs->item.revision) + return 0; + + return lhs->item.revision < rhs->item.revision ? -1 : 1; +} + +/* Write the log-to-phys proto index file for CONTEXT and use POOL for + * temporary allocations. All items in all buckets must have been placed + * by now. 
+ */ +static svn_error_t * +write_l2p_index(pack_context_t *context, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + svn_revnum_t prev_rev = SVN_INVALID_REVNUM; + int i, dest; + + /* eliminate empty entries from CONTEXT->REPS */ + for (i = 0, dest = 0; i < context->reps->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); + if (entry) + APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *) + = entry; + } + context->reps->nelts = dest; + + /* we need to write the l2p index revision by revision */ + svn_sort__array(context->reps, + (int (*)(const void *, const void *))compare_p2l_info_rev); + + /* write index entries */ + for (i = 0; i < context->reps->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *p2l_entry + = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); + if (p2l_entry == NULL) + continue; + + /* next revision? */ + if (prev_rev != p2l_entry->item.revision) + { + prev_rev = p2l_entry->item.revision; + SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision( + context->proto_l2p_index, iterpool)); + } + + /* add entry */ + SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index, + p2l_entry->offset, + p2l_entry->item.number, + iterpool)); + + /* keep memory usage in check */ + if (i % 256 == 0) + svn_pool_clear(iterpool); + } + + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* Pack the current revision range of CONTEXT, i.e. this covers phases 2 + * to 4. Use POOL for allocations. 
+ */ +static svn_error_t * +pack_range(pack_context_t *context, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = context->fs->fsap_data; + apr_pool_t *revpool = svn_pool_create(pool); + apr_pool_t *iterpool = svn_pool_create(pool); + apr_pool_t *iterpool2 = svn_pool_create(pool); + + /* Phase 2: Copy items into various buckets and build tracking info */ + svn_revnum_t revision; + for (revision = context->start_rev; revision < context->end_rev; ++revision) + { + apr_off_t offset = 0; + svn_fs_fs__revision_file_t *rev_file; + + svn_pool_clear(revpool); + + /* Get the rev file dimensions (mainly index locations). */ + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs, + revision, revpool, iterpool)); + SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); + + /* store the indirect array index */ + APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts; + + /* read the phys-to-log index file until we covered the whole rev file. + * That index contains enough info to build both target indexes from it. 
*/ + while (offset < rev_file->l2p_offset) + { + /* read one cluster */ + int i; + apr_array_header_t *entries; + + svn_pool_clear(iterpool); + + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, + rev_file, revision, offset, + ffd->p2l_page_size, iterpool)); + + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + + /* skip first entry if that was duplicated due crossing a + cluster boundary */ + if (offset > entry->offset) + continue; + + svn_pool_clear(iterpool2); + + /* process entry while inside the rev file */ + offset = entry->offset; + if (offset < rev_file->l2p_offset) + { + SVN_ERR(svn_io_file_seek(rev_file->file, SEEK_SET, &offset, + iterpool2)); + + if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) + SVN_ERR(copy_item_to_temp(context, + context->changes, + context->changes_file, + rev_file->file, entry, + iterpool2)); + else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS) + SVN_ERR(copy_item_to_temp(context, + context->file_props, + context->file_props_file, + rev_file->file, entry, + iterpool2)); + else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS) + SVN_ERR(copy_item_to_temp(context, + context->dir_props, + context->dir_props_file, + rev_file->file, entry, + iterpool2)); + else if ( entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP + || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP) + SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry, + iterpool2)); + else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) + SVN_ERR(copy_node_to_temp(context, rev_file->file, entry, + iterpool2)); + else + SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED); + + offset += entry->size; + } + } + + if (context->cancel_func) + SVN_ERR(context->cancel_func(context->cancel_baton)); + } + } + + svn_pool_destroy(iterpool2); + svn_pool_destroy(iterpool); + + /* phase 3: placement. + * Use "newest first" placement for simple items. 
*/ + sort_items(context->changes); + sort_items(context->file_props); + sort_items(context->dir_props); + + /* follow dependencies recursively for noderevs and data representations */ + sort_reps(context); + + /* phase 4: copy bucket data to pack file. Write P2L index. */ + SVN_ERR(store_items(context, context->changes_file, context->changes, + revpool)); + svn_pool_clear(revpool); + SVN_ERR(store_items(context, context->file_props_file, context->file_props, + revpool)); + svn_pool_clear(revpool); + SVN_ERR(store_items(context, context->dir_props_file, context->dir_props, + revpool)); + svn_pool_clear(revpool); + SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool)); + svn_pool_clear(revpool); + + /* write L2P index as well (now that we know all target offsets) */ + SVN_ERR(write_l2p_index(context, revpool)); + + svn_pool_destroy(revpool); + + return SVN_NO_ERROR; +} + +/* Append CONTEXT->START_REV to the context's pack file with no re-ordering. + * This function will only be used for very large revisions (>>100k changes). + * Use POOL for temporary allocations. + */ +static svn_error_t * +append_revision(pack_context_t *context, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = context->fs->fsap_data; + apr_off_t offset = 0; + apr_pool_t *iterpool = svn_pool_create(pool); + svn_fs_fs__revision_file_t *rev_file; + apr_finfo_t finfo; + + /* Get the size of the file. */ + const char *path = svn_dirent_join(context->shard_dir, + apr_psprintf(iterpool, "%ld", + context->start_rev), + pool); + SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, pool)); + + /* Copy all the bits from the rev file to the end of the pack file. 
*/ + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs, + context->start_rev, pool, + iterpool)); + SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file, + finfo.size, iterpool)); + + /* mark the start of a new revision */ + SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index, + pool)); + + /* read the phys-to-log index file until we covered the whole rev file. + * That index contains enough info to build both target indexes from it. */ + while (offset < finfo.size) + { + /* read one cluster */ + int i; + apr_array_header_t *entries; + + svn_pool_clear(iterpool); + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file, + context->start_rev, offset, + ffd->p2l_page_size, iterpool)); + + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + + /* skip first entry if that was duplicated due crossing a + cluster boundary */ + if (offset > entry->offset) + continue; + + /* process entry while inside the rev file */ + offset = entry->offset; + if (offset < finfo.size) + { + entry->offset += context->pack_offset; + offset += entry->size; + SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry( + context->proto_l2p_index, entry->offset, + entry->item.number, iterpool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry( + context->proto_p2l_index, entry, iterpool)); + } + } + } + + svn_pool_destroy(iterpool); + context->pack_offset += finfo.size; + + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + + return SVN_NO_ERROR; +} + +/* Logical addressing mode packing logic. + * + * Pack the revision shard starting at SHARD_REV in filesystem FS from + * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations. Limit + * the extra memory consumption to MAX_MEM bytes. CANCEL_FUNC and + * CANCEL_BATON are what you think they are. 
+ */ +static svn_error_t * +pack_log_addressed(svn_fs_t *fs, + const char *pack_file_dir, + const char *shard_dir, + svn_revnum_t shard_rev, + apr_size_t max_mem, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + enum + { + /* estimated amount of memory used to represent one item in memory + * during rev file packing */ + PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t)) + + APR_ALIGN_DEFAULT(2 *sizeof(void*)) + + APR_ALIGN_DEFAULT(sizeof(reference_t)) + + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t)) + + 6 * sizeof(void*) + }; + + int max_items; + apr_array_header_t *max_ids; + pack_context_t context = { 0 }; + int i; + apr_size_t item_count = 0; + apr_pool_t *iterpool = svn_pool_create(pool); + + /* Prevent integer overflow. We use apr arrays to process the items so + * the maximum number of items is INT_MAX. */ + { + apr_size_t temp = max_mem / PER_ITEM_MEM; + SVN_ERR_ASSERT(temp <= INT_MAX); + max_items = (int)temp; + } + + /* set up a pack context */ + SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir, + shard_rev, max_items, cancel_func, + cancel_baton, pool)); + + /* phase 1: determine the size of the revisions to pack */ + SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev, + context.shard_end_rev - shard_rev, + pool)); + + /* pack revisions in ranges that don't exceed MAX_MEM */ + for (i = 0; i < max_ids->nelts; ++i) + if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items) + { + context.end_rev++; + } + else + { + svn_pool_clear(iterpool); + + /* some unpacked revisions before this one? */ + if (context.start_rev < context.end_rev) + { + /* pack them intelligently (might be just 1 rev but still ...) 
*/ + SVN_ERR(pack_range(&context, iterpool)); + SVN_ERR(reset_pack_context(&context, iterpool)); + item_count = 0; + } + + /* next revision range is to start with the current revision */ + context.start_rev = i + context.shard_rev; + context.end_rev = context.start_rev + 1; + + /* if this is a very large revision, we must place it as is */ + if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items) + { + SVN_ERR(append_revision(&context, iterpool)); + context.start_rev++; + } + else + item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t); + } + + /* non-empty revision range at the end? */ + if (context.start_rev < context.end_rev) + SVN_ERR(pack_range(&context, iterpool)); + + /* last phase: finalize indexes and clean up */ + SVN_ERR(reset_pack_context(&context, iterpool)); + SVN_ERR(close_pack_context(&context, iterpool)); + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file. Use POOL for temporary allocations. */ svn_error_t * @@ -250,6 +1732,7 @@ const char *shard_path, apr_int64_t shard, int max_files_per_dir, + apr_size_t max_mem, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *pool) @@ -267,9 +1750,14 @@ /* Create the new directory and pack file. 
*/ SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool)); - SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev, - max_files_per_dir, cancel_func, - cancel_baton, pool)); + /* Index information files */ + if (svn_fs_fs__use_log_addressing(fs, shard_rev)) + SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev, + max_mem, cancel_func, cancel_baton, pool)); + else + SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev, + max_files_per_dir, cancel_func, + cancel_baton, pool)); SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool)); SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool)); @@ -409,7 +1897,7 @@ /* pack the revision content */ SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path, baton->shard, ffd->max_files_per_dir, - baton->cancel_func, + DEFAULT_MAX_MEM, baton->cancel_func, baton->cancel_baton, pool)); /* For newer repo formats, we only acquired the pack lock so far. Index: subversion/libsvn_fs_fs/rev_file.c =================================================================== --- subversion/libsvn_fs_fs/rev_file.c (revision 1623988) +++ subversion/libsvn_fs_fs/rev_file.c (working copy) @@ -22,6 +22,7 @@ #include "rev_file.h" #include "fs_fs.h" +#include "index.h" #include "low_level.h" #include "util.h" @@ -45,6 +46,12 @@ file->file = NULL; file->stream = NULL; + file->p2l_stream = NULL; + file->l2p_stream = NULL; + file->block_size = ffd->block_size; + file->l2p_offset = -1; + file->p2l_offset = -1; + file->footer_offset = -1; file->pool = pool; } @@ -78,13 +85,48 @@ return status; } +/* If the file at PATH is read-only, attempt to make it writable. The + * original state will be restored with RESULT_POOL gets cleaned up. + * SCRATCH_POOL is for temporary allocations. 
*/ +static svn_error_t * +auto_make_writable(const char *path, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_boolean_t is_read_only; + apr_finfo_t finfo; + + SVN_ERR(svn_io_stat(&finfo, path, SVN__APR_FINFO_READONLY, scratch_pool)); + SVN_ERR(svn_io__is_finfo_read_only(&is_read_only, &finfo, scratch_pool)); + + if (is_read_only) + { + /* Tell the pool to restore the r/o state upon cleanup + (assuming the file will still exist, failing silently otherwise). */ + set_read_only_baton_t *baton = apr_pcalloc(result_pool, + sizeof(*baton)); + baton->pool = result_pool; + baton->file_path = apr_pstrdup(result_pool, path); + apr_pool_cleanup_register(result_pool, baton, + set_read_only, apr_pool_cleanup_null); + + /* Finally, allow write access (undoing it has already been scheduled + and is idempotent). */ + SVN_ERR(svn_io_set_file_read_write(path, FALSE, scratch_pool)); + } + + return SVN_NO_ERROR; +} + /* Core implementation of svn_fs_fs__open_pack_or_rev_file working on an - * existing, initialized FILE structure. + * existing, initialized FILE structure. If WRITABLE is TRUE, give write + * access to the file - temporarily resetting the r/o state if necessary. */ static svn_error_t * open_pack_or_rev_file(svn_fs_fs__revision_file_t *file, svn_fs_t *fs, svn_revnum_t rev, + svn_boolean_t writable, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { @@ -96,12 +138,20 @@ { const char *path = svn_fs_fs__path_rev_absolute(fs, rev, scratch_pool); apr_file_t *apr_file; + apr_int32_t flags = writable + ? APR_READ | APR_WRITE | APR_BUFFERED + : APR_READ | APR_BUFFERED; - /* open the revision file in buffered r/o mode */ - err = svn_io_file_open(&apr_file, path, APR_READ | APR_BUFFERED, - APR_OS_DEFAULT, result_pool); + /* We may have to *temporarily* enable write access. */ + err = writable ? 
auto_make_writable(path, result_pool, scratch_pool) + : SVN_NO_ERROR; + /* open the revision file in buffered r/o or r/w mode */ if (!err) + err = svn_io_file_open(&apr_file, path, flags, APR_OS_DEFAULT, + result_pool); + + if (!err) { file->file = apr_file; file->stream = svn_stream_from_aprfile2(apr_file, TRUE, @@ -157,11 +207,62 @@ *file = apr_palloc(result_pool, sizeof(**file)); init_revision_file(*file, fs, rev, result_pool); - return svn_error_trace(open_pack_or_rev_file(*file, fs, rev, result_pool, - scratch_pool)); + return svn_error_trace(open_pack_or_rev_file(*file, fs, rev, FALSE, + result_pool, scratch_pool)); } svn_error_t * +svn_fs_fs__open_pack_or_rev_file_writable(svn_fs_fs__revision_file_t** file, + svn_fs_t* fs, + svn_revnum_t rev, + apr_pool_t* result_pool, + apr_pool_t *scratch_pool) +{ + *file = apr_palloc(result_pool, sizeof(**file)); + init_revision_file(*file, fs, rev, result_pool); + + return svn_error_trace(open_pack_or_rev_file(*file, fs, rev, TRUE, + result_pool, scratch_pool)); +} + +svn_error_t * +svn_fs_fs__auto_read_footer(svn_fs_fs__revision_file_t *file) +{ + if (file->l2p_offset == -1) + { + apr_off_t filesize = 0; + unsigned char footer_length; + svn_stringbuf_t *footer; + + /* Determine file size. */ + SVN_ERR(svn_io_file_seek(file->file, APR_END, &filesize, file->pool)); + + /* Read last byte (containing the length of the footer). */ + SVN_ERR(svn_io_file_aligned_seek(file->file, file->block_size, NULL, + filesize - 1, file->pool)); + SVN_ERR(svn_io_file_read_full2(file->file, &footer_length, + sizeof(footer_length), NULL, NULL, + file->pool)); + + /* Read footer. */ + footer = svn_stringbuf_create_ensure(footer_length, file->pool); + SVN_ERR(svn_io_file_aligned_seek(file->file, file->block_size, NULL, + filesize - 1 - footer_length, + file->pool)); + SVN_ERR(svn_io_file_read_full2(file->file, footer->data, footer_length, + &footer->len, NULL, file->pool)); + footer->data[footer->len] = '\0'; + + /* Extract index locations. 
*/ + SVN_ERR(svn_fs_fs__parse_footer(&file->l2p_offset, &file->p2l_offset, + footer, file->start_revision)); + file->footer_offset = filesize - footer_length - 1; + } + + return SVN_NO_ERROR; +} + +svn_error_t * svn_fs_fs__open_proto_rev_file(svn_fs_fs__revision_file_t **file, svn_fs_t *fs, const svn_fs_fs__id_part_t *txn_id, @@ -194,6 +295,8 @@ file->file = NULL; file->stream = NULL; + file->l2p_stream = NULL; + file->p2l_stream = NULL; return SVN_NO_ERROR; } Index: subversion/libsvn_fs_fs/rev_file.h =================================================================== --- subversion/libsvn_fs_fs/rev_file.h (revision 1623988) +++ subversion/libsvn_fs_fs/rev_file.h (working copy) @@ -57,6 +57,31 @@ /* stream based on FILE and not NULL exactly when FILE is not NULL */ svn_stream_t *stream; + /* the opened P2L index stream or NULL. Always NULL for txns. */ + svn_fs_fs__packed_number_stream_t *p2l_stream; + + /* the opened L2P index stream or NULL. Always NULL for txns. */ + svn_fs_fs__packed_number_stream_t *l2p_stream; + + /* Copied from FS->FFD->BLOCK_SIZE upon creation. It allows us to + * use aligned seek() without having the FS handy. */ + apr_off_t block_size; + + /* Offset within FILE at which the rev data ends and the L2P index + * data starts. Less than P2L_OFFSET. -1 if svn_fs_fs__auto_read_footer + * has not been called, yet. */ + apr_off_t l2p_offset; + + /* Offset within FILE at which the L2P index ends and the P2L index + * data starts. Greater than L2P_OFFSET. -1 if svn_fs_fs__auto_read_footer + * has not been called, yet. */ + apr_off_t p2l_offset; + + /* Offset within FILE at which the P2L index ends and the footer starts. + * Greater than P2L_OFFSET. -1 if svn_fs_fs__auto_read_footer has not + * been called, yet. 
*/
+  apr_off_t footer_offset;
+
   /* pool containing this object */
   apr_pool_t *pool;
 } svn_fs_fs__revision_file_t;
@@ -73,15 +98,27 @@
                                  apr_pool_t *result_pool,
                                  apr_pool_t *scratch_pool);
 
-/* Close previous files as well as streams in FILE (if open) and open the
- * rev / pack file for REVISION in FS.  This is useful when a pack operation
- * made the current files outdated or no longer available and the caller
- * wants to keep the same revision file data structure.
+/* Open the correct revision file for REV with read and write access.
+ * If necessary, temporarily reset the file's read-only state.  If the
+ * filesystem FS has been packed, *FILE will be set to the packed file;
+ * otherwise, set *FILE to the revision file for REV.
+ *
+ * Return SVN_ERR_FS_NO_SUCH_REVISION if the file doesn't exist.
+ * Allocate *FILE in RESULT_POOL and use SCRATCH_POOL for temporaries. */
+svn_error_t *
+svn_fs_fs__open_pack_or_rev_file_writable(svn_fs_fs__revision_file_t **file,
+                                          svn_fs_t *fs,
+                                          svn_revnum_t rev,
+                                          apr_pool_t *result_pool,
+                                          apr_pool_t *scratch_pool);
+
+/* If the footer data in FILE has not been read, yet, do so now.
+ * Index locations will only be read upon request as we assume they get
+ * cached and the FILE is usually used for REP data access only.
+ * Hence, the separate step.
  */
 svn_error_t *
-svn_fs_fs__reopen_revision_file(svn_fs_fs__revision_file_t *file,
-                                svn_fs_t *fs,
-                                svn_revnum_t revision);
+svn_fs_fs__auto_read_footer(svn_fs_fs__revision_file_t *file);
 
 /* Open the proto-rev file of transaction TXN_ID in FS and return it in *FILE.
 * Allocate *FILE in RESULT_POOL use and SCRATCH_POOL for temporaries..
*/ Index: subversion/libsvn_fs_fs/transaction.c =================================================================== --- subversion/libsvn_fs_fs/transaction.c (revision 1623988) +++ subversion/libsvn_fs_fs/transaction.c (working copy) @@ -376,6 +376,51 @@ return SVN_NO_ERROR; } +/* Make sure the length ACTUAL_LENGTH of the proto-revision file PROTO_REV + of transaction TXN_ID in filesystem FS matches the proto-index file. + Trim any crash / failure related extra data from the proto-rev file. + + If the prototype revision file is too short, we can't do much but bail out. + + Perform all allocations in POOL. */ +static svn_error_t * +auto_truncate_proto_rev(svn_fs_t *fs, + apr_file_t *proto_rev, + apr_off_t actual_length, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool) +{ + /* Only relevant for newer FSFS formats. */ + if (svn_fs_fs__use_log_addressing(fs, txn_id->revision)) + { + /* Determine file range covered by the proto-index so far. Note that + we always append to both file, i.e. the last index entry also + corresponds to the last addition in the rev file. */ + const char *path = svn_fs_fs__path_p2l_proto_index(fs, txn_id, pool); + apr_file_t *file; + apr_off_t indexed_length; + + SVN_ERR(svn_fs_fs__p2l_proto_index_open(&file, path, pool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_next_offset(&indexed_length, file, + pool)); + SVN_ERR(svn_io_file_close(file, pool)); + + /* Handle mismatches. */ + if (indexed_length < actual_length) + SVN_ERR(svn_io_file_trunc(proto_rev, indexed_length, pool)); + else if (indexed_length > actual_length) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, + NULL, + _("p2l proto index offset %s beyond proto" + "rev file size %s for TXN %s"), + apr_off_t_toa(pool, indexed_length), + apr_off_t_toa(pool, actual_length), + svn_fs_fs__id_txn_unparse(txn_id, pool)); + } + + return SVN_NO_ERROR; +} + /* Get a handle to the prototype revision file for transaction TXN_ID in filesystem FS, and lock it for writing. 
Return FILE, a file handle positioned at the end of the file, and LOCKCOOKIE, a cookie that @@ -418,6 +463,14 @@ if (!err) err = svn_io_file_seek(*file, APR_END, &end_offset, pool); + /* We don't want unused sections (such as leftovers from failed delta + stream) in our file. If we use log addressing, we would need an + index entry for the unused section and that section would need to + be all NUL by convention. So, detect and fix those cases by truncating + the protorev file. */ + if (!err) + err = auto_truncate_proto_rev(fs, *file, end_offset, txn_id, pool); + if (err) { err = svn_error_compose_create( @@ -1547,6 +1600,183 @@ return svn_io_file_close(file, pool); } +/* If the transaction TXN_ID in FS uses logical addressing, store the + * (ITEM_INDEX, OFFSET) pair in the txn's log-to-phys proto index file. + * If FINAL_REVISION is not SVN_INVALID_REVNUM, use it to determine whether + * to actually write to the proto-index. Use POOL for allocations. + */ +static svn_error_t * +store_l2p_index_entry(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + svn_revnum_t final_revision, + apr_off_t offset, + apr_uint64_t item_index, + apr_pool_t *pool) +{ + if (final_revision == SVN_INVALID_REVNUM) + final_revision = txn_id->revision + 1; + + if (svn_fs_fs__use_log_addressing(fs, final_revision)) + { + const char *path = svn_fs_fs__path_l2p_proto_index(fs, txn_id, pool); + apr_file_t *file; + SVN_ERR(svn_fs_fs__l2p_proto_index_open(&file, path, pool)); + SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(file, offset, + item_index, pool)); + SVN_ERR(svn_io_file_close(file, pool)); + } + + return SVN_NO_ERROR; +} + +/* If the transaction TXN_ID in FS uses logical addressing, store ENTRY + * in the phys-to-log proto index file of transaction TXN_ID. + * If FINAL_REVISION is not SVN_INVALID_REVNUM, use it to determine whether + * to actually write to the proto-index. Use POOL for allocations. 
+ */ +static svn_error_t * +store_p2l_index_entry(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + svn_revnum_t final_revision, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + if (final_revision == SVN_INVALID_REVNUM) + final_revision = txn_id->revision + 1; + + if (svn_fs_fs__use_log_addressing(fs, final_revision)) + { + const char *path = svn_fs_fs__path_p2l_proto_index(fs, txn_id, pool); + apr_file_t *file; + SVN_ERR(svn_fs_fs__p2l_proto_index_open(&file, path, pool)); + SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(file, entry, pool)); + SVN_ERR(svn_io_file_close(file, pool)); + } + + return SVN_NO_ERROR; +} + +/* Allocate an item index for the given MY_OFFSET in the transaction TXN_ID + * of file system FS and return it in *ITEM_INDEX. For old formats, it + * will simply return the offset as item index; in new formats, it will + * increment the txn's item index counter file and store the mapping in + * the proto index file. If FINAL_REVISION is not SVN_INVALID_REVNUM, use + * it to determine whether to actually write to the proto-index. + * Use POOL for allocations. 
+ */ +static svn_error_t * +allocate_item_index(apr_uint64_t *item_index, + svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + svn_revnum_t final_revision, + apr_off_t my_offset, + apr_pool_t *pool) +{ + if (final_revision == SVN_INVALID_REVNUM) + final_revision = txn_id->revision + 1; + + if (svn_fs_fs__use_log_addressing(fs, final_revision)) + { + apr_file_t *file; + char buffer[SVN_INT64_BUFFER_SIZE] = { 0 }; + svn_boolean_t eof = FALSE; + apr_size_t to_write; + apr_size_t read; + apr_off_t offset = 0; + + /* read number, increment it and write it back to disk */ + SVN_ERR(svn_io_file_open(&file, + svn_fs_fs__path_txn_item_index(fs, txn_id, pool), + APR_READ | APR_WRITE | APR_CREATE | APR_BUFFERED, + APR_OS_DEFAULT, pool)); + SVN_ERR(svn_io_file_read_full2(file, buffer, sizeof(buffer)-1, + &read, &eof, pool)); + if (read) + SVN_ERR(svn_cstring_atoui64(item_index, buffer)); + else + *item_index = SVN_FS_FS__ITEM_INDEX_FIRST_USER; + + to_write = svn__ui64toa(buffer, *item_index + 1); + SVN_ERR(svn_io_file_seek(file, SEEK_SET, &offset, pool)); + SVN_ERR(svn_io_file_write_full(file, buffer, to_write, NULL, pool)); + SVN_ERR(svn_io_file_close(file, pool)); + + /* write log-to-phys index */ + SVN_ERR(store_l2p_index_entry(fs, txn_id, final_revision, + my_offset, *item_index, pool)); + } + else + { + *item_index = (apr_uint64_t)my_offset; + } + + return SVN_NO_ERROR; +} + +/* Baton used by fnv1a_write_handler to calculate the FNV checksum + * before passing the data on to the INNER_STREAM. + */ +typedef struct fnv1a_stream_baton_t +{ + svn_stream_t *inner_stream; + svn_checksum_ctx_t *context; +} fnv1a_stream_baton_t; + +/* Implement svn_write_fn_t. + * Update checksum and pass data on to inner stream. 
+ */ +static svn_error_t * +fnv1a_write_handler(void *baton, + const char *data, + apr_size_t *len) +{ + fnv1a_stream_baton_t *b = baton; + + SVN_ERR(svn_checksum_update(b->context, data, *len)); + SVN_ERR(svn_stream_write(b->inner_stream, data, len)); + + return SVN_NO_ERROR; +} + +/* Return a stream that calculates a FNV checksum in *CONTEXT + * over all data written to the stream and passes that data on + * to INNER_STREAM. Allocate objects in POOL. + */ +static svn_stream_t * +fnv1a_wrap_stream(svn_checksum_ctx_t **context, + svn_stream_t *inner_stream, + apr_pool_t *pool) +{ + svn_stream_t *outer_stream; + + fnv1a_stream_baton_t *baton = apr_pcalloc(pool, sizeof(*baton)); + baton->inner_stream = inner_stream; + baton->context = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool); + *context = baton->context; + + outer_stream = svn_stream_create(baton, pool); + svn_stream_set_write(outer_stream, fnv1a_write_handler); + + return outer_stream; +} + +/* Set *DIGEST to the FNV checksum calculated in CONTEXT. + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +fnv1a_checksum_finalize(apr_uint32_t *digest, + svn_checksum_ctx_t *context, + apr_pool_t *scratch_pool) +{ + svn_checksum_t *checksum; + + SVN_ERR(svn_checksum_final(&checksum, context, scratch_pool)); + SVN_ERR_ASSERT(checksum->kind == svn_checksum_fnv1a_32x4); + *digest = ntohl(*(const apr_uint32_t *)(checksum->digest)); + + return SVN_NO_ERROR; +} + /* This baton is used by the representation writing streams. It keeps track of the checksum information as well as the total size of the representation so far. */ @@ -1583,6 +1813,9 @@ svn_checksum_ctx_t *md5_checksum_ctx; svn_checksum_ctx_t *sha1_checksum_ctx; + /* calculate a modified FNV-1a checksum of the on-disk representation */ + svn_checksum_ctx_t *fnv1a_checksum_ctx; + /* Local / scratch pool, available for temporary allocations. 
*/ apr_pool_t *scratch_pool; @@ -1840,7 +2073,10 @@ b->scratch_pool)); b->file = file; - b->rep_stream = svn_stream_from_aprfile2(file, TRUE, b->scratch_pool); + b->rep_stream = fnv1a_wrap_stream(&b->fnv1a_checksum_ctx, + svn_stream_from_aprfile2(file, TRUE, + b->scratch_pool), + b->scratch_pool); SVN_ERR(svn_fs_fs__get_file_offset(&b->rep_offset, file, b->scratch_pool)); @@ -2062,7 +2298,9 @@ { /* Write out our cosmetic end marker. */ SVN_ERR(svn_stream_puts(b->rep_stream, "ENDREP\n")); - rep->item_index = b->rep_offset; + SVN_ERR(allocate_item_index(&rep->item_index, b->fs, &rep->txn_id, + SVN_INVALID_REVNUM, b->rep_offset, + b->scratch_pool)); b->noderev->data_rep = rep; } @@ -2075,7 +2313,21 @@ FALSE, b->scratch_pool)); if (!old_rep) { + svn_fs_fs__p2l_entry_t entry; + + entry.offset = b->rep_offset; + SVN_ERR(svn_fs_fs__get_file_offset(&offset, b->file, b->scratch_pool)); + entry.size = offset - b->rep_offset; + entry.type = SVN_FS_FS__ITEM_TYPE_FILE_REP; + entry.item.revision = SVN_INVALID_REVNUM; + entry.item.number = rep->item_index; + SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum, + b->fnv1a_checksum_ctx, + b->scratch_pool)); + SVN_ERR(store_sha1_rep_mapping(b->fs, b->noderev, b->scratch_pool)); + SVN_ERR(store_p2l_index_entry(b->fs, &rep->txn_id, SVN_INVALID_REVNUM, + &entry, b->scratch_pool)); } SVN_ERR(svn_io_file_close(b->file, b->scratch_pool)); @@ -2273,6 +2525,7 @@ { svn_stream_t *stream; struct write_container_baton *whb; + svn_checksum_ctx_t *fnv1a_checksum_ctx; representation_t *old_rep; apr_off_t offset = 0; @@ -2280,7 +2533,10 @@ whb = apr_pcalloc(scratch_pool, sizeof(*whb)); - whb->stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool); + whb->stream = fnv1a_wrap_stream(&fnv1a_checksum_ctx, + svn_stream_from_aprfile2(file, TRUE, + scratch_pool), + scratch_pool); whb->size = 0; whb->md5_ctx = svn_checksum_ctx_create(svn_checksum_md5, scratch_pool); whb->sha1_ctx = svn_checksum_ctx_create(svn_checksum_sha1, scratch_pool); @@ -2310,11 
+2566,28 @@ } else { + svn_fs_fs__p2l_entry_t entry; + /* Write out our cosmetic end marker. */ SVN_ERR(svn_stream_puts(whb->stream, "ENDREP\n")); + SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id, + final_revision, offset, scratch_pool)); + + entry.offset = offset; + SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool)); + entry.size = offset - entry.offset; + entry.type = item_type; + entry.item.revision = SVN_INVALID_REVNUM; + entry.item.number = rep->item_index; + SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum, + fnv1a_checksum_ctx, + scratch_pool)); + + SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, final_revision, + &entry, scratch_pool)); + /* update the representation */ - rep->item_index = offset; rep->size = whb->size; rep->expanded_size = whb->size; } @@ -2356,6 +2629,7 @@ svn_stream_t *stream; representation_t *base_rep; representation_t *old_rep; + svn_checksum_ctx_t *fnv1a_checksum_ctx; svn_stream_t *source; svn_fs_fs__rep_header_t header = { 0 }; @@ -2388,7 +2662,10 @@ header.type = svn_fs_fs__rep_self_delta; } - file_stream = svn_stream_from_aprfile2(file, TRUE, scratch_pool); + file_stream = fnv1a_wrap_stream(&fnv1a_checksum_ctx, + svn_stream_from_aprfile2(file, TRUE, + scratch_pool), + scratch_pool); SVN_ERR(svn_fs_fs__write_rep_header(&header, file_stream, scratch_pool)); SVN_ERR(svn_fs_fs__get_file_offset(&delta_start, file, scratch_pool)); @@ -2432,14 +2709,31 @@ } else { + svn_fs_fs__p2l_entry_t entry; + /* Write out our cosmetic end marker. 
*/ SVN_ERR(svn_fs_fs__get_file_offset(&rep_end, file, scratch_pool)); SVN_ERR(svn_stream_puts(file_stream, "ENDREP\n")); + SVN_ERR(allocate_item_index(&rep->item_index, fs, &rep->txn_id, + final_revision, offset, scratch_pool)); + + entry.offset = offset; + SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, scratch_pool)); + entry.size = offset - entry.offset; + entry.type = item_type; + entry.item.revision = SVN_INVALID_REVNUM; + entry.item.number = rep->item_index; + SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum, + fnv1a_checksum_ctx, + scratch_pool)); + + SVN_ERR(store_p2l_index_entry(fs, &rep->txn_id, final_revision, + &entry, scratch_pool)); + /* update the representation */ rep->expanded_size = whb->size; rep->size = rep_end - delta_start; - rep->item_index = offset; } return SVN_NO_ERROR; @@ -2575,7 +2869,9 @@ const svn_fs_id_t *new_id; svn_fs_fs__id_part_t node_id, copy_id, rev_item; fs_fs_data_t *ffd = fs->fsap_data; + const svn_fs_fs__id_part_t *txn_id = svn_fs_fs__id_txn_id(id); svn_stream_t *file_stream; + svn_checksum_ctx_t *fnv1a_checksum_ctx; apr_pool_t *subpool; *new_id_p = NULL; @@ -2641,13 +2937,16 @@ reset_txn_in_rep(noderev->data_rep); noderev->data_rep->revision = rev; - /* See issue 3845. Some unknown mechanism caused the - protorev file to get truncated, so check for that - here. */ - if (noderev->data_rep->item_index + noderev->data_rep->size - > initial_offset) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Truncated protorev file detected")); + if (!svn_fs_fs__use_log_addressing(fs, rev)) + { + /* See issue 3845. Some unknown mechanism caused the + protorev file to get truncated, so check for that + here. 
*/ + if (noderev->data_rep->item_index + noderev->data_rep->size + > initial_offset) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Truncated protorev file detected")); + } } } @@ -2687,8 +2986,17 @@ /* root nodes have a fixed ID in log addressing mode */ SVN_ERR(svn_fs_fs__get_file_offset(&my_offset, file, pool)); + if (svn_fs_fs__use_log_addressing(fs, rev) && at_root) + { + /* reference the root noderev from the log-to-phys index */ + rev_item.number = SVN_FS_FS__ITEM_INDEX_ROOT_NODE; + SVN_ERR(store_l2p_index_entry(fs, txn_id, rev, my_offset, + rev_item.number, pool)); + } + else + SVN_ERR(allocate_item_index(&rev_item.number, fs, txn_id, rev, + my_offset, pool)); - rev_item.number = my_offset; rev_item.revision = rev; new_id = svn_fs_fs__id_rev_create(&node_id, ©_id, &rev_item, pool); @@ -2736,11 +3044,31 @@ if (at_root) SVN_ERR(validate_root_noderev(fs, noderev, rev, pool)); - file_stream = svn_stream_from_aprfile2(file, TRUE, pool); + file_stream = fnv1a_wrap_stream(&fnv1a_checksum_ctx, + svn_stream_from_aprfile2(file, TRUE, pool), + pool); SVN_ERR(svn_fs_fs__write_noderev(file_stream, noderev, ffd->format, svn_fs_fs__fs_supports_mergeinfo(fs), pool)); + /* reference the root noderev from the log-to-phys index */ + if (svn_fs_fs__use_log_addressing(fs, rev)) + { + svn_fs_fs__p2l_entry_t entry; + rev_item.revision = SVN_INVALID_REVNUM; + + entry.offset = my_offset; + SVN_ERR(svn_fs_fs__get_file_offset(&my_offset, file, pool)); + entry.size = my_offset - entry.offset; + entry.type = SVN_FS_FS__ITEM_TYPE_NODEREV; + entry.item = rev_item; + SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum, + fnv1a_checksum_ctx, + pool)); + + SVN_ERR(store_p2l_index_entry(fs, txn_id, rev, &entry, pool)); + } + /* Return our ID that references the revision file. 
*/ *new_id_p = noderev->id; @@ -2763,15 +3091,38 @@ { apr_off_t offset; svn_stream_t *stream; + svn_checksum_ctx_t *fnv1a_checksum_ctx; SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, pool)); - /* write to target file */ - stream = svn_stream_from_aprfile2(file, TRUE, pool); + /* write to target file & calculate checksum */ + stream = fnv1a_wrap_stream(&fnv1a_checksum_ctx, + svn_stream_from_aprfile2(file, TRUE, pool), + pool); SVN_ERR(svn_fs_fs__write_changes(stream, fs, changed_paths, TRUE, pool)); *offset_p = offset; + /* reference changes from the indexes */ + if (svn_fs_fs__use_log_addressing(fs, new_rev)) + { + svn_fs_fs__p2l_entry_t entry; + + entry.offset = offset; + SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, pool)); + entry.size = offset - entry.offset; + entry.type = SVN_FS_FS__ITEM_TYPE_CHANGES; + entry.item.revision = SVN_INVALID_REVNUM; + entry.item.number = SVN_FS_FS__ITEM_INDEX_CHANGES; + SVN_ERR(fnv1a_checksum_finalize(&entry.fnv1_checksum, + fnv1a_checksum_ctx, + pool)); + + SVN_ERR(store_p2l_index_entry(fs, txn_id, new_rev, &entry, pool)); + SVN_ERR(store_l2p_index_entry(fs, txn_id, new_rev, entry.offset, + SVN_FS_FS__ITEM_INDEX_CHANGES, pool)); + } + return SVN_NO_ERROR; } @@ -2925,6 +3276,38 @@ return SVN_NO_ERROR; } +/* Return TRUE, if transaction TXN_ID in FS definitively uses logical + * addressing mode. Use POOL for temporary allocations. + */ +static svn_boolean_t +using_log_addressing(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool) +{ + /* As long as we don't write new data representations, we won't allocate + IDs and there is no difference between log & phys mode. + + After the first ID got allocated, it is logical mode and the proto- + index file does exist. + */ + svn_node_kind_t kind; + const char *path = svn_fs_fs__path_l2p_proto_index(fs, txn_id, pool); + + svn_error_t *err = svn_io_check_path(path, &kind, pool); + if (err) + { + /* We couldn't check for the presence of the index file. 
+
+         So, we probably won't be able to access it during later stages
+         of the commit.
+       */
+      svn_error_clear(err);
+      return FALSE;
+    }
+
+  return kind == svn_node_file;
+}
+
 /* Return TRUE, if the file with FILENAME contains a node revision.
  */
 static svn_boolean_t
@@ -2943,6 +3326,192 @@
   return dot_count == 2;
 }
 
+/* Determine the checksum for the SIZE bytes of data starting at START
+ * in FILE and return the result in *FNV1_CHECKSUM.
+ * Use POOL for temporary allocations.
+ */
+static svn_error_t *
+fnv1a_checksum_on_file_range(apr_uint32_t *fnv1_checksum,
+                             apr_file_t *file,
+                             apr_off_t start,
+                             apr_off_t size,
+                             apr_pool_t *pool)
+{
+  char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
+
+  svn_checksum_ctx_t *checksum_ctx
+    = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool);
+
+  SVN_ERR(svn_io_file_seek(file, APR_SET, &start, pool));
+  while (size > 0)
+    {
+      apr_size_t to_read = MIN(size, SVN__STREAM_CHUNK_SIZE);
+
+      SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, &to_read,
+                                     NULL, pool));
+      SVN_ERR(svn_checksum_update(checksum_ctx, buffer, to_read));
+      size -= to_read;
+    }
+  SVN_ERR(fnv1a_checksum_finalize(fnv1_checksum, checksum_ctx, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* qsort()-compatible comparison function sorting svn_fs_fs__p2l_entry_t
+ * by offset.
+ */
+static int
+compare_sort_p2l_entry(const void *a,
+                       const void *b)
+{
+  apr_off_t lhs = ((const svn_fs_fs__p2l_entry_t *)a)->offset;
+  apr_off_t rhs = ((const svn_fs_fs__p2l_entry_t *)b)->offset;
+
+  return lhs < rhs ? -1 : rhs == lhs ? 0 : 1;
+}
+
+
+/* Upgrade the transaction TXN_ID in FS from physical addressing mode
+ * to logical addressing mode.  FINAL_REVISION is the revision that this
+ * txn is being committed to.  Use POOL for temporary allocations.
+ */ +static svn_error_t * +upgrade_transaction(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + svn_revnum_t final_revision, + apr_file_t *proto_file, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_hash_t *dirents; + int i; + apr_hash_index_t* hi; + + /* we allocate large temporary data and want to release it asap */ + + apr_pool_t *subpool = svn_pool_create(pool); + apr_pool_t *iterpool = svn_pool_create(pool); + + apr_hash_t *id_map = apr_hash_make(subpool); + const char *txn_dir = svn_fs_fs__path_txn_dir(fs, txn_id, subpool); + apr_array_header_t *p2l_entries + = apr_array_make(subpool, 16, sizeof(svn_fs_fs__p2l_entry_t)); + + /* scan the txn directory for noderev files and patch them up */ + + SVN_ERR(svn_io_get_dirents3(&dirents, txn_dir, TRUE, subpool, iterpool)); + for (hi = apr_hash_first(subpool, dirents); hi; hi = apr_hash_next(hi)) + { + apr_file_t *file; + const char *filename; + node_revision_t *noderev; + svn_stream_t *stream; + const char *name; + apr_uint64_t *old_index, *item_index, *new_index; + + svn_pool_clear(iterpool); + + /* We are only interested in file data reps of this txns. + Older IDs remain valid because they are already committed. + Other IDs (noderevs and their usage in directories) will only be + assigned later anyways. */ + + name = apr_hash_this_key(hi); + if (!is_noderev_file(name)) + continue; + + filename = svn_dirent_join(txn_dir, name, iterpool); + SVN_ERR(svn_io_file_open(&file, filename, + APR_READ | APR_WRITE | APR_BUFFERED, + APR_OS_DEFAULT, + iterpool)); + stream = svn_stream_from_aprfile2(file, TRUE, iterpool); + SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, iterpool, iterpool)); + if ( noderev->data_rep == NULL + || noderev->data_rep->revision != SVN_INVALID_REVNUM + || noderev->kind != svn_node_file) + continue; + + /* We need to assign an id. + We might already have one because of rep sharing. 
*/ + + item_index = &noderev->data_rep->item_index; + new_index = apr_hash_get(id_map, item_index, sizeof(*item_index)); + + if (new_index) + { + *item_index = *new_index; + } + else + { + svn_fs_fs__rep_header_t *header; + svn_fs_fs__p2l_entry_t *entry; + + /* assign a logical ID and write the L2P proto-index */ + + old_index = apr_pmemdup(subpool, item_index, sizeof(*item_index)); + SVN_ERR(allocate_item_index(item_index, fs, txn_id, final_revision, + *old_index, iterpool)); + + new_index = apr_pmemdup(subpool, item_index, sizeof(*item_index)); + apr_hash_set(id_map, old_index, sizeof(*old_index), new_index); + + /* we need to know the length of the representation header + because it is not accounted for by the representation length */ + + entry = apr_array_push(p2l_entries); + entry->offset = *old_index; + SVN_ERR(svn_io_file_seek(proto_file, APR_SET, &entry->offset, + iterpool)); + SVN_ERR(svn_fs_fs__read_rep_header(&header, + svn_stream_from_aprfile2(proto_file, TRUE, iterpool), + iterpool, iterpool)); + + /* Create the corresponding entry for the P2L proto-index. + + We need to write that proto-index in strict offset order but + we have no control over the order in which we traverse the + data reps. Thus, we collect the entries in an array. */ + + entry->size = noderev->data_rep->size + header->header_size + 7; + /* 7 for the "ENDREP\n" */ + entry->type = SVN_FS_FS__ITEM_TYPE_FILE_REP; + entry->item.revision = SVN_INVALID_REVNUM; + entry->item.number = *new_index; + SVN_ERR(fnv1a_checksum_on_file_range(&entry->fnv1_checksum, + proto_file, + entry->offset, entry->size, + iterpool)); + } + + /* write the updated noderev to disk */ + + SVN_ERR(svn_io_file_trunc(file, 0, iterpool)); + SVN_ERR(svn_fs_fs__write_noderev(stream, noderev, ffd->format, + TRUE, iterpool)); + } + + /* Finally, write all P2L proto-index entries ordered by item offset. 
*/ + + qsort(p2l_entries->elts, p2l_entries->nelts, p2l_entries->elt_size, + compare_sort_p2l_entry); + for (i = 0; i < p2l_entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry; + + svn_pool_clear(iterpool); + + entry = &APR_ARRAY_IDX(p2l_entries, i, svn_fs_fs__p2l_entry_t); + SVN_ERR(store_p2l_index_entry(fs, txn_id, final_revision, + entry, iterpool)); + } + + svn_pool_clear(iterpool); + svn_pool_clear(subpool); + + return SVN_NO_ERROR; +} + /* Return in *PATH the path to a file containing the properties that make up the final revision properties file. This involves setting svn:date and removing any temporary properties associated with the @@ -3003,6 +3572,42 @@ return SVN_NO_ERROR; } +svn_error_t * +svn_fs_fs__add_index_data(svn_fs_t *fs, + apr_file_t *file, + const char *l2p_proto_index, + const char *p2l_proto_index, + svn_revnum_t revision, + apr_pool_t *pool) +{ + apr_off_t l2p_offset; + apr_off_t p2l_offset; + svn_stringbuf_t *footer; + unsigned char footer_length; + + /* Append the actual index data to the pack file. */ + l2p_offset = 0; + SVN_ERR(svn_io_file_seek(file, APR_END, &l2p_offset, pool)); + SVN_ERR(svn_fs_fs__l2p_index_append(fs, file, l2p_proto_index, revision, + pool)); + + p2l_offset = 0; + SVN_ERR(svn_io_file_seek(file, APR_END, &p2l_offset, pool)); + SVN_ERR(svn_fs_fs__p2l_index_append(fs, file, p2l_proto_index, revision, + pool)); + + /* Append footer. */ + footer = svn_fs_fs__unparse_footer(l2p_offset, p2l_offset, pool); + SVN_ERR(svn_io_file_write_full(file, footer->data, footer->len, NULL, + pool)); + + footer_length = footer->len; + SVN_ERR_ASSERT(footer_length == footer->len); + SVN_ERR(svn_io_file_write_full(file, &footer_length, 1, NULL, pool)); + + return SVN_NO_ERROR; +} + /* Baton used for commit_body below. 
*/ struct commit_baton { svn_revnum_t *new_rev_p; @@ -3032,7 +3637,6 @@ apr_off_t initial_offset, changed_path_offset; const svn_fs_fs__id_part_t *txn_id = svn_fs_fs__txn_get_id(cb->txn); apr_hash_t *changed_paths; - svn_stringbuf_t *trailer; /* Read the current youngest revision and, possibly, the next available node id and copy id (for old format filesystems). Update the cached @@ -3066,6 +3670,18 @@ cb->fs, txn_id, pool)); SVN_ERR(svn_fs_fs__get_file_offset(&initial_offset, proto_file, pool)); + /* Make sure that we don't try to commit an old txn that used physical + addressing but will be committed into the revision range that requires + logical addressing. + */ + if (svn_fs_fs__use_log_addressing(cb->fs, new_rev) + && !svn_fs_fs__use_log_addressing(cb->fs, txn_id->revision) + && !using_log_addressing(cb->fs, txn_id, pool)) + { + SVN_ERR(upgrade_transaction(cb->fs, txn_id, new_rev, proto_file, pool)); + SVN_ERR(svn_io_file_seek(proto_file, APR_SET, &initial_offset, pool)); + } + /* Write out all the node-revisions and directory contents. */ root_id = svn_fs_fs__id_txn_create_root(txn_id, pool); SVN_ERR(write_final_rev(&new_root_id, proto_file, new_rev, cb->fs, root_id, @@ -3078,16 +3694,28 @@ cb->fs, txn_id, changed_paths, new_rev, pool)); - /* Write the final line. */ + if (!svn_fs_fs__use_log_addressing(cb->fs, new_rev)) + { + /* Write the final line. */ - trailer - = svn_fs_fs__unparse_revision_trailer - ((apr_off_t)svn_fs_fs__id_item(new_root_id), - changed_path_offset, - pool); - SVN_ERR(svn_io_file_write_full(proto_file, trailer->data, trailer->len, - NULL, pool)); + svn_stringbuf_t *trailer + = svn_fs_fs__unparse_revision_trailer + ((apr_off_t)svn_fs_fs__id_item(new_root_id), + changed_path_offset, + pool); + SVN_ERR(svn_io_file_write_full(proto_file, trailer->data, trailer->len, + NULL, pool)); + } + else + { + /* Append the index data to the rev file. 
*/ + SVN_ERR(svn_fs_fs__add_index_data(cb->fs, proto_file, + svn_fs_fs__path_l2p_proto_index(cb->fs, txn_id, pool), + svn_fs_fs__path_p2l_proto_index(cb->fs, txn_id, pool), + new_rev, pool)); + } + SVN_ERR(svn_io_file_flush_to_disk(proto_file, pool)); SVN_ERR(svn_io_file_close(proto_file, pool)); Index: subversion/libsvn_fs_fs/util.c =================================================================== --- subversion/libsvn_fs_fs/util.c (revision 1623988) +++ subversion/libsvn_fs_fs/util.c (working copy) @@ -245,7 +245,34 @@ SVN_VA_NULL); } +const char* +svn_fs_fs__path_l2p_proto_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool) +{ + return svn_dirent_join(svn_fs_fs__path_txn_dir(fs, txn_id, pool), + PATH_INDEX PATH_EXT_L2P_INDEX, pool); +} + +const char* +svn_fs_fs__path_p2l_proto_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool) +{ + return svn_dirent_join(svn_fs_fs__path_txn_dir(fs, txn_id, pool), + PATH_INDEX PATH_EXT_P2L_INDEX, pool); +} + const char * +svn_fs_fs__path_txn_item_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool) +{ + return svn_dirent_join(svn_fs_fs__path_txn_dir(fs, txn_id, pool), + PATH_TXN_ITEM_INDEX, pool); +} + +const char * svn_fs_fs__path_txn_proto_rev(svn_fs_t *fs, const svn_fs_fs__id_part_t *txn_id, apr_pool_t *pool) @@ -644,3 +671,12 @@ return SVN_NO_ERROR; } + +svn_boolean_t +svn_fs_fs__use_log_addressing(svn_fs_t *fs, + svn_revnum_t rev) +{ + fs_fs_data_t *ffd = fs->fsap_data; + return ffd->min_log_addressing_rev != SVN_INVALID_REVNUM + && ffd->min_log_addressing_rev <= rev; +} Index: subversion/libsvn_fs_fs/util.h =================================================================== --- subversion/libsvn_fs_fs/util.h (revision 1623988) +++ subversion/libsvn_fs_fs/util.h (working copy) @@ -238,6 +238,33 @@ const svn_fs_id_t *id, apr_pool_t *pool); +/* Return the path of the file containing the log-to-phys index for + * the transaction identified by TXN_ID 
in FS. + * The result will be allocated in POOL. + */ +const char* +svn_fs_fs__path_l2p_proto_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool); + +/* Return the path of the file containing the phys-to-log index for + * the transaction identified by TXN_ID in FS. + * The result will be allocated in POOL. + */ +const char* +svn_fs_fs__path_p2l_proto_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool); + +/* Return the path of the file containing item_index counter for + * the transaction identified by TXN_ID in FS. + * The result will be allocated in POOL. + */ +const char * +svn_fs_fs__path_txn_item_index(svn_fs_t *fs, + const svn_fs_fs__id_part_t *txn_id, + apr_pool_t *pool); + /* Return the path of the file containing the node origins cachs for * the given NODE_ID in FS. The result will be allocated in POOL. */ @@ -360,4 +387,9 @@ const char *perms_reference, apr_pool_t *pool); +/* Return TRUE, iff revision REV in FS requires logical addressing. */ +svn_boolean_t +svn_fs_fs__use_log_addressing(svn_fs_t *fs, + svn_revnum_t rev); + #endif Index: subversion/libsvn_fs_fs/verify.c =================================================================== --- subversion/libsvn_fs_fs/verify.c (revision 1623988) +++ subversion/libsvn_fs_fs/verify.c (working copy) @@ -30,6 +30,7 @@ #include "cached_data.h" #include "rep-cache.h" #include "util.h" +#include "index.h" #include "../libsvn_fs/fs-loader.h" @@ -158,6 +159,438 @@ return SVN_NO_ERROR; } +/* Verify that for all log-to-phys index entries for revisions START to + * START + COUNT-1 in FS there is a consistent entry in the phys-to-log + * index. If given, invoke CANCEL_FUNC with CANCEL_BATON at regular + * intervals. Use POOL for allocations. 
+ */ +static svn_error_t * +compare_l2p_to_p2l_index(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t count, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + svn_revnum_t i; + apr_pool_t *iterpool = svn_pool_create(pool); + apr_array_header_t *max_ids; + + /* common file access structure */ + svn_fs_fs__revision_file_t *rev_file; + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, + iterpool)); + + /* determine the range of items to check for each revision */ + SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool)); + + /* check all items in all revisions if the given range */ + for (i = 0; i < max_ids->nelts; ++i) + { + apr_uint64_t k; + apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t); + svn_revnum_t revision = start + i; + + for (k = 0; k < max_id; ++k) + { + apr_off_t offset; + svn_fs_fs__p2l_entry_t *p2l_entry; + svn_pool_clear(iterpool); + + /* get L2P entry. Ignore unused entries. */ + SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, + NULL, k, iterpool)); + if (offset == -1) + continue; + + /* find the corresponding P2L entry */ + SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, rev_file, + revision, offset, iterpool)); + + if (p2l_entry == NULL) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, + NULL, + _("p2l index entry not found for " + "PHYS %s returned by " + "l2p index for LOG r%ld:i%ld"), + apr_off_t_toa(pool, offset), + revision, (long)k); + + if ( p2l_entry->item.number != k + || p2l_entry->item.revision != revision) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, + NULL, + _("p2l index info LOG r%ld:i%ld" + " does not match " + "l2p index for LOG r%ld:i%ld"), + p2l_entry->item.revision, + (long)p2l_entry->item.number, + revision, (long)k); + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + } + + svn_pool_destroy(iterpool); + + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + + return SVN_NO_ERROR; +} + 
+/* Verify that for all phys-to-log index entries for revisions START to + * START + COUNT-1 in FS there is a consistent entry in the log-to-phys + * index. If given, invoke CANCEL_FUNC with CANCEL_BATON at regular + * intervals. Use POOL for allocations. + * + * Please note that we can only check on pack / rev file granularity and + * must only be called for a single rev / pack file. + */ +static svn_error_t * +compare_p2l_to_l2p_index(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t count, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_pool_t *iterpool = svn_pool_create(pool); + apr_off_t max_offset; + apr_off_t offset = 0; + + /* common file access structure */ + svn_fs_fs__revision_file_t *rev_file; + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, + iterpool)); + + /* get the size of the rev / pack file as covered by the P2L index */ + SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start, + pool)); + + /* for all offsets in the file, get the P2L index entries and check + them against the L2P index */ + for (offset = 0; offset < max_offset; ) + { + apr_array_header_t *entries; + svn_fs_fs__p2l_entry_t *last_entry; + int i; + + svn_pool_clear(iterpool); + + /* get all entries for the current block */ + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start, + offset, ffd->p2l_page_size, + iterpool)); + if (entries->nelts == 0) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_CORRUPTION, + NULL, + _("p2l does not cover offset %s" + " for revision %ld"), + apr_off_t_toa(pool, offset), start); + + /* process all entries (and later continue with the next block) */ + last_entry + = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t); + offset = last_entry->offset + last_entry->size; + + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + + /* check all 
sub-items for consist entries in the L2P index */ + if (entry->type != SVN_FS_FS__ITEM_TYPE_UNUSED) + { + apr_off_t l2p_offset; + SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs, rev_file, + entry->item.revision, NULL, + entry->item.number, iterpool)); + + if (l2p_offset != entry->offset) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, + NULL, + _("l2p index entry PHYS %s" + "does not match p2l index value " + "LOG r%ld:i%ld for PHYS %s"), + apr_off_t_toa(pool, l2p_offset), + entry->item.revision, + (long)entry->item.number, + apr_off_t_toa(pool, entry->offset)); + } + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + } + + svn_pool_destroy(iterpool); + + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + + return SVN_NO_ERROR; +} + +/* Items smaller than this can be read at once into a buffer and directly + * be checksummed. Larger items require stream processing. + * Must be a multiple of 8. */ +#define STREAM_THRESHOLD 4096 + +/* Verify that the next SIZE bytes read from FILE are NUL. + * SIZE must not exceed STREAM_THRESHOLD. Use POOL for allocations. 
+ */ +static svn_error_t * +expect_buffer_nul(apr_file_t *file, + apr_off_t size, + apr_pool_t *pool) +{ + union + { + unsigned char buffer[STREAM_THRESHOLD]; + apr_uint64_t chunks[STREAM_THRESHOLD / sizeof(apr_uint64_t)]; + } data; + + apr_size_t i; + SVN_ERR_ASSERT(size <= STREAM_THRESHOLD); + + /* read the whole data block; error out on failure */ + data.chunks[(size - 1)/ sizeof(apr_uint64_t)] = 0; + SVN_ERR(svn_io_file_read_full2(file, data.buffer, size, NULL, NULL, pool)); + + /* chunky check */ + for (i = 0; i < size / sizeof(apr_uint64_t); ++i) + if (data.chunks[i] != 0) + break; + + /* byte-wise check upon mismatch or at the end of the block */ + for (i *= sizeof(apr_uint64_t); i < size; ++i) + if (data.buffer[i] != 0) + { + const char *file_name; + apr_off_t offset; + + SVN_ERR(svn_io_file_name_get(&file_name, file, pool)); + SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, pool)); + offset -= size - i; + + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Empty section in file %s contains " + "non-NUL data at offset %s"), + file_name, apr_off_t_toa(pool, offset)); + } + + return SVN_NO_ERROR; +} + +/* Verify that the next SIZE bytes read from FILE are NUL. + * Use POOL for allocations. + */ +static svn_error_t * +read_all_nul(apr_file_t *file, + apr_off_t size, + apr_pool_t *pool) +{ + for (; size >= STREAM_THRESHOLD; size -= STREAM_THRESHOLD) + SVN_ERR(expect_buffer_nul(file, STREAM_THRESHOLD, pool)); + + if (size) + SVN_ERR(expect_buffer_nul(file, size, pool)); + + return SVN_NO_ERROR; +} + +/* Compare the ACTUAL checksum with the one expected by ENTRY. + * Return an error in case of mismatch. Use the name of FILE + * in error message. Allocate data in POOL. 
+ */ +static svn_error_t * +expected_checksum(apr_file_t *file, + svn_fs_fs__p2l_entry_t *entry, + apr_uint32_t actual, + apr_pool_t *pool) +{ + if (actual != entry->fnv1_checksum) + { + const char *file_name; + + SVN_ERR(svn_io_file_name_get(&file_name, file, pool)); + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Checksum mismatch item at offset %s of " + "length %s bytes in file %s"), + apr_off_t_toa(pool, entry->offset), + apr_off_t_toa(pool, entry->size), file_name); + } + + return SVN_NO_ERROR; +} + +/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read + * from FILE will match ENTRY's expected checksum. SIZE must not + * exceed STREAM_THRESHOLD. Use POOL for allocations. + */ +static svn_error_t * +expected_buffered_checksum(apr_file_t *file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + unsigned char buffer[STREAM_THRESHOLD]; + SVN_ERR_ASSERT(entry->size <= STREAM_THRESHOLD); + + SVN_ERR(svn_io_file_read_full2(file, buffer, (apr_size_t)entry->size, + NULL, NULL, pool)); + SVN_ERR(expected_checksum(file, entry, + svn__fnv1a_32x4(buffer, (apr_size_t)entry->size), + pool)); + + return SVN_NO_ERROR; +} + +/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read from + * FILE will match ENTRY's expected checksum. Use POOL for allocations. + */ +static svn_error_t * +expected_streamed_checksum(apr_file_t *file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + unsigned char buffer[STREAM_THRESHOLD]; + svn_checksum_t *checksum; + svn_checksum_ctx_t *context + = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool); + apr_off_t size = entry->size; + + while (size > 0) + { + apr_size_t to_read = size > sizeof(buffer) + ? 
sizeof(buffer) + : (apr_size_t)size; + SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL, + pool)); + SVN_ERR(svn_checksum_update(context, buffer, to_read)); + size -= to_read; + } + + SVN_ERR(svn_checksum_final(&checksum, context, pool)); + SVN_ERR(expected_checksum(file, entry, + ntohl(*(const apr_uint32_t *)checksum->digest), + pool)); + + return SVN_NO_ERROR; +} + +/* Verify that for all phys-to-log index entries for revisions START to + * START + COUNT-1 in FS match the actual pack / rev file contents. + * If given, invoke CANCEL_FUNC with CANCEL_BATON at regular intervals. + * Use POOL for allocations. + * + * Please note that we can only check on pack / rev file granularity and + * must only be called for a single rev / pack file. + */ +static svn_error_t * +compare_p2l_to_rev(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t count, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_pool_t *iterpool = svn_pool_create(pool); + apr_off_t max_offset; + apr_off_t offset = 0; + svn_fs_fs__revision_file_t *rev_file; + + /* open the pack / rev file that is covered by the p2l index */ + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, + iterpool)); + + /* check file size vs. 
range covered by index */ + SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); + SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start, + pool)); + + if (rev_file->l2p_offset != max_offset) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, NULL, + _("File size of %s for revision r%ld does " + "not match p2l index size of %s"), + apr_off_t_toa(pool, rev_file->l2p_offset), start, + apr_off_t_toa(pool, max_offset)); + + SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0, + pool)); + + /* for all offsets in the file, get the P2L index entries and check + them against the L2P index */ + for (offset = 0; offset < max_offset; ) + { + apr_array_header_t *entries; + int i; + + svn_pool_clear(iterpool); + + /* get all entries for the current block */ + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start, + offset, ffd->p2l_page_size, + iterpool)); + + /* process all entries (and later continue with the next block) */ + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + + /* skip bits we previously checked */ + if (i == 0 && entry->offset < offset) + continue; + + /* skip zero-sized entries */ + if (entry->size == 0) + continue; + + /* p2l index must cover all rev / pack file offsets exactly once */ + if (entry->offset != offset) + return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT, + NULL, + _("p2l index entry for revision r%ld" + " is non-contiguous between offsets " + " %s and %s"), + start, + apr_off_t_toa(pool, offset), + apr_off_t_toa(pool, entry->offset)); + + /* empty sections must contain NUL bytes only */ + if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) + { + /* skip filler entry at the end of the p2l index */ + if (entry->offset != max_offset) + SVN_ERR(read_all_nul(rev_file->file, entry->size, pool)); + } + else if (entry->fnv1_checksum) + { + if (entry->size < STREAM_THRESHOLD) + 
SVN_ERR(expected_buffered_checksum(rev_file->file, entry, + pool)); + else + SVN_ERR(expected_streamed_checksum(rev_file->file, entry, + pool)); + } + + /* advance offset */ + offset += entry->size; + } + + if (cancel_func) + SVN_ERR(cancel_func(cancel_baton)); + } + + svn_pool_destroy(iterpool); + + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + + return SVN_NO_ERROR; +} + static svn_revnum_t pack_size(svn_fs_t *fs, svn_revnum_t rev) { @@ -166,6 +599,79 @@ return rev < ffd->min_unpacked_rev ? ffd->max_files_per_dir : 1; } +/* Verify that the log-to-phys indexes and phys-to-log indexes are + * consistent with each other. The function signature is similar to + * svn_fs_fs__verify. + * + * The values of START and END have already been auto-selected and + * verified. You may call this for format7 or higher repos. + */ +static svn_error_t * +verify_index_consistency(svn_fs_t *fs, + svn_revnum_t start, + svn_revnum_t end, + svn_fs_progress_notify_func_t notify_func, + void *notify_baton, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + svn_revnum_t revision, next_revision; + apr_pool_t *iterpool = svn_pool_create(pool); + + for (revision = start; revision <= end; revision = next_revision) + { + svn_error_t *err = SVN_NO_ERROR; + + svn_revnum_t count = pack_size(fs, revision); + svn_revnum_t pack_start = svn_fs_fs__packed_base_rev(fs, revision); + svn_revnum_t pack_end = pack_start + count; + + svn_pool_clear(iterpool); + + if (notify_func && (pack_start % ffd->max_files_per_dir == 0)) + notify_func(pack_start, notify_baton, iterpool); + + /* two-way index check */ + err = compare_l2p_to_p2l_index(fs, pack_start, pack_end - pack_start, + cancel_func, cancel_baton, iterpool); + if (!err) + err = compare_p2l_to_l2p_index(fs, pack_start, pack_end - pack_start, + cancel_func, cancel_baton, iterpool); + + /* verify in-index checksums and types vs. 
actual rev / pack files */ + if (!err) + err = compare_p2l_to_rev(fs, pack_start, pack_end - pack_start, + cancel_func, cancel_baton, iterpool); + + /* concurrent packing is one of the reasons why verification may fail. + Make sure, we operate on up-to-date information. */ + if (err) + SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, + fs, pool)); + + /* retry the whole shard if it got packed in the meantime */ + if (err && count != pack_size(fs, revision)) + { + svn_error_clear(err); + + /* We could simply assign revision here but the code below is + more intuitive to maintainers. */ + next_revision = svn_fs_fs__packed_base_rev(fs, revision); + } + else + { + SVN_ERR(err); + next_revision = pack_end; + } + } + + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + svn_error_t * svn_fs_fs__verify(svn_fs_t *fs, svn_revnum_t start, @@ -187,6 +693,14 @@ SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool)); SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool)); + /* log/phys index consistency. We need to check them first to make + sure we can access the rev / pack files in format7. 
*/ + if (svn_fs_fs__use_log_addressing(fs, end)) + SVN_ERR(verify_index_consistency(fs, + MAX(start, ffd->min_log_addressing_rev), + end, notify_func, notify_baton, + cancel_func, cancel_baton, pool)); + /* rep cache consistency */ if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT) SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton, Index: subversion/libsvn_fs_x =================================================================== --- subversion/libsvn_fs_x (revision 1623988) +++ subversion/libsvn_fs_x (working copy) Property changes on: subversion/libsvn_fs_x ___________________________________________________________________ Modified: svn:mergeinfo Reverse-merged /subversion/trunk/subversion/libsvn_fs_x:r1603891-1620574 Index: subversion/mod_dav_svn/mod_dav_svn.c =================================================================== --- subversion/mod_dav_svn/mod_dav_svn.c (revision 1623988) +++ subversion/mod_dav_svn/mod_dav_svn.c (working copy) @@ -105,6 +105,7 @@ enum conf_flag txdelta_cache; /* whether to enable txdelta caching */ enum conf_flag fulltext_cache; /* whether to enable fulltext caching */ enum conf_flag revprop_cache; /* whether to enable revprop caching */ + enum conf_flag block_read; /* whether to enable block read mode */ const char *hooks_env; /* path to hook script env config file */ } dir_conf_t; @@ -248,6 +249,7 @@ newconf->txdelta_cache = INHERIT_VALUE(parent, child, txdelta_cache); newconf->fulltext_cache = INHERIT_VALUE(parent, child, fulltext_cache); newconf->revprop_cache = INHERIT_VALUE(parent, child, revprop_cache); + newconf->block_read = INHERIT_VALUE(parent, child, block_read); newconf->root_dir = INHERIT_VALUE(parent, child, root_dir); newconf->hooks_env = INHERIT_VALUE(parent, child, hooks_env); @@ -544,6 +546,19 @@ } static const char * +SVNBlockRead_cmd(cmd_parms *cmd, void *config, int arg) +{ + dir_conf_t *conf = config; + + if (arg) + conf->block_read = CONF_FLAG_ON; + else + conf->block_read = CONF_FLAG_OFF; 
+ + return NULL; +} + +static const char * SVNInMemoryCacheSize_cmd(cmd_parms *cmd, void *config, const char *arg1) { svn_cache_config_t settings = *svn_cache_config_get(); @@ -948,6 +963,15 @@ } +svn_boolean_t +dav_svn__get_block_read_flag(request_rec *r) +{ + dir_conf_t *conf; + + conf = ap_get_module_config(r->per_dir_config, &dav_svn_module); + return conf->block_read == CONF_FLAG_ON; +} + int dav_svn__get_compression_level(request_rec *r) { @@ -1289,6 +1313,13 @@ "in the documentation" "(default is Off)."), + /* per directory/location */ + AP_INIT_FLAG("SVNBlockRead", SVNBlockRead_cmd, NULL, + ACCESS_CONF|RSRC_CONF, + "speeds up operations of FSFS 1.9+ repositories if large" + "caches (see SVNInMemoryCacheSize) have been configured." + "(default is Off)."), + /* per server */ AP_INIT_TAKE1("SVNInMemoryCacheSize", SVNInMemoryCacheSize_cmd, NULL, RSRC_CONF, Index: subversion/mod_dav_svn/repos.c =================================================================== --- subversion/mod_dav_svn/repos.c (revision 1623988) +++ subversion/mod_dav_svn/repos.c (working copy) @@ -2269,6 +2269,8 @@ dav_svn__get_fulltext_cache_flag(r) ? "1" :"0"); svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, dav_svn__get_revprop_cache_flag(r) ? "2" :"0"); + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_BLOCK_READ, + dav_svn__get_block_read_flag(r) ? "1" :"0"); /* Disallow BDB/event until issue 4157 is fixed. */ if (!strcmp(ap_show_mpm(), "event")) Index: subversion/svnadmin/svnadmin.c =================================================================== --- subversion/svnadmin/svnadmin.c (revision 1623988) +++ subversion/svnadmin/svnadmin.c (working copy) @@ -54,6 +54,11 @@ /*** Code. ***/ +/* FSFS format 7's "block-read" feature performs poorly with small caches. + * Enable it only if caches above this threshold have been configured. + * The current threshold is 64MB. 
*/ +#define BLOCK_READ_CACHE_THRESHOLD (0x40 * 0x100000) + /* A flag to see if we've been cancelled by the client or not. */ static volatile sig_atomic_t cancelled = FALSE; @@ -113,6 +118,10 @@ const char *path, apr_pool_t *pool) { + /* Enable the "block-read" feature (where it applies)? */ + svn_boolean_t use_block_read + = svn_cache_config_get()->cache_size > BLOCK_READ_CACHE_THRESHOLD; + /* construct FS configuration parameters: enable caches for r/o data */ apr_hash_t *fs_config = apr_hash_make(pool); svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_DELTAS, "1"); @@ -120,6 +129,8 @@ svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, "2"); svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_NS, svn_uuid_generate(pool)); + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_BLOCK_READ, + use_block_read ? "1" : "0"); /* now, open the requested repository */ SVN_ERR(svn_repos_open3(repos, path, fs_config, pool, pool)); Index: subversion/svnfsfs/stats-cmd.c =================================================================== --- subversion/svnfsfs/stats-cmd.c (revision 1623988) +++ subversion/svnfsfs/stats-cmd.c (working copy) @@ -807,6 +807,8 @@ result->offset = (apr_size_t)offset; result->size = (apr_size_t)size; + if (!svn_fs_fs__use_log_addressing(fs->fs, revision)) + { SVN_ERR(read_rep_base(&result->delta_base, &header_size, &is_plain, fs, file_content, (apr_size_t)offset, @@ -814,6 +816,7 @@ result->header_size = header_size; result->is_plain = is_plain; + } svn_sort__array_insert(revision_info->representations, &result, idx); } @@ -1176,7 +1179,8 @@ /* if this is a directory and has not been processed, yet, read and * process it recursively */ - if (is_dir && text && text->ref_count == 1) + if ( is_dir && text && text->ref_count == 1 + && !svn_fs_fs__use_log_addressing(fs->fs, revision_info->revision)) SVN_ERR(parse_dir(fs, file_content, text, revision_info, pool, scratch_pool)); @@ -1349,6 +1353,163 @@ return SVN_NO_ERROR; } +/* Read the item described by 
ENTRY from the REV_FILE in FS and return + * the respective byte sequence in *CONTENTS allocated in POOL. + */ +static svn_error_t * +read_item(svn_stringbuf_t **contents, + fs_t *fs, + svn_fs_fs__revision_file_t *rev_file, + svn_fs_fs__p2l_entry_t *entry, + apr_pool_t *pool) +{ + svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size, pool); + item->len = entry->size; + item->data[item->len] = 0; + + SVN_ERR(svn_io_file_aligned_seek(rev_file->file, REV_FILE_BLOCK_SIZE, NULL, + entry->offset, pool)); + SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len, + NULL, NULL, pool)); + + *contents = item; + + return SVN_NO_ERROR; +} + +/* Process the logically addressed revision contents of revisions BASE to + * BASE + COUNT - 1 in FS. Use POOL for allocations. + */ +static svn_error_t * +read_log_rev_or_packfile(fs_t *fs, + svn_revnum_t base, + int count, + apr_pool_t *pool) +{ + apr_pool_t *iterpool = svn_pool_create(pool); + apr_pool_t *localpool = svn_pool_create(pool); + apr_off_t max_offset; + apr_off_t offset = 0; + int i; + svn_fs_fs__revision_file_t *rev_file; + + /* we will process every revision in the rev / pack file */ + for (i = 0; i < count; ++i) + { + /* create the revision info for the current rev */ + revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); + info->representations = apr_array_make(pool, 4, sizeof(rep_stats_t*)); + info->revision = base + i; + + APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; + } + + /* open the pack / rev file that is covered by the p2l index */ + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs->fs, base, + localpool, iterpool)); + SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs->fs, rev_file, + base, localpool)); + + /* record the whole pack size in the first rev so the total sum will + still be correct */ + APR_ARRAY_IDX(fs->revisions, base, revision_info_t*)->end = max_offset; + + /* for all offsets in the file, get the P2L index entries and process + the interesting 
items (change lists, noderevs) */ + for (offset = 0; offset < max_offset; ) + { + apr_array_header_t *entries; + + svn_pool_clear(iterpool); + + /* get all entries for the current block */ + SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs->fs, rev_file, base, + offset, INDEX_BLOCK_SIZE, + iterpool)); + + /* process all entries (and later continue with the next block) */ + for (i = 0; i < entries->nelts; ++i) + { + svn_fs_fs__p2l_entry_t *entry + = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); + + /* skip bits we previously processed */ + if (i == 0 && entry->offset < offset) + continue; + + /* skip zero-sized entries */ + if (entry->size == 0) + continue; + + /* read and process interesting items */ + if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) + { + svn_stringbuf_t *item; + revision_info_t *info = APR_ARRAY_IDX(fs->revisions, + entry->item.revision, + revision_info_t*); + SVN_ERR(read_item(&item, fs, rev_file, entry, iterpool)); + SVN_ERR(read_noderev(fs, item, 0, info, pool, iterpool)); + } + else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) + { + svn_stringbuf_t *item; + revision_info_t *info = APR_ARRAY_IDX(fs->revisions, + entry->item.revision, + revision_info_t*); + SVN_ERR(read_item(&item, fs, rev_file, entry, iterpool)); + info->change_count + = get_change_count(item->data + 0, item->len); + info->changes_len += entry->size; + } + + /* advance offset */ + offset += entry->size; + } + } + + /* clean up and close file handles */ + svn_pool_destroy(iterpool); + svn_pool_destroy(localpool); + + return SVN_NO_ERROR; +} + +/* Read the content of the pack file staring at revision BASE logical + * addressing mode and store it in FS. Use POOL for allocations. 
+ */ +static svn_error_t * +read_log_pack_file(fs_t *fs, + svn_revnum_t base, + apr_pool_t *pool) +{ + SVN_ERR(read_log_rev_or_packfile(fs, base, fs->shard_size, pool)); + + /* one more pack file processed */ + print_progress(base); + + return SVN_NO_ERROR; +} + +/* Read the content of the file for REVISION in logical addressing mode + * and store its contents in FS. Use POOL for allocations. + */ +static svn_error_t * +read_log_revision_file(fs_t *fs, + svn_revnum_t revision, + apr_pool_t *pool) +{ + SVN_ERR(read_log_rev_or_packfile(fs, revision, 1, pool)); + + /* show progress every 1000 revs or so */ + if (fs->shard_size && (revision % fs->shard_size == 0)) + print_progress(revision); + if (!fs->shard_size && (revision % 1000 == 0)) + print_progress(revision); + + return SVN_NO_ERROR; +} + /* Read the repository at PATH and return the result in *FS. * Use POOL for allocations. */ @@ -1383,17 +1544,20 @@ FALSE, pool, pool)); /* read all packed revs */ - for ( revision = 0 + for ( revision = 0 ; revision < (*fs)->min_unpacked_rev ; revision += (*fs)->shard_size) - { + if (svn_fs_fs__use_log_addressing((*fs)->fs, revision)) + SVN_ERR(read_log_pack_file(*fs, revision, pool)); + else SVN_ERR(read_phys_pack_file(*fs, revision, pool)); - } + /* read non-packed revs */ for ( ; revision <= (*fs)->head; ++revision) - { + if (svn_fs_fs__use_log_addressing((*fs)->fs, revision)) + SVN_ERR(read_log_revision_file(*fs, revision, pool)); + else SVN_ERR(read_phys_revision_file(*fs, revision, pool)); - } return SVN_NO_ERROR; } Index: subversion/svnfsfs/svnfsfs.c =================================================================== --- subversion/svnfsfs/svnfsfs.c (revision 1623988) +++ subversion/svnfsfs/svnfsfs.c (working copy) @@ -148,6 +148,40 @@ "Describe the usage of this program or its subcommands.\n"), {0} }, + {"dump-index", subcommand__dump_index, {0}, N_ + ("usage: svnfsfs dump-index REPOS_PATH -r REV\n\n" + "Dump the index contents for the revision / pack file containing 
revision REV\n" + "to console. This is only available for FSFS format 7 (SVN 1.9+) repositories.\n" + "The table produced contains a header in the first line followed by one line\n" + "per index entry, ordered by location in the revision / pack file. Columns:\n\n" + " * Byte offset (hex) at which the item starts\n" + " * Length (hex) of the item in bytes\n" + " * Item type (string) is one of the following:\n\n" + " none ... Unused section. File contents shall be NULs.\n" + " frep ... File representation.\n" + " drep ... Directory representation.\n" + " fprop .. File property.\n" + " dprop .. Directory property.\n" + " node ... Node revision.\n" + " chgs ... Changed paths list.\n" + " rep .... Representation of unknown type. Should not be used.\n" + " ??? .... Invalid. Index data is corrupt.\n\n" + " The distinction between frep, drep, fprop and dprop is a mere internal\n" + " classification used for various optimizations and does not affect the\n" + " operational correctness.\n\n" + " * Revision that the item belongs to (decimal)\n" + " * Item number (decimal) within that revision\n" + " * Modified FNV1a checksum (8 hex digits)\n"), + {'r', 'M'} }, + + {"load-index", subcommand__load_index, {0}, N_ + ("usage: svnfsfs load-index REPOS_PATH\n\n" + "Read the index contents to console. The format is the same as produced by the\n" + "dump command, except that checksum as well as header are optional and will be\n" + "ignored. The data must cover the full revision / pack file; the revision\n" + "number is automatically extracted from input stream. 
No ordering is required.\n"), + {'M'} }, + {"stats", subcommand__stats, {0}, N_ ("usage: svnfsfs stats REPOS_PATH\n\n" "Write object size statistics to console.\n"), Index: subversion/svnserve/svnserve.c =================================================================== --- subversion/svnserve/svnserve.c (revision 1623988) +++ subversion/svnserve/svnserve.c (working copy) @@ -209,6 +209,7 @@ #define SVNSERVE_OPT_VIRTUAL_HOST 270 #define SVNSERVE_OPT_MIN_THREADS 271 #define SVNSERVE_OPT_MAX_THREADS 272 +#define SVNSERVE_OPT_BLOCK_READ 273 static const apr_getopt_option_t svnserve__options[] = { @@ -301,6 +302,14 @@ "ARG Mbit/s.\n" " " "Default is 0 (optimizations disabled).")}, + {"block-read", SVNSERVE_OPT_BLOCK_READ, 1, + N_("Parse and cache all data found in block instead\n" + " " + "of just the requested item.\n" + " " + "Default is no.\n" + " " + "[used for FSFS repositories in 1.9 format only]")}, #ifdef CONNECTION_HAVE_THREAD_OPTION /* ### Making the assumption here that WIN32 never has fork and so * ### this option never exists when --service exists. */ @@ -671,6 +680,7 @@ svn_boolean_t cache_fulltexts = TRUE; svn_boolean_t cache_txdeltas = TRUE; svn_boolean_t cache_revprops = FALSE; + svn_boolean_t use_block_read = FALSE; apr_uint16_t port = SVN_RA_SVN_PORT; const char *host = NULL; int family = APR_INET; @@ -858,6 +868,10 @@ cache_revprops = svn_tristate__from_word(arg) == svn_tristate_true; break; + case SVNSERVE_OPT_BLOCK_READ: + use_block_read = svn_tristate__from_word(arg) == svn_tristate_true; + break; + case SVNSERVE_OPT_CLIENT_SPEED: { apr_size_t bandwidth = (apr_size_t)apr_strtoi64(arg, NULL, 0); @@ -966,6 +980,8 @@ cache_fulltexts ? "1" :"0"); svn_hash_sets(params.fs_config, SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, cache_revprops ? "2" :"0"); + svn_hash_sets(params.fs_config, SVN_FS_CONFIG_FSFS_BLOCK_READ, + use_block_read ? 
"1" :"0"); SVN_ERR(svn_repos__config_pool_create(¶ms.config_pool, is_multi_threaded, Index: subversion/tests/cmdline/svnadmin_tests.py =================================================================== --- subversion/tests/cmdline/svnadmin_tests.py (revision 1623988) +++ subversion/tests/cmdline/svnadmin_tests.py (working copy) @@ -294,6 +294,74 @@ return load_and_verify_dumpstream(sbox, None, None, None, False, dump, *varargs) +class FSFS_Index: + """Manages indexes of a rev file in a FSFS format 7 repository. + The interface returns P2L information and allows for item offsets + and lengths to be modified. """ + + def __init__(self, sbox, revision): + self.by_item = { } + self.revision = revision + self.repo_dir = sbox.repo_dir + + self._read() + + def _read(self): + """ Read P2L index using svnfsfs. """ + exit_code, output, errput = svntest.main.run_svnfsfs('dump-index', + '-r' + str(self.revision), + self.repo_dir) + svntest.verify.verify_outputs("Error while dumping index", + [], errput, [], []) + svntest.verify.verify_exit_code(None, exit_code, 0) + + self.by_item.clear() + for line in output: + values = line.split() + if len(values) >= 4 and values[0] != 'Start': + item = long(values[4]) + self.by_item[item] = values + + def _write(self): + """ Rewrite indexes using svnfsfs. """ + by_offset = {} + for values in self.by_item.itervalues(): + by_offset[long(values[0], 16)] = values + + lines = [] + for (offset, values) in sorted(by_offset.items()): + values = by_offset[offset] + line = values[0] + ' ' + values[1] + ' ' + values[2] + ' ' + \ + values[3] + ' ' + values[4] + '\n'; + lines.append(line) + + exit_code, output, errput = svntest.main.run_command_stdin( + svntest.main.svnfsfs_binary, 0, 0, True, lines, + 'load-index', self.repo_dir) + + svntest.verify.verify_outputs("Error while rewriting index", + output, errput, [], []) + svntest.verify.verify_exit_code(None, exit_code, 0) + + def get_item(self, item): + """ Return offset, length and type of ITEM. 
""" + values = self.by_item[item] + + offset = long(values[0], 16) + len = long(values[1], 16) + type = values[2] + + return (offset, len, type) + + def modify_item(self, item, offset, len): + """ Modify offset and length of ITEM. """ + values = self.by_item[item] + + values[0] = '%x' % offset + values[1] = '%x' % len + + self._write() + def repo_format(sbox): """ Return the repository format number for SBOX.""" @@ -307,15 +375,40 @@ """ Replace the changed paths list in the revision file REVISION in SBOX with the text CHANGES.""" + idx = None + # read full file fp = open(fsfs_file(sbox.repo_dir, 'revs', str(revision)), 'r+b') contents = fp.read() - - # replace the changed paths list length = len(contents) - header = contents[contents.rfind('\n', length - 64, length - 1):] - body_len = long(header.split(' ')[1]) + if repo_format(sbox) < 7: + # replace the changed paths list + header = contents[contents.rfind('\n', length - 64, length - 1):] + body_len = long(header.split(' ')[1]) + + else: + # read & parse revision file footer + footer_length = ord(contents[length-1]); + footer = contents[length - footer_length - 1:length-1] + l2p_offset = long(footer.split(' ')[0]) + p2l_offset = long(footer.split(' ')[1]) + + idx = FSFS_Index(sbox, revision) + (offset, item_len, item_type) = idx.get_item(1) + + # split file contents + body_len = offset + indexes = contents[l2p_offset:length - footer_length - 1] + + # construct new footer, include indexes as are + file_len = body_len + len(changes) + 1 + p2l_offset += file_len - l2p_offset + + header = str(file_len) + ' ' + str(p2l_offset) + header += chr(len(header)) + header = '\n' + indexes + header + contents = contents[:body_len] + changes + header # set new contents @@ -324,6 +417,9 @@ fp.truncate() fp.close() + if repo_format(sbox) >= 7: + idx.modify_item(1, offset, len(changes) + 1) + ###################################################################### # Tests @@ -2035,11 +2131,14 @@ exit_code, output, errput = 
svntest.main.run_svnadmin("verify", sbox.repo_dir) - exp_out = svntest.verify.RegexListOutput([".*Verified revision 0.", - ".*Verified revision 1.", - ".*Error verifying revision 2."]) - if (svntest.main.fs_has_rep_sharing()): - exp_out.insert(0, ".*Verifying repository metadata.*") + if (svntest.main.is_fs_log_addressing()): + exp_out = svntest.verify.RegexListOutput([".*Verifying metadata at revision 0"]) + else: + exp_out = svntest.verify.RegexListOutput([".*Verified revision 0.", + ".*Verified revision 1.", + ".*Error verifying revision 2."]) + if (svntest.main.fs_has_rep_sharing()): + exp_out.insert(0, ".*Verifying repository metadata.*") if svntest.verify.verify_outputs("Unexpected error while running 'svnadmin verify'.", output, errput, exp_out, exp_err): Index: subversion/tests/cmdline/svntest/main.py =================================================================== --- subversion/tests/cmdline/svntest/main.py (revision 1623988) +++ subversion/tests/cmdline/svntest/main.py (working copy) @@ -1394,7 +1394,8 @@ return options.fs_type == 'bdb' def is_fs_log_addressing(): - return is_fs_type_fsx() + return is_fs_type_fsx() or \ + (is_fs_type_fsfs() and options.server_minor_version >= 9) def fs_has_rep_sharing(): return is_fs_type_fsx() or \ Index: subversion/tests/libsvn_fs_fs/fs-fs-fuzzy-test.c =================================================================== --- subversion/tests/libsvn_fs_fs/fs-fs-fuzzy-test.c (revision 1623988) +++ subversion/tests/libsvn_fs_fs/fs-fs-fuzzy-test.c (working copy) @@ -28,7 +28,6 @@ #include "../../libsvn_fs_fs/fs.h" #include "../../libsvn_fs_fs/fs_fs.h" #include "../../libsvn_fs_fs/rev_file.h" -#include "../../libsvn_fs_fs/util.h" #include "svn_hash.h" #include "svn_pools.h" @@ -67,7 +66,7 @@ svn_fs_root_t *txn_root; svn_revnum_t rev; apr_hash_t *fs_config; - apr_file_t *file; + svn_fs_fs__revision_file_t *rev_file; apr_off_t filesize = 0; apr_off_t i; @@ -89,13 +88,10 @@ SVN_TEST_ASSERT(SVN_IS_VALID_REVNUM(rev)); /* Open 
the revision 1 file for modification. */ - SVN_ERR(svn_io_file_open(&file, - svn_fs_fs__path_rev_absolute(fs, rev, iterpool), - APR_READ | APR_WRITE | APR_BUFFERED, - APR_OS_DEFAULT, pool)); + SVN_ERR(svn_fs_fs__open_pack_or_rev_file_writable(&rev_file, fs, rev, + pool, iterpool)); + SVN_ERR(svn_io_file_seek(rev_file->file, APR_END, &filesize, iterpool)); - SVN_ERR(svn_io_file_seek(file, APR_END, &filesize, iterpool)); - /* We want all the caching we can get. More importantly, we want to change the cache namespace before each test iteration. */ fs_config = apr_hash_make(pool); @@ -102,6 +98,7 @@ svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_DELTAS, "1"); svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_FULLTEXTS, "1"); svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_REVPROPS, "2"); + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_BLOCK_READ, "0"); /* Manipulate all bytes one at a time. */ for (i = 0; i < filesize; ++i) @@ -110,8 +107,8 @@ /* Read byte */ unsigned char c_old, c_new; - SVN_ERR(svn_io_file_seek(file, APR_SET, &i, iterpool)); - SVN_ERR(svn_io_file_getc((char *)&c_old, file, iterpool)); + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &i, iterpool)); + SVN_ERR(svn_io_file_getc((char *)&c_old, rev_file->file, iterpool)); /* What to replace it with. Skip if there is no change. */ c_new = modifier(c_old); @@ -119,9 +116,9 @@ continue; /* Modify / corrupt the data. */ - SVN_ERR(svn_io_file_seek(file, APR_SET, &i, iterpool)); - SVN_ERR(svn_io_file_putc((char)c_new, file, iterpool)); - SVN_ERR(svn_io_file_flush(file, iterpool)); + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &i, iterpool)); + SVN_ERR(svn_io_file_putc((char)c_new, rev_file->file, iterpool)); + SVN_ERR(svn_io_file_flush(rev_file->file, iterpool)); /* Make sure we use a different namespace for the caches during this iteration. */ @@ -146,8 +143,8 @@ svn_error_clear(err); /* Undo the corruption. 
*/ - SVN_ERR(svn_io_file_seek(file, APR_SET, &i, iterpool)); - SVN_ERR(svn_io_file_putc((char)c_old, file, iterpool)); + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &i, iterpool)); + SVN_ERR(svn_io_file_putc((char)c_old, rev_file->file, iterpool)); svn_pool_clear(iterpool); } Index: subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c =================================================================== --- subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c (revision 1623988) +++ subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c (working copy) @@ -33,6 +33,7 @@ #include "svn_props.h" #include "svn_fs.h" #include "private/svn_string_private.h" +#include "private/svn_string_private.h" #include "../svn_test_fs.h" @@ -67,16 +68,35 @@ if (format >= SVN_FS_FS__MIN_LAYOUT_FORMAT_OPTION_FORMAT) { - if (max_files_per_dir) - contents = apr_psprintf(pool, - "%d\n" - "layout sharded %d\n", - format, max_files_per_dir); + if (format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT) + { + if (max_files_per_dir) + contents = apr_psprintf(pool, + "%d\n" + "layout sharded %d\n" + "addressing logical 0\n", + format, max_files_per_dir); + else + /* linear layouts never use logical addressing */ + contents = apr_psprintf(pool, + "%d\n" + "layout linear\n" + "addressing physical\n", + format); + } else - contents = apr_psprintf(pool, - "%d\n" - "layout linear\n", - format); + { + if (max_files_per_dir) + contents = apr_psprintf(pool, + "%d\n" + "layout sharded %d\n", + format, max_files_per_dir); + else + contents = apr_psprintf(pool, + "%d\n" + "layout linear\n", + format); + } } else { @@ -328,14 +348,17 @@ return svn_error_createf(SVN_ERR_FS_GENERAL, NULL, "Expected pack file '%s' not found", path); - path = svn_dirent_join_many(pool, REPO_NAME, "revs", - apr_psprintf(pool, "%d.pack", i / SHARD_SIZE), - "manifest", SVN_VA_NULL); - SVN_ERR(svn_io_check_path(path, &kind, pool)); - if (kind != svn_node_file) - return svn_error_createf(SVN_ERR_FS_GENERAL, NULL, - "Expected manifest file '%s' not 
found", - path); + if (opts->server_minor_version && (opts->server_minor_version < 9)) + { + path = svn_dirent_join_many(pool, REPO_NAME, "revs", + apr_psprintf(pool, "%d.pack", i / SHARD_SIZE), + "manifest", SVN_VA_NULL); + SVN_ERR(svn_io_check_path(path, &kind, pool)); + if (kind != svn_node_file) + return svn_error_createf(SVN_ERR_FS_GENERAL, NULL, + "Expected manifest file '%s' not found", + path); + } /* This directory should not exist. */ path = svn_dirent_join_many(pool, REPO_NAME, "revs", @@ -1100,6 +1123,62 @@ /* ------------------------------------------------------------------------ */ +#define REPO_NAME "metadata_checksumming" +static svn_error_t * +metadata_checksumming(const svn_test_opts_t *opts, + apr_pool_t *pool) +{ + svn_fs_t *fs; + const char *repo_path, *r0_path; + apr_hash_t *fs_config = apr_hash_make(pool); + svn_stringbuf_t *r0; + svn_fs_root_t *root; + apr_hash_t *dir; + + /* Skip this test unless we are FSFS f7+ */ + if ((strcmp(opts->fs_type, "fsfs") != 0) + || (opts->server_minor_version && (opts->server_minor_version < 9))) + return svn_error_create(SVN_ERR_TEST_SKIPPED, NULL, + "pre-1.9 SVN doesn't checksum metadata"); + + /* Create the file system to fiddle with. */ + SVN_ERR(svn_test__create_fs(&fs, REPO_NAME, opts, pool)); + repo_path = svn_fs_path(fs, pool); + + /* Manipulate the data on disk. + * (change id from '0.0.*' to '1.0.*') */ + r0_path = svn_dirent_join_many(pool, repo_path, "revs", "0", "0", + SVN_VA_NULL); + SVN_ERR(svn_stringbuf_from_file2(&r0, r0_path, pool)); + r0->data[21] = '1'; + SVN_ERR(svn_io_remove_file2(r0_path, FALSE, pool)); + SVN_ERR(svn_io_file_create_binary(r0_path, r0->data, r0->len, pool)); + + /* Reading the corrupted data on the normal code path triggers no error. + * Use a separate namespace to avoid simply reading data from cache. 
*/ + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_NS, + svn_uuid_generate(pool)); + SVN_ERR(svn_fs_open2(&fs, repo_path, fs_config, pool, pool)); + SVN_ERR(svn_fs_revision_root(&root, fs, 0, pool)); + SVN_ERR(svn_fs_dir_entries(&dir, root, "/", pool)); + + /* The block-read code path uses the P2L index information and compares + * low-level checksums. Again, separate cache namespace. */ + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_CACHE_NS, + svn_uuid_generate(pool)); + svn_hash_sets(fs_config, SVN_FS_CONFIG_FSFS_BLOCK_READ, "1"); + SVN_ERR(svn_fs_open2(&fs, repo_path, fs_config, pool, pool)); + SVN_ERR(svn_fs_revision_root(&root, fs, 0, pool)); + SVN_TEST_ASSERT_ERROR(svn_fs_dir_entries(&dir, root, "/", pool), + SVN_ERR_CHECKSUM_MISMATCH); + + return SVN_NO_ERROR; +} + +#undef REPO_NAME + +/* ------------------------------------------------------------------------ */ + #define REPO_NAME "revprop_caching_on_off" static svn_error_t * revprop_caching_on_off(const svn_test_opts_t *opts, @@ -1273,6 +1352,8 @@ "upgrade txns to log addressing in shared FSFS"), SVN_TEST_OPTS_PASS(upgrade_old_txns_to_log_addressing, "upgrade txns started before svnadmin upgrade"), + SVN_TEST_OPTS_PASS(metadata_checksumming, + "metadata checksums being checked"), SVN_TEST_OPTS_PASS(revprop_caching_on_off, "change revprops with enabled and disabled caching"), SVN_TEST_OPTS_PASS(id_parser_test, Index: subversion/tests/libsvn_fs_x =================================================================== --- subversion/tests/libsvn_fs_x (revision 1623988) +++ subversion/tests/libsvn_fs_x (working copy) Property changes on: subversion/tests/libsvn_fs_x ___________________________________________________________________ Modified: svn:mergeinfo Reverse-merged /subversion/trunk/subversion/tests/libsvn_fs_x:r1603891-1623968 Index: . =================================================================== --- . (revision 1623988) +++ . (working copy) Property changes on: . 
___________________________________________________________________ Modified: svn:mergeinfo Reverse-merged /subversion/trunk:r1603891-1623987