diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index d7e398f5f732a..fd043f4ea35e6 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -388,7 +388,7 @@ JournalTrimmerImpl::config_t::get_test( max_journal_bytes = 4 * roll_size; } else { assert(type == journal_type_t::RANDOM_BLOCK); - target_dirty_bytes = roll_size / 4; + target_dirty_bytes = roll_size / 36; target_alloc_bytes = roll_size / 4; max_journal_bytes = roll_size / 2; } diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 4d1dc929607d0..b81fe62906c7b 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1096,6 +1096,20 @@ record_t Cache::prepare_record( if (!i->is_exist_mutation_pending()) { DEBUGT("commit replace extent ... -- {}, prior={}", t, *i, *i->prior_instance); + // If inplace rewrite occurs during mutation, prev->version will + // be zero. Although this results in the version mismatch here, we can + // correct this by changing version to 1. This is because the inplace rewrite + // does not introduce any actual modification that could negatively + // impact system reliability + if (i->prior_instance->version == 0 && i->version > 1) { + assert(can_inplace_rewrite(i->get_type())); + assert(can_inplace_rewrite(i->prior_instance->get_type())); + assert(i->prior_instance->dirty_from_or_retired_at == JOURNAL_SEQ_MIN); + assert(i->prior_instance->state == CachedExtent::extent_state_t::CLEAN); + assert(i->prior_instance->get_paddr().get_addr_type() == + paddr_types_t::RANDOM_BLOCK); + i->version = 1; + } // extent with EXIST_MUTATION_PENDING doesn't have // prior_instance field so skip these extents. 
// the existing extents should be added into Cache @@ -1261,6 +1275,24 @@ record_t Cache::prepare_record( } } + for (auto &i: t.written_inplace_ool_block_list) { + if (!i->is_valid()) { + continue; + } + assert(i->state == CachedExtent::extent_state_t::DIRTY); + assert(i->version > 0); + remove_from_dirty(i); + // set the version to zero because the extent state is now clean + // in order to handle this transparently + i->version = 0; + i->dirty_from_or_retired_at = JOURNAL_SEQ_MIN; + i->state = CachedExtent::extent_state_t::CLEAN; + assert(i->is_logical()); + i->clear_modified_region(); + touch_extent(*i); + DEBUGT("inplace rewrite ool block is commmitted -- {}", t, *i); + } + for (auto &i: t.existing_block_list) { if (i->is_valid()) { alloc_delta.alloc_blk_ranges.emplace_back( @@ -1330,7 +1362,8 @@ record_t Cache::prepare_record( t.num_allocated_invalid_extents); auto& ool_stats = t.get_ool_write_stats(); - ceph_assert(ool_stats.extents.num == t.written_ool_block_list.size()); + ceph_assert(ool_stats.extents.num == t.written_ool_block_list.size() + + t.written_inplace_ool_block_list.size()); if (record.is_empty()) { SUBINFOT(seastore_t, @@ -1699,14 +1732,16 @@ Cache::replay_delta( segment_seq_printer_t{delta_paddr_segment_seq}, delta_paddr_segment_type, delta); - return replay_delta_ertr::make_ready_future(false); + return replay_delta_ertr::make_ready_future>( + std::make_pair(false, nullptr)); } } } if (delta.type == extent_types_t::JOURNAL_TAIL) { // this delta should have been dealt with during segment cleaner mounting - return replay_delta_ertr::make_ready_future(false); + return replay_delta_ertr::make_ready_future>( + std::make_pair(false, nullptr)); } // replay alloc @@ -1714,7 +1749,8 @@ Cache::replay_delta( if (journal_seq < alloc_tail) { DEBUG("journal_seq {} < alloc_tail {}, don't replay {}", journal_seq, alloc_tail, delta); - return replay_delta_ertr::make_ready_future(false); + return replay_delta_ertr::make_ready_future>( + std::make_pair(false, 
nullptr)); } alloc_delta_t alloc_delta; @@ -1738,14 +1774,16 @@ Cache::replay_delta( if (!backref_list.empty()) { backref_batch_update(std::move(backref_list), journal_seq); } - return replay_delta_ertr::make_ready_future(true); + return replay_delta_ertr::make_ready_future>( + std::make_pair(true, nullptr)); } // replay dirty if (journal_seq < dirty_tail) { DEBUG("journal_seq {} < dirty_tail {}, don't replay {}", journal_seq, dirty_tail, delta); - return replay_delta_ertr::make_ready_future(false); + return replay_delta_ertr::make_ready_future>( + std::make_pair(false, nullptr)); } if (delta.type == extent_types_t::ROOT) { @@ -1759,7 +1797,8 @@ Cache::replay_delta( journal_seq, record_base, delta, *root); root->set_modify_time(modify_time); add_extent(root); - return replay_delta_ertr::make_ready_future(true); + return replay_delta_ertr::make_ready_future>( + std::make_pair(true, root)); } else { auto _get_extent_if_cached = [this](paddr_t addr) -> get_extent_ertr::future { @@ -1799,17 +1838,26 @@ Cache::replay_delta( DEBUG("replay extent is not present, so delta is obsolete at {} {} -- {}", journal_seq, record_base, delta); assert(delta.pversion > 0); - return replay_delta_ertr::make_ready_future(true); + return replay_delta_ertr::make_ready_future>( + std::make_pair(false, nullptr)); } DEBUG("replay extent delta at {} {} ... 
-- {}, prv_extent={}", journal_seq, record_base, delta, *extent); - assert(extent->last_committed_crc == delta.prev_crc); - assert(extent->version == delta.pversion); - extent->apply_delta_and_adjust_crc(record_base, delta.bl); - extent->set_modify_time(modify_time); - assert(extent->last_committed_crc == delta.final_crc); + if (delta.paddr.get_addr_type() == paddr_types_t::SEGMENT || + !can_inplace_rewrite(delta.type)) { + ceph_assert_always(extent->last_committed_crc == delta.prev_crc); + assert(extent->version == delta.pversion); + extent->apply_delta_and_adjust_crc(record_base, delta.bl); + extent->set_modify_time(modify_time); + ceph_assert_always(extent->last_committed_crc == delta.final_crc); + } else { + assert(delta.paddr.get_addr_type() == paddr_types_t::RANDOM_BLOCK); + extent->apply_delta_and_adjust_crc(record_base, delta.bl); + extent->set_modify_time(modify_time); + // crc will be checked after journal replay is done + } extent->version++; if (extent->version == 1) { @@ -1821,7 +1869,8 @@ Cache::replay_delta( journal_seq, record_base, delta, *extent); } mark_dirty(extent); - return replay_delta_ertr::make_ready_future(true); + return replay_delta_ertr::make_ready_future>( + std::make_pair(true, extent)); }); } } diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index c79473f98ba28..0f32bfe87e66b 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -1039,7 +1039,8 @@ class Cache { */ using replay_delta_ertr = crimson::errorator< crimson::ct_error::input_output_error>; - using replay_delta_ret = replay_delta_ertr::future; + using replay_delta_ret = replay_delta_ertr::future< + std::pair>; replay_delta_ret replay_delta( journal_seq_t seq, paddr_t record_block_base, diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 5fcb8142527d9..4cb7e2b490cbd 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ 
-587,6 +587,11 @@ class CachedExtent rewrite_generation = gen; } + void set_inplace_rewrite_generation() { + user_hint = placement_hint_t::REWRITE; + rewrite_generation = OOL_GENERATION; + } + bool is_inline() const { return poffset.is_relative(); } @@ -606,6 +611,10 @@ class CachedExtent return prior_instance; } + uint32_t get_last_committed_crc() const { + return last_committed_crc; + } + private: template friend class read_set_item_t; @@ -1237,6 +1246,16 @@ class LogicalCachedExtent : public ChildableCachedExtent { void on_replace_prior(Transaction &t) final; + struct modified_region_t { + extent_len_t offset; + extent_len_t len; + }; + virtual std::optional get_modified_region() { + return std::nullopt; + } + + virtual void clear_modified_region() {} + virtual ~LogicalCachedExtent(); protected: diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index b7aabefc64414..3acd3dbd63def 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -790,16 +790,36 @@ RandomBlockOolWriter::do_write( stats.num_records += 1; ex->prepare_write(); - return rbm->write(paddr, - ex->get_bptr() - ).handle_error( - alloc_write_iertr::pass_further{}, - crimson::ct_error::assert_all{ - "Invalid error when writing record"} - ).safe_then([&t, &ex, paddr, FNAME]() { + extent_len_t offset = 0; + bufferptr bp; + if (can_inplace_rewrite(t, ex)) { + auto r = ex->get_modified_region(); + ceph_assert(r.has_value()); + offset = p2align(r->offset, rbm->get_block_size()); + extent_len_t len = + p2roundup(r->offset + r->len, rbm->get_block_size()) - offset; + bp = ceph::bufferptr(ex->get_bptr(), offset, len); + } else { + bp = ex->get_bptr(); + } + return trans_intr::make_interruptible( + rbm->write(paddr + offset, + bp + ).handle_error( + alloc_write_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error when writing record"} + ) + 
).si_then([this, &t, &ex, paddr, FNAME] { TRACET("ool extent written at {} -- {}", t, paddr, *ex); - t.mark_allocated_extent_ool(ex); + if (ex->is_initial_pending()) { + t.mark_allocated_extent_ool(ex); + } else if (can_inplace_rewrite(t, ex)) { + t.mark_inplace_rewrite_extent_ool(ex); + } else { + ceph_abort("impossible"); /* a string literal is never null, so asserting on it could not fail; abort outright */ + } return alloc_write_iertr::now(); }); }); diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index b94c03ec34ada..371b49e074147 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -43,6 +43,9 @@ class ExtentOolWriter { using close_ertr = base_ertr; virtual close_ertr::future<> close() = 0; + + virtual bool can_inplace_rewrite(Transaction& t, + CachedExtentRef extent) = 0; }; using ExtentOolWriterRef = std::unique_ptr; @@ -79,6 +82,11 @@ class SegmentedOolWriter : public ExtentOolWriter { return make_delayed_temp_paddr(0); } + bool can_inplace_rewrite(Transaction& t, + CachedExtentRef extent) final { + return false; + } + private: alloc_write_iertr::future<> do_write( Transaction& t, @@ -122,6 +130,17 @@ class RandomBlockOolWriter : public ExtentOolWriter { return rb_cleaner->alloc_paddr(length); } + bool can_inplace_rewrite(Transaction& t, + CachedExtentRef extent) final { + if (!extent->is_dirty()) { + return false; + } + assert(t.get_src() == transaction_type_t::TRIM_DIRTY); + ceph_assert_always(extent->get_type() == extent_types_t::ROOT || + extent->get_paddr().is_absolute()); + return crimson::os::seastore::can_inplace_rewrite(extent->get_type()); + } + private: alloc_write_iertr::future<> do_write( Transaction& t, @@ -199,6 +218,14 @@ class ExtentPlacementManager { background_process.set_extent_callback(cb); } + bool can_inplace_rewrite(Transaction& t, CachedExtentRef extent) { + auto writer = get_writer(placement_hint_t::REWRITE, + get_extent_category(extent->get_type()), + OOL_GENERATION); + 
ceph_assert(writer); + return writer->can_inplace_rewrite(t, extent); + } + journal_type_t get_journal_type() const { return background_process.get_journal_type(); } diff --git a/src/crimson/os/seastore/journal.h b/src/crimson/os/seastore/journal.h index 18c0797a8b8b8..633aa84d7dbcd 100644 --- a/src/crimson/os/seastore/journal.h +++ b/src/crimson/os/seastore/journal.h @@ -8,6 +8,7 @@ #include "crimson/os/seastore/ordering_handle.h" #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/segment_seq_allocator.h" +#include "crimson/os/seastore/cached_extent.h" namespace crimson::os::seastore { @@ -88,7 +89,7 @@ class Journal { crimson::ct_error::erange>; using replay_ret = replay_ertr::future<>; using delta_handler_t = std::function< - replay_ertr::future( + replay_ertr::future>( const record_locator_t&, const delta_info_t&, const journal_seq_t&, // dirty_tail diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.cc b/src/crimson/os/seastore/journal/circular_bounded_journal.cc index ec41bfab14264..a02c745084174 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.cc +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.cc @@ -316,7 +316,8 @@ Journal::replay_ret CircularBoundedJournal::replay( return seastar::do_with( std::move(delta_handler), std::map(), - [this](auto &d_handler, auto &map) { + std::map>(), + [this](auto &d_handler, auto &map, auto &crc_info) { auto build_paddr_seq_map = [&map]( const auto &offsets, const auto &e, @@ -339,8 +340,8 @@ Journal::replay_ret CircularBoundedJournal::replay( // The first pass to build the paddr->journal_seq_t map // from extent allocations return scan_valid_record_delta(std::move(build_paddr_seq_map), tail - ).safe_then([this, &map, &d_handler, tail]() { - auto call_d_handler_if_valid = [this, &map, &d_handler]( + ).safe_then([this, &map, &d_handler, tail, &crc_info]() { + auto call_d_handler_if_valid = [this, &map, &d_handler, &crc_info]( const auto &offsets, 
const auto &e, sea_time_point modify_time) @@ -353,12 +354,27 @@ Journal::replay_ret CircularBoundedJournal::replay( get_dirty_tail(), get_alloc_tail(), modify_time - ); + ).safe_then([&e, &crc_info](auto ret) { + auto [applied, ext] = ret; + if (applied && ext && can_inplace_rewrite( + ext->get_type())) { + crc_info[ext->get_paddr()] = + std::make_pair(ext, e.final_crc); + } + return replay_ertr::make_ready_future(applied); + }); } return replay_ertr::make_ready_future(true); }; // The second pass to replay deltas - return scan_valid_record_delta(std::move(call_d_handler_if_valid), tail); + return scan_valid_record_delta(std::move(call_d_handler_if_valid), tail + ).safe_then([&crc_info]() { + for (const auto &p : crc_info) { /* read-only check: iterate by reference instead of copying each map entry */ + ceph_assert_always(p.second.first->get_last_committed_crc() == p.second.second); + } + crc_info.clear(); + return replay_ertr::now(); + }); }); }).safe_then([this]() { // make sure that committed_to is JOURNAL_SEQ_NULL if jounal is the initial state diff --git a/src/crimson/os/seastore/journal/segmented_journal.cc b/src/crimson/os/seastore/journal/segmented_journal.cc index 58df913749321..c40295dbacef4 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.cc +++ b/src/crimson/os/seastore/journal/segmented_journal.cc @@ -291,7 +291,8 @@ SegmentedJournal::replay_segment( trimmer.get_dirty_tail(), trimmer.get_alloc_tail(), modify_time - ).safe_then([&stats, delta_type=delta.type](bool is_applied) { + ).safe_then([&stats, delta_type=delta.type](auto ret) { + auto [is_applied, ext] = ret; if (is_applied) { // see Cache::replay_delta() assert(delta_type != extent_types_t::JOURNAL_TAIL); diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index 3333fcfad9d4c..5e49a48a18809 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -423,6 +423,7 @@ void ObjectDataBlock::apply_delta(const ceph::bufferlist &bl) { for (auto &&d : deltas) 
{ auto iter = d.bl.cbegin(); iter.copy(d.len, get_bptr().c_str() + d.offset); + modified_region.union_insert(d.offset, d.len); } } diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index eaa05da8d5456..783d0919ce5db 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -35,10 +35,12 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent { std::vector delta = {}; + interval_set modified_region; + explicit ObjectDataBlock(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} explicit ObjectDataBlock(const ObjectDataBlock &other) - : LogicalCachedExtent(other) {} + : LogicalCachedExtent(other), modified_region(other.modified_region) {} explicit ObjectDataBlock(extent_len_t length) : LogicalCachedExtent(length) {} @@ -55,11 +57,28 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent { auto iter = bl.cbegin(); iter.copy(bl.length(), get_bptr().c_str() + offset); delta.push_back({offset, bl.length(), bl}); + modified_region.union_insert(offset, bl.length()); } ceph::bufferlist get_delta() final; void apply_delta(const ceph::bufferlist &bl) final; + + std::optional get_modified_region() final { + if (modified_region.empty()) { + return std::nullopt; + } + return modified_region_t{modified_region.range_start(), + modified_region.range_end() - modified_region.range_start()}; + } + + void clear_modified_region() final { + modified_region.clear(); + } + + void logical_on_delta_write() final { + delta.clear(); + } }; using ObjectDataBlockRef = TCachedExtentRef; diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 0acfdb74ebb54..93818ba025864 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -286,6 +286,10 @@ std::ostream &operator<<(std::ostream &out, data_category_t c) } } +bool 
can_inplace_rewrite(extent_types_t type) { + return get_extent_category(type) == data_category_t::DATA; +} + std::ostream &operator<<(std::ostream &out, sea_time_point_printer_t tp) { if (tp.tp == NULL_TIME) { diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 0b4ad853687fe..f0e40fed082e2 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1307,6 +1307,8 @@ constexpr data_category_t get_extent_category(extent_types_t type) { } } +bool can_inplace_rewrite(extent_types_t type); + // type for extent modification time, milliseconds since the epoch using sea_time_point = seastar::lowres_system_clock::time_point; using sea_duration = seastar::lowres_system_clock::duration; diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index d423196feba70..a283982f699e1 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -217,6 +217,20 @@ class Transaction { written_ool_block_list.push_back(ref); } + void mark_inplace_rewrite_extent_ool(LogicalCachedExtentRef& ref) { + assert(ref->get_paddr().is_absolute()); + assert(!ref->is_inline()); + written_inplace_ool_block_list.push_back(ref); + } + + void add_inplace_rewrite_extent(CachedExtentRef ref) { + ceph_assert(!is_weak()); + ceph_assert(ref); + ceph_assert(ref->get_paddr().is_absolute()); + assert(ref->state == CachedExtent::extent_state_t::DIRTY); + pre_inplace_rewrite_list.emplace_back(ref->cast()); + } + void add_mutated_extent(CachedExtentRef ref) { ceph_assert(!is_weak()); assert(ref->is_exist_mutation_pending() || @@ -278,6 +292,11 @@ class Transaction { ++num_allocated_invalid_extents; } } + for (auto& extent : pre_inplace_rewrite_list) { + if (extent->is_valid()) { + ret.push_back(extent); + } + } return ret; } @@ -387,7 +406,9 @@ class Transaction { delayed_alloc_list.clear(); inline_block_list.clear(); written_ool_block_list.clear(); + 
written_inplace_ool_block_list.clear(); pre_alloc_list.clear(); + pre_inplace_rewrite_list.clear(); retired_set.clear(); existing_block_list.clear(); existing_block_stats = {}; @@ -529,16 +550,20 @@ class Transaction { io_stat_t fresh_block_stats; uint64_t num_delayed_invalid_extents = 0; uint64_t num_allocated_invalid_extents = 0; - /// blocks that will be committed with journal record inline - std::list inline_block_list; - /// blocks that will be committed with out-of-line record - std::list written_ool_block_list; - /// blocks with delayed allocation, may become inline or ool above + /// fresh blocks with delayed allocation, may become inline or ool below std::list delayed_alloc_list; - - /// Extents with pre-allocated addresses, - /// will be added to written_ool_block_list after write + /// fresh blocks with pre-allocated addresses with RBM, + /// should be released upon conflicts, will be added to ool below std::list pre_alloc_list; + /// dirty blocks for inplace rewrite with RBM, will be added to inplace ool below + std::list pre_inplace_rewrite_list; + + /// fresh blocks that will be committed with inline journal record + std::list inline_block_list; + /// fresh blocks that will be committed with out-of-line record + std::list written_ool_block_list; + /// dirty blocks that will be committed out-of-line with inplace rewrite + std::list written_inplace_ool_block_list; /// list of mutated blocks, holds refcounts, subset of write_set std::list mutated_block_list; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 7261c84d2df73..e452244120573 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -487,6 +487,12 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( assert(extent->is_valid() && !extent->is_initial_pending()); if (extent->is_dirty()) { + if (epm->can_inplace_rewrite(t, extent)) { + DEBUGT("delta 
overwriting extent -- {}", t, *extent); + t.add_inplace_rewrite_extent(extent); + extent->set_inplace_rewrite_generation(); + return rewrite_extent_iertr::now(); + } extent->set_target_rewrite_generation(INIT_GENERATION); } else { extent->set_target_rewrite_generation(target_generation); diff --git a/src/test/crimson/seastore/test_block.cc b/src/test/crimson/seastore/test_block.cc index f7a39b0ef59cc..7d673d8c23629 100644 --- a/src/test/crimson/seastore/test_block.cc +++ b/src/test/crimson/seastore/test_block.cc @@ -19,6 +19,7 @@ void TestBlock::apply_delta(const ceph::bufferlist &bl) { decode(deltas, biter); for (auto &&d : deltas) { set_contents(d.val, d.offset, d.len); + modified_region.union_insert(d.offset, d.len); } } diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h index ccdafb7843fec..3bf119f77260f 100644 --- a/src/test/crimson/seastore/test_block.h +++ b/src/test/crimson/seastore/test_block.h @@ -24,8 +24,8 @@ struct test_extent_desc_t { struct test_block_delta_t { int8_t val = 0; - uint16_t offset = 0; - uint16_t len = 0; + extent_len_t offset = 0; + extent_len_t len = 0; DENC(test_block_delta_t, v, p) { @@ -49,10 +49,12 @@ struct TestBlock : crimson::os::seastore::LogicalCachedExtent { std::vector delta = {}; + interval_set modified_region; + TestBlock(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} TestBlock(const TestBlock &other) - : LogicalCachedExtent(other) {} + : LogicalCachedExtent(other), modified_region(other.modified_region) {} CachedExtentRef duplicate_for_write(Transaction&) final { return CachedExtentRef(new TestBlock(*this)); @@ -65,9 +67,12 @@ struct TestBlock : crimson::os::seastore::LogicalCachedExtent { ceph::bufferlist get_delta() final; - void set_contents(char c, uint16_t offset, uint16_t len) { + void set_contents(char c, extent_len_t offset, extent_len_t len) { + assert(offset + len <= get_length()); + assert(len > 0); ::memset(get_bptr().c_str() + offset, c, len); 
delta.push_back({c, offset, len}); + modified_region.union_insert(offset, len); } void set_contents(char c) { @@ -79,6 +84,22 @@ struct TestBlock : crimson::os::seastore::LogicalCachedExtent { } void apply_delta(const ceph::bufferlist &bl) final; + + std::optional get_modified_region() final { + if (modified_region.empty()) { + return std::nullopt; + } + return modified_region_t{modified_region.range_start(), + modified_region.range_end() - modified_region.range_start()}; + } + + void clear_modified_region() final { + modified_region.clear(); + } + + void logical_on_delta_write() final { + delta.clear(); + } }; using TestBlockRef = TCachedExtentRef; @@ -102,7 +123,7 @@ struct TestBlockPhysical : crimson::os::seastore::CachedExtent{ return TYPE; } - void set_contents(char c, uint16_t offset, uint16_t len) { + void set_contents(char c, extent_len_t offset, extent_len_t len) { ::memset(get_bptr().c_str() + offset, c, len); delta.push_back({c, offset, len}); } @@ -123,13 +144,13 @@ struct test_block_mutator_t { std::numeric_limits::min(), std::numeric_limits::max()); - std::uniform_int_distribution - offset_distribution = std::uniform_int_distribution( + std::uniform_int_distribution + offset_distribution = std::uniform_int_distribution( 0, TestBlock::SIZE - 1); - std::uniform_int_distribution length_distribution(uint16_t offset) { - return std::uniform_int_distribution( - 0, TestBlock::SIZE - offset - 1); + std::uniform_int_distribution length_distribution(extent_len_t offset) { + return std::uniform_int_distribution( + 1, TestBlock::SIZE - offset); } diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc index 0bf2d41358bfc..bacb3cd2f78ec 100644 --- a/src/test/crimson/seastore/test_cbjournal.cc +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -246,7 +246,8 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer } } assert(found == true); - return Journal::replay_ertr::make_ready_future(true); + 
return Journal::replay_ertr::make_ready_future< + std::pair>(true, nullptr); }); } @@ -576,7 +577,8 @@ TEST_F(cbjournal_test_t, multiple_submit_at_end) auto &dirty_seq, auto &alloc_seq, auto last_modified) { - return Journal::replay_ertr::make_ready_future(true); + return Journal::replay_ertr::make_ready_future< + std::pair>(true, nullptr); }).unsafe_get0(); assert(get_written_to() == old_written_to); }); diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc index ae44cb94a62d7..d054fc18f4804 100644 --- a/src/test/crimson/seastore/test_object_data_handler.cc +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -110,12 +110,17 @@ struct object_data_handler_test_t: bufferptr known_contents; extent_len_t size = 0; + std::random_device rd; + std::mt19937 gen; - object_data_handler_test_t() {} + object_data_handler_test_t() : gen(rd()) {} void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) { ceph_assert(offset + len <= known_contents.length()); size = std::max(size, offset + len); + Option::size_t olen = crimson::common::local_conf().get_val( + "seastore_data_delta_based_overwrite"); + ceph_assert(olen == 0 || len <= olen); memset( known_contents.c_str() + offset, fill, @@ -233,12 +238,18 @@ struct object_data_handler_test_t: } void set_overwrite_threshold() { - crimson::common::local_conf().set_val("seastore_data_delta_based_overwrite", "131072").get(); + crimson::common::local_conf().set_val("seastore_data_delta_based_overwrite", + "16777216").get(); } void unset_overwrite_threshold() { crimson::common::local_conf().set_val("seastore_data_delta_based_overwrite", "0").get(); } + laddr_t get_random_laddr(size_t block_size, laddr_t limit) { + return block_size * + std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen); + } + void test_multi_write() { write((1<<20) - (4<<10), 4<<10, 'a'); write(1<<20, 4<<10, 'b'); @@ -671,6 +682,38 @@ 
TEST_P(object_data_handler_test_t, multiple_overwrite) { }); } +TEST_P(object_data_handler_test_t, random_overwrite) { + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + set_overwrite_threshold(); + size_t wsize = std::uniform_int_distribution<>(10, BSIZE - 1)(gen); + uint8_t div[3] = {1, 2, 4}; + uint8_t block_num = div[std::uniform_int_distribution<>(0, 2)(gen)]; + for (unsigned i = 0; i < BLOCKS / block_num; ++i) { + auto t = create_mutate_transaction(); + write(i * (BSIZE * block_num), BSIZE * block_num, 'a'); + } + + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = 0; j < 100; ++j) { + auto t = create_mutate_transaction(); + for (unsigned k = 0; k < 2; ++k) { + write(*t, get_random_laddr(BSIZE, TOTAL), wsize, + (char)((j*k) % std::numeric_limits::max())); + } + submit_transaction(std::move(t)); + } + restart(); + epm->check_usage(); + logger().info("random_writes: {} done replaying/checking", i); + } + read(0, 4<<20); + unset_overwrite_threshold(); + }); +} + INSTANTIATE_TEST_SUITE_P( object_data_handler_test, object_data_handler_test_t, diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index 46ec723a3524f..ddd894349d21e 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -218,7 +218,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer { delta_checker = std::nullopt; advance(); } - return Journal::replay_ertr::make_ready_future(true); + return Journal::replay_ertr::make_ready_future< + std::pair>(true, nullptr); }).unsafe_get0(); ASSERT_EQ(record_iter, records.end()); for (auto &i : records) {