Skip to content

Commit 9aede08

Browse files
authored
Merge pull request ceph#57368 from myoungwon/wip-seastore-is-data-stable
crimson/os/seastore: add is_data_stable() to allow delta-overwrite on EXIST_CLEAN Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 6e11983 + cb7548f commit 9aede08

File tree

9 files changed

+67
-29
lines changed

9 files changed

+67
-29
lines changed

src/common/options/crimson.yaml.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ options:
108108
level: advanced
109109
desc: split extent if ratio of total extent size to write size exceeds this value
110110
default: 1.25
111+
# TODO: seastore_obj_data_write_amplification is no longer correct if
112+
# seastore_data_delta_based_overwrite is enabled. So, this should be reconsidered.
111113
- name: seastore_max_concurrent_transactions
112114
type: uint
113115
level: advanced

src/crimson/os/seastore/btree/btree_range_pin.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ bool BtreeNodeMapping<key_t, val_t>::is_stable() const
3636
return p.is_child_stable(ctx, pos);
3737
}
3838

39+
template <typename key_t, typename val_t>
40+
bool BtreeNodeMapping<key_t, val_t>::is_data_stable() const
41+
{
42+
assert(parent);
43+
assert(parent->is_valid());
44+
assert(pos != std::numeric_limits<uint16_t>::max());
45+
auto &p = (FixedKVNode<key_t>&)*parent;
46+
return p.is_child_data_stable(ctx, pos);
47+
}
48+
3949
template class BtreeNodeMapping<laddr_t, paddr_t>;
4050
template class BtreeNodeMapping<paddr_t, laddr_t>;
4151
} // namespace crimson::os::seastore

src/crimson/os/seastore/btree/btree_range_pin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ class BtreeNodeMapping : public PhysicalNodeMapping<key_t, val_t> {
196196

197197
get_child_ret_t<LogicalCachedExtent> get_logical_extent(Transaction&) final;
198198
bool is_stable() const final;
199+
bool is_data_stable() const final;
199200
};
200201

201202
}

src/crimson/os/seastore/btree/fixed_kv_node.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ struct FixedKVNode : ChildableCachedExtent {
227227
}
228228

229229
virtual bool is_child_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
230+
virtual bool is_child_data_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
230231

231232
template <typename T>
232233
get_child_ret_t<T> get_child(
@@ -597,6 +598,10 @@ struct FixedKVInternalNode
597598
ceph_abort("impossible");
598599
return false;
599600
}
601+
bool is_child_data_stable(op_context_t<NODE_KEY>, uint16_t pos) const final {
602+
ceph_abort("impossible");
603+
return false;
604+
}
600605

601606
bool validate_stable_children() final {
602607
LOG_PREFIX(FixedKVInternalNode::validate_stable_children);
@@ -986,6 +991,13 @@ struct FixedKVLeafNode
986991
//
987992
// For reserved mappings, the return values are undefined.
988993
bool is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
994+
return _is_child_stable(c, pos);
995+
}
996+
bool is_child_data_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
997+
return _is_child_stable(c, pos, true);
998+
}
999+
1000+
bool _is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos, bool data_only = false) const {
9891001
auto child = this->children[pos];
9901002
if (is_reserved_ptr(child)) {
9911003
return true;
@@ -994,15 +1006,23 @@ struct FixedKVLeafNode
9941006
ceph_assert(
9951007
child->is_pending_in_trans(c.trans.get_trans_id())
9961008
|| this->is_stable_written());
997-
return c.cache.is_viewable_extent_stable(c.trans, child);
1009+
if (data_only) {
1010+
return c.cache.is_viewable_extent_data_stable(c.trans, child);
1011+
} else {
1012+
return c.cache.is_viewable_extent_stable(c.trans, child);
1013+
}
9981014
} else if (this->is_pending()) {
9991015
auto key = this->iter_idx(pos).get_key();
10001016
auto &sparent = this->get_stable_for_key(key);
10011017
auto spos = sparent.child_pos_for_key(key);
10021018
auto child = sparent.children[spos];
10031019
if (is_valid_child_ptr(child)) {
10041020
ceph_assert(child->is_logical());
1005-
return c.cache.is_viewable_extent_stable(c.trans, child);
1021+
if (data_only) {
1022+
return c.cache.is_viewable_extent_data_stable(c.trans, child);
1023+
} else {
1024+
return c.cache.is_viewable_extent_stable(c.trans, child);
1025+
}
10061026
} else {
10071027
return true;
10081028
}

src/crimson/os/seastore/cache.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,15 @@ class Cache {
452452
return view->is_stable();
453453
}
454454

455+
bool is_viewable_extent_data_stable(
456+
Transaction &t,
457+
CachedExtentRef extent)
458+
{
459+
assert(extent);
460+
auto view = extent->get_transactional_view(t);
461+
return view->is_data_stable();
462+
}
463+
455464
using get_extent_ertr = base_ertr;
456465
get_extent_ertr::future<CachedExtentRef>
457466
get_extent_viewable_by_trans(

src/crimson/os/seastore/cached_extent.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,10 @@ class CachedExtent
436436
is_pending_io());
437437
}
438438

439+
bool is_data_stable() const {
440+
return is_stable() || is_exist_clean();
441+
}
442+
439443
/// Returns true if extent has a pending delta
440444
bool is_mutation_pending() const {
441445
return state == extent_state_t::MUTATION_PENDING;
@@ -1087,6 +1091,7 @@ class PhysicalNodeMapping {
10871091
// For reserved mappings, the return values are
10881092
// undefined although it won't crash
10891093
virtual bool is_stable() const = 0;
1094+
virtual bool is_data_stable() const = 0;
10901095
virtual bool is_clone() const = 0;
10911096
bool is_zero_reserved() const {
10921097
return !get_val().is_real();

src/crimson/os/seastore/object_data_handler.cc

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -297,14 +297,13 @@ overwrite_ops_t prepare_ops_list(
297297
interval_set<uint64_t> pre_alloc_addr_removed, pre_alloc_addr_remapped;
298298
if (delta_based_overwrite_max_extent_size) {
299299
for (auto &r : ops.to_remove) {
300-
// TODO: Introduce LBAMapping::is_data_stable() to include EXIST_CLEAN extents
301-
if (r->is_stable() && !r->is_zero_reserved()) {
300+
if (r->is_data_stable() && !r->is_zero_reserved()) {
302301
pre_alloc_addr_removed.insert(r->get_key(), r->get_length());
303302

304303
}
305304
}
306305
for (auto &r : ops.to_remap) {
307-
if (r.pin && r.pin->is_stable() && !r.pin->is_zero_reserved()) {
306+
if (r.pin && r.pin->is_data_stable() && !r.pin->is_zero_reserved()) {
308307
pre_alloc_addr_remapped.insert(r.pin->get_key(), r.pin->get_length());
309308
}
310309
}
@@ -641,8 +640,8 @@ struct overwrite_plan_t {
641640

642641
// helper member
643642
extent_len_t block_size;
644-
bool is_left_stable;
645-
bool is_right_stable;
643+
bool is_left_fresh;
644+
bool is_right_fresh;
646645

647646
public:
648647
extent_len_t get_left_size() const {
@@ -692,8 +691,8 @@ struct overwrite_plan_t {
692691
<< ", left_operation=" << overwrite_plan.left_operation
693692
<< ", right_operation=" << overwrite_plan.right_operation
694693
<< ", block_size=" << overwrite_plan.block_size
695-
<< ", is_left_stable=" << overwrite_plan.is_left_stable
696-
<< ", is_right_stable=" << overwrite_plan.is_right_stable
694+
<< ", is_left_fresh=" << overwrite_plan.is_left_fresh
695+
<< ", is_right_fresh=" << overwrite_plan.is_right_fresh
697696
<< ")";
698697
}
699698

@@ -712,8 +711,10 @@ struct overwrite_plan_t {
712711
left_operation(overwrite_operation_t::UNKNOWN),
713712
right_operation(overwrite_operation_t::UNKNOWN),
714713
block_size(block_size),
715-
is_left_stable(pins.front()->is_stable()),
716-
is_right_stable(pins.back()->is_stable()) {
714+
// TODO: introduce PhysicalNodeMapping::is_fresh()
715+
// Note: fresh write can be merged with overwrite if they overlap.
716+
is_left_fresh(!pins.front()->is_stable()),
717+
is_right_fresh(!pins.back()->is_stable()) {
717718
validate();
718719
evaluate_operations();
719720
assert(left_operation != overwrite_operation_t::UNKNOWN);
@@ -742,6 +743,9 @@ struct overwrite_plan_t {
742743
* seastore_obj_data_write_amplification; otherwise, split the
743744
* original extent into at most three parts: origin-left, part-to-be-modified
744745
* and origin-right.
746+
*
747+
* TODO: seastore_obj_data_write_amplification needs to be reconsidered because
748+
* delta-based overwrite is introduced
745749
*/
746750
void evaluate_operations() {
747751
auto actual_write_size = get_pins_size();
@@ -753,7 +757,7 @@ struct overwrite_plan_t {
753757
actual_write_size -= left_ext_size;
754758
left_ext_size = 0;
755759
left_operation = overwrite_operation_t::OVERWRITE_ZERO;
756-
} else if (!is_left_stable) {
760+
} else if (is_left_fresh) {
757761
aligned_data_size += left_ext_size;
758762
left_ext_size = 0;
759763
left_operation = overwrite_operation_t::MERGE_EXISTING;
@@ -763,7 +767,7 @@ struct overwrite_plan_t {
763767
actual_write_size -= right_ext_size;
764768
right_ext_size = 0;
765769
right_operation = overwrite_operation_t::OVERWRITE_ZERO;
766-
} else if (!is_right_stable) {
770+
} else if (is_right_fresh) {
767771
aligned_data_size += right_ext_size;
768772
right_ext_size = 0;
769773
right_operation = overwrite_operation_t::MERGE_EXISTING;

src/crimson/os/seastore/transaction_manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ class TransactionManager : public ExtentCallbackInterface {
365365
read_extent_ret<T> get_mutable_extent_by_laddr(Transaction &t, laddr_t laddr, extent_len_t len) {
366366
return get_pin(t, laddr
367367
).si_then([this, &t, len](auto pin) {
368-
ceph_assert(pin->is_stable() && !pin->is_zero_reserved());
368+
ceph_assert(pin->is_data_stable() && !pin->is_zero_reserved());
369369
ceph_assert(!pin->is_clone());
370370
ceph_assert(pin->get_length() == len);
371371
return this->read_pin<T>(t, std::move(pin));

src/test/crimson/seastore/test_object_data_handler.cc

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -800,22 +800,9 @@ TEST_P(object_data_handler_test_t, overwrite_then_read_within_transaction) {
800800
auto pin1 = remap_pin(*t, std::move(pins.front()), 4096, 8192);
801801
auto ext = get_extent(*t, base + 4096, 4096 * 2);
802802
ASSERT_TRUE(ext->is_exist_clean());
803-
ext = tm->get_mutable_extent(*t, ext)->cast<ObjectDataBlock>();
804-
805-
auto l = 4096;
806-
memset(
807-
known_contents.c_str() + base + 4096,
808-
'z',
809-
l);
810-
bufferlist bl;
811-
bl.append(
812-
bufferptr(
813-
known_contents,
814-
base + 4096,
815-
l));
816-
817-
ext->overwrite(0, bl);
803+
write(*t, base + 4096, 4096, 'y');
818804
ASSERT_TRUE(ext->is_exist_mutation_pending());
805+
write(*t, base + 8092, 4096, 'z');
819806
}
820807
submit_transaction(std::move(t));
821808
read(base + 4096, 4096);

0 commit comments

Comments
 (0)