Skip to content

Commit

Permalink
[#23399] DocDB: Fix StopActiveTxnsPriorTo from using local variables …
Browse files Browse the repository at this point in the history
…in the call back

Summary:
use a shared_ptr instead of using variables on the stack, to handle the case where the callback may run after the function has exited (due to timeout)
Jira: DB-12318

Test Plan: yb_build.sh --cxx-test yb-admin-snapshot-schedule-test --gtest_filter YbAdminSnapshotScheduleTestWithYsql.TransactionDuringPITRRepro23399

Reviewers: asrivastava

Reviewed By: asrivastava

Subscribers: ybase

Differential Revision: https://phorge.dev.yugabyte.com/D37883
  • Loading branch information
amitanandaiyer committed Sep 12, 2024
1 parent d053e45 commit da4da45
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 26 deletions.
53 changes: 28 additions & 25 deletions src/yb/tablet/transaction_participant.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ DEFINE_RUNTIME_AUTO_bool(cdc_write_post_apply_metadata, kLocalPersisted, false,
DEFINE_RUNTIME_bool(cdc_immediate_transaction_cleanup, true,
"Clean up transactions from memory after apply, even if its changes have not yet been "
"streamed by CDC.");
DEFINE_test_flag(int32, stopactivetxns_sleep_in_abort_cb_ms, 0,
"Delays the abort callback in StopActiveTxns to repro GitHub #23399.");

DECLARE_int64(transaction_abort_check_timeout_ms);

Expand Down Expand Up @@ -1305,33 +1307,34 @@ class TransactionParticipant::Impl

// It is ok to attempt to abort txns that have committed. We don't care
// if our request succeeds or not.
CountDownLatch latch(ids_to_abort.size());
std::atomic<bool> failed{false};
Status return_status = Status::OK();
struct CallbackInfo {
CountDownLatch latch{0};
StatusHolder return_status;
};
auto cb_info = std::make_shared<CallbackInfo>();
cb_info->latch.Reset(ids_to_abort.size());
for (const auto& id : ids_to_abort) {
Abort(
id, [this, id, &failed, &return_status, &latch](Result<TransactionStatusResult> result) {
VLOG_WITH_PREFIX(2) << "Aborting " << id << " got " << result;
if (!result ||
(result->status != TransactionStatus::COMMITTED && result->status != ABORTED)) {
LOG(INFO) << "Could not abort " << id << " got " << result;

bool expected = false;
if (failed.compare_exchange_strong(expected, true)) {
if (!result) {
return_status = result.status();
} else {
return_status =
STATUS_FORMAT(IllegalState, "Wrong status after abort: $0", result->status);
}
}
}
latch.CountDown();
});
// Do not pass anything by reference, as the callback can outlive this function.
Abort(id, [this, id, cb_info](Result<TransactionStatusResult> result) {
VLOG_WITH_PREFIX(2) << "Aborting " << id << " got " << result;
if (FLAGS_TEST_stopactivetxns_sleep_in_abort_cb_ms > 0) {
VLOG(2) << "Sleeping for " << FLAGS_TEST_stopactivetxns_sleep_in_abort_cb_ms << "ms.";
SleepFor(MonoDelta::FromMilliseconds(FLAGS_TEST_stopactivetxns_sleep_in_abort_cb_ms));
}
if (!result ||
(result->status != TransactionStatus::COMMITTED && result->status != ABORTED)) {
LOG(INFO) << "Could not abort " << id << " got " << result;
cb_info->return_status.SetError(
!result
? result.status()
: STATUS_FORMAT(IllegalState, "Wrong status after abort: $0", result->status));
}
cb_info->latch.CountDown();
});
}

return latch.WaitUntil(deadline) ? return_status
: STATUS(TimedOut, "TimedOut while aborting old transactions");
return cb_info->latch.WaitUntil(deadline)
? cb_info->return_status.GetStatus()
: STATUS(TimedOut, "TimedOut while aborting old transactions");
}

Result<HybridTime> WaitForSafeTime(HybridTime safe_time, CoarseTimePoint deadline) {
Expand Down
22 changes: 21 additions & 1 deletion src/yb/tools/yb-admin-snapshot-schedule-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ class YbAdminSnapshotScheduleTestWithYsql : public YbAdminSnapshotScheduleTest {
}

void TestPgsqlDropDefault();
void TestTransactionDuringPITR();
};

class YbAdminSnapshotScheduleTestWithYsqlAndPackedRow : public YbAdminSnapshotScheduleTestWithYsql {
Expand Down Expand Up @@ -5418,7 +5419,7 @@ TEST_F_EX(YbAdminSnapshotScheduleTest, CreateDuplicateSchedules,
"already exists for the given keyspace ysql." + kTableName.namespace_name());
}

TEST_F(YbAdminSnapshotScheduleTestWithYsql, TransactionDuringPITR) {
void YbAdminSnapshotScheduleTestWithYsql::TestTransactionDuringPITR() {
ASSERT_OK(PrepareCommon());
std::string db_name = "test_db_name";
std::string table_name = "test_table";
Expand All @@ -5441,6 +5442,25 @@ TEST_F(YbAdminSnapshotScheduleTestWithYsql, TransactionDuringPITR) {
ASSERT_EQ(row_value, 3);
}

TEST_F(YbAdminSnapshotScheduleTestWithYsql, TransactionDuringPITR) {
TestTransactionDuringPITR();
}

class YbAdminSnapshotScheduleTestWithYsqlRepro23399 : public YbAdminSnapshotScheduleTestWithYsql {
public:
std::vector<std::string> ExtraTSFlags() override {
return {
"--TEST_stopactivetxns_sleep_in_abort_cb_ms=500"
};
}
};

TEST_F_EX(
YbAdminSnapshotScheduleTestWithYsql, TransactionDuringPITRRepro23399,
YbAdminSnapshotScheduleTestWithYsqlRepro23399) {
TestTransactionDuringPITR();
}

class YbAdminSnapshotScheduleTestWithYsqlTransactionalDDL
: public YbAdminSnapshotScheduleTestWithYsql {
public:
Expand Down

0 comments on commit da4da45

Please sign in to comment.