Skip to content

Commit

Permalink
[#8417] PITR: Implement delete_snapshot_schedule
Browse files Browse the repository at this point in the history
Summary:
This diff adds the ability to delete snapshot schedules.

There is a tricky part in detecting deleted schedules on the TServer side.
Suppose at some point TSTabletManager does not know schedule1.
Since it knows schedules received by heartbeat there are 2 possible options:
1) It is a new schedule that was not yet received by heartbeat.
2) It is a deleted schedule.
So to distinguish between those 2 cases it uses the following logic.
Each time the heartbeat response is received and the schedules list is updated, we also increment the `snapshot_schedules_version_` field.
All missing schedules are added to the special map, along with the current value of `snapshot_schedules_version_`.
So when we find the schedule missing again, we can compare the current `snapshot_schedules_version_` with the version that we recorded when the schedule was first found missing.
If the version has changed since then and the schedule is still missing, the master does not know this schedule either, which means that it is an old schedule that was deleted.

But the following could happen:
A heartbeat is processed by the master, but its response is not yet processed by the tserver. Meanwhile, a new schedule is created and sent to the tablet.
Then the tablet receives the response to this heartbeat, and it would not contain such a schedule.
To avoid interpreting such a schedule as deleted, we wait for `snapshot_schedules_version_` to be incremented twice before marking the schedule as deleted on the tserver.

Test Plan: ybd --gtest_filter YbAdminSnapshotScheduleTest.Delete

Reviewers: skedia, bogdan

Reviewed By: bogdan

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D11868
  • Loading branch information
spolitov committed Jun 17, 2021
1 parent 9f97b24 commit 7453192
Show file tree
Hide file tree
Showing 20 changed files with 464 additions and 100 deletions.
4 changes: 4 additions & 0 deletions ent/src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class CatalogManager : public yb::master::CatalogManager, SnapshotCoordinatorCon
ListSnapshotSchedulesResponsePB* resp,
rpc::RpcContext* rpc);

// Handler for the DeleteSnapshotSchedule RPC of MasterBackupService: deletes the snapshot
// schedule identified by req->snapshot_schedule_id().
CHECKED_STATUS DeleteSnapshotSchedule(const DeleteSnapshotScheduleRequestPB* req,
DeleteSnapshotScheduleResponsePB* resp,
rpc::RpcContext* rpc);

CHECKED_STATUS ChangeEncryptionInfo(const ChangeEncryptionInfoRequestPB* req,
ChangeEncryptionInfoResponsePB* resp) override;

Expand Down
9 changes: 9 additions & 0 deletions ent/src/yb/master/catalog_manager_ent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1912,6 +1912,15 @@ Status CatalogManager::ListSnapshotSchedules(const ListSnapshotSchedulesRequestP
return snapshot_coordinator_.ListSnapshotSchedules(snapshot_schedule_id, resp);
}

// Master-side handler for the DeleteSnapshotSchedule RPC.
//
// Decodes the schedule id carried by the request and forwards the deletion to the snapshot
// coordinator together with the current leader-ready term and the client's RPC deadline.
// `resp` is not touched here; the outcome is reported solely through the returned Status.
Status CatalogManager::DeleteSnapshotSchedule(const DeleteSnapshotScheduleRequestPB* req,
                                              DeleteSnapshotScheduleResponsePB* resp,
                                              rpc::RpcContext* rpc) {
  // NOTE(review): the "Try" prefix suggests a malformed id does not fail here and is instead
  // rejected by the coordinator - confirm against TryFullyDecodeSnapshotScheduleId.
  const auto schedule_id = TryFullyDecodeSnapshotScheduleId(req->snapshot_schedule_id());
  const auto deadline = rpc->GetClientDeadline();
  return snapshot_coordinator_.DeleteSnapshotSchedule(schedule_id, leader_ready_term(), deadline);
}

void CatalogManager::DumpState(std::ostream* out, bool on_disk_dump) const {
super::DumpState(out, on_disk_dump);

Expand Down
11 changes: 11 additions & 0 deletions ent/src/yb/master/master_backup.proto
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ message SnapshotScheduleOptionsPB {
optional SnapshotScheduleFilterPB filter = 1;
optional uint64 interval_sec = 2; // interval for taking snapshot in seconds.
optional uint64 retention_duration_sec = 3; // how long store snapshots in seconds.
optional fixed64 delete_time = 4; // hybrid time when this schedule was deleted.
}

message CreateSnapshotScheduleRequestPB {
Expand Down Expand Up @@ -219,6 +220,14 @@ message ListSnapshotSchedulesResponsePB {
repeated SnapshotScheduleInfoPB schedules = 2;
}

// Request of MasterBackupService.DeleteSnapshotSchedule.
message DeleteSnapshotScheduleRequestPB {
// Raw bytes of the id of the snapshot schedule that should be deleted.
optional bytes snapshot_schedule_id = 1;
}

// Response of MasterBackupService.DeleteSnapshotSchedule.
message DeleteSnapshotScheduleResponsePB {
// When present, the request failed; yb-admin converts error.status into the returned Status.
optional MasterErrorPB error = 1;
}

service MasterBackupService {
// Client->Master RPCs
rpc CreateSnapshot(CreateSnapshotRequestPB) returns (CreateSnapshotResponsePB);
Expand All @@ -232,4 +241,6 @@ service MasterBackupService {
returns (CreateSnapshotScheduleResponsePB);
rpc ListSnapshotSchedules(ListSnapshotSchedulesRequestPB)
returns (ListSnapshotSchedulesResponsePB);
rpc DeleteSnapshotSchedule(DeleteSnapshotScheduleRequestPB)
returns (DeleteSnapshotScheduleResponsePB);
}
2 changes: 1 addition & 1 deletion ent/src/yb/master/master_backup_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace master {

#define YB_MASTER_BACKUP_SERVICE_METHODS \
(CreateSnapshot)(ListSnapshots)(ListSnapshotRestorations)(RestoreSnapshot)(DeleteSnapshot) \
(ImportSnapshotMeta)(CreateSnapshotSchedule)(ListSnapshotSchedules)
(ImportSnapshotMeta)(CreateSnapshotSchedule)(ListSnapshotSchedules)(DeleteSnapshotSchedule)

#define YB_MASTER_BACKUP_SERVICE_METHOD_DECLARE(r, data, elem) \
void elem( \
Expand Down
45 changes: 27 additions & 18 deletions ent/src/yb/tools/yb-admin_cli_ent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,33 +80,33 @@ Result<T> GetOptionalArg(const Args& args, size_t idx) {
void ClusterAdminCli::RegisterCommandHandlers(ClusterAdminClientClass* client) {
super::RegisterCommandHandlers(client);

std::string options = "";
for (auto flag : kListSnapshotsFlagList) {
options += Format(" [$0]", flag);
}
Register(
"list_snapshots", " [SHOW_DETAILS] [NOT_SHOW_RESTORED] [SHOW_DELETED]",
"list_snapshots", std::move(options),
[client](const CLIArguments& args) -> Status {
bool show_details = false;
bool show_restored = true;
bool show_deleted = false;
EnumBitSet<ListSnapshotsFlag> flags;

if (args.size() > 2) {
return ClusterAdminCli::kInvalidArguments;
}
for (int i = 0; i < args.size(); ++i) {
string uppercase_flag;
std::string uppercase_flag;
ToUpperCase(args[i], &uppercase_flag);

if (uppercase_flag == "SHOW_DETAILS") {
show_details = true;
} else if (uppercase_flag == "NOT_SHOW_RESTORED") {
show_restored = false;
} else if (uppercase_flag == "SHOW_DELETED") {
show_deleted = true;
} else {
return ClusterAdminCli::kInvalidArguments;
bool found = false;
for (auto flag : kListSnapshotsFlagList) {
if (uppercase_flag == ToString(flag)) {
flags.Set(flag);
found = true;
break;
}
}
if (!found) {
return STATUS_FORMAT(InvalidArgument, "Wrong flag: $0", args[i]);
}
}

RETURN_NOT_OK_PREPEND(client->ListSnapshots(show_details, show_restored, show_deleted),
"Unable to list snapshots");
RETURN_NOT_OK_PREPEND(client->ListSnapshots(flags), "Unable to list snapshots");
return Status::OK();
});

Expand Down Expand Up @@ -163,6 +163,15 @@ void ClusterAdminCli::RegisterCommandHandlers(ClusterAdminClientClass* client) {
return client->ListSnapshotSchedules(schedule_id);
});

// delete_snapshot_schedule <schedule_id>
// Parses the schedule id from the first argument and asks the client to delete that schedule.
// The JSON document produced by the client is returned as the command result.
RegisterJson(
"delete_snapshot_schedule",
" <schedule_id>",
[client](const CLIArguments& args) -> Result<rapidjson::Document> {
// NOTE(review): presumably enforces exactly one argument (min = max = 1) - confirm.
RETURN_NOT_OK(CheckArgumentsCount(args.size(), 1, 1));
auto schedule_id = VERIFY_RESULT(SnapshotScheduleId::FromString(args[0]));
return client->DeleteSnapshotSchedule(schedule_id);
});

RegisterJson(
"restore_snapshot_schedule",
Format(" <schedule_id> (<timestamp> | $0 <interval>)", kMinus),
Expand Down
7 changes: 6 additions & 1 deletion ent/src/yb/tools/yb-admin_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ namespace yb {
namespace tools {
namespace enterprise {

// Flags accepted by the list_snapshots command.
YB_DEFINE_ENUM(ListSnapshotsFlag, (SHOW_DETAILS)(NOT_SHOW_RESTORED)(SHOW_DELETED)(JSON));
// Set of ListSnapshotsFlag values; this is what ClusterAdminClient::ListSnapshots takes.
using ListSnapshotsFlags = EnumBitSet<ListSnapshotsFlag>;

class ClusterAdminClient : public yb::tools::ClusterAdminClient {
typedef yb::tools::ClusterAdminClient super;
public:
Expand All @@ -36,7 +40,7 @@ class ClusterAdminClient : public yb::tools::ClusterAdminClient {
: super(init_master_addrs, timeout) {}

// Snapshot operations.
CHECKED_STATUS ListSnapshots(bool show_details, bool show_restored, bool show_deleted);
CHECKED_STATUS ListSnapshots(const ListSnapshotsFlags& flags);
CHECKED_STATUS CreateSnapshot(const std::vector<client::YBTableName>& tables,
const bool add_indexes = true,
const int flush_timeout_secs = 0);
Expand All @@ -46,6 +50,7 @@ class ClusterAdminClient : public yb::tools::ClusterAdminClient {
Result<rapidjson::Document> CreateSnapshotSchedule(const std::vector<client::YBTableName>& tables,
MonoDelta interval, MonoDelta retention);
Result<rapidjson::Document> ListSnapshotSchedules(const SnapshotScheduleId& schedule_id);
Result<rapidjson::Document> DeleteSnapshotSchedule(const SnapshotScheduleId& schedule_id);
Result<rapidjson::Document> RestoreSnapshotSchedule(
const SnapshotScheduleId& schedule_id, HybridTime restore_at);
CHECKED_STATUS RestoreSnapshot(const std::string& snapshot_id,
Expand Down
80 changes: 70 additions & 10 deletions ent/src/yb/tools/yb-admin_client_ent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "yb/cdc/cdc_service.proxy.h"
#include "yb/client/client.h"
#include "yb/common/entity_ids.h"
#include "yb/common/json_util.h"
#include "yb/common/wire_protocol.h"
#include "yb/gutil/strings/util.h"
#include "yb/master/master_defaults.h"
Expand Down Expand Up @@ -91,11 +92,13 @@ using master::SysSnapshotEntryPB;

PB_ENUM_FORMATTERS(yb::master::SysSnapshotEntryPB::State);

Status ClusterAdminClient::ListSnapshots(bool show_details, bool show_restored, bool show_deleted) {
Status ClusterAdminClient::ListSnapshots(const ListSnapshotsFlags& flags) {
rapidjson::Document document(rapidjson::kObjectType);
bool json = flags.Test(ListSnapshotsFlag::JSON);
RpcController rpc;
rpc.set_timeout(timeout_);
ListSnapshotsRequestPB req;
req.set_list_deleted_snapshots(show_deleted);
req.set_list_deleted_snapshots(flags.Test(ListSnapshotsFlag::SHOW_DELETED));
ListSnapshotsResponsePB resp;
RETURN_NOT_OK(master_backup_proxy_->ListSnapshots(req, &resp, &rpc));

Expand All @@ -104,19 +107,41 @@ Status ClusterAdminClient::ListSnapshots(bool show_details, bool show_restored,
}

if (resp.has_current_snapshot_id()) {
cout << "Current snapshot id: " << SnapshotIdToString(resp.current_snapshot_id()) << endl;
if (json) {
AddStringField("current_snapshot_id",
SnapshotIdToString(resp.current_snapshot_id()),
&document, &document.GetAllocator());

} else {
cout << "Current snapshot id: " << SnapshotIdToString(resp.current_snapshot_id()) << endl;
}
}

if (resp.snapshots_size()) {
cout << RightPadToUuidWidth("Snapshot UUID") << kColumnSep << "State" << endl;
rapidjson::Value json_snapshots(rapidjson::kArrayType);
if (json) {
document.AddMember("snapshots", json_snapshots, document.GetAllocator());
} else {
cout << "No snapshots" << endl;
if (resp.snapshots_size()) {
cout << RightPadToUuidWidth("Snapshot UUID") << kColumnSep << "State" << endl;
} else {
cout << "No snapshots" << endl;
}
}

for (SnapshotInfoPB& snapshot : *resp.mutable_snapshots()) {
cout << SnapshotIdToString(snapshot.id()) << kColumnSep << snapshot.entry().state() << endl;
rapidjson::Value json_snapshot(rapidjson::kObjectType);
if (json) {
AddStringField(
"id", SnapshotIdToString(snapshot.id()), &json_snapshot, &document.GetAllocator());
AddStringField(
"state", SysSnapshotEntryPB::State_Name(snapshot.entry().state()), &json_snapshot,
&document.GetAllocator());
} else {
cout << SnapshotIdToString(snapshot.id()) << kColumnSep << snapshot.entry().state() << endl;
}

if (show_details) {
// Not implemented in json mode.
if (flags.Test(ListSnapshotsFlag::SHOW_DETAILS)) {
for (SysRowEntry& entry : *snapshot.mutable_entry()->mutable_entries()) {
string decoded_data;
switch (entry.type()) {
Expand Down Expand Up @@ -145,6 +170,9 @@ Status ClusterAdminClient::ListSnapshots(bool show_details, bool show_restored,
}
}
}
if (json) {
json_snapshots.PushBack(json_snapshot, document.GetAllocator());
}
}

rpc.Reset();
Expand All @@ -153,15 +181,21 @@ Status ClusterAdminClient::ListSnapshots(bool show_details, bool show_restored,
ListSnapshotRestorationsResponsePB rest_resp;
RETURN_NOT_OK(master_backup_proxy_->ListSnapshotRestorations(rest_req, &rest_resp, &rpc));

if (json) {
std::cout << common::PrettyWriteRapidJsonToString(document) << std::endl;
return Status::OK();
}

if (rest_resp.restorations_size() == 0) {
cout << "No snapshot restorations" << endl;
} else if (!show_restored) {
} else if (flags.Test(ListSnapshotsFlag::NOT_SHOW_RESTORED)) {
cout << "Not show fully RESTORED entries" << endl;
}

bool title_printed = false;
for (const auto& restoration : rest_resp.restorations()) {
if (show_restored || restoration.entry().state() != SysSnapshotEntryPB::RESTORED) {
if (!flags.Test(ListSnapshotsFlag::NOT_SHOW_RESTORED) ||
restoration.entry().state() != SysSnapshotEntryPB::RESTORED) {
if (!title_printed) {
cout << RightPadToUuidWidth("Restoration UUID") << kColumnSep << "State" << endl;
title_printed = true;
Expand Down Expand Up @@ -356,6 +390,12 @@ Result<rapidjson::Document> ClusterAdminClient::ListSnapshotSchedules(
AddStringField("retention",
MonoDelta::FromSeconds(schedule.options().retention_duration_sec()).ToString(),
&options, &result.GetAllocator());
auto delete_time = HybridTime::FromPB(schedule.options().delete_time());
if (delete_time) {
AddStringField("delete_time", HybridTimeToString(delete_time), &options,
&result.GetAllocator());
}

json_schedule.AddMember("options", options, result.GetAllocator());
rapidjson::Value json_snapshots(rapidjson::kArrayType);
for (const auto& snapshot : schedule.snapshots()) {
Expand Down Expand Up @@ -383,6 +423,26 @@ Result<rapidjson::Document> ClusterAdminClient::ListSnapshotSchedules(
return result;
}

// Asks the master to delete the snapshot schedule with the given id.
//
// On success returns a JSON object of the form {"schedule_id": "<id>"}. Returns a non-OK
// status when the RPC itself fails or when the master reports an error in the response.
Result<rapidjson::Document> ClusterAdminClient::DeleteSnapshotSchedule(
    const SnapshotScheduleId& schedule_id) {
  master::DeleteSnapshotScheduleRequestPB request;
  request.set_snapshot_schedule_id(schedule_id.data(), schedule_id.size());

  RpcController controller;
  controller.set_timeout(timeout_);

  master::DeleteSnapshotScheduleResponsePB response;
  RETURN_NOT_OK(master_backup_proxy_->DeleteSnapshotSchedule(request, &response, &controller));
  if (response.has_error()) {
    return StatusFromPB(response.error().status());
  }

  // Echo the id of the schedule back to the caller.
  rapidjson::Document result(rapidjson::kObjectType);
  AddStringField("schedule_id", schedule_id.ToString(), &result, &result.GetAllocator());
  return result;
}

bool SnapshotSuitableForRestoreAt(const SysSnapshotEntryPB& entry, HybridTime restore_at) {
return HybridTime::FromPB(entry.snapshot_hybrid_time()) >= restore_at &&
HybridTime::FromPB(entry.previous_snapshot_hybrid_time()) < restore_at;
Expand Down
Loading

0 comments on commit 7453192

Please sign in to comment.