Skip to content

Commit

Permalink
[Backport 2.6] [#7126] PITR: Cleanup deleted tablets
Browse files Browse the repository at this point in the history
Summary:
If table is participated in snapshot schedule, it's tablets are not being deleted immediately when the table is deleted.
Because those tablets could be restored by user request, we instead just mark them as hidden.

This diff adds logic to cleanup such tablets when there is no schedule that could be used to restore those tablets. This means both
- this tablet is still covered by some schedule's filter, but it is no longer in the retention interval for any of them
- this tablet is not covered by any schedule's filter anymore, as they've all been deleted

Also fixed a bug with `SnapshotState::TryStartDelete` when the snapshot did not have any tablets.

Original commit: D11489/4e9665ad7ee022ef0d118940a1086aac5ffd1110

Test Plan:
ybd --gtest_filter YbAdminSnapshotScheduleTest.CleanupDeletedTablets
Jenkins: rebase: 2.6

Reviewers: bogdan

Reviewed By: bogdan

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D11598
  • Loading branch information
spolitov committed May 14, 2021
1 parent db87adc commit ff95b71
Show file tree
Hide file tree
Showing 21 changed files with 332 additions and 90 deletions.
2 changes: 2 additions & 0 deletions ent/src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ class CatalogManager : public yb::master::CatalogManager, SnapshotCoordinatorCon
CHECKED_STATUS RestoreSysCatalog(SnapshotScheduleRestoration* restoration) override;
CHECKED_STATUS VerifyRestoredObjects(const SnapshotScheduleRestoration& restoration) override;

void CleanupHiddenTablets(const ScheduleMinRestoreTime& schedule_min_restore_time) override;

rpc::Scheduler& Scheduler() override;

int64_t LeaderTerm() override;
Expand Down
64 changes: 63 additions & 1 deletion ent/src/yb/master/catalog_manager_ent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1505,12 +1505,74 @@ Status CatalogManager::VerifyRestoredObjects(const SnapshotScheduleRestoration&
}
for (const auto& id_and_type : objects_to_restore) {
return STATUS_FORMAT(
IllegalState, "Expected to restore $0/$1, but it does not present after restoration",
IllegalState, "Expected to restore $0/$1, but it is not present after restoration",
SysRowEntry::Type_Name(id_and_type.second), id_and_type.first);
}
return Status::OK();
}

void CatalogManager::CleanupHiddenTablets(const ScheduleMinRestoreTime& schedule_min_restore_time) {
VLOG_WITH_PREFIX_AND_FUNC(4) << AsString(schedule_min_restore_time);

std::vector<TabletInfoPtr> hidden_tablets;
{
SharedLock<LockType> l(lock_);
if (hidden_tablets_.empty()) {
return;
}
hidden_tablets = hidden_tablets_;
}
std::vector<TabletInfoPtr> tablets_to_delete;
std::vector<TabletInfoPtr> tablets_to_remove_from_hidden;

for (const auto& tablet : hidden_tablets) {
auto lock = tablet->LockForRead();
if (!lock->ListedAsHidden()) {
tablets_to_remove_from_hidden.push_back(tablet);
continue;
}
auto hide_hybrid_time = HybridTime::FromPB(lock->pb.hide_hybrid_time());
bool cleanup = true;
for (const auto& schedule_id_str : lock->pb.retained_by_snapshot_schedules()) {
auto schedule_id = TryFullyDecodeSnapshotScheduleId(schedule_id_str);
auto it = schedule_min_restore_time.find(schedule_id);
// If schedule is not present in schedule_min_restore_time then it means that schedule
// was deleted, so it should not retain the tablet.
if (it != schedule_min_restore_time.end() && it->second <= hide_hybrid_time) {
VLOG_WITH_PREFIX(1)
<< "Retaining tablet: " << tablet->tablet_id() << ", hide hybrid time: "
<< hide_hybrid_time << ", because of schedule: " << schedule_id
<< ", min restore time: " << it->second;
cleanup = false;
break;
}
}
if (cleanup) {
tablets_to_delete.push_back(tablet);
}
}
if (!tablets_to_delete.empty()) {
LOG_WITH_PREFIX(INFO) << "Cleanup hidden tablets: " << AsString(tablets_to_delete);
WARN_NOT_OK(DeleteTabletListAndSendRequests(tablets_to_delete, "Cleanup hidden tablets", {}),
"Failed to cleanup hidden tablets");
}
if (!tablets_to_remove_from_hidden.empty()) {
auto it = tablets_to_remove_from_hidden.begin();
std::lock_guard<LockType> l(lock_);
// Order of tablets in tablets_to_remove_from_hidden matches order in hidden_tablets_,
// so we could avoid searching in tablets_to_remove_from_hidden.
auto filter = [&it, end = tablets_to_remove_from_hidden.end()](const TabletInfoPtr& tablet) {
if (it != end && tablet.get() == it->get()) {
++it;
return true;
}
return false;
};
hidden_tablets_.erase(std::remove_if(hidden_tablets_.begin(), hidden_tablets_.end(), filter),
hidden_tablets_.end());
}
}

rpc::Scheduler& CatalogManager::Scheduler() {
return master_->messenger()->scheduler();
}
Expand Down
12 changes: 4 additions & 8 deletions src/yb/integration-tests/external_mini_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -551,16 +551,12 @@ Status ExternalMiniCluster::RemoveMaster(ExternalMaster* master) {
}

std::shared_ptr<ConsensusServiceProxy> ExternalMiniCluster::GetLeaderConsensusProxy() {
auto leader_master_sock = GetLeaderMaster()->bound_rpc_addr();

return std::make_shared<ConsensusServiceProxy>(proxy_cache_.get(), leader_master_sock);
return GetConsensusProxy(GetLeaderMaster());
}

std::shared_ptr<ConsensusServiceProxy> ExternalMiniCluster::GetConsensusProxy(
scoped_refptr<ExternalMaster> master) {
auto master_sock = master->bound_rpc_addr();

return std::make_shared<ConsensusServiceProxy>(proxy_cache_.get(), master_sock);
ExternalDaemon* external_deamon) {
return GetProxy<ConsensusServiceProxy>(external_deamon);
}

Status ExternalMiniCluster::StepDownMasterLeader(TabletServerErrorPB::Code* error_code) {
Expand Down Expand Up @@ -997,7 +993,7 @@ Status ExternalMiniCluster::GetLastOpIdForEachMasterPeer(
opid_req.set_dest_uuid(master->uuid());
opid_req.set_opid_type(opid_type);
RETURN_NOT_OK_PREPEND(
GetConsensusProxy(master)->GetLastOpId(opid_req, &opid_resp, &controller),
GetConsensusProxy(master.get())->GetLastOpId(opid_req, &opid_resp, &controller),
Substitute("Failed to fetch last op id from $0", master->bound_rpc_hostport().port()));
op_ids->push_back(opid_resp.opid());
controller.Reset();
Expand Down
21 changes: 19 additions & 2 deletions src/yb/integration-tests/external_mini_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,20 @@ class ExternalMiniCluster : public MiniClusterBase {
std::shared_ptr<consensus::ConsensusServiceProxy> GetLeaderConsensusProxy();

// Get the given master's consensus proxy.
std::shared_ptr<consensus::ConsensusServiceProxy> GetConsensusProxy(
scoped_refptr<ExternalMaster> master);
std::shared_ptr<consensus::ConsensusServiceProxy> GetConsensusProxy(ExternalDaemon* daemon);

template <class T>
std::shared_ptr<T> GetProxy(ExternalDaemon* daemon);

template <class T>
std::shared_ptr<T> GetTServerProxy(int i) {
return GetProxy<T>(tablet_server(i));
}

template <class T>
std::shared_ptr<T> GetMasterProxy(int i) {
return GetProxy<T>(master(i));
}

// If the cluster is configured for a single non-distributed master, return a proxy to that
// master. Requires that the single master is running.
Expand Down Expand Up @@ -803,5 +815,10 @@ struct MasterComparator {
const ExternalMaster* master_;
};

template <class T>
std::shared_ptr<T> ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
return std::make_shared<T>(proxy_cache_.get(), daemon->bound_rpc_addr());
}

} // namespace yb
#endif // YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_
6 changes: 4 additions & 2 deletions src/yb/integration-tests/mini_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -643,11 +643,13 @@ std::vector<tablet::TabletPeerPtr> ListTabletPeers(MiniCluster* cluster, ListPee
return ListTabletPeers(cluster, [](const auto& peer) { return true; });
case ListPeersFilter::kLeaders:
return ListTabletPeers(cluster, [](const auto& peer) {
return peer->consensus()->GetLeaderStatus() != consensus::LeaderStatus::NOT_LEADER;
auto consensus = peer->shared_consensus();
return consensus && consensus->GetLeaderStatus() != consensus::LeaderStatus::NOT_LEADER;
});
case ListPeersFilter::kNonLeaders:
return ListTabletPeers(cluster, [](const auto& peer) {
return peer->consensus()->GetLeaderStatus() == consensus::LeaderStatus::NOT_LEADER;
auto consensus = peer->shared_consensus();
return consensus && consensus->GetLeaderStatus() == consensus::LeaderStatus::NOT_LEADER;
});
}

Expand Down
2 changes: 1 addition & 1 deletion src/yb/master/catalog_entity_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ bool TableInfo::AreAllTabletsDeleted() const {
shared_lock<decltype(lock_)> l(lock_);
for (const TableInfo::TabletInfoMap::value_type& e : tablet_map_) {
auto tablet_lock = e.second->LockForRead();
if (!tablet_lock->is_deleted() && !tablet_lock->pb.hidden()) {
if (!tablet_lock->is_deleted() && !tablet_lock->is_hidden()) {
return false;
}
}
Expand Down
9 changes: 9 additions & 0 deletions src/yb/master/catalog_entity_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ struct PersistentTabletInfo : public Persistent<SysTabletsEntryPB, SysRowEntry::
pb.state() == SysTabletsEntryPB::DELETED;
}

bool is_hidden() const {
return pb.hide_hybrid_time() != 0;
}

bool ListedAsHidden() const {
// Tablet was hidden, but not yet deleted (to avoid resending delete for it).
return is_hidden() && !is_deleted();
}

bool is_colocated() const {
return pb.colocated();
}
Expand Down
6 changes: 6 additions & 0 deletions src/yb/master/catalog_loaders.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Status TabletLoader::Visit(const TabletId& tablet_id, const SysTabletsEntryPB& m
// Setup the tablet info.
std::vector<TableId> table_ids;
bool tablet_deleted;
bool listed_as_hidden;
TabletInfo* tablet = new TabletInfo(first_table, tablet_id);
{
auto l = tablet->LockForWrite();
Expand Down Expand Up @@ -148,6 +149,7 @@ Status TabletLoader::Visit(const TabletId& tablet_id, const SysTabletsEntryPB& m
}

tablet_deleted = l.mutable_data()->is_deleted();
listed_as_hidden = l.mutable_data()->ListedAsHidden();

// Assume we need to delete this tablet until we find an active table using this tablet.
bool should_delete_tablet = !tablet_deleted;
Expand Down Expand Up @@ -236,6 +238,10 @@ Status TabletLoader::Visit(const TabletId& tablet_id, const SysTabletsEntryPB& m

VLOG(1) << "Metadata for tablet " << tablet_id << ": " << metadata.ShortDebugString();

if (listed_as_hidden) {
catalog_manager_->hidden_tablets_.push_back(tablet);
}

return Status::OK();
}

Expand Down
40 changes: 29 additions & 11 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2171,7 +2171,7 @@ Status CatalogManager::DeleteTablets(const std::vector<TabletId>& tablet_ids) {
}

return DeleteTabletListAndSendRequests(
tablet_infos, "Tablet deleted upon request at " + LocalTimeAsString(), HideOnly::kFalse);
tablet_infos, "Tablet deleted upon request at " + LocalTimeAsString(), {});
}

Status CatalogManager::DeleteTablet(
Expand Down Expand Up @@ -3924,16 +3924,16 @@ Status CatalogManager::DeleteTableInternal(
}

for (const auto& table : tables) {
HideOnly hide_only = HideOnly::kFalse;
RepeatedBytes retained_by_snapshot_schedules;
for (const auto& entry : schedules_to_tables_map) {
if (std::binary_search(entry.second.begin(), entry.second.end(), table->id())) {
hide_only = HideOnly::kTrue;
break;
retained_by_snapshot_schedules.Add()->assign(
entry.first.AsSlice().cdata(), entry.first.size());
}
}

// Send a DeleteTablet() request to each tablet replica in the table.
RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, hide_only));
RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, retained_by_snapshot_schedules));
// Send a RemoveTableFromTablet() request to each colocated parent tablet replica in the table.
// TODO(pitr) handle YSQL colocated tables.
if (IsColocatedUserTable(*table)) {
Expand Down Expand Up @@ -6364,7 +6364,7 @@ Status CatalogManager::DeleteYsqlDBTables(const scoped_refptr<NamespaceInfo>& da
for (auto &table_and_lock : tables) {
auto &table = table_and_lock.first;
// TODO(pitr) undelete for YSQL tables
RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, HideOnly::kFalse));
RETURN_NOT_OK(DeleteTabletsAndSendRequests(table, {}));
}

// Invoke any background tasks and return (notably, table cleanup).
Expand Down Expand Up @@ -7329,7 +7329,8 @@ Status CatalogManager::CheckIfForbiddenToDeleteTabletOf(const scoped_refptr<Tabl
return Status::OK();
}

Status CatalogManager::DeleteTabletsAndSendRequests(const TableInfoPtr& table, HideOnly hide_only) {
Status CatalogManager::DeleteTabletsAndSendRequests(
const TableInfoPtr& table, const RepeatedBytes& retained_by_snapshot_schedules) {
// Silently fail if tablet deletion is forbidden so table deletion can continue executing.
if (!CheckIfForbiddenToDeleteTabletOf(table).ok()) {
return Status::OK();
Expand All @@ -7343,7 +7344,8 @@ Status CatalogManager::DeleteTabletsAndSendRequests(const TableInfoPtr& table, H
});

string deletion_msg = "Table deleted at " + LocalTimeAsString();
RETURN_NOT_OK(DeleteTabletListAndSendRequests(tablets, deletion_msg, hide_only));
RETURN_NOT_OK(DeleteTabletListAndSendRequests(
tablets, deletion_msg, retained_by_snapshot_schedules));

if (IsColocatedParentTable(*table)) {
SharedLock<LockType> catalog_lock(lock_);
Expand All @@ -7361,7 +7363,7 @@ Status CatalogManager::DeleteTabletsAndSendRequests(const TableInfoPtr& table, H

Status CatalogManager::DeleteTabletListAndSendRequests(
const std::vector<scoped_refptr<TabletInfo>>& tablets, const std::string& deletion_msg,
HideOnly hide_only) {
const google::protobuf::RepeatedPtrField<std::string>& retained_by_snapshot_schedules) {
struct TabletAndLock {
TabletInfoPtr tablet;
TabletInfo::WriteLock lock;
Expand All @@ -7370,6 +7372,7 @@ Status CatalogManager::DeleteTabletListAndSendRequests(
tablets_and_locks.reserve(tablets.size());
std::vector<TabletInfo*> tablet_infos;
tablet_infos.reserve(tablets_and_locks.size());
std::vector<TabletInfoPtr> marked_as_hidden;

// Grab tablets and tablet write locks. The list should already be in tablet_id sorted order.
for (const auto& tablet : tablets) {
Expand All @@ -7380,18 +7383,27 @@ Status CatalogManager::DeleteTabletListAndSendRequests(
tablet_infos.emplace_back(tablet.get());
}

HideOnly hide_only(!retained_by_snapshot_schedules.empty());
HybridTime hide_hybrid_time = hide_only ? master_->clock()->Now() : HybridTime();

// Mark the tablets as deleted.
for (auto& tablet_and_lock : tablets_and_locks) {
auto& tablet = tablet_and_lock.tablet;
auto& tablet_lock = tablet_and_lock.lock;

bool was_hidden = tablet_lock->ListedAsHidden();
if (hide_only) {
LOG(INFO) << "Hiding tablet " << tablet->tablet_id() << " ...";
tablet_lock.mutable_data()->pb.set_hidden(true);
tablet_lock.mutable_data()->pb.set_hide_hybrid_time(hide_hybrid_time.ToUint64());
*tablet_lock.mutable_data()->pb.mutable_retained_by_snapshot_schedules() =
retained_by_snapshot_schedules;
} else {
LOG(INFO) << "Deleting tablet " << tablet->tablet_id() << " ...";
tablet_lock.mutable_data()->set_state(SysTabletsEntryPB::DELETED, deletion_msg);
}
if (tablet_lock->ListedAsHidden() && !was_hidden) {
marked_as_hidden.push_back(tablet);
}
DeleteTabletReplicas(tablet.get(), deletion_msg, hide_only);
}

Expand All @@ -7406,6 +7418,12 @@ Status CatalogManager::DeleteTabletListAndSendRequests(
tablet_lock.Commit();
LOG(INFO) << (hide_only ? "Hid tablet " : "Deleted tablet ") << tablet->tablet_id();
}

if (!marked_as_hidden.empty()) {
std::lock_guard<LockType> lock(lock_);
hidden_tablets_.insert(hidden_tablets_.end(), marked_as_hidden.begin(), marked_as_hidden.end());
}

return Status::OK();
}

Expand Down Expand Up @@ -8230,7 +8248,7 @@ Status CatalogManager::BuildLocationsForTablet(const scoped_refptr<TabletInfo>&
TabletLocationsPB* locs_pb) {
{
auto l_tablet = tablet->LockForRead();
if (l_tablet->pb.hidden()) {
if (l_tablet->is_hidden()) {
return STATUS_FORMAT(NotFound, "Tablet hidden", tablet->id());
}
locs_pb->set_table_id(l_tablet->pb.table_id());
Expand Down
9 changes: 7 additions & 2 deletions src/yb/master/catalog_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include "yb/util/monotime.h"
#include "yb/util/net/net_util.h"
#include "yb/util/oid_generator.h"
#include "yb/util/pb_util.h"
#include "yb/util/promise.h"
#include "yb/util/random.h"
#include "yb/util/rw_mutex.h"
Expand Down Expand Up @@ -1109,12 +1110,13 @@ class CatalogManager : public tserver::TabletPeerLookupIf {

// Marks each of the tablets in the given table as deleted and triggers requests to the tablet
// servers to delete them. The table parameter is expected to be given "write locked".
CHECKED_STATUS DeleteTabletsAndSendRequests(const TableInfoPtr& table, HideOnly hide_only);
CHECKED_STATUS DeleteTabletsAndSendRequests(
const TableInfoPtr& table, const RepeatedBytes& retained_by_snapshot_schedules);

// Marks each tablet as deleted and triggers requests to the tablet servers to delete them.
CHECKED_STATUS DeleteTabletListAndSendRequests(
const std::vector<scoped_refptr<TabletInfo>>& tablets, const std::string& deletion_msg,
HideOnly hide_only);
const RepeatedBytes& retained_by_snapshot_schedules);

// Send the "delete tablet request" to the specified TS/tablet.
// The specified 'reason' will be logged on the TS.
Expand Down Expand Up @@ -1299,6 +1301,9 @@ class CatalogManager : public tserver::TabletPeerLookupIf {
// Tablet maps: tablet-id -> TabletInfo
VersionTracker<TabletInfoMap> tablet_map_ GUARDED_BY(lock_);

// Tablets that was hidden instead of deleting, used to cleanup such tablets when time comes.
std::vector<TabletInfoPtr> hidden_tablets_ GUARDED_BY(lock_);

// Namespace maps: namespace-id -> NamespaceInfo and namespace-name -> NamespaceInfo
NamespaceInfoMap namespace_ids_map_ GUARDED_BY(lock_);
NamespaceNameMapper namespace_names_mapper_ GUARDED_BY(lock_);
Expand Down
9 changes: 8 additions & 1 deletion src/yb/master/master.proto
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ message SysRowEntries {
// The on-disk entry in the sys.catalog table ("metadata" column) for
// tablets entries.
message SysTabletsEntryPB {
reserved 15;

enum State {
UNKNOWN = 999;
PREPARING = 0;
Expand Down Expand Up @@ -333,7 +335,12 @@ message SysTabletsEntryPB {
// Tablet IDs for this tablet split if they have been registered in master.
repeated bytes split_tablet_ids = 14;

optional bool hidden = 15;
// Time when tablet was hidden.
optional fixed64 hide_hybrid_time = 16;

// If tablet was hidden instead of deleting, here we keep list of schedule ids that prevented
// actual deletion.
repeated bytes retained_by_snapshot_schedules = 17;
}

// The on-disk entry in the sys.catalog table ("metadata" column) for
Expand Down
Loading

0 comments on commit ff95b71

Please sign in to comment.