diff --git a/ent/src/yb/master/async_snapshot_tasks.cc b/ent/src/yb/master/async_snapshot_tasks.cc index f753ab0028a9..bdebe744bd94 100644 --- a/ent/src/yb/master/async_snapshot_tasks.cc +++ b/ent/src/yb/master/async_snapshot_tasks.cc @@ -37,6 +37,15 @@ using tserver::TabletServerErrorPB; // AsyncTabletSnapshotOp //////////////////////////////////////////////////////////// +namespace { + +std::string SnapshotIdToString(const std::string& snapshot_id) { + auto uuid = TryFullyDecodeTxnSnapshotId(snapshot_id); + return uuid.IsNil() ? snapshot_id : uuid.ToString(); +} + +} + AsyncTabletSnapshotOp::AsyncTabletSnapshotOp(Master *master, ThreadPool* callback_pool, const scoped_refptr& tablet, @@ -52,8 +61,9 @@ AsyncTabletSnapshotOp::AsyncTabletSnapshotOp(Master *master, } string AsyncTabletSnapshotOp::description() const { - return Format("$0 Tablet Snapshot Operation $1 RPC", - *tablet_, tserver::TabletSnapshotOpRequestPB::Operation_Name(operation_)); + return Format("$0 Tablet Snapshot Operation $1 RPC $2", + *tablet_, tserver::TabletSnapshotOpRequestPB::Operation_Name(operation_), + SnapshotIdToString(snapshot_id_)); } TabletId AsyncTabletSnapshotOp::tablet_id() const { @@ -64,43 +74,36 @@ TabletServerId AsyncTabletSnapshotOp::permanent_uuid() const { return target_ts_desc_ != nullptr ? target_ts_desc_->permanent_uuid() : ""; } +bool AsyncTabletSnapshotOp::RetryAllowed(TabletServerErrorPB::Code code, const Status& status) { + switch (code) { + case TabletServerErrorPB::TABLET_NOT_FOUND: + return false; + case TabletServerErrorPB::INVALID_SNAPSHOT: + return operation_ != tserver::TabletSnapshotOpRequestPB::RESTORE_ON_TABLET; + default: + return TransactionError(status) != TransactionErrorCode::kSnapshotTooOld; + } +} + void AsyncTabletSnapshotOp::HandleResponse(int attempt) { server::UpdateClock(resp_, master_->clock()); if (resp_.has_error()) { Status status = StatusFromPB(resp_.error().status()); - // Do not retry on a fatal error. - switch (resp_.error().code()) { - case TabletServerErrorPB::TABLET_NOT_FOUND: - LOG(WARNING) << "TS " << permanent_uuid() << ": snapshot failed for tablet " - << tablet_->ToString() << " no further retry: " << status; - TransitionToCompleteState(); - break; - case TabletServerErrorPB::INVALID_SNAPSHOT: - LOG(WARNING) << "TS " << permanent_uuid() << ": snapshot failed for tablet " - << tablet_->ToString() << ": " << status; - if (operation_ == tserver::TabletSnapshotOpRequestPB::RESTORE_ON_TABLET) { - LOG(WARNING) << "No further retry for RESTORE snapshot operation: " << status; - TransitionToCompleteState(); - } - break; - default: - LOG(WARNING) << "TS " << permanent_uuid() << ": snapshot failed for tablet " - << tablet_->ToString() << ": " << status; - if (TransactionError(status) == TransactionErrorCode::kSnapshotTooOld) { - TransitionToCompleteState(); - } - break; + if (!RetryAllowed(resp_.error().code(), status)) { + LOG_WITH_PREFIX(WARNING) << "Failed, NO retry: " << status; + TransitionToCompleteState(); + } else { + LOG_WITH_PREFIX(WARNING) << "Failed, will be retried: " << status; } } else { TransitionToCompleteState(); - VLOG(1) << "TS " << permanent_uuid() << ": snapshot complete on tablet " - << tablet_->ToString(); + VLOG_WITH_PREFIX(1) << "Complete"; } if (state() != MonitoredTaskState::kComplete) { - VLOG(1) << "TabletSnapshotOp task is not completed"; + VLOG_WITH_PREFIX(1) << "TabletSnapshotOp task is not completed"; return; } @@ -162,9 +165,8 @@ bool AsyncTabletSnapshotOp::SendRequest(int attempt) { req.set_propagated_hybrid_time(master_->clock()->Now().ToUint64()); ts_backup_proxy_->TabletSnapshotOpAsync(req, &resp_, &rpc_, BindRpcCallback()); - VLOG(1) << "Send tablet snapshot request " << operation_ << " to " << permanent_uuid() - << " (attempt " << attempt << "):\n" - << req.DebugString(); + VLOG_WITH_PREFIX(1) << "Sent to " << permanent_uuid() << " (attempt " << attempt << "): " + << (VLOG_IS_ON(4) ? req.ShortDebugString() : ""); return true; } diff --git a/ent/src/yb/master/async_snapshot_tasks.h b/ent/src/yb/master/async_snapshot_tasks.h index a7d99c8d827f..8df6439a6a98 100644 --- a/ent/src/yb/master/async_snapshot_tasks.h +++ b/ent/src/yb/master/async_snapshot_tasks.h @@ -61,8 +61,9 @@ class AsyncTabletSnapshotOp : public enterprise::RetryingTSRpcTask { void HandleResponse(int attempt) override; bool SendRequest(int attempt) override; void Finished(const Status& status) override; + bool RetryAllowed(tserver::TabletServerErrorPB::Code code, const Status& status); - scoped_refptr tablet_; + TabletInfoPtr tablet_; const std::string snapshot_id_; tserver::TabletSnapshotOpRequestPB::Operation operation_; SnapshotScheduleId snapshot_schedule_id_ = SnapshotScheduleId::Nil(); diff --git a/ent/src/yb/master/catalog_manager_ent.cc b/ent/src/yb/master/catalog_manager_ent.cc index 0ae871b78d88..a8dafc6bc099 100644 --- a/ent/src/yb/master/catalog_manager_ent.cc +++ b/ent/src/yb/master/catalog_manager_ent.cc @@ -1432,6 +1432,11 @@ Status CatalogManager::RestoreSysCatalog(SnapshotScheduleRestoration* restoratio // Load objects to restore and determine obsolete objects. RestoreSysCatalogState state(restoration); RETURN_NOT_OK(state.LoadObjects(schema(), doc_db)); + { + SharedLock lock(mutex_); + RETURN_NOT_OK(state.PatchVersions(*table_ids_map_)); + } + RETURN_NOT_OK(state.DetermineEntries()); { auto existing = VERIFY_RESULT(CollectEntriesForSnapshot(restoration->filter.tables().tables())); RETURN_NOT_OK(state.DetermineObsoleteObjects(existing)); diff --git a/ent/src/yb/master/restore_sys_catalog_state.cc b/ent/src/yb/master/restore_sys_catalog_state.cc index 2fde70015261..7bc7b8041a17 100644 --- a/ent/src/yb/master/restore_sys_catalog_state.cc +++ b/ent/src/yb/master/restore_sys_catalog_state.cc @@ -186,7 +186,20 @@ Status RestoreSysCatalogState::LoadObjects(const Schema& schema, const docdb::Do RETURN_NOT_OK(IterateSysCatalog(schema, doc_db, &namespaces_)); RETURN_NOT_OK(IterateSysCatalog(schema, doc_db, &tables_)); RETURN_NOT_OK(IterateSysCatalog(schema, doc_db, &tablets_)); - return DetermineEntries(); + return Status::OK(); +} + +Status RestoreSysCatalogState::PatchVersions(const TableInfoMap& tables) { + for (auto& id_and_pb : tables_) { + auto it = tables.find(id_and_pb.first); + if (it == tables.end()) { + return STATUS_FORMAT(NotFound, "Not found restoring table: $0", id_and_pb.first); + } + + // Force schema update after restoration. + id_and_pb.second.set_version(it->second->LockForRead()->pb.version() + 1); + } + return Status::OK(); } Status RestoreSysCatalogState::DetermineObsoleteObjects(const SysRowEntries& existing) { diff --git a/ent/src/yb/master/restore_sys_catalog_state.h b/ent/src/yb/master/restore_sys_catalog_state.h index c506dc4bad82..45acda2a4edb 100644 --- a/ent/src/yb/master/restore_sys_catalog_state.h +++ b/ent/src/yb/master/restore_sys_catalog_state.h @@ -35,16 +35,28 @@ class RestoreSysCatalogState { public: explicit RestoreSysCatalogState(SnapshotScheduleRestoration* restoration); + // Load objects from DB snapshot. CHECKED_STATUS LoadObjects(const Schema& schema, const docdb::DocDB& doc_db); + // Patch table versions, so restored tables will have greater schema version to force schema + // update. + CHECKED_STATUS PatchVersions(const TableInfoMap& tables); + + // Determine entries that should be restored. I.e. apply filter and serialize. + CHECKED_STATUS DetermineEntries(); + + // Determine objects that should be removed, i.e. was created after restoration time. CHECKED_STATUS DetermineObsoleteObjects(const SysRowEntries& existing); + // Prepare write batch with object changes. CHECKED_STATUS PrepareWriteBatch(const Schema& schema, docdb::DocWriteBatch* write_batch); + // Prepare write batch to delete obsolete tablet. CHECKED_STATUS PrepareTabletCleanup( const TabletId& id, SysTabletsEntryPB pb, const Schema& schema, docdb::DocWriteBatch* write_batch); + // Prepare write batch to delete obsolete table. CHECKED_STATUS PrepareTableCleanup( const TableId& id, SysTablesEntryPB pb, const Schema& schema, docdb::DocWriteBatch* write_batch); @@ -62,8 +74,6 @@ class RestoreSysCatalogState { CHECKED_STATUS IterateSysCatalog( const Schema& schema, const docdb::DocDB& doc_db, std::unordered_map* map); - CHECKED_STATUS DetermineEntries(); - Result MatchTable(const TableId& id, const SysTablesEntryPB& table); Result TableMatchesIdentifier( const TableId& id, const SysTablesEntryPB& table, const TableIdentifierPB& table_identifier); diff --git a/ent/src/yb/tools/yb-admin_client_ent.cc b/ent/src/yb/tools/yb-admin_client_ent.cc index 4a17d1499f29..06dce3306d0a 100644 --- a/ent/src/yb/tools/yb-admin_client_ent.cc +++ b/ent/src/yb/tools/yb-admin_client_ent.cc @@ -419,7 +419,8 @@ Result ClusterAdminClient::SuitableSnapshotId( req.set_snapshot_schedule_id(schedule_id.data(), schedule_id.size()); } - RETURN_NOT_OK(master_backup_proxy_->ListSnapshotSchedules(req, &resp, &rpc)); + RETURN_NOT_OK_PREPEND(master_backup_proxy_->ListSnapshotSchedules(req, &resp, &rpc), + "Failed to list snapshot schedules"); if (resp.has_error()) { return StatusFromPB(resp.error().status()); @@ -446,7 +447,8 @@ Result ClusterAdminClient::SuitableSnapshotId( master::CreateSnapshotRequestPB req; master::CreateSnapshotResponsePB resp; req.set_schedule_id(schedule_id.data(), schedule_id.size()); - RETURN_NOT_OK(master_backup_proxy_->CreateSnapshot(req, &resp, &rpc)); + RETURN_NOT_OK_PREPEND(master_backup_proxy_->CreateSnapshot(req, &resp, &rpc), + "Failed to create snapshot"); if (resp.has_error()) { auto status = StatusFromPB(resp.error().status()); if (master::MasterError(status) == master::MasterErrorPB::PARALLEL_SNAPSHOT_OPERATION) { @@ -471,7 +473,8 @@ Result ClusterAdminClient::RestoreSnapshotSchedule( master::ListSnapshotsRequestPB req; req.set_snapshot_id(snapshot_id.data(), snapshot_id.size()); master::ListSnapshotsResponsePB resp; - RETURN_NOT_OK(master_backup_proxy_->ListSnapshots(req, &resp, &rpc)); + RETURN_NOT_OK_PREPEND(master_backup_proxy_->ListSnapshots(req, &resp, &rpc), + "Failed to list snapshots"); if (resp.has_error()) { return StatusFromPB(resp.error().status()); } @@ -500,7 +503,8 @@ Result ClusterAdminClient::RestoreSnapshotSchedule( RestoreSnapshotResponsePB resp; req.set_snapshot_id(snapshot_id.data(), snapshot_id.size()); req.set_restore_ht(restore_at.ToUint64()); - RETURN_NOT_OK(master_backup_proxy_->RestoreSnapshot(req, &resp, &rpc)); + RETURN_NOT_OK_PREPEND(master_backup_proxy_->RestoreSnapshot(req, &resp, &rpc), + "Failed to restore snapshot"); if (resp.has_error()) { return StatusFromPB(resp.error().status()); diff --git a/ent/src/yb/tserver/backup_service.cc b/ent/src/yb/tserver/backup_service.cc index 5fecc4eb078a..20efc123a4e2 100644 --- a/ent/src/yb/tserver/backup_service.cc +++ b/ent/src/yb/tserver/backup_service.cc @@ -59,7 +59,8 @@ void TabletServiceBackupImpl::TabletSnapshotOp(const TabletSnapshotOpRequestPB* TRACE_EVENT1("tserver", "TabletSnapshotOp", "tablet_id: ", tablet_id); LOG(INFO) << "Processing TabletSnapshotOp for tablet " << tablet_id << " from " - << context.requestor_string() << ": " << req->operation(); + << context.requestor_string() << ": " + << TabletSnapshotOpRequestPB::Operation_Name(req->operation()); VLOG(1) << "Full request: " << req->DebugString(); auto tablet = LookupLeaderTabletOrRespond(tablet_manager_, tablet_id, resp, &context); diff --git a/src/yb/common/entity_ids.h b/src/yb/common/entity_ids.h index 3a21c896d43f..66a18d9b25cb 100644 --- a/src/yb/common/entity_ids.h +++ b/src/yb/common/entity_ids.h @@ -18,41 +18,12 @@ #include #include +#include "yb/common/entity_ids_types.h" + #include "yb/util/result.h" -#include "yb/util/strongly_typed_string.h" namespace yb { -// TODO: switch many of these to opaque types for additional type safety and efficiency. - -using NamespaceName = std::string; -using TableName = std::string; -using UDTypeName = std::string; -using RoleName = std::string; - -using NamespaceId = std::string; -using TableId = std::string; -using UDTypeId = std::string; -using CDCStreamId = std::string; - -using PeerId = std::string; -using SnapshotId = std::string; -using TabletServerId = PeerId; -using TabletId = std::string; -using TablegroupId = std::string; -using TablespaceId = std::string; - -YB_STRONGLY_TYPED_STRING(KvStoreId); - -// TODO(#79): switch to YB_STRONGLY_TYPED_STRING -using RaftGroupId = std::string; - -using NamespaceIdTableNamePair = std::pair; - -using FlushRequestId = std::string; - -using RedisConfigKey = std::string; - static const uint32_t kPgSequencesDataTableOid = 0xFFFF; static const uint32_t kPgSequencesDataDatabaseOid = 0xFFFF; diff --git a/src/yb/common/entity_ids_types.h b/src/yb/common/entity_ids_types.h new file mode 100644 index 000000000000..a79b83697503 --- /dev/null +++ b/src/yb/common/entity_ids_types.h @@ -0,0 +1,54 @@ +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. +// + +#ifndef YB_COMMON_ENTITY_IDS_TYPES_H +#define YB_COMMON_ENTITY_IDS_TYPES_H + +#include + +#include "yb/util/strongly_typed_string.h" + +namespace yb { + +// TODO: switch many of these to opaque types for additional type safety and efficiency. +using NamespaceName = std::string; +using TableName = std::string; +using UDTypeName = std::string; +using RoleName = std::string; + +using NamespaceId = std::string; +using TableId = std::string; +using UDTypeId = std::string; +using CDCStreamId = std::string; + +using PeerId = std::string; +using SnapshotId = std::string; +using TabletServerId = PeerId; +using TabletId = std::string; +using TablegroupId = std::string; +using TablespaceId = std::string; + +YB_STRONGLY_TYPED_STRING(KvStoreId); + +// TODO(#79): switch to YB_STRONGLY_TYPED_STRING +using RaftGroupId = std::string; + +using NamespaceIdTableNamePair = std::pair; + +using FlushRequestId = std::string; + +using RedisConfigKey = std::string; + +} // namespace yb + +#endif // YB_COMMON_ENTITY_IDS_TYPES_H diff --git a/src/yb/master/async_rpc_tasks.cc b/src/yb/master/async_rpc_tasks.cc index 44ee9ee8032f..43fb208093a3 100644 --- a/src/yb/master/async_rpc_tasks.cc +++ b/src/yb/master/async_rpc_tasks.cc @@ -31,6 +31,8 @@ #include "yb/util/logging.h" #include "yb/util/thread_restrictions.h" +using namespace std::literals; + DEFINE_int32(unresponsive_ts_rpc_timeout_ms, 15 * 60 * 1000, // 15 minutes "After this amount of time (or after we have retried unresponsive_ts_rpc_retry_limit " "times, whichever happens first), the master will stop attempting to contact a tablet " @@ -113,16 +115,20 @@ RetryingTSRpcTask::RetryingTSRpcTask(Master *master, replica_picker_(replica_picker.Pass()), table_(DCHECK_NOTNULL(table)), start_ts_(MonoTime::Now()), - attempt_(0), - state_(MonitoredTaskState::kWaiting) { - deadline_ = start_ts_; - deadline_.AddDelta(MonoDelta::FromMilliseconds(FLAGS_unresponsive_ts_rpc_timeout_ms)); + deadline_(start_ts_ + FLAGS_unresponsive_ts_rpc_timeout_ms * 1ms) { +} + +RetryingTSRpcTask::~RetryingTSRpcTask() { + auto state = state_.load(std::memory_order_acquire); + LOG_IF(DFATAL, !IsStateTerminal(state)) + << "Destroying " << this << " task in a wrong state: " << AsString(state); + VLOG_WITH_FUNC(1) << "Destroying " << this << " in " << AsString(state); } // Send the subclass RPC request. Status RetryingTSRpcTask::Run() { - VLOG_WITH_PREFIX(1) << "Start Running"; ++attempt_; + VLOG_WITH_PREFIX(1) << "Start Running, attempt: " << attempt_; auto task_state = state(); if (task_state == MonitoredTaskState::kAborted) { return STATUS(IllegalState, "Unable to run task because it has been aborted"); @@ -130,11 +136,12 @@ Status RetryingTSRpcTask::Run() { // TODO(bogdan): There is a race between scheduling and running and can cause this to fail. // Should look into removing the kScheduling state, if not needed, and simplifying the state // transitions! - DCHECK(task_state == MonitoredTaskState::kWaiting) << "State: " << ToString(task_state); + DCHECK(task_state == MonitoredTaskState::kWaiting) << "State: " << AsString(task_state); Status s = ResetTSProxy(); if (!s.ok()) { s = s.CloneAndPrepend("Failed to reset TS proxy"); + LOG_WITH_PREFIX(INFO) << s; if (s.IsExpired()) { TransitionToTerminalState(MonitoredTaskState::kWaiting, MonitoredTaskState::kFailed, s); UnregisterAsyncTask(); @@ -201,6 +208,8 @@ MonitoredTaskState RetryingTSRpcTask::AbortAndReturnPrevState(const Status& stat while (!IsStateTerminal(prev_state)) { auto expected = prev_state; if (state_.compare_exchange_weak(expected, MonitoredTaskState::kAborted)) { + VLOG_WITH_PREFIX_AND_FUNC(1) + << "Aborted with: " << status << ", prev state: " << AsString(prev_state); AbortIfScheduled(); Finished(status); UnregisterAsyncTask(); @@ -208,6 +217,8 @@ MonitoredTaskState RetryingTSRpcTask::AbortAndReturnPrevState(const Status& stat } prev_state = state(); } + VLOG_WITH_PREFIX_AND_FUNC(1) + << "Already terminated, prev state: " << AsString(prev_state); UnregisterAsyncTask(); return prev_state; } @@ -224,6 +235,7 @@ void RetryingTSRpcTask::RpcCallback() { // Note: This can fail on shutdown, so just print a warning for it. Status s = callback_pool_->SubmitFunc( std::bind(&RetryingTSRpcTask::DoRpcCallback, shared_from(this))); + VLOG_WITH_PREFIX_AND_FUNC(3) << "Submit status: " << s; if (!s.ok()) { WARN_NOT_OK(s, "Could not submit to queue, probably shutting down"); AbortTask(s); @@ -233,6 +245,8 @@ void RetryingTSRpcTask::RpcCallback() { // Handle the actual work of the RPC callback. This is run on the master's worker // pool, rather than a reactor thread, so it may do blocking IO operations. void RetryingTSRpcTask::DoRpcCallback() { + VLOG_WITH_PREFIX_AND_FUNC(3) << "Rpc status: " << rpc_.status(); + if (!rpc_.status().ok()) { LOG_WITH_PREFIX(WARNING) << "TS " << target_ts_desc_->permanent_uuid() << ": " << type_name() << " RPC failed for tablet " @@ -316,6 +330,7 @@ bool RetryingTSRpcTask::RescheduleWithBackoffDelay() { auto task_id = master_->messenger()->ScheduleOnReactor( std::bind(&RetryingTSRpcTask::RunDelayedTask, shared_from(this), _1), MonoDelta::FromMilliseconds(delay_millis), SOURCE_LOCATION(), master_->messenger()); + VLOG_WITH_PREFIX_AND_FUNC(4) << "Task id: " << task_id; reactor_task_id_.store(task_id, std::memory_order_release); if (task_id == rpc::kInvalidTaskId) { @@ -342,10 +357,10 @@ void RetryingTSRpcTask::RunDelayedTask(const Status& status) { return; } - string desc = description(); // Save in case we need to log after deletion. + auto log_prefix = LogPrefix(); // Save in case we need to log after deletion. Status s = Run(); // May delete this. if (!s.ok()) { - LOG_WITH_PREFIX(WARNING) << "Async tablet task failed: " << s.ToString(); + LOG(WARNING) << log_prefix << "Async tablet task failed: " << s; } } @@ -367,8 +382,10 @@ void RetryingTSRpcTask::UnregisterAsyncTask() { LOG_WITH_PREFIX(FATAL) << "Invalid task state " << s; } end_ts_ = MonoTime::Now(); - if (table_ != nullptr) { - table_->RemoveTask(self); + if (table_ != nullptr && table_->RemoveTask(self)) { + // We don't delete table while it have running tasks, so should check whether it was last task, + // even it is not delete table task. + master_->catalog_manager()->CheckTableDeleted(table_); } // Make sure to run the callbacks last, in case they rely on the task no longer being tracked // by the table. @@ -377,6 +394,7 @@ void RetryingTSRpcTask::UnregisterAsyncTask() { void RetryingTSRpcTask::AbortIfScheduled() { auto reactor_task_id = reactor_task_id_.load(std::memory_order_acquire); + VLOG_WITH_PREFIX_AND_FUNC(1) << "Reactor task id: " << reactor_task_id; if (reactor_task_id != rpc::kInvalidTaskId) { master_->messenger()->AbortOnReactor(reactor_task_id); } @@ -686,11 +704,12 @@ void AsyncAlterTable::HandleResponse(int attempt) { if (state() == MonitoredTaskState::kComplete) { // TODO: proper error handling here. Not critical, since TSHeartbeat will retry on failure. - WARN_NOT_OK(master_->catalog_manager()->HandleTabletSchemaVersionReport( - tablet_.get(), schema_version_, table()), - yb::Format( - "$0 for $1 failed while running AsyncAlterTable::HandleResponse. response $2", - description(), tablet_->ToString(), resp_.DebugString())); + WARN_NOT_OK( + master_->catalog_manager()->HandleTabletSchemaVersionReport( + tablet_.get(), schema_version_, table()), + Format( + "$0 failed while running AsyncAlterTable::HandleResponse. Response $1", + description(), resp_.ShortDebugString())); } else { VLOG_WITH_PREFIX(1) << "Task is not completed " << tablet_->ToString() << " for version " << schema_version_; diff --git a/src/yb/master/async_rpc_tasks.h b/src/yb/master/async_rpc_tasks.h index 142ce1a6f3aa..60e0a4346926 100644 --- a/src/yb/master/async_rpc_tasks.h +++ b/src/yb/master/async_rpc_tasks.h @@ -118,6 +118,8 @@ class RetryingTSRpcTask : public MonitoredTask { gscoped_ptr replica_picker, const scoped_refptr& table); + ~RetryingTSRpcTask(); + // Send the subclass RPC request. CHECKED_STATUS Run(); @@ -152,7 +154,7 @@ class RetryingTSRpcTask : public MonitoredTask { // Overridable log prefix with reasonable default. std::string LogPrefix() const { - return strings::Substitute("$0 (task=$1, state=$2): ", description(), this, ToString(state())); + return strings::Substitute("$0 (task=$1, state=$2): ", description(), this, AsString(state())); } bool PerformStateTransition(MonitoredTaskState expected, MonitoredTaskState new_state) @@ -200,7 +202,7 @@ class RetryingTSRpcTask : public MonitoredTask { MonoTime end_ts_; MonoTime deadline_; - int attempt_; + int attempt_ = 0; rpc::RpcController rpc_; TSDescriptor* target_ts_desc_ = nullptr; std::shared_ptr ts_proxy_; @@ -247,7 +249,7 @@ class RetryingTSRpcTask : public MonitoredTask { virtual int max_delay_ms(); // Use state() and MarkX() accessors. - std::atomic state_; + std::atomic state_{MonitoredTaskState::kWaiting}; }; // RetryingTSRpcTask subclass which always retries the same tablet server, diff --git a/src/yb/master/backfill_index.cc b/src/yb/master/backfill_index.cc index 90311c110d6f..0e08cae4de41 100644 --- a/src/yb/master/backfill_index.cc +++ b/src/yb/master/backfill_index.cc @@ -352,10 +352,10 @@ Result MultiStageAlterTable::UpdateIndexPermission( VLOG(1) << "Index permissions update skipped, leaving schema_version at " << indexed_table_pb.version(); } - indexed_table_data.set_state(SysTablesEntryPB::ALTERING, - Substitute("Alter table version=$0 ts=$1", - indexed_table_pb.version(), - LocalTimeAsString())); + indexed_table_data.set_state( + SysTablesEntryPB::ALTERING, + Format("Update index permission version=$0 ts=$1", + indexed_table_pb.version(), LocalTimeAsString())); // Update sys-catalog with the new indexed table info. TRACE("Updating indexed table metadata on disk"); @@ -1232,8 +1232,7 @@ void GetSafeTimeForTablet::UnregisterAsyncTaskCallback() { VLOG(3) << "GetSafeTime for " << tablet_->ToString() << " got an error. Returning " << safe_time; } else if (state() != MonitoredTaskState::kComplete) { - status = STATUS_SUBSTITUTE(InternalError, "$0 in state $1", description(), - ToString(state())); + status = STATUS_FORMAT(InternalError, "$0 in state $1", description(), state()); } else { safe_time = HybridTime(resp_.safe_time()); if (safe_time.is_special()) { @@ -1382,8 +1381,7 @@ void BackfillChunk::UnregisterAsyncTaskCallback() { VLOG(3) << "Considering all indexes : " << yb::ToString(indexes_being_backfilled_) << " as failed."; - status = STATUS_SUBSTITUTE(InternalError, "$0 in state $1", description(), - ToString(state())); + status = STATUS_FORMAT(InternalError, "$0 in state $1", description(), state()); } if (resp_.has_backfilled_until()) { diff --git a/src/yb/master/catalog_entity_info.cc b/src/yb/master/catalog_entity_info.cc index d29353eaab72..9a4744165223 100644 --- a/src/yb/master/catalog_entity_info.cc +++ b/src/yb/master/catalog_entity_info.cc @@ -305,11 +305,8 @@ bool TableInfo::colocated() const { return LockForRead()->pb.colocated(); } -const string TableInfo::indexed_table_id() const { - auto l = LockForRead(); - return l->pb.has_index_info() - ? l->pb.index_info().indexed_table_id() - : l->pb.has_indexed_table_id() ? l->pb.indexed_table_id() : ""; +std::string TableInfo::indexed_table_id() const { + return LockForRead()->indexed_table_id(); } bool TableInfo::is_local_index() const { @@ -403,9 +400,10 @@ bool TableInfo::IsAlterInProgress(uint32_t version) const { SharedLock l(lock_); for (const TableInfo::TabletInfoMap::value_type& e : tablet_map_) { if (e.second->reported_schema_version(table_id_) < version) { - VLOG(3) << "Table " << table_id_ << " ALTER in progress due to tablet " - << e.second->ToString() << " because reported schema " - << e.second->reported_schema_version(table_id_) << " < expected " << version; + VLOG_WITH_PREFIX_AND_FUNC(3) + << "ALTER in progress due to tablet " + << e.second->ToString() << " because reported schema " + << e.second->reported_schema_version(table_id_) << " < expected " << version; return true; } } @@ -417,11 +415,11 @@ bool TableInfo::HasTablets() const { return !tablet_map_.empty(); } -bool TableInfo::AreAllTabletsDeletedOrHidden() const { +bool TableInfo::AreAllTabletsHidden() const { SharedLock l(lock_); for (const auto& e : tablet_map_) { - auto tablet_lock = e.second->LockForRead(); - if (!tablet_lock->is_deleted() && !tablet_lock->is_hidden()) { + if (!e.second->LockForRead()->is_hidden()) { + VLOG_WITH_PREFIX_AND_FUNC(4) << "Not hidden tablet: " << e.second->ToString(); return false; } } @@ -432,6 +430,7 @@ bool TableInfo::AreAllTabletsDeleted() const { SharedLock l(lock_); for (const auto& e : tablet_map_) { if (!e.second->LockForRead()->is_deleted()) { + VLOG_WITH_PREFIX_AND_FUNC(4) << "Not deleted tablet: " << e.second->ToString(); return false; } } @@ -493,6 +492,7 @@ std::size_t TableInfo::NumTasks() const { bool TableInfo::HasTasks() const { SharedLock l(lock_); + VLOG_WITH_PREFIX_AND_FUNC(3) << AsString(pending_tasks_); return !pending_tasks_.empty(); } @@ -522,16 +522,19 @@ void TableInfo::AddTask(std::shared_ptr task) { // We need to abort these tasks without holding the lock because when a task is destroyed it tries // to acquire the same lock to remove itself from pending_tasks_. if (abort_task) { - task->AbortAndReturnPrevState(STATUS(Aborted, "Table closing")); + task->AbortAndReturnPrevState(STATUS(Expired, "Table closing")); } } -void TableInfo::RemoveTask(const std::shared_ptr& task) { +bool TableInfo::RemoveTask(const std::shared_ptr& task) { + bool result; { std::lock_guard l(lock_); pending_tasks_.erase(task); + result = pending_tasks_.empty(); } - VLOG(1) << __func__ << " Removed task " << task.get() << " " << task->description(); + VLOG(1) << "Removed task " << task.get() << " " << task->description(); + return result; } // Aborts tasks which have their rpc in progress, rest of them are aborted and also erased @@ -554,11 +557,17 @@ void TableInfo::AbortTasksAndCloseIfRequested(bool close) { abort_tasks.reserve(pending_tasks_.size()); abort_tasks.assign(pending_tasks_.cbegin(), pending_tasks_.cend()); } + if (abort_tasks.empty()) { + return; + } + auto status = close ? STATUS(Expired, "Table closing") : STATUS(Aborted, "Table closing"); // We need to abort these tasks without holding the lock because when a task is destroyed it tries // to acquire the same lock to remove itself from pending_tasks_. for (const auto& task : abort_tasks) { - VLOG(1) << __func__ << " Aborting task " << task.get() << " " << task->description(); - task->AbortAndReturnPrevState(STATUS(Aborted, "Table closing")); + VLOG_WITH_FUNC(1) + << (close ? "Close and abort" : "Abort") << " task " << task.get() << " " + << task->description(); + task->AbortAndReturnPrevState(status); } } @@ -637,12 +646,24 @@ void TableInfo::SetTablespaceIdForTableCreation(const TablespaceId& tablespace_i } void PersistentTableInfo::set_state(SysTablesEntryPB::State state, const string& msg) { - VLOG(2) << __PRETTY_FUNCTION__ << " setting state for " << name() << " to " - << SysTablesEntryPB::State_Name(state) << " reason: " << msg; + VLOG_WITH_FUNC(2) << "Setting state for " << name() << " to " + << SysTablesEntryPB::State_Name(state) << " reason: " << msg; pb.set_state(state); pb.set_state_msg(msg); } +bool PersistentTableInfo::is_index() const { + return !indexed_table_id().empty(); +} + +const std::string& PersistentTableInfo::indexed_table_id() const { + static const std::string kEmptyString; + return pb.has_index_info() + ? pb.index_info().indexed_table_id() + : pb.has_indexed_table_id() ? pb.indexed_table_id() : kEmptyString; +} + + // ================================================================================================ // DeletedTableInfo // ================================================================================================ diff --git a/src/yb/master/catalog_entity_info.h b/src/yb/master/catalog_entity_info.h index d43c2b51fa1f..3a50cdee2185 100644 --- a/src/yb/master/catalog_entity_info.h +++ b/src/yb/master/catalog_entity_info.h @@ -176,9 +176,6 @@ struct PersistentTabletInfo : public Persistent TableInfoPtr; - typedef std::unordered_map LeaderStepDownFailureTimes; // The information about a single tablet which exists in the cluster, @@ -347,6 +344,10 @@ struct PersistentTableInfo : public Persistent, virtual const std::string& id() const override { return table_id_; } // Return the indexed table id if the table is an index table. Otherwise, return an empty string. - const std::string indexed_table_id() const; + std::string indexed_table_id() const; bool is_index() const { return !indexed_table_id().empty(); @@ -440,7 +441,7 @@ class TableInfo : public RefCountedThreadSafe, bool AreAllTabletsDeleted() const; // Returns true if all tablets of the table are deleted or hidden. - bool AreAllTabletsDeletedOrHidden() const; + bool AreAllTabletsHidden() const; // Returns true if the table creation is in-progress. bool IsCreateInProgress() const; @@ -483,7 +484,10 @@ class TableInfo : public RefCountedThreadSafe, bool HasTasks() const; bool HasTasks(MonitoredTask::Type type) const; void AddTask(std::shared_ptr task); - void RemoveTask(const std::shared_ptr& task); + + // Returns true if no running tasks left. + bool RemoveTask(const std::shared_ptr& task); + void AbortTasks(); void AbortTasksAndClose(); void WaitTasksCompletion(); @@ -511,6 +515,10 @@ class TableInfo : public RefCountedThreadSafe, void AddTabletUnlocked(TabletInfo* tablet) REQUIRES_SHARED(lock_); void AbortTasksAndCloseIfRequested(bool close); + std::string LogPrefix() const { + return ToString() + ": "; + } + const TableId table_id_; scoped_refptr tasks_tracker_; @@ -801,7 +809,6 @@ class SysConfigInfo : public RefCountedThreadSafe, // Convenience typedefs. // Table(t)InfoMap ordered for deterministic locking. typedef std::map> TabletInfoMap; -typedef std::map> TableInfoMap; typedef std::pair TableNameKey; typedef std::unordered_map< TableNameKey, scoped_refptr, boost::hash> TableInfoByNameMap; diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index 7af3f2ed2a95..e4a46686fc98 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -1910,13 +1910,13 @@ Status CatalogManager::AddIndexInfoToTable(const scoped_refptr& index } // Add index info to indexed table and increment schema version. - l.mutable_data()->pb.add_indexes()->CopyFrom(index_info); - l.mutable_data()->pb.set_version(l.mutable_data()->pb.version() + 1); - l.mutable_data()->pb.set_updates_only_index_permissions(false); - l.mutable_data()->set_state(SysTablesEntryPB::ALTERING, - Substitute("Alter table version=$0 ts=$1", - l.mutable_data()->pb.version(), - LocalTimeAsString())); + auto& pb = l.mutable_data()->pb; + pb.add_indexes()->CopyFrom(index_info); + pb.set_version(l.mutable_data()->pb.version() + 1); + pb.set_updates_only_index_permissions(false); + l.mutable_data()->set_state( + SysTablesEntryPB::ALTERING, + Format("Add index info version=$0 ts=$1", pb.version(), LocalTimeAsString())); // Update sys-catalog with the new indexed table info. TRACE("Updating indexed table metadata on disk"); @@ -3944,10 +3944,10 @@ Status CatalogManager::DeleteIndexInfoFromTable( indexed_table_data.pb.set_version(indexed_table_data.pb.version() + 1); // TODO(Amit) : Is this compatible with the previous version? indexed_table_data.pb.set_updates_only_index_permissions(false); - indexed_table_data.set_state(SysTablesEntryPB::ALTERING, - Substitute("Alter table version=$0 ts=$1", - indexed_table_data.pb.version(), - LocalTimeAsString())); + indexed_table_data.set_state( + SysTablesEntryPB::ALTERING, + Format("Delete index info version=$0 ts=$1", + indexed_table_data.pb.version(), LocalTimeAsString())); // Update sys-catalog with the deleted indexed table info. TRACE("Updating indexed table metadata on disk"); @@ -4081,6 +4081,9 @@ Status CatalogManager::DeleteTableInMemory( const char* const object_type = is_index_table ? "index" : "table"; const bool cascade_delete_index = is_index_table && !update_indexed_table; + VLOG_WITH_PREFIX_AND_FUNC(1) << YB_STRUCT_TO_STRING( + table_identifier, is_index_table, update_indexed_table) << "\n" << GetStackTrace(); + // Lookup the table and verify if it exists. TRACE(Substitute("Looking up $0", object_type)); auto table_result = FindTable(table_identifier); @@ -4107,9 +4110,19 @@ Status CatalogManager::DeleteTableInMemory( return SetupError(resp->mutable_error(), MasterErrorPB::OBJECT_NOT_FOUND, s); } - if (l->started_deleting()) { + for (const auto& entry : schedules_to_tables_map) { + if (std::binary_search(entry.second.begin(), entry.second.end(), table->id())) { + data.retained_by_snapshot_schedules.Add()->assign( + entry.first.AsSlice().cdata(), entry.first.size()); + } + } + + bool hide_only = !data.retained_by_snapshot_schedules.empty(); + + if (l->started_deleting() || (hide_only && l->started_hiding())) { if (cascade_delete_index) { - LOG(WARNING) << "Index " << table_identifier.DebugString() << " was deleted"; + LOG(WARNING) << "Index " << table_identifier.ShortDebugString() << " was " + << (l->started_deleting() ? "deleted" : "hidden"); return Status::OK(); } else { Status s = STATUS(NotFound, "The object was deleted", l->pb.state_msg()); @@ -4117,20 +4130,15 @@ Status CatalogManager::DeleteTableInMemory( } } - for (const auto& entry : schedules_to_tables_map) { - if (std::binary_search(entry.second.begin(), entry.second.end(), table->id())) { - data.retained_by_snapshot_schedules.Add()->assign( - entry.first.AsSlice().cdata(), entry.first.size()); - } - } + bool was_hiding = l->started_hiding(); TRACE("Updating metadata on disk"); // Update the metadata for the on-disk state. - if (data.retained_by_snapshot_schedules.empty()) { + if (hide_only) { + l.mutable_data()->pb.set_hide_state(SysTablesEntryPB::HIDING); + } else { l.mutable_data()->set_state(SysTablesEntryPB::DELETING, Substitute("Started deleting at $0", LocalTimeAsString())); - } else { - l.mutable_data()->pb.set_hide_state(SysTablesEntryPB::HIDING); } // Update sys-catalog with the removed table state. @@ -4142,20 +4150,22 @@ Status CatalogManager::DeleteTableInMemory( if (!s.ok()) { // The mutation will be aborted when 'l' exits the scope on early return. - s = s.CloneAndPrepend(Substitute("An error occurred while updating sys tables: $0", - s.ToString())); - LOG(WARNING) << s.ToString(); + s = s.CloneAndPrepend("An error occurred while updating sys tables"); + LOG(WARNING) << s; return CheckIfNoLongerLeaderAndSetupError(s, resp); } // Update the internal table maps. // Exclude Postgres tables which are not in the name map. - if (l.data().table_type() != PGSQL_TABLE_TYPE) { + // Also exclude hidden tables, that were already removed from this map. + if (l.data().table_type() != PGSQL_TABLE_TYPE && !was_hiding) { TRACE("Removing from by-name map"); LockGuard lock(mutex_); if (table_names_map_.erase({l->namespace_id(), l->name()}) != 1) { PANIC_RPC(rpc, "Could not remove table from map, name=" + table->ToString()); } + // We commit another map to increment its version and reset cache. + // Since table_name_map_ does not have version. table_ids_map_.Commit(); } @@ -4180,7 +4190,10 @@ Status CatalogManager::DeleteTableInMemory( } } - table->AbortTasks(); + if (!hide_only) { + // If table is being hidden we should not abort snapshot related tasks. + table->AbortTasks(); + } // For regular (indexed) table, insert table info and lock in the front of the list. Else for // index table, append them to the end. We do so so that we will commit and delete the indexed @@ -4192,14 +4205,19 @@ Status CatalogManager::DeleteTableInMemory( TableInfo::WriteLock CatalogManager::MaybeTransitionTableToDeleted(const TableInfoPtr& table) { if (table->HasTasks()) { + VLOG_WITH_PREFIX_AND_FUNC(2) << table->ToString() << " has tasks"; return TableInfo::WriteLock(); } + bool hide_only; { auto lock = table->LockForRead(); // For any table in DELETING state, we will want to mark it as DELETED once all its respective // tablets have been successfully removed from tservers. - if (!lock->is_deleting() && !lock->is_hiding()) { + // For any hiding table we will want to mark it as HIDDEN once all its respective + // tablets have been successfully hidden on tservers. + hide_only = !lock->is_deleting(); + if (hide_only && !lock->is_hiding()) { return TableInfo::WriteLock(); } } @@ -4212,9 +4230,11 @@ TableInfo::WriteLock CatalogManager::MaybeTransitionTableToDeleted(const TableIn // gotten to point 3, which would add further tasks for the deletes. // // However, HasTasks is cheaper than AreAllTabletsDeletedOrHidden... - if (!table->AreAllTabletsDeletedOrHidden() && - !IsSystemTable(*table) && - !IsColocatedUserTable(*table)) { + auto all_tablets_done = hide_only ? table->AreAllTabletsHidden() : table->AreAllTabletsDeleted(); + VLOG_WITH_PREFIX_AND_FUNC(2) + << table->ToString() << " hide only: " << hide_only << ", all tablets done: " + << all_tablets_done; + if (!all_tablets_done && !IsSystemTable(*table) && !IsColocatedUserTable(*table)) { return TableInfo::WriteLock(); } @@ -5146,24 +5166,7 @@ void CatalogManager::NotifyTabletDeleteFinished(const TabletServerId& tserver_uu LOG(INFO) << "Clearing pending delete for tablet " << tablet_id << " in ts " << tserver_uuid; ts_desc->ClearPendingTabletDelete(tablet_id); } - if (FLAGS_master_drop_table_after_task_response) { - // Since this is called after every successful async DeleteTablet, it's possible if all tasks - // complete, for us to mark the table as DELETED asap. This is desirable as clients will wait - // for this before returning success to the user. - // - // However, if tasks fail, timeout, or are aborted, we still have the background thread as a - // catch all. - auto lock = MaybeTransitionTableToDeleted(table); - if (!lock.locked()) { - return; - } - Status s = sys_catalog_->UpdateItem(table.get(), leader_ready_term()); - if (!s.ok()) { - LOG(WARNING) << "Error marking table as DELETED: " << s.ToString(); - return; - } - lock.Commit(); - } + CheckTableDeleted(table); } bool CatalogManager::ReplicaMapDiffersFromConsensusState(const scoped_refptr& tablet, @@ -5189,14 +5192,13 @@ Status CatalogManager::ProcessTabletReport(TSDescriptor* ts_desc, "requestor", rpc->requestor_string(), "num_tablets", num_tablets); - if (VLOG_IS_ON(2)) { - VLOG(2) << "Received tablet report from " << RequestorString(rpc) << "(" - << ts_desc->permanent_uuid() << "): " << full_report.DebugString(); - } + VLOG_WITH_PREFIX(2) << "Received tablet report from " << RequestorString(rpc) << "(" + << ts_desc->permanent_uuid() << "): " << full_report.DebugString(); if (!ts_desc->has_tablet_report() && full_report.is_incremental()) { - string msg = "Received an incremental tablet report when a full one was needed"; - LOG(WARNING) << "Invalid tablet report from " << ts_desc->permanent_uuid() << ": " << msg; + LOG_WITH_PREFIX(WARNING) + << "Invalid tablet report from " << ts_desc->permanent_uuid() + << ": Received an incremental tablet report when a full one was needed"; // We should respond with success in order to send reply that we need full report. return Status::OK(); } @@ -7584,7 +7586,6 @@ Status CatalogManager::DeleteTabletListAndSendRequests( if (tablet_lock->ListedAsHidden() && !was_hidden) { marked_as_hidden.push_back(tablet); } - DeleteTabletReplicas(tablet.get(), deletion_msg, hide_only); } // Update all the tablet states in raft in bulk. @@ -7597,6 +7598,8 @@ Status CatalogManager::DeleteTabletListAndSendRequests( tablet_lock.Commit(); LOG(INFO) << (hide_only ? "Hid tablet " : "Deleted tablet ") << tablet->tablet_id(); + + DeleteTabletReplicas(tablet.get(), deletion_msg, hide_only); } if (!marked_as_hidden.empty()) { @@ -7849,20 +7852,21 @@ Status CatalogManager::HandleTabletSchemaVersionReport( // Update the schema version if it's the latest. tablet->set_reported_schema_version(table->id(), version); - VLOG(1) << "Tablet " << tablet->tablet_id() << " reported version " << version; + VLOG_WITH_PREFIX_AND_FUNC(1) + << "Tablet " << tablet->tablet_id() << " reported version " << version; // Verify if it's the last tablet report, and the alter completed. { auto l = table->LockForRead(); if (l->pb.state() != SysTablesEntryPB::ALTERING) { - VLOG(2) << "Table " << table->ToString() << " is not altering"; + VLOG_WITH_PREFIX_AND_FUNC(2) << "Table " << table->ToString() << " is not altering"; return Status::OK(); } uint32_t current_version = l->pb.version(); if (table->IsAlterInProgress(current_version)) { - VLOG(2) << "Table " << table->ToString() << " has IsAlterInProgress (" - << current_version << ")"; + VLOG_WITH_PREFIX_AND_FUNC(2) << "Table " << table->ToString() << " has IsAlterInProgress (" + << current_version << ")"; return Status::OK(); } } @@ -9165,11 +9169,18 @@ Status CatalogManager::CollectTable( CollectFlags flags, std::vector* all_tables, std::unordered_set* parent_colocated_table_ids) { - if (table_description.table_info->LockForRead()->started_deleting()) { + auto lock = table_description.table_info->LockForRead(); + if (lock->started_hiding()) { + VLOG_WITH_PREFIX_AND_FUNC(4) + << "Rejected hidden table: " << AsString(table_description.table_info); + return Status::OK(); + } + if (lock->started_deleting()) { + VLOG_WITH_PREFIX_AND_FUNC(4) + << "Rejected deleted table: " << AsString(table_description.table_info); return Status::OK(); } - if (flags.Test(CollectFlag::kIncludeParentColocatedTable) && - table_description.table_info->colocated()) { + if (flags.Test(CollectFlag::kIncludeParentColocatedTable) && lock->pb.colocated()) { // If a table is colocated, add its parent colocated table as well. const auto parent_table_id = table_description.namespace_info->id() + kColocatedParentTableIdSuffix; @@ -9187,21 +9198,23 @@ Status CatalogManager::CollectTable( if (flags.Test(CollectFlag::kAddIndexes)) { TRACE(Substitute("Locking object with id $0", table_description.table_info->id())); - auto l = table_description.table_info->LockForRead(); - if (table_description.table_info->is_index()) { + if (lock->is_index()) { return STATUS(InvalidArgument, "Expected table, but found index", table_description.table_info->id(), MasterError(MasterErrorPB::INVALID_TABLE_TYPE)); } - if (l->table_type() == PGSQL_TABLE_TYPE) { + if (lock->table_type() == PGSQL_TABLE_TYPE) { return STATUS(InvalidArgument, "Getting indexes for YSQL table is not supported", table_description.table_info->id(), MasterError(MasterErrorPB::INVALID_TABLE_TYPE)); } - for (const auto& index_info : l->pb.indexes()) { + auto collect_index_flags = flags; + // Don't need to collect indexes for index. + collect_index_flags.Reset(CollectFlag::kAddIndexes); + for (const auto& index_info : lock->pb.indexes()) { LOG_IF(DFATAL, table_description.table_info->id() != index_info.indexed_table_id()) << "Wrong indexed table id in index descriptor"; TableIdentifierPB index_id_pb; @@ -9209,10 +9222,8 @@ Status CatalogManager::CollectTable( index_id_pb.mutable_namespace_()->set_id(table_description.namespace_info->id()); auto index_description = VERIFY_RESULT(DescribeTable( index_id_pb, flags.Test(CollectFlag::kSucceedIfCreateInProgress))); - if (index_description.table_info->LockForRead()->started_deleting()) { - continue; - } - all_tables->push_back(index_description); + RETURN_NOT_OK(CollectTable( + index_description, collect_index_flags, all_tables, parent_colocated_table_ids)); } } @@ -9235,25 +9246,25 @@ Result> CatalogManager::CollectTables( // It is necessary because we don't support kAddIndexes for YSQL tables. ns_collect_flags.Reset(CollectFlag::kAddIndexes); auto namespace_info = VERIFY_RESULT(FindNamespaceUnlocked(table_id_pb.namespace_())); - VLOG_WITH_PREFIX(1) - << __func__ << ", collecting all tables from: " << namespace_info->ToString(); + VLOG_WITH_PREFIX_AND_FUNC(1) + << "Collecting all tables from: " << namespace_info->ToString(); for (const auto& id_and_table : *table_ids_map_) { if (id_and_table.second->is_system()) { - VLOG_WITH_PREFIX(4) << __func__ << ", rejected system table: " - << AsString(id_and_table); + VLOG_WITH_PREFIX_AND_FUNC(4) << "Rejected system table: " << AsString(id_and_table); continue; } - if (id_and_table.second->namespace_id() != namespace_info->id()) { - VLOG_WITH_PREFIX(4) << __func__ << ", rejected table from other namespace: " - << AsString(id_and_table); + auto lock = id_and_table.second->LockForRead(); + if (lock->namespace_id() != namespace_info->id()) { + VLOG_WITH_PREFIX_AND_FUNC(4) + << "Rejected table from other namespace: " << AsString(id_and_table); continue; } - VLOG_WITH_PREFIX(4) << __func__ << ", accepted: " << AsString(id_and_table); + VLOG_WITH_PREFIX_AND_FUNC(4) << "Accepted: " << AsString(id_and_table); table_with_flags.emplace_back(id_and_table.second, ns_collect_flags); } } else { auto table = VERIFY_RESULT(FindTableUnlocked(table_id_pb)); - VLOG_WITH_PREFIX(1) << __func__ << ", collecting table: " << table->ToString(); + VLOG_WITH_PREFIX_AND_FUNC(1) << "Collecting table: " << table->ToString(); table_with_flags.emplace_back(table, flags); } } @@ -9424,5 +9435,29 @@ void CatalogManager::ProcessTabletPathInfo(const std::string& ts_uuid, } } +void CatalogManager::CheckTableDeleted(const TableInfoPtr& table) { + if (!FLAGS_master_drop_table_after_task_response) { + return; + } + // Since this is called after every successful async DeleteTablet, it's possible if all tasks + // complete, for us to mark the table as DELETED/HIDDEN asap. This is desirable as clients will + // wait for this before returning success to the user. + // + // However, if tasks fail, timeout, or are aborted, we still have the background thread as a + // catch all. + auto lock = MaybeTransitionTableToDeleted(table); + if (!lock.locked()) { + return; + } + Status s = sys_catalog_->UpdateItem(table.get(), leader_ready_term()); + if (!s.ok()) { + LOG_WITH_PREFIX(WARNING) + << "Error marking table as " + << (table->LockForRead()->started_deleting() ? "DELETED" : "HIDDEN") << ": " << s; + return; + } + lock.Commit(); +} + } // namespace master } // namespace yb diff --git a/src/yb/master/catalog_manager.h b/src/yb/master/catalog_manager.h index e5e27ef1602f..63f3e1924add 100644 --- a/src/yb/master/catalog_manager.h +++ b/src/yb/master/catalog_manager.h @@ -734,6 +734,7 @@ class CatalogManager : public tserver::TabletPeerLookupIf { } uintptr_t tablets_version() const NO_THREAD_SAFETY_ANALYSIS { + // This method should not hold the lock, because Version method is thread safe. return tablet_map_.Version() + table_ids_map_.Version(); } @@ -787,6 +788,8 @@ class CatalogManager : public tserver::TabletPeerLookupIf { void ProcessTabletPathInfo(const std::string& ts_uuid, const TabletPathInfoPB& report); + void CheckTableDeleted(const TableInfoPtr& table); + protected: // TODO Get rid of these friend classes and introduce formal interface. friend class TableLoader; diff --git a/src/yb/master/master_fwd.h b/src/yb/master/master_fwd.h index 656e406e0593..fa14541c2650 100644 --- a/src/yb/master/master_fwd.h +++ b/src/yb/master/master_fwd.h @@ -17,6 +17,8 @@ #include #include +#include "yb/common/entity_ids_types.h" + #include "yb/gutil/ref_counted.h" #include "yb/util/enums.h" #include "yb/util/strongly_typed_bool.h" @@ -54,15 +56,19 @@ class SysRowEntries; class SysSnapshotEntryPB; class SysTablesEntryPB; class SysTabletsEntryPB; +class TabletReportPB; class TSHeartbeatRequestPB; class TSHeartbeatResponsePB; class TSRegistrationPB; class TSSnapshotSchedulesInfoPB; -class TabletInfo; -class TabletReportPB; -typedef scoped_refptr TabletInfoPtr; -typedef std::vector TabletInfos; +class TableInfo; +using TableInfoPtr = scoped_refptr; +using TableInfoMap = std::map; + +class TabletInfo; +using TabletInfoPtr = scoped_refptr; +using TabletInfos = std::vector; struct SnapshotScheduleRestoration; using SnapshotScheduleRestorationPtr = std::shared_ptr; diff --git a/src/yb/master/master_snapshot_coordinator.cc b/src/yb/master/master_snapshot_coordinator.cc index cb26076acfad..21d3f558d578 100644 --- a/src/yb/master/master_snapshot_coordinator.cc +++ b/src/yb/master/master_snapshot_coordinator.cc @@ -545,8 +545,8 @@ class MasterSnapshotCoordinator::Impl { } else if ((**it).ShouldUpdate(*new_entry)) { map->replace(it, std::move(new_entry)); } else { - LOG(INFO) << __func__ << " ignore because of version check, existing: " - << (**it).ToString() << ", loaded: " << new_entry->ToString(); + VLOG_WITH_FUNC(1) << "Ignore because of version check, existing: " << (**it).ToString() + << ", loaded: " << new_entry->ToString(); } return Status::OK(); diff --git a/src/yb/master/state_with_tablets.cc b/src/yb/master/state_with_tablets.cc index 4c86cfc823fd..daaade6d7c4f 100644 --- a/src/yb/master/state_with_tablets.cc +++ b/src/yb/master/state_with_tablets.cc @@ -143,11 +143,11 @@ void StateWithTablets::Done(const TabletId& tablet_id, const Status& status) { it, [terminal_state = InitialStateToTerminalState(initial_state_)](TabletData& data) { data.state = terminal_state; }); - LOG(INFO) << "Finished " << InitialStateName() << " snapshot at " << tablet_id; + LOG(INFO) << "Finished " << InitialStateName() << " snapshot at " << tablet_id << ", " + << num_tablets_in_initial_state_ << " was running"; } else { auto full_status = status.CloneAndPrepend( Format("Failed to $0 snapshot at $1", InitialStateName(), tablet_id)); - LOG(WARNING) << full_status; bool terminal = IsTerminalFailure(status); tablets_.modify(it, [&full_status, terminal](TabletData& data) { if (terminal) { @@ -155,6 +155,8 @@ void StateWithTablets::Done(const TabletId& tablet_id, const Status& status) { } data.last_error = full_status; }); + LOG(WARNING) << full_status << ", terminal: " << terminal << ", " + << num_tablets_in_initial_state_ << " was running"; if (!terminal) { return; } diff --git a/src/yb/rocksdb/db.h b/src/yb/rocksdb/db.h index 166fae044b44..013e0e72a530 100644 --- a/src/yb/rocksdb/db.h +++ b/src/yb/rocksdb/db.h @@ -620,12 +620,13 @@ class DB { virtual Status SetOptions( ColumnFamilyHandle* /*column_family*/, - const std::unordered_map& /*new_options*/) { + const std::unordered_map& /*new_options*/, + bool dump_options = true) { return STATUS(NotSupported, "Not implemented"); } virtual Status SetOptions( - const std::unordered_map& new_options) { - return SetOptions(DefaultColumnFamily(), new_options); + const std::unordered_map& new_options, bool dump_options = true) { + return SetOptions(DefaultColumnFamily(), new_options, dump_options); } virtual void SetDisableFlushOnShutdown(bool disable_flush_on_shutdown) {} diff --git a/src/yb/rocksdb/db/db_impl.cc b/src/yb/rocksdb/db/db_impl.cc index e3c7bd9339a4..07b64cac0a73 100644 --- a/src/yb/rocksdb/db/db_impl.cc +++ b/src/yb/rocksdb/db/db_impl.cc @@ -2440,8 +2440,10 @@ void DBImpl::SetDisableFlushOnShutdown(bool disable_flush_on_shutdown) { } } -Status DBImpl::SetOptions(ColumnFamilyHandle* column_family, - const std::unordered_map& options_map) { +Status DBImpl::SetOptions( + ColumnFamilyHandle* column_family, + const std::unordered_map& options_map, + bool dump_options) { #ifdef ROCKSDB_LITE return STATUS(NotSupported, "Not supported in ROCKSDB LITE"); #else @@ -2474,17 +2476,15 @@ Status DBImpl::SetOptions(ColumnFamilyHandle* column_family, } RLOG(InfoLogLevel::INFO_LEVEL, db_options_.info_log, - "SetOptions() on column family [%s], inputs:", - cfd->GetName().c_str()); - for (const auto& o : options_map) { - RLOG(InfoLogLevel::INFO_LEVEL, db_options_.info_log, - "%s: %s\n", o.first.c_str(), o.second.c_str()); - } + "SetOptions() on column family [%s], inputs: %s", + cfd->GetName().c_str(), yb::AsString(options_map).c_str()); if (s.ok()) { RLOG(InfoLogLevel::INFO_LEVEL, db_options_.info_log, "[%s] SetOptions succeeded", cfd->GetName().c_str()); - new_options.Dump(db_options_.info_log.get()); + if (dump_options) { + new_options.Dump(db_options_.info_log.get()); + } if (!persist_options_status.ok()) { if (db_options_.fail_if_options_file_error) { s = STATUS(IOError, @@ -2962,7 +2962,7 @@ Status DBImpl::EnableAutoCompaction( Status s; for (auto cf_ptr : column_family_handles) { Status status = - this->SetOptions(cf_ptr, {{"disable_auto_compactions", "false"}}); + this->SetOptions(cf_ptr, {{"disable_auto_compactions", "false"}}, false); if (status.ok()) { ColumnFamilyData* cfd = down_cast(cf_ptr)->cfd(); InstrumentedMutexLock guard_lock(&mutex_); diff --git a/src/yb/rocksdb/db/db_impl.h b/src/yb/rocksdb/db/db_impl.h index e0bdb4375505..2b5b30061700 100644 --- a/src/yb/rocksdb/db/db_impl.h +++ b/src/yb/rocksdb/db/db_impl.h @@ -169,7 +169,8 @@ class DBImpl : public DB { using DB::SetOptions; Status SetOptions( ColumnFamilyHandle* column_family, - const std::unordered_map& options_map) override; + const std::unordered_map& options_map, + bool dump_options = true) override; // Set whether DB should be flushed on shutdown. void SetDisableFlushOnShutdown(bool disable_flush_on_shutdown) override; diff --git a/src/yb/rocksdb/utilities/stackable_db.h b/src/yb/rocksdb/utilities/stackable_db.h index 8a789264ed39..b7f05e70a211 100644 --- a/src/yb/rocksdb/utilities/stackable_db.h +++ b/src/yb/rocksdb/utilities/stackable_db.h @@ -319,9 +319,9 @@ class StackableDB : public DB { using DB::SetOptions; virtual Status SetOptions(ColumnFamilyHandle* column_family_handle, - const std::unordered_map& - new_options) override { - return db_->SetOptions(column_family_handle, new_options); + const std::unordered_map& new_options, + bool dump_options) override { + return db_->SetOptions(column_family_handle, new_options, dump_options); } using DB::GetPropertiesOfAllTables; diff --git a/src/yb/rpc/reactor.cc b/src/yb/rpc/reactor.cc index 1752e5c0321e..c52235b55a35 100644 --- a/src/yb/rpc/reactor.cc +++ b/src/yb/rpc/reactor.cc @@ -341,6 +341,8 @@ void Reactor::CheckReadyToStop() { // something to our attention, like the fact that we're shutting down, or the fact that there is a // new outbound Transfer ready to send. void Reactor::AsyncHandler(ev::async &watcher, int revents) { + VLOG_WITH_PREFIX_AND_FUNC(4) << "Events: " << revents; + DCHECK(IsCurrentThread()); auto se = ScopeExit([this] { @@ -763,6 +765,9 @@ void DelayedTask::Run(Reactor* reactor) { // will be requested in the middle of scheduling - task will be aborted right after return // from this method. std::lock_guard l(lock_); + + VLOG_WITH_PREFIX_AND_FUNC(4) << "Done: " << done_ << ", when: " << when_; + if (done_) { // Task has been aborted. return; @@ -802,6 +807,10 @@ std::string DelayedTask::ToString() const { void DelayedTask::AbortTask(const Status& abort_status) { auto mark_as_done_result = MarkAsDone(); + + VLOG_WITH_PREFIX_AND_FUNC(4) + << "Status: " << abort_status << ", " << AsString(mark_as_done_result); + if (mark_as_done_result == MarkAsDoneResult::kSuccess) { // Stop the libev timer. We don't need to do this in the kNotScheduled case, because the timer // has not started in that case. @@ -841,9 +850,9 @@ void DelayedTask::TimerHandler(ev::timer& watcher, int revents) { } // Hold shared_ptr, so this task wouldn't be destroyed upon removal below until func_ is called. - auto holder = shared_from_this(); + auto holder = shared_from(this); - reactor_->scheduled_tasks_.erase(shared_from(this)); + reactor_->scheduled_tasks_.erase(holder); if (messenger_ != nullptr) { messenger_->RemoveScheduledTask(id_); } @@ -851,8 +860,10 @@ void DelayedTask::TimerHandler(ev::timer& watcher, int revents) { if (EV_ERROR & revents) { std::string msg = "Delayed task got an error in its timer handler"; LOG(WARNING) << msg; + VLOG_WITH_PREFIX_AND_FUNC(4) << "Abort"; func_(STATUS(Aborted, msg)); } else { + VLOG_WITH_PREFIX_AND_FUNC(4) << "Execute"; func_(Status::OK()); } } diff --git a/src/yb/rpc/reactor.h b/src/yb/rpc/reactor.h index 6397d38bb979..848327f851d6 100644 --- a/src/yb/rpc/reactor.h +++ b/src/yb/rpc/reactor.h @@ -235,6 +235,10 @@ class DelayedTask : public ReactorTask { std::string ToString() const override; + std::string LogPrefix() const { + return ToString() + ": "; + } + private: void DoAbort(const Status& abort_status) override; diff --git a/src/yb/rpc/yb_rpc.cc b/src/yb/rpc/yb_rpc.cc index e161751b3221..dd8db6fa8e36 100644 --- a/src/yb/rpc/yb_rpc.cc +++ b/src/yb/rpc/yb_rpc.cc @@ -586,7 +586,7 @@ void YBOutboundConnectionContext::HandleTimeout(ev::timer& watcher, int revents) if (now > deadline) { auto passed = now - last_read_time_; const auto status = STATUS_FORMAT( - NetworkError, "Read timeout, passed: $0, timeout: $1, now: $2, last_read_time_: $3", + NetworkError, "Rpc timeout, passed: $0, timeout: $1, now: $2, last_read_time_: $3", passed, timeout, now, last_read_time_); LOG(WARNING) << connection->ToString() << ": " << status; connection->reactor()->DestroyConnection(connection.get(), status); diff --git a/src/yb/server/monitored_task.h b/src/yb/server/monitored_task.h index 671d6899ec8f..307c7bce891b 100644 --- a/src/yb/server/monitored_task.h +++ b/src/yb/server/monitored_task.h @@ -104,6 +104,10 @@ class MonitoredTask : public std::enable_shared_from_this { return false; } + std::string ToString() const { + return Format("{ type: $0 description: $1 }", type(), description()); + } + protected: static bool IsStateTerminal(MonitoredTaskState state) { return state == MonitoredTaskState::kComplete || diff --git a/src/yb/tablet/tablet.cc b/src/yb/tablet/tablet.cc index 9f14ed421a63..54caff5db545 100644 --- a/src/yb/tablet/tablet.cc +++ b/src/yb/tablet/tablet.cc @@ -780,7 +780,7 @@ Status Tablet::DoEnableCompactions() { }; if (regular_db_) { WARN_WITH_PREFIX_NOT_OK( - regular_db_->SetOptions(new_options), + regular_db_->SetOptions(new_options, /* dump_options= */ false), "Failed to set options on regular DB"); regular_db_status = regular_db_->EnableAutoCompaction({regular_db_->DefaultColumnFamily()}); @@ -791,7 +791,7 @@ Status Tablet::DoEnableCompactions() { } if (intents_db_) { WARN_WITH_PREFIX_NOT_OK( - intents_db_->SetOptions(new_options), + intents_db_->SetOptions(new_options, /* dump_options= */ false), "Failed to set options on provisional records DB"); Status intents_db_status = intents_db_->EnableAutoCompaction({intents_db_->DefaultColumnFamily()}); diff --git a/src/yb/tablet/tablet_snapshots.cc b/src/yb/tablet/tablet_snapshots.cc index 3b22afa090c0..1c21fe2e7728 100644 --- a/src/yb/tablet/tablet_snapshots.cc +++ b/src/yb/tablet/tablet_snapshots.cc @@ -33,6 +33,8 @@ #include "yb/util/scope_exit.h" #include "yb/util/trace.h" +using namespace std::literals; + namespace yb { namespace tablet { @@ -74,6 +76,8 @@ Status TabletSnapshots::Create(SnapshotOperationState* tx_state) { } Status TabletSnapshots::Create(const CreateSnapshotData& data) { + LongOperationTracker long_operation_tracker("Create snapshot", 5s); + ScopedRWOperation scoped_read_operation(&pending_op_counter()); RETURN_NOT_OK(scoped_read_operation); @@ -203,6 +207,8 @@ Status TabletSnapshots::Restore(SnapshotOperationState* tx_state) { const std::string snapshot_dir = VERIFY_RESULT(tx_state->GetSnapshotDir()); auto restore_at = HybridTime::FromPB(tx_state->request()->snapshot_hybrid_time()); + VLOG_WITH_PREFIX_AND_FUNC(1) << YB_STRUCT_TO_STRING(snapshot_dir, restore_at); + if (!snapshot_dir.empty()) { RETURN_NOT_OK_PREPEND( FileExists(&rocksdb_env(), snapshot_dir), @@ -230,6 +236,8 @@ Status TabletSnapshots::Restore(SnapshotOperationState* tx_state) { Status TabletSnapshots::RestoreCheckpoint( const std::string& dir, HybridTime restore_at, const RestoreMetadata& restore_metadata, const docdb::ConsensusFrontier& frontier) { + LongOperationTracker long_operation_tracker("Restore checkpoint", 5s); + // The following two lines can't just be changed to RETURN_NOT_OK(PauseReadWriteOperations()): // op_pause has to stay in scope until the end of the function. auto op_pause = PauseReadWriteOperations(); @@ -242,7 +250,6 @@ Status TabletSnapshots::RestoreCheckpoint( std::lock_guard lock(create_checkpoint_lock()); - const rocksdb::SequenceNumber sequence_number = regular_db().GetLatestSequenceNumber(); const string db_dir = regular_db().GetName(); const std::string intents_db_dir = has_intents_db() ? intents_db().GetName() : std::string(); @@ -293,9 +300,6 @@ Status TabletSnapshots::RestoreCheckpoint( } LOG_WITH_PREFIX(INFO) << "Checkpoint restored from " << dir; - LOG_WITH_PREFIX(INFO) << "Sequence numbers: old=" << sequence_number - << ", restored=" << regular_db().GetLatestSequenceNumber(); - LOG_WITH_PREFIX(INFO) << "Re-enabling compactions"; s = tablet().EnableCompactions(&op_pause); if (!s.ok()) { diff --git a/src/yb/tserver/tablet_service.cc b/src/yb/tserver/tablet_service.cc index 321a1fb6e546..873347836de2 100644 --- a/src/yb/tserver/tablet_service.cc +++ b/src/yb/tserver/tablet_service.cc @@ -912,16 +912,16 @@ void TabletServiceAdminImpl::AlterSchema(const ChangeMetadataRequestPB* req, // If the current schema is newer than the one in the request reject the request. if (schema_version > req->schema_version()) { - LOG(ERROR) << "Tablet " << req->tablet_id() << " has a newer schema " + LOG(ERROR) << "Tablet " << req->tablet_id() << " has a newer schema" << " version=" << schema_version << " req->schema_version()=" << req->schema_version() << "\n current-schema=" << tablet_schema.ToString() - << "\n request-schema=" << req_schema.ToString() << " (wtf?)"; + << "\n request-schema=" << req_schema.ToString(); SetupErrorAndRespond( resp->mutable_error(), STATUS_SUBSTITUTE( InvalidArgument, "Tablet has a newer schema Tab $0. Req $1 vs Existing version : $2", - req->tablet_id(), req->DebugString(), schema_version), + req->tablet_id(), req->schema_version(), schema_version), TabletServerErrorPB::TABLET_HAS_A_NEWER_SCHEMA, &context); return; } @@ -1238,6 +1238,7 @@ void TabletServiceAdminImpl::DeleteTablet(const DeleteTabletRequestPB* req, LOG(INFO) << "T " << req->tablet_id() << " P " << server_->permanent_uuid() << ": Processing DeleteTablet with delete_type " << TabletDataState_Name(delete_type) << (req->has_reason() ? (" (" + req->reason() + ")") : "") + << (req->hide_only() ? " (Hide only)" : "") << " from " << context.requestor_string(); VLOG(1) << "Full request: " << req->DebugString(); diff --git a/src/yb/util/operation_counter.h b/src/yb/util/operation_counter.h index f35714ed3711..53fa6fa4b882 100644 --- a/src/yb/util/operation_counter.h +++ b/src/yb/util/operation_counter.h @@ -20,13 +20,10 @@ #include "yb/util/cross_thread_mutex.h" #include "yb/util/debug-util.h" +#include "yb/util/debug/long_operation_tracker.h" #include "yb/util/monotime.h" #include "yb/util/result.h" -#ifndef NDEBUG -#include "yb/util/debug/long_operation_tracker.h" -#endif - namespace yb { YB_STRONGLY_TYPED_BOOL(Stop); diff --git a/src/yb/util/rwc_lock.cc b/src/yb/util/rwc_lock.cc index 5e9c919e65cc..fa46664a512e 100644 --- a/src/yb/util/rwc_lock.cc +++ b/src/yb/util/rwc_lock.cc @@ -53,7 +53,6 @@ RWCLock::RWCLock() write_locked_(false), last_writer_tid_(0), last_writelock_acquire_time_(0) { - last_writer_backtrace_[0] = '\0'; #endif // NDEBUG } @@ -93,12 +92,19 @@ void RWCLock::WriteLock() { MutexLock l(lock_); // Wait for any other mutations to finish. while (write_locked_) { +#ifndef NDEBUG + if (!no_mutators_.TimedWait(MonoDelta::FromSeconds(1))) { + LOG(WARNING) << "Too long write lock wait, last writer stack: " + << last_writer_stacktrace_.Symbolize(); + } +#else no_mutators_.Wait(); +#endif } #ifndef NDEBUG last_writelock_acquire_time_ = GetCurrentTimeMicros(); last_writer_tid_ = Thread::CurrentThreadId(); - HexStackTraceToString(last_writer_backtrace_, kBacktraceBufSize); + last_writer_stacktrace_.Collect(); #endif // NDEBUG write_locked_ = true; } @@ -108,7 +114,7 @@ void RWCLock::WriteUnlock() { DCHECK(write_locked_); write_locked_ = false; #ifndef NDEBUG - last_writer_backtrace_[0] = '\0'; + last_writer_stacktrace_.Reset(); #endif // NDEBUG no_mutators_.Signal(); } @@ -129,7 +135,7 @@ void RWCLock::CommitUnlock() { DCHECK_EQ(0, reader_count_); write_locked_ = false; #ifndef NDEBUG - last_writer_backtrace_[0] = '\0'; + last_writer_stacktrace_.Reset(); #endif // NDEBUG no_mutators_.Broadcast(); lock_.unlock(); diff --git a/src/yb/util/rwc_lock.h b/src/yb/util/rwc_lock.h index 28458b1cfa67..d37490d7aeeb 100644 --- a/src/yb/util/rwc_lock.h +++ b/src/yb/util/rwc_lock.h @@ -33,7 +33,9 @@ #define YB_UTIL_RWC_LOCK_H #include "yb/gutil/macros.h" + #include "yb/util/condition_variable.h" +#include "yb/util/debug-util.h" #include "yb/util/mutex.h" namespace yb { @@ -138,10 +140,9 @@ class RWCLock { bool write_locked_; #ifndef NDEBUG - static const int kBacktraceBufSize = 1024; int64_t last_writer_tid_; int64_t last_writelock_acquire_time_; - char last_writer_backtrace_[kBacktraceBufSize]; + StackTrace last_writer_stacktrace_; #endif // NDEBUG DISALLOW_COPY_AND_ASSIGN(RWCLock);