Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reporting of initial VDisk status to SysView #8853

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ydb/core/mind/bscontroller/bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
auto prevStaticVSlots = std::exchange(StaticVSlots, {});
StaticVDiskMap.clear();

const TMonotonic mono = TActivationContext::Monotonic();

if (StorageConfig.HasBlobStorageConfig()) {
if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
const auto& ss = bsConfig.GetServiceSet();
Expand All @@ -143,7 +145,7 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
const auto& location = vslot.GetVDiskLocation();
const TPDiskId pdiskId(location.GetNodeID(), location.GetPDiskID());
const TVSlotId vslotId(pdiskId, location.GetVDiskSlotID());
StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots);
StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots, mono);
const TVDiskID& vdiskId = VDiskIDFromVDiskID(vslot.GetVDiskID());
StaticVDiskMap.emplace(vdiskId, vslotId);
StaticVDiskMap.emplace(TVDiskID(vdiskId.GroupID, 0, vdiskId), vslotId);
Expand Down
5 changes: 4 additions & 1 deletion ydb/core/mind/bscontroller/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ namespace NKikimr {

// when the config cmd was received
const TInstant Timestamp;
const TMonotonic Mono;

// various settings from controller
const bool DonorMode;
Expand All @@ -124,7 +125,8 @@ namespace NKikimr {
bool PushStaticGroupsToSelfHeal = false;

public:
TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp)
TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp,
TMonotonic mono)
: Self(controller)
, HostConfigs(&controller.HostConfigs)
, Boxes(&controller.Boxes)
Expand All @@ -142,6 +144,7 @@ namespace NKikimr {
, NextStoragePoolId(&controller.NextStoragePoolId)
, HostRecords(hostRecords)
, Timestamp(timestamp)
, Mono(mono)
, DonorMode(controller.DonorMode)
, DefaultMaxSlots(controller.DefaultMaxSlots)
, StaticVSlots(controller.StaticVSlots)
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/bscontroller/config_cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ namespace NKikimr::NBsController {
Response->MutableStatus()->RemoveLast();
}

State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
State->CheckConsistency();

TString m;
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/bscontroller/config_fit_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,7 @@ namespace NKikimr {
groupInfo->ID, 0, groupInfo->Generation, StoragePool.VDiskKind, failRealmIdx,
failDomainIdx, vdiskIdx, TMood::Normal, groupInfo, &VSlotReadyTimestampQ,
TInstant::Zero(), TDuration::Zero());
vslotInfo->VDiskStatusTimestamp = State.Mono;

// mark as uncommitted
State.UncommittedVSlots.insert(vslotId);
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/bscontroller/drop_donor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class TBlobStorageController::TTxDropDonor
TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DROP_DONOR; }

bool Execute(TTransactionContext &txc, const TActorContext&) override {
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
State->CheckConsistency();
for (const TVSlotId& vslotId : VSlotIds) {
if (const TVSlotInfo *vslot = State->VSlots.Find(vslotId); vslot && !vslot->IsBeingDeleted()) {
Expand Down
8 changes: 5 additions & 3 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa

public:
std::optional<NKikimrBlobStorage::EVDiskStatus> VDiskStatus;
NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast();
TMonotonic VDiskStatusTimestamp;
bool IsReady = false;
bool OnlyPhantomsRemain = false;

Expand Down Expand Up @@ -2308,11 +2308,11 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa

std::optional<NKikimrBlobStorage::TVDiskMetrics> VDiskMetrics;
std::optional<NKikimrBlobStorage::EVDiskStatus> VDiskStatus;
NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast();
TMonotonic VDiskStatusTimestamp;
TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state

TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk,
std::map<TVSlotId, TStaticVSlotInfo>& prev)
std::map<TVSlotId, TStaticVSlotInfo>& prev, TMonotonic mono)
: VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID()))
, VDiskKind(vdisk.GetVDiskKind())
{
Expand All @@ -2324,6 +2324,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
VDiskStatus = item.VDiskStatus;
VDiskStatusTimestamp = item.VDiskStatusTimestamp;
ReadySince = item.ReadySince;
} else {
VDiskStatusTimestamp = mono;
}
}
};
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/bscontroller/load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
}

// VSlots
const TMonotonic mono = TActivationContext::Monotonic();
Self->VSlots.clear();
{
using T = Schema::VSlot;
Expand All @@ -374,6 +375,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
if (x.LastSeenReady != TInstant::Zero()) {
Self->NotReadyVSlotIds.insert(x.VSlotId);
}
x.VDiskStatusTimestamp = mono;

if (!slot.Next()) {
return false;
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/bscontroller/node_report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class TBlobStorageController::TTxNodeReport
return true;
}

State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
State->CheckConsistency();

NIceDb::TNiceDb db(txc.DB);
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/bscontroller/register_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class TBlobStorageController::TTxUpdateNodeDrives
bool Execute(TTransactionContext& txc, const TActorContext&) override {
const TNodeId nodeId = Record.GetNodeId();

State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
State->CheckConsistency();

auto updateIsSuccessful = true;
Expand Down
26 changes: 19 additions & 7 deletions ydb/core/mind/bscontroller/sys_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ void CopyInfo(NKikimrSysView::TPDiskInfo* info, const THolder<TBlobStorageContro
}

void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, const NKikimrBlobStorage::TVDiskMetrics& m,
std::optional<NKikimrBlobStorage::EVDiskStatus> status, NHPTimer::STime statusTimestamp,
NKikimrBlobStorage::TVDiskKind::EVDiskKind kind, bool isBeingDeleted) {
std::optional<NKikimrBlobStorage::EVDiskStatus> status, NKikimrBlobStorage::TVDiskKind::EVDiskKind kind,
bool isBeingDeleted) {
pb->SetGroupId(vdiskId.GroupID.GetRawId());
pb->SetGroupGeneration(vdiskId.GroupGeneration);
pb->SetFailRealm(vdiskId.FailRealm);
Expand All @@ -338,9 +338,6 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId,
if (m.HasAvailableSize()) {
pb->SetAvailableSize(m.GetAvailableSize());
}
if (!status && CyclesToDuration(GetCycleCountFast() - statusTimestamp) > TDuration::Seconds(15)) {
status = NKikimrBlobStorage::EVDiskStatus::ERROR;
}
if (status) {
pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(*status));
}
Expand All @@ -352,7 +349,7 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId,

void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder<TBlobStorageController::TVSlotInfo>& vSlotInfo) {
SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->VDiskStatus,
vSlotInfo->VDiskStatusTimestamp, vSlotInfo->Kind, vSlotInfo->IsBeingDeleted());
vSlotInfo->Kind, vSlotInfo->IsBeingDeleted());
}

void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageController::TGroupInfo>& groupInfo) {
Expand Down Expand Up @@ -428,6 +425,21 @@ void TBlobStorageController::UpdateSystemViews() {
return;
}

const TMonotonic now = TActivationContext::Monotonic();
const TDuration expiration = TDuration::Seconds(15);
for (auto& [key, value] : VSlots) {
if (!value->VDiskStatus && value->VDiskStatusTimestamp + expiration <= now) {
value->VDiskStatus = NKikimrBlobStorage::ERROR;
SysViewChangedVSlots.insert(key);
}
}
for (auto& [key, value] : StaticVSlots) {
if (!value.VDiskStatus && value.VDiskStatusTimestamp + expiration <= now) {
value.VDiskStatus = NKikimrBlobStorage::ERROR;
SysViewChangedVSlots.insert(key);
}
}

if (!SysViewChangedPDisks.empty() || !SysViewChangedVSlots.empty() || !SysViewChangedGroups.empty() ||
!SysViewChangedStoragePools.empty() || SysViewChangedSettings) {
auto update = MakeHolder<TEvControllerUpdateSystemViews>();
Expand Down Expand Up @@ -468,7 +480,7 @@ void TBlobStorageController::UpdateSystemViews() {
if (SysViewChangedVSlots.count(vslotId)) {
static const NKikimrBlobStorage::TVDiskMetrics zero;
SerializeVSlotInfo(&state.VSlots[vslotId], vslot.VDiskId, vslot.VDiskMetrics ? *vslot.VDiskMetrics : zero,
vslot.VDiskStatus, vslot.VDiskStatusTimestamp, vslot.VDiskKind, false);
vslot.VDiskStatus, vslot.VDiskKind, false);
}
}
if (StorageConfig.HasBlobStorageConfig()) {
Expand Down
6 changes: 3 additions & 3 deletions ydb/core/mind/bscontroller/virtual_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ namespace NKikimr::NBsController {
if (const TGroupInfo *group = Self->FindGroup(GroupId); !group || group->VirtualGroupSetupMachineId != MachineId) {
return true; // another machine is already running
}
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
TGroupInfo *group = State->Groups.FindForUpdate(GroupId);
Y_ABORT_UNLESS(group);
if (!Callback(*group, *State)) {
Expand Down Expand Up @@ -294,7 +294,7 @@ namespace NKikimr::NBsController {
if (Token.expired()) {
return true; // actor is already dead
}
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
const size_t n = State->BlobDepotDeleteQueue.Unshare().erase(GroupId);
Y_ABORT_UNLESS(n == 1);
TString error;
Expand Down Expand Up @@ -897,7 +897,7 @@ namespace NKikimr::NBsController {
TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DECOMMIT_GROUP; }

bool Execute(TTransactionContext& txc, const TActorContext&) override {
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
Action(*State);
TString error;
if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) {
Expand Down
Loading