Skip to content

Commit

Permalink
Datashard: DISK_SPACE_EXHAUSTED error (#8318)
Browse files: browse the repository at this point in the history
  • Loading branch information
azevaykin authored Aug 27, 2024
1 parent: cb032f1; commit: ac92ce0
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions ydb/core/kqp/executer_actor/kqp_data_executer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,7 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
case NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED: {
return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, issues);
}
case NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED:
case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: {
return ReplyErrorAndDie(Ydb::StatusIds::INTERNAL_ERROR, issues);
}
Expand Down
14 changes: 14 additions & 0 deletions ydb/core/kqp/runtime/kqp_write_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,20 @@ class TKqpDirectWriteActor : public TActorBootstrapped<TKqpDirectWriteActor>, pu
}
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED: {
CA_LOG_E("Got DISK_SPACE_EXHAUSTED for table `"
<< SchemeEntry->TableId.PathId.ToString() << "`."
<< " ShardID=" << ev->Get()->Record.GetOrigin() << ","
<< " Sink=" << this->SelfId() << "."
<< getIssues().ToOneLineString());

RuntimeError(
TStringBuilder() << "Got DISK_SPACE_EXHAUSTED for table `"
<< SchemeEntry->TableId.PathId.ToString() << "`.",
NYql::NDqProto::StatusIds::PRECONDITION_FAILED,
getIssues());
return;
}
case NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED: {
CA_LOG_W("Got OVERLOADED for table `"
<< SchemeEntry->TableId.PathId.ToString() << "`."
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/protos/counters_datashard.proto
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ enum ECumulativeCounters {
COUNTER_WRITE_CANCELLED = 109 [(CounterOpts) = {Name: "WriteCancelled"}];
COUNTER_WRITE_ROWS = 110 [(CounterOpts) = {Name: "WriteRows"}];
COUNTER_WRITE_BYTES = 111 [(CounterOpts) = {Name: "WriteBytes"}];
COUNTER_WRITE_DISK_SPACE_EXHAUSTED = 112 [(CounterOpts) = {Name: "WriteDiskSpaceExhausted"}];
COUNTER_PREPARE_DISK_SPACE_EXHAUSTED = 113 [(CounterOpts) = {Name: "PrepareSpaceExhausted"}];
}

enum EPercentileCounters {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/data_events.proto
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ message TEvWriteResult {
STATUS_BAD_REQUEST = 7;
STATUS_SCHEME_CHANGED = 8;
STATUS_LOCKS_BROKEN = 9;
STATUS_DISK_SPACE_EXHAUSTED = 10;
}

// Status
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/tx/datashard/check_data_tx_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ EExecutionStatus TCheckDataTxUnit::Execute(TOperation::TPtr op,
// Updates are not allowed when database is out of space
TString err = "Cannot perform writes: database is out of disk space";

DataShard.IncCounter(COUNTER_PREPARE_OUT_OF_SPACE);
DataShard.IncCounter(COUNTER_PREPARE_DISK_SPACE_EXHAUSTED);

BuildResult(op)->AddError(NKikimrTxDataShard::TError::OUT_OF_SPACE, err);
BuildResult(op)->AddError(NKikimrTxDataShard::TError::DISK_SPACE_EXHAUSTED, err);
op->Abort(EExecutionUnitKind::FinishPropose);

LOG_LOG_S_THROTTLE(DataShard.GetLogThrottler(TDataShard::ELogThrottlerType::CheckDataTxUnit_Execute), ctx, NActors::NLog::PRI_ERROR, NKikimrServices::TX_DATASHARD, err);
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/tx/datashard/check_write_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ EExecutionStatus TCheckWriteUnit::Execute(TOperation::TPtr op,
// Updates are not allowed when database is out of space
TString err = "Cannot perform writes: database is out of disk space";

DataShard.IncCounter(COUNTER_WRITE_OUT_OF_SPACE);
DataShard.IncCounter(COUNTER_WRITE_DISK_SPACE_EXHAUSTED);

writeOp->SetError(NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, err);
writeOp->SetError(NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED, err);
op->Abort(EExecutionUnitKind::FinishProposeWrite);

DataShard.SetOverloadSubscribed(writeOp->GetWriteTx()->GetOverloadSubscribe(), writeOp->GetRecipient(), op->GetTarget(), ERejectReasons::YellowChannels, writeOp->GetWriteResult()->Record);
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tx/datashard/datashard__op_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ static bool MaybeReject(TDataShard* self, TEvRequest& ev, const TActorContext& c
Reject<TEvResponse, TEvRequest>(self, ev, txDesc, rejectReasons, rejectDescription, &OutOfSpace, ctx, logThrottlerType);
return true;
} else if (self->IsSubDomainOutOfSpace()) {
self->IncCounter(COUNTER_PREPARE_OUT_OF_SPACE);
self->IncCounter(COUNTER_PREPARE_DISK_SPACE_EXHAUSTED);
rejectReasons = ERejectReasons::DiskSpace;
rejectDescription = "Cannot perform writes: database is out of disk space";
Reject<TEvResponse, TEvRequest>(self, ev, txDesc, rejectReasons, rejectDescription, &DiskSpaceExhausted, ctx, logThrottlerType);
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/tx/datashard/datashard__write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,9 @@ NKikimrDataEvents::TEvWriteResult::EStatus NEvWrite::TConvertor::ConvertErrCode(
case NKikimrTxDataShard::TError_EKind_SCHEME_CHANGED:
return NKikimrDataEvents::TEvWriteResult::STATUS_SCHEME_CHANGED;
case NKikimrTxDataShard::TError_EKind_OUT_OF_SPACE:
case NKikimrTxDataShard::TError_EKind_DISK_SPACE_EXHAUSTED:
return NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED;
case NKikimrTxDataShard::TError_EKind_DISK_SPACE_EXHAUSTED:
return NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED;
default:
return NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR;
}
Expand Down
8 changes: 7 additions & 1 deletion ydb/core/tx/datashard/datashard_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3276,7 +3276,13 @@ class TDataShard
ev->Record.MutableTableStats()->SetImmediateTxCompleted(TabletCounters->Cumulative()[COUNTER_PREPARE_IMMEDIATE].Get() + TabletCounters->Cumulative()[COUNTER_WRITE_IMMEDIATE].Get());
ev->Record.MutableTableStats()->SetPlannedTxCompleted(TabletCounters->Cumulative()[COUNTER_PLANNED_TX_COMPLETE].Get());
ev->Record.MutableTableStats()->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_PREPARE_OVERLOADED].Get() + TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOADED].Get());
ev->Record.MutableTableStats()->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_PREPARE_OUT_OF_SPACE].Get() + TabletCounters->Cumulative()[COUNTER_WRITE_OUT_OF_SPACE].Get());
ev->Record.MutableTableStats()->SetTxRejectedBySpace(
TabletCounters->Cumulative()[COUNTER_PREPARE_OUT_OF_SPACE].Get()
+ TabletCounters->Cumulative()[COUNTER_PREPARE_DISK_SPACE_EXHAUSTED].Get()
+ TabletCounters->Cumulative()[COUNTER_WRITE_OUT_OF_SPACE].Get()
+ TabletCounters->Cumulative()[COUNTER_WRITE_DISK_SPACE_EXHAUSTED].Get()
);

ev->Record.MutableTableStats()->SetTxCompleteLagMsec(TabletCounters->Simple()[COUNTER_TX_COMPLETE_LAG].Get());
ev->Record.MutableTableStats()->SetInFlightTxCount(TabletCounters->Simple()[COUNTER_TX_IN_FLY].Get()
+ TabletCounters->Simple()[COUNTER_IMMEDIATE_TX_IN_FLY].Get());
Expand Down

0 comments on commit ac92ce0

Please sign in to comment.