Skip to content

Commit

Permalink
schemeshard: preserialize Table.SplitBoundary for describe result (yd…
Browse files Browse the repository at this point in the history
…b-platform#6648)

Preserialize table's split boundaries the same way table partitions are. The size of both depend on the same variable: number of shards in the table, but TablePartitions was preserialized (and cached) while Table.SplitBoundaries wasn't. Preserializing all potentially huge parts of DescribeSchemeResult message before it gets to the interconnect saves interconnect actors additional serialization costs. And when partitioning of the huge tables goes through the period of a rapid change that additional serialization causes interconnect to overload.

Single shortcoming though: preserialized SplitBoundary is not used (cannot be used) when boundaries of the index tables are requested through describe request on table index.

KIKIMR-21686
  • Loading branch information
ijon authored Jul 16, 2024
1 parent c5d412c commit 83a86c2
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ TVector<ISubOperation::TPtr> CreateAlterContinuousBackup(TOperationId opId, cons
const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry;

NKikimrSchemeOp::TTableDescription schema;
context.SS->DescribeTable(table, typeRegistry, true, false, &schema);
context.SS->DescribeTable(table, typeRegistry, true, &schema);
schema.MutablePartitionConfig()->CopyFrom(table->TableDescription.GetPartitionConfig());

TString errStr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void PrepareScheme(NKikimrSchemeOp::TTableDescription* schema, const TString& na
const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry;

NKikimrSchemeOp::TTableDescription completedSchema;
context.SS->DescribeTable(srcTableInfo, typeRegistry, true, false, &completedSchema);
context.SS->DescribeTable(srcTableInfo, typeRegistry, true, &completedSchema);
completedSchema.SetName(name);

//inherit all from Src except PartitionConfig, PartitionConfig could be altered
Expand Down
5 changes: 3 additions & 2 deletions ydb/core/tx/schemeshard/schemeshard_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,9 @@ void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool f
} else if (node->PathType == NKikimrSchemeOp::EPathType::EPathTypeTable) {
Y_ABORT_UNLESS(Tables.contains(node->PathId));
TTableInfo::TPtr tabletInfo = Tables.at(node->PathId);
tabletInfo->PreSerializedPathDescription.clear();
tabletInfo->PreSerializedPathDescriptionWithoutRangeKey.clear();
tabletInfo->PreserializedTablePartitions.clear();
tabletInfo->PreserializedTablePartitionsNoKeys.clear();
tabletInfo->PreserializedTableSplitBoundaries.clear();
}
}

Expand Down
3 changes: 1 addition & 2 deletions ydb/core/tx/schemeshard/schemeshard_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ class TSchemeShard
void FillAsyncIndexInfo(const TPathId& tableId, NKikimrTxDataShard::TFlatSchemeTransaction& tx);

void DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry,
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const;
bool fillConfig, NKikimrSchemeOp::TTableDescription* entry) const;
void DescribeTableIndex(const TPathId& pathId, const TString& name,
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry
) const;
Expand All @@ -1031,7 +1031,6 @@ class TSchemeShard
void DescribeReplication(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TReplicationDescription& desc);
void DescribeReplication(const TPathId& pathId, const TString& name, TReplicationInfo::TPtr info, NKikimrSchemeOp::TReplicationDescription& desc);
void DescribeBlobDepot(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TBlobDepotDescription& desc);
static void FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>& boundaries);

void Handle(NKikimr::NOlap::NBackground::TEvExecuteGeneralLocalTransaction::TPtr& ev, const TActorContext& ctx);
void Handle(NKikimr::NOlap::NBackground::TEvRemoveSession::TPtr& ev, const TActorContext& ctx);
Expand Down
13 changes: 7 additions & 6 deletions ydb/core/tx/schemeshard/schemeshard_info_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,10 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData(
const TTableInfo::TColumn& sourceColumn = source->Columns[colId];

if (col.HasDefaultFromSequence()) {
if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64
if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64
&& NPg::PgTypeIdFromTypeDesc(sourceColumn.PType.GetTypeDesc()) != INT8OID) {
TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg
? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID))
TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg
? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID))
: NScheme::TypeName(NScheme::NTypeIds::Int64);
errStr = Sprintf(
"Sequence value type '%s' must be equal to the column type '%s'", sequenceType.c_str(),
Expand Down Expand Up @@ -423,7 +423,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData(
return nullptr;
default:
break;
}
}
}
} else {
auto* typeDesc = NPg::TypeDescFromPgTypeName(typeName);
Expand Down Expand Up @@ -1586,8 +1586,9 @@ void TTableInfo::SetPartitioning(TVector<TTableShardInfo>&& newPartitioning) {
Stats.PartitionStats.swap(newPartitionStats);
Stats.Aggregated = newAggregatedStats;
Partitions.swap(newPartitioning);
PreSerializedPathDescription.clear();
PreSerializedPathDescriptionWithoutRangeKey.clear();
PreserializedTablePartitions.clear();
PreserializedTablePartitionsNoKeys.clear();
PreserializedTableSplitBoundaries.clear();

CondEraseSchedule.clear();
InFlightCondErase.clear();
Expand Down
7 changes: 5 additions & 2 deletions ydb/core/tx/schemeshard/schemeshard_info_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,8 +436,11 @@ struct TTableInfo : public TSimpleRefCount<TTableInfo> {
TMap<TTxId, TBackupRestoreResult> BackupHistory;
TMap<TTxId, TBackupRestoreResult> RestoreHistory;

TString PreSerializedPathDescription;
TString PreSerializedPathDescriptionWithoutRangeKey;
// Preserialized TDescribeSchemeResult with PathDescription.TablePartitions field filled
TString PreserializedTablePartitions;
TString PreserializedTablePartitionsNoKeys;
// Preserialized TDescribeSchemeResult with PathDescription.Table.SplitBoundary field filled
TString PreserializedTableSplitBoundaries;

THashMap<TShardIdx, NKikimrSchemeOp::TPartitionConfig> PerShardPartitionConfig;

Expand Down
169 changes: 97 additions & 72 deletions ydb/core/tx/schemeshard/schemeshard_path_describer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,68 @@ void TPathDescriber::DescribeDir(const TPath& path) {
DescribeChildren(path);
}

void FillTableBoundaries(
google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>* result,
const TTableInfo::TPtr tableInfo
) {
TString errStr;
// Number of split boundaries equals to number of partitions - 1
result->Reserve(tableInfo->GetPartitions().size() - 1);
for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) {
const auto& p = tableInfo->GetPartitions()[pi];
TSerializedCellVec endKey(p.EndOfRange);
auto boundary = result->Add()->MutableKeyPrefix();
for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){
const auto& c = endKey.GetCells()[ki];
auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType;
bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr);
Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data());
}
}
}

void FillTablePartitions(
google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TTablePartition>* result,
const TTableInfo::TPtr tableInfo,
const THashMap<TShardIdx, TShardInfo>& shardInfos,
bool includeKeys
) {
result->Reserve(tableInfo->GetPartitions().size());
for (auto& p : tableInfo->GetPartitions()) {
const auto& tabletId = ui64(shardInfos.at(p.ShardIdx).TabletID);
const auto& key = p.EndOfRange;

auto part = result->Add();
part->SetDatashardId(tabletId);
if (includeKeys) {
// Currently we only support uniform partitioning where each range is [start, end)
// +inf as the end of the last range is represented by empty TCell vector
part->SetIsPoint(false);
part->SetIsInclusive(false);
part->SetEndOfRangeKeyPrefix(key);
}
}
}

const TString& GetSerializedTablePartitions(
const TTableInfo::TPtr tableInfo,
const THashMap<TShardIdx, TShardInfo>& shardInfos,
bool returnRangeKey
) {
TString& cache = (returnRangeKey
? tableInfo->PreserializedTablePartitions
: tableInfo->PreserializedTablePartitionsNoKeys
);

if (cache.empty()) {
NKikimrScheme::TEvDescribeSchemeResult result;
FillTablePartitions(result.MutablePathDescription()->MutableTablePartitions(), tableInfo, shardInfos, returnRangeKey);
Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&cache);
}

return cache;
}

void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPathElement::TPtr pathEl) {
const NScheme::TTypeRegistry* typeRegistry = AppData(ctx)->TypeRegistry;
const TTableInfo::TPtr tableInfo = *Self->Tables.FindPtr(pathId);
Expand All @@ -244,50 +306,30 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa
returnRangeKey = Params.GetOptions().GetReturnRangeKey();
}

Self->DescribeTable(tableInfo, typeRegistry, returnConfig, returnBoundaries, entry);
Self->DescribeTable(tableInfo, typeRegistry, returnConfig, entry);
entry->SetName(pathEl->Name);

if (returnPartitioning) {
// partitions
if (tableInfo->PreSerializedPathDescription.empty()) {
if (returnBoundaries) {
// split boundaries (split keys without shard's tablet-ids)
if (tableInfo->PreserializedTableSplitBoundaries.empty()) {
NKikimrScheme::TEvDescribeSchemeResult preSerializedResult;
NKikimrScheme::TEvDescribeSchemeResult preSerializedResultWithoutRangeKey;

NKikimrSchemeOp::TPathDescription& pathDescription = *preSerializedResult.MutablePathDescription();
NKikimrSchemeOp::TPathDescription& pathDescriptionWithoutRangeKey = *preSerializedResultWithoutRangeKey.MutablePathDescription();

pathDescription.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size());
pathDescriptionWithoutRangeKey.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size());
for (auto& p : tableInfo->GetPartitions()) {
auto part = pathDescription.AddTablePartitions();
auto partWithoutRangeKey = pathDescriptionWithoutRangeKey.AddTablePartitions();
auto datashardIdx = p.ShardIdx;
auto datashardTabletId = Self->ShardInfos[datashardIdx].TabletID;
// Currently we only support uniform partitioning where each range is [start, end)
// +inf as the end of the last range is represented by empty TCell vector
part->SetDatashardId(ui64(datashardTabletId));
partWithoutRangeKey->SetDatashardId(ui64(datashardTabletId));

part->SetIsPoint(false);
partWithoutRangeKey->SetIsPoint(false);

part->SetIsInclusive(false);
partWithoutRangeKey->SetIsInclusive(false);

part->SetEndOfRangeKeyPrefix(p.EndOfRange);
}
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreSerializedPathDescription);
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResultWithoutRangeKey.SerializeToString(&tableInfo->PreSerializedPathDescriptionWithoutRangeKey);
}
if (returnRangeKey) {
Result->PreSerializedData += tableInfo->PreSerializedPathDescription;
} else {
Result->PreSerializedData += tableInfo->PreSerializedPathDescriptionWithoutRangeKey;
}
if (!pathEl->IsCreateFinished()) {
tableInfo->PreSerializedPathDescription.clear(); // KIKIMR-4337
tableInfo->PreSerializedPathDescriptionWithoutRangeKey.clear();
auto& tableDesc = *preSerializedResult.MutablePathDescription()->MutableTable();
FillTableBoundaries(tableDesc.MutableSplitBoundary(), tableInfo);
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreserializedTableSplitBoundaries);
}
Result->PreSerializedData += tableInfo->PreserializedTableSplitBoundaries;
}

if (returnPartitioning) {
// partitions (shard tablet-ids with range keys)
Result->PreSerializedData += GetSerializedTablePartitions(tableInfo, Self->ShardInfos, returnRangeKey);
}

// KIKIMR-4337: table info is in flux until table is finally created
if (!pathEl->IsCreateFinished()) {
tableInfo->PreserializedTablePartitions.clear();
tableInfo->PreserializedTablePartitionsNoKeys.clear();
tableInfo->PreserializedTableSplitBoundaries.clear();
}

FillAggregatedStats(*Result->Record.MutablePathDescription(), tableInfo->GetStats());
Expand Down Expand Up @@ -1128,8 +1170,12 @@ THolder<TEvSchemeShard::TEvDescribeSchemeResultBuilder> DescribePath(
return DescribePath(self, ctx, pathId, options);
}

void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry,
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const
void TSchemeShard::DescribeTable(
const TTableInfo::TPtr tableInfo,
const NScheme::TTypeRegistry* typeRegistry,
bool fillConfig,
NKikimrSchemeOp::TTableDescription* entry
) const
{
Y_UNUSED(typeRegistry);
THashMap<ui32, TString> familyNames;
Expand Down Expand Up @@ -1198,10 +1244,6 @@ void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme
FillPartitionConfig(tableInfo->PartitionConfig(), *entry->MutablePartitionConfig());
}

if (fillBoundaries) {
FillTableBoundaries(tableInfo, *entry->MutableSplitBoundary());
}

if (tableInfo->HasTTLSettings()) {
entry->MutableTTLSettings()->CopyFrom(tableInfo->TTLSettings());
}
Expand Down Expand Up @@ -1244,32 +1286,32 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name
*entry.MutableDataColumnNames()->Add() = dataColumns;
}

auto* indexPath = PathsById.FindPtr(pathId);
auto indexPath = *PathsById.FindPtr(pathId);
Y_ABORT_UNLESS(indexPath);
const ui8 expectedIndexImplTableCount = indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree ? 2 : 1;
Y_ABORT_UNLESS((*indexPath)->GetChildren().size() == expectedIndexImplTableCount);
Y_ABORT_UNLESS(indexPath->GetChildren().size() == expectedIndexImplTableCount);

ui64 dataSize = 0;
for (const auto& indexImplTablePathId : (*indexPath)->GetChildren()) {
auto* tableInfo = Tables.FindPtr(indexImplTablePathId.second);
for (const auto& indexImplTablePathId : indexPath->GetChildren()) {
auto tableInfo = *Tables.FindPtr(indexImplTablePathId.second);
Y_ABORT_UNLESS(tableInfo);

const auto& tableStats = (*tableInfo)->GetStats().Aggregated;
const auto& tableStats = tableInfo->GetStats().Aggregated;
dataSize += tableStats.DataSize + tableStats.IndexSize;

auto* tableDescription = entry.AddIndexImplTableDescriptions();
if (fillConfig) {
FillPartitionConfig((*tableInfo)->PartitionConfig(), *tableDescription->MutablePartitionConfig());
FillPartitionConfig(tableInfo->PartitionConfig(), *tableDescription->MutablePartitionConfig());
}
if (fillBoundaries) {
FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary());
FillTableBoundaries(tableDescription->MutableSplitBoundary(), tableInfo);
}
}
entry.SetDataSize(dataSize);

if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) {
if (const auto* vectorIndexKmeansTreeDescription = std::get_if<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(&indexInfo->SpecializedIndexDescription)) {
const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings();
const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings();
auto entrySettings = entry.MutableVectorIndexKmeansTreeDescription()->MutableSettings();
if (indexInfoSettings.has_distance())
entrySettings->set_distance(indexInfoSettings.distance());
Expand All @@ -1283,7 +1325,7 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name
Y_FAIL_S("SpecializedIndexDescription should be set");
}
}

}

void TSchemeShard::DescribeCdcStream(const TPathId& pathId, const TString& name,
Expand Down Expand Up @@ -1429,22 +1471,5 @@ void TSchemeShard::DescribeBlobDepot(const TPathId& pathId, const TString& name,
desc.SetTabletId(static_cast<ui64>(it->second->BlobDepotTabletId));
}

void TSchemeShard::FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>& boundaries) {
TString errStr;
// Number of split boundaries equals to number of partitions - 1
boundaries.Reserve(tableInfo->GetPartitions().size() - 1);
for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) {
const auto& p = tableInfo->GetPartitions()[pi];
TSerializedCellVec endKey(p.EndOfRange);
auto boundary = boundaries.Add()->MutableKeyPrefix();
for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){
const auto& c = endKey.GetCells()[ki];
auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType;
bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr);
Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data());
}
}
}

} // NSchemeShard
} // NKikimr

0 comments on commit 83a86c2

Please sign in to comment.