Skip to content

Commit

Permalink
Add builtins for struct operations (#4058)
Browse files Browse the repository at this point in the history
  • Loading branch information
avevad authored May 7, 2024
1 parent e3a8844 commit 6dceba0
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 0 deletions.
140 changes: 140 additions & 0 deletions ydb/library/yql/core/type_ann/type_ann_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6074,6 +6074,142 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Ok;
}

// Shared handler for the StructUnion, StructIntersection, StructDifference and
// StructSymmetricDifference callables.
//
// The callable is replaced by an AsStruct whose members are picked by comparing
// the member names of the two struct arguments:
//   * StructUnion               - every member of either struct;
//   * StructIntersection        - members present in both structs;
//   * StructDifference          - members present only in the first struct;
//   * StructSymmetricDifference - members present in exactly one struct.
// Members of the first struct are emitted first, followed by the members taken
// from the second struct.
//
// Each member value is produced by applying a three-argument lambda to
// (String(name), optional first-struct value, optional second-struct value).
// Union and intersection accept that lambda as an optional third argument;
// when it is omitted, Unwrap(Coalesce(first, second)) is used.
//
// Returns Error when the argument count is out of range, an argument carries an
// error type or is not a struct; forwards any non-Ok status of the lambda
// conversion; otherwise stores the replacement in `output` and returns Repeat.
IGraphTransformer::TStatus StructMergeWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
    const bool isUnion = input->Content() == "StructUnion";
    const bool isIntersection = input->Content() == "StructIntersection";
    const bool isDifference = input->Content() == "StructDifference";
    const bool isSymmDifference = input->Content() == "StructSymmetricDifference";

    // Both difference flavours take exactly two arguments, the others allow a third.
    const auto argsLimit = (isDifference || isSymmDifference) ? 2 : 3;
    if (!EnsureMinMaxArgsCount(*input, 2, argsLimit, ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    auto lhs = input->Child(0);
    auto rhs = input->Child(1);

    if (HasError(lhs->GetTypeAnn(), ctx.Expr) || HasError(rhs->GetTypeAnn(), ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    if (!EnsureStructType(*lhs, ctx.Expr) || !EnsureStructType(*rhs, ctx.Expr)) {
        return IGraphTransformer::TStatus::Error;
    }

    const auto lhsType = lhs->GetTypeAnn()->Cast<TStructExprType>();
    const auto rhsType = rhs->GetTypeAnn()->Cast<TStructExprType>();
    const auto pos = input->Pos();

    TExprNode::TPtr merger;
    if (input->ChildrenSize() != 3) {
        // No user lambda supplied: fall back to Unwrap(Coalesce(left, right)).
        merger = ctx.Expr.Builder(pos)
            .Lambda()
                .Param("name")
                .Param("left")
                .Param("right")
                .Callable("Unwrap")
                    .Callable(0, "Coalesce")
                        .Arg(0, "left")
                        .Arg(1, "right")
                    .Seal()
                .Seal()
            .Seal()
            .Build();
    } else {
        merger = input->ChildPtr(2);
        // NOTE(review): the converted node is kept only in this local and is never
        // written back into `input`; if ConvertToLambda can rewrite the node and
        // report a non-Ok status, that rewrite is dropped by the early return below
        // - confirm this is intended.
        const auto status = ConvertToLambda(merger, ctx.Expr, 3);
        if (status.Level != IGraphTransformer::TStatus::Ok) {
            return status;
        }
    }

    // One lambda argument for one side: Just(Member(owner, name)) when that side
    // has the member, otherwise a Nothing typed as an optional of the member's
    // type taken from the opposite struct (`donorType`).
    const auto sideValue = [&](const TExprNode::TPtr& owner, const TStructExprType* donorType, const TStringBuf& name, bool present) -> TExprNode::TPtr {
        if (present) {
            return ctx.Expr.Builder(pos)
                .Callable("Just")
                    .Callable(0, "Member")
                        .Add(0, owner)
                        .Atom(1, name)
                    .Seal()
                .Seal()
                .Build();
        }

        const auto* optionalType = ctx.Expr.MakeType<TOptionalExprType>(donorType->FindItemType(name));
        return ctx.Expr.NewCallable(pos, "Nothing", {
            ExpandType(pos, *optionalType, ctx.Expr)
        });
    };

    // Builds the (name, merger(String(name), left, right)) pair consumed by AsStruct.
    const auto makeEntry = [&](const TStringBuf& name, bool inLeft, bool inRight) -> TExprNode::TPtr {
        auto lhsValue = sideValue(lhs, rhsType, name, inLeft);
        auto rhsValue = sideValue(rhs, lhsType, name, inRight);

        return ctx.Expr.Builder(pos)
            .List()
                .Atom(0, name)
                .Apply(1, merger)
                    .With(0)
                        .Callable("String")
                            .Atom(0, name)
                        .Seal()
                    .Done()
                    .With(1, lhsValue)
                    .With(2, rhsValue)
                .Seal()
            .Seal()
            .Build();
    };

    TExprNode::TListType members;

    // Pass 1: members of the first struct.
    for (const auto* item : lhsType->GetItems()) {
        const auto& name = item->GetName();
        const bool shared = rhsType->FindItem(name).Defined();
        const bool keep = isUnion
            || (isIntersection && shared)
            || ((isDifference || isSymmDifference) && !shared);
        if (keep) {
            members.push_back(makeEntry(name, true, shared));
        }
    }

    // Pass 2: members found only in the second struct; only union and symmetric
    // difference include them.
    if (isUnion || isSymmDifference) {
        for (const auto* item : rhsType->GetItems()) {
            const auto& name = item->GetName();
            if (!leftTypeHas(lhsType, name)) {
                members.push_back(makeEntry(name, false, true));
            }
        }
    }

    output = ctx.Expr.NewCallable(pos, "AsStruct", std::move(members));
    return IGraphTransformer::TStatus::Repeat;
}

IGraphTransformer::TStatus StaticMapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
if (!EnsureArgsCount(*input, 2, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
Expand Down Expand Up @@ -12238,6 +12374,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["PgGrouping"] = &PgGroupingWrapper;
Functions["PgGroupingSet"] = &PgGroupingSetWrapper;
Functions["PgToRecord"] = &PgToRecordWrapper;
Functions["StructUnion"] = &StructMergeWrapper;
Functions["StructIntersection"] = &StructMergeWrapper;
Functions["StructDifference"] = &StructMergeWrapper;
Functions["StructSymmetricDifference"] = &StructMergeWrapper;

Functions["AutoDemux"] = &AutoDemuxWrapper;
Functions["AggrCountInit"] = &AggrCountInitWrapper;
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/sql/v1/builtin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3070,6 +3070,10 @@ struct TBuiltinFuncData {
{"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
{"staticmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticMap", 2, 2) },
{"staticzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticZip", 1, -1) },
{"structunion", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructUnion", 2, 3)},
{"structintersection", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructIntersection", 2, 3)},
{"structdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructDifference", 2, 2)},
{"structsymmetricdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructSymmetricDifference", 2, 2)},
{"staticfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold", 3, 3)},
{"staticfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold1", 3, 3)},

Expand Down
22 changes: 22 additions & 0 deletions ydb/library/yql/tests/sql/dq_file/part12/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,28 @@
"uri": "file://test.test_expr-non_persistable_group_by_column_fail--Results_/extracted"
}
],
"test.test[expr-struct_merge-default.txt-Analyze]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1775059/79f40817d9be6347f8a0a937bdd3c46c326ab7d3/resource.tar.gz#test.test_expr-struct_merge-default.txt-Analyze_/plan.txt"
}
],
"test.test[expr-struct_merge-default.txt-Debug]": [
{
"checksum": "7ed8bb90b0fd6a7a9c734d5e24ec3a79",
"size": 867,
"uri": "https://{canondata_backend}/1689644/d939c79f1c25569f7b8f4e5b740e070ad72d7ad7/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[expr-struct_merge-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1775059/79f40817d9be6347f8a0a937bdd3c46c326ab7d3/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-struct_merge-default.txt-Results]": [],
"test.test[flatten_by-flatten_one_field--Analyze]": [
{
"checksum": "dfeb435c40cd1a0a98c74310e1507366",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/hybrid_file/part6/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,20 @@
"uri": "https://{canondata_backend}/1775059/3cb7d014d70b84dbcb84645fa987dd9d47d7fd6c/resource.tar.gz#test.test_expr-many_opt_comp-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-struct_merge-default.txt-Debug]": [
{
"checksum": "ab77466270296597939428807f0af395",
"size": 866,
"uri": "https://{canondata_backend}/1942415/ecf45b8d311b13ba55e2de94295cabed9b642863/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[expr-struct_merge-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1936842/e15468da5c6a430935df259a2106604daa68ad66/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-uuid_order-default.txt-Debug]": [
{
"checksum": "dd888f0b22d793979dbf237917d203dd",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -5991,6 +5991,13 @@
"uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_expr-struct_literal_members_/sql.yql"
}
],
"test_sql2yql.test[expr-struct_merge]": [
{
"checksum": "e3781bd00212a17b07691294caa0c1b0",
"size": 3095,
"uri": "https://{canondata_backend}/1916746/116cafe28e270e7917dbeab5e0d1b5f2357e5c16/resource.tar.gz#test_sql2yql.test_expr-struct_merge_/sql.yql"
}
],
"test_sql2yql.test[expr-struct_slice]": [
{
"checksum": "4d0f79865e785d3f3b0e9e0110bb1f86",
Expand Down Expand Up @@ -24443,6 +24450,13 @@
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_expr-struct_literal_members_/formatted.sql"
}
],
"test_sql_format.test[expr-struct_merge]": [
{
"checksum": "509cfc4518e9c467b2cd05a5e568c00b",
"size": 413,
"uri": "https://{canondata_backend}/1916746/116cafe28e270e7917dbeab5e0d1b5f2357e5c16/resource.tar.gz#test_sql_format.test_expr-struct_merge_/formatted.sql"
}
],
"test_sql_format.test[expr-struct_slice]": [
{
"checksum": "8a9f027371f1722b5753e5b53cf62346",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/suites/expr/struct_merge.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* syntax version 1 */

-- Custom merge callback: adds the two sides, treating a NULL side as 0.
$merge = ($_name, $l, $r) -> { return Coalesce($l, 0) + Coalesce($r, 0); };
-- The two inputs share only member `c`; `a`, `b` exist only on the left and
-- `d`, `e` only on the right.
$left = <|a: 1, b: 2, c: 3|>;
$right = <|c: 1, d: 2, e: 3|>;

-- Calls every struct set builtin once; union and intersection are called both
-- without and with the custom merge callback.
SELECT
StructUnion($left, $right),
StructUnion($left, $right, $merge),
StructIntersection($left, $right),
StructIntersection($left, $right, $merge),
StructDifference($left, $right),
StructSymmetricDifference($left, $right)
;
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,27 @@
"uri": "file://test.test_expr-non_persistable_group_by_column_fail--Results_/extracted"
}
],
"test.test[expr-struct_merge-default.txt-Debug]": [
{
"checksum": "54a15a62a123d1a72d9190f26324aa13",
"size": 796,
"uri": "https://{canondata_backend}/1784826/6c4ac0f02ea872d52d4b59ee9d0f2b2963fe6800/resource.tar.gz#test.test_expr-struct_merge-default.txt-Debug_/opt.yql"
}
],
"test.test[expr-struct_merge-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1881367/84017fd57088f9d554efcf1a44f82b22e5b164b7/resource.tar.gz#test.test_expr-struct_merge-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-struct_merge-default.txt-Results]": [
{
"checksum": "83bf749394c8035e5f04d3cf2e23c44c",
"size": 9246,
"uri": "https://{canondata_backend}/1784826/6c4ac0f02ea872d52d4b59ee9d0f2b2963fe6800/resource.tar.gz#test.test_expr-struct_merge-default.txt-Results_/results.txt"
}
],
"test.test[flatten_by-flatten_one_field--Debug]": [
{
"checksum": "1e1f4bdf8614f3314eb9a5b53d01d8db",
Expand Down

0 comments on commit 6dceba0

Please sign in to comment.