Skip to content

Commit

Permalink
Query: Adds Distribution for MakeList and MakeSet (#4490)
Browse files Browse the repository at this point in the history
* added MakeList and MakeSet aggregators

* Added MakeList and MakeSet to AggregateQueryTests.cs

* Adjust Aggregators

* Add Array Aggregate Continuation Token Test

* Added group by coverage for MakeList and MakeSet

* address comments

* cleaning

* Refactored test to better detect when to ignore result order

* cleaning

* cleaning, update baseline

* cleaning

* removed old comment

* cleaning/refactoring

* cleaning

* Add explicit cases to hit continuation token limit.

* Added additional case to GroupBy tests

* cleaning

* cleaning

* cleaning, updated baseline test

* cleaning, updated baseline test

* Added coverage to QueryPlanBaselineTests.cs

* refactored
  • Loading branch information
ezrahaleva-msft committed Jun 20, 2024
1 parent 272e844 commit 179d9a4
Show file tree
Hide file tree
Showing 10 changed files with 1,466 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ internal enum AggregateOperator
{
Average,
Count,
MakeList,
MakeSet,
Max,
Min,
Sum,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Query.Core.Pipeline.Aggregate.Aggregators
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Text;
using Microsoft.Azure.Cosmos.CosmosElements;
using Microsoft.Azure.Cosmos.CosmosElements.Numbers;
using Microsoft.Azure.Cosmos.Query.Core.Exceptions;
using Microsoft.Azure.Cosmos.Query.Core.Monads;

/// <summary>
/// Aggregator that concatenates array-valued partial results into a single list.
/// Every element of every array passed to <see cref="Aggregate"/> is appended, in arrival order,
/// and duplicates are kept.
/// </summary>
internal sealed class MakeListAggregator : IAggregator
{
    /// <summary>
    /// Running concatenation of all elements seen so far.
    /// </summary>
    private readonly List<CosmosElement> globalList;

    /// <summary>
    /// Initializes the aggregator and seeds it with the elements of <paramref name="initialList"/>.
    /// </summary>
    /// <param name="initialList">Elements to start from (empty when there is no prior state).</param>
    private MakeListAggregator(CosmosArray initialList)
    {
        this.globalList = new List<CosmosElement>();

        this.Aggregate(initialList);
    }

    /// <summary>
    /// Appends every element of <paramref name="localList"/> to the running list.
    /// </summary>
    /// <param name="localList">A partial result; must be a <see cref="CosmosArray"/>.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="localList"/> is not an array (including null).</exception>
    public void Aggregate(CosmosElement localList)
    {
        if (!(localList is CosmosArray cosmosArray))
        {
            throw new ArgumentException($"{nameof(localList)} must be an array.");
        }

        // AddRange enumerates the array directly; no intermediate List copy is needed.
        this.globalList.AddRange(cosmosArray);
    }

    /// <summary>
    /// Builds a <see cref="CosmosArray"/> from everything aggregated so far.
    /// </summary>
    /// <returns>The aggregated list as a Cosmos array.</returns>
    public CosmosElement GetResult()
    {
        return CosmosArray.Create(this.globalList);
    }

    /// <summary>
    /// Creates an aggregator, optionally resuming from a continuation token.
    /// </summary>
    /// <param name="continuationToken">
    /// Null to start empty; otherwise it must be a <see cref="CosmosArray"/> holding the elements aggregated previously.
    /// </param>
    /// <returns>
    /// A successful <see cref="TryCatch{IAggregator}"/> wrapping the aggregator, or a failed one carrying a
    /// <see cref="MalformedContinuationTokenException"/> when the token is not an array.
    /// </returns>
    public static TryCatch<IAggregator> TryCreate(CosmosElement continuationToken)
    {
        CosmosArray partialList;
        if (continuationToken != null)
        {
            if (!(continuationToken is CosmosArray cosmosPartialList))
            {
                return TryCatch<IAggregator>.FromException(
                    new MalformedContinuationTokenException($@"Invalid MakeList continuation token: ""{continuationToken}""."));
            }

            partialList = cosmosPartialList;
        }
        else
        {
            partialList = CosmosArray.Empty;
        }

        return TryCatch<IAggregator>.FromResult(new MakeListAggregator(initialList: partialList));
    }

    /// <summary>
    /// Gets the continuation token, which is the same array returned by <see cref="GetResult"/>.
    /// </summary>
    /// <returns>The current aggregated list as a Cosmos array.</returns>
    public CosmosElement GetCosmosElementContinuationToken()
    {
        return this.GetResult();
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Query.Core.Pipeline.Aggregate.Aggregators
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Text;
using Microsoft.Azure.Cosmos.CosmosElements;
using Microsoft.Azure.Cosmos.CosmosElements.Numbers;
using Microsoft.Azure.Cosmos.Query.Core.Exceptions;
using Microsoft.Azure.Cosmos.Query.Core.Monads;

/// <summary>
/// Aggregator that unions array-valued partial results into a single set.
/// Elements of every array passed to <see cref="Aggregate"/> are added to a <see cref="HashSet{T}"/>,
/// so elements that compare equal are kept only once.
/// </summary>
internal sealed class MakeSetAggregator : IAggregator
{
    /// <summary>
    /// Distinct elements seen so far.
    /// </summary>
    private readonly HashSet<CosmosElement> globalSet;

    /// <summary>
    /// Initializes the aggregator and seeds it with the elements of <paramref name="initialSet"/>.
    /// </summary>
    /// <param name="initialSet">Elements to start from (empty when there is no prior state).</param>
    private MakeSetAggregator(CosmosArray initialSet)
    {
        this.globalSet = new HashSet<CosmosElement>();

        this.Aggregate(initialSet);
    }

    /// <summary>
    /// Adds every element of <paramref name="localSet"/> that is not already present to the running set.
    /// </summary>
    /// <param name="localSet">A partial result; must be a <see cref="CosmosArray"/>.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="localSet"/> is not an array (including null).</exception>
    public void Aggregate(CosmosElement localSet)
    {
        if (!(localSet is CosmosArray cosmosArray))
        {
            throw new ArgumentException($"{nameof(localSet)} must be an array.");
        }

        // UnionWith enumerates the array directly; no intermediate List copy is needed.
        this.globalSet.UnionWith(cosmosArray);
    }

    /// <summary>
    /// Builds a <see cref="CosmosArray"/> from the distinct elements aggregated so far.
    /// </summary>
    /// <returns>The aggregated set as a Cosmos array.</returns>
    public CosmosElement GetResult()
    {
        return CosmosArray.Create(this.globalSet);
    }

    /// <summary>
    /// Gets the continuation token as a string.
    /// </summary>
    /// <returns>The string form of <see cref="GetCosmosElementContinuationToken"/>.</returns>
    public string GetContinuationToken()
    {
        // HashSet<T>.ToString() only yields the CLR type name, never the contents, so stringify
        // the element-based token instead to keep both token accessors in agreement.
        // NOTE(review): relies on CosmosElement.ToString() rendering the element's content - confirm.
        return this.GetCosmosElementContinuationToken().ToString();
    }

    /// <summary>
    /// Creates an aggregator, optionally resuming from a continuation token.
    /// </summary>
    /// <param name="continuationToken">
    /// Null to start empty; otherwise it must be a <see cref="CosmosArray"/> holding the elements aggregated previously.
    /// </param>
    /// <returns>
    /// A successful <see cref="TryCatch{IAggregator}"/> wrapping the aggregator, or a failed one carrying a
    /// <see cref="MalformedContinuationTokenException"/> when the token is not an array.
    /// </returns>
    public static TryCatch<IAggregator> TryCreate(CosmosElement continuationToken)
    {
        CosmosArray partialSet;
        if (continuationToken != null)
        {
            if (!(continuationToken is CosmosArray cosmosPartialSet))
            {
                return TryCatch<IAggregator>.FromException(
                    new MalformedContinuationTokenException($@"Invalid MakeSet continuation token: ""{continuationToken}""."));
            }

            partialSet = cosmosPartialSet;
        }
        else
        {
            partialSet = CosmosArray.Empty;
        }

        return TryCatch<IAggregator>.FromResult(new MakeSetAggregator(initialSet: partialSet));
    }

    /// <summary>
    /// Gets the continuation token, which is the same array returned by <see cref="GetResult"/>.
    /// </summary>
    /// <returns>The current aggregated set as a Cosmos array.</returns>
    public CosmosElement GetCosmosElementContinuationToken()
    {
        return this.GetResult();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,14 @@ public static TryCatch<AggregateValue> TryCreate(
tryCreateAggregator = CountAggregator.TryCreate(continuationToken);
break;

case AggregateOperator.MakeList:
tryCreateAggregator = MakeListAggregator.TryCreate(continuationToken);
break;

case AggregateOperator.MakeSet:
tryCreateAggregator = MakeSetAggregator.TryCreate(continuationToken);
break;

case AggregateOperator.Max:
tryCreateAggregator = MinMaxAggregator.TryCreateMaxAggregator(continuationToken);
break;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Results>
<Results>
<Result>
<Query><![CDATA[SELECT VALUE AVG (c.field)
FROM c
Expand Down Expand Up @@ -509,6 +509,210 @@ FROM c
WHERE c.key = "undefinedKey"]]></Query>
<Aggregation><![CDATA[0]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_ARRAY(c.field)]]></Query>
<Aggregation><![CDATA[[[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_BOOL(c.field)]]></Query>
<Aggregation><![CDATA[[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_NULL(c.field)]]></Query>
<Aggregation><![CDATA[[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_NUMBER(c.field)]]></Query>
<Aggregation><![CDATA[[8204,8204,14799,14799,19377,19377,21344,21344,27832,27832,28384,28384,31256,31256,39908,39908,50445,50445,59354,59354,62645,62645,63899,63899,68915,68915,70086,70086,75246,75246,80799,80799,88308,88308,92569,92569,94673,94673,94726,94726]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_OBJECT(c.field)]]></Query>
<Aggregation><![CDATA[[{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_STRING(c.field)]]></Query>
<Aggregation><![CDATA[["0.12541667890055883","0.12541667890055883","0.17028699776636763","0.17028699776636763","0.21411905540810855","0.21411905540810855","0.30774512202839605","0.30774512202839605","0.31831863583918596","0.31831863583918596","0.33943602458547617","0.33943602458547617","0.35054585866189836","0.35054585866189836","0.3936524989053851","0.3936524989053851","0.4173777645534732","0.4173777645534732","0.43862057451094527","0.43862057451094527","0.4645141444469402","0.4645141444469402","0.49734567501458604","0.49734567501458604","0.5207309469211525","0.5207309469211525","0.529118959107026","0.529118959107026","0.5440849543288746","0.5440849543288746","0.5854627101614432","0.5854627101614432","0.8055444181922564","0.8055444181922564","0.8419260577493934","0.8419260577493934","0.8958994657247791","0.8958994657247791","0.917828386145564","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_DEFINED(c.field)]]></Query>
<Aggregation><![CDATA[[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,8204,8204,14799,14799,19377,19377,21344,21344,27832,27832,28384,28384,31256,31256,39908,39908,50445,50445,59354,59354,62645,62645,63899,63899,68915,68915,70086,70086,75246,75246,80799,80799,88308,88308,92569,92569,94673,94673,94726,94726,"0.12541667890055883","0.12541667890055883","0.17028699776636763","0.17028699776636763","0.21411905540810855","0.21411905540810855","0.30774512202839605","0.30774512202839605","0.31831863583918596","0.31831863583918596","0.33943602458547617","0.33943602458547617","0.35054585866189836","0.35054585866189836","0.3936524989053851","0.3936524989053851","0.4173777645534732","0.4173777645534732","0.43862057451094527","0.43862057451094527","0.4645141444469402","0.4645141444469402","0.49734567501458604","0.49734567501458604","0.5207309469211525","0.5207309469211525","0.529118959107026","0.529118959107026","0.5440849543288746","0.5440849543288746","0.5854627101614432","0.5854627101614432","0.8055444181922564","0.8055444181922564","0.8419260577493934","0.8419260577493934","0.8958994657247791","0.8958994657247791","0.917828386145564","0.917828386145564",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE IS_PRIMITIVE(c.field)]]></Query>
<Aggregation><![CDATA[[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,8204,8204,14799,14799,19377,19377,21344,21344,27832,27832,28384,28384,31256,31256,39908,39908,50445,50445,59354,59354,62645,62645,63899,63899,68915,68915,70086,70086,75246,75246,80799,80799,88308,88308,92569,92569,94673,94673,94726,94726,"0.12541667890055883","0.12541667890055883","0.17028699776636763","0.17028699776636763","0.21411905540810855","0.21411905540810855","0.30774512202839605","0.30774512202839605","0.31831863583918596","0.31831863583918596","0.33943602458547617","0.33943602458547617","0.35054585866189836","0.35054585866189836","0.3936524989053851","0.3936524989053851","0.4173777645534732","0.4173777645534732","0.43862057451094527","0.43862057451094527","0.4645141444469402","0.4645141444469402","0.49734567501458604","0.49734567501458604","0.5207309469211525","0.5207309469211525","0.529118959107026","0.529118959107026","0.5440849543288746","0.5440849543288746","0.5854627101614432","0.5854627101614432","0.8055444181922564","0.8055444181922564","0.8419260577493934","0.8419260577493934","0.8958994657247791","0.8958994657247791","0.917828386145564","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "doubleOnly"]]></Query>
<Aggregation><![CDATA[[8204,14799,19377,21344,27832,28384,31256,39908,50445,59354,62645,63899,68915,70086,75246,80799,88308,92569,94673,94726]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "stringOnly"]]></Query>
<Aggregation><![CDATA[["0.12541667890055883","0.17028699776636763","0.21411905540810855","0.30774512202839605","0.31831863583918596","0.33943602458547617","0.35054585866189836","0.3936524989053851","0.4173777645534732","0.43862057451094527","0.4645141444469402","0.49734567501458604","0.5207309469211525","0.529118959107026","0.5440849543288746","0.5854627101614432","0.8055444181922564","0.8419260577493934","0.8958994657247791","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "boolOnly"]]></Query>
<Aggregation><![CDATA[[false,false,false,false,false,false,false,false,false,false,false,true,true,true,true,true,true,true,true,true]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "nullOnly"]]></Query>
<Aggregation><![CDATA[[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "objectOnlyKey"]]></Query>
<Aggregation><![CDATA[[{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "arrayOnlyKey"]]></Query>
<Aggregation><![CDATA[[[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "oneArrayKey"]]></Query>
<Aggregation><![CDATA[[[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "oneObjectKey"]]></Query>
<Aggregation><![CDATA[[{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKELIST (c.field)
FROM c
WHERE c.key = "undefinedKey"]]></Query>
<Aggregation><![CDATA[[]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_ARRAY(c.field)]]></Query>
<Aggregation><![CDATA[[[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_BOOL(c.field)]]></Query>
<Aggregation><![CDATA[[false,true]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_NULL(c.field)]]></Query>
<Aggregation><![CDATA[[null]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_NUMBER(c.field)]]></Query>
<Aggregation><![CDATA[[8204,14799,19377,21344,27832,28384,31256,39908,50445,59354,62645,63899,68915,70086,75246,80799,88308,92569,94673,94726]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_OBJECT(c.field)]]></Query>
<Aggregation><![CDATA[[{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_STRING(c.field)]]></Query>
<Aggregation><![CDATA[["0.12541667890055883","0.17028699776636763","0.21411905540810855","0.30774512202839605","0.31831863583918596","0.33943602458547617","0.35054585866189836","0.3936524989053851","0.4173777645534732","0.43862057451094527","0.4645141444469402","0.49734567501458604","0.5207309469211525","0.529118959107026","0.5440849543288746","0.5854627101614432","0.8055444181922564","0.8419260577493934","0.8958994657247791","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_DEFINED(c.field)]]></Query>
<Aggregation><![CDATA[[null,false,true,8204,14799,19377,21344,27832,28384,31256,39908,50445,59354,62645,63899,68915,70086,75246,80799,88308,92569,94673,94726,"0.12541667890055883","0.17028699776636763","0.21411905540810855","0.30774512202839605","0.31831863583918596","0.33943602458547617","0.35054585866189836","0.3936524989053851","0.4173777645534732","0.43862057451094527","0.4645141444469402","0.49734567501458604","0.5207309469211525","0.529118959107026","0.5440849543288746","0.5854627101614432","0.8055444181922564","0.8419260577493934","0.8958994657247791","0.917828386145564",[],{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE IS_PRIMITIVE(c.field)]]></Query>
<Aggregation><![CDATA[[null,false,true,8204,14799,19377,21344,27832,28384,31256,39908,50445,59354,62645,63899,68915,70086,75246,80799,88308,92569,94673,94726,"0.12541667890055883","0.17028699776636763","0.21411905540810855","0.30774512202839605","0.31831863583918596","0.33943602458547617","0.35054585866189836","0.3936524989053851","0.4173777645534732","0.43862057451094527","0.4645141444469402","0.49734567501458604","0.5207309469211525","0.529118959107026","0.5440849543288746","0.5854627101614432","0.8055444181922564","0.8419260577493934","0.8958994657247791","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "doubleOnly"]]></Query>
<Aggregation><![CDATA[[8204,14799,19377,21344,27832,28384,31256,39908,50445,59354,62645,63899,68915,70086,75246,80799,88308,92569,94673,94726]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "stringOnly"]]></Query>
<Aggregation><![CDATA[["0.12541667890055883","0.17028699776636763","0.21411905540810855","0.30774512202839605","0.31831863583918596","0.33943602458547617","0.35054585866189836","0.3936524989053851","0.4173777645534732","0.43862057451094527","0.4645141444469402","0.49734567501458604","0.5207309469211525","0.529118959107026","0.5440849543288746","0.5854627101614432","0.8055444181922564","0.8419260577493934","0.8958994657247791","0.917828386145564"]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "boolOnly"]]></Query>
<Aggregation><![CDATA[[false,true]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "nullOnly"]]></Query>
<Aggregation><![CDATA[[null]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "objectOnlyKey"]]></Query>
<Aggregation><![CDATA[[{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "arrayOnlyKey"]]></Query>
<Aggregation><![CDATA[[[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "oneArrayKey"]]></Query>
<Aggregation><![CDATA[[[]]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "oneObjectKey"]]></Query>
<Aggregation><![CDATA[[{}]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MAKESET (c.field)
FROM c
WHERE c.key = "undefinedKey"]]></Query>
<Aggregation><![CDATA[[]]]></Aggregation>
</Result>
<Result>
<Query><![CDATA[SELECT VALUE MIN (c.field)
FROM c
Expand Down
Loading

0 comments on commit 179d9a4

Please sign in to comment.