From cabae657f13cd408793856861c6431c9cbb45545 Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Wed, 18 Sep 2024 08:55:34 -0400 Subject: [PATCH 1/9] feat: add a SetMode option to the SetRel spec to allow encoding of distinct/all SQL modifiers --- proto/substrait/algebra.proto | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index eed7bcc79..bffa5524c 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -328,6 +328,7 @@ message SetRel { // inputs. There must be at least two inputs. repeated Rel inputs = 2; SetOp op = 3; + SetMode mode = 4; substrait.extensions.AdvancedExtension advanced_extension = 10; enum SetOp { @@ -338,6 +339,13 @@ message SetRel { SET_OP_INTERSECTION_MULTISET = 4; SET_OP_UNION_DISTINCT = 5; SET_OP_UNION_ALL = 6; + SET_OP_UNION = 7; + } + + enum SetMode { + SET_OP_MODE_UNSPECIFIED = 0; + SET_OP_MODE_DISTINCT = 1; + SET_OP_MODE_ALL = 2; } } From 45013dcac703b3d70e9311666a71ef8571074a12 Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Wed, 18 Sep 2024 13:55:13 -0400 Subject: [PATCH 2/9] deprecated flag --- proto/substrait/algebra.proto | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index bffa5524c..f84bcecb9 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -337,8 +337,10 @@ message SetRel { SET_OP_MINUS_MULTISET = 2; SET_OP_INTERSECTION_PRIMARY = 3; SET_OP_INTERSECTION_MULTISET = 4; - SET_OP_UNION_DISTINCT = 5; - SET_OP_UNION_ALL = 6; + // Deprecated: Use SET_OP_UNION in conjunction with the mode field + // (SET_OP_MODE_DISTINCT or SET_OP_MODE_ALL) instead. 
+ SET_OP_UNION_DISTINCT = 5 [deprecated = true]; + SET_OP_UNION_ALL = 6 [deprecated = true]; SET_OP_UNION = 7; } From fd8c0c05f8e2344538537a863211f0e90556d9d2 Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Wed, 18 Sep 2024 16:13:33 -0400 Subject: [PATCH 3/9] buf lint failures --- proto/substrait/algebra.proto | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index f84bcecb9..c0e83da67 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -345,9 +345,9 @@ message SetRel { } enum SetMode { - SET_OP_MODE_UNSPECIFIED = 0; - SET_OP_MODE_DISTINCT = 1; - SET_OP_MODE_ALL = 2; + SET_MODE_UNSPECIFIED = 0; + SET_MODE_DISTINCT = 1; + SET_MODE_ALL = 2; } } From d745059e029c2ad142b437afe63b3ebebe88d456 Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Thu, 19 Sep 2024 12:24:27 -0400 Subject: [PATCH 4/9] comments --- proto/substrait/algebra.proto | 16 ++++------------ site/docs/relations/logical_relations.md | 8 ++++++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index c0e83da67..440c173b2 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -328,7 +328,6 @@ message SetRel { // inputs. There must be at least two inputs. repeated Rel inputs = 2; SetOp op = 3; - SetMode mode = 4; substrait.extensions.AdvancedExtension advanced_extension = 10; enum SetOp { @@ -337,17 +336,10 @@ message SetRel { SET_OP_MINUS_MULTISET = 2; SET_OP_INTERSECTION_PRIMARY = 3; SET_OP_INTERSECTION_MULTISET = 4; - // Deprecated: Use SET_OP_UNION in conjunction with the mode field - // (SET_OP_MODE_DISTINCT or SET_OP_MODE_ALL) instead. 
- SET_OP_UNION_DISTINCT = 5 [deprecated = true]; - SET_OP_UNION_ALL = 6 [deprecated = true]; - SET_OP_UNION = 7; - } - - enum SetMode { - SET_MODE_UNSPECIFIED = 0; - SET_MODE_DISTINCT = 1; - SET_MODE_ALL = 2; + SET_OP_UNION_DISTINCT = 5; + SET_OP_UNION_ALL = 6; + SET_OP_MINUS_DISTINCT = 7; + SET_OP_INTERSECTION_DISTINCT = 8; } } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index d71b57d2f..57838748a 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -270,12 +270,14 @@ The set operation type determines both the records that are emitted and the type | Property | Description | Output Nullability | ----------------------- | ------------------------------------------------------------------------------------------------------------- | ----------------------------- | -| Minus (Primary) | Returns all records from the primary input excluding any matching records from secondary inputs. | The same as the primary input. +| Minus (Primary) | Returns all records (including duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. | Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. -| Intersection (Primary) | Returns all records from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Primary) | Returns all records (including duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. 
| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. | Union Distinct | Returns all the records from each set, removing any rows that are duplicated (within or across sets). | If a field is nullable in any of the inputs, it is nullable in the output. | Union All | Returns all records from each set, allowing duplicates. | If a field is nullable in any of the inputs, it is nullable in the output. | +| Minus Distinct | Returns all records (excluding duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. | +| Intersection Distinct | Returns all records (excluding duplicates) from the primary input that match any records in secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. | Note that for set operations, NULL matches NULL. 
That is ``` @@ -302,6 +304,8 @@ The output type is as follows for the various operations | Intersection (Multiset) | (R, R, R, R, R, R, R, N) | Union Distinct | (R, N, N, N, N, N, N, N) | Union All | (R, N, N, N, N, N, N, N) +| Minus Distinct | (R, R, R, R, N, N, N, N) +| Intersection Distinct | (R, R, R, R, R, N, N, N) === "SetRel Message" From 638aa002fc23a6ef341682594614a742362e752d Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Fri, 20 Sep 2024 10:48:19 -0400 Subject: [PATCH 5/9] separate all/distinct enum values --- proto/substrait/algebra.proto | 6 ++++-- site/docs/relations/logical_relations.md | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 440c173b2..8d394cedf 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -338,8 +338,10 @@ message SetRel { SET_OP_INTERSECTION_MULTISET = 4; SET_OP_UNION_DISTINCT = 5; SET_OP_UNION_ALL = 6; - SET_OP_MINUS_DISTINCT = 7; - SET_OP_INTERSECTION_DISTINCT = 8; + SET_OP_MINUS_PRIMARY_ALL = 7; + SET_OP_MINUS_MULTISET_ALL = 8; + SET_OP_INTERSECTION_PRIMARY_ALL = 9; + SET_OP_INTERSECTION_MULTISET_ALL = 10; } } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 57838748a..c2159b61e 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -270,14 +270,22 @@ The set operation type determines both the records that are emitted and the type | Property | Description | Output Nullability | ----------------------- | ------------------------------------------------------------------------------------------------------------- | ----------------------------- | -| Minus (Primary) | Returns all records (including duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. 
-| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. -| Intersection (Primary) | Returns all records (including duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. -| Union Distinct | Returns all the records from each set, removing any rows that are duplicated (within or across sets). | If a field is nullable in any of the inputs, it is nullable in the output. -| Union All | Returns all records from each set, allowing duplicates. | If a field is nullable in any of the inputs, it is nullable in the output. | -| Minus Distinct | Returns all records (excluding duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. | -| Intersection Distinct | Returns all records (excluding duplicates) from the primary input that match any records in secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. | +| Minus (Primary) | Returns all records (removing duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. +| Minus (Primary All) | Returns all records (allowing duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. +| Minus (Multiset) | Returns all records (removing duplicates) from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. 
+| Minus (Multiset All) | Returns all records (allowing duplicates) from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. +| Intersection (Primary) | Returns all records (removing duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Primary All) | Returns all records (allowing duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Multiset) | Returns all records (removing duplicates) from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. +| Intersection (Multiset All) | Returns all records (allowing duplicates) from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. +| Union Distinct | Returns all the records from each set, removing any rows that are duplicated (within or across sets). | If a field is nullable in any of the inputs, it is nullable in the output. +| Union All | Returns all records from each set, allowing duplicates. | If a field is nullable in any of the inputs, it is nullable in the output. | + +Intersection All looks at all occurrences of values in both tables, but the output is limited by the smallest number of matches between the two: +``` +{1, 3, 2, 2, 2} INTERSECTION ALL {1, 1, 2, 3, 2} === {1, 3, 2, 2} +{1, 3, 2, 2, 2} INTERSECTION DISTINCT {1, 1, 2, 3, 2} === {1, 3, 2} +``` Note that for set operations, NULL matches NULL. 
That is ``` From 305b339d175a4679e813f23536913234d46b722f Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Thu, 26 Sep 2024 12:56:22 -0400 Subject: [PATCH 6/9] minus/intersect all according to sql spec --- proto/substrait/algebra.proto | 6 ++-- site/docs/relations/logical_relations.md | 35 ++++++++++++------------ 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 8d394cedf..2dbea74ac 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -333,15 +333,13 @@ message SetRel { enum SetOp { SET_OP_UNSPECIFIED = 0; SET_OP_MINUS_PRIMARY = 1; + SET_OP_MINUS_PRIMARY_ALL = 7; SET_OP_MINUS_MULTISET = 2; SET_OP_INTERSECTION_PRIMARY = 3; + SET_OP_INTERSECTION_PRIMARY_ALL = 8; SET_OP_INTERSECTION_MULTISET = 4; SET_OP_UNION_DISTINCT = 5; SET_OP_UNION_ALL = 6; - SET_OP_MINUS_PRIMARY_ALL = 7; - SET_OP_MINUS_MULTISET_ALL = 8; - SET_OP_INTERSECTION_PRIMARY_ALL = 9; - SET_OP_INTERSECTION_MULTISET_ALL = 10; } } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index c2159b61e..0574547e2 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -268,24 +268,23 @@ The set operation encompasses several set-level operations that support combinin The set operation type determines both the records that are emitted and the type of the output record. -| Property | Description | Output Nullability -| ----------------------- | ------------------------------------------------------------------------------------------------------------- | ----------------------------- | -| Minus (Primary) | Returns all records (removing duplicates) from the primary input excluding any matching records from secondary inputs. | The same as the primary input. -| Minus (Primary All) | Returns all records (allowing duplicates) from the primary input excluding any matching records from secondary inputs. 
| The same as the primary input. -| Minus (Multiset) | Returns all records (removing duplicates) from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. -| Minus (Multiset All) | Returns all records (allowing duplicates) from the primary input excluding any records that are included in *all* secondary inputs. | The same as the primary input. -| Intersection (Primary) | Returns all records (removing duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Primary All) | Returns all records (allowing duplicates) from the primary input that match at least one record from *any* secondary inputs. | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Multiset) | Returns all records (removing duplicates) from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. -| Intersection (Multiset All) | Returns all records (allowing duplicates) from the primary input that match at least one record from *all* secondary inputs. | If a field is required in any of the inputs, it is required in the output. -| Union Distinct | Returns all the records from each set, removing any rows that are duplicated (within or across sets). | If a field is nullable in any of the inputs, it is nullable in the output. -| Union All | Returns all records from each set, allowing duplicates. | If a field is nullable in any of the inputs, it is nullable in the output. 
| - -Intersection All looks at all occurrences of values in both tables, but the output is limited by the smallest number of matches between the two: -``` -{1, 3, 2, 2, 2} INTERSECTION ALL {1, 1, 2, 3, 2} === {1, 3, 2, 2} -{1, 3, 2, 2, 2} INTERSECTION DISTINCT {1, 1, 2, 3, 2} === {1, 3, 2} -``` +For some set operations, whether a specific record is included in the output and if it appears more than once depends on the number of times it occurs across all inputs. In the following table, treat: +* m: the number of time a records occurs in the primary input +* n1: the number of times a record occurs in the 1st secondary input +* n2: the number of times a record occurs in the 2nd secondary input +* ... +* n: the number of times a record occurs in the nth secondary input + +| Operation | Description | Examples | Output Nullability +|----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------------------------- +| Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicate rows within or across sets.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result. | MINUS
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2}
  n2: {3}
YIELDS
{4} | The same as the primary input. +| Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies. | MINUS ALL
  m: {1, 2, 2, 3, 3, 3, 3}
  n1: {1, 2, 3, 4}
  n2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. +| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs. | MINUS MULTISET
  m: {1, 2, 3, 4}
  n1: {1, 2}
  n2: {1, 2, 3}
YIELDS
{4} | The same as the primary input. +| Intersection (Primary) | Returns all records from the primary input that are present in every secondary input, removing duplicate rows within or across sets. | INTERSECT
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2, 3, 5}
  n2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Primary All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies. | INTERSECT ALL
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2, 3, 3, 5}
  n2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs. | INTERSECT MULTISET
  m: {1, 2, 3, 4}
  n1: {2, 3}
  n2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. +| Union | Returns all records from each set, removing duplicate rows within or across sets. | UNION
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {2, 3, 5}
  n2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output. +| Union All | Returns all records from all inputs.
For each specific record returned, the output contains (m + n1 + n2 + … + n) copies. | UNION ALL
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {2, 3, 5}
  n2: {1, 6}
YIELDS
{1, 2, 2, 3, 3, 3, 4, 2, 3, 5, 1, 6} | If a field is nullable in any of the inputs, it is nullable in the output. Note that for set operations, NULL matches NULL. That is ``` From 79106bf5f9a1b98099c5452e4f55798a9d583304 Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Fri, 27 Sep 2024 14:22:32 -0400 Subject: [PATCH 7/9] remove SET_OP_INTERSECTION_PRIMARY_ALL --- proto/substrait/algebra.proto | 2 +- site/docs/relations/logical_relations.md | 46 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 2dbea74ac..efeff6852 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -336,8 +336,8 @@ message SetRel { SET_OP_MINUS_PRIMARY_ALL = 7; SET_OP_MINUS_MULTISET = 2; SET_OP_INTERSECTION_PRIMARY = 3; - SET_OP_INTERSECTION_PRIMARY_ALL = 8; SET_OP_INTERSECTION_MULTISET = 4; + SET_OP_INTERSECTION_MULTISET_ALL = 8; SET_OP_UNION_DISTINCT = 5; SET_OP_UNION_ALL = 6; } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 0574547e2..9d81c2242 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -269,22 +269,22 @@ The set operation encompasses several set-level operations that support combinin The set operation type determines both the records that are emitted and the type of the output record. For some set operations, whether a specific record is included in the output and if it appears more than once depends on the number of times it occurs across all inputs. 
In the following table, treat: -* m: the number of time a records occurs in the primary input -* n1: the number of times a record occurs in the 1st secondary input -* n2: the number of times a record occurs in the 2nd secondary input +* m: the number of times a record occurs in the primary input (p) +* n1: the number of times a record occurs in the 1st secondary input (s1) +* n2: the number of times a record occurs in the 2nd secondary input (s2) * ... * n: the number of times a record occurs in the nth secondary input -| Operation | Description | Examples | Output Nullability -|----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------------------------- -| Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicate rows within or across sets.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result. | MINUS
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2}
  n2: {3}
YIELDS
{4} | The same as the primary input. -| Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies. | MINUS ALL
  m: {1, 2, 2, 3, 3, 3, 3}
  n1: {1, 2, 3, 4}
  n2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. -| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs. | MINUS MULTISET
  m: {1, 2, 3, 4}
  n1: {1, 2}
  n2: {1, 2, 3}
YIELDS
{4} | The same as the primary input. -| Intersection (Primary) | Returns all records from the primary input that are present in every secondary input, removing duplicate rows within or across sets. | INTERSECT
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2, 3, 5}
  n2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Primary All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies. | INTERSECT ALL
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {1, 2, 3, 3, 5}
  n2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs. | INTERSECT MULTISET
  m: {1, 2, 3, 4}
  n1: {2, 3}
  n2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. -| Union | Returns all records from each set, removing duplicate rows within or across sets. | UNION
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {2, 3, 5}
  n2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output. -| Union All | Returns all records from all inputs.
For each specific record returned, the output contains (m + n1 + n2 + … + n) copies. | UNION ALL
  m: {1, 2, 2, 3, 3, 3, 4}
  n1: {2, 3, 5}
  n2: {1, 6}
YIELDS
{1, 2, 2, 3, 3, 3, 4, 2, 3, 5, 1, 6} | If a field is nullable in any of the inputs, it is nullable in the output. +| Operation | Description | Examples | Output Nullability +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------------------------- +| Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicates.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result.
This operation maps to SQL EXCEPT. | MINUS
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2}
  s2: {3}
YIELDS
{4} | The same as the primary input. +| Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies.
This operation maps to SQL EXCEPT ALL. | MINUS ALL
  p: {1, 2, 2, 3, 3, 3, 3}
  s1: {1, 2, 3, 4}
  s2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. +| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs.
This operation does have a direct SQL mapping. | MINUS MULTISET
  p: {1, 2, 3, 4}
  s1: {1, 2}
  s2: {1, 2, 3}
YIELDS
{3, 4} | The same as the primary input. +| Intersection (Primary) | Returns all records from the primary input that are present in any secondary input, removing duplicates.
This operation does have a direct SQL mapping. | INTERSECT
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 5}
  s2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs.
This operation maps to SQL INTERSECT DISTINCT | INTERSECT MULTISET
  p: {1, 2, 3, 4}
  s1: {2, 3}
  s2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. +| Intersection (Multiset All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies.
This operation maps to SQL INTERSECT ALL. | INTERSECT ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 3, 5}
  s2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Union Distinct | Returns all records from each set, removing duplicates.
This operation maps to SQL UNION DISTINCT. | UNION
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output. +| Union All | Returns all records from all inputs.
For each specific record returned, the output contains (m + n1 + n2 + … + n) copies.
This operation maps to SQL UNION ALL. | UNION ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 2, 3, 3, 3, 4, 2, 3, 5, 1, 6} | If a field is nullable in any of the inputs, it is nullable in the output. Note that for set operations, NULL matches NULL. That is ``` @@ -303,16 +303,16 @@ Input 3: (R, N, R, N, R, N, R, N) Secondary Input The output type is as follows for the various operations -| Property | Output Type -| ----------------------- | ----------------------------------------------------------------------------------------------------- -| Minus (Primary) | (R, R, R, R, N, N, N, N) -| Minus (Multiset) | (R, R, R, R, N, N, N, N) -| Intersection (Primary) | (R, R, R, R, R, N, N, N) -| Intersection (Multiset) | (R, R, R, R, R, R, R, N) -| Union Distinct | (R, N, N, N, N, N, N, N) -| Union All | (R, N, N, N, N, N, N, N) -| Minus Distinct | (R, R, R, R, N, N, N, N) -| Intersection Distinct | (R, R, R, R, R, N, N, N) +| Property | Output Type +|-----------------------------| ----------------------------------------------------------------------------------------------------- +| Minus (Primary) | (R, R, R, R, N, N, N, N) +| Minus (Primary All) | (R, R, R, R, N, N, N, N) +| Minus (Multiset) | (R, R, R, R, N, N, N, N) +| Intersection (Primary) | (R, R, R, R, R, N, N, N) +| Intersection (Multiset) | (R, R, R, R, R, R, R, N) +| Intersection (Multiset All) | (R, R, R, R, R, N, N, N) +| Union Distinct | (R, N, N, N, N, N, N, N) +| Union All | (R, N, N, N, N, N, N, N) === "SetRel Message" From 22c48f49c8b281f4542383843e74fc4ecdab796c Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Fri, 27 Sep 2024 14:56:33 -0400 Subject: [PATCH 8/9] typos --- site/docs/relations/logical_relations.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 9d81c2242..417d29f5c 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -275,16 +275,16 @@ For some set operations, whether a specific record is 
included in the output and * ... * n: the number of times a record occurs in the nth secondary input -| Operation | Description | Examples | Output Nullability -|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------------------------- -| Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicates.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result.
This operation maps to SQL EXCEPT. | MINUS
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2}
  s2: {3}
YIELDS
{4} | The same as the primary input. -| Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies.
This operation maps to SQL EXCEPT ALL. | MINUS ALL
  p: {1, 2, 2, 3, 3, 3, 3}
  s1: {1, 2, 3, 4}
  s2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. -| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs.
This operation does have a direct SQL mapping. | MINUS MULTISET
  p: {1, 2, 3, 4}
  s1: {1, 2}
  s2: {1, 2, 3}
YIELDS
{3, 4} | The same as the primary input. -| Intersection (Primary) | Returns all records from the primary input that are present in any secondary input, removing duplicates.
This operation does have a direct SQL mapping. | INTERSECT
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 5}
  s2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs.
This operation maps to SQL INTERSECT DISTINCT | INTERSECT MULTISET
  p: {1, 2, 3, 4}
  s1: {2, 3}
  s2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. -| Intersection (Multiset All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies.
This operation maps to SQL INTERSECT ALL. | INTERSECT ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 3, 5}
  s2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. -| Union Distinct | Returns all records from each set, removing duplicates.
This operation maps to SQL UNION DISTINCT. | UNION
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output. -| Union All | Returns all records from all inputs.
For each specific record returned, the output contains (m + n1 + n2 + … + n) copies.
This operation maps to SQL UNION ALL. | UNION ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 2, 3, 3, 3, 4, 2, 3, 5, 1, 6} | If a field is nullable in any of the inputs, it is nullable in the output. +| Operation | Description | Examples | Output Nullability +|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------------------------- +| Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicates.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result.
This operation maps to SQL EXCEPT DISTINCT. | MINUS
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2}
  s2: {3}
YIELDS
{4} | The same as the primary input. +| Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies.
This operation maps to SQL EXCEPT ALL. | MINUS ALL
  p: {1, 2, 2, 3, 3, 3, 3}
  s1: {1, 2, 3, 4}
  s2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. +| Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs.
This operation does not have a direct SQL mapping. | MINUS MULTISET
  p: {1, 2, 3, 4}
  s1: {1, 2}
  s2: {1, 2, 3}
YIELDS
{3, 4} | The same as the primary input. +| Intersection (Primary) | Returns all records from the primary input that are present in any secondary input, removing duplicates.
This operation does not have a direct SQL mapping. | INTERSECT
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 5}
  s2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs.
This operation maps to SQL INTERSECT DISTINCT. | INTERSECT MULTISET
  p: {1, 2, 3, 4}
  s1: {2, 3}
  s2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. +| Intersection (Multiset All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies.
This operation maps to SQL INTERSECT ALL. | INTERSECT ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 3, 5}
  s2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is required in any of the inputs, it is required in the output. +| Union Distinct | Returns all records from each set, removing duplicates.
This operation maps to SQL UNION DISTINCT. | UNION
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output. +| Union All | Returns all records from all inputs.
For each specific record returned, the output contains (m + n1 + n2 + … + n) copies.
This operation maps to SQL UNION ALL. | UNION ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 2, 3, 3, 3, 4, 2, 3, 5, 1, 6} | If a field is nullable in any of the inputs, it is nullable in the output. Note that for set operations, NULL matches NULL. That is ``` @@ -310,7 +310,7 @@ The output type is as follows for the various operations | Minus (Multiset) | (R, R, R, R, N, N, N, N) | Intersection (Primary) | (R, R, R, R, R, N, N, N) | Intersection (Multiset) | (R, R, R, R, R, R, R, N) -| Intersection (Multiset All) | (R, R, R, R, R, N, N, N) +| Intersection (Multiset All) | (R, R, R, R, R, R, R, N) | Union Distinct | (R, N, N, N, N, N, N, N) | Union All | (R, N, N, N, N, N, N, N) From 63f5e1649af539e16b329d3d19ad8d43c551b69f Mon Sep 17 00:00:00 2001 From: Kadin Rabo Date: Fri, 27 Sep 2024 16:17:38 -0400 Subject: [PATCH 9/9] example typo Co-authored-by: Weston Pace --- site/docs/relations/logical_relations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 417d29f5c..ebee1acc4 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -280,7 +280,7 @@ For some set operations, whether a specific record is included in the output and | Minus (Primary) | Returns all records from the primary input excluding any matching rows from secondary inputs, removing duplicates.
Each value is treated as a unique member of the set, so duplicates in the first set don’t affect the result.
This operation maps to SQL EXCEPT DISTINCT. | MINUS
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2}
  s2: {3}
YIELDS
{4} | The same as the primary input. | Minus (Primary All) | Returns all records from the primary input excluding any matching records from secondary inputs.
For each specific record returned, the output contains max(0, m - sum(n1, n2, …, n)) copies.
This operation maps to SQL EXCEPT ALL. | MINUS ALL
  p: {1, 2, 2, 3, 3, 3, 3}
  s1: {1, 2, 3, 4}
  s2: {3}
YIELDS
{2, 3, 3} | The same as the primary input. | Minus (Multiset) | Returns all records from the primary input excluding any records that are included in *all* secondary inputs.
This operation does not have a direct SQL mapping. | MINUS MULTISET
  p: {1, 2, 3, 4}
  s1: {1, 2}
  s2: {1, 2, 3}
YIELDS
{3, 4} | The same as the primary input. -| Intersection (Primary) | Returns all records from the primary input that are present in any secondary input, removing duplicates.
This operation does not have a direct SQL mapping. | INTERSECT
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 5}
  s2: {2, 3, 6}
YIELDS
{2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. +| Intersection (Primary) | Returns all records from the primary input that are present in any secondary input, removing duplicates.
This operation does not have a direct SQL mapping. | INTERSECT
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 5}
  s2: {2, 3, 6}
YIELDS
{1, 2, 3} | If a field is nullable in the primary input and in any of the secondary inputs, it is nullable in the output. | Intersection (Multiset) | Returns all records from the primary input that match at least one record from *all* secondary inputs.
This operation maps to SQL INTERSECT DISTINCT. | INTERSECT MULTISET
  p: {1, 2, 3, 4}
  s1: {2, 3}
  s2: {3, 4}
YIELDS
{3} | If a field is required in any of the inputs, it is required in the output. | Intersection (Multiset All) | Returns all records from the primary input that are present in every secondary input.
For each specific record returned, the output contains min(m, n1, n2, …, n) copies.
This operation maps to SQL INTERSECT ALL. | INTERSECT ALL
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {1, 2, 3, 3, 5}
  s2: {2, 3, 3, 6}
YIELDS
{2, 3, 3} | If a field is required in any of the inputs, it is required in the output. | Union Distinct | Returns all records from each set, removing duplicates.
This operation maps to SQL UNION DISTINCT. | UNION
  p: {1, 2, 2, 3, 3, 3, 4}
  s1: {2, 3, 5}
  s2: {1, 6}
YIELDS
{1, 2, 3, 4, 5, 6} | If a field is nullable in any of the inputs, it is nullable in the output.