Skip to content

Commit

Permalink
GROUP-BY prioritizes input columns in case of ambiguity (#9228)
Browse files Browse the repository at this point in the history
* GROUP-BY prioritizes input columns in case of ambiguity

* Update datafusion/sqllogictest/test_files/aggregate.slt

Co-authored-by: Andrew Lamb <[email protected]>

* Update datafusion/sqllogictest/test_files/aggregate.slt

Co-authored-by: Andrew Lamb <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
jonahgao and alamb authored Feb 16, 2024
1 parent e4f4031 commit 40353fe
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
7 changes: 5 additions & 2 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

// The HAVING and GROUP BY clauses may reference aliases defined in the SELECT projection
let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?;
let mut combined_schema = (**projected_plan.schema()).clone();
combined_schema.merge(base_plan.schema());
// Place the fields of the base plan at the front so that when there are references
// with the same name, the fields of the base plan will be searched first.
// See https://github.com/apache/arrow-datafusion/issues/9162
let mut combined_schema = base_plan.schema().as_ref().clone();
combined_schema.merge(projected_plan.schema());

// This alias map is resolved and looked up in both the HAVING and the GROUP BY expressions
let alias_map = extract_aliases(&select_exprs);
Expand Down
20 changes: 20 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3179,3 +3179,23 @@ NULL
statement ok
DROP TABLE t;


# Test for the case when the column name is ambiguous
statement ok
CREATE TABLE t(a BIGINT) AS VALUES(1), (2), (3);

# The column name referenced by GROUP-BY is ambiguous, prefer the column in the base plan
query I
SELECT 0 as "t.a" FROM t GROUP BY t.a;
----
0
0
0

# The column name referenced by HAVING is ambiguous, prefer the column in the base plan
query I
SELECT 0 AS "t.a" FROM t HAVING MAX(t.a) = 0;
----

statement ok
DROP TABLE t;

0 comments on commit 40353fe

Please sign in to comment.