Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: make ntile work in some corner cases #8371

Merged
merged 4 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion datafusion/expr/src/window_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,20 @@ impl BuiltInWindowFunction {
BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => {
Signature::any(1, Volatility::Immutable)
}
BuiltInWindowFunction::Ntile => Signature::any(1, Volatility::Immutable),
BuiltInWindowFunction::Ntile => Signature::uniform(
1,
vec![
DataType::UInt64,
DataType::UInt32,
DataType::UInt16,
DataType::UInt8,
DataType::Int64,
DataType::Int32,
DataType::Int16,
DataType::Int8,
],
Volatility::Immutable,
),
BuiltInWindowFunction::NthValue => Signature::any(2, Volatility::Immutable),
}
}
Expand Down
3 changes: 2 additions & 1 deletion datafusion/physical-expr/src/window/ntile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,9 @@ impl PartitionEvaluator for NtileEvaluator {
) -> Result<ArrayRef> {
let num_rows = num_rows as u64;
let mut vec: Vec<u64> = Vec::new();
let n = u64::min(self.n, num_rows);
for i in 0..num_rows {
let res = i * self.n / num_rows;
let res = i * n / num_rows;
vec.push(res + 1)
}
Ok(Arc::new(UInt64Array::from(vec)))
Expand Down
29 changes: 20 additions & 9 deletions datafusion/physical-plan/src/windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,26 @@ fn create_built_in_window_expr(
BuiltInWindowFunction::PercentRank => Arc::new(percent_rank(name)),
BuiltInWindowFunction::CumeDist => Arc::new(cume_dist(name)),
BuiltInWindowFunction::Ntile => {
let n: i64 = get_scalar_value_from_args(args, 0)?
.ok_or_else(|| {
DataFusionError::Execution(
"NTILE requires at least 1 argument".to_string(),
)
})?
.try_into()?;
let n: u64 = n as u64;
Arc::new(Ntile::new(name, n))
let n = get_scalar_value_from_args(args, 0)?.ok_or_else(|| {
DataFusionError::Execution(
"NTILE requires a positive integer".to_string(),
)
})?;

if n.is_null() {
return exec_err!("NTILE requires a positive integer, but finds NULL");
}

if n.is_unsigned() {
let n: u64 = n.try_into()?;
Arc::new(Ntile::new(name, n))
} else {
let n: i64 = n.try_into()?;
if n <= 0 {
return exec_err!("NTILE requires a positive integer");
haohuaijin marked this conversation as resolved.
Show resolved Hide resolved
}
Arc::new(Ntile::new(name, n as u64))
}
}
BuiltInWindowFunction::Lag => {
let arg = args[0].clone();
Expand Down
146 changes: 146 additions & 0 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3581,3 +3581,149 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY c1) AS ntile_2 FROM aggr

statement ok
DROP TABLE new_table;

# NTILE edge cases on a small three-row table.
statement ok
CREATE TABLE t1 (a int) AS VALUES (1), (2), (3);

# Bucket count far larger than the number of rows: each row gets its own bucket.
query I
SELECT NTILE(9223377) OVER(ORDER BY a) FROM t1;
----
1
2
3

# 9223372036854775809 is larger than the maximum signed 64-bit value
# (9223372036854775807), so this covers a bucket count that only fits in an
# unsigned 64-bit integer.
query I
SELECT NTILE(9223372036854775809) OVER(ORDER BY a) FROM t1;
----
1
2
3

# A negative bucket count is rejected with an execution error.
query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT NTILE(-922337203685477580) OVER(ORDER BY a) FROM t1;

# Clean up the table created for the NTILE edge-case tests above.
# The fixture is named t1; dropping `t` only asserted that an unrelated table
# does not exist and left t1 behind.
statement ok
DROP TABLE t1;

# NTILE with PARTITION BY; these tests are adapted from DuckDB: https://github.com/duckdb/duckdb/blob/main/test/sql/window/test_ntile.test
# Fixture: two teams of four players each, every player with a distinct score.
statement ok
CREATE TABLE score_board (
    team_name VARCHAR,
    player VARCHAR,
    score INTEGER
) AS VALUES
    ('Mongrels', 'Apu', 350),
    ('Mongrels', 'Ned', 666),
    ('Mongrels', 'Meg', 1030),
    ('Mongrels', 'Burns', 1270),
    ('Simpsons', 'Homer', 1),
    ('Simpsons', 'Lisa', 710),
    ('Simpsons', 'Marge', 990),
    ('Simpsons', 'Bart', 2010);

# Two buckets per team: each four-row partition splits into two buckets of two rows.
query TTII
SELECT
team_name,
player,
score,
NTILE(2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 1
Mongrels Meg 1030 2
Mongrels Burns 1270 2
Simpsons Homer 1 1
Simpsons Lisa 710 1
Simpsons Marge 990 2
Simpsons Bart 2010 2

# Two buckets over the whole table (no PARTITION BY): the eight rows, ordered
# by score, split into two buckets of four rows.
query TTII
SELECT
team_name,
player,
score,
NTILE(2) OVER (ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY score;
----
Simpsons Homer 1 1
Mongrels Apu 350 1
Mongrels Ned 666 1
Simpsons Lisa 710 1
Simpsons Marge 990 2
Mongrels Meg 1030 2
Mongrels Burns 1270 2
Simpsons Bart 2010 2

# Bucket count (1000) exceeds the four rows in each partition: every row gets
# its own bucket number, 1 through 4.
query TTII
SELECT
team_name,
player,
score,
NTILE(1000) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 2
Mongrels Meg 1030 3
Mongrels Burns 1270 4
Simpsons Homer 1 1
Simpsons Lisa 710 2
Simpsons Marge 990 3
Simpsons Bart 2010 4

# A single bucket: every row in every partition is assigned bucket 1.
query TTII
SELECT
team_name,
player,
score,
NTILE(1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s
ORDER BY team_name, score;
----
Mongrels Apu 350 1
Mongrels Ned 666 1
Mongrels Meg 1030 1
Mongrels Burns 1270 1
Simpsons Homer 1 1
Simpsons Lisa 710 1
Simpsons Marge 990 1
Simpsons Bart 2010 1

# invalid arguments for ntile: NULL, non-positive values, and a wrong number of parameters are all rejected
query error DataFusion error: Execution error: NTILE requires a positive integer, but finds NULL
SELECT
NTILE(NULL) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT
NTILE(-1) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

query error DataFusion error: Execution error: NTILE requires a positive integer
SELECT
NTILE(0) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE() OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2,3) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement error
SELECT
NTILE(1,2,3,4) OVER (PARTITION BY team_name ORDER BY score ASC) AS NTILE
FROM score_board s

statement ok
DROP TABLE score_board;