Skip to content

Commit

Permalink
feat(#3006): correct semantic for first_value
Browse files Browse the repository at this point in the history
- correct `first_value` semantic, same as `first_value` in standard SQL.
- `first_value` will accept an optional parameter `ignoreNull`
- add `last_value`
  • Loading branch information
aceforeverd committed Apr 9, 2024
1 parent 0cef78a commit 04276e6
Show file tree
Hide file tree
Showing 10 changed files with 337 additions and 159 deletions.
8 changes: 4 additions & 4 deletions cases/function/function/test_udaf_function.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2004,7 +2004,7 @@ cases:
- [12, 0, "b"]
sql: |
SELECT {0}.id,
count_where(id, ifnull(c1, "a") = ifnull(first_value(c1), "a")) OVER w1 AS count_where
count_where(id, ifnull(c1, "a") = ifnull(last_value(c1), "a")) OVER w1 AS count_where
FROM {0}
WINDOW w1 AS (PARTITION BY {0}.pk ORDER BY {0}.id ROWS BETWEEN 10 PRECEDING AND CURRENT ROW);
expect:
Expand Down Expand Up @@ -2433,7 +2433,7 @@ cases:
`val1`,
lag(val1, 0) over w1 as agg1,
lag(val1, 3) over w1 as agg2,
first_value(val1) over w1 as agg3
last_value(val1) over w1 as agg3
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 2s preceding and 1s preceding MAXSIZE 10);
expect:
Expand Down Expand Up @@ -2512,7 +2512,7 @@ cases:
`val1`,
lag(val1, 0) over w1 as agg1,
lag(val1, 3) over w1 as agg2,
first_value(val1) over w1 as agg3
last_value(val1) over w1 as agg3
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows between 2 preceding and 1 preceding);
expect:
Expand Down Expand Up @@ -2657,7 +2657,7 @@ cases:
id,
key1,
nth_value_where(id, 0, true) over w as agg_null,
nth_value_where(l1 + 1, -1, l1 < first_value(l1)) over w as agg1,
nth_value_where(l1 + 1, -1, l1 < last_value(l1)) over w as agg1,
nth_value_where(f1, -2, case when f1 > 5.0 then true else null end) over w as agg2,
nth_value_where(val, -2, case when sum(l2) > 10 then true else false end) over w as agg3,
nth_value_where(l1 + 1, 1, l1 < 4) over w as agg4,
Expand Down
116 changes: 80 additions & 36 deletions cases/function/window/test_window.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ cases:

- id: 33
desc: |
first_value results in two rows_range window, refer https://github.com/4paradigm/OpenMLDB/issues/1587
first_value/last_value results in two rows_range window, refer https://github.com/4paradigm/OpenMLDB/issues/1587
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1040,27 +1040,29 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 5s preceding and 0s preceding),
w2 as (partition by `group1` order by `ts` rows_range between 5s preceding and 1s preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 34
desc: |
first_value results in two rows windows
first_value/last_value results in two rows windows
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1078,27 +1080,29 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows between 5 preceding and 0 preceding),
w2 as (partition by `group1` order by `ts` rows between 5 preceding and 1 preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 35
desc: |
first_value results in rows/rows_range windows
first_value/last_value results in rows/rows_range windows
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1116,23 +1120,25 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 5s preceding and 0s preceding),
w2 as (partition by `group1` order by `ts` rows between 5 preceding and 1 preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 36
version: 0.6.0
Expand Down Expand Up @@ -1213,3 +1219,41 @@ cases:
1, 0, NULL, NULL, NULL
2, 1, 21, 21, 21
3, 1, 22, 22, 22
- id: 38
desc: |
first_value/last_value with ignoreNull flag
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 string" ]
indexs: [ "index1:group1:ts" ]
name: t1
data: |
1, 1612130400000, g1, 1
2, 1612130401000, g1, 2
3, 1612130402000, g1, 3
4, 1612130403000, g1, NULL
5, 1612130404000, g1, 5
6, 1612130404000, g2, NULL
7, 1612130405000, g2, 3
8, 1612130406000, g2, 2
sql: |
select
`id`,
`val1`,
first_value(val1, true) over w1 as agg1,
last_value(val1, true) over w1 as agg2,
last_value(val1, false) over w1 as agg3,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 5s preceding and 1s preceding)
expect:
columns: ["id int", "val1 string", "agg1 string", "agg2 string", "agg3 string"]
order: id
data: |
1, 1, NULL, NULL, NULL
2, 2, 1, 1, 1
3, 3, 1, 2, 2
4, NULL, 1, 3, 3
5, 5, 1, 3, NULL
6, NULL, NULL, NULL, NULL
7, 3, NULL, NULL, NULL
8, 2, 3, 3, 3
6 changes: 3 additions & 3 deletions cases/integration_test/function/test_udaf_function.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2026,7 +2026,7 @@ cases:
- [12, 0, "b"]
sql: |
SELECT {0}.id,
count_where(id, ifnull(c1, "a") = ifnull(first_value(c1), "a")) OVER w1 AS count_where
count_where(id, ifnull(c1, "a") = ifnull(last_value(c1), "a")) OVER w1 AS count_where
FROM {0}
WINDOW w1 AS (PARTITION BY {0}.pk ORDER BY {0}.id ROWS BETWEEN 10 PRECEDING AND CURRENT ROW);
expect:
Expand Down Expand Up @@ -2455,7 +2455,7 @@ cases:
`val1`,
lag(val1, 0) over w1 as agg1,
lag(val1, 3) over w1 as agg2,
first_value(val1) over w1 as agg3
last_value(val1) over w1 as agg3
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 2s preceding and 1s preceding MAXSIZE 10);
expect:
Expand Down Expand Up @@ -2534,7 +2534,7 @@ cases:
`val1`,
lag(val1, 0) over w1 as agg1,
lag(val1, 3) over w1 as agg2,
first_value(val1) over w1 as agg3
last_value(val1) over w1 as agg3
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows between 2 preceding and 1 preceding);
expect:
Expand Down
78 changes: 42 additions & 36 deletions cases/integration_test/window/test_window.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ cases:

- id: 33
desc: |
first_value results in two rows_range window, refer https://github.com/4paradigm/OpenMLDB/issues/1587
first_value/last_value results in two rows_range window, refer https://github.com/4paradigm/OpenMLDB/issues/1587
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1041,27 +1041,29 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 5s preceding and 0s preceding),
w2 as (partition by `group1` order by `ts` rows_range between 5s preceding and 1s preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 34
desc: |
first_value results in two rows windows
first_value/last_value results in two rows windows
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1079,27 +1081,29 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows between 5 preceding and 0 preceding),
w2 as (partition by `group1` order by `ts` rows between 5 preceding and 1 preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 35
desc: |
first_value results in rows/rows_range windows
first_value/last_value results in rows/rows_range windows
inputs:
- columns: [ "id int","ts timestamp","group1 string","val1 int" ]
indexs: [ "index1:group1:ts" ]
Expand All @@ -1117,23 +1121,25 @@ cases:
select
`id`,
`val1`,
first_value(val1) over w1 as agg1,
first_value(val1) over w2 as agg2,
last_value(val1) over w1 as agg1,
last_value(val1) over w2 as agg2,
first_value(val1) over w1 as agg3,
first_value(val1) over w2 as agg4,
from `t1` WINDOW
w1 as (partition by `group1` order by `ts` rows_range between 5s preceding and 0s preceding),
w2 as (partition by `group1` order by `ts` rows between 5 preceding and 1 preceding);
expect:
columns: ["id int", "val1 int", "agg1 int", "agg2 int"]
columns: ["id int", "val1 int", "agg1 int", "agg2 int", "agg3 int", "agg4 int"]
order: id
rows:
- [1, 1, 1, NULL]
- [2, 2, 2, 1]
- [3, 3, 3, 2]
- [4, 4, 4, 3]
- [5, 5, 5, 4]
- [6, 4, 4, NULL]
- [7, 3, 3, 4]
- [8, 2, 2, 3]
- [1, 1, 1, NULL, 1, NULL]
- [2, 2, 2, 1, 1, 1]
- [3, 3, 3, 2, 1, 1]
- [4, 4, 4, 3, 1, 1]
- [5, 5, 5, 4, 1, 1]
- [6, 4, 4, NULL, 4, NULL]
- [7, 3, 3, 4, 4, 4]
- [8, 2, 2, 3, 4, 4]

- id: 36
version: 0.6.0
Expand Down
2 changes: 1 addition & 1 deletion cases/query/udaf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ cases:
sql: |
SELECT
{0}.id, c1,
count(case when c2 > first_value(c2) over w1 then c3 else null end) OVER w1 as m2,
count(case when c2 > last_value(c2) over w1 then c3 else null end) OVER w1 as m2,
sum(nvl(c3, 14)) OVER w1 as m3
FROM {0} WINDOW
w1 AS (PARTITION BY {0}.c1 ORDER BY {0}.c7 ROWS BETWEEN 2 PRECEDING AND CURRENT ROW);
Expand Down
9 changes: 8 additions & 1 deletion hybridse/include/base/iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,16 @@ class AbstractIterator {
virtual void Next() = 0;
/// Return the key of current element pair.
virtual const K& GetKey() const = 0;
/// Return the value of current element pari
/// Return the value of current element pair
/// when Valid() returns `true`.
virtual Ref GetValue() = 0;

/// Check whether current value is NULL.
/// Column values in a table can be NULL; calling `GetValue()` on a NULL column
/// leads to unspecified behavior.
/// Implement this if iterator values may be NULL.
virtual bool IsValueNull() { return false; }

/// Return whether the dataset is seekable or
/// not. A dataset is seekable if it allows access to data with Seek()
/// method
Expand Down
Loading

0 comments on commit 04276e6

Please sign in to comment.