diff --git a/Cargo.lock b/Cargo.lock index 0979306a..c41ef771 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -739,9 +739,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f92d2d7a9cba4580900b32b009848d9eb35f1028ac84cdd6ddcf97612cd0068" +checksum = "ab9d55a9cd2634818953809f75ebe5248b00dd43c3227efb2a51a2d5feaad54e" dependencies = [ "ahash", "apache-avro", @@ -795,9 +795,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effed030d2c1667eb1e11df5372d4981eaf5d11a521be32220b3985ae5ba6971" +checksum = "def66b642959e7f96f5d2da22e1f43d3bd35598f821e5ce351a0553e0f1b7367" dependencies = [ "ahash", "apache-avro", @@ -819,18 +819,18 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0091318129dad1359f08e4c6c71f855163c35bba05d1dbf983196f727857894" +checksum = "f104bb9cb44c06c9badf8a0d7e0855e5f7fa5e395b887d7f835e8a9457dc1352" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8385aba84fc4a06d3ebccfbcbf9b4f985e80c762fac634b49079f7cc14933fb1" +checksum = "2ac0fd8b5d80bbca3fc3b6f40da4e9f6907354824ec3b18bbd83fee8cf5c3c3e" dependencies = [ "arrow", "chrono", @@ -849,9 +849,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb192f0055d2ce64e38ac100abc18e4e6ae9734d3c28eee522bbbd6a32108a3" +checksum = "2103d2cc16fb11ef1fa993a6cac57ed5cb028601db4b97566c90e5fa77aa1e68" dependencies = [ "ahash", "arrow", @@ -868,9 +868,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c081ae5b7edd712b92767fb8ed5c0e32755682f8075707666cd70835807c0b" +checksum = "a369332afd0ef5bd565f6db2139fb9f1dfdd0afa75a7f70f000b74208d76994f" dependencies = [ "arrow", "base64 0.22.1", @@ -880,7 +880,6 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "hashbrown", "hex", "itertools 0.12.1", @@ -895,9 +894,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feb28a4ea52c28a26990646986a27c4052829a2a2572386258679e19263f8b78" +checksum = "92718db1aff70c47e5abf9fc975768530097059e5db7c7b78cd64b5e9a11fc77" dependencies = [ "ahash", "arrow", @@ -913,9 +912,9 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b17c02a74cdc87380a56758ec27e7d417356bf806f33062700908929aedb8a" +checksum = "30bb80f46ff3dcf4bb4510209c2ba9b8ce1b716ac8b7bf70c6bf7dca6260c831" dependencies = [ "arrow", "arrow-array", @@ -926,6 +925,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-functions", + "datafusion-functions-aggregate", "itertools 0.12.1", "log", "paste", @@ -933,9 +933,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12172f2a6c9eb4992a51e62d709eeba5dedaa3b5369cce37ff6c2260e100ba76" +checksum = "82f34692011bec4fdd6fc18c264bf8037b8625d801e6dd8f5111af15cb6d71d3" dependencies = [ "arrow", "async-trait", @@ -947,14 +947,15 @@ dependencies = [ "indexmap", "itertools 0.12.1", "log", + "paste", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3fce531b623e94180f6cd33d620ef01530405751b6ddd2fd96250cdbd78e2e" +checksum = "45538630defedb553771434a437f7ca8f04b9b3e834344aafacecb27dc65d5e5" dependencies = [ "ahash", "arrow", @@ -968,7 +969,6 @@ dependencies = [ "datafusion-common", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", "datafusion-physical-expr-common", "half", "hashbrown", @@ -983,21 +983,23 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046400b6a2cc3ed57a7c576f5ae6aecc77804ac8e0186926b278b189305b2a77" +checksum = "9d8a72b0ca908e074aaeca52c14ddf5c28d22361e9cb6bc79bb733cd6661b536" dependencies = [ + "ahash", "arrow", "datafusion-common", "datafusion-expr", + "hashbrown", "rand", ] [[package]] name = "datafusion-physical-plan" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4aed47f5a2ad8766260befb375b201592e86a08b260256e168ae4311426a2bff" +checksum = "b504eae6107a342775e22e323e9103f7f42db593ec6103b28605b7b7b1405c4a" dependencies = [ "ahash", "arrow", @@ -1029,7 +1031,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "39.0.0" +version = "40.0.0" dependencies = [ "arrow", "async-trait", @@ -1059,9 +1061,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fa92bb1fd15e46ce5fb6f1c85f3ac054592560f294429a28e392b5f9cd4255e" +checksum = "e5db33f323f41b95ae201318ba654a9bf11113e58a51a1dff977b1a836d3d889" dependencies = [ "arrow", "arrow-array", @@ -1076,9 +1078,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "39.0.0" +version = "40.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8249d69665c1cd32e07789ed6dd1da6528a23019ef16d3483db52952b6f9f68a" +checksum = "434e52fbff22e6e04e6c787f603a6aba4961a7e249a29c743c5d4f609ec2dcef" dependencies = [ "arrow-buffer", "async-recursion", @@ -1089,6 +1091,7 @@ dependencies = [ "pbjson-types", "prost", "substrait", + "url", ] [[package]] @@ -2958,9 +2961,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.34.1" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04c77dec9b6c4e48ac828937bbe7cf473b0933168c5d76d51a5816ace7046be9" +checksum = "b1ee6e584c8bf37104b7eb51c25eae07a9321b0e01379bec3b7c462d2f42afbf" dependencies = [ "heck 0.5.0", "pbjson", diff --git a/Cargo.toml b/Cargo.toml index a77eca0c..d05a617a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "39.0.0" +version = "40.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] @@ -38,13 +38,13 @@ tokio = { version = "1.35", features = ["macros", "rt", "rt-multi-thread", "sync rand = "0.8" pyo3 = { version = "0.21", features = ["extension-module", "abi3", "abi3-py38"] } arrow = { version = "52", feature = ["pyarrow"] } -datafusion = { version = "39.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } -datafusion-common = { version = "39.0.0", features = ["pyarrow"] } -datafusion-expr = "39.0.0" -datafusion-functions-array = "39.0.0" -datafusion-optimizer = "39.0.0" -datafusion-sql = "39.0.0" -datafusion-substrait = { version = "39.0.0", optional = true } +datafusion = { version = "40.0.0", features = ["pyarrow", "avro", "unicode_expressions"] } +datafusion-common = { version = "40.0.0", features = ["pyarrow"] } +datafusion-expr = "40.0.0" +datafusion-functions-array = "40.0.0" +datafusion-optimizer = "40.0.0" +datafusion-sql = "40.0.0" +datafusion-substrait = { version = "40.0.0", optional = true } prost = "0.12" prost-types = "0.12" uuid = { version = "1.9", features = ["v4"] } diff --git a/docs/requirements.txt b/docs/requirements.txt index 67f1ec6a..42bc4e51 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,4 +21,5 @@ myst-parser maturin jinja2 ipython -pandas \ No newline at end of file +pandas +pickleshare \ No newline at end of file diff --git a/examples/substrait.py b/examples/substrait.py index fd4d0f9c..fa6f7791 100644 --- a/examples/substrait.py +++ b/examples/substrait.py @@ -46,4 +46,4 @@ # Back to Substrait Plan just for demonstration purposes # type(substrait_plan) -> -substrait_plan = ss.Producer.to_substrait_plan(df_logical_plan) +substrait_plan = ss.Producer.to_substrait_plan(df_logical_plan, ctx) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 46d2a2f0..ca41f5ff 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -101,7 +101,7 @@ def concat(*args: Expr) -> Expr: NULL arguments are ignored. """ args = [arg.expr for arg in args] - return Expr(f.concat(*args)) + return Expr(f.concat(args)) def concat_ws(separator: str, *args: Expr) -> Expr: @@ -110,7 +110,7 @@ def concat_ws(separator: str, *args: Expr) -> Expr: `NULL` arugments are ignored. `separator` should not be `NULL`. """ args = [arg.expr for arg in args] - return Expr(f.concat_ws(separator, *args)) + return Expr(f.concat_ws(separator, args)) def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> Expr: @@ -757,7 +757,7 @@ def upper(arg: Expr) -> Expr: def make_array(*args: Expr) -> Expr: """Returns an array using the specified input expressions.""" args = [arg.expr for arg in args] - return Expr(f.make_array(*args)) + return Expr(f.make_array(args)) def array(*args: Expr) -> Expr: @@ -840,7 +840,7 @@ def list_push_back(array: Expr, element: Expr) -> Expr: def array_concat(*args: Expr) -> Expr: """Concatenates the input arrays.""" args = [arg.expr for arg in args] - return Expr(f.array_concat(*args)) + return Expr(f.array_concat(args)) def array_cat(*args: Expr) -> Expr: @@ -1233,9 +1233,9 @@ def flatten(array: Expr) -> Expr: # aggregate functions -def approx_distinct(arg: Expr) -> Expr: +def approx_distinct(expression: Expr) -> Expr: """Returns the approximate number of distinct values.""" - return Expr(f.approx_distinct(arg.expr, distinct=True)) + return Expr(f.approx_distinct(expression.expr)) def approx_median(arg: Expr, distinct: bool = False) -> Expr: @@ -1244,20 +1244,21 @@ def approx_median(arg: Expr, distinct: bool = False) -> Expr: def approx_percentile_cont( - expr: Expr, + expression: Expr, percentile: Expr, - num_centroids: int | None = None, distinct: bool = False, ) -> Expr: """Returns the value that is approximately at a given percentile of ``expr``.""" + # Re-enable num_centroids: https://github.com/apache/datafusion-python/issues/777 + num_centroids = None if num_centroids is None: return Expr( - f.approx_percentile_cont(expr.expr, percentile.expr, distinct=distinct) + f.approx_percentile_cont(expression.expr, percentile.expr, distinct=distinct) ) return Expr( f.approx_percentile_cont( - expr.expr, percentile.expr, num_centroids, distinct=distinct + expression.expr, percentile.expr, distinct=distinct ) ) @@ -1306,7 +1307,7 @@ def covar(y: Expr, x: Expr) -> Expr: This is an alias for `covar_samp`. """ - return Expr(f.covar(y.expr, x.expr)) + return covar_samp(y, x) def covar_pop(y: Expr, x: Expr) -> Expr: @@ -1324,7 +1325,7 @@ def grouping(arg: Expr, distinct: bool = False) -> Expr: Returns 1 if the value of the argument is aggregated, 0 if not. """ - return Expr(f.grouping([arg.expr], distinct=distinct)) + return Expr(f.grouping(arg.expr, distinct=distinct)) def max(arg: Expr, distinct: bool = False) -> Expr: @@ -1396,7 +1397,7 @@ def regr_avgx(y: Expr, x: Expr, distinct: bool = False) -> Expr: Only non-null pairs of the inputs are evaluated. """ - return Expr(f.regr_avgx[y.expr, x.expr], distinct) + return Expr(f.regr_avgx(y.expr, x.expr, distinct)) def regr_avgy(y: Expr, x: Expr, distinct: bool = False) -> Expr: @@ -1404,42 +1405,42 @@ def regr_avgy(y: Expr, x: Expr, distinct: bool = False) -> Expr: Only non-null pairs of the inputs are evaluated. """ - return Expr(f.regr_avgy[y.expr, x.expr], distinct) + return Expr(f.regr_avgy(y.expr, x.expr, distinct)) def regr_count(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Counts the number of rows in which both expressions are not null.""" - return Expr(f.regr_count[y.expr, x.expr], distinct) + return Expr(f.regr_count(y.expr, x.expr, distinct)) def regr_intercept(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the intercept from the linear regression.""" - return Expr(f.regr_intercept[y.expr, x.expr], distinct) + return Expr(f.regr_intercept(y.expr, x.expr, distinct)) def regr_r2(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the R-squared value from linear regression.""" - return Expr(f.regr_r2[y.expr, x.expr], distinct) + return Expr(f.regr_r2(y.expr, x.expr, distinct)) def regr_slope(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the slope from linear regression.""" - return Expr(f.regr_slope[y.expr, x.expr], distinct) + return Expr(f.regr_slope(y.expr, x.expr, distinct)) def regr_sxx(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the sum of squares of the independent variable `x`.""" - return Expr(f.regr_sxx[y.expr, x.expr], distinct) + return Expr(f.regr_sxx(y.expr, x.expr, distinct)) def regr_sxy(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the sum of products of pairs of numbers.""" - return Expr(f.regr_sxy[y.expr, x.expr], distinct) + return Expr(f.regr_sxy(y.expr, x.expr, distinct)) def regr_syy(y: Expr, x: Expr, distinct: bool = False) -> Expr: """Computes the sum of squares of the dependent variable `y`.""" - return Expr(f.regr_syy[y.expr, x.expr], distinct) + return Expr(f.regr_syy(y.expr, x.expr, distinct)) def first_value( @@ -1480,31 +1481,26 @@ def last_value( ) -def bit_and(*args: Expr, distinct: bool = False) -> Expr: +def bit_and(arg: Expr, distinct: bool = False) -> Expr: """Computes the bitwise AND of the argument.""" - args = [arg.expr for arg in args] - return Expr(f.bit_and(*args, distinct=distinct)) + return Expr(f.bit_and(arg.expr, distinct=distinct)) -def bit_or(*args: Expr, distinct: bool = False) -> Expr: +def bit_or(arg: Expr, distinct: bool = False) -> Expr: """Computes the bitwise OR of the argument.""" - args = [arg.expr for arg in args] - return Expr(f.bit_or(*args, distinct=distinct)) + return Expr(f.bit_or(arg.expr, distinct=distinct)) -def bit_xor(*args: Expr, distinct: bool = False) -> Expr: +def bit_xor(arg: Expr, distinct: bool = False) -> Expr: """Computes the bitwise XOR of the argument.""" - args = [arg.expr for arg in args] - return Expr(f.bit_xor(*args, distinct=distinct)) + return Expr(f.bit_xor(arg.expr, distinct=distinct)) -def bool_and(*args: Expr, distinct: bool = False) -> Expr: +def bool_and(arg: Expr, distinct: bool = False) -> Expr: """Computes the boolean AND of the arugment.""" - args = [arg.expr for arg in args] - return Expr(f.bool_and(*args, distinct=distinct)) + return Expr(f.bool_and(arg.expr, distinct=distinct)) -def bool_or(*args: Expr, distinct: bool = False) -> Expr: +def bool_or(arg: Expr, distinct: bool = False) -> Expr: """Computes the boolean OR of the arguement.""" - args = [arg.expr for arg in args] - return Expr(f.bool_or(*args, distinct=distinct)) + return Expr(f.bool_or(arg.expr, distinct=distinct)) diff --git a/python/datafusion/tests/test_aggregation.py b/python/datafusion/tests/test_aggregation.py index 99a470b6..c10e5f36 100644 --- a/python/datafusion/tests/test_aggregation.py +++ b/python/datafusion/tests/test_aggregation.py @@ -79,7 +79,8 @@ def test_built_in_aggregation(df): assert result.column(0) == pa.array([2], type=pa.uint64()) assert result.column(1) == pa.array([4]) assert result.column(2) == pa.array([4]) - assert result.column(3) == pa.array([6]) + # Ref: https://github.com/apache/datafusion-python/issues/777 + # assert result.column(3) == pa.array([6]) assert result.column(4) == pa.array([[4, 4, 6]]) np.testing.assert_array_almost_equal(result.column(5), np.average(values_a)) np.testing.assert_array_almost_equal( diff --git a/python/datafusion/tests/test_dataframe.py b/python/datafusion/tests/test_dataframe.py index 25875da7..6444d932 100644 --- a/python/datafusion/tests/test_dataframe.py +++ b/python/datafusion/tests/test_dataframe.py @@ -278,63 +278,48 @@ def test_distinct(): assert df_a.collect() == df_b.collect() -def test_window_functions(df): +data_test_window_functions = [ + ("row", f.window("row_number", [], order_by=[f.order_by(column("c"))]), [2, 1, 3]), + ("rank", f.window("rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2]), + ("dense_rank", f.window("dense_rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2] ), + ("percent_rank", f.window("percent_rank", [], order_by=[f.order_by(column("c"))]), [0.5, 0, 0.5]), + ("cume_dist", f.window("cume_dist", [], order_by=[f.order_by(column("b"))]), [0.3333333333333333, 0.6666666666666666, 1.0]), + ("ntile", f.window("ntile", [literal(2)], order_by=[f.order_by(column("c"))]), [1, 1, 2]), + ("next", f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]), [5, 6, None]), + ("previous", f.window("lag", [column("b")], order_by=[f.order_by(column("b"))]), [None, 4, 5]), + pytest.param( + "first_value", + f.window( + "first_value", + [column("a")], + order_by=[f.order_by(column("b"))] + ), + [1, 1, 1], + ), + pytest.param( + "last_value", + f.window("last_value", [column("b")], order_by=[f.order_by(column("b"))]), + [4, 5, 6], + ), + pytest.param( + "2nd_value", + f.window( + "nth_value", + [column("b"), literal(2)], + order_by=[f.order_by(column("b"))], + ), + [None, 5, 5], + ), +] + + +@pytest.mark.parametrize("name,expr,result", data_test_window_functions) +def test_window_functions(df, name, expr, result): df = df.select( column("a"), column("b"), column("c"), - f.alias( - f.window("row_number", [], order_by=[f.order_by(column("c"))]), - "row", - ), - f.alias( - f.window("rank", [], order_by=[f.order_by(column("c"))]), - "rank", - ), - f.alias( - f.window("dense_rank", [], order_by=[f.order_by(column("c"))]), - "dense_rank", - ), - f.alias( - f.window("percent_rank", [], order_by=[f.order_by(column("c"))]), - "percent_rank", - ), - f.alias( - f.window("cume_dist", [], order_by=[f.order_by(column("b"))]), - "cume_dist", - ), - f.alias( - f.window("ntile", [literal(2)], order_by=[f.order_by(column("c"))]), - "ntile", - ), - f.alias( - f.window("lag", [column("b")], order_by=[f.order_by(column("b"))]), - "previous", - ), - f.alias( - f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]), - "next", - ), - f.alias( - f.window( - "first_value", - [column("a")], - order_by=[f.order_by(column("b"))], - ), - "first_value", - ), - f.alias( - f.window("last_value", [column("b")], order_by=[f.order_by(column("b"))]), - "last_value", - ), - f.alias( - f.window( - "nth_value", - [column("b"), literal(2)], - order_by=[f.order_by(column("b"))], - ), - "2nd_value", - ), + f.alias(expr, name) ) table = pa.Table.from_batches(df.collect()) @@ -343,18 +328,9 @@ def test_window_functions(df): "a": [1, 2, 3], "b": [4, 5, 6], "c": [8, 5, 8], - "row": [2, 1, 3], - "rank": [2, 1, 2], - "dense_rank": [2, 1, 2], - "percent_rank": [0.5, 0, 0.5], - "cume_dist": [0.3333333333333333, 0.6666666666666666, 1.0], - "ntile": [1, 1, 2], - "next": [5, 6, None], - "previous": [None, 4, 5], - "first_value": [1, 1, 1], - "last_value": [4, 5, 6], - "2nd_value": [None, 5, 5], + name: result } + assert table.sort_by("a").to_pydict() == expected @@ -434,13 +410,13 @@ def test_explain(df): def test_logical_plan(aggregate_df): plan = aggregate_df.logical_plan() - expected = "Projection: test.c1, SUM(test.c2)" + expected = "Projection: test.c1, sum(test.c2)" assert expected == plan.display() expected = ( - "Projection: test.c1, SUM(test.c2)\n" - " Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" + "Projection: test.c1, sum(test.c2)\n" + " Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]\n" " TableScan: test" ) @@ -450,12 +426,12 @@ def test_logical_plan(aggregate_df): def test_optimized_logical_plan(aggregate_df): plan = aggregate_df.optimized_logical_plan() - expected = "Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]" + expected = "Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]" assert expected == plan.display() expected = ( - "Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" + "Aggregate: groupBy=[[test.c1]], aggr=[[sum(test.c2)]]\n" " TableScan: test projection=[c1, c2]" ) @@ -466,7 +442,7 @@ def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() expected = ( - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[SUM(test.c2)]\n" # noqa: E501 + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" # noqa: E501 ) assert expected == plan.display() diff --git a/python/datafusion/tests/test_functions.py b/python/datafusion/tests/test_functions.py index 25d7de14..29391232 100644 --- a/python/datafusion/tests/test_functions.py +++ b/python/datafusion/tests/test_functions.py @@ -567,87 +567,51 @@ def test_array_function_obj_tests(stmt, py_expr): assert a == b -def test_string_functions(df): - df = df.select( - f.ascii(column("a")), - f.bit_length(column("a")), - f.btrim(literal(" World ")), - f.character_length(column("a")), - f.chr(literal(68)), - f.concat_ws("-", column("a"), literal("test")), - f.concat(column("a"), literal("?")), - f.initcap(column("c")), - f.left(column("a"), literal(3)), - f.length(column("c")), - f.lower(column("a")), - f.lpad(column("a"), literal(7)), - f.ltrim(column("c")), - f.md5(column("a")), - f.octet_length(column("a")), - f.repeat(column("a"), literal(2)), - f.replace(column("a"), literal("l"), literal("?")), - f.reverse(column("a")), - f.right(column("a"), literal(4)), - f.rpad(column("a"), literal(8)), - f.rtrim(column("c")), - f.split_part(column("a"), literal("l"), literal(1)), - f.starts_with(column("a"), literal("Wor")), - f.strpos(column("a"), literal("o")), - f.substr(column("a"), literal(3)), - f.translate(column("a"), literal("or"), literal("ld")), - f.trim(column("c")), - f.upper(column("c")), - f.ends_with(column("a"), literal("llo")), - f.overlay(column("a"), literal("--"), literal(2)), - f.regexp_like(column("a"), literal("(ell|orl)")), - f.regexp_match(column("a"), literal("(ell|orl)")), - f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), - ) - +@pytest.mark.parametrize("function, expected_result", [ + (f.ascii(column("a")), pa.array([72, 87, 33], type=pa.int32())), # H = 72; W = 87; ! = 33 + (f.bit_length(column("a")), pa.array([40, 40, 8], type=pa.int32())), + (f.btrim(literal(" World ")), pa.array(["World", "World", "World"])), + (f.character_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), + (f.chr(literal(68)), pa.array(["D", "D", "D"])), + (f.concat_ws("-", column("a"), literal("test")), pa.array(["Hello-test", "World-test", "!-test"])), + (f.concat(column("a"), literal("?")), pa.array(["Hello?", "World?", "!?"])), + (f.initcap(column("c")), pa.array(["Hello ", " World ", " !"])), + (f.left(column("a"), literal(3)), pa.array(["Hel", "Wor", "!"])), + (f.length(column("c")), pa.array([6, 7, 2], type=pa.int32())), + (f.lower(column("a")), pa.array(["hello", "world", "!"])), + (f.lpad(column("a"), literal(7)), pa.array([" Hello", " World", " !"])), + (f.ltrim(column("c")), pa.array(["hello ", "world ", "!"])), + (f.md5(column("a")), pa.array([ + "8b1a9953c4611296a827abf8c47804d7", + "f5a7924e621e84c9280a9a27e1bcb7f6", + "9033e0e305f247c0c3c80d0c7848c8b3", + ])), + (f.octet_length(column("a")), pa.array([5, 5, 1], type=pa.int32())), + (f.repeat(column("a"), literal(2)), pa.array(["HelloHello", "WorldWorld", "!!"])), + (f.replace(column("a"), literal("l"), literal("?")), pa.array(["He??o", "Wor?d", "!"])), + (f.reverse(column("a")), pa.array(["olleH", "dlroW", "!"])), + (f.right(column("a"), literal(4)), pa.array(["ello", "orld", "!"])), + (f.rpad(column("a"), literal(8)), pa.array(["Hello ", "World ", "! "])), + (f.rtrim(column("c")), pa.array(["hello", " world", " !"])), + (f.split_part(column("a"), literal("l"), literal(1)), pa.array(["He", "Wor", "!"])), + (f.starts_with(column("a"), literal("Wor")), pa.array([False, True, False])), + (f.strpos(column("a"), literal("o")), pa.array([5, 2, 0], type=pa.int32())), + (f.substr(column("a"), literal(3)), pa.array(["llo", "rld", ""])), + (f.translate(column("a"), literal("or"), literal("ld")), pa.array(["Helll", "Wldld", "!"])), + (f.trim(column("c")), pa.array(["hello", "world", "!"])), + (f.upper(column("c")), pa.array(["HELLO ", " WORLD ", " !"])), + (f.ends_with(column("a"), literal("llo")), pa.array([True, False, False])), + (f.overlay(column("a"), literal("--"), literal(2)), pa.array(["H--lo", "W--ld", "--"])), + (f.regexp_like(column("a"), literal("(ell|orl)")), pa.array([True, True, False])), + (f.regexp_match(column("a"), literal("(ell|orl)")), pa.array([["ell"], ["orl"], None])), + (f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), pa.array(["H-o", "W-d", "!"])), +]) +def test_string_functions(df, function, expected_result): + df = df.select(function) result = df.collect() assert len(result) == 1 result = result[0] - assert result.column(0) == pa.array( - [72, 87, 33], type=pa.int32() - ) # H = 72; W = 87; ! = 33 - assert result.column(1) == pa.array([40, 40, 8], type=pa.int32()) - assert result.column(2) == pa.array(["World", "World", "World"]) - assert result.column(3) == pa.array([5, 5, 1], type=pa.int32()) - assert result.column(4) == pa.array(["D", "D", "D"]) - assert result.column(5) == pa.array(["Hello-test", "World-test", "!-test"]) - assert result.column(6) == pa.array(["Hello?", "World?", "!?"]) - assert result.column(7) == pa.array(["Hello ", " World ", " !"]) - assert result.column(8) == pa.array(["Hel", "Wor", "!"]) - assert result.column(9) == pa.array([6, 7, 2], type=pa.int32()) - assert result.column(10) == pa.array(["hello", "world", "!"]) - assert result.column(11) == pa.array([" Hello", " World", " !"]) - assert result.column(12) == pa.array(["hello ", "world ", "!"]) - assert result.column(13) == pa.array( - [ - "8b1a9953c4611296a827abf8c47804d7", - "f5a7924e621e84c9280a9a27e1bcb7f6", - "9033e0e305f247c0c3c80d0c7848c8b3", - ] - ) - assert result.column(14) == pa.array([5, 5, 1], type=pa.int32()) - assert result.column(15) == pa.array(["HelloHello", "WorldWorld", "!!"]) - assert result.column(16) == pa.array(["He??o", "Wor?d", "!"]) - assert result.column(17) == pa.array(["olleH", "dlroW", "!"]) - assert result.column(18) == pa.array(["ello", "orld", "!"]) - assert result.column(19) == pa.array(["Hello ", "World ", "! "]) - assert result.column(20) == pa.array(["hello", " world", " !"]) - assert result.column(21) == pa.array(["He", "Wor", "!"]) - assert result.column(22) == pa.array([False, True, False]) - assert result.column(23) == pa.array([5, 2, 0], type=pa.int32()) - assert result.column(24) == pa.array(["llo", "rld", ""]) - assert result.column(25) == pa.array(["Helll", "Wldld", "!"]) - assert result.column(26) == pa.array(["hello", "world", "!"]) - assert result.column(27) == pa.array(["HELLO ", " WORLD ", " !"]) - assert result.column(28) == pa.array([True, False, False]) - assert result.column(29) == pa.array(["H--lo", "W--ld", "--"]) - assert result.column(30) == pa.array([True, True, False]) - assert result.column(31) == pa.array([["ell"], ["orl"], None]) - assert result.column(32) == pa.array(["H-o", "W-d", "!"]) + assert result.column(0) == expected_result def test_hash_functions(df): @@ -831,7 +795,7 @@ def test_case(df): assert result.column(2) == pa.array(["Hola", "Mundo", None]) -def test_regr_funcs(df): +def test_regr_funcs_sql(df): # test case base on # https://github.com/apache/arrow-datafusion/blob/d1361d56b9a9e0c165d3d71a8df6795d2a5f51dd/datafusion/core/tests/sqllogictests/test_files/aggregate.slt#L2330 ctx = SessionContext() @@ -853,6 +817,68 @@ def test_regr_funcs(df): assert result[0].column(8) == pa.array([0], type=pa.float64()) +def test_regr_funcs_sql_2(): + # test case based on `regr_*() basic tests + # https://github.com/apache/datafusion/blob/d1361d56b9a9e0c165d3d71a8df6795d2a5f51dd/datafusion/core/tests/sqllogictests/test_files/aggregate.slt#L2358C1-L2374C1 + ctx = SessionContext() + + # Perform the regression functions using SQL + result_sql = ctx.sql( + "select " + "regr_slope(column2, column1), " + "regr_intercept(column2, column1), " + "regr_count(column2, column1), " + "regr_r2(column2, column1), " + "regr_avgx(column2, column1), " + "regr_avgy(column2, column1), " + "regr_sxx(column2, column1), " + "regr_syy(column2, column1), " + "regr_sxy(column2, column1) " + "from (values (1,2), (2,4), (3,6))" + ).collect() + + # Assertions for SQL results + assert result_sql[0].column(0) == pa.array([2], type=pa.float64()) + assert result_sql[0].column(1) == pa.array([0], type=pa.float64()) + assert result_sql[0].column(2) == pa.array([3], type=pa.float64()) # todo: i would not expect this to be float + assert result_sql[0].column(3) == pa.array([1], type=pa.float64()) + assert result_sql[0].column(4) == pa.array([2], type=pa.float64()) + assert result_sql[0].column(5) == pa.array([4], type=pa.float64()) + assert result_sql[0].column(6) == pa.array([2], type=pa.float64()) + assert result_sql[0].column(7) == pa.array([8], type=pa.float64()) + assert result_sql[0].column(8) == pa.array([4], type=pa.float64()) + + +@pytest.mark.parametrize("func, expected", [ + pytest.param(f.regr_slope, pa.array([2], type=pa.float64()), id="regr_slope"), + pytest.param(f.regr_intercept, pa.array([0], type=pa.float64()), id="regr_intercept"), + pytest.param(f.regr_count, pa.array([3], type=pa.float64()), id="regr_count"), # TODO: I would expect this to return an int array + pytest.param(f.regr_r2, pa.array([1], type=pa.float64()), id="regr_r2"), + pytest.param(f.regr_avgx, pa.array([2], type=pa.float64()), id="regr_avgx"), + pytest.param(f.regr_avgy, pa.array([4], type=pa.float64()), id="regr_avgy"), + pytest.param(f.regr_sxx, pa.array([2], type=pa.float64()), id="regr_sxx"), + pytest.param(f.regr_syy, pa.array([8], type=pa.float64()), id="regr_syy"), + pytest.param(f.regr_sxy, pa.array([4], type=pa.float64()), id="regr_sxy") +]) +def test_regr_funcs_df(func, expected): + + # test case based on `regr_*() basic tests + # https://github.com/apache/datafusion/blob/d1361d56b9a9e0c165d3d71a8df6795d2a5f51dd/datafusion/core/tests/sqllogictests/test_files/aggregate.slt#L2358C1-L2374C1 + + + ctx = SessionContext() + + # Create a DataFrame + data = {'column1': [1, 2, 3], 'column2': [2, 4, 6]} + df = ctx.from_pydict(data, name="test_table") + + # Perform the regression function using DataFrame API + result_df = df.aggregate([], [func(f.col("column2"), f.col("column1"))]).collect() + + # Assertion for DataFrame API result + assert result_df[0].column(0) == expected + + def test_first_last_value(df): df = df.aggregate( [], diff --git a/python/datafusion/tests/test_sql.py b/python/datafusion/tests/test_sql.py index d85f380e..1505fb1e 100644 --- a/python/datafusion/tests/test_sql.py +++ b/python/datafusion/tests/test_sql.py @@ -378,17 +378,18 @@ def test_udf( # C data interface missing pytest.param( pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), + id="binary4", marks=pytest.mark.xfail, ), - pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("s"), id="datetime_s", marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ms"), id="datetime_ms", marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("us"), id="datetime_us", marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ns"), id="datetime_ns", marks=pytest.mark.xfail), # Not writtable to parquet - pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("s"), id="timedelta_s", marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ms"), id="timedelta_ms", marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("us"), id="timedelta_us", marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ns"), id="timedelta_ns", marks=pytest.mark.xfail), ], ) def test_simple_select(ctx, tmp_path, arr): diff --git a/src/common/data_type.rs b/src/common/data_type.rs index 42c5aefe..469bb789 100644 --- a/src/common/data_type.rs +++ b/src/common/data_type.rs @@ -326,6 +326,11 @@ impl DataTypeMap { ScalarValue::Union(_, _, _) => Err(py_datafusion_err(DataFusionError::NotImplemented( "ScalarValue::LargeList".to_string(), ))), + ScalarValue::Utf8View(_) => Ok(DataType::Utf8View), + ScalarValue::BinaryView(_) => Ok(DataType::BinaryView), + ScalarValue::Map(_) => Err(py_datafusion_err(DataFusionError::NotImplemented( + "ScalarValue::Map".to_string(), + ))), } } } diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index 240c8648..5fe1f4d1 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -152,6 +152,11 @@ impl DatasetExec { } impl ExecutionPlan for DatasetExec { + fn name(&self) -> &str { + // [ExecutionPlan::name] docs recommends forwarding to `static_name` + Self::static_name() + } + /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { self diff --git a/src/functions.rs b/src/functions.rs index 74eb48a6..e60c63c8 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -16,6 +16,7 @@ // under the License. use datafusion::functions_aggregate::all_default_aggregate_functions; +use datafusion_expr::AggregateExt; use pyo3::{prelude::*, wrap_pyfunction}; use crate::common::data_type::NullTreatment; @@ -30,13 +31,141 @@ use datafusion::functions_aggregate; use datafusion_common::{Column, ScalarValue, TableReference}; use datafusion_expr::expr::Alias; use datafusion_expr::{ - aggregate_function, expr::{ find_df_window_func, AggregateFunction, AggregateFunctionDefinition, Sort, WindowFunction, }, lit, Expr, WindowFunctionDefinition, }; +#[pyfunction] +pub fn approx_distinct(expression: PyExpr) -> PyExpr { + functions_aggregate::expr_fn::approx_distinct::approx_distinct(expression.expr).into() +} + +#[pyfunction] +pub fn approx_median(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::approx_median(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn approx_percentile_cont( + expression: PyExpr, + percentile: PyExpr, + distinct: bool, +) -> PyResult { + let expr = + functions_aggregate::expr_fn::approx_percentile_cont(expression.expr, percentile.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn approx_percentile_cont_with_weight( + expression: PyExpr, + weight: PyExpr, + percentile: PyExpr, + distinct: bool, +) -> PyResult { + let expr = functions_aggregate::expr_fn::approx_percentile_cont_with_weight( + expression.expr, + weight.expr, + percentile.expr, + ); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn avg(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::avg(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn bit_and(expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::bit_and(expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn bit_or(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::bit_or(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn bit_xor(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::bit_xor(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn bool_and(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::bool_and(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn bool_or(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::bool_or(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn corr(y: PyExpr, x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::corr(y.expr, x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn grouping(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::grouping(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + #[pyfunction] pub fn sum(args: PyExpr) -> PyExpr { functions_aggregate::expr_fn::sum(args.expr).into() @@ -58,9 +187,23 @@ pub fn median(arg: PyExpr) -> PyExpr { } #[pyfunction] -pub fn covar(y: PyExpr, x: PyExpr) -> PyExpr { - // alias for covar_samp - covar_samp(y, x) +pub fn stddev(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::stddev(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn stddev_pop(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::stddev_pop(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } } #[pyfunction] @@ -69,53 +212,166 @@ pub fn var_samp(expression: PyExpr) -> PyExpr { } #[pyfunction] -/// Alias for [`var_samp`] -pub fn var(y: PyExpr) -> PyExpr { - var_samp(y) +pub fn var_pop(expression: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::var_pop(expression.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_avgx(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_avgx(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_avgy(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_avgy(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_count(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_count(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_intercept(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_intercept(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_r2(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_r2(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_slope(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_slope(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_sxx(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_sxx(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_sxy(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_sxy(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } +} + +#[pyfunction] +pub fn regr_syy(expr_y: PyExpr, expr_x: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::regr_syy(expr_y.expr, expr_x.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } } #[pyfunction] -#[pyo3(signature = (*args, distinct = false, filter = None, order_by = None, null_treatment = None))] pub fn first_value( - args: Vec, + expr: PyExpr, distinct: bool, filter: Option, order_by: Option>, null_treatment: Option, -) -> PyExpr { - let null_treatment = null_treatment.map(Into::into); - let args = args.into_iter().map(|x| x.expr).collect::>(); - let order_by = order_by.map(|x| x.into_iter().map(|x| x.expr).collect::>()); - functions_aggregate::expr_fn::first_value( - args, - distinct, - filter.map(|x| Box::new(x.expr)), - order_by, - null_treatment, - ) - .into() +) -> PyResult { + // If we initialize the UDAF with order_by directly, then it gets over-written by the builder + let agg_fn = functions_aggregate::expr_fn::first_value(expr.expr, None); + + // luckily, I can guarantee initializing a builder with an `order_by` default of empty vec + let order_by = order_by + .map(|x| x.into_iter().map(|x| x.expr).collect::>()) + .unwrap_or_default(); + let mut builder = agg_fn.order_by(order_by); + + if distinct { + builder = builder.distinct(); + } + + if let Some(filter) = filter { + builder = builder.filter(filter.expr); + } + + if let Some(null_treatment) = null_treatment { + builder = builder.null_treatment(null_treatment.into()) + } + + Ok(builder.build()?.into()) } #[pyfunction] -#[pyo3(signature = (*args, distinct = false, filter = None, order_by = None, null_treatment = None))] pub fn last_value( - args: Vec, + expr: PyExpr, distinct: bool, filter: Option, order_by: Option>, null_treatment: Option, -) -> PyExpr { - let null_treatment = null_treatment.map(Into::into); - let args = args.into_iter().map(|x| x.expr).collect::>(); - let order_by = order_by.map(|x| x.into_iter().map(|x| x.expr).collect::>()); - functions_aggregate::expr_fn::last_value( - args, - distinct, - filter.map(|x| Box::new(x.expr)), - order_by, - null_treatment, - ) - .into() +) -> PyResult { + let agg_fn = functions_aggregate::expr_fn::last_value(vec![expr.expr]); + + // luckily, I can guarantee initializing a builder with an `order_by` default of empty vec + let order_by = order_by + .map(|x| x.into_iter().map(|x| x.expr).collect::>()) + .unwrap_or_default(); + let mut builder = agg_fn.order_by(order_by); + + if distinct { + builder = builder.distinct(); + } + + if let Some(filter) = filter { + builder = builder.filter(filter.expr); + } + + if let Some(null_treatment) = null_treatment { + builder = builder.null_treatment(null_treatment.into()) + } + + Ok(builder.build()?.into()) } #[pyfunction] @@ -129,34 +385,23 @@ fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { } #[pyfunction] -#[pyo3(signature = (*exprs))] fn make_array(exprs: Vec) -> PyExpr { datafusion_functions_array::expr_fn::make_array(exprs.into_iter().map(|x| x.into()).collect()) .into() } #[pyfunction] -#[pyo3(signature = (*exprs))] -fn array(exprs: Vec) -> PyExpr { - // alias for make_array - make_array(exprs) -} - -#[pyfunction] -#[pyo3(signature = (*exprs))] fn array_concat(exprs: Vec) -> PyExpr { let exprs = exprs.into_iter().map(|x| x.into()).collect(); datafusion_functions_array::expr_fn::array_concat(exprs).into() } #[pyfunction] -#[pyo3(signature = (*exprs))] fn array_cat(exprs: Vec) -> PyExpr { array_concat(exprs) } #[pyfunction] -#[pyo3(signature = (array, element, index = 1))] fn array_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { let index = ScalarValue::Int64(index); let index = Expr::Literal(index); @@ -164,28 +409,6 @@ fn array_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr } #[pyfunction] -#[pyo3(signature = (array, element, index = 1))] -fn array_indexof(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { - // alias of array_position - array_position(array, element, index) -} - -#[pyfunction] -#[pyo3(signature = (array, element, index = 1))] -fn list_position(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { - // alias of array_position - array_position(array, element, index) -} - -#[pyfunction] -#[pyo3(signature = (array, element, index = 1))] -fn list_indexof(array: PyExpr, element: PyExpr, index: Option) -> PyExpr { - // alias of array_position - array_position(array, element, index) -} - -#[pyfunction] -#[pyo3(signature = (array, begin, end, stride = None))] fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { datafusion_functions_array::expr_fn::array_slice( array.into(), @@ -196,18 +419,10 @@ fn array_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option .into() } -#[pyfunction] -#[pyo3(signature = (array, begin, end, stride = None))] -fn list_slice(array: PyExpr, begin: PyExpr, end: PyExpr, stride: Option) -> PyExpr { - // alias of array_slice - array_slice(array, begin, end, stride) -} - /// Computes a binary hash of the given data. type is the algorithm to use. /// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. // #[pyfunction(value, method)] #[pyfunction] -#[pyo3(signature = (value, method))] fn digest(value: PyExpr, method: PyExpr) -> PyExpr { PyExpr { expr: functions::expr_fn::digest(value.expr, method.expr), @@ -217,7 +432,6 @@ fn digest(value: PyExpr, method: PyExpr) -> PyExpr { /// Concatenates the text representations of all the arguments. /// NULL arguments are ignored. #[pyfunction] -#[pyo3(signature = (*args))] fn concat(args: Vec) -> PyResult { let args = args.into_iter().map(|e| e.expr).collect::>(); Ok(functions::string::expr_fn::concat(args).into()) @@ -227,20 +441,17 @@ fn concat(args: Vec) -> PyResult { /// The first argument is used as the separator string, and should not be NULL. /// Other NULL arguments are ignored. #[pyfunction] -#[pyo3(signature = (sep, *args))] fn concat_ws(sep: String, args: Vec) -> PyResult { let args = args.into_iter().map(|e| e.expr).collect::>(); Ok(functions::string::expr_fn::concat_ws(lit(sep), args).into()) } #[pyfunction] -#[pyo3(signature = (values, regex, flags = None))] fn regexp_like(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { Ok(functions::expr_fn::regexp_like(values.expr, regex.expr, flags.map(|x| x.expr)).into()) } #[pyfunction] -#[pyo3(signature = (values, regex, flags = None))] fn regexp_match(values: PyExpr, regex: PyExpr, flags: Option) -> PyResult { Ok(functions::expr_fn::regexp_match(values.expr, regex.expr, flags.map(|x| x.expr)).into()) } @@ -293,21 +504,23 @@ fn col(name: &str) -> PyResult { }) } +// TODO: should we just expose this in python? /// Create a COUNT(1) aggregate expression #[pyfunction] -fn count_star() -> PyResult { - Ok(PyExpr { - expr: Expr::AggregateFunction(AggregateFunction { - func_def: datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn( - aggregate_function::AggregateFunction::Count, - ), - args: vec![lit(1)], - distinct: false, - filter: None, - order_by: None, - null_treatment: None, - }), - }) +fn count_star() -> PyExpr { + functions_aggregate::expr_fn::count(lit(1)).into() +} + +/// Wrapper for [`functions_aggregate::expr_fn::count`] +/// Count the number of non-null values in the column +#[pyfunction] +fn count(expr: PyExpr, distinct: bool) -> PyResult { + let expr = functions_aggregate::expr_fn::count(expr.expr); + if distinct { + Ok(expr.distinct().build()?.into()) + } else { + Ok(expr.into()) + } } /// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. @@ -318,48 +531,70 @@ fn case(expr: PyExpr) -> PyResult { }) } -/// Helper function to find the appropriate window function. First, if a session -/// context is defined check it's registered functions. If no context is defined, -/// attempt to find from all default functions. Lastly, as a fall back attempt -/// to use built in window functions, which are being deprecated. +/// Helper function to find the appropriate window function. +/// +/// Search procedure: +/// 1) Search built in window functions, which are being deprecated. +/// 1) If a session context is provided: +/// 1) search User Defined Aggregate Functions (UDAFs) +/// 1) search registered window functions +/// 1) search registered aggregate functions +/// 1) If no function has been found, search default aggregate functions. +/// +/// NOTE: we search the built-ins first because the `UDAF` versions currently do not have the same behavior. fn find_window_fn(name: &str, ctx: Option) -> PyResult { - let mut maybe_fn = match &ctx { - Some(ctx) => { - let session_state = ctx.ctx.state(); - - match session_state.window_functions().contains_key(name) { - true => session_state - .window_functions() - .get(name) - .map(|f| WindowFunctionDefinition::WindowUDF(f.clone())), - false => session_state - .aggregate_functions() - .get(name) - .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())), - } + // search built in window functions (soon to be deprecated) + let df_window_func = find_df_window_func(name); + if let Some(df_window_func) = df_window_func { + return Ok(df_window_func); + } + + if let Some(ctx) = ctx { + // search UDAFs + let udaf = ctx + .ctx + .udaf(name) + .map(WindowFunctionDefinition::AggregateUDF) + .ok(); + + if let Some(udaf) = udaf { + return Ok(udaf); } - None => { - let default_aggregate_fns = all_default_aggregate_functions(); - default_aggregate_fns - .iter() - .find(|v| v.aliases().contains(&name.to_string())) - .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())) + let session_state = ctx.ctx.state(); + + // search registered window functions + let window_fn = session_state + .window_functions() + .get(name) + .map(|f| WindowFunctionDefinition::WindowUDF(f.clone())); + + if let Some(window_fn) = window_fn { + return Ok(window_fn); } - }; - if maybe_fn.is_none() { - maybe_fn = find_df_window_func(name).or_else(|| { - ctx.and_then(|ctx| { - ctx.ctx - .udaf(name) - .map(WindowFunctionDefinition::AggregateUDF) - .ok() - }) - }); + // search registered aggregate functions + let agg_fn = session_state + .aggregate_functions() + .get(name) + .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())); + + if let Some(agg_fn) = agg_fn { + return Ok(agg_fn); + } } - maybe_fn.ok_or(DataFusionError::Common("window function not found".to_string()).into()) + // search default aggregate functions + let agg_fn = all_default_aggregate_functions() + .iter() + .find(|v| v.name() == name || v.aliases().contains(&name.to_string())) + .map(|f| WindowFunctionDefinition::AggregateUDF(f.clone())); + + if let Some(agg_fn) = agg_fn { + return Ok(agg_fn); + } + + Err(DataFusionError::Common(format!("window function `{name}` not found")).into()) } /// Creates a new Window function expression @@ -424,25 +659,19 @@ macro_rules! aggregate_function { /// /// These functions have explicit named arguments. macro_rules! expr_fn { - ($NAME: ident) => { - expr_fn!($NAME, $NAME, , stringify!($NAME)); - }; - ($NAME:ident, $($arg:ident)*) => { - expr_fn!($NAME, $NAME, $($arg)*, stringify!($FUNC)); - }; - ($NAME:ident, $FUNC:ident, $($arg:ident)*) => { - expr_fn!($NAME, $FUNC, $($arg)*, stringify!($FUNC)); + ($FUNC: ident) => { + expr_fn!($FUNC, , stringify!($FUNC)); }; - ($NAME: ident, $DOC: expr) => { - expr_fn!($NAME, $NAME, ,$DOC); + ($FUNC:ident, $($arg:ident)*) => { + expr_fn!($FUNC, $($arg)*, stringify!($FUNC)); }; - ($NAME: ident, $($arg:ident)*, $DOC: expr) => { - expr_fn!($NAME, $NAME, $($arg)* ,$DOC); + ($FUNC: ident, $DOC: expr) => { + expr_fn!($FUNC, ,$DOC); }; - ($NAME: ident, $FUNC: ident, $($arg:ident)*, $DOC: expr) => { + ($FUNC: ident, $($arg:ident)*, $DOC: expr) => { #[doc = $DOC] #[pyfunction] - fn $NAME($($arg: PyExpr),*) -> PyExpr { + fn $FUNC($($arg: PyExpr),*) -> PyExpr { functions::expr_fn::$FUNC($($arg.into()),*).into() } }; @@ -452,17 +681,14 @@ macro_rules! expr_fn { /// /// These functions take a single `Vec` argument using `pyo3(signature = (*args))`. macro_rules! expr_fn_vec { - ($NAME: ident) => { - expr_fn_vec!($NAME, $NAME, stringify!($NAME)); + ($FUNC: ident) => { + expr_fn_vec!($FUNC, stringify!($FUNC)); }; - ($NAME: ident, $DOC: expr) => { - expr_fn_vec!($NAME, $NAME, $DOC); - }; - ($NAME: ident, $FUNC: ident, $DOC: expr) => { + ($FUNC: ident, $DOC: expr) => { #[doc = $DOC] #[pyfunction] #[pyo3(signature = (*args))] - fn $NAME(args: Vec) -> PyExpr { + fn $FUNC(args: Vec) -> PyExpr { let args = args.into_iter().map(|e| e.into()).collect::>(); functions::expr_fn::$FUNC(args).into() } @@ -473,22 +699,19 @@ macro_rules! expr_fn_vec { /// /// These functions have explicit named arguments. macro_rules! array_fn { - ($NAME: ident) => { - array_fn!($NAME, $NAME, , stringify!($NAME)); + ($FUNC: ident) => { + array_fn!($FUNC, , stringify!($FUNC)); }; - ($NAME:ident, $($arg:ident)*) => { - array_fn!($NAME, $NAME, $($arg)*, stringify!($FUNC)); + ($FUNC:ident, $($arg:ident)*) => { + array_fn!($FUNC, $($arg)*, stringify!($FUNC)); }; - ($NAME: ident, $FUNC:ident, $($arg:ident)*) => { - array_fn!($NAME, $FUNC, $($arg)*, stringify!($FUNC)); + ($FUNC: ident, $DOC: expr) => { + array_fn!($FUNC, , $DOC); }; - ($NAME: ident, $DOC: expr) => { - array_fn!($NAME, $NAME, , $DOC); - }; - ($NAME: ident, $FUNC:ident, $($arg:ident)*, $DOC:expr) => { + ($FUNC: ident, $($arg:ident)*, $DOC:expr) => { #[doc = $DOC] #[pyfunction] - fn $NAME($($arg: PyExpr),*) -> PyExpr { + fn $FUNC($($arg: PyExpr),*) -> PyExpr { datafusion_functions_array::expr_fn::$FUNC($($arg.into()),*).into() } }; @@ -559,7 +782,6 @@ expr_fn!(octet_length, args, "Returns number of bytes in the string. Since this expr_fn_vec!(overlay); expr_fn!(pi); expr_fn!(power, base exponent); -expr_fn!(pow, power, base exponent); expr_fn!(radians, num); expr_fn!(repeat, string n, "Repeats string the specified number of times."); expr_fn!( @@ -611,9 +833,7 @@ expr_fn_vec!(to_unixtime); expr_fn!(current_date); expr_fn!(current_time); expr_fn!(date_part, part date); -expr_fn!(datepart, date_part, part date); expr_fn!(date_trunc, part date); -expr_fn!(datetrunc, date_trunc, part date); expr_fn!(date_bin, stride source origin); expr_fn!(make_date, year month day); @@ -630,95 +850,37 @@ expr_fn!(random); // Array Functions array_fn!(array_append, array element); -array_fn!(array_push_back, array_append, array element); array_fn!(array_to_string, array delimiter); -array_fn!(array_join, array_to_string, array delimiter); -array_fn!(list_to_string, array_to_string, array delimiter); -array_fn!(list_join, array_to_string, array delimiter); -array_fn!(list_append, array_append, array element); -array_fn!(list_push_back, array_append, array element); array_fn!(array_dims, array); array_fn!(array_distinct, array); -array_fn!(list_distinct, array_distinct, array); -array_fn!(list_dims, array_dims, array); array_fn!(array_element, array element); -array_fn!(array_extract, array_element, array element); -array_fn!(list_element, array_element, array element); -array_fn!(list_extract, array_element, array element); array_fn!(array_length, array); -array_fn!(list_length, array_length, array); array_fn!(array_has, first_array second_array); array_fn!(array_has_all, first_array second_array); array_fn!(array_has_any, first_array second_array); -array_fn!(array_positions, array_positions, array element); -array_fn!(list_positions, array_positions, array element); +array_fn!(array_positions, array element); array_fn!(array_ndims, array); -array_fn!(list_ndims, array_ndims, array); array_fn!(array_prepend, element array); -array_fn!(array_push_front, array_prepend, element array); -array_fn!(list_prepend, array_prepend, element array); -array_fn!(list_push_front, array_prepend, element array); array_fn!(array_pop_back, array); array_fn!(array_pop_front, array); array_fn!(array_remove, array element); -array_fn!(list_remove, array_remove, array element); array_fn!(array_remove_n, array element max); -array_fn!(list_remove_n, array_remove_n, array element max); array_fn!(array_remove_all, array element); -array_fn!(list_remove_all, array_remove_all, array element); array_fn!(array_repeat, element count); array_fn!(array_replace, array from to); -array_fn!(list_replace, array_replace, array from to); array_fn!(array_replace_n, array from to max); -array_fn!(list_replace_n, array_replace_n, array from to max); array_fn!(array_replace_all, array from to); -array_fn!(list_replace_all, array_replace_all, array from to); array_fn!(array_sort, array desc null_first); -array_fn!(list_sort, array_sort, array desc null_first); array_fn!(array_intersect, first_array second_array); -array_fn!(list_intersect, array_intersect, first_array second_array); array_fn!(array_union, array1 array2); -array_fn!(list_union, array_union, array1 array2); array_fn!(array_except, first_array second_array); -array_fn!(list_except, array_except, first_array second_array); array_fn!(array_resize, array size value); -array_fn!(list_resize, array_resize, array size value); array_fn!(flatten, array); array_fn!(range, start stop step); -aggregate_function!(approx_distinct, ApproxDistinct); -aggregate_function!(approx_median, ApproxMedian); -aggregate_function!(approx_percentile_cont, ApproxPercentileCont); -aggregate_function!( - approx_percentile_cont_with_weight, - ApproxPercentileContWithWeight -); aggregate_function!(array_agg, ArrayAgg); -aggregate_function!(avg, Avg); -aggregate_function!(corr, Correlation); -aggregate_function!(count, Count); -aggregate_function!(grouping, Grouping); aggregate_function!(max, Max); -aggregate_function!(mean, Avg); aggregate_function!(min, Min); -aggregate_function!(stddev, Stddev); -aggregate_function!(stddev_pop, StddevPop); -aggregate_function!(stddev_samp, Stddev); -aggregate_function!(var_pop, VariancePop); -aggregate_function!(regr_avgx, RegrAvgx); -aggregate_function!(regr_avgy, RegrAvgy); -aggregate_function!(regr_count, RegrCount); -aggregate_function!(regr_intercept, RegrIntercept); -aggregate_function!(regr_r2, RegrR2); -aggregate_function!(regr_slope, RegrSlope); -aggregate_function!(regr_sxx, RegrSXX); -aggregate_function!(regr_sxy, RegrSXY); -aggregate_function!(regr_syy, RegrSYY); -aggregate_function!(bit_and, BitAnd); -aggregate_function!(bit_or, BitOr); -aggregate_function!(bit_xor, BitXor); -aggregate_function!(bool_and, BoolAnd); -aggregate_function!(bool_or, BoolOr); pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(abs))?; @@ -729,7 +891,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(approx_median))?; m.add_wrapped(wrap_pyfunction!(approx_percentile_cont))?; m.add_wrapped(wrap_pyfunction!(approx_percentile_cont_with_weight))?; - m.add_wrapped(wrap_pyfunction!(array))?; m.add_wrapped(wrap_pyfunction!(range))?; m.add_wrapped(wrap_pyfunction!(array_agg))?; m.add_wrapped(wrap_pyfunction!(arrow_typeof))?; @@ -758,16 +919,13 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(cot))?; m.add_wrapped(wrap_pyfunction!(count))?; m.add_wrapped(wrap_pyfunction!(count_star))?; - m.add_wrapped(wrap_pyfunction!(covar))?; m.add_wrapped(wrap_pyfunction!(covar_pop))?; m.add_wrapped(wrap_pyfunction!(covar_samp))?; m.add_wrapped(wrap_pyfunction!(current_date))?; m.add_wrapped(wrap_pyfunction!(current_time))?; m.add_wrapped(wrap_pyfunction!(degrees))?; m.add_wrapped(wrap_pyfunction!(date_bin))?; - m.add_wrapped(wrap_pyfunction!(datepart))?; m.add_wrapped(wrap_pyfunction!(date_part))?; - m.add_wrapped(wrap_pyfunction!(datetrunc))?; m.add_wrapped(wrap_pyfunction!(date_trunc))?; m.add_wrapped(wrap_pyfunction!(make_date))?; m.add_wrapped(wrap_pyfunction!(digest))?; @@ -796,7 +954,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(max))?; m.add_wrapped(wrap_pyfunction!(make_array))?; m.add_wrapped(wrap_pyfunction!(md5))?; - m.add_wrapped(wrap_pyfunction!(mean))?; m.add_wrapped(wrap_pyfunction!(median))?; m.add_wrapped(wrap_pyfunction!(min))?; m.add_wrapped(wrap_pyfunction!(named_struct))?; @@ -808,7 +965,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(overlay))?; m.add_wrapped(wrap_pyfunction!(pi))?; m.add_wrapped(wrap_pyfunction!(power))?; - m.add_wrapped(wrap_pyfunction!(pow))?; m.add_wrapped(wrap_pyfunction!(radians))?; m.add_wrapped(wrap_pyfunction!(random))?; m.add_wrapped(wrap_pyfunction!(regexp_like))?; @@ -833,7 +989,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(starts_with))?; m.add_wrapped(wrap_pyfunction!(stddev))?; m.add_wrapped(wrap_pyfunction!(stddev_pop))?; - m.add_wrapped(wrap_pyfunction!(stddev_samp))?; m.add_wrapped(wrap_pyfunction!(strpos))?; m.add_wrapped(wrap_pyfunction!(r#struct))?; // Use raw identifier since struct is a keyword m.add_wrapped(wrap_pyfunction!(substr))?; @@ -854,7 +1009,6 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(trunc))?; m.add_wrapped(wrap_pyfunction!(upper))?; m.add_wrapped(wrap_pyfunction!(self::uuid))?; // Use self to avoid name collision - m.add_wrapped(wrap_pyfunction!(var))?; m.add_wrapped(wrap_pyfunction!(var_pop))?; m.add_wrapped(wrap_pyfunction!(var_samp))?; m.add_wrapped(wrap_pyfunction!(window))?; @@ -881,67 +1035,35 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { // Array Functions m.add_wrapped(wrap_pyfunction!(array_append))?; - m.add_wrapped(wrap_pyfunction!(array_push_back))?; - m.add_wrapped(wrap_pyfunction!(list_append))?; - m.add_wrapped(wrap_pyfunction!(list_push_back))?; m.add_wrapped(wrap_pyfunction!(array_concat))?; m.add_wrapped(wrap_pyfunction!(array_cat))?; m.add_wrapped(wrap_pyfunction!(array_dims))?; m.add_wrapped(wrap_pyfunction!(array_distinct))?; - m.add_wrapped(wrap_pyfunction!(list_distinct))?; - m.add_wrapped(wrap_pyfunction!(list_dims))?; m.add_wrapped(wrap_pyfunction!(array_element))?; - m.add_wrapped(wrap_pyfunction!(array_extract))?; - m.add_wrapped(wrap_pyfunction!(list_element))?; - m.add_wrapped(wrap_pyfunction!(list_extract))?; m.add_wrapped(wrap_pyfunction!(array_length))?; - m.add_wrapped(wrap_pyfunction!(list_length))?; m.add_wrapped(wrap_pyfunction!(array_has))?; m.add_wrapped(wrap_pyfunction!(array_has_all))?; m.add_wrapped(wrap_pyfunction!(array_has_any))?; m.add_wrapped(wrap_pyfunction!(array_position))?; - m.add_wrapped(wrap_pyfunction!(array_indexof))?; - m.add_wrapped(wrap_pyfunction!(list_position))?; - m.add_wrapped(wrap_pyfunction!(list_indexof))?; m.add_wrapped(wrap_pyfunction!(array_positions))?; - m.add_wrapped(wrap_pyfunction!(list_positions))?; m.add_wrapped(wrap_pyfunction!(array_to_string))?; m.add_wrapped(wrap_pyfunction!(array_intersect))?; - m.add_wrapped(wrap_pyfunction!(list_intersect))?; m.add_wrapped(wrap_pyfunction!(array_union))?; - m.add_wrapped(wrap_pyfunction!(list_union))?; m.add_wrapped(wrap_pyfunction!(array_except))?; - m.add_wrapped(wrap_pyfunction!(list_except))?; m.add_wrapped(wrap_pyfunction!(array_resize))?; - m.add_wrapped(wrap_pyfunction!(list_resize))?; - m.add_wrapped(wrap_pyfunction!(array_join))?; - m.add_wrapped(wrap_pyfunction!(list_to_string))?; - m.add_wrapped(wrap_pyfunction!(list_join))?; m.add_wrapped(wrap_pyfunction!(array_ndims))?; - m.add_wrapped(wrap_pyfunction!(list_ndims))?; m.add_wrapped(wrap_pyfunction!(array_prepend))?; - m.add_wrapped(wrap_pyfunction!(array_push_front))?; - m.add_wrapped(wrap_pyfunction!(list_prepend))?; - m.add_wrapped(wrap_pyfunction!(list_push_front))?; m.add_wrapped(wrap_pyfunction!(array_pop_back))?; m.add_wrapped(wrap_pyfunction!(array_pop_front))?; m.add_wrapped(wrap_pyfunction!(array_remove))?; - m.add_wrapped(wrap_pyfunction!(list_remove))?; m.add_wrapped(wrap_pyfunction!(array_remove_n))?; - m.add_wrapped(wrap_pyfunction!(list_remove_n))?; m.add_wrapped(wrap_pyfunction!(array_remove_all))?; - m.add_wrapped(wrap_pyfunction!(list_remove_all))?; m.add_wrapped(wrap_pyfunction!(array_repeat))?; m.add_wrapped(wrap_pyfunction!(array_replace))?; - m.add_wrapped(wrap_pyfunction!(list_replace))?; m.add_wrapped(wrap_pyfunction!(array_replace_n))?; - m.add_wrapped(wrap_pyfunction!(list_replace_n))?; m.add_wrapped(wrap_pyfunction!(array_replace_all))?; - m.add_wrapped(wrap_pyfunction!(list_replace_all))?; m.add_wrapped(wrap_pyfunction!(array_sort))?; - m.add_wrapped(wrap_pyfunction!(list_sort))?; m.add_wrapped(wrap_pyfunction!(array_slice))?; - m.add_wrapped(wrap_pyfunction!(list_slice))?; m.add_wrapped(wrap_pyfunction!(flatten))?; Ok(())