diff --git a/Cargo.lock b/Cargo.lock index 55d25a95d5c5..b0c6272b9f92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -759,19 +759,20 @@ dependencies = [ [[package]] name = "arrow-udf-js" version = "0.2.0" -source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" +source = "git+https://github.com/datafuse-extras/arrow-udf?rev=d0a21f0#d0a21f0fde330a0e5f658a55b58e0405d8372844" dependencies = [ "anyhow", "arrow-array 51.0.0", "arrow-buffer 51.0.0", "arrow-schema 51.0.0", + "atomic-time", "rquickjs", ] [[package]] name = "arrow-udf-wasm" version = "0.2.2" -source = "git+https://github.com/datafuse-extras/arrow-udf?rev=a8fdfdd#a8fdfdd3622facb7d836a8da42a8a1c2d318f817" +source = "git+https://github.com/datafuse-extras/arrow-udf?rev=d0a21f0#d0a21f0fde330a0e5f658a55b58e0405d8372844" dependencies = [ "anyhow", "arrow-array 51.0.0", @@ -1081,6 +1082,15 @@ dependencies = [ "critical-section", ] +[[package]] +name = "atomic-time" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3424654267706036b8c23c0abadc4e0412416b9d0208d7ebe1e6978c8c31fec0" +dependencies = [ + "portable-atomic", +] + [[package]] name = "atomic-waker" version = "1.1.2" diff --git a/Cargo.toml b/Cargo.toml index fb766ceb8be6..d9578da75d4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -224,8 +224,8 @@ parquet = { version = "51", features = ["async"] } parquet_rs = { package = "parquet", version = "51" } # Crates from risingwavelabs -arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } -arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "a8fdfdd" } +arrow-udf-js = { package = "arrow-udf-js", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0" } +arrow-udf-wasm = { package = "arrow-udf-wasm", git = "https://github.com/datafuse-extras/arrow-udf", rev = "d0a21f0" } # Serialization 
prost = { version = "0.12.1" } diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 3e3e533097d1..23d66b62bb74 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -20,6 +20,8 @@ use arrow_array::RecordBatch; use arrow_schema::Schema; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::converts::arrow::EXTENSION_KEY; +use databend_common_expression::converts::arrow2::ARROW_EXT_TYPE_VARIANT; use databend_common_expression::variant_transform::contains_variant; use databend_common_expression::variant_transform::transform_variant; use databend_common_expression::BlockEntry; @@ -46,7 +48,13 @@ impl ScriptRuntime { pub fn try_create(lang: &str, code: Option<Vec<u8>>) -> Result<Self> { match lang { "javascript" => arrow_udf_js::Runtime::new() - .map(|runtime| ScriptRuntime::JavaScript(Arc::new(RwLock::new(runtime)))) + .map(|mut runtime| { + runtime.converter.set_arrow_extension_key(EXTENSION_KEY); + runtime + .converter + .set_json_extension_name(ARROW_EXT_TYPE_VARIANT); + ScriptRuntime::JavaScript(Arc::new(RwLock::new(runtime))) + }) .map_err(|err| { ErrorCode::UDFDataError(format!("Cannot create js runtime: {}", err)) }), @@ -83,7 +91,10 @@ impl ScriptRuntime { let mut runtime = runtime.write(); runtime.add_function_with_handler( &func.name, - arrow_schema.field(0).data_type().clone(), + // we pass the field instead of the data type because arrow-udf-js + // now takes the field as an argument here so that it can get any + // metadata associated with the field + arrow_schema.field(0).clone(), arrow_udf_js::CallMode::ReturnNullOnNullInput, code, &func.func_name, diff --git a/tests/sqllogictests/suites/base/03_common/03_0013_select_udf.test 
b/tests/sqllogictests/suites/base/03_common/03_0013_select_udf.test index 76459c6f8374..c65903b52df4 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0013_select_udf.test +++ b/tests/sqllogictests/suites/base/03_common/03_0013_select_udf.test @@ -54,7 +54,6 @@ select number, gcd(number * 3, number * 6) from numbers(5) where number > 0 orde statement ok DROP FUNCTION gcd - statement ok CREATE OR REPLACE FUNCTION check_idcard (String) RETURNS BOOLEAN LANGUAGE javascript HANDLER = 'validateIdCard' AS $$ export function validateIdCard(idCard) { @@ -106,4 +105,85 @@ select check_idcard('360781199308240205'), check_idcard('310110198812071013'); 0 1 statement ok -DROP FUNCTION check_idcard \ No newline at end of file +DROP FUNCTION check_idcard + +## test js udf with variant +statement ok +CREATE FUNCTION variant_udf_test (VARIANT) RETURNS VARIANT LANGUAGE javascript HANDLER = 'transform_variant' AS $$ + export function transform_variant(v) { + v["added_key"] = "yes"; + return v; + } +$$; + +query BC +select variant_udf_test(json_object('some_key', [1, 2])); +---- +{"added_key":"yes","some_key":[1,2]} + +statement ok +DROP FUNCTION variant_udf_test + +statement ok +CREATE OR REPLACE FUNCTION decimal128_add (Decimal(19, 5), Decimal(19, 5)) RETURNS Decimal(19, 5) language javascript HANDLER = 'decimal128_add' AS $$ + export function decimal128_add(a, b) { + return a + b + BigDecimal('0.001'); + } +$$; + +query BD +select decimal128_add(10.1, 10.000000000485); +---- +20.10100 + +statement ok +DROP FUNCTION decimal128_add + +statement ok +CREATE OR REPLACE FUNCTION decimal256_add (Decimal(42, 5), Decimal(42, 5)) RETURNS Decimal(42, 5) language javascript HANDLER = 'decimal256_add' AS $$ + export function decimal256_add(a, b) { + return a + b + BigDecimal('0.001'); + } +$$; + +query BE +select decimal256_add('1000000000000000000000000000000000000.1', '10.0000000004'); +---- +1000000000000000000000000000000000010.10100 + +statement ok +DROP FUNCTION decimal256_add + 
+statement ok +CREATE OR REPLACE FUNCTION timestamp_add_days (Timestamp, int16) RETURNS Timestamp language javascript HANDLER = 'timestamp_add_days' AS $$ + export function timestamp_add_days(date, days) { + date.setDate(date.getDate() + days); + return date; + } +$$; + +query BF +select timestamp_add_days(to_timestamp('2022-01-02T01:12:00'), 12); +---- +2022-01-14 01:12:00.000000 + + +statement ok +DROP FUNCTION timestamp_add_days + +statement ok +CREATE OR REPLACE FUNCTION date_add_days (Date, int16) RETURNS Date language javascript HANDLER = 'date_add_days' AS $$ + export function date_add_days(date, days) { + date.setDate(date.getDate() + days); + return date; + } +$$; + +query BG +select date_add_days(to_date('2022-01-02T01:12:00'), 12); +---- +2022-01-14 + + +statement ok +DROP FUNCTION date_add_days;