From ca6d146960f6c24aa0b1675fba1832a4e19940bd Mon Sep 17 00:00:00 2001 From: Xin Li <33629085+xinlifoobar@users.noreply.github.com> Date: Sat, 6 Jul 2024 04:55:55 -0700 Subject: [PATCH] Implement user defined planner for position (#11243) * Implement user defined planner for position * Fix format * Move planner to session_state * Extract function --- .../core/src/execution/session_state.rs | 2 + datafusion/expr/src/planner.rs | 6 +++ datafusion/functions/src/unicode/mod.rs | 1 + datafusion/functions/src/unicode/planner.rs | 36 +++++++++++++++ datafusion/sql/src/expr/mod.rs | 44 +++++++++++-------- 5 files changed, 70 insertions(+), 19 deletions(-) create mode 100644 datafusion/functions/src/unicode/planner.rs diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index a831f92def50..ffaaa2df5e7e 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -240,6 +240,8 @@ impl SessionState { Arc::new(functions_array::planner::FieldAccessPlanner), #[cfg(feature = "datetime_expressions")] Arc::new(functions::datetime::planner::ExtractPlanner), + #[cfg(feature = "unicode_expressions")] + Arc::new(functions::unicode::planner::PositionPlanner), ]; let mut new_self = SessionState { diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index bba0228ae0aa..bcbf5eb203ac 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -116,6 +116,12 @@ pub trait UserDefinedSQLPlanner: Send + Sync { Ok(PlannerResult::Original(exprs)) } + // Plan the POSITION expression, e.g., POSITION(<expr> in <expr>) + // returns origin expression arguments if not possible + fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { + Ok(PlannerResult::Original(args)) + } + /// Plan the dictionary literal `{ key: value, ...}` /// /// Returns origin expression arguments if not possible diff --git a/datafusion/functions/src/unicode/mod.rs 
b/datafusion/functions/src/unicode/mod.rs index 9e8c07cd36ed..a391b8ba11dc 100644 --- a/datafusion/functions/src/unicode/mod.rs +++ b/datafusion/functions/src/unicode/mod.rs @@ -25,6 +25,7 @@ pub mod character_length; pub mod find_in_set; pub mod left; pub mod lpad; +pub mod planner; pub mod reverse; pub mod right; pub mod rpad; diff --git a/datafusion/functions/src/unicode/planner.rs b/datafusion/functions/src/unicode/planner.rs new file mode 100644 index 000000000000..4d6f73321b4a --- /dev/null +++ b/datafusion/functions/src/unicode/planner.rs @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
SQL planning extensions like [`PositionPlanner`] + +use datafusion_common::Result; +use datafusion_expr::{ + expr::ScalarFunction, + planner::{PlannerResult, UserDefinedSQLPlanner}, + Expr, +}; + +#[derive(Default)] +pub struct PositionPlanner; + +impl UserDefinedSQLPlanner for PositionPlanner { + fn plan_position(&self, args: Vec<Expr>) -> Result<PlannerResult<Vec<Expr>>> { + Ok(PlannerResult::Planned(Expr::ScalarFunction( + ScalarFunction::new_udf(crate::unicode::strpos(), args), + ))) + } +} diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 2ddd2d22c022..6295821fa944 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -629,6 +629,31 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } + fn sql_position_to_expr( + &self, + substr_expr: SQLExpr, + str_expr: SQLExpr, + schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result<Expr> { + let substr = + self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?; + let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?; + let mut extract_args = vec![fullstr, substr]; + for planner in self.planners.iter() { + match planner.plan_position(extract_args)? 
{ + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(args) => { + extract_args = args; + } + } + } + + not_impl_err!( + "Position not supported by UserDefinedExtensionPlanners: {extract_args:?}" + ) + } + fn try_plan_dictionary_literal( &self, fields: Vec, @@ -924,25 +949,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }; Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) } - fn sql_position_to_expr( - &self, - substr_expr: SQLExpr, - str_expr: SQLExpr, - schema: &DFSchema, - planner_context: &mut PlannerContext, - ) -> Result<Expr> { - let fun = self - .context_provider - .get_function_meta("strpos") - .ok_or_else(|| { - internal_datafusion_err!("Unable to find expected 'strpos' function") - })?; - let substr = - self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?; - let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?; - let args = vec![fullstr, substr]; - Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) - } } #[cfg(test)]