diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index f41d264d35c0..7003f5ac7f38 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! This module provides common traits for visiting or rewriting tree -//! data structures easily. +//! [`TreeNode`] for visiting and rewriting expression and plan trees use std::sync::Arc; @@ -31,22 +30,83 @@ macro_rules! handle_transform_recursion { }}; } -/// Defines a visitable and rewriteable tree node. This trait is implemented -/// for plans ([`ExecutionPlan`] and [`LogicalPlan`]) as well as expression -/// trees ([`PhysicalExpr`], [`Expr`]) in DataFusion. +/// API for inspecting and rewriting tree data structures. +/// +/// The `TreeNode` API is used to express algorithms separately from traversing +/// the structure of `TreeNode`s, avoiding substantial code duplication. +/// +/// This trait is implemented for plans ([`ExecutionPlan`], [`LogicalPlan`]) and +/// expression trees ([`PhysicalExpr`], [`Expr`]) as well as Plan+Payload +/// combinations [`PlanContext`] and [`ExprContext`]. +/// +/// # Overview +/// There are three categories of TreeNode APIs: +/// +/// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`: +/// [`apply`], [`visit`], [`exists`]. +/// +/// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s +/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`], +/// [`transform_down`], [`transform_down_up`], and [`rewrite`]. +/// +/// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`], +/// and [`map_children`]. +/// +/// | Traversal Order | Inspecting | Transforming | +/// | --- | --- | --- | +/// | top-down | [`apply`], [`exists`] | [`transform_down`]| +/// | bottom-up | | [`transform`] , [`transform_up`]| +/// | combined with separate `f_down` and `f_up` closures | | [`transform_down_up`] | +/// | combined with `f_down()` and `f_up()` in an object | [`visit`] | [`rewrite`] | +/// +/// **Note**:while there is currently no in-place mutation API that uses `&mut +/// TreeNode`, the transforming APIs are efficient and optimized to avoid +/// cloning. +/// +/// [`apply`]: Self::apply +/// [`visit`]: Self::visit +/// [`exists`]: Self::exists +/// [`transform`]: Self::transform +/// [`transform_up`]: Self::transform_up +/// [`transform_down`]: Self::transform_down +/// [`transform_down_up`]: Self::transform_down_up +/// [`rewrite`]: Self::rewrite +/// [`apply_children`]: Self::apply_children +/// [`map_children`]: Self::map_children +/// +/// # Terminology +/// The following terms are used in this trait +/// +/// * `f_down`: Invoked before any children of the current node are visited. +/// * `f_up`: Invoked after all children of the current node are visited. +/// * `f`: closure that is applied to the current node. +/// * `map_*`: applies a transformation to rewrite owned nodes +/// * `apply_*`: invokes a function on borrowed nodes +/// * `transform_`: applies a transformation to rewrite owned nodes /// /// /// [`ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html /// [`PhysicalExpr`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.PhysicalExpr.html /// [`LogicalPlan`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html /// [`Expr`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/expr/enum.Expr.html +/// [`PlanContext`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/tree_node/struct.PlanContext.html +/// [`ExprContext`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/tree_node/struct.ExprContext.html pub trait TreeNode: Sized { - /// Visit the tree node using the given [`TreeNodeVisitor`], performing a + /// Visit the tree node with a [`TreeNodeVisitor`], performing a /// depth-first walk of the node and its children. /// - /// See also: - /// * [`Self::rewrite`] to rewrite owned `TreeNode`s + /// [`TreeNodeVisitor::f_down()`] is called in top-down order (before + /// children are visited), [`TreeNodeVisitor::f_up()`] is called in + /// bottom-up order (after children are visited). /// + /// # Return Value + /// Specifies how the tree walk ended. See [`TreeNodeRecursion`] for details. + /// + /// # See Also: + /// * [`Self::apply`] for inspecting nodes with a closure + /// * [`Self::rewrite`] to rewrite owned `TreeNode`s + /// + /// # Example /// Consider the following tree structure: /// ```text /// ParentNode @@ -63,14 +123,6 @@ pub trait TreeNode: Sized { /// TreeNodeVisitor::f_up(ChildNode2) /// TreeNodeVisitor::f_up(ParentNode) /// ``` - /// - /// See [`TreeNodeRecursion`] for more details on controlling the traversal. - /// - /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`] returns [`Err`], - /// the recursion stops immediately. - /// - /// If using the default [`TreeNodeVisitor::f_up`] that does nothing, consider using - /// [`Self::apply`]. fn visit>( &self, visitor: &mut V, @@ -81,12 +133,29 @@ pub trait TreeNode: Sized { .visit_parent(|| visitor.f_up(self)) } - /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for - /// recursively transforming [`TreeNode`]s. + /// Rewrite the tree node with a [`TreeNodeRewriter`], performing a + /// depth-first walk of the node and its children. + /// + /// [`TreeNodeRewriter::f_down()`] is called in top-down order (before + /// children are visited), [`TreeNodeRewriter::f_up()`] is called in + /// bottom-up order (after children are visited). + /// + /// Note: If using the default [`TreeNodeRewriter::f_up`] or + /// [`TreeNodeRewriter::f_down`] that do nothing, consider using + /// [`Self::transform_down`] instead. + /// + /// # Return Value + /// The returns value specifies how the tree walk should proceed. See + /// [`TreeNodeRecursion`] for details. If an [`Err`] is returned, the + /// recursion stops immediately. /// - /// See also: - /// * [`Self::visit`] for inspecting (without modification) `TreeNode`s + /// # See Also + /// * [`Self::visit`] for inspecting (without modification) `TreeNode`s + /// * [Self::transform_down_up] for a top-down (pre-order) traversal. + /// * [Self::transform_down] for a top-down (pre-order) traversal. + /// * [`Self::transform_up`] for a bottom-up (post-order) traversal. /// + /// # Example /// Consider the following tree structure: /// ```text /// ParentNode @@ -103,11 +172,6 @@ pub trait TreeNode: Sized { /// TreeNodeRewriter::f_up(ChildNode2) /// TreeNodeRewriter::f_up(ParentNode) /// ``` - /// - /// See [`TreeNodeRecursion`] for more details on controlling the traversal. - /// - /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`] returns [`Err`], - /// the recursion stops immediately. fn rewrite>( self, rewriter: &mut R, @@ -117,12 +181,15 @@ pub trait TreeNode: Sized { }) } - /// Applies `f` to the node and its children. `f` is applied in a pre-order - /// way, and it is controlled by [`TreeNodeRecursion`], which means result - /// of the `f` on a node can cause an early return. + /// Applies `f` to the node then each of its children, recursively (a + /// top-down, pre-order traversal). + /// + /// The return [`TreeNodeRecursion`] controls the recursion and can cause + /// an early return. /// - /// The `f` closure can be used to collect some information from tree nodes - /// or run a check on the tree. + /// # See Also + /// * [`Self::transform_down`] for the equivalent transformation API. + /// * [`Self::visit`] for both top-down and bottom up traversal. fn apply Result>( &self, mut f: F, @@ -137,9 +204,10 @@ pub trait TreeNode: Sized { apply_impl(self, &mut f) } - /// Convenience utility for writing optimizer rules: Recursively apply the - /// given function `f` to the tree in a bottom-up (post-order) fashion. When - /// `f` does not apply to a given node, it is left unchanged. + /// Recursively rewrite the node's children and then the node using `f` + /// (a bottom-up post-order traversal). + /// + /// A synonym of [`Self::transform_up`]. fn transform Result>>( self, f: F, @@ -147,9 +215,15 @@ pub trait TreeNode: Sized { self.transform_up(f) } - /// Convenience utility for writing optimizer rules: Recursively apply the - /// given function `f` to a node and then to its children (pre-order traversal). - /// When `f` does not apply to a given node, it is left unchanged. + /// Recursively rewrite the tree using `f` in a top-down (pre-order) + /// fashion. + /// + /// `f` is applied to the node first, and then its children. + /// + /// # See Also + /// * [`Self::transform_up`] for a bottom-up (post-order) traversal. + /// * [Self::transform_down_up] for a combined traversal with closures + /// * [`Self::rewrite`] for a combined traversal with a visitor fn transform_down Result>>( self, mut f: F, @@ -164,9 +238,7 @@ pub trait TreeNode: Sized { transform_down_impl(self, &mut f) } - /// Convenience utility for writing optimizer rules: Recursively apply the - /// given mutable function `f` to a node and then to its children (pre-order - /// traversal). When `f` does not apply to a given node, it is left unchanged. + /// Same as [`Self::transform_down`] but with a mutable closure. #[deprecated(since = "38.0.0", note = "Use `transform_down` instead")] fn transform_down_mut Result>>( self, @@ -175,10 +247,15 @@ pub trait TreeNode: Sized { self.transform_down(f) } - /// Convenience utility for writing optimizer rules: Recursively apply the - /// given function `f` to all children of a node, and then to the node itself - /// (post-order traversal). When `f` does not apply to a given node, it is - /// left unchanged. + /// Recursively rewrite the node using `f` in a bottom-up (post-order) + /// fashion. + /// + /// `f` is applied to the node's children first, and then to the node itself. + /// + /// # See Also + /// * [`Self::transform_down`] top-down (pre-order) traversal. + /// * [Self::transform_down_up] for a combined traversal with closures + /// * [`Self::rewrite`] for a combined traversal with a visitor fn transform_up Result>>( self, mut f: F, @@ -194,10 +271,7 @@ pub trait TreeNode: Sized { transform_up_impl(self, &mut f) } - /// Convenience utility for writing optimizer rules: Recursively apply the - /// given mutable function `f` to all children of a node, and then to the - /// node itself (post-order traversal). When `f` does not apply to a given - /// node, it is left unchanged. + /// Same as [`Self::transform_up`] but with a mutable closure. #[deprecated(since = "38.0.0", note = "Use `transform_up` instead")] fn transform_up_mut Result>>( self, @@ -206,15 +280,21 @@ pub trait TreeNode: Sized { self.transform_up(f) } - /// Transforms the tree using `f_down` while traversing the tree top-down + /// Transforms the node using `f_down` while traversing the tree top-down /// (pre-order), and using `f_up` while traversing the tree bottom-up /// (post-order). /// - /// Use this method if you want to start the `f_up` process right where `f_down` jumps. - /// This can make the whole process faster by reducing the number of `f_up` steps. - /// If you don't need this, it's just like using `transform_down` followed by - /// `transform_up` on the same tree. + /// The method behaves the same as calling [`Self::transform_down`] followed + /// by [`Self::transform_up`] on the same node. Use this method if you want + /// to start the `f_up` process right where `f_down` jumps. This can make + /// the whole process faster by reducing the number of `f_up` steps. /// + /// # See Also + /// * [`Self::transform_up`] for a bottom-up (post-order) traversal. + /// * [Self::transform_down] for a top-down (pre-order) traversal. + /// * [`Self::rewrite`] for a combined traversal with a visitor + /// + /// # Example /// Consider the following tree structure: /// ```text /// ParentNode @@ -322,7 +402,7 @@ pub trait TreeNode: Sized { transform_down_up_impl(self, &mut f_down, &mut f_up) } - /// Returns true if `f` returns true for node in the tree. + /// Returns true if `f` returns true for any node in the tree. /// /// Stops recursion as soon as a matching node is found fn exists bool>(&self, mut f: F) -> bool { @@ -339,50 +419,91 @@ pub trait TreeNode: Sized { found } - /// Apply the closure `F` to the node's children. + /// Low-level API used to implement other APIs. + /// + /// If you want to implement the [`TreeNode`] trait for your own type, you + /// should implement this method and [`Self::map_children`]. /// - /// See `mutate_children` for rewriting in place + /// Users should use one of the higher level APIs described on [`Self`]. + /// + /// Description: Apply `f` to inspect node's children (but not the node + /// itself). fn apply_children Result>( &self, f: F, ) -> Result; - /// Apply transform `F` to potentially rewrite the node's children. Note - /// that the transform `F` might have a direction (pre-order or post-order). + /// Low-level API used to implement other APIs. + /// + /// If you want to implement the [`TreeNode`] trait for your own type, you + /// should implement this method and [`Self::apply_children`]. + /// + /// Users should use one of the higher level APIs described on [`Self`]. + /// + /// Description: Apply `f` to rewrite the node's children (but not the node itself). fn map_children Result>>( self, f: F, ) -> Result>; } -/// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) -/// for recursively walking [`TreeNode`]s. +/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively +/// inspecting [`TreeNode`]s via [`TreeNode::visit`]. /// -/// A [`TreeNodeVisitor`] allows one to express algorithms separately from the -/// code traversing the structure of the `TreeNode` tree, making it easier to -/// add new types of tree nodes and algorithms. +/// See [`TreeNode`] for more details on available APIs /// /// When passed to [`TreeNode::visit`], [`TreeNodeVisitor::f_down`] and /// [`TreeNodeVisitor::f_up`] are invoked recursively on the tree. /// See [`TreeNodeRecursion`] for more details on controlling the traversal. +/// +/// # Return Value +/// The returns value of `f_up` and `f_down` specifies how the tree walk should +/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned, +/// the recursion stops immediately. +/// +/// Note: If using the default implementations of [`TreeNodeVisitor::f_up`] or +/// [`TreeNodeVisitor::f_down`] that do nothing, consider using +/// [`TreeNode::apply`] instead. +/// +/// # See Also: +/// * [`TreeNode::rewrite`] to rewrite owned `TreeNode`s pub trait TreeNodeVisitor: Sized { /// The node type which is visitable. type Node: TreeNode; - /// Invoked before any children of `node` are visited. - /// Default implementation simply continues the recursion. + /// Invoked while traversing down the tree, before any children are visited. + /// Default implementation continues the recursion. fn f_down(&mut self, _node: &Self::Node) -> Result { Ok(TreeNodeRecursion::Continue) } - /// Invoked after all children of `node` are visited. - /// Default implementation simply continues the recursion. + /// Invoked while traversing up the tree after children are visited. Default + /// implementation continues the recursion. fn f_up(&mut self, _node: &Self::Node) -> Result { Ok(TreeNodeRecursion::Continue) } } -/// Trait for potentially recursively transforming a tree of [`TreeNode`]s. +/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively +/// rewriting [`TreeNode`]s via [`TreeNode::rewrite`]. +/// +/// See [`TreeNode`] for more details on available APIs +/// +/// When passed to [`TreeNode::rewrite`], [`TreeNodeRewriter::f_down`] and +/// [`TreeNodeRewriter::f_up`] are invoked recursively on the tree. +/// See [`TreeNodeRecursion`] for more details on controlling the traversal. +/// +/// # Return Value +/// The returns value of `f_up` and `f_down` specifies how the tree walk should +/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned, +/// the recursion stops immediately. +/// +/// Note: If using the default implementations of [`TreeNodeRewriter::f_up`] or +/// [`TreeNodeRewriter::f_down`] that do nothing, consider using +/// [`TreeNode::transform_up`] or [`TreeNode::transform_down`] instead. +/// +/// # See Also: +/// * [`TreeNode::visit`] to inspect borrowed `TreeNode`s pub trait TreeNodeRewriter: Sized { /// The node type which is rewritable. type Node: TreeNode; @@ -460,24 +581,24 @@ impl TreeNodeRecursion { } } -/// This struct is used by tree transformation APIs such as -/// - [`TreeNode::rewrite`], -/// - [`TreeNode::transform_down`], -/// - [`TreeNode::transform_up`], -/// - [`TreeNode::transform_down_up`] +/// Result of tree walk / transformation APIs /// -/// to control the transformation and return the transformed result. -/// -/// Specifically, API users can provide transformation closures or [`TreeNodeRewriter`] -/// implementations to control the transformation by returning: +/// API users control the transformation by returning: /// - The resulting (possibly transformed) node, -/// - A flag indicating whether any change was made to the node, and -/// - A flag specifying how to proceed with the recursion. +/// - `transformed`: flag indicating whether any change was made to the node +/// - `tnr`: [`TreeNodeRecursion`] specifying how to proceed with the recursion. /// /// At the end of the transformation, the return value will contain: /// - The final (possibly transformed) tree, -/// - A flag indicating whether any change was made to the tree, and -/// - A flag specifying how the recursion ended. +/// - `transformed`: flag indicating whether any change was made to the node +/// - `tnr`: [`TreeNodeRecursion`] specifying how the recursion ended. +/// +/// Example APIs: +/// - [`TreeNode`], +/// - [`TreeNode::rewrite`], +/// - [`TreeNode::transform_down`], +/// - [`TreeNode::transform_up`], +/// - [`TreeNode::transform_down_up`] #[derive(PartialEq, Debug)] pub struct Transformed { pub data: T, @@ -659,6 +780,7 @@ impl TreeNodeIterator for I { /// Transformation helper to process a heterogeneous sequence of tree node containing /// expressions. +/// /// This macro is very similar to [TreeNodeIterator::map_until_stop_and_collect] to /// process nodes that are siblings, but it accepts an initial transformation (`F0`) and /// a sequence of pairs. Each pair is made of an expression (`EXPR`) and its diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 97f5e22287ea..6df5516b1bba 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -65,7 +65,7 @@ pub use datafusion_common::{JoinConstraint, JoinType}; /// from leaves up to the root to produce the query result. /// /// # See also: -/// * [`tree_node`]: visiting and rewriting API +/// * [`tree_node`]: To inspect and rewrite `LogicalPlan` trees /// /// [`tree_node`]: crate::logical_plan::tree_node #[derive(Clone, PartialEq, Eq, Hash)]