facebookincubator · liujiayi771 · Mar 19, 2024 · Apr 13, 2024 · Apr 16, 2024 · Apr 17, 2024
diff --git a/velox/docs/develop/aggregate-functions.rst b/velox/docs/develop/aggregate-functions.rst
@@ -152,13 +152,33 @@ A simple aggregation function is implemented as a class as the following.
     using IntermediateType = Array<Generic<T1>>;
     using OutputType = Array<Generic<T1>>;
 
+    // Define a struct for function-level states. Even if the aggregation function
+    // doesn't use function-level states, it is still necessary to define an empty
+    // FunctionState struct.
+    struct FunctionState {
+      // Optional.
+      TypePtr resultType;
+    };
+
+    // Optional. Defined only when the aggregation function needs to use function-level states.
+    // This method is called once when the aggregation function is created.
+    static void initialize(
+        core::AggregationNode::Step step,
+        FunctionState& state,
+        const std::vector<TypePtr>& rawInputTypes,
+        const TypePtr& resultType,
+        const std::vector<VectorPtr>& constantInputs,
+        std::optional<core::AggregationNode::Step> companionStep) {
+      state.resultType = resultType;
+    }
+
     // Optional. Default is true.
     static constexpr bool default_null_behavior_ = false;
 
     // Optional.
     static bool toIntermediate(
-      exec::out_type<Array<Generic<T1>>>& out,
-      exec::optional_arg_type<Generic<T1>> in);
+        exec::out_type<Array<Generic<T1>>>& out,
+        exec::optional_arg_type<Generic<T1>> in);
 
     struct AccumulatorType { ... };
   };
@@ -169,6 +189,15 @@ function's argument type(s) wrapped in a Row<> even if the function only takes
 one argument. This is needed for the SimpleAggregateAdapter to parse input
 types for arbitrary aggregation functions properly.
 
+A FunctionState struct needs to be declared in the simple aggregation function
+class. FunctionState is initialized once when the aggregation function is
+created and used at every row when adding inputs to accumulators or extracting
+values from accumulators. For example, if the aggregation function needs to get
+the result type or the raw input type of the aggregation function, the author
+can hold them in the FunctionState struct, and initialize them in the
+initialize() method. If the aggregation function does not require the use of
+FunctionState, it is still necessary to declare an empty FunctionState struct.
+
 The author can define an optional flag `default_null_behavior_` indicating
 whether the aggregation function has default-null behavior. This flag is true
 by default. Next, the class can have an optional method `toIntermediate()`
@@ -257,17 +286,21 @@ For aggregaiton functions of default-null behavior, the author defines an
     // Optional. Default is false.
     static constexpr bool is_aligned_ = true;
 
-    explicit AccumulatorType(HashStringAllocator* allocator);
+    explicit AccumulatorType(HashStringAllocator* allocator, const FunctionState& state);
 
-    void addInput(HashStringAllocator* allocator, exec::arg_type<T1> value1, ...);
+    void addInput(
+        HashStringAllocator* allocator,
+        exec::arg_type<T1> value1, ...,
+        const FunctionState& state);
 
     void combine(
         HashStringAllocator* allocator,
-        exec::arg_type<IntermediateType> other);
+        exec::arg_type<IntermediateType> other,
+        const FunctionState& state);
 
-    bool writeIntermediateResult(exec::out_type<IntermediateType>& out);
+    bool writeIntermediateResult(exec::out_type<IntermediateType>& out, const FunctionState& state);
 
-    bool writeFinalResult(exec::out_type<OutputType>& out);
+    bool writeFinalResult(exec::out_type<OutputType>& out, const FunctionState& state);
 
     // Optional. Called during destruction.
     void destroy(HashStringAllocator* allocator);
@@ -296,7 +329,8 @@ addInput
 
 This method adds raw input values to *this* accumulator. It receives a
 `HashStringAllocator*` followed by `exec::arg_type<T1>`-typed values, one for
-each argument type `Ti` wrapped in InputType.
+each argument type `Ti` wrapped in InputType. `const FunctionState&` holds the
+function-level variables.
 
 With default-null behavior, raw-input rows where at least one column is null are
 ignored before `addInput` is called. After `addInput` is called, *this*
@@ -306,31 +340,32 @@ combine
 """""""
 
 This method adds an input intermediate state to *this* accumulator. It receives
-a `HashStringAllocator*` and one `exec::arg_type<IntermediateType>` value. With
-default-null behavior, nulls among the input intermediate states are ignored
-before `combine` is called. After `combine` is called, *this*  accumulator is
-assumed to be non-null.
+a `HashStringAllocator*` and one `exec::arg_type<IntermediateType>` value.
+`const FunctionState&` holds the function-level variables. With default-null
+behavior, nulls among the input intermediate states are ignored before `combine`
+is called. After `combine` is called, *this* accumulator is assumed to be non-null.
 
 writeIntermediateResult
 """""""""""""""""""""""
 
 This method writes *this* accumulator out to an intermediate state vector. It
-has an out-parameter of the type `exec::out_type<IntermediateType>&`. This
-method returns true if it writes a non-null value to `out`, or returns false
-meaning a null should be written to the intermediate state vector. Accumulators
-that are nulls (i.e., no value has been added to them) automatically become
-nulls in the intermediate state vector without `writeIntermediateResult` being
-called.
+has an out-parameter of the type `exec::out_type<IntermediateType>&`.
+`const FunctionState&` holds the function-level variables. This method returns
+true if it writes a non-null value to `out`, or returns false meaning a null
+should be written to the intermediate state vector. Accumulators that are
+nulls (i.e., no value has been added to them) automatically become nulls in
+the intermediate state vector without `writeIntermediateResult` being called.
 
 writeFinalResult
 """"""""""""""""
 
 This method writes *this* accumulator out to a final result vector. It
-has an out-parameter of the type `exec::out_type<OutputType>&`. This
-method returns true if it writes a non-null value to `out`, or returns false
-meaning a null should be written to the final result vector. Accumulators
-that are nulls (i.e., no value has been added to them) automatically become
-nulls in the final result vector without `writeFinalResult` being called.
+has an out-parameter of the type `exec::out_type<OutputType>&`.
+`const FunctionState&` holds the function-level variables. This method returns
+true if it writes a non-null value to `out`, or returns false meaning a null
+should be written to the final result vector. Accumulators that are
+nulls (i.e., no value has been added to them) automatically become nulls in the
+final result vector without `writeFinalResult` being called.
 
 AccumulatorType of Non-Default-Null Behavior
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -355,15 +390,25 @@ For aggregaiton functions of non-default-null behavior, the author defines an
 
     explicit AccumulatorType(HashStringAllocator* allocator);
 
-    bool addInput(HashStringAllocator* allocator, exec::optional_arg_type<T1> value1, ...);
+    bool addInput(
+        HashStringAllocator* allocator,
+        exec::optional_arg_type<T1> value1, ...,
+        const FunctionState& state);
 
     bool combine(
         HashStringAllocator* allocator,
-        exec::optional_arg_type<IntermediateType> other);
+        exec::optional_arg_type<IntermediateType> other,
+        const FunctionState& state);
 
-    bool writeIntermediateResult(bool nonNullGroup, exec::out_type<IntermediateType>& out);
+    bool writeIntermediateResult(
+        bool nonNullGroup,
+        exec::out_type<IntermediateType>& out,
+        const FunctionState& state);
 
-    bool writeFinalResult(bool nonNullGroup, exec::out_type<OutputType>& out);
+    bool writeFinalResult(
+        bool nonNullGroup,
+        exec::out_type<OutputType>& out,
+        const FunctionState& state);
 
     // Optional.
     void destroy(HashStringAllocator* allocator);
@@ -384,7 +429,7 @@ addInput
 
 This method receives a `HashStringAllocator*` followed by
 `exec::optional_arg_type<T1>` values, one for each argument type `Ti` wrapped
-in InputType.
+in InputType. `const FunctionState&` holds the function-level variables.
 
 This method is called on all raw-input rows even if some columns may be null.
 It returns a boolean meaning whether *this* accumulator is non-null after the
@@ -397,26 +442,29 @@ combine
 """""""
 
 This method receives a `HashStringAllocator*` and an
-`exec::optional_arg_type<IntermediateType>` value. This method is called on
-all intermediate states even if some are nulls. Same as `addInput`, this method
-returns a boolean meaning whether *this* accumulator is non-null after the call.
+`exec::optional_arg_type<IntermediateType>` value. `const FunctionState&` holds
+the function-level variables. This method is called on all intermediate states
+even if some are nulls. Same as `addInput`, this method returns a boolean
+meaning whether *this* accumulator is non-null after the call.
 
 writeIntermediateResult
 """""""""""""""""""""""
 
 This method has an out-parameter of the type `exec::out_type<IntermediateType>&`
 and a boolean flag `nonNullGroup` indicating whether *this* accumulator is
-non-null. This method returns true if it writes a non-null value to `out`, or
-return false meaning a null should be written to the intermediate state vector.
+non-null. `const FunctionState&` holds the function-level variables. This method
+returns true if it writes a non-null value to `out`, or returns false meaning a
+null should be written to the intermediate state vector.
 
 writeFinalResult
 """"""""""""""""
 
 This method writes *this* accumulator out to a final result vector. It has an
 out-parameter of the type `exec::out_type<OutputType>&` and a boolean flag
-`nonNullGroup` indicating whether *this* accumulator is non-null. This method
-returns true if it writes a non-null value to `out`, or return false meaning a
-null should be written to the final result vector.
+`nonNullGroup` indicating whether *this* accumulator is non-null.
+`const FunctionState&` holds the function-level variables. This method returns
+true if it writes a non-null value to `out`, or returns false meaning a null
+should be written to the final result vector.
 
 Limitations
 ^^^^^^^^^^^

diff --git a/velox/exec/Aggregate.h b/velox/exec/Aggregate.h
@@ -129,6 +129,27 @@ class Aggregate {
         rowSizeOffset);
   }
 
+  // Initializes the function-level state used by the simple aggregation
+  // function interface (UDAF). The default implementation does nothing.
+  // @param step The aggregation step.
+  // @param rawInputType The raw input types of the UDAF.
+  // @param resultType The result type of the current aggregation step.
+  // @param constantInputs Optional constant input values for the aggregate
+  // function. Must be empty if there are no constant inputs; otherwise it is
+  // aligned with the input types, holding a ConstantVector for every constant
+  // input and nullptr for every non-constant input.
+  // @param companionStep The step used to register aggregate companion
+  // functions: kPartial for the partial companion function, kIntermediate for
+  // the merge companion function, and kFinal for the merge-extract and extract
+  // companion functions. std::nullopt by default.
+  virtual void initialize(
+      core::AggregationNode::Step step,
+      const std::vector<TypePtr>& rawInputType,
+      const TypePtr& resultType,
+      const std::vector<VectorPtr>& constantInputs,
+      std::optional<core::AggregationNode::Step> companionStep = std::nullopt) {
+  }
+
   // Initializes null flags and accumulators for newly encountered groups.  This
   // function should be called only once for each group.
   //

diff --git a/velox/exec/AggregateCompanionAdapter.cpp b/velox/exec/AggregateCompanionAdapter.cpp
@@ -124,13 +124,41 @@ void AggregateCompanionFunctionBase::extractAccumulators(
   fn_->extractAccumulators(groups, numGroups, result);
 }
 
+void AggregateCompanionAdapter::PartialFunction::initialize(
+    core::AggregationNode::Step step,
+    const std::vector<TypePtr>& rawInputType,
+    const facebook::velox::TypePtr& resultType,
+    const std::vector<VectorPtr>& constantInputs,
+    std::optional<core::AggregationNode::Step> /*companionStep*/) {
+  fn_->initialize(
+      step, // Forwarded unchanged, like the next three arguments.
+      rawInputType,
+      resultType,
+      constantInputs,
+      core::AggregationNode::Step::kPartial); // Overrides caller's value.
+}
+
 void AggregateCompanionAdapter::PartialFunction::extractValues(
     char** groups,
     int32_t numGroups,
     VectorPtr* result) {
   fn_->extractAccumulators(groups, numGroups, result);
 }
 
+void AggregateCompanionAdapter::MergeFunction::initialize(
+    core::AggregationNode::Step step,
+    const std::vector<TypePtr>& rawInputType,
+    const facebook::velox::TypePtr& resultType,
+    const std::vector<VectorPtr>& constantInputs,
+    std::optional<core::AggregationNode::Step> /*companionStep*/) {
+  fn_->initialize(
+      step, // Forwarded unchanged, like the next three arguments.
+      rawInputType,
+      resultType,
+      constantInputs,
+      core::AggregationNode::Step::kIntermediate); // Overrides caller's value.
+}
+
 void AggregateCompanionAdapter::MergeFunction::addRawInput(
     char** groups,
     const SelectivityVector& rows,
@@ -156,6 +184,20 @@ void AggregateCompanionAdapter::MergeFunction::extractValues(
   fn_->extractAccumulators(groups, numGroups, result);
 }
 
+void AggregateCompanionAdapter::MergeExtractFunction::initialize(
+    core::AggregationNode::Step step,
+    const std::vector<TypePtr>& rawInputType,
+    const facebook::velox::TypePtr& resultType,
+    const std::vector<VectorPtr>& constantInputs,
+    std::optional<core::AggregationNode::Step> /*companionStep*/) {
+  fn_->initialize(
+      step, // Forwarded unchanged, like the next three arguments.
+      rawInputType,
+      resultType,
+      constantInputs,
+      core::AggregationNode::Step::kFinal); // Overrides caller's value.
+}
+
 void AggregateCompanionAdapter::MergeExtractFunction::extractValues(
     char** groups,
     int32_t numGroups,
@@ -229,6 +271,25 @@ void AggregateCompanionAdapter::ExtractFunction::apply(
   // Perform per-row aggregation.
   std::vector<vector_size_t> allSelectedRange;
   rows.applyToSelected([&](auto row) { allSelectedRange.push_back(row); });
+
+  // Get the raw input types.
+  std::vector<TypePtr> rawInputTypes{args.size()};
+  std::vector<VectorPtr> constantInputs{args.size()};
+  for (auto i = 0; i < args.size(); i++) {
+    rawInputTypes[i] = args[i]->type();
+    if (args[i]->isConstantEncoding()) {
+      constantInputs[i] = args[i];
+    } else {
+      constantInputs[i] = nullptr;
+    }
+  }
+
+  fn_->initialize(
+      core::AggregationNode::Step::kFinal,
+      rawInputTypes,
+      outputType,
+      constantInputs,
+      core::AggregationNode::Step::kFinal);
   fn_->initializeNewGroups(groups, allSelectedRange);
   fn_->enableValidateIntermediateInputs();
   fn_->addIntermediateResults(groups, rows, args, false);

diff --git a/velox/exec/AggregateCompanionAdapter.h b/velox/exec/AggregateCompanionAdapter.h
@@ -99,6 +99,13 @@ struct AggregateCompanionAdapter {
         const TypePtr& resultType)
         : AggregateCompanionFunctionBase{std::move(fn), resultType} {}
 
+    void initialize(
+        core::AggregationNode::Step step,
+        const std::vector<TypePtr>& rawInputType,
+        const TypePtr& resultType,
+        const std::vector<VectorPtr>& constantInputs,
+        std::optional<core::AggregationNode::Step> companionStep) override;
+
     void extractValues(char** groups, int32_t numGroups, VectorPtr* result)
         override;
   };
@@ -110,6 +117,13 @@ struct AggregateCompanionAdapter {
         const TypePtr& resultType)
         : AggregateCompanionFunctionBase{std::move(fn), resultType} {}
 
+    void initialize(
+        core::AggregationNode::Step step,
+        const std::vector<TypePtr>& rawInputType,
+        const TypePtr& resultType,
+        const std::vector<VectorPtr>& constantInputs,
+        std::optional<core::AggregationNode::Step> companionStep) override;
+
     void addRawInput(
         char** groups,
         const SelectivityVector& rows,
@@ -133,6 +147,13 @@ struct AggregateCompanionAdapter {
         const TypePtr& resultType)
         : MergeFunction{std::move(fn), resultType} {}
 
+    void initialize(
+        core::AggregationNode::Step step,
+        const std::vector<TypePtr>& rawInputType,
+        const TypePtr& resultType,
+        const std::vector<VectorPtr>& constantInputs,
+        std::optional<core::AggregationNode::Step> companionStep) override;
+
     void extractValues(char** groups, int32_t numGroups, VectorPtr* result)
         override;
   };

diff --git a/velox/exec/AggregateInfo.cpp b/velox/exec/AggregateInfo.cpp
@@ -103,6 +103,8 @@ std::vector<AggregateInfo> toAggregateInfo(
         aggResultType,
         operatorCtx.driverCtx()->queryConfig());
 
+    info.function->initialize(
+        step, aggregate.rawInputTypes, aggResultType, info.constantInputs);
     auto lambdas = extractLambdaInputs(aggregate);
     if (!lambdas.empty()) {
       if (expressionEvaluator == nullptr) {