diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index fbe7d5c04b9b..2d7ff2af89ba 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1345,6 +1345,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap", "paste", "serde_json", "sqlparser", @@ -1402,7 +1403,6 @@ dependencies = [ "half", "log", "paste", - "sqlparser", ] [[package]] diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 0dec14e9178a..69cdf866cf98 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -226,7 +226,12 @@ impl DFSchema { for (field, qualifier) in self.inner.fields().iter().zip(&self.field_qualifiers) { if let Some(qualifier) = qualifier { - qualified_names.insert((qualifier, field.name())); + if !qualified_names.insert((qualifier, field.name())) { + return _schema_err!(SchemaError::DuplicateQualifiedField { + qualifier: Box::new(qualifier.clone()), + name: field.name().to_string(), + }); + } } else if !unqualified_names.insert(field.name()) { return _schema_err!(SchemaError::DuplicateUnqualifiedField { name: field.name().to_string() @@ -1165,7 +1170,10 @@ mod tests { let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let right = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let join = left.join(&right); - assert!(join.err().is_none()); + assert_eq!( + join.unwrap_err().strip_backtrace(), + "Schema error: Schema contains duplicate qualified field name t1.c0", + ); Ok(()) } diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs index b8414378862e..85207845a005 100644 --- a/datafusion/core/src/catalog_common/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -185,9 +185,7 @@ pub fn resolve_table_references( let _ = s.as_ref().visit(visitor); } DFStatement::CreateExternalTable(table) => { - visitor - .relations - .insert(ObjectName(vec![Ident::from(table.name.as_str())])); + visitor.relations.insert(table.name.clone()); } DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { CopyToSource::Relation(table_name) => { diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 70c507511453..f5867881da13 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -3380,52 +3380,6 @@ mod tests { Ok(()) } - // Table 't1' self join - // Supplementary test of issue: https://github.com/apache/datafusion/issues/7790 - #[tokio::test] - async fn with_column_self_join() -> Result<()> { - let df = test_table().await?.select_columns(&["c1"])?; - let ctx = SessionContext::new(); - - ctx.register_table("t1", df.into_view())?; - - let df = ctx - .table("t1") - .await? - .join( - ctx.table("t1").await?, - JoinType::Inner, - &["c1"], - &["c1"], - None, - )? - .sort(vec![ - // make the test deterministic - col("t1.c1").sort(true, true), - ])? - .limit(0, Some(1))?; - - let df_results = df.clone().collect().await?; - assert_batches_sorted_eq!( - [ - "+----+----+", - "| c1 | c1 |", - "+----+----+", - "| a | a |", - "+----+----+", - ], - &df_results - ); - - let actual_err = df.clone().with_column("new_column", lit(true)).unwrap_err(); - let expected_err = "Error during planning: Projections require unique expression names \ - but the expression \"t1.c1\" at position 0 and \"t1.c1\" at position 1 have the same name. 
\ - Consider aliasing (\"AS\") one of them."; - assert_eq!(actual_err.strip_backtrace(), expected_err); - - Ok(()) - } - #[tokio::test] async fn with_column_renamed() -> Result<()> { let df = test_table() diff --git a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs index 3a5d50bba07f..98b6702bc383 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs @@ -573,7 +573,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { // extract list values, with non-lists converted to Value::Null let array_item_count = rows .iter() - .map(|row| match row { + .map(|row| match maybe_resolve_union(row) { Value::Array(values) => values.len(), _ => 1, }) @@ -1643,6 +1643,93 @@ mod test { assert_batches_eq!(expected, &[batch]); } + #[test] + fn test_avro_nullable_struct_array() { + let schema = apache_avro::Schema::parse_str( + r#" + { + "type": "record", + "name": "r1", + "fields": [ + { + "name": "col1", + "type": [ + "null", + { + "type": "array", + "items": { + "type": [ + "null", + { + "type": "record", + "name": "Item", + "fields": [ + { + "name": "id", + "type": "long" + } + ] + } + ] + } + } + ], + "default": null + } + ] + }"#, + ) + .unwrap(); + let jv1 = serde_json::json!({ + "col1": [ + { + "id": 234 + }, + { + "id": 345 + } + ] + }); + let r1 = apache_avro::to_value(jv1) + .unwrap() + .resolve(&schema) + .unwrap(); + let r2 = apache_avro::to_value(serde_json::json!({ "col1": null })) + .unwrap() + .resolve(&schema) + .unwrap(); + + let mut w = apache_avro::Writer::new(&schema, vec![]); + for _i in 0..5 { + w.append(r1.clone()).unwrap(); + } + w.append(r2).unwrap(); + let bytes = w.into_inner().unwrap(); + + let mut reader = ReaderBuilder::new() + .read_schema() + .with_batch_size(20) + .build(std::io::Cursor::new(bytes)) + .unwrap(); + let batch = reader.next().unwrap().unwrap(); + assert_eq!(batch.num_rows(), 6); + assert_eq!(batch.num_columns(), 1); + + let expected = [ + "+------------------------+", + "| col1 |", + "+------------------------+", + "| [{id: 234}, {id: 345}] |", + "| [{id: 234}, {id: 345}] |", + "| [{id: 234}, {id: 345}] |", + "| [{id: 234}, {id: 345}] |", + "| [{id: 234}, {id: 345}] |", + "| |", + "+------------------------+", + ]; + assert_batches_eq!(expected, &[batch]); + } + #[test] fn test_avro_iterator() { let reader = build_reader("alltypes_plain.avro", 5); diff --git a/datafusion/core/src/datasource/dynamic_file.rs b/datafusion/core/src/datasource/dynamic_file.rs index 3c409af29703..6654d0871c3f 100644 --- a/datafusion/core/src/datasource/dynamic_file.rs +++ b/datafusion/core/src/datasource/dynamic_file.rs @@ -69,11 +69,18 @@ impl UrlTableFactory for DynamicListTableFactory { .ok_or_else(|| plan_datafusion_err!("get current SessionStore error"))?; match ListingTableConfig::new(table_url.clone()) - .infer(state) + .infer_options(state) .await { - Ok(cfg) => ListingTable::try_new(cfg) - .map(|table| Some(Arc::new(table) as Arc)), + Ok(cfg) => { + let cfg = cfg + .infer_partitions_from_path(state) + .await? 
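+                    // `infer_schema` below also requires `self.options`, which
+                    // `infer_options` above has already populated.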
+                    .infer_schema(state)
+                    .await?;
+                ListingTable::try_new(cfg)
+                    .map(|table| Some(Arc::new(table) as Arc<dyn TableProvider>))
+            }
             Err(_) => Ok(None),
         }
     }
diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index 3eb8eed9de36..a9c6aec17537 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -33,7 +33,7 @@ use crate::datasource::{
 };
 use crate::execution::context::SessionState;
 use datafusion_catalog::TableProvider;
-use datafusion_common::{DataFusionError, Result};
+use datafusion_common::{config_err, DataFusionError, Result};
 use datafusion_expr::dml::InsertOp;
 use datafusion_expr::{utils::conjunction, Expr, TableProviderFilterPushDown};
 use datafusion_expr::{SortExpr, TableType};
@@ -192,6 +192,38 @@ impl ListingTableConfig {
     pub async fn infer(self, state: &SessionState) -> Result<Self> {
         self.infer_options(state).await?.infer_schema(state).await
     }
+
+    /// Infer the partition columns from the path. Requires `self.options` to be set before calling.
+    pub async fn infer_partitions_from_path(self, state: &SessionState) -> Result<Self> {
+        match self.options {
+            Some(options) => {
+                let Some(url) = self.table_paths.first() else {
+                    return config_err!("No table path found");
+                };
+                let partitions = options
+                    .infer_partitions(state, url)
+                    .await?
+                    .into_iter()
+                    .map(|col_name| {
+                        (
+                            col_name,
+                            DataType::Dictionary(
+                                Box::new(DataType::UInt16),
+                                Box::new(DataType::Utf8),
+                            ),
+                        )
+                    })
+                    .collect::<Vec<_>>();
+                let options = options.with_table_partition_cols(partitions);
+                Ok(Self {
+                    table_paths: self.table_paths,
+                    file_schema: self.file_schema,
+                    options: Some(options),
+                })
+            }
+            None => config_err!("No `ListingOptions` set for inferring partitions"),
+        }
+    }
 }

 /// Options for creating a [`ListingTable`]
@@ -505,7 +537,7 @@ impl ListingOptions {
     /// Infer the partitioning at the given path on the provided object store.
     /// For performance reasons, it doesn't read all the files on disk
     /// and therefore may fail to detect invalid partitioning.
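Taken together with the `DynamicListTableFactory` change above, callers can now interleave partition discovery between options- and schema-inference. A minimal sketch, assuming a hive-style partitioned directory; the helper name and URL layout are illustrative, not part of this diff:

```rust
use std::sync::Arc;

use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
use datafusion::error::Result;
use datafusion::execution::context::SessionState;

/// Sketch: chain the three inference steps added/split by this diff.
async fn infer_partitioned_listing_table(
    state: &SessionState,
    url: ListingTableUrl, // e.g. a directory laid out as .../date=2024-01-01/...
) -> Result<Arc<ListingTable>> {
    let config = ListingTableConfig::new(url)
        .infer_options(state) // file format, compression, ...
        .await?
        .infer_partitions_from_path(state) // partition columns from the path
        .await?
        .infer_schema(state) // file schema, resolved with the options above
        .await?;
    Ok(Arc::new(ListingTable::try_new(config)?))
}
```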
- async fn infer_partitions( + pub(crate) async fn infer_partitions( &self, state: &SessionState, table_path: &ListingTableUrl, diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 520392c9f075..78c70606bf68 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2557,6 +2557,10 @@ mod tests { ) -> Result { unimplemented!("NoOp"); } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[derive(Debug)] diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index e51adbc4ddc1..2b45d0ed600b 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -443,6 +443,10 @@ impl UserDefinedLogicalNodeCore for TopKPlanNode { expr: replace_sort_expression(self.expr.clone(), exprs.swap_remove(0)), }) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } /// Physical planner for TopK nodes diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 55387fea22ee..d7dc1afe4d50 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -48,6 +48,7 @@ datafusion-expr-common = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-functions-window-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } +indexmap = { workspace = true } paste = "^1.0" serde_json = { workspace = true } sqlparser = { workspace = true } diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 32eac90c3eec..7d94a3b93eab 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -90,7 +90,7 @@ pub use logical_plan::*; pub use partition_evaluator::PartitionEvaluator; pub use sqlparser; pub use table_source::{TableProviderFilterPushDown, TableSource, TableType}; -pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF}; +pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF, StatisticsArgs}; pub use udf::{ScalarUDF, ScalarUDFImpl}; pub use udwf::{ReversedUDWF, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; diff --git a/datafusion/expr/src/logical_plan/extension.rs b/datafusion/expr/src/logical_plan/extension.rs index d49c85fb6fd6..19d4cb3db9ce 100644 --- a/datafusion/expr/src/logical_plan/extension.rs +++ b/datafusion/expr/src/logical_plan/extension.rs @@ -195,6 +195,16 @@ pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync { /// directly because it must remain object safe. fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool; fn dyn_ord(&self, other: &dyn UserDefinedLogicalNode) -> Option; + + /// Returns `true` if a limit can be safely pushed down through this + /// `UserDefinedLogicalNode` node. + /// + /// If this method returns `true`, and the query plan contains a limit at + /// the output of this node, DataFusion will push the limit to the input + /// of this node. + fn supports_limit_pushdown(&self) -> bool { + false + } } impl Hash for dyn UserDefinedLogicalNode { @@ -295,6 +305,16 @@ pub trait UserDefinedLogicalNodeCore: ) -> Option>> { None } + + /// Returns `true` if a limit can be safely pushed down through this + /// `UserDefinedLogicalNode` node. 
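+    /// For example, a node that merely annotates rows and emits its input
+    /// one-for-one could safely opt in (sketch):
+    ///
+    /// ```ignore
+    /// fn supports_limit_pushdown(&self) -> bool {
+    ///     true // this node neither adds, drops, nor reorders rows
+    /// }
+    /// ```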
+ /// + /// If this method returns `true`, and the query plan contains a limit at + /// the output of this node, DataFusion will push the limit to the input + /// of this node. + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } /// Automatically derive UserDefinedLogicalNode to `UserDefinedLogicalNode` @@ -361,6 +381,10 @@ impl UserDefinedLogicalNode for T { .downcast_ref::() .and_then(|other| self.partial_cmp(other)) } + + fn supports_limit_pushdown(&self) -> bool { + self.supports_limit_pushdown() + } } fn get_all_columns_from_schema(schema: &DFSchema) -> HashSet { diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 443d23804adb..19e73140b75c 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -51,6 +51,7 @@ use datafusion_common::{ DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence, FunctionalDependencies, ParamValues, Result, TableReference, UnnestOptions, }; +use indexmap::IndexSet; // backwards compatibility use crate::display::PgJsonVisitor; @@ -3071,6 +3072,8 @@ fn calc_func_dependencies_for_aggregate( let group_by_expr_names = group_expr .iter() .map(|item| item.schema_name().to_string()) + .collect::>() + .into_iter() .collect::>(); let aggregate_func_dependencies = aggregate_functional_dependencies( input.schema(), diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index d30d202df050..8ac6ad372482 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -602,89 +602,48 @@ fn coerced_from<'a>( Some(type_into.clone()) } // coerced into type_into - (Int8, _) if matches!(type_from, Null | Int8) => Some(type_into.clone()), - (Int16, _) if matches!(type_from, Null | Int8 | Int16 | UInt8) => { - Some(type_into.clone()) - } - (Int32, _) - if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => - { - Some(type_into.clone()) - } - (Int64, _) - if matches!( - type_from, - Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 - ) => - { - Some(type_into.clone()) - } - (UInt8, _) if matches!(type_from, Null | UInt8) => Some(type_into.clone()), - (UInt16, _) if matches!(type_from, Null | UInt8 | UInt16) => { - Some(type_into.clone()) - } - (UInt32, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => { - Some(type_into.clone()) - } - (UInt64, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => { - Some(type_into.clone()) - } - (Float32, _) - if matches!( - type_from, - Null | Int8 - | Int16 - | Int32 - | Int64 - | UInt8 - | UInt16 - | UInt32 - | UInt64 - | Float32 - ) => - { - Some(type_into.clone()) - } - (Float64, _) - if matches!( - type_from, - Null | Int8 - | Int16 - | Int32 - | Int64 - | UInt8 - | UInt16 - | UInt32 - | UInt64 - | Float32 - | Float64 - | Decimal128(_, _) - ) => - { - Some(type_into.clone()) - } - (Timestamp(TimeUnit::Nanosecond, None), _) - if matches!( - type_from, - Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8 - ) => - { - Some(type_into.clone()) - } - (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => { + (Int8, Null | Int8) => Some(type_into.clone()), + (Int16, Null | Int8 | Int16 | UInt8) => Some(type_into.clone()), + (Int32, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => Some(type_into.clone()), + (Int64, Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32) => { Some(type_into.clone()) } + (UInt8, 
Null | UInt8) => Some(type_into.clone()), + (UInt16, Null | UInt8 | UInt16) => Some(type_into.clone()), + (UInt32, Null | UInt8 | UInt16 | UInt32) => Some(type_into.clone()), + (UInt64, Null | UInt8 | UInt16 | UInt32 | UInt64) => Some(type_into.clone()), + ( + Float32, + Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 + | Float32, + ) => Some(type_into.clone()), + ( + Float64, + Null + | Int8 + | Int16 + | Int32 + | Int64 + | UInt8 + | UInt16 + | UInt32 + | UInt64 + | Float32 + | Float64 + | Decimal128(_, _), + ) => Some(type_into.clone()), + ( + Timestamp(TimeUnit::Nanosecond, None), + Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8, + ) => Some(type_into.clone()), + (Interval(_), Utf8 | LargeUtf8) => Some(type_into.clone()), // We can go into a Utf8View from a Utf8 or LargeUtf8 - (Utf8View, _) if matches!(type_from, Utf8 | LargeUtf8 | Null) => { - Some(type_into.clone()) - } + (Utf8View, Utf8 | LargeUtf8 | Null) => Some(type_into.clone()), // Any type can be coerced into strings (Utf8 | LargeUtf8, _) => Some(type_into.clone()), (Null, _) if can_cast_types(type_from, type_into) => Some(type_into.clone()), - (List(_), _) if matches!(type_from, FixedSizeList(_, _)) => { - Some(type_into.clone()) - } + (List(_), FixedSizeList(_, _)) => Some(type_into.clone()), // Only accept list and largelist with the same number of dimensions unless the type is Null. // List or LargeList with different dimensions should be handled in TypeSignature or other places before this @@ -695,18 +654,16 @@ fn coerced_from<'a>( Some(type_into.clone()) } // should be able to coerce wildcard fixed size list to non wildcard fixed size list - (FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), _) => match type_from { - FixedSizeList(f_from, size_from) => { - match coerced_from(f_into.data_type(), f_from.data_type()) { - Some(data_type) if &data_type != f_into.data_type() => { - let new_field = - Arc::new(f_into.as_ref().clone().with_data_type(data_type)); - Some(FixedSizeList(new_field, *size_from)) - } - Some(_) => Some(FixedSizeList(Arc::clone(f_into), *size_from)), - _ => None, - } + ( + FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), + FixedSizeList(f_from, size_from), + ) => match coerced_from(f_into.data_type(), f_from.data_type()) { + Some(data_type) if &data_type != f_into.data_type() => { + let new_field = + Arc::new(f_into.as_ref().clone().with_data_type(data_type)); + Some(FixedSizeList(new_field, *size_from)) } + Some(_) => Some(FixedSizeList(Arc::clone(f_into), *size_from)), _ => None, }, (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => { @@ -721,12 +678,7 @@ fn coerced_from<'a>( _ => None, } } - (Timestamp(_, Some(_)), _) - if matches!( - type_from, - Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8 - ) => - { + (Timestamp(_, Some(_)), Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8) => { Some(type_into.clone()) } _ => None, diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index e3ef672daf5f..780ea36910a4 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -26,7 +26,8 @@ use std::vec; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; +use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue, Statistics}; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use crate::expr::AggregateFunction; use crate::function::{ @@ -94,6 +95,22 @@ impl fmt::Display for AggregateUDF { } } +/// Arguments passed to 
[`AggregateUDFImpl::value_from_stats`]
+pub struct StatisticsArgs<'a> {
+    /// The statistics of the aggregate input
+    pub statistics: &'a Statistics,
+    /// The resolved return type of the aggregate function
+    pub return_type: &'a DataType,
+    /// Whether the aggregate function is distinct.
+    ///
+    /// ```sql
+    /// SELECT COUNT(DISTINCT column1) FROM t;
+    /// ```
+    pub is_distinct: bool,
+    /// The physical expression of arguments the aggregate function takes.
+    pub exprs: &'a [Arc<dyn PhysicalExpr>],
+}
+
 impl AggregateUDF {
     /// Create a new `AggregateUDF` from a `[AggregateUDFImpl]` trait object
     ///
@@ -237,13 +254,23 @@ impl AggregateUDF {
     }

     /// Returns true if the function is max, false if the function is min
-    /// None in all other cases, used in certain optimizations or
+    /// None in all other cases, used in certain optimizations for
     /// or aggregate
-    ///
     pub fn is_descending(&self) -> Option<bool> {
         self.inner.is_descending()
     }

+    /// Return the value of this aggregate function if it can be determined
+    /// entirely from statistics and arguments.
+    ///
+    /// See [`AggregateUDFImpl::value_from_stats`] for more details.
+    pub fn value_from_stats(
+        &self,
+        statistics_args: &StatisticsArgs,
+    ) -> Option<ScalarValue> {
+        self.inner.value_from_stats(statistics_args)
+    }
+
     /// See [`AggregateUDFImpl::default_value`] for more details.
     pub fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
         self.inner.default_value(data_type)
     }
@@ -557,6 +584,18 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
         None
     }

+    /// Return the value of this aggregate function if it can be determined
+    /// entirely from statistics and arguments.
+    ///
+    /// Using a [`ScalarValue`] rather than a runtime computation can significantly
+    /// improve query performance.
+    ///
+    /// For example, if the minimum value of column `x` is known to be `42` from
+    /// statistics, then the aggregate `MIN(x)` should return `Some(ScalarValue(42))`
+    fn value_from_stats(&self, _statistics_args: &StatisticsArgs) -> Option<ScalarValue> {
+        None
+    }
+
     /// Returns default value of the function given the input is all `null`.
     ///
     /// Most of the aggregate function return Null if input is Null,
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 1d8eb9445eda..9bb53a1d04a0 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -38,6 +38,7 @@ use datafusion_common::{
     DataFusionError, Result, TableReference,
 };
+use indexmap::IndexSet;
 use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem};

 pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity;

@@ -59,16 +60,7 @@ pub fn exprlist_to_columns(expr: &[Expr], accum: &mut HashSet<Column>) -> Result
 /// Count the number of distinct exprs in a list of group by expressions. If the
 /// first element is a `GroupingSet` expression then it must be the only expr.
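The `value_from_stats` hook added in `udaf.rs` above is easiest to see with a small example. A minimal sketch, assuming exact row-count statistics and a non-DISTINCT aggregate; the free function is illustrative (real implementations live on `AggregateUDFImpl`, as `Count`, `Min`, and `Max` do later in this diff):

```rust
use datafusion_common::{stats::Precision, ScalarValue};
use datafusion_expr::StatisticsArgs;

/// Sketch: answer `COUNT(*)` from statistics alone when they are exact.
fn count_from_stats(args: &StatisticsArgs) -> Option<ScalarValue> {
    if args.is_distinct {
        return None; // DISTINCT needs the actual values, not just a row count
    }
    if let Precision::Exact(num_rows) = &args.statistics.num_rows {
        // Exact statistics: the aggregate never has to scan the input.
        return Some(ScalarValue::Int64(Some(*num_rows as i64)));
    }
    None // Inexact or absent statistics: fall back to normal execution
}
```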
pub fn grouping_set_expr_count(group_expr: &[Expr]) -> Result { - if let Some(Expr::GroupingSet(grouping_set)) = group_expr.first() { - if group_expr.len() > 1 { - return plan_err!( - "Invalid group by expressions, GroupingSet must be the only expression" - ); - } - Ok(grouping_set.distinct_expr().len()) - } else { - Ok(group_expr.len()) - } + grouping_set_to_exprlist(group_expr).map(|exprs| exprs.len()) } /// The [power set] (or powerset) of a set S is the set of all subsets of S, \ @@ -260,7 +252,11 @@ pub fn grouping_set_to_exprlist(group_expr: &[Expr]) -> Result> { } Ok(grouping_set.distinct_expr()) } else { - Ok(group_expr.iter().collect()) + Ok(group_expr + .iter() + .collect::>() + .into_iter() + .collect()) } } diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index d78f68a2604e..33a52afbe21a 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -50,7 +50,6 @@ datafusion-physical-expr-common = { workspace = true } half = { workspace = true } log = { workspace = true } paste = "1.0.14" -sqlparser = { workspace = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 417e28e72a71..cc245b3572ec 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -16,7 +16,9 @@ // under the License. use ahash::RandomState; +use datafusion_common::stats::Precision; use datafusion_functions_aggregate_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator; +use datafusion_physical_expr::expressions; use std::collections::HashSet; use std::ops::BitAnd; use std::{fmt::Debug, sync::Arc}; @@ -46,7 +48,7 @@ use datafusion_expr::{ function::AccumulatorArgs, utils::format_state_name, Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, Volatility, }; -use datafusion_expr::{Expr, ReversedUDAF, TypeSignature}; +use datafusion_expr::{Expr, ReversedUDAF, StatisticsArgs, TypeSignature}; use datafusion_functions_aggregate_common::aggregate::count_distinct::{ BytesDistinctCountAccumulator, FloatDistinctCountAccumulator, PrimitiveDistinctCountAccumulator, @@ -54,6 +56,7 @@ use datafusion_functions_aggregate_common::aggregate::count_distinct::{ use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate_indices; use datafusion_physical_expr_common::binary_map::OutputType; +use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; make_udaf_expr_and_func!( Count, count, @@ -291,6 +294,36 @@ impl AggregateUDFImpl for Count { fn default_value(&self, _data_type: &DataType) -> Result { Ok(ScalarValue::Int64(Some(0))) } + + fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option { + if statistics_args.is_distinct { + return None; + } + if let Precision::Exact(num_rows) = statistics_args.statistics.num_rows { + if statistics_args.exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = statistics_args.exprs[0] + .as_any() + .downcast_ref::() + { + let current_val = &statistics_args.statistics.column_statistics + [col_expr.index()] + .null_count; + if let &Precision::Exact(val) = current_val { + return Some(ScalarValue::Int64(Some((num_rows - val) as i64))); + } + } else if let Some(lit_expr) = statistics_args.exprs[0] + .as_any() + .downcast_ref::() + { + if lit_expr.value() == &COUNT_STAR_EXPANSION { + return 
Some(ScalarValue::Int64(Some(num_rows as i64))); + } + } + } + } + None + } } #[derive(Debug)] diff --git a/datafusion/functions-aggregate/src/macros.rs b/datafusion/functions-aggregate/src/macros.rs index 573b9fd5bdb2..ffb5183278e6 100644 --- a/datafusion/functions-aggregate/src/macros.rs +++ b/datafusion/functions-aggregate/src/macros.rs @@ -15,23 +15,6 @@ // specific language governing permissions and limitations // under the License. -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - macro_rules! make_udaf_expr { ($EXPR_FN:ident, $($arg:ident)*, $DOC:expr, $AGGREGATE_UDF_FN:ident) => { // "fluent expr_fn" style function diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 961e8639604c..1ce1abe09ea8 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -15,7 +15,7 @@ // under the License. //! [`Max`] and [`MaxAccumulator`] accumulator for the `max` function -//! [`Min`] and [`MinAccumulator`] accumulator for the `max` function +//! [`Min`] and [`MinAccumulator`] accumulator for the `min` function // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file @@ -49,10 +49,12 @@ use arrow::datatypes::{ UInt8Type, }; use arrow_schema::IntervalUnit; +use datafusion_common::stats::Precision; use datafusion_common::{ - downcast_value, exec_err, internal_err, DataFusionError, Result, + downcast_value, exec_err, internal_err, ColumnStatistics, DataFusionError, Result, }; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; +use datafusion_physical_expr::expressions; use std::fmt::Debug; use arrow::datatypes::i256; @@ -63,10 +65,10 @@ use arrow::datatypes::{ }; use datafusion_common::ScalarValue; -use datafusion_expr::GroupsAccumulator; use datafusion_expr::{ function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Signature, Volatility, }; +use datafusion_expr::{GroupsAccumulator, StatisticsArgs}; use half::f16; use std::ops::Deref; @@ -147,6 +149,54 @@ macro_rules! 
instantiate_min_accumulator { }}; } +trait FromColumnStatistics { + fn value_from_column_statistics( + &self, + stats: &ColumnStatistics, + ) -> Option; + + fn value_from_statistics( + &self, + statistics_args: &StatisticsArgs, + ) -> Option { + if let Precision::Exact(num_rows) = &statistics_args.statistics.num_rows { + match *num_rows { + 0 => return ScalarValue::try_from(statistics_args.return_type).ok(), + value if value > 0 => { + let col_stats = &statistics_args.statistics.column_statistics; + if statistics_args.exprs.len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = statistics_args.exprs[0] + .as_any() + .downcast_ref::() + { + return self.value_from_column_statistics( + &col_stats[col_expr.index()], + ); + } + } + } + _ => {} + } + } + None + } +} + +impl FromColumnStatistics for Max { + fn value_from_column_statistics( + &self, + col_stats: &ColumnStatistics, + ) -> Option { + if let Precision::Exact(ref val) = col_stats.max_value { + if !val.is_null() { + return Some(val.clone()); + } + } + None + } +} + impl AggregateUDFImpl for Max { fn as_any(&self) -> &dyn std::any::Any { self @@ -272,6 +322,7 @@ impl AggregateUDFImpl for Max { fn is_descending(&self) -> Option { Some(true) } + fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity { datafusion_expr::utils::AggregateOrderSensitivity::Insensitive } @@ -282,6 +333,9 @@ impl AggregateUDFImpl for Max { fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF { datafusion_expr::ReversedUDAF::Identical } + fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option { + self.value_from_statistics(statistics_args) + } } // Statically-typed version of min/max(array) -> ScalarValue for string types @@ -926,6 +980,20 @@ impl Default for Min { } } +impl FromColumnStatistics for Min { + fn value_from_column_statistics( + &self, + col_stats: &ColumnStatistics, + ) -> Option { + if let Precision::Exact(ref val) = col_stats.min_value { + if !val.is_null() { + return Some(val.clone()); + } + } + None + } +} + impl AggregateUDFImpl for Min { fn as_any(&self) -> &dyn std::any::Any { self @@ -1052,6 +1120,9 @@ impl AggregateUDFImpl for Min { Some(false) } + fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option { + self.value_from_statistics(statistics_args) + } fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity { datafusion_expr::utils::AggregateOrderSensitivity::Insensitive } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index ff1b926a9b82..a3d114221d3f 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -102,6 +102,11 @@ harness = false name = "to_timestamp" required-features = ["datetime_expressions"] +[[bench]] +harness = false +name = "encoding" +required-features = ["encoding_expressions"] + [[bench]] harness = false name = "regx" diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs new file mode 100644 index 000000000000..d49235aac938 --- /dev/null +++ b/datafusion/functions/benches/encoding.rs @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::util::bench_util::create_string_array_with_len; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::encoding; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let decode = encoding::decode(); + for size in [1024, 4096, 8192] { + let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); + c.bench_function(&format!("base64_decode/{size}"), |b| { + let method = ColumnarValue::Scalar("base64".into()); + let encoded = encoding::encode() + .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .unwrap(); + + let args = vec![encoded, method]; + b.iter(|| black_box(decode.invoke(&args).unwrap())) + }); + + c.bench_function(&format!("hex_decode/{size}"), |b| { + let method = ColumnarValue::Scalar("hex".into()); + let encoded = encoding::encode() + .invoke(&[ColumnarValue::Array(str_array.clone()), method.clone()]) + .unwrap(); + + let args = vec![encoded, method]; + b.iter(|| black_box(decode.invoke(&args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 5b80c908cfc3..2a22e572614b 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -18,9 +18,12 @@ //! Encoding expressions use arrow::{ - array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait, StringArray}, - datatypes::DataType, + array::{ + Array, ArrayRef, BinaryArray, GenericByteArray, OffsetSizeTrait, StringArray, + }, + datatypes::{ByteArrayType, DataType}, }; +use arrow_buffer::{Buffer, OffsetBufferBuilder}; use base64::{engine::general_purpose, Engine as _}; use datafusion_common::{ cast::{as_generic_binary_array, as_generic_string_array}, @@ -245,16 +248,22 @@ fn base64_encode(input: &[u8]) -> String { general_purpose::STANDARD_NO_PAD.encode(input) } -fn hex_decode(input: &[u8]) -> Result> { - hex::decode(input).map_err(|e| { +fn hex_decode(input: &[u8], buf: &mut [u8]) -> Result { + // only write input / 2 bytes to buf + let out_len = input.len() / 2; + let buf = &mut buf[..out_len]; + hex::decode_to_slice(input, buf).map_err(|e| { DataFusionError::Internal(format!("Failed to decode from hex: {}", e)) - }) + })?; + Ok(out_len) } -fn base64_decode(input: &[u8]) -> Result> { - general_purpose::STANDARD_NO_PAD.decode(input).map_err(|e| { - DataFusionError::Internal(format!("Failed to decode from base64: {}", e)) - }) +fn base64_decode(input: &[u8], buf: &mut [u8]) -> Result { + general_purpose::STANDARD_NO_PAD + .decode_slice(input, buf) + .map_err(|e| { + DataFusionError::Internal(format!("Failed to decode from base64: {}", e)) + }) } macro_rules! encode_to_array { @@ -267,14 +276,35 @@ macro_rules! encode_to_array { }}; } -macro_rules! 
decode_to_array { - ($METHOD: ident, $INPUT:expr) => {{ - let binary_array: BinaryArray = $INPUT - .iter() - .map(|x| x.map(|x| $METHOD(x.as_ref())).transpose()) - .collect::>()?; - Arc::new(binary_array) - }}; +fn decode_to_array( + method: F, + input: &GenericByteArray, + conservative_upper_bound_size: usize, +) -> Result +where + F: Fn(&[u8], &mut [u8]) -> Result, +{ + let mut values = vec![0; conservative_upper_bound_size]; + let mut offsets = OffsetBufferBuilder::new(input.len()); + let mut total_bytes_decoded = 0; + for v in input { + if let Some(v) = v { + let cursor = &mut values[total_bytes_decoded..]; + let decoded = method(v.as_ref(), cursor)?; + total_bytes_decoded += decoded; + offsets.push_length(decoded); + } else { + offsets.push_length(0); + } + } + // We reserved an upper bound size for the values buffer, but we only use the actual size + values.truncate(total_bytes_decoded); + let binary_array = BinaryArray::try_new( + offsets.finish(), + Buffer::from_vec(values), + input.nulls().cloned(), + )?; + Ok(Arc::new(binary_array)) } impl Encoding { @@ -381,10 +411,7 @@ impl Encoding { T: OffsetSizeTrait, { let input_value = as_generic_binary_array::(value)?; - let array: ArrayRef = match self { - Self::Base64 => decode_to_array!(base64_decode, input_value), - Self::Hex => decode_to_array!(hex_decode, input_value), - }; + let array = self.decode_byte_array(input_value)?; Ok(ColumnarValue::Array(array)) } @@ -393,12 +420,29 @@ impl Encoding { T: OffsetSizeTrait, { let input_value = as_generic_string_array::(value)?; - let array: ArrayRef = match self { - Self::Base64 => decode_to_array!(base64_decode, input_value), - Self::Hex => decode_to_array!(hex_decode, input_value), - }; + let array = self.decode_byte_array(input_value)?; Ok(ColumnarValue::Array(array)) } + + fn decode_byte_array( + &self, + input_value: &GenericByteArray, + ) -> Result { + match self { + Self::Base64 => { + let upper_bound = + base64::decoded_len_estimate(input_value.values().len()); + decode_to_array(base64_decode, input_value, upper_bound) + } + Self::Hex => { + // Calculate the upper bound for decoded byte size + // For hex encoding, each pair of hex characters (2 bytes) represents 1 byte when decoded + // So the upper bound is half the length of the input values. + let upper_bound = input_value.values().len() / 2; + decode_to_array(hex_decode, input_value, upper_bound) + } + } + } } impl fmt::Display for Encoding { diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 20029ba005c4..8cd26a824acc 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -48,9 +48,9 @@ impl RegexpLikeFunc { signature: Signature::one_of( vec![ Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), Exact(vec![Utf8, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs index bf40eff11d30..498b591620ee 100644 --- a/datafusion/functions/src/regex/regexpmatch.rs +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -54,9 +54,9 @@ impl RegexpMatchFunc { // If that fails, it proceeds to `(LargeUtf8, Utf8)`. // TODO: Native support Utf8View for regexp_match. 
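                    // (With the corrected signatures below, a `LargeUtf8` haystack
                    // is paired with `LargeUtf8` pattern and flags, so all arguments
                    // coerce to one string width instead of the old mixed
                    // `(LargeUtf8, Utf8)` pairs.)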
Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8]), Exact(vec![Utf8, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), ], Volatility::Immutable, ), @@ -131,7 +131,7 @@ pub fn regexp_match(args: &[ArrayRef]) -> Result { let flags = as_generic_string_array::(&args[2])?; if flags.iter().any(|s| s == Some("g")) { - return plan_err!("regexp_match() does not support the \"global\" option") + return plan_err!("regexp_match() does not support the \"global\" option"); } regexp::regexp_match(values, regex, Some(flags)) diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 86520b3587cd..b3b24724552a 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -48,13 +48,7 @@ impl AnalyzerRule for CountWildcardRule { } fn is_wildcard(expr: &Expr) -> bool { - matches!( - expr, - Expr::Wildcard { - qualifier: None, - .. - } - ) + matches!(expr, Expr::Wildcard { .. }) } fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool { diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index c771f31a58b2..aabc549de583 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -385,6 +385,10 @@ mod test { empty_schema: Arc::clone(&self.empty_schema), }) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[test] diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 5ab427a31699..b5d581f3919f 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -895,6 +895,10 @@ mod tests { // Since schema is same. Output columns requires their corresponding version in the input columns. 
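            // (`necessary_children_exprs` reports, per child, which input
            // columns are needed to produce the requested output columns;
            // returning `None` would make the optimizer keep all of them.)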
Some(vec![output_columns.to_vec()]) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[derive(Debug, Hash, PartialEq, Eq)] @@ -991,6 +995,10 @@ mod tests { } Some(vec![left_reqs, right_reqs]) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[test] diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 4e36cc62588e..6e2cc0cbdbcb 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -1499,6 +1499,10 @@ mod tests { schema: Arc::clone(&self.schema), }) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[test] diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 158c7592df51..8b5e483001b3 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -153,6 +153,29 @@ impl OptimizerRule for PushDownLimit { subquery_alias.input = Arc::new(new_limit); Ok(Transformed::yes(LogicalPlan::SubqueryAlias(subquery_alias))) } + LogicalPlan::Extension(extension_plan) + if extension_plan.node.supports_limit_pushdown() => + { + let new_children = extension_plan + .node + .inputs() + .into_iter() + .map(|child| { + LogicalPlan::Limit(Limit { + skip: 0, + fetch: Some(fetch + skip), + input: Arc::new(child.clone()), + }) + }) + .collect::>(); + + // Create a new extension node with updated inputs + let child_plan = LogicalPlan::Extension(extension_plan); + let new_extension = + child_plan.with_new_exprs(child_plan.expressions(), new_children)?; + + transformed_limit(skip, fetch, new_extension) + } input => original_limit(skip, fetch, input), } } @@ -258,17 +281,241 @@ fn push_down_join(mut join: Join, limit: usize) -> Transformed { #[cfg(test)] mod test { + use std::cmp::Ordering; + use std::fmt::{Debug, Formatter}; use std::vec; use super::*; use crate::test::*; - use datafusion_expr::{col, exists, logical_plan::builder::LogicalPlanBuilder}; + + use datafusion_common::DFSchemaRef; + use datafusion_expr::{ + col, exists, logical_plan::builder::LogicalPlanBuilder, Expr, Extension, + UserDefinedLogicalNodeCore, + }; use datafusion_functions_aggregate::expr_fn::max; fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { assert_optimized_plan_eq(Arc::new(PushDownLimit::new()), plan, expected) } + #[derive(Debug, PartialEq, Eq, Hash)] + pub struct NoopPlan { + input: Vec, + schema: DFSchemaRef, + } + + // Manual implementation needed because of `schema` field. Comparison excludes this field. 
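+    // (`DFSchemaRef` has no `PartialOrd` impl, so ordering derives from
+    // `input` alone: plans with equal inputs compare equal even if their
+    // schemas differ.)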
+ impl PartialOrd for NoopPlan { + fn partial_cmp(&self, other: &Self) -> Option { + self.input.partial_cmp(&other.input) + } + } + + impl UserDefinedLogicalNodeCore for NoopPlan { + fn name(&self) -> &str { + "NoopPlan" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.input.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + self.input + .iter() + .flat_map(|child| child.expressions()) + .collect() + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "NoopPlan") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { + Ok(Self { + input: inputs, + schema: Arc::clone(&self.schema), + }) + } + + fn supports_limit_pushdown(&self) -> bool { + true // Allow limit push-down + } + } + + #[derive(Debug, PartialEq, Eq, Hash)] + struct NoLimitNoopPlan { + input: Vec, + schema: DFSchemaRef, + } + + // Manual implementation needed because of `schema` field. Comparison excludes this field. + impl PartialOrd for NoLimitNoopPlan { + fn partial_cmp(&self, other: &Self) -> Option { + self.input.partial_cmp(&other.input) + } + } + + impl UserDefinedLogicalNodeCore for NoLimitNoopPlan { + fn name(&self) -> &str { + "NoLimitNoopPlan" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + self.input.iter().collect() + } + + fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + fn expressions(&self) -> Vec { + self.input + .iter() + .flat_map(|child| child.expressions()) + .collect() + } + + fn fmt_for_explain(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "NoLimitNoopPlan") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec, + inputs: Vec, + ) -> Result { + Ok(Self { + input: inputs, + schema: Arc::clone(&self.schema), + }) + } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } + } + #[test] + fn limit_pushdown_basic() -> Result<()> { + let table_scan = test_table_scan()?; + let noop_plan = LogicalPlan::Extension(Extension { + node: Arc::new(NoopPlan { + input: vec![table_scan.clone()], + schema: Arc::clone(table_scan.schema()), + }), + }); + + let plan = LogicalPlanBuilder::from(noop_plan) + .limit(0, Some(1000))? + .build()?; + + let expected = "Limit: skip=0, fetch=1000\ + \n NoopPlan\ + \n Limit: skip=0, fetch=1000\ + \n TableScan: test, fetch=1000"; + + assert_optimized_plan_equal(plan, expected) + } + + #[test] + fn limit_pushdown_with_skip() -> Result<()> { + let table_scan = test_table_scan()?; + let noop_plan = LogicalPlan::Extension(Extension { + node: Arc::new(NoopPlan { + input: vec![table_scan.clone()], + schema: Arc::clone(table_scan.schema()), + }), + }); + + let plan = LogicalPlanBuilder::from(noop_plan) + .limit(10, Some(1000))? + .build()?; + + let expected = "Limit: skip=10, fetch=1000\ + \n NoopPlan\ + \n Limit: skip=0, fetch=1010\ + \n TableScan: test, fetch=1010"; + + assert_optimized_plan_equal(plan, expected) + } + + #[test] + fn limit_pushdown_multiple_limits() -> Result<()> { + let table_scan = test_table_scan()?; + let noop_plan = LogicalPlan::Extension(Extension { + node: Arc::new(NoopPlan { + input: vec![table_scan.clone()], + schema: Arc::clone(table_scan.schema()), + }), + }); + + let plan = LogicalPlanBuilder::from(noop_plan) + .limit(10, Some(1000))? + .limit(20, Some(500))? 
+ .build()?; + + let expected = "Limit: skip=30, fetch=500\ + \n NoopPlan\ + \n Limit: skip=0, fetch=530\ + \n TableScan: test, fetch=530"; + + assert_optimized_plan_equal(plan, expected) + } + + #[test] + fn limit_pushdown_multiple_inputs() -> Result<()> { + let table_scan = test_table_scan()?; + let noop_plan = LogicalPlan::Extension(Extension { + node: Arc::new(NoopPlan { + input: vec![table_scan.clone(), table_scan.clone()], + schema: Arc::clone(table_scan.schema()), + }), + }); + + let plan = LogicalPlanBuilder::from(noop_plan) + .limit(0, Some(1000))? + .build()?; + + let expected = "Limit: skip=0, fetch=1000\ + \n NoopPlan\ + \n Limit: skip=0, fetch=1000\ + \n TableScan: test, fetch=1000\ + \n Limit: skip=0, fetch=1000\ + \n TableScan: test, fetch=1000"; + + assert_optimized_plan_equal(plan, expected) + } + + #[test] + fn limit_pushdown_disallowed_noop_plan() -> Result<()> { + let table_scan = test_table_scan()?; + let no_limit_noop_plan = LogicalPlan::Extension(Extension { + node: Arc::new(NoLimitNoopPlan { + input: vec![table_scan.clone()], + schema: Arc::clone(table_scan.schema()), + }), + }); + + let plan = LogicalPlanBuilder::from(no_limit_noop_plan) + .limit(0, Some(1000))? + .build()?; + + let expected = "Limit: skip=0, fetch=1000\ + \n NoLimitNoopPlan\ + \n TableScan: test"; + + assert_optimized_plan_equal(plan, expected) + } + #[test] fn limit_pushdown_projection_table_provider() -> Result<()> { let table_scan = test_table_scan()?; diff --git a/datafusion/optimizer/src/test/user_defined.rs b/datafusion/optimizer/src/test/user_defined.rs index 814cd0c0cd0a..a39f90b5da5d 100644 --- a/datafusion/optimizer/src/test/user_defined.rs +++ b/datafusion/optimizer/src/test/user_defined.rs @@ -76,4 +76,8 @@ impl UserDefinedLogicalNodeCore for TestUserDefinedPlanNode { input: inputs.swap_remove(0), }) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index 470bd947c7fb..236167985790 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -345,7 +345,7 @@ fn select_wildcard_with_repeated_column() { let sql = "SELECT *, col_int32 FROM test"; let err = test_sql(sql).expect_err("query should have failed"); assert_eq!( - "expand_wildcard_rule\ncaused by\nError during planning: Projections require unique expression names but the expression \"test.col_int32\" at position 0 and \"test.col_int32\" at position 7 have the same name. 
Consider aliasing (\"AS\") one of them.",
+        "Schema error: Schema contains duplicate qualified field name test.col_int32",
         err.strip_backtrace()
     );
 }
@@ -396,7 +396,7 @@ fn test_sql(sql: &str) -> Result<LogicalPlan> {
         .with_udaf(count_udaf())
         .with_udaf(avg_udaf());
     let sql_to_rel = SqlToRel::new(&context_provider);
-    let plan = sql_to_rel.sql_statement_to_plan(statement.clone()).unwrap();
+    let plan = sql_to_rel.sql_statement_to_plan(statement.clone())?;

     let config = OptimizerContext::new().with_skip_failing_rules(false);
     let analyzer = Analyzer::new();
diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs
index 65423033d5e0..bb3e9218bc41 100644
--- a/datafusion/physical-expr/src/equivalence/ordering.rs
+++ b/datafusion/physical-expr/src/equivalence/ordering.rs
@@ -18,6 +18,7 @@
 use std::fmt::Display;
 use std::hash::Hash;
 use std::sync::Arc;
+use std::vec::IntoIter;

 use crate::equivalence::add_offset_to_expr;
 use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr};

 use arrow_schema::SortOptions;

 ///
 /// Here, both `vec![a ASC, b ASC]` and `vec![c DESC, d ASC]` describe the table
 /// ordering. In this case, we say that these orderings are equivalent.
-#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
 pub struct OrderingEquivalenceClass {
     pub orderings: Vec<LexOrdering>,
 }
@@ -44,7 +45,7 @@ pub struct OrderingEquivalenceClass {
 impl OrderingEquivalenceClass {
     /// Creates new empty ordering equivalence class.
     pub fn empty() -> Self {
-        Self { orderings: vec![] }
+        Default::default()
     }

     /// Clears (empties) this ordering equivalence class.
@@ -197,6 +198,15 @@
     }
 }

+impl IntoIterator for OrderingEquivalenceClass {
+    type Item = LexOrdering;
+    type IntoIter = IntoIter<LexOrdering>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.orderings.into_iter()
+    }
+}
+
 /// This function constructs a duplicate-free `LexOrdering` by filtering out
 /// duplicate entries that have same physical expression inside. For example,
 /// `vec![a ASC, a DESC]` collapses to `vec![a ASC]`.
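The `Display` fix in the next hunk brackets each ordering individually, and the new `IntoIterator` impl lets callers drain a class without reaching into its `orderings` field. A sketch of both, assuming an `eq_class: OrderingEquivalenceClass` in scope holding the equivalent orderings `[a ASC, b ASC]` and `[c DESC]` (strings follow the doctest updated in `properties.rs` below):

```rust
// Display, before the fix: "[a@0 ASC,b@1 ASCc@2 DESC]"    (orderings run together)
// Display, after the fix:  "[[a@0 ASC,b@1 ASC], [c@2 DESC]]"
for ordering in eq_class {
    // each item is a `LexOrdering` (a `Vec<PhysicalSortExpr>`)
    println!("{}", PhysicalSortExpr::format_list(&ordering));
}
```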
@@ -229,10 +239,10 @@ impl Display for OrderingEquivalenceClass { write!(f, "[")?; let mut iter = self.orderings.iter(); if let Some(ordering) = iter.next() { - write!(f, "{}", PhysicalSortExpr::format_list(ordering))?; + write!(f, "[{}]", PhysicalSortExpr::format_list(ordering))?; } for ordering in iter { - write!(f, "{}", PhysicalSortExpr::format_list(ordering))?; + write!(f, ", [{}]", PhysicalSortExpr::format_list(ordering))?; } write!(f, "]")?; Ok(()) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index dc59a1eb835b..8137b4f9da13 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -118,7 +118,7 @@ use itertools::Itertools; /// PhysicalSortExpr::new_default(col_c).desc(), /// ]); /// -/// assert_eq!(eq_properties.to_string(), "order: [a@0 ASC,c@2 DESC], const: [b@1]") +/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC,c@2 DESC]], const: [b@1]") /// ``` #[derive(Debug, Clone)] pub struct EquivalenceProperties { @@ -2708,379 +2708,428 @@ mod tests { )) } - #[tokio::test] - async fn test_union_equivalence_properties_multi_children() -> Result<()> { - let schema = create_test_schema()?; + #[test] + fn test_union_equivalence_properties_multi_children_1() { + let schema = create_test_schema().unwrap(); let schema2 = append_fields(&schema, "1"); let schema3 = append_fields(&schema, "2"); - let test_cases = vec![ - // --------- TEST CASE 1 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b", "c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b"]], - ), - // --------- TEST CASE 2 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b", "c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b", "c"]], - ), - // --------- TEST CASE 3 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1", "c1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["a2", "b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![vec!["a", "b"]], - ), - // --------- TEST CASE 4 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1"]], - Arc::clone(&schema2), - ), - // Children 3 - ( - // Orderings - vec![vec!["b2", "c2"]], - Arc::clone(&schema3), - ), - ], - // Expected - vec![], - ), - // --------- TEST CASE 5 ---------- - ( - vec![ - // Children 1 - ( - // Orderings - vec![vec!["a", "b"], vec!["c"]], - Arc::clone(&schema), - ), - // Children 2 - ( - // Orderings - vec![vec!["a1", "b1"], vec!["c1"]], - Arc::clone(&schema2), - ), - ], - // Expected - vec![vec!["a", "b"], vec!["c"]], - ), - ]; - for (children, expected) in test_cases { - let children_eqs = children - .iter() - .map(|(orderings, schema)| { - let orderings = orderings - .iter() - .map(|ordering| { - ordering - .iter() - .map(|name| PhysicalSortExpr { - expr: col(name, 
schema).unwrap(), - options: SortOptions::default(), - }) - .collect::>() - }) - .collect::>(); - EquivalenceProperties::new_with_orderings( - Arc::clone(schema), - &orderings, - ) - }) - .collect::>(); - let actual = calculate_union(children_eqs, Arc::clone(&schema))?; + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b", "c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b"]]) + .run() + } - let expected_ordering = expected - .into_iter() - .map(|ordering| { - ordering - .into_iter() - .map(|name| PhysicalSortExpr { - expr: col(name, &schema).unwrap(), - options: SortOptions::default(), - }) - .collect::>() - }) - .collect::>(); - let expected = EquivalenceProperties::new_with_orderings( - Arc::clone(&schema), - &expected_ordering, - ); - assert_eq_properties_same( - &actual, - &expected, - format!("expected: {:?}, actual: {:?}", expected, actual), - ); - } - Ok(()) + #[test] + fn test_union_equivalence_properties_multi_children_2() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b", "c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b", "c"]]) + .run() } - #[tokio::test] - async fn test_union_equivalence_properties_binary() -> Result<()> { - let schema = create_test_schema()?; + #[test] + fn test_union_equivalence_properties_multi_children_3() { + let schema = create_test_schema().unwrap(); let schema2 = append_fields(&schema, "1"); - let col_a = &col("a", &schema)?; - let col_b = &col("b", &schema)?; - let col_c = &col("c", &schema)?; - let col_a1 = &col("a1", &schema2)?; - let col_b1 = &col("b1", &schema2)?; - let options = SortOptions::default(); - let options_desc = !SortOptions::default(); - let test_cases = [ - //-----------TEST CASE 1----------// - ( - ( - // First child orderings - vec![ - // [a ASC] - (vec![(col_a, options)]), - ], - // First child constants - vec![col_b, col_c], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [b ASC] - (vec![(col_b, options)]), - ], - // Second child constants - vec![col_a, col_c], - Arc::clone(&schema), - ), - ( - // Union expected orderings - vec![ - // [a ASC] - vec![(col_a, options)], - // [b ASC] - vec![(col_b, options)], - ], - // Union - vec![col_c], - ), - ), - //-----------TEST CASE 2----------// + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1", "c1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["a2", "b2", "c2"]], &schema3) + .with_expected_sort(vec![vec!["a", "b"]]) + .run() + } + + #[test] + fn test_union_equivalence_properties_multi_children_4() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + let schema3 = append_fields(&schema, "2"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1"]], &schema2) + // Children 3 + .with_child_sort(vec![vec!["b2", "c2"]], &schema3) + 
.with_expected_sort(vec![]) + .run() + } + + #[test] + fn test_union_equivalence_properties_multi_children_5() { + let schema = create_test_schema().unwrap(); + let schema2 = append_fields(&schema, "1"); + UnionEquivalenceTest::new(&schema) + // Children 1 + .with_child_sort(vec![vec!["a", "b"], vec!["c"]], &schema) + // Children 2 + .with_child_sort(vec![vec!["a1", "b1"], vec!["c1"]], &schema2) + .with_expected_sort(vec![vec!["a", "b"], vec!["c"]]) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_1() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) + .with_child_sort_and_const_exprs( + // First child: [a ASC], const [b, c] + vec![vec!["a"]], + vec!["b", "c"], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [b ASC], const [a, c] + vec![vec!["b"]], + vec!["a", "c"], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union expected orderings: [[a ASC], [b ASC]], const [c] + vec![vec!["a"], vec!["b"]], + vec!["c"], + ) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_2() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) // Meet ordering between [a ASC], [a ASC, b ASC] should be [a ASC] - ( - ( - // First child orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [a ASC, b ASC] - vec![(col_a, options), (col_b, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Union orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - ), - ), - //-----------TEST CASE 3----------// + .with_child_sort_and_const_exprs( + // First child: [a ASC], const [] + vec![vec!["a"]], + vec![], + &schema, + ) + .with_child_sort_and_const_exprs( + // Second child: [a ASC, b ASC], const [] + vec![vec!["a", "b"]], + vec![], + &schema, + ) + .with_expected_sort_and_const_exprs( + // Union orderings: [a ASC], const [] + vec![vec!["a"]], + vec![], + ) + .run() + } + + #[test] + fn test_union_equivalence_properties_constants_3() { + let schema = create_test_schema().unwrap(); + UnionEquivalenceTest::new(&schema) // Meet ordering between [a ASC], [a DESC] should be [] - ( - ( - // First child orderings - vec![ - // [a ASC] - vec![(col_a, options)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Second child orderings - vec![ - // [a DESC] - vec![(col_a, options_desc)], - ], - // No constant - vec![], - Arc::clone(&schema), - ), - ( - // Union doesn't have any ordering - vec![], - // No constant - vec![], - ), - ), - //-----------TEST CASE 4----------// - // Meet ordering between [a ASC], [a1 ASC, b1 ASC] should be [a ASC] - // Where a, and a1 ath the same index for their corresponding schemas. 
-            (
-                (
-                    // First child orderings
-                    vec![
-                        // [a ASC]
-                        vec![(col_a, options)],
-                    ],
-                    // No constant
-                    vec![],
-                    Arc::clone(&schema),
-                ),
-                (
-                    // Second child orderings
-                    vec![
-                        // [a1 ASC, b1 ASC]
-                        vec![(col_a1, options), (col_b1, options)],
-                    ],
-                    // No constant
-                    vec![],
-                    Arc::clone(&schema2),
-                ),
-                (
-                    // Union orderings
-                    vec![
-                        // [a ASC]
-                        vec![(col_a, options)],
-                    ],
-                    // No constant
-                    vec![],
-                ),
-            ),
-        ];
+            .with_child_sort_and_const_exprs(
+                // First child: [a ASC], const []
+                vec![vec!["a"]],
+                vec![],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child orderings: [a DESC], const []
+                vec![vec!["a DESC"]],
+                vec![],
+                &schema,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union doesn't have any ordering or constant
+                vec![],
+                vec![],
+            )
+            .run()
+    }
-        for (
-            test_idx,
-            (
-                (first_child_orderings, first_child_constants, first_schema),
-                (second_child_orderings, second_child_constants, second_schema),
-                (union_orderings, union_constants),
-            ),
-        ) in test_cases.iter().enumerate()
-        {
-            let first_orderings = first_child_orderings
-                .iter()
-                .map(|ordering| convert_to_sort_exprs(ordering))
-                .collect::<Vec<_>>();
-            let first_constants = first_child_constants
-                .iter()
-                .map(|expr| ConstExpr::new(Arc::clone(expr)))
-                .collect::<Vec<_>>();
-            let mut lhs = EquivalenceProperties::new(Arc::clone(first_schema));
-            lhs = lhs.with_constants(first_constants);
-            lhs.add_new_orderings(first_orderings);
+    #[test]
+    fn test_union_equivalence_properties_constants_4() {
+        let schema = create_test_schema().unwrap();
+        let schema2 = append_fields(&schema, "1");
+        UnionEquivalenceTest::new(&schema)
+            .with_child_sort_and_const_exprs(
+                // First child orderings: [a ASC], const []
+                vec![vec!["a"]],
+                vec![],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child orderings: [a1 ASC, b1 ASC], const []
+                vec![vec!["a1", "b1"]],
+                vec![],
+                &schema2,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union orderings:
+                // should be [a ASC]
+                //
+                // Where a and a1 are at the same index in their corresponding
+                // schemas.
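+                // (The union output schema here is the first child's schema,
+                // so the expected ordering is written in terms of `a`.)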
+                vec![vec!["a"]],
+                vec![],
+            )
+            .run()
+    }
-            let second_orderings = second_child_orderings
-                .iter()
-                .map(|ordering| convert_to_sort_exprs(ordering))
-                .collect::<Vec<_>>();
-            let second_constants = second_child_constants
-                .iter()
-                .map(|expr| ConstExpr::new(Arc::clone(expr)))
-                .collect::<Vec<_>>();
-            let mut rhs = EquivalenceProperties::new(Arc::clone(second_schema));
-            rhs = rhs.with_constants(second_constants);
-            rhs.add_new_orderings(second_orderings);
+    #[test]
+    #[ignore]
+    // ignored due to https://github.com/apache/datafusion/issues/12446
+    fn test_union_equivalence_properties_constants() {
+        let schema = create_test_schema().unwrap();
+        UnionEquivalenceTest::new(&schema)
+            .with_child_sort_and_const_exprs(
+                // First child orderings: [a ASC, c ASC], const [b]
+                vec![vec!["a", "c"]],
+                vec!["b"],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child orderings: [b ASC, c ASC], const [a]
+                vec![vec!["b", "c"]],
+                vec!["a"],
+                &schema,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union orderings: [
+                //   [a ASC, b ASC, c ASC],
+                //   [b ASC, a ASC, c ASC]
+                // ], const []
+                vec![vec!["a", "b", "c"], vec!["b", "a", "c"]],
+                vec![],
+            )
+            .run()
+    }
+
+    #[test]
+    #[ignore]
+    // ignored due to https://github.com/apache/datafusion/issues/12446
+    fn test_union_equivalence_properties_constants_desc() {
+        let schema = create_test_schema().unwrap();
+        UnionEquivalenceTest::new(&schema)
+            .with_child_sort_and_const_exprs(
+                // NB `b DESC` in the second child
+                // First child orderings: [a ASC, c ASC], const [b]
+                vec![vec!["a", "c"]],
+                vec!["b"],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child orderings: [b DESC, c ASC], const [a]
+                vec![vec!["b DESC", "c"]],
+                vec!["a"],
+                &schema,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union orderings: [
+                //   [a ASC, b DESC, c ASC],
+                //   [b DESC, a ASC, c ASC]
+                // ], const []
+                vec![vec!["a", "b DESC", "c"], vec!["b DESC", "a", "c"]],
+                vec![],
+            )
+            .run()
+    }
+
+    #[test]
+    #[ignore]
+    // ignored due to https://github.com/apache/datafusion/issues/12446
+    fn test_union_equivalence_properties_constants_middle() {
+        let schema = create_test_schema().unwrap();
+        UnionEquivalenceTest::new(&schema)
+            .with_child_sort_and_const_exprs(
+                // First child: [a ASC, b ASC, d ASC], const [c]
+                vec![vec!["a", "b", "d"]],
+                vec!["c"],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child: [a ASC, c ASC, d ASC], const [b]
+                vec![vec!["a", "c", "d"]],
+                vec!["b"],
+                &schema,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union orderings:
+                // [a, b, d] (c constant)
+                // [a, c, d] (b constant)
+                vec![vec!["a", "c", "b", "d"], vec!["a", "b", "c", "d"]],
+                vec![],
+            )
+            .run()
+    }
+
+    #[test]
+    #[ignore]
+    // ignored due to https://github.com/apache/datafusion/issues/12446
+    fn test_union_equivalence_properties_constants_middle_desc() {
+        let schema = create_test_schema().unwrap();
+        UnionEquivalenceTest::new(&schema)
+            .with_child_sort_and_const_exprs(
+                // NB `b DESC` in the first child
+                //
+                // First child: [a ASC, b DESC, d ASC], const [c]
+                vec![vec!["a", "b DESC", "d"]],
+                vec!["c"],
+                &schema,
+            )
+            .with_child_sort_and_const_exprs(
+                // Second child: [a ASC, c ASC, d ASC], const [b]
+                vec![vec!["a", "c", "d"]],
+                vec!["b"],
+                &schema,
+            )
+            .with_expected_sort_and_const_exprs(
+                // Union orderings:
+                // [a, b DESC, d] (c constant)
+                // [a, c, d] (b constant)
+                vec![vec!["a", "c", "b DESC", "d"], vec!["a", "b DESC", "c", "d"]],
+                vec![],
+            )
+            .run()
+    }
+
+    // TODO tests with multiple constants
-        let union_expected_orderings = union_orderings
+    #[derive(Debug)]
+    struct UnionEquivalenceTest {
+        /// The schema of the output of the Union
+        output_schema: SchemaRef,
+        /// The equivalence properties of each child to the union
+        child_properties: Vec<EquivalenceProperties>,
+        /// The expected output properties of the union. Must be set before
+        /// running [`Self::run`]
+        expected_properties: Option<EquivalenceProperties>,
+    }
+
+    impl UnionEquivalenceTest {
+        fn new(output_schema: &SchemaRef) -> Self {
+            Self {
+                output_schema: Arc::clone(output_schema),
+                child_properties: vec![],
+                expected_properties: None,
+            }
+        }
+
+        /// Add a union input with the specified orderings
+        ///
+        /// See [`Self::make_props`] for the format of the strings in `orderings`
+        fn with_child_sort(
+            mut self,
+            orderings: Vec<Vec<&str>>,
+            schema: &SchemaRef,
+        ) -> Self {
+            let properties = self.make_props(orderings, vec![], schema);
+            self.child_properties.push(properties);
+            self
+        }
+
+        /// Add a union input with the specified orderings and constant
+        /// equivalences
+        ///
+        /// See [`Self::make_props`] for the format of the strings in
+        /// `orderings` and `constants`
+        fn with_child_sort_and_const_exprs(
+            mut self,
+            orderings: Vec<Vec<&str>>,
+            constants: Vec<&str>,
+            schema: &SchemaRef,
+        ) -> Self {
+            let properties = self.make_props(orderings, constants, schema);
+            self.child_properties.push(properties);
+            self
+        }
+
+        /// Set the expected output sort order for the union of the children
+        ///
+        /// See [`Self::make_props`] for the format of the strings in `orderings`
+        fn with_expected_sort(mut self, orderings: Vec<Vec<&str>>) -> Self {
+            let properties = self.make_props(orderings, vec![], &self.output_schema);
+            self.expected_properties = Some(properties);
+            self
+        }
+
+        /// Set the expected output sort order and constant expressions for the
+        /// union of the children
+        ///
+        /// See [`Self::make_props`] for the format of the strings in
+        /// `orderings` and `constants`.
+        fn with_expected_sort_and_const_exprs(
+            mut self,
+            orderings: Vec<Vec<&str>>,
+            constants: Vec<&str>,
+        ) -> Self {
+            let properties = self.make_props(orderings, constants, &self.output_schema);
+            self.expected_properties = Some(properties);
+            self
+        }
+
+        /// Compute the union's output equivalence properties from the child
+        /// properties, and compare them to the expected properties
+        fn run(self) {
+            let Self {
+                output_schema,
+                child_properties,
+                expected_properties,
+            } = self;
+            let expected_properties =
+                expected_properties.expect("expected_properties not set");
+            let actual_properties =
+                calculate_union(child_properties, Arc::clone(&output_schema))
+                    .expect("failed to calculate union equivalence properties");
+            assert_eq_properties_same(
+                &actual_properties,
+                &expected_properties,
+                format!(
+                    "expected: {expected_properties:?}\nactual: {actual_properties:?}"
+                ),
+            );
+        }
+
+        /// Make equivalence properties for the specified columns named in orderings and constants
+        ///
+        /// orderings: strings formatted like `"a"` or `"a DESC"`. See [`parse_sort_expr`]
+        /// constants: strings formatted like `"a"`.
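+        ///
+        /// For example (a hypothetical call, assuming columns `a`, `b` and `c`
+        /// exist in `schema`):
+        /// `self.make_props(vec![vec!["a", "b DESC"]], vec!["c"], &schema)`
+        /// builds properties with the single ordering `[a ASC, b DESC]` and the
+        /// constant expression `c`.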
+ fn make_props( + &self, + orderings: Vec>, + constants: Vec<&str>, + schema: &SchemaRef, + ) -> EquivalenceProperties { + let orderings = orderings .iter() - .map(|ordering| convert_to_sort_exprs(ordering)) + .map(|ordering| { + ordering + .iter() + .map(|name| parse_sort_expr(name, schema)) + .collect::>() + }) .collect::>(); - let union_constants = union_constants + + let constants = constants .iter() - .map(|expr| ConstExpr::new(Arc::clone(expr))) + .map(|col_name| ConstExpr::new(col(col_name, schema).unwrap())) .collect::>(); - let mut union_expected_eq = EquivalenceProperties::new(Arc::clone(&schema)); - union_expected_eq = union_expected_eq.with_constants(union_constants); - union_expected_eq.add_new_orderings(union_expected_orderings); - let actual_union_eq = calculate_union_binary(lhs, rhs)?; - let err_msg = format!( - "Error in test id: {:?}, test case: {:?}", - test_idx, test_cases[test_idx] - ); - assert_eq_properties_same(&actual_union_eq, &union_expected_eq, err_msg); + EquivalenceProperties::new_with_orderings(Arc::clone(schema), &orderings) + .with_constants(constants) } - Ok(()) } fn assert_eq_properties_same( @@ -3091,21 +3140,63 @@ mod tests { // Check whether constants are same let lhs_constants = lhs.constants(); let rhs_constants = rhs.constants(); - assert_eq!(lhs_constants.len(), rhs_constants.len(), "{}", err_msg); for rhs_constant in rhs_constants { assert!( const_exprs_contains(lhs_constants, rhs_constant.expr()), - "{}", - err_msg + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" ); } + assert_eq!( + lhs_constants.len(), + rhs_constants.len(), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); // Check whether orderings are same. let lhs_orderings = lhs.oeq_class(); let rhs_orderings = &rhs.oeq_class.orderings; - assert_eq!(lhs_orderings.len(), rhs_orderings.len(), "{}", err_msg); for rhs_ordering in rhs_orderings { - assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg); + assert!( + lhs_orderings.contains(rhs_ordering), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); } + assert_eq!( + lhs_orderings.len(), + rhs_orderings.len(), + "{err_msg}\nlhs: {lhs}\nrhs: {rhs}" + ); + } + + /// Converts a string to a physical sort expression + /// + /// # Example + /// * `"a"` -> (`"a"`, `SortOptions::default()`) + /// * `"a ASC"` -> (`"a"`, `SortOptions { descending: false, nulls_first: false }`) + fn parse_sort_expr(name: &str, schema: &SchemaRef) -> PhysicalSortExpr { + let mut parts = name.split_whitespace(); + let name = parts.next().expect("empty sort expression"); + let mut sort_expr = PhysicalSortExpr::new( + col(name, schema).expect("invalid column name"), + SortOptions::default(), + ); + + if let Some(options) = parts.next() { + sort_expr = match options { + "ASC" => sort_expr.asc(), + "DESC" => sort_expr.desc(), + _ => panic!( + "unknown sort options. Expected 'ASC' or 'DESC', got {}", + options + ), + } + } + + assert!( + parts.next().is_none(), + "unexpected tokens in column name. 
Expected 'name' / 'name ASC' / 'name DESC' but got '{name}'" + ); + + sort_expr } } diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index 87c74579c639..d94983c5adf7 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -30,7 +30,7 @@ use crate::PhysicalExpr; use arrow::array::{Array, ArrayRef}; use arrow::datatypes::{DataType, Field}; use datafusion_common::Result; -use datafusion_common::{exec_err, ScalarValue}; +use datafusion_common::ScalarValue; use datafusion_expr::window_state::WindowAggState; use datafusion_expr::PartitionEvaluator; @@ -86,16 +86,13 @@ impl NthValue { n: i64, ignore_nulls: bool, ) -> Result { - match n { - 0 => exec_err!("NTH_VALUE expects n to be non-zero"), - _ => Ok(Self { - name: name.into(), - expr, - data_type, - kind: NthValueKind::Nth(n), - ignore_nulls, - }), - } + Ok(Self { + name: name.into(), + expr, + data_type, + kind: NthValueKind::Nth(n), + ignore_nulls, + }) } /// Get the NTH_VALUE kind @@ -188,10 +185,7 @@ impl PartitionEvaluator for NthValueEvaluator { // Negative index represents reverse direction. (n_range >= reverse_index, true) } - Ordering::Equal => { - // The case n = 0 is not valid for the NTH_VALUE function. - unreachable!(); - } + Ordering::Equal => (true, false), } } }; @@ -298,10 +292,7 @@ impl PartitionEvaluator for NthValueEvaluator { ) } } - Ordering::Equal => { - // The case n = 0 is not valid for the NTH_VALUE function. - unreachable!(); - } + Ordering::Equal => ScalarValue::try_from(arr.data_type()), } } } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 71f129be984d..a11b498b955c 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -23,14 +23,12 @@ use datafusion_common::scalar::ScalarValue; use datafusion_common::Result; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::{expressions, ExecutionPlan, Statistics}; +use datafusion_physical_plan::{expressions, ExecutionPlan}; use crate::PhysicalOptimizerRule; -use datafusion_common::stats::Precision; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::udaf::AggregateFunctionExpr; +use datafusion_physical_plan::udaf::{AggregateFunctionExpr, StatisticsArgs}; /// Optimizer that uses available statistics for aggregate functions #[derive(Default, Debug)] @@ -57,14 +55,19 @@ impl PhysicalOptimizerRule for AggregateStatistics { let stats = partial_agg_exec.input().statistics()?; let mut projections = vec![]; for expr in partial_agg_exec.aggr_expr() { - if let Some((non_null_rows, name)) = - take_optimizable_column_and_table_count(expr, &stats) + let field = expr.field(); + let args = expr.expressions(); + let statistics_args = StatisticsArgs { + statistics: &stats, + return_type: field.data_type(), + is_distinct: expr.is_distinct(), + exprs: args.as_slice(), + }; + if let Some((optimizable_statistic, name)) = + take_optimizable_value_from_statistics(&statistics_args, expr) { - projections.push((expressions::lit(non_null_rows), name.to_owned())); - } else if let Some((min, name)) = take_optimizable_min(expr, &stats) { - 
projections.push((expressions::lit(min), name.to_owned())); - } else if let Some((max, name)) = take_optimizable_max(expr, &stats) { - projections.push((expressions::lit(max), name.to_owned())); + projections + .push((expressions::lit(optimizable_statistic), name.to_owned())); } else { // TODO: we need all aggr_expr to be resolved (cf TODO fullres) break; @@ -135,160 +138,11 @@ fn take_optimizable(node: &dyn ExecutionPlan) -> Option> None } -/// If this agg_expr is a count that can be exactly derived from the statistics, return it. -fn take_optimizable_column_and_table_count( - agg_expr: &AggregateFunctionExpr, - stats: &Statistics, -) -> Option<(ScalarValue, String)> { - let col_stats = &stats.column_statistics; - if is_non_distinct_count(agg_expr) { - if let Precision::Exact(num_rows) = stats.num_rows { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - let current_val = &col_stats[col_expr.index()].null_count; - if let &Precision::Exact(val) = current_val { - return Some(( - ScalarValue::Int64(Some((num_rows - val) as i64)), - agg_expr.name().to_string(), - )); - } - } else if let Some(lit_expr) = - exprs[0].as_any().downcast_ref::() - { - if lit_expr.value() == &COUNT_STAR_EXPANSION { - return Some(( - ScalarValue::Int64(Some(num_rows as i64)), - agg_expr.name().to_string(), - )); - } - } - } - } - } - None -} - -/// If this agg_expr is a min that is exactly defined in the statistics, return it. -fn take_optimizable_min( - agg_expr: &AggregateFunctionExpr, - stats: &Statistics, -) -> Option<(ScalarValue, String)> { - if let Precision::Exact(num_rows) = &stats.num_rows { - match *num_rows { - 0 => { - // MIN/MAX with 0 rows is always null - if is_min(agg_expr) { - if let Ok(min_data_type) = - ScalarValue::try_from(agg_expr.field().data_type()) - { - return Some((min_data_type, agg_expr.name().to_string())); - } - } - } - value if value > 0 => { - let col_stats = &stats.column_statistics; - if is_min(agg_expr) { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].min_value - { - if !val.is_null() { - return Some(( - val.clone(), - agg_expr.name().to_string(), - )); - } - } - } - } - } - } - _ => {} - } - } - None -} - /// If this agg_expr is a max that is exactly defined in the statistics, return it. 
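+/// The aggregate-specific logic (e.g. COUNT, MIN, MAX) lives in each aggregate
+/// UDF's `value_from_stats` implementation; this function only delegates to it
+/// and pairs the result with the aggregate's output name.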
-fn take_optimizable_max( +fn take_optimizable_value_from_statistics( + statistics_args: &StatisticsArgs, agg_expr: &AggregateFunctionExpr, - stats: &Statistics, ) -> Option<(ScalarValue, String)> { - if let Precision::Exact(num_rows) = &stats.num_rows { - match *num_rows { - 0 => { - // MIN/MAX with 0 rows is always null - if is_max(agg_expr) { - if let Ok(max_data_type) = - ScalarValue::try_from(agg_expr.field().data_type()) - { - return Some((max_data_type, agg_expr.name().to_string())); - } - } - } - value if value > 0 => { - let col_stats = &stats.column_statistics; - if is_max(agg_expr) { - let exprs = agg_expr.expressions(); - if exprs.len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = - exprs[0].as_any().downcast_ref::() - { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].max_value - { - if !val.is_null() { - return Some(( - val.clone(), - agg_expr.name().to_string(), - )); - } - } - } - } - } - } - _ => {} - } - } - None -} - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_non_distinct_count(agg_expr: &AggregateFunctionExpr) -> bool { - if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { - return true; - } - false + let value = agg_expr.fun().value_from_stats(statistics_args); + value.map(|val| (val, agg_expr.name().to_string())) } - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_min(agg_expr: &AggregateFunctionExpr) -> bool { - if agg_expr.fun().name().to_lowercase() == "min" { - return true; - } - false -} - -// TODO: Move this check into AggregateUDFImpl -// https://github.com/apache/datafusion/issues/11153 -fn is_max(agg_expr: &AggregateFunctionExpr) -> bool { - if agg_expr.fun().name().to_lowercase() == "max" { - return true; - } - false -} - -// See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-plan/src/aggregates/group_values/column.rs b/datafusion/physical-plan/src/aggregates/group_values/column.rs index 91d87302ce99..28f35b2bded2 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/column.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/column.rs @@ -16,8 +16,7 @@ // under the License. use crate::aggregates::group_values::group_column::{ - ByteGroupValueBuilder, GroupColumn, NonNullPrimitiveGroupValueBuilder, - PrimitiveGroupValueBuilder, + ByteGroupValueBuilder, GroupColumn, PrimitiveGroupValueBuilder, }; use crate::aggregates::group_values::GroupValues; use ahash::RandomState; @@ -124,8 +123,7 @@ impl GroupValuesColumn { } } -/// instantiates a [`PrimitiveGroupValueBuilder`] or -/// [`NonNullPrimitiveGroupValueBuilder`] and pushes it into $v +/// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v /// /// Arguments: /// `$v`: the vector to push the new builder into @@ -135,10 +133,10 @@ impl GroupValuesColumn { macro_rules! 
instantiate_primitive { ($v:expr, $nullable:expr, $t:ty) => { if $nullable { - let b = PrimitiveGroupValueBuilder::<$t>::new(); + let b = PrimitiveGroupValueBuilder::<$t, true>::new(); $v.push(Box::new(b) as _) } else { - let b = NonNullPrimitiveGroupValueBuilder::<$t>::new(); + let b = PrimitiveGroupValueBuilder::<$t, false>::new(); $v.push(Box::new(b) as _) } }; diff --git a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs index 7409f5c214b9..15c93262968e 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs @@ -15,24 +15,21 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::BooleanBufferBuilder; use arrow::array::BufferBuilder; use arrow::array::GenericBinaryArray; use arrow::array::GenericStringArray; use arrow::array::OffsetSizeTrait; use arrow::array::PrimitiveArray; use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray}; -use arrow::buffer::NullBuffer; use arrow::buffer::OffsetBuffer; use arrow::buffer::ScalarBuffer; -use arrow::datatypes::ArrowNativeType; use arrow::datatypes::ByteArrayType; use arrow::datatypes::DataType; use arrow::datatypes::GenericBinaryType; -use arrow::datatypes::GenericStringType; use datafusion_common::utils::proxy::VecAllocExt; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; +use arrow_array::types::GenericStringType; use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAPACITY}; use std::sync::Arc; use std::vec; @@ -63,75 +60,25 @@ pub trait GroupColumn: Send + Sync { fn take_n(&mut self, n: usize) -> ArrayRef; } -/// An implementation of [`GroupColumn`] for primitive values which are known to have no nulls -#[derive(Debug)] -pub struct NonNullPrimitiveGroupValueBuilder { - group_values: Vec, -} - -impl NonNullPrimitiveGroupValueBuilder -where - T: ArrowPrimitiveType, -{ - pub fn new() -> Self { - Self { - group_values: vec![], - } - } -} - -impl GroupColumn for NonNullPrimitiveGroupValueBuilder { - fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool { - // know input has no nulls - self.group_values[lhs_row] == array.as_primitive::().value(rhs_row) - } - - fn append_val(&mut self, array: &ArrayRef, row: usize) { - // input can't possibly have nulls, so don't worry about them - self.group_values.push(array.as_primitive::().value(row)) - } - - fn len(&self) -> usize { - self.group_values.len() - } - - fn size(&self) -> usize { - self.group_values.allocated_size() - } - - fn build(self: Box) -> ArrayRef { - let Self { group_values } = *self; - - let nulls = None; - - Arc::new(PrimitiveArray::::new( - ScalarBuffer::from(group_values), - nulls, - )) - } - - fn take_n(&mut self, n: usize) -> ArrayRef { - let first_n = self.group_values.drain(0..n).collect::>(); - let first_n_nulls = None; - - Arc::new(PrimitiveArray::::new( - ScalarBuffer::from(first_n), - first_n_nulls, - )) - } -} - -/// An implementation of [`GroupColumn`] for primitive values which may have nulls +/// An implementation of [`GroupColumn`] for primitive values +/// +/// Optimized to skip null buffer construction if the input is known to be non nullable +/// +/// # Template parameters +/// +/// `T`: the native Rust type that stores the data +/// `NULLABLE`: if the data can contain any nulls #[derive(Debug)] -pub struct PrimitiveGroupValueBuilder { +pub struct 
PrimitiveGroupValueBuilder<T: ArrowPrimitiveType, const NULLABLE: bool> {
     group_values: Vec<T::Native>,
     nulls: MaybeNullBufferBuilder,
 }
 
-impl<T> PrimitiveGroupValueBuilder<T>
+impl<T, const NULLABLE: bool> PrimitiveGroupValueBuilder<T, NULLABLE>
 where
     T: ArrowPrimitiveType,
 {
+    /// Create a new `PrimitiveGroupValueBuilder`
     pub fn new() -> Self {
         Self {
             group_values: vec![],
@@ -140,18 +87,32 @@ where
     }
 }
 
-impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
+impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
+    for PrimitiveGroupValueBuilder<T, NULLABLE>
+{
     fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool {
-        self.nulls.is_null(lhs_row) == array.is_null(rhs_row)
+        // Perf: skip null check (by short circuit) if input is not nullable
+        let null_match = if NULLABLE {
+            self.nulls.is_null(lhs_row) == array.is_null(rhs_row)
+        } else {
+            true
+        };
+
+        null_match && self.group_values[lhs_row] == array.as_primitive::<T>().value(rhs_row)
     }
 
     fn append_val(&mut self, array: &ArrayRef, row: usize) {
-        if array.is_null(row) {
-            self.nulls.append(true);
-            self.group_values.push(T::default_value());
+        // Perf: skip null check if input can't have nulls
+        if NULLABLE {
+            if array.is_null(row) {
+                self.nulls.append(true);
+                self.group_values.push(T::default_value());
+            } else {
+                self.nulls.append(false);
+                self.group_values.push(array.as_primitive::<T>().value(row));
+            }
         } else {
-            self.nulls.append(false);
             self.group_values.push(array.as_primitive::<T>().value(row));
         }
     }
@@ -171,6 +132,9 @@ impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
         } = *self;
 
         let nulls = nulls.build();
+        if !NULLABLE {
+            assert!(nulls.is_none(), "unexpected nulls in non-nullable input");
+        }
 
         Arc::new(PrimitiveArray::<T>::new(
             ScalarBuffer::from(group_values),
@@ -180,7 +144,8 @@ impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
 
     fn take_n(&mut self, n: usize) -> ArrayRef {
         let first_n = self.group_values.drain(0..n).collect::<Vec<_>>();
-        let first_n_nulls = self.nulls.take_n(n);
+
+        let first_n_nulls = if NULLABLE { self.nulls.take_n(n) } else { None };
 
         Arc::new(PrimitiveArray::<T>::new(
             ScalarBuffer::from(first_n),
@@ -190,6 +155,12 @@ impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
 }
 
 /// An implementation of [`GroupColumn`] for binary and utf8 types.
+///
+/// Stores a collection of binary or utf8 group values in a single buffer
+/// in a way that allows:
+///
+/// 1. Efficient comparison of incoming rows to existing rows
+/// 2. Efficient construction of the final output array
 pub struct ByteGroupValueBuilder<O>
 where
     O: OffsetSizeTrait,
 {
@@ -201,8 +172,8 @@ where
     /// stored in the range `offsets[i]..offsets[i+1]` in `buffer`. Null values
     /// are stored as a zero length string.
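+    ///
+    /// For example (a sketch of the layout, not data from a real query):
+    /// appending the values `"foo"`, `NULL`, `"ab"` yields
+    /// `buffer = "fooab"` and `offsets = [0, 3, 3, 5]`; the `NULL` row
+    /// occupies the empty range `3..3`.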
     offsets: Vec<O>,
-    /// Null indexes in offsets, if `i` is in nulls, `offsets[i]` should be equal to `offsets[i+1]`
-    nulls: Vec<usize>,
+    /// Nulls
+    nulls: MaybeNullBufferBuilder,
 }
 
 impl<O> ByteGroupValueBuilder<O>
 where
     O: OffsetSizeTrait,
 {
     pub fn new(output_type: OutputType) -> Self {
         Self {
             output_type,
             buffer: BufferBuilder::new(INITIAL_BUFFER_CAPACITY),
             offsets: vec![O::default()],
-            nulls: vec![],
+            nulls: MaybeNullBufferBuilder::new(),
         }
     }
 
@@ -224,40 +195,33 @@ where
     {
         let arr = array.as_bytes::<B>();
         if arr.is_null(row) {
-            self.nulls.push(self.len());
+            self.nulls.append(true);
             // nulls need a zero length in the offset buffer
             let offset = self.buffer.len();
             self.offsets.push(O::usize_as(offset));
-            return;
+        } else {
+            self.nulls.append(false);
+            let value: &[u8] = arr.value(row).as_ref();
+            self.buffer.append_slice(value);
+            self.offsets.push(O::usize_as(self.buffer.len()));
         }
-
-        let value: &[u8] = arr.value(row).as_ref();
-        self.buffer.append_slice(value);
-        self.offsets.push(O::usize_as(self.buffer.len()));
     }
 
     fn equal_to_inner<B>(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> bool
     where
         B: ByteArrayType,
     {
-        // Handle nulls
-        let is_lhs_null = self.nulls.iter().any(|null_idx| *null_idx == lhs_row);
         let arr = array.as_bytes::<B>();
-        if is_lhs_null {
-            return arr.is_null(rhs_row);
-        } else if arr.is_null(rhs_row) {
-            return false;
-        }
+        self.nulls.is_null(lhs_row) == arr.is_null(rhs_row)
+            && self.value(lhs_row) == (arr.value(rhs_row).as_ref() as &[u8])
+    }
 
-        let arr = array.as_bytes::<B>();
-        let rhs_elem: &[u8] = arr.value(rhs_row).as_ref();
-        let rhs_elem_len = arr.value_length(rhs_row).as_usize();
-        debug_assert_eq!(rhs_elem_len, rhs_elem.len());
-        let l = self.offsets[lhs_row].as_usize();
-        let r = self.offsets[lhs_row + 1].as_usize();
-        let existing_elem = unsafe { self.buffer.as_slice().get_unchecked(l..r) };
-        rhs_elem == existing_elem
+    /// return the current value of the specified row irrespective of null
+    pub fn value(&self, row: usize) -> &[u8] {
+        let l = self.offsets[row].as_usize();
+        let r = self.offsets[row + 1].as_usize();
+        // Safety: the offsets are constructed correctly and never decrease
+        unsafe { self.buffer.as_slice().get_unchecked(l..r) }
     }
 }
 
@@ -325,18 +289,7 @@ where
             nulls,
         } = *self;
 
-        let null_buffer = if nulls.is_empty() {
-            None
-        } else {
-            // Only make a `NullBuffer` if there was a null value
-            let num_values = offsets.len() - 1;
-            let mut bool_builder = BooleanBufferBuilder::new(num_values);
-            bool_builder.append_n(num_values, true);
-            nulls.into_iter().for_each(|null_index| {
-                bool_builder.set_bit(null_index, false);
-            });
-            Some(NullBuffer::from(bool_builder.finish()))
-        };
+        let null_buffer = nulls.build();
 
         // SAFETY: the offsets were constructed correctly in `insert_if_new` --
         // monotonically increasing, overflows were checked.
@@ -353,9 +306,9 @@ where
             // SAFETY:
             // 1. the offsets were constructed safely
             //
-            // 2. we asserted the input arrays were all the correct type and
-            // thus since all the values that went in were valid (e.g. utf8)
-            // so are all the values that come out
+            // 2. the input arrays were all the correct type and thus since
+            // all the values that went in were valid (e.g. utf8) so are all
+            // the values that come out
             Arc::new(unsafe {
                 GenericStringArray::new_unchecked(offsets, values, null_buffer)
             })
@@ -366,27 +319,7 @@ where
     fn take_n(&mut self, n: usize) -> ArrayRef {
         debug_assert!(self.len() >= n);
-
-        let null_buffer = if self.nulls.is_empty() {
-            None
-        } else {
-            // Only make a `NullBuffer` if there was a null value
-            let mut bool_builder = BooleanBufferBuilder::new(n);
-            bool_builder.append_n(n, true);
-
-            let mut new_nulls = vec![];
-            self.nulls.iter().for_each(|null_index| {
-                if *null_index < n {
-                    bool_builder.set_bit(*null_index, false);
-                } else {
-                    new_nulls.push(null_index - n);
-                }
-            });
-
-            self.nulls = new_nulls;
-            Some(NullBuffer::from(bool_builder.finish()))
-        };
-
+        let null_buffer = self.nulls.take_n(n);
         let first_remaining_offset = O::as_usize(self.offsets[n]);
 
         // Given offsets like [0, 2, 4, 5] and n = 1, we expect to get
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
index 2bdaed479655..9466ff6dd459 100644
--- a/datafusion/physical-plan/src/aggregates/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -26,6 +26,7 @@ use crate::aggregates::{
     topk_stream::GroupedTopKAggregateStream,
 };
 use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet};
+use crate::projection::get_field_metadata;
 use crate::windows::get_ordered_partition_by_indices;
 use crate::{
     DisplayFormatType, Distribution, ExecutionPlan, InputOrderMode,
@@ -795,14 +796,17 @@ fn create_schema(
 ) -> Result<Schema> {
     let mut fields = Vec::with_capacity(group_expr.len() + aggr_expr.len());
     for (index, (expr, name)) in group_expr.iter().enumerate() {
-        fields.push(Field::new(
-            name,
-            expr.data_type(input_schema)?,
-            // In cases where we have multiple grouping sets, we will use NULL expressions in
-            // order to align the grouping sets. So the field must be nullable even if the underlying
-            // schema field is not.
-            group_expr_nullable[index] || expr.nullable(input_schema)?,
-        ))
+        fields.push(
+            Field::new(
+                name,
+                expr.data_type(input_schema)?,
+                // In cases where we have multiple grouping sets, we will use NULL expressions in
+                // order to align the grouping sets. So the field must be nullable even if the underlying
+                // schema field is not.
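+                // For example, `GROUP BY GROUPING SETS ((a), (b))` emits
+                // NULL for `b` on the rows of the `(a)` grouping set, even
+                // when column `b` is declared NOT NULL.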
+ group_expr_nullable[index] || expr.nullable(input_schema)?, + ) + .with_metadata(get_field_metadata(expr, input_schema).unwrap_or_default()), + ) } match mode { @@ -823,7 +827,10 @@ fn create_schema( } } - Ok(Schema::new(fields)) + Ok(Schema::new_with_metadata( + fields, + input_schema.metadata().clone(), + )) } fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index a043905765ec..998f6184f321 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -38,7 +38,7 @@ use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; use arrow::datatypes::SchemaRef; use arrow_schema::SortOptions; -use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; +use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::disk_manager::RefCountedTempFile; use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; @@ -1081,13 +1081,14 @@ impl GroupedHashAggregateStream { /// Transforms input batch to intermediate aggregate state, without grouping it fn transform_to_states(&self, batch: RecordBatch) -> Result { - let group_values = evaluate_group_by(&self.group_by, &batch)?; + let mut group_values = evaluate_group_by(&self.group_by, &batch)?; let input_values = evaluate_many(&self.aggregate_arguments, &batch)?; let filter_values = evaluate_optional(&self.filter_expressions, &batch)?; - let mut output = group_values.first().cloned().ok_or_else(|| { - internal_datafusion_err!("group_values expected to have at least one element") - })?; + if group_values.len() != 1 { + return internal_err!("group_values expected to have single element"); + } + let mut output = group_values.swap_remove(0); let iter = self .accumulators diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 7cbfd49afb86..845a74eaea48 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -82,6 +82,7 @@ pub mod windows; pub mod work_table; pub mod udaf { + pub use datafusion_expr::StatisticsArgs; pub use datafusion_physical_expr::aggregate::AggregateFunctionExpr; } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index f1b9cdaf728f..4c889d1fc88c 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -237,7 +237,7 @@ impl ExecutionPlan for ProjectionExec { /// If e is a direct column reference, returns the field level /// metadata for that field, if any. 
Otherwise returns None -fn get_field_metadata( +pub(crate) fn get_field_metadata( e: &Arc, input_schema: &Schema, ) -> Option> { diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 9510baab51fb..4a4c940b22e2 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -257,17 +257,11 @@ impl ExecutionPlan for BoundedWindowAggExec { fn required_input_ordering(&self) -> Vec> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); - if self.input_order_mode != InputOrderMode::Sorted - || self.ordered_partition_by_indices.len() >= partition_bys.len() - { - let partition_bys = self - .ordered_partition_by_indices - .iter() - .map(|idx| &partition_bys[*idx]); - vec![calc_requirements(partition_bys, order_keys)] - } else { - vec![calc_requirements(partition_bys, order_keys)] - } + let partition_bys = self + .ordered_partition_by_indices + .iter() + .map(|idx| &partition_bys[*idx]); + vec![calc_requirements(partition_bys, order_keys)] } fn required_input_distribution(&self) -> Vec { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 7c86b4e9cb55..463e89a1407e 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -182,20 +182,26 @@ fn get_scalar_value_from_args( } fn get_signed_integer(value: ScalarValue) -> Result { + if value.is_null() { + return Ok(0); + } + if !value.data_type().is_integer() { - return Err(DataFusionError::Execution( - "Expected an integer value".to_string(), - )); + return exec_err!("Expected an integer value"); } + value.cast_to(&DataType::Int64)?.try_into() } fn get_unsigned_integer(value: ScalarValue) -> Result { + if value.is_null() { + return Ok(0); + } + if !value.data_type().is_integer() { - return Err(DataFusionError::Execution( - "Expected an integer value".to_string(), - )); + return exec_err!("Expected an integer value"); } + value.cast_to(&DataType::UInt64)?.try_into() } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 132a0a69ee2c..24cc19db515a 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1061,6 +1061,10 @@ impl UserDefinedLogicalNodeCore for TopKPlanNode { expr: exprs.swap_remove(0), }) } + + fn supports_limit_pushdown(&self) -> bool { + false // Disallow limit push-down by default + } } #[derive(Debug)] diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index ddafc4e3a03a..20a772cdd088 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -432,6 +432,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { qualifier: None, options: WildcardOptions::default(), }), + FunctionArg::Unnamed(FunctionArgExpr::QualifiedWildcard(object_name)) => { + let qualifier = self.object_name_to_table_reference(object_name)?; + // sanity check on qualifier with schema + let qualified_indices = schema.fields_indices_with_qualified(&qualifier); + if qualified_indices.is_empty() { + return plan_err!("Invalid qualifier {qualifier}"); + } + Ok(Expr::Wildcard { + qualifier: Some(qualifier), + options: WildcardOptions::default(), + }) + } _ => not_impl_err!("Unsupported qualified wildcard argument: 
{sql:?}"), } } diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 2df8d89c59bc..6d130647a49f 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec; #[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateExternalTable { /// Table name - pub name: String, + pub name: ObjectName, /// Optional schema pub columns: Vec, /// File type (Parquet, NDJSON, CSV, etc) @@ -813,7 +813,7 @@ impl<'a> DFParser<'a> { } let create = CreateExternalTable { - name: table_name.to_string(), + name: table_name, columns, file_type: builder.file_type.unwrap(), location: builder.location.unwrap(), @@ -915,8 +915,9 @@ mod tests { // positive case let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'"; let display = None; + let name = ObjectName(vec![Ident::from("t")]); let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -932,7 +933,7 @@ mod tests { // positive case: leading space let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' "; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -949,7 +950,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -966,7 +967,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -986,7 +987,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1013,7 +1014,7 @@ mod tests { ]; for (sql, compression) in sqls { let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1033,7 +1034,7 @@ mod tests { // positive case: it is ok for parquet files not to have columns specified let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1049,7 +1050,7 @@ mod tests { // positive case: it is ok for parquet files to be other than upper case let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: 
name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1065,7 +1066,7 @@ mod tests { // positive case: it is ok for avro files not to have columns specified let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "AVRO".to_string(), location: "foo.avro".into(), @@ -1082,7 +1083,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1099,7 +1100,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(None)), make_column_def("p1", DataType::Int(None)), @@ -1132,7 +1133,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "X".to_string(), location: "blahblah".into(), @@ -1149,7 +1150,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "X".to_string(), location: "blahblah".into(), @@ -1188,7 +1189,7 @@ mod tests { ]; for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) { let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1214,7 +1215,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(display)), make_column_def("c2", DataType::Int(display)), @@ -1253,7 +1254,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(display)), make_column_def("c2", DataType::Int(display)), @@ -1297,7 +1298,7 @@ mod tests { 'TRUNCATE' 'NO', 'format.has_header' 'true')"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(None)), make_column_def("c2", DataType::Float(None)), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 5cbe1d7c014a..e8defedddf2c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -197,9 +197,9 @@ impl PlannerContext { /// extends the FROM schema, returning the existing one, if any pub fn extend_outer_from_schema(&mut self, schema: &DFSchemaRef) -> Result<()> { - 
self.outer_from_schema = match self.outer_from_schema.as_ref() {
-            Some(from_schema) => Some(Arc::new(from_schema.join(schema)?)),
-            None => Some(Arc::clone(schema)),
+        match self.outer_from_schema.as_mut() {
+            Some(from_schema) => Arc::make_mut(from_schema).merge(schema),
+            None => self.outer_from_schema = Some(Arc::clone(schema)),
         };
         Ok(())
     }
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 895285c59737..656d72d07ba2 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1239,8 +1239,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         let ordered_exprs =
             self.build_order_by(order_exprs, &df_schema, &mut planner_context)?;
 
-        // External tables do not support schemas at the moment, so the name is just a table name
-        let name = TableReference::bare(name);
+        let name = self.object_name_to_table_reference(name)?;
         let constraints =
             Constraints::new_from_table_constraints(&all_constraints, &df_schema)?;
         Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index 5c9655a55606..44b591fedef8 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() {
     quick_test(sql, expected);
 }
 
+#[test]
+fn create_external_table_with_schema() {
+    let sql = "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 'foo.csv'";
+    let expected = "CreateExternalTable: Partial { schema: \"staging\", table: \"foo\" }";
+    quick_test(sql, expected);
+}
+
 #[test]
 fn create_schema_with_quoted_name() {
     let sql = "CREATE SCHEMA \"quoted_schema_name\"";
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 46327534e7de..a78ade81eeba 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -1124,6 +1124,14 @@ SELECT COUNT(*) FROM aggregate_test_100
 ----
 100
 
+query I
+SELECT COUNT(aggregate_test_100.*) FROM aggregate_test_100
+----
+100
+
+query error Error during planning: Invalid qualifier foo
+SELECT COUNT(foo.*) FROM aggregate_test_100
+
 # csv_query_count_literal
 query I
 SELECT COUNT(2) FROM aggregate_test_100
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt
index 12b097c3d5d1..9ac2ecdce7cc 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -275,3 +275,15 @@ DROP TABLE t;
 # query should fail with bad column
 statement error DataFusion error: Error during planning: Column foo is not in schema
 CREATE EXTERNAL TABLE t STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo);
+
+# Create external table with qualified name should belong to the schema
+statement ok
+CREATE SCHEMA staging;
+
+statement ok
+CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';
+
+# Create external table with qualified name, but no schema should error
+statement error DataFusion error: Error during planning: failed to resolve schema: release
+CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';
+
diff --git a/datafusion/sqllogictest/test_files/dynamic_file.slt b/datafusion/sqllogictest/test_files/dynamic_file.slt
index
e177fd3de243..69f9a43ad407 100644 --- a/datafusion/sqllogictest/test_files/dynamic_file.slt +++ b/datafusion/sqllogictest/test_files/dynamic_file.slt @@ -25,9 +25,170 @@ SELECT * FROM '../core/tests/data/partitioned_table_arrow/part=123' ORDER BY f0; 1 foo true 2 bar false -# dynamic file query doesn't support partitioned table -statement error DataFusion error: Error during planning: table 'datafusion.public.../core/tests/data/partitioned_table_arrow' not found -SELECT * FROM '../core/tests/data/partitioned_table_arrow' ORDER BY f0; +# Read partitioned file +statement ok +CREATE TABLE src_table_1 ( + int_col INT, + string_col TEXT, + bigint_col BIGINT, + partition_col INT +) AS VALUES +(1, 'aaa', 100, 1), +(2, 'bbb', 200, 1), +(3, 'ccc', 300, 1), +(4, 'ddd', 400, 1); + +statement ok +CREATE TABLE src_table_2 ( + int_col INT, + string_col TEXT, + bigint_col BIGINT, + partition_col INT +) AS VALUES +(5, 'eee', 500, 2), +(6, 'fff', 600, 2), +(7, 'ggg', 700, 2), +(8, 'hhh', 800, 2); + +# Read partitioned csv file + +query I +COPY src_table_1 TO 'test_files/scratch/dynamic_file/csv_partitions' +STORED AS CSV +PARTITIONED BY (partition_col); +---- +4 + +query I +COPY src_table_2 TO 'test_files/scratch/dynamic_file/csv_partitions' +STORED AS CSV +PARTITIONED BY (partition_col); +---- +4 + +query ITIT rowsort +SELECT int_col, string_col, bigint_col, partition_col FROM 'test_files/scratch/dynamic_file/csv_partitions'; +---- +1 aaa 100 1 +2 bbb 200 1 +3 ccc 300 1 +4 ddd 400 1 +5 eee 500 2 +6 fff 600 2 +7 ggg 700 2 +8 hhh 800 2 + +# Read partitioned json file + +query I +COPY src_table_1 TO 'test_files/scratch/dynamic_file/json_partitions' +STORED AS JSON +PARTITIONED BY (partition_col); +---- +4 + +query I +COPY src_table_2 TO 'test_files/scratch/dynamic_file/json_partitions' +STORED AS JSON +PARTITIONED BY (partition_col); +---- +4 + +query ITIT rowsort +SELECT int_col, string_col, bigint_col, partition_col FROM 'test_files/scratch/dynamic_file/json_partitions'; +---- +1 aaa 100 1 +2 bbb 200 1 +3 ccc 300 1 +4 ddd 400 1 +5 eee 500 2 +6 fff 600 2 +7 ggg 700 2 +8 hhh 800 2 + +# Read partitioned arrow file + +query I +COPY src_table_1 TO 'test_files/scratch/dynamic_file/arrow_partitions' +STORED AS ARROW +PARTITIONED BY (partition_col); +---- +4 + +query I +COPY src_table_2 TO 'test_files/scratch/dynamic_file/arrow_partitions' +STORED AS ARROW +PARTITIONED BY (partition_col); +---- +4 + +query ITIT rowsort +SELECT int_col, string_col, bigint_col, partition_col FROM 'test_files/scratch/dynamic_file/arrow_partitions'; +---- +1 aaa 100 1 +2 bbb 200 1 +3 ccc 300 1 +4 ddd 400 1 +5 eee 500 2 +6 fff 600 2 +7 ggg 700 2 +8 hhh 800 2 + +# Read partitioned parquet file + +query I +COPY src_table_1 TO 'test_files/scratch/dynamic_file/parquet_partitions' +STORED AS PARQUET +PARTITIONED BY (partition_col); +---- +4 + +query I +COPY src_table_2 TO 'test_files/scratch/dynamic_file/parquet_partitions' +STORED AS PARQUET +PARTITIONED BY (partition_col); +---- +4 + +query ITIT rowsort +select * from 'test_files/scratch/dynamic_file/parquet_partitions'; +---- +1 aaa 100 1 +2 bbb 200 1 +3 ccc 300 1 +4 ddd 400 1 +5 eee 500 2 +6 fff 600 2 +7 ggg 700 2 +8 hhh 800 2 + +# Read partitioned parquet file with multiple partition columns + +query I +COPY src_table_1 TO 'test_files/scratch/dynamic_file/nested_partition' +STORED AS PARQUET +PARTITIONED BY (partition_col, string_col); +---- +4 + +query I +COPY src_table_2 TO 'test_files/scratch/dynamic_file/nested_partition' +STORED AS PARQUET +PARTITIONED BY (partition_col, 
string_col); +---- +4 + +query IITT rowsort +select * from 'test_files/scratch/dynamic_file/nested_partition'; +---- +1 100 1 aaa +2 200 1 bbb +3 300 1 ccc +4 400 1 ddd +5 500 2 eee +6 600 2 fff +7 700 2 ggg +8 800 2 hhh # read avro file query IT diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index 8d801b92c393..519fbb887c7e 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -1215,14 +1215,14 @@ statement ok create table t1(v1 int) as values(100); ## Query with Ambiguous column reference -query error DataFusion error: Schema error: Ambiguous reference to unqualified field v1 +query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 select count(*) from t1 right outer join t1 on t1.v1 > 0; -query error DataFusion error: Schema error: Ambiguous reference to unqualified field v1 +query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1 select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1); statement ok -drop table t1; \ No newline at end of file +drop table t1; diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 3b2b219244f5..f38281abc5ab 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -58,5 +58,43 @@ WHERE "data"."id" = "samples"."id"; 1 3 + + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +query I +select count(distinct name) from table_with_metadata; +---- +2 + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +query I +select approx_median(distinct id) from table_with_metadata; +---- +2 + +# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687 +statement ok +select array_agg(distinct id) from table_with_metadata; + +query I +select distinct id from table_with_metadata order by id; +---- +1 +3 +NULL + +query I +select count(id) from table_with_metadata; +---- +2 + +query I +select count(id) cnt from table_with_metadata group by name order by cnt; +---- +0 +1 +1 + + statement ok drop table table_with_metadata; diff --git a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt index ea3c9b8eb6ca..c181f613ee9a 100644 --- a/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt +++ b/datafusion/sqllogictest/test_files/string/dictionary_utf8.slt @@ -53,36 +53,6 @@ Xiangpeng datafusion数据融合 false true false true Raphael datafusionДатаФусион false false false false NULL NULL NULL NULL NULL NULL -# TODO: move it back to `string_query.slt.part` after fixing the issue -# see detail: https://github.com/apache/datafusion/issues/12664 -query BBBB -SELECT - REGEXP_LIKE(ascii_1, 'an'), - REGEXP_LIKE(unicode_1, 'таФ'), - REGEXP_LIKE(ascii_1, NULL), - REGEXP_LIKE(unicode_1, NULL) -FROM test_basic_operator; ----- -false false NULL NULL -true false NULL NULL -false true NULL NULL -NULL NULL NULL NULL - -# TODO: move it back to `string_query.slt.part` after fixing the issue -# see detail: https://github.com/apache/datafusion/issues/12664 -query ???? 
-SELECT
-  REGEXP_MATCH(ascii_1, 'an'),
-  REGEXP_MATCH(unicode_1, 'таФ'),
-  REGEXP_MATCH(ascii_1, NULL),
-  REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
 #
 # common test for string-like functions and operators
 #
diff --git a/datafusion/sqllogictest/test_files/string/string.slt b/datafusion/sqllogictest/test_files/string/string.slt
index 6b89147c5c4f..f4e83966f78f 100644
--- a/datafusion/sqllogictest/test_files/string/string.slt
+++ b/datafusion/sqllogictest/test_files/string/string.slt
@@ -63,36 +63,6 @@ Xiangpeng datafusion数据融合 false true false true
 Raphael datafusionДатаФусион false false false false
 NULL NULL NULL NULL NULL NULL
 
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query BBBB
-SELECT
-  REGEXP_LIKE(ascii_1, 'an'),
-  REGEXP_LIKE(unicode_1, 'таФ'),
-  REGEXP_LIKE(ascii_1, NULL),
-  REGEXP_LIKE(unicode_1, NULL)
-FROM test_basic_operator;
-----
-false false NULL NULL
-true false NULL NULL
-false true NULL NULL
-NULL NULL NULL NULL
-
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query ????
-SELECT
-  REGEXP_MATCH(ascii_1, 'an'),
-  REGEXP_MATCH(unicode_1, 'таФ'),
-  REGEXP_MATCH(ascii_1, NULL),
-  REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
 # TODO: move it back to `string_query.slt.part` after fixing the issue
 # see detail: https://github.com/apache/datafusion/issues/12670
 query IIIIII
diff --git a/datafusion/sqllogictest/test_files/string/string_query.slt.part b/datafusion/sqllogictest/test_files/string/string_query.slt.part
index 0af0a6a642b2..3ba2b31bbab2 100644
--- a/datafusion/sqllogictest/test_files/string/string_query.slt.part
+++ b/datafusion/sqllogictest/test_files/string/string_query.slt.part
@@ -856,39 +856,47 @@ NULL NULL
 # Test REGEXP_LIKE
 # --------------------------------------
 
-# TODO: LargeString does not support REGEXP_LIKE. Enable this after fixing the issue
-# see issue: https://github.com/apache/datafusion/issues/12664
-#query BBBB
-#SELECT
-# REGEXP_LIKE(ascii_1, 'an'),
-# REGEXP_LIKE(unicode_1, 'таФ'),
-# REGEXP_LIKE(ascii_1, NULL),
-# REGEXP_LIKE(unicode_1, NULL)
-#FROM test_basic_operator;
-#----
-#false false NULL NULL
-#true false NULL NULL
-#false true NULL NULL
-#NULL NULL NULL NULL
+query BBBBBBBB
+SELECT
+  -- without flags
+  REGEXP_LIKE(ascii_1, 'an'),
+  REGEXP_LIKE(unicode_1, 'таФ'),
+  REGEXP_LIKE(ascii_1, NULL),
+  REGEXP_LIKE(unicode_1, NULL),
+  -- with flags
+  REGEXP_LIKE(ascii_1, 'AN', 'i'),
+  REGEXP_LIKE(unicode_1, 'ТаФ', 'i'),
+  REGEXP_LIKE(ascii_1, NULL, 'i'),
+  REGEXP_LIKE(unicode_1, NULL, 'i')
+FROM test_basic_operator;
+----
+false false NULL NULL true false NULL NULL
+true false NULL NULL true false NULL NULL
+false true NULL NULL false true NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL
 
 # --------------------------------------
 # Test REGEXP_MATCH
 # --------------------------------------
 
-# TODO: LargeString does not support REGEXP_MATCH. Enable this after fixing the issue
-# see issue: https://github.com/apache/datafusion/issues/12664
-#query ????
-#SELECT
-# REGEXP_MATCH(ascii_1, 'an'),
-# REGEXP_MATCH(unicode_1, 'таФ'),
-# REGEXP_MATCH(ascii_1, NULL),
-# REGEXP_MATCH(unicode_1, NULL)
-#FROM test_basic_operator;
-#----
-#NULL NULL NULL NULL
-#[an] NULL NULL NULL
-#NULL [таФ] NULL NULL
-#NULL NULL NULL NULL
+query ????????
+SELECT
+  -- without flags
+  REGEXP_MATCH(ascii_1, 'an'),
+  REGEXP_MATCH(unicode_1, 'ТаФ'),
+  REGEXP_MATCH(ascii_1, NULL),
+  REGEXP_MATCH(unicode_1, NULL),
+  -- with flags
+  REGEXP_MATCH(ascii_1, 'AN', 'i'),
+  REGEXP_MATCH(unicode_1, 'таФ', 'i'),
+  REGEXP_MATCH(ascii_1, NULL, 'i'),
+  REGEXP_MATCH(unicode_1, NULL, 'i')
+FROM test_basic_operator;
+----
+NULL NULL NULL NULL [An] NULL NULL NULL
+[an] NULL NULL NULL [an] NULL NULL NULL
+NULL NULL NULL NULL NULL [таФ] NULL NULL
+NULL NULL NULL NULL NULL NULL NULL NULL
 
 # --------------------------------------
 # Test REPEAT
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt
index fb82726e3a9d..4e7857ad804b 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -50,36 +50,6 @@ false false
 false true
 NULL NULL
 
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query BBBB
-SELECT
-  REGEXP_LIKE(ascii_1, 'an'),
-  REGEXP_LIKE(unicode_1, 'таФ'),
-  REGEXP_LIKE(ascii_1, NULL),
-  REGEXP_LIKE(unicode_1, NULL)
-FROM test_basic_operator;
-----
-false false NULL NULL
-true false NULL NULL
-false true NULL NULL
-NULL NULL NULL NULL
-
-# TODO: move it back to `string_query.slt.part` after fixing the issue
-# see detail: https://github.com/apache/datafusion/issues/12664
-query ????
-SELECT
-  REGEXP_MATCH(ascii_1, 'an'),
-  REGEXP_MATCH(unicode_1, 'таФ'),
-  REGEXP_MATCH(ascii_1, NULL),
-  REGEXP_MATCH(unicode_1, NULL)
-FROM test_basic_operator;
-----
-NULL NULL NULL NULL
-[an] NULL NULL NULL
-NULL [таФ] NULL NULL
-NULL NULL NULL NULL
-
 # TODO: move it back to `string_query.slt.part` after fixing the issue
 # see detail: https://github.com/apache/datafusion/issues/12670
 query IIIIII
diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt
index 7fee84f9bcd9..cb6c6a5ace76 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -4894,3 +4894,42 @@ NULL a4 5
 
 statement ok
 drop table
+
+## test handle NULL and 0 value of nth_value
+statement ok
+CREATE TABLE t(v1 int, v2 int);
+
+statement ok
+INSERT INTO t VALUES (1,1), (1,2),(1,3),(2,1),(2,2);
+
+query II
+SELECT v1, NTH_VALUE(v2, null) OVER (PARTITION BY v1 ORDER BY v2) FROM t;
+----
+1 NULL
+1 NULL
+1 NULL
+2 NULL
+2 NULL
+
+query II
+SELECT v1, NTH_VALUE(v2, v2*null) OVER (PARTITION BY v1 ORDER BY v2) FROM t;
+----
+1 NULL
+1 NULL
+1 NULL
+2 NULL
+2 NULL
+
+query II
+SELECT v1, NTH_VALUE(v2, 0) OVER (PARTITION BY v1 ORDER BY v2) FROM t;
+----
+1 NULL
+1 NULL
+1 NULL
+2 NULL
+2 NULL
+
+statement ok
+DROP TABLE t;
+
+## end test handle NULL and 0 of NTH_VALUE
\ No newline at end of file
diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index f7686bec5435..3b7d0fd29610 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -149,6 +149,10 @@ impl UserDefinedLogicalNode for MockUserDefinedLogicalPlan {
     fn dyn_ord(&self, _: &dyn UserDefinedLogicalNode) -> Option<Ordering> {
         unimplemented!()
     }
+
+    fn supports_limit_pushdown(&self) -> bool {
+        false // Disallow limit push-down by default
+    }
 }
 
 impl MockUserDefinedLogicalPlan {
diff --git a/docs/source/user-guide/sql/data_types.md b/docs/source/user-guide/sql/data_types.md
index 0e974550a84d..18c95cdea70e 100644
--- a/docs/source/user-guide/sql/data_types.md
+++ b/docs/source/user-guide/sql/data_types.md
@@ -97,7 +97,7 @@ select arrow_cast(now(), 'Timestamp(Second, None)');
 | `BYTEA` | `Binary` |
 
 You can create binary literals using a hex string literal such as
-`X'1234` to create a `Binary` value of two bytes, `0x12` and `0x34`.
+`X'1234'` to create a `Binary` value of two bytes, `0x12` and `0x34`.
 
 ## Unsupported SQL Types
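
To make the corrected docs wording concrete, here is a minimal sketch of the fixed hex-literal syntax; it is illustrative only and not part of the patch, and the expected output is our assumption based on the documented behavior (`arrow_typeof` is DataFusion's built-in for inspecting a value's Arrow type):

```sql
-- Illustrative example, not part of the diff above; run in datafusion-cli.
-- The properly closed literal X'1234' yields a two-byte Binary value.
SELECT X'1234' AS bin, arrow_typeof(X'1234') AS bin_type;
-- Assumed result: bin = 1234 (the bytes 0x12 and 0x34), bin_type = Binary
```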