diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index edc45c2c23b7..b69dcac78424 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -20,7 +20,7 @@ use crate::{Array, ArrayRef, MapArray, StructArray}; use arrow_buffer::Buffer; use arrow_buffer::{NullBuffer, NullBufferBuilder}; use arrow_data::ArrayData; -use arrow_schema::{ArrowError, DataType, Field}; +use arrow_schema::{ArrowError, DataType, Field, FieldRef}; use std::any::Any; use std::sync::Arc; @@ -61,6 +61,7 @@ pub struct MapBuilder { field_names: MapFieldNames, key_builder: K, value_builder: V, + value_field: Option, } /// The [`Field`] names for a [`MapArray`] @@ -106,6 +107,20 @@ impl MapBuilder { field_names: field_names.unwrap_or_default(), key_builder, value_builder, + value_field: None, + } + } + + /// Override the field passed to [`MapBuilder::new`] + /// + /// By default a nullable field is created with the name `values` + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `V` + pub fn with_values_field(self, field: impl Into) -> Self { + Self { + value_field: Some(field.into()), + ..self } } @@ -184,11 +199,14 @@ impl MapBuilder { keys_arr.data_type().clone(), false, // always non-nullable )); - let values_field = Arc::new(Field::new( - self.field_names.value.as_str(), - values_arr.data_type().clone(), - true, - )); + let values_field = match &self.value_field { + Some(f) => f.clone(), + None => Arc::new(Field::new( + self.field_names.value.as_str(), + values_arr.data_type().clone(), + true, + )), + }; let struct_array = StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]); @@ -296,4 +314,62 @@ mod tests { 42 ); } + + #[test] + fn test_with_values_field() { + let value_field = Arc::new(Field::new("bars", DataType::Int32, false)); + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()) + .with_values_field(value_field.clone()); + builder.keys().append_value(1); + builder.values().append_value(2); + builder.append(true).unwrap(); + builder.append(false).unwrap(); // This is fine as nullability refers to nullability of values + builder.keys().append_value(3); + builder.values().append_value(4); + builder.append(true).unwrap(); + let map = builder.finish(); + + assert_eq!(map.len(), 3); + assert_eq!( + map.data_type(), + &DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct( + vec![ + Arc::new(Field::new("keys", DataType::Int32, false)), + value_field.clone() + ] + .into() + ), + false, + )), + false + ) + ); + + builder.keys().append_value(5); + builder.values().append_value(6); + builder.append(true).unwrap(); + let map = builder.finish(); + + assert_eq!(map.len(), 1); + assert_eq!( + map.data_type(), + &DataType::Map( + Arc::new(Field::new( + "entries", + DataType::Struct( + vec![ + Arc::new(Field::new("keys", DataType::Int32, false)), + value_field + ] + .into() + ), + false, + )), + false + ) + ); + } } diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 917b58522f66..ce4aa48e9dda 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -171,11 +171,11 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { let builder = make_builder(field.data_type(), capacity); - Box::new(ListBuilder::with_capacity(builder, capacity)) + Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone())) } DataType::LargeList(field) => { let builder = make_builder(field.data_type(), capacity); - Box::new(LargeListBuilder::with_capacity(builder, capacity)) + Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone())) } DataType::Map(field, _) => match field.data_type() { DataType::Struct(fields) => { @@ -186,12 +186,15 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box panic!("The field of Map data type {t:?} should has a child Struct field"), },