Skip to content

Commit

Permalink
Merge branch 'master' into cast-durations
Browse files (browse the repository at this point in the history)
  • Loading branch information
tisonkun committed Sep 25, 2024
2 parents a009ea2 + 62825b2 commit a07e863
Show file tree
Hide file tree
Showing 11 changed files with 1,560 additions and 81 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ jobs:
ARROW_INTEGRATION_JAVA: ON
ARROW_INTEGRATION_JS: ON
ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust"
ARCHERY_INTEGRATION_WITH_NANOARROW: "1"
# Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052
ARCHERY_INTEGRATION_WITH_NANOARROW: "0"
# https://github.com/apache/arrow/pull/38403/files#r1371281630
ARCHERY_INTEGRATION_WITH_RUST: "1"
# These are necessary because the github runner overrides $HOME
Expand Down
56 changes: 44 additions & 12 deletions arrow-cast/src/cast/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,17 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
let dict_array = array
.as_dictionary::<K>()
.downcast_dict::<StringArray>()
.unwrap();
.ok_or_else(|| {
ArrowError::ComputeError(
"Internal Error: Cannot cast Utf8View to StringArray of expected type"
.to_string(),
)
})?;

let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
dict_array.values(),
dict_array.keys(),
);
)?;
Ok(Arc::new(string_view))
}
BinaryView => {
Expand All @@ -105,12 +110,17 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
let dict_array = array
.as_dictionary::<K>()
.downcast_dict::<BinaryArray>()
.unwrap();
.ok_or_else(|| {
ArrowError::ComputeError(
"Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
.to_string(),
)
})?;

let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
dict_array.values(),
dict_array.keys(),
);
)?;
Ok(Arc::new(binary_view))
}
_ => unpack_dictionary::<K>(array, to_type, cast_options),
Expand All @@ -120,15 +130,17 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
array: &GenericByteArray<V>,
keys: &PrimitiveArray<K>,
) -> GenericByteViewArray<T> {
) -> Result<GenericByteViewArray<T>, ArrowError> {
let value_buffer = array.values();
let value_offsets = array.value_offsets();
let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
builder.append_block(value_buffer.clone());
for i in keys.iter() {
match i {
Some(v) => {
let idx = v.to_usize().unwrap();
let idx = v.to_usize().ok_or_else(|| {
ArrowError::ComputeError("Invalid dictionary index".to_string())
})?;

// Safety
// (1) The index is within bounds as they are offsets
Expand All @@ -145,7 +157,7 @@ fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArra
}
}
}
builder.finish()
Ok(builder.finish())
}

// Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
Expand Down Expand Up @@ -211,7 +223,11 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
let dict = dict
.as_dictionary::<K>()
.downcast_dict::<Decimal128Array>()
.unwrap();
.ok_or_else(|| {
ArrowError::ComputeError(
"Internal Error: Cannot cast dict to Decimal128Array".to_string(),
)
})?;
let value = dict.values().clone();
// Set correct precision/scale
let value = value.with_precision_and_scale(p, s)?;
Expand All @@ -229,7 +245,11 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
let dict = dict
.as_dictionary::<K>()
.downcast_dict::<Decimal256Array>()
.unwrap();
.ok_or_else(|| {
ArrowError::ComputeError(
"Internal Error: Cannot cast dict to Decimal256Array".to_string(),
)
})?;
let value = dict.values().clone();
// Set correct precision/scale
let value = value.with_precision_and_scale(p, s)?;
Expand Down Expand Up @@ -350,7 +370,12 @@ where
1024,
1024,
);
let string_view = array.as_any().downcast_ref::<StringViewArray>().unwrap();
let string_view = array
.as_any()
.downcast_ref::<StringViewArray>()
.ok_or_else(|| {
ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
})?;
for v in string_view.iter() {
match v {
Some(v) => {
Expand All @@ -376,7 +401,12 @@ where
1024,
1024,
);
let binary_view = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
let binary_view = array
.as_any()
.downcast_ref::<BinaryViewArray>()
.ok_or_else(|| {
ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
})?;
for v in binary_view.iter() {
match v {
Some(v) => {
Expand Down Expand Up @@ -405,7 +435,9 @@ where
let values = cast_values
.as_any()
.downcast_ref::<GenericByteArray<T>>()
.unwrap();
.ok_or_else(|| {
ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
})?;
let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);

// copy each element one at a time
Expand Down
4 changes: 3 additions & 1 deletion arrow-flight/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ pub struct IpcMessage(pub Bytes);

fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData {
let data_gen = writer::IpcDataGenerator::default();
data_gen.schema_to_bytes(arrow_schema, options)
let mut dict_tracker =
writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
data_gen.schema_to_bytes_with_dictionary_tracker(arrow_schema, &mut dict_tracker, options)
}

fn flight_schema_as_flatbuffer(schema: &Schema, options: &IpcWriteOptions) -> IpcMessage {
Expand Down
Loading

0 comments on commit a07e863

Please sign in to comment.