Skip to content

Commit

Permalink
Add validation for Decimal256 (#2113)
Browse files Browse the repository at this point in the history
* Add validation for decimal256

* Add validation in array data
  • Loading branch information
viirya authored Jul 22, 2022
1 parent c3e019f commit 34f58f9
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 5 deletions.
2 changes: 1 addition & 1 deletion arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl From<ArrayData> for Decimal256Array {
assert_eq!(
data.buffers().len(),
1,
"Decimal128Array data should contain 1 buffer only (values)"
"Decimal256Array data should contain 1 buffer only (values)"
);
let values = data.buffers()[0].as_ptr();
let (precision, scale) = match data.data_type() {
Expand Down
67 changes: 65 additions & 2 deletions arrow/src/array/builder/decimal_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use num::BigInt;
use std::any::Any;
use std::sync::Arc;

Expand All @@ -25,7 +26,7 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder};

use crate::error::{ArrowError, Result};

use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{validate_decimal256_precision, validate_decimal_precision};
use crate::util::decimal::{BasicDecimal, Decimal256};

/// Array Builder for [`Decimal128Array`]
Expand All @@ -51,6 +52,10 @@ pub struct Decimal256Builder {
builder: FixedSizeBinaryBuilder,
precision: usize,
scale: usize,

/// Should decimal values be validated for compatibility with scale and precision?
/// defaults to true
value_validation: bool,
}

impl Decimal128Builder {
Expand Down Expand Up @@ -163,14 +168,35 @@ impl Decimal256Builder {
builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH),
precision,
scale,
value_validation: true,
}
}

/// Disable validation
///
/// # Safety
///
/// After disabling validation, caller must ensure that appended values are compatible
/// for the specified precision and scale.
pub unsafe fn disable_value_validation(&mut self) {
self.value_validation = false;
}

/// Appends a [`Decimal256`] number into the builder.
///
/// Returns an error if `value` has different precision, scale or length in bytes than this builder
#[inline]
pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
let value = if self.value_validation {
let raw_bytes = value.raw_value();
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, self.precision)?;
value
} else {
value
};

if self.precision != value.precision() || self.scale != value.scale() {
return Err(ArrowError::InvalidArgumentError(
"Decimal value does not have the same precision or scale as Decimal256Builder".to_string()
Expand Down Expand Up @@ -206,9 +232,10 @@ impl Decimal256Builder {
#[cfg(test)]
mod tests {
use super::*;
use num::Num;

use crate::array::array_decimal::{BasicDecimalArray, Decimal128Array};
use crate::array::Array;
use crate::array::{array_decimal, Array};
use crate::datatypes::DataType;
use crate::util::decimal::Decimal128;

Expand Down Expand Up @@ -298,4 +325,40 @@ mod tests {
let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
builder.append_value(&value).unwrap();
}

#[test]
#[should_panic(
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
let mut builder = Decimal256Builder::new(30, 76, 6);

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let bytes = big_value.to_signed_bytes_le();
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
builder.append_value(&value).unwrap();
}

#[test]
#[should_panic(
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
let mut builder = Decimal256Builder::new(30, 76, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
}

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let bytes = big_value.to_signed_bytes_le();
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
builder
.append_value(&value)
.expect("should not validate invalid value at builder");

let array = builder.finish();
let array_data = array_decimal::BasicDecimalArray::data(&array);
array_data.validate_values().unwrap();
}
}
17 changes: 16 additions & 1 deletion arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,18 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
//! common attributes and operations for Arrow array.

use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode};
use crate::datatypes::{
validate_decimal256_precision, validate_decimal_precision, DataType, IntervalUnit,
UnionMode,
};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
buffer::{Buffer, MutableBuffer},
util::bit_util,
};
use half::f16;
use num::BigInt;
use std::convert::TryInto;
use std::mem;
use std::ops::Range;
Expand Down Expand Up @@ -1018,6 +1022,17 @@ impl ArrayData {
}
Ok(())
}
DataType::Decimal256(p, _) => {
let values = self.buffers()[0].as_slice();
for pos in 0..self.len() {
let offset = pos * 32;
let raw_bytes = &values[offset..offset + 32];
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, *p)?;
}
Ok(())
}
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
Expand Down
144 changes: 143 additions & 1 deletion arrow/src/datatypes/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use num::{BigInt, Num, ToPrimitive};
use std::fmt;

use serde_derive::{Deserialize, Serialize};
Expand Down Expand Up @@ -300,6 +301,49 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
99999999999999999999999999999999999999,
];

/// `MAX_DECIMAL_FOR_LARGER_PRECISION[p]` holds the maximum integer value
/// that can be stored in [DataType::Decimal256] value of precision `p` > 38
pub const MAX_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
"99999999999999999999999999999999999999",
"999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999999999",
];

/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value
/// that can be stored in a [DataType::Decimal] value of precision `p`
pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
Expand Down Expand Up @@ -343,13 +387,62 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-99999999999999999999999999999999999999,
];

/// `MIN_DECIMAL_FOR_LARGER_PRECISION[p]` holds the minimum integer value
/// that can be stored in a [DataType::Decimal256] value of precision `p` > 38
pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
"-99999999999999999999999999999999999999",
"-999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999999999",
];

/// The maximum precision for [DataType::Decimal] values
pub const DECIMAL_MAX_PRECISION: usize = 38;

/// The maximum scale for [DataType::Decimal] values
pub const DECIMAL_MAX_SCALE: usize = 38;

/// The default scale for [DataType::Decimal] values
/// The maximum precision for [DataType::Decimal256] values
pub const DECIMAL256_MAX_PRECISION: usize = 76;

/// The maximum scale for [DataType::Decimal256] values
pub const DECIMAL256_MAX_SCALE: usize = 76;

/// The default scale for [DataType::Decimal] and [DataType::Decimal256] values
pub const DECIMAL_DEFAULT_SCALE: usize = 10;

/// Validates that the specified `i128` value can be properly
Expand Down Expand Up @@ -379,6 +472,55 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
}
}

/// Validates that the specified string value can be properly
/// interpreted as a Decimal256 number with precision `precision`
#[inline]
pub(crate) fn validate_decimal256_precision(
value: &str,
precision: usize,
) -> Result<BigInt> {
if precision > 38 {
let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];

let max = BigInt::from_str_radix(max_str, 10).unwrap();
let min = BigInt::from_str_radix(min_str, 10).unwrap();

let value = BigInt::from_str_radix(value, 10).unwrap();
if value > max {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
value, precision, max
)))
} else if value < min {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
value, precision, min
)))
} else {
Ok(value)
}
} else {
let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
let value = BigInt::from_str_radix(value, 10).unwrap();

if value.to_i128().unwrap() > max {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
value, precision, max
)))
} else if value.to_i128().unwrap() < min {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
value, precision, min
)))
} else {
Ok(value)
}
}
}

impl DataType {
/// Parse a data type from a JSON representation.
pub(crate) fn from(json: &Value) -> Result<DataType> {
Expand Down

0 comments on commit 34f58f9

Please sign in to comment.