From c7e88a257ca53d6bdbfd839b1051cbec717b9b88 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Tue, 18 Jun 2024 04:37:29 +0800 Subject: [PATCH] fix: prevent potential out-of-range access in FixedSizeListArray (#5902) * fix: prevent potential out-of-range access in FixedSizeListArray Signed-off-by: BubbleCal * add benchmark & format Signed-off-by: BubbleCal * format Cargo.toml Signed-off-by: BubbleCal --------- Signed-off-by: BubbleCal --- arrow-array/Cargo.toml | 4 ++ arrow-array/benches/fixed_size_list_array.rs | 51 +++++++++++++++++++ .../src/array/fixed_size_list_array.rs | 8 +-- 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 arrow-array/benches/fixed_size_list_array.rs diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index a8dbeded9ce7..bf6e27f6b232 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -66,3 +66,7 @@ harness = false [[bench]] name = "gc_view_types" harness = false + +[[bench]] +name = "fixed_size_list_array" +harness = false diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs new file mode 100644 index 000000000000..5f001a4f3d3a --- /dev/null +++ b/arrow-array/benches/fixed_size_list_array.rs @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, FixedSizeListArray, Int32Array}; +use arrow_schema::Field; +use criterion::*; +use rand::{thread_rng, Rng}; +use std::sync::Arc; + +fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray { + let mut rng = thread_rng(); + let values = Arc::new(Int32Array::from( + (0..len).map(|_| rng.gen::<i32>()).collect::<Vec<_>>(), + )); + let field = Arc::new(Field::new("item", values.data_type().clone(), true)); + FixedSizeListArray::new(field, value_len as i32, values, None) +} + +fn criterion_benchmark(c: &mut Criterion) { + let len = 4096; + for value_len in [1, 32, 1024] { + let fsl = gen_fsl(len, value_len); + c.bench_function( + &format!("fixed_size_list_array(len: {len}, value_len: {value_len})"), + |b| { + b.iter(|| { + for i in 0..len / value_len { + black_box(fsl.value(i)); + } + }); + }, + ); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 7bb4e0c5ee5a..6f3a76908723 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -245,7 +245,7 @@ impl FixedSizeListArray { /// Returns ith value of this list array. pub fn value(&self, i: usize) -> ArrayRef { self.values - .slice(self.value_offset(i) as usize, self.value_length() as usize) + .slice(self.value_offset_at(i), self.value_length() as usize) } /// Returns the offset for value at index `i`. @@ -253,7 +253,7 @@ impl FixedSizeListArray { /// Note this doesn't do any bound checking, for performance reason. #[inline] pub fn value_offset(&self, i: usize) -> i32 { - self.value_offset_at(i) + self.value_offset_at(i) as i32 } /// Returns the length for an element. 
@@ -265,8 +265,8 @@ impl FixedSizeListArray { } #[inline] - const fn value_offset_at(&self, i: usize) -> i32 { - i as i32 * self.value_length + const fn value_offset_at(&self, i: usize) -> usize { + i * self.value_length as usize } /// Returns a zero-copy slice of this array with the indicated offset and length.