Skip to content

Commit

Permalink
Pass scalar to eq inside nullif (#11697)
Browse files Browse the repository at this point in the history
* Properly specialize nullif for scalar (3x faster)

* missed feature flag

* fix test

* extract

* dodes -> does

Co-authored-by: Oleks V <[email protected]>

---------

Co-authored-by: Oleks V <[email protected]>
  • Loading branch information
simonvandel and comphead authored Aug 5, 2024
1 parent 011a3f3 commit c6f0d3c
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ harness = false
name = "make_date"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "nullif"
required-features = ["core_expressions"]

[[bench]]
harness = false
name = "date_bin"
Expand Down
42 changes: 42 additions & 0 deletions datafusion/functions/benches/nullif.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use arrow::util::bench_util::create_string_array_with_len;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
use datafusion_functions::core::nullif;
use std::sync::Arc;

/// Benchmarks the `nullif` UDF when the first argument is a scalar and the
/// second argument is a string array, at several array lengths.
fn criterion_benchmark(c: &mut Criterion) {
    let udf = nullif();

    for size in [1024, 4096, 8192] {
        // Array side of the comparison; generated by arrow's bench utility
        // (presumably 0.2 is the null density and 32 the string length —
        // confirm against `create_string_array_with_len`).
        let rhs = Arc::new(create_string_array_with_len::<i32>(size, 0.2, 32));

        // Scalar side of the comparison.
        let lhs = ScalarValue::Utf8(Some("abcd".to_string()));

        let args = vec![ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)];
        let label = format!("nullif scalar array: {}", size);

        c.bench_function(&label, |bencher| {
            // `black_box` keeps the optimizer from discarding the result.
            bencher.iter(|| black_box(udf.invoke(&args).unwrap()))
        });
    }
}

// Register `criterion_benchmark` under the benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Generate the benchmark binary's entry point for the `benches` group
// (the Cargo manifest sets `harness = false` for this bench target).
criterion_main!(benches);
10 changes: 7 additions & 3 deletions datafusion/functions/src/core/nullif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
use datafusion_expr::ColumnarValue;

use arrow::array::Array;
use arrow::compute::kernels::cmp::eq;
use arrow::compute::kernels::nullif::nullif;
use datafusion_common::ScalarValue;
Expand Down Expand Up @@ -122,8 +121,13 @@ fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
Ok(ColumnarValue::Array(array))
}
(ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
let lhs = lhs.to_array_of_size(rhs.len())?;
let array = nullif(&lhs, &eq(&lhs, &rhs)?)?;
let lhs_s = lhs.to_scalar()?;
let lhs_a = lhs.to_array_of_size(rhs.len())?;
let array = nullif(
// nullif in arrow-select does not support Datum, so we need to convert to array
lhs_a.as_ref(),
&eq(&lhs_s, &rhs)?,
)?;
Ok(ColumnarValue::Array(array))
}
(ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
Expand Down

0 comments on commit c6f0d3c

Please sign in to comment.