From 66329dbd2ab42583b7b113b31a63747d56af99fd Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 7 Mar 2023 16:59:54 +0800 Subject: [PATCH] Support more data types for IN filter pushdown (#139) Support tiny int, small int and date for IN filter pushdown. --- velox/substrait/SubstraitToVeloxPlan.cpp | 62 ++++++++++++++++++++++++ velox/substrait/TypeUtils.h | 14 ++++++ 2 files changed, 76 insertions(+) diff --git a/velox/substrait/SubstraitToVeloxPlan.cpp b/velox/substrait/SubstraitToVeloxPlan.cpp index 2b389afa71bf..7737a85b8db5 100644 --- a/velox/substrait/SubstraitToVeloxPlan.cpp +++ b/velox/substrait/SubstraitToVeloxPlan.cpp @@ -1851,6 +1851,60 @@ void SubstraitVeloxPlanConverter::setInFilter( common::createBigintValues(values, nullAllowed); } +template <> +void SubstraitVeloxPlanConverter::setInFilter( + const std::vector& variants, + bool nullAllowed, + const std::string& inputName, + connector::hive::SubfieldFilters& filters) { + // Use bigint values for small int type. + std::vector values; + values.reserve(variants.size()); + for (const auto& variant : variants) { + // Use the matched type to get value from variant. + int64_t value = variant.value(); + values.emplace_back(value); + } + filters[common::Subfield(inputName)] = + common::createBigintValues(values, nullAllowed); +} + +template <> +void SubstraitVeloxPlanConverter::setInFilter( + const std::vector& variants, + bool nullAllowed, + const std::string& inputName, + connector::hive::SubfieldFilters& filters) { + // Use bigint values for tiny int type. + std::vector values; + values.reserve(variants.size()); + for (const auto& variant : variants) { + // Use the matched type to get value from variant. 
+ int64_t value = variant.value(); + values.emplace_back(value); + } + filters[common::Subfield(inputName)] = + common::createBigintValues(values, nullAllowed); +} + +template <> +void SubstraitVeloxPlanConverter::setInFilter( + const std::vector& variants, + bool nullAllowed, + const std::string& inputName, + connector::hive::SubfieldFilters& filters) { + // Use bigint values for date type. + std::vector values; + values.reserve(variants.size()); + for (const auto& variant : variants) { + // Use int32 to get value from date variant. + int64_t value = variant.value(); + values.emplace_back(value); + } + filters[common::Subfield(inputName)] = + common::createBigintValues(values, nullAllowed); +} + template <> void SubstraitVeloxPlanConverter::setInFilter( const std::vector& variants, @@ -2017,6 +2071,14 @@ connector::hive::SubfieldFilters SubstraitVeloxPlanConverter::mapToFilters( for (uint32_t colIdx = 0; colIdx < inputNameList.size(); colIdx++) { auto inputType = inputTypeList[colIdx]; switch (inputType->kind()) { + case TypeKind::TINYINT: + constructSubfieldFilters( + colIdx, inputNameList[colIdx], colInfoMap[colIdx], filters); + break; + case TypeKind::SMALLINT: + constructSubfieldFilters( + colIdx, inputNameList[colIdx], colInfoMap[colIdx], filters); + break; case TypeKind::INTEGER: constructSubfieldFilters( colIdx, inputNameList[colIdx], colInfoMap[colIdx], filters); diff --git a/velox/substrait/TypeUtils.h b/velox/substrait/TypeUtils.h index d5f9e7303fbc..de78c84e3a3b 100644 --- a/velox/substrait/TypeUtils.h +++ b/velox/substrait/TypeUtils.h @@ -32,6 +32,20 @@ std::string_view getNameBeforeDelimiter( template struct RangeTraits {}; +template <> +struct RangeTraits { + using RangeType = common::BigintRange; + using MultiRangeType = common::BigintMultiRange; + using NativeType = int8_t; +}; + +template <> +struct RangeTraits { + using RangeType = common::BigintRange; + using MultiRangeType = common::BigintMultiRange; + using NativeType = int16_t; +}; + template 
<> struct RangeTraits { using RangeType = common::BigintRange;