diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index 1eb810a39605d..1d8c7872cf983 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -34,12 +34,6 @@ namespace compute { namespace internal { namespace { -template -using enable_if_supports_set_lookup = - enable_if_t::value || is_base_binary_type::value || - is_fixed_size_binary_type::value || is_decimal_type::value, - R>; - template struct SetLookupState : public KernelState { explicit SetLookupState(MemoryPool* pool) @@ -91,6 +85,30 @@ struct SetLookupState : public KernelState { int64_t lookup_null_count; }; +// TODO: Put this concept somewhere reusable +template +struct UnsignedIntType; + +template <> +struct UnsignedIntType<1> { + using Type = UInt8Type; +}; + +template <> +struct UnsignedIntType<2> { + using Type = UInt16Type; +}; + +template <> +struct UnsignedIntType<4> { + using Type = UInt32Type; +}; + +template <> +struct UnsignedIntType<8> { + using Type = UInt64Type; +}; + // Constructing the type requires a type parameter struct InitStateVisitor { KernelContext* ctx; @@ -114,15 +132,24 @@ struct InitStateVisitor { Status Visit(const DataType&) { return Init(); } template - enable_if_supports_set_lookup Visit(const Type&) { - return Init(); + enable_if_boolean Visit(const Type&) { + return Init(); } - // Handle Decimal128 as a physical string, not a number - Status Visit(const Decimal128Type& type) { - return Visit(checked_cast(type)); + template + enable_if_t::value && !is_boolean_type::value, Status> Visit( + const Type&) { + return Init::Type>(); } + template + enable_if_base_binary Visit(const Type&) { + return Init(); + } + + // Handle Decimal128Type, FixedSizeBinaryType + Status Visit(const FixedSizeBinaryType& type) { return Init(); } + Status GetResult(std::unique_ptr* out) { RETURN_NOT_OK(VisitTypeInline(*options->value_set.type(), this)); *out = std::move(result); @@ -163,7 +190,7 @@ struct MatchVisitor { } template - enable_if_supports_set_lookup Visit(const Type&) { + Status ProcessMatch() { using T = typename GetViewType::T; const auto& state = checked_cast&>(*ctx->state()); @@ -194,9 +221,25 @@ struct MatchVisitor { return Status::OK(); } - // Handle Decimal128 as a physical string, not a number - Status Visit(const Decimal128Type& type) { - return Visit(checked_cast(type)); + template + enable_if_boolean Visit(const Type&) { + return ProcessMatch(); + } + + template + enable_if_t::value && !is_boolean_type::value, Status> Visit( + const Type&) { + return ProcessMatch::Type>(); + } + + template + enable_if_base_binary Visit(const Type&) { + return ProcessMatch(); + } + + // Handle Decimal128Type, FixedSizeBinaryType + Status Visit(const FixedSizeBinaryType& type) { + return ProcessMatch(); } Status Execute() { @@ -243,7 +286,7 @@ struct IsInVisitor { } template - enable_if_supports_set_lookup Visit(const Type&) { + Status ProcessIsIn() { using T = typename GetViewType::T; const auto& state = checked_cast&>(*ctx->state()); ArrayData* output = out->mutable_array(); @@ -275,9 +318,25 @@ struct IsInVisitor { return Status::OK(); } - // Handle Decimal128 as a physical string, not a number - Status Visit(const Decimal128Type& type) { - return Visit(checked_cast(type)); + template + enable_if_boolean Visit(const Type&) { + return ProcessIsIn(); + } + + template + enable_if_t::value && !is_boolean_type::value, Status> Visit( + const Type&) { + return ProcessIsIn::Type>(); + } + + template + enable_if_base_binary Visit(const Type&) { + return ProcessIsIn(); + } + + // Handle Decimal128Type, FixedSizeBinaryType + Status Visit(const FixedSizeBinaryType& type) { + return ProcessIsIn(); } Status Execute() { return VisitTypeInline(*data.type, this); } diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index bdaae4c2385e6..2805e5dde678d 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -642,8 +642,7 @@ class StringConverter // We should have bailed out earlier DCHECK(!STRICT); - auto binary_type = - TypeTraits::type_singleton(); + auto binary_type = TypeTraits::type_singleton(); return (*out)->View(binary_type).Value(out); } return Status::OK(); diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 4801076c3d7c9..c343a987b7e9f 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -829,6 +829,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType { static constexpr Type::type type_id = Type::BINARY; static constexpr bool is_utf8 = false; using offset_type = int32_t; + using PhysicalType = BinaryType; static constexpr const char* type_name() { return "binary"; } @@ -856,6 +857,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType { static constexpr Type::type type_id = Type::LARGE_BINARY; static constexpr bool is_utf8 = false; using offset_type = int64_t; + using PhysicalType = LargeBinaryType; static constexpr const char* type_name() { return "large_binary"; } @@ -882,7 +884,7 @@ class ARROW_EXPORT StringType : public BinaryType { public: static constexpr Type::type type_id = Type::STRING; static constexpr bool is_utf8 = true; - using EquivalentBinaryType = BinaryType; + using PhysicalType = BinaryType; static constexpr const char* type_name() { return "utf8"; } @@ -900,7 +902,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType { public: static constexpr Type::type type_id = Type::LARGE_STRING; static constexpr bool is_utf8 = true; - using EquivalentBinaryType = LargeBinaryType; + using PhysicalType = LargeBinaryType; static constexpr const char* type_name() { return "large_utf8"; }