Skip to content

Commit

Permalink
ARROW-9250: [C++] Instantiate fewer templates in IsIn, Match kernel i…
Browse files Browse the repository at this point in the history
…mplementations

This yields a 150KB reduction in code for me on Linux.

Since this may become a common pattern (using e.g. a single `uint32_t`-based function to process both int32/uint32), some of this may be factored out to make writing such kernel implementations simpler in the future.

Closes #7558 from wesm/ARROW-9250

Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
wesm committed Jun 28, 2020
1 parent d57d8a2 commit 22994b7
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 23 deletions.
97 changes: 78 additions & 19 deletions cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ namespace compute {
namespace internal {
namespace {

template <typename T, typename R = void>
using enable_if_supports_set_lookup =
enable_if_t<has_c_type<T>::value || is_base_binary_type<T>::value ||
is_fixed_size_binary_type<T>::value || is_decimal_type<T>::value,
R>;

template <typename Type>
struct SetLookupState : public KernelState {
explicit SetLookupState(MemoryPool* pool)
Expand Down Expand Up @@ -91,6 +85,30 @@ struct SetLookupState<NullType> : public KernelState {
int64_t lookup_null_count;
};

// TODO: Put this concept somewhere reusable
template <int width>
struct UnsignedIntType;

template <>
struct UnsignedIntType<1> {
using Type = UInt8Type;
};

template <>
struct UnsignedIntType<2> {
using Type = UInt16Type;
};

template <>
struct UnsignedIntType<4> {
using Type = UInt32Type;
};

template <>
struct UnsignedIntType<8> {
using Type = UInt64Type;
};

// Constructing the type requires a type parameter
struct InitStateVisitor {
KernelContext* ctx;
Expand All @@ -114,15 +132,24 @@ struct InitStateVisitor {
Status Visit(const DataType&) { return Init<NullType>(); }

template <typename Type>
enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
return Init<Type>();
enable_if_boolean<Type, Status> Visit(const Type&) {
return Init<BooleanType>();
}

// Handle Decimal128 as a physical string, not a number
Status Visit(const Decimal128Type& type) {
return Visit(checked_cast<const FixedSizeBinaryType&>(type));
template <typename Type>
enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
const Type&) {
return Init<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
}

template <typename Type>
enable_if_base_binary<Type, Status> Visit(const Type&) {
return Init<typename Type::PhysicalType>();
}

// Handle Decimal128Type, FixedSizeBinaryType
Status Visit(const FixedSizeBinaryType& type) { return Init<FixedSizeBinaryType>(); }

Status GetResult(std::unique_ptr<KernelState>* out) {
RETURN_NOT_OK(VisitTypeInline(*options->value_set.type(), this));
*out = std::move(result);
Expand Down Expand Up @@ -163,7 +190,7 @@ struct MatchVisitor {
}

template <typename Type>
enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
Status ProcessMatch() {
using T = typename GetViewType<Type>::T;

const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
Expand Down Expand Up @@ -194,9 +221,25 @@ struct MatchVisitor {
return Status::OK();
}

// Handle Decimal128 as a physical string, not a number
Status Visit(const Decimal128Type& type) {
return Visit(checked_cast<const FixedSizeBinaryType&>(type));
template <typename Type>
enable_if_boolean<Type, Status> Visit(const Type&) {
return ProcessMatch<BooleanType>();
}

template <typename Type>
enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
const Type&) {
return ProcessMatch<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
}

template <typename Type>
enable_if_base_binary<Type, Status> Visit(const Type&) {
return ProcessMatch<typename Type::PhysicalType>();
}

// Handle Decimal128Type, FixedSizeBinaryType
Status Visit(const FixedSizeBinaryType& type) {
return ProcessMatch<FixedSizeBinaryType>();
}

Status Execute() {
Expand Down Expand Up @@ -243,7 +286,7 @@ struct IsInVisitor {
}

template <typename Type>
enable_if_supports_set_lookup<Type, Status> Visit(const Type&) {
Status ProcessIsIn() {
using T = typename GetViewType<Type>::T;
const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
ArrayData* output = out->mutable_array();
Expand Down Expand Up @@ -275,9 +318,25 @@ struct IsInVisitor {
return Status::OK();
}

// Handle Decimal128 as a physical string, not a number
Status Visit(const Decimal128Type& type) {
return Visit(checked_cast<const FixedSizeBinaryType&>(type));
template <typename Type>
enable_if_boolean<Type, Status> Visit(const Type&) {
return ProcessIsIn<BooleanType>();
}

template <typename Type>
enable_if_t<has_c_type<Type>::value && !is_boolean_type<Type>::value, Status> Visit(
const Type&) {
return ProcessIsIn<typename UnsignedIntType<sizeof(typename Type::c_type)>::Type>();
}

template <typename Type>
enable_if_base_binary<Type, Status> Visit(const Type&) {
return ProcessIsIn<typename Type::PhysicalType>();
}

// Handle Decimal128Type, FixedSizeBinaryType
Status Visit(const FixedSizeBinaryType& type) {
return ProcessIsIn<FixedSizeBinaryType>();
}

Status Execute() { return VisitTypeInline(*data.type, this); }
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -642,8 +642,7 @@ class StringConverter
// We should have bailed out earlier
DCHECK(!STRICT);

auto binary_type =
TypeTraits<typename TypeClass::EquivalentBinaryType>::type_singleton();
auto binary_type = TypeTraits<typename TypeClass::PhysicalType>::type_singleton();
return (*out)->View(binary_type).Value(out);
}
return Status::OK();
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,7 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
static constexpr Type::type type_id = Type::BINARY;
static constexpr bool is_utf8 = false;
using offset_type = int32_t;
using PhysicalType = BinaryType;

static constexpr const char* type_name() { return "binary"; }

Expand Down Expand Up @@ -856,6 +857,7 @@ class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
static constexpr Type::type type_id = Type::LARGE_BINARY;
static constexpr bool is_utf8 = false;
using offset_type = int64_t;
using PhysicalType = LargeBinaryType;

static constexpr const char* type_name() { return "large_binary"; }

Expand All @@ -882,7 +884,7 @@ class ARROW_EXPORT StringType : public BinaryType {
public:
static constexpr Type::type type_id = Type::STRING;
static constexpr bool is_utf8 = true;
using EquivalentBinaryType = BinaryType;
using PhysicalType = BinaryType;

static constexpr const char* type_name() { return "utf8"; }

Expand All @@ -900,7 +902,7 @@ class ARROW_EXPORT LargeStringType : public LargeBinaryType {
public:
static constexpr Type::type type_id = Type::LARGE_STRING;
static constexpr bool is_utf8 = true;
using EquivalentBinaryType = LargeBinaryType;
using PhysicalType = LargeBinaryType;

static constexpr const char* type_name() { return "large_utf8"; }

Expand Down

0 comments on commit 22994b7

Please sign in to comment.