Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix null on overflow and multiply as spark precision and support cast varchar to decimal #169

Merged
merged 8 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions velox/common/base/BitUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,13 @@ inline int32_t countLeadingZeros(uint64_t word) {
return __builtin_clzll(word);
}

/// Returns the number of leading zero bits in a 128-bit unsigned integer.
///
/// The value is split into its upper and lower 64-bit halves. When the upper
/// half is non-zero the answer is the leading-zero count of that half;
/// otherwise it is 64 plus the leading-zero count of the lower half.
///
/// Returns 128 when 'word' is zero. The 64-bit countLeadingZeros forwards to
/// __builtin_clzll, whose result is undefined for a zero argument, so the
/// all-zero input is handled here explicitly instead of being passed through.
inline int32_t countLeadingZerosUint128(__uint128_t word) {
  const auto hi = static_cast<uint64_t>(word >> 64);
  if (hi != 0) {
    return bits::countLeadingZeros(hi);
  }
  const auto lo = static_cast<uint64_t>(word);
  if (lo == 0) {
    // Both halves are zero: every one of the 128 bits is a leading zero.
    return 128;
  }
  return 64 + bits::countLeadingZeros(lo);
}

inline uint64_t nextPowerOfTwo(uint64_t size) {
if (size == 0) {
return 0;
Expand Down
35 changes: 35 additions & 0 deletions velox/expression/CastExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "velox/expression/StringWriter.h"
#include "velox/external/date/tz.h"
#include "velox/functions/lib/RowsTranslationUtil.h"
#include "velox/type/DecimalUtilOp.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/FunctionVector.h"
#include "velox/vector/SelectivityVector.h"
Expand Down Expand Up @@ -201,6 +202,30 @@ void applyDoubleToDecimalCastKernel(
}
});
}

template <typename TOutput>
void applyVarCharToDecimalCastKernel(
const SelectivityVector& rows,
const BaseVector& input,
exec::EvalCtx& context,
const TypePtr& toType,
VectorPtr castResult) {
auto sourceVector = input.as<SimpleVector<StringView>>();
auto castResultRawBuffer =
castResult->asUnchecked<FlatVector<TOutput>>()->mutableRawValues();
const auto& toPrecisionScale = getDecimalPrecisionScale(*toType);
context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
auto rescaledValue = DecimalUtilOp::rescaleVarchar<TOutput>(
sourceVector->valueAt(row),
toPrecisionScale.first,
toPrecisionScale.second);
if (rescaledValue.has_value()) {
castResultRawBuffer[row] = rescaledValue.value();
} else {
castResult->setNull(row, true);
}
});
}
} // namespace

template <typename To, typename From>
Expand Down Expand Up @@ -635,6 +660,16 @@ VectorPtr CastExpr::applyDecimal(
}
break;
}
case TypeKind::VARCHAR: {
if (toType->kind() == TypeKind::SHORT_DECIMAL) {
applyVarCharToDecimalCastKernel<UnscaledShortDecimal>(
rows, input, context, toType, castResult);
} else {
applyVarCharToDecimalCastKernel<UnscaledLongDecimal>(
rows, input, context, toType, castResult);
}
break;
}
default:
VELOX_UNSUPPORTED(
"Cast from {} to {} is not supported",
Expand Down
20 changes: 20 additions & 0 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,26 @@ TEST_F(CastExprTest, bigintToDecimal) {
"Cannot cast BIGINT '100' to DECIMAL(17,16)");
}

// Verifies casting VARCHAR to DECIMAL.
TEST_F(CastExprTest, varcharToDecimal) {
  // TODO: The two cases below are disabled. Before re-enabling the long
  // decimal case, its expected values must be corrected: {-300, 17700} does
  // not correspond to the listed inputs at scale 7.
  //
  // varchar to short decimal
  // auto input = makeFlatVector<StringView>({"-3", "177"});
  // testComplexCast(
  //     "c0", input, makeShortDecimalFlatVector({-300, 17700}, DECIMAL(6, 2)));

  // varchar to long decimal
  // auto input2 = makeFlatVector<StringView>(
  //     {"-300000001234567891234.5", "1771234.5678912345678"});
  // testComplexCast(
  //     "c0", input2, makeLongDecimalFlatVector({-300, 17700}, DECIMAL(32, 7)));

  // "9999999999.99" at scale 2 has the unscaled value 999'999'999'999, which
  // fits precision 12. The expected vector is built with the short decimal
  // helper, mirroring the DECIMAL(6, 2) case above.
  auto input3 = makeFlatVector<StringView>({"9999999999.99", "9999999999.99"});
  testComplexCast(
      "c0",
      input3,
      makeShortDecimalFlatVector(
          {999'999'999'999, 999'999'999'999}, DECIMAL(12, 2)));
}

TEST_F(CastExprTest, castInTry) {
// Test try(cast(array(varchar) as array(bigint))) whose input vector is
// wrapped in dictionary encoding. The row of ["2a"] should trigger an error
Expand Down
Loading