Skip to content

Commit

Permalink
Improve the performance of length and ascii functions (#9345)
Browse files: browse the repository at this point in the history
close #9344

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
xzhangxian1008 and ti-chi-bot[bot] authored Sep 3, 2024
1 parent 0761d45 commit b30c1f5
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 258 deletions.
2 changes: 1 addition & 1 deletion dbms/src/Functions/FunctionsNull.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class FunctionCoalesce : public IFunction
public:
static constexpr auto name = "coalesce";
static FunctionPtr create(const Context & context);
// NOTE(review): this span comes from a rendered diff without +/- markers; the two
// signature lines below are presumably the before/after pair (plain constructor, then
// the `explicit` one) — confirm against the repository.
// Constructor: initializes the `context` member from the given Context argument.
// With `explicit`, a Context is no longer implicitly convertible to FunctionCoalesce.
FunctionCoalesce(const Context & context)
explicit FunctionCoalesce(const Context & context)
: context(context)
{}

Expand Down
60 changes: 29 additions & 31 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/TargetSpecific.h>
#include <Common/UTF8Helpers.h>
#include <Common/Volnitsky.h>
Expand Down Expand Up @@ -4474,9 +4476,11 @@ class FunctionASCII : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format(
"Number of arguments for function {} doesn't match: passed {}, should be 1.",
Expand All @@ -4490,30 +4494,25 @@ class FunctionASCII : public IFunction
// Fills block[result] with an Int64 column holding, for each row of the argument
// column, the value of the row's first character/byte.
//
// NOTE(review): this span comes from a rendered diff without +/- markers, so two
// versions of the body are interleaved (note the duplicate `val_num` / `col_res`
// declarations and the two consecutive `if` lines). Presumably the Field-based lines
// are the pre-change code and the offsets-based lines the post-change code — confirm
// against the repository before treating this as compilable source.
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
// Field-based version only: also accepted a ColumnConst argument.
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);

// Field-based version: scratch Field plus a result column grown by insert().
Field res_field;
int val_num = c0_col->size();
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
// Field-based check (rejects only when the column is neither const nor string) ...
if (c0_const == nullptr && c0_string == nullptr)
// ... offsets-based check (requires a ColumnString).
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

// Field-based loop: reads every row through a Field, copies it into a String and
// inserts 0 for an empty string, otherwise the first `char` cast to Int64.
for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
Int64 res = handled_str.empty() ? 0 : static_cast<Int64>(handled_str[0]);
col_res->insert(res);
}
// Offsets-based version: size the result up front and write into its buffer directly.
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

const auto & chars = c0_string->getChars();
const auto & offsets = c0_string->getOffsets();

// offsets[i - 1] is used as the position of row i's first byte in `chars`.
// NOTE(review): for i == 0 this reads offsets[-1]; presumably the offsets container
// is padded so that index -1 is readable and yields 0 — confirm.
// NOTE(review): for an empty row this reads whatever byte sits at the row start,
// presumably a terminating zero stored after each string — confirm.
for (ssize_t i = 0; i < val_num; i++)
data[i] = chars[offsets[i - 1]];

block.getByPosition(result).column = std::move(col_res);
}

private:
};

class FunctionLength : public IFunction
Expand All @@ -4527,9 +4526,11 @@ class FunctionLength : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format(
"Number of arguments for function {} doesn't match: passed {}, should be 1.",
Expand All @@ -4543,24 +4544,21 @@ class FunctionLength : public IFunction
// Fills block[result] with an Int64 column holding the length of each row of the
// argument column.
//
// NOTE(review): this span comes from a rendered diff without +/- markers, so two
// versions of the body are interleaved (note the duplicate `val_num` / `col_res`
// declarations and the two consecutive `if` lines). Presumably the Field-based lines
// are the pre-change code and the offsets-based lines the post-change code — confirm
// against the repository before treating this as compilable source.
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
// Field-based version only: also accepted a ColumnConst argument.
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);

// Field-based version: scratch Field plus a result column grown by insert().
Field res_field;
int val_num = c0_col->size();
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
// Field-based check (rejects only when the column is neither const nor string) ...
if (c0_const == nullptr && c0_string == nullptr)
// ... offsets-based check (requires a ColumnString).
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

// Field-based loop: reads every row through a Field, copies it into a String and
// inserts its size() as Int64.
for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
col_res->insert(static_cast<Int64>(handled_str.size()));
}
// Offsets-based version: size the result up front and write into its buffer directly.
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

const auto & offsets = c0_string->getOffsets();

// Row length is the distance between consecutive offsets minus one.
// NOTE(review): the "- 1" presumably discounts a terminating zero byte stored after
// each string — confirm.
// NOTE(review): for i == 0 this reads offsets[-1]; presumably the offsets container
// is padded so that index -1 is readable and yields 0 — confirm.
for (ssize_t i = 0; i < val_num; i++)
data[i] = offsets[i] - offsets[i - 1] - 1;

block.getByPosition(result).column = std::move(col_res);
}
Expand Down
33 changes: 17 additions & 16 deletions dbms/src/Functions/GatherUtils/Algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t sources_num = array_sources.size();
std::vector<char> is_const(sources_num);

auto checkAndGetSizeToReserve = [](auto source, IArraySource * array_source) {
auto check_and_get_size_to_reserve = [](auto source, IArraySource * array_source) {
if (source == nullptr)
throw Exception(
"Concat function expected " + demangle(typeid(Source).name()) + " or "
Expand All @@ -205,17 +205,18 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t size_to_reserve = 0;
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
is_const[i] = source->isConst();
if (is_const[i])
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
size_to_reserve
+= check_and_get_size_to_reserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
else
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<Source *>(source.get()), source.get());
size_to_reserve += check_and_get_size_to_reserve(typeid_cast<Source *>(source.get()), source.get());
}

sink.reserve(size_to_reserve);

auto writeNext = [&sink](auto source) {
auto write_next = [&sink](auto source) {
writeSlice(source->getWhole(), sink);
source->next();
};
Expand All @@ -224,11 +225,11 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
{
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
if (is_const[i])
writeNext(static_cast<ConstSource<Source> *>(source.get()));
write_next(static_cast<ConstSource<Source> *>(source.get()));
else
writeNext(static_cast<Source *>(source.get()));
write_next(static_cast<Source *>(source.get()));
}
sink.next();
}
Expand Down Expand Up @@ -389,11 +390,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
size_t left = static_cast<size_t>(length) - slice.size;
if (is_left)
{
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand All @@ -402,11 +403,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
else
{
writeSlice(slice, sink);
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#include <TestUtils/FunctionTestUtils.h>
#include <benchmark/benchmark.h>

/// this is a hack, include the cpp file so we can test MatchImpl directly
#include <Functions/FunctionsString.cpp>
/// this is a hack, include the cpp file so we can test functions directly
#include <Functions/FunctionsString.cpp> // NOLINT
#include <Functions/FunctionsStringSearch.cpp> // NOLINT

namespace DB
Expand Down Expand Up @@ -306,5 +306,65 @@ BENCH_LIKE_COLLATOR(ASCII_BIN);
BENCH_LIKE_COLLATOR(BINARY);
BENCH_LIKE_COLLATOR(LATIN1_BIN);

class LengthBench : public benchmark::Fixture
{
public:
using ColStringType = typename TypeTraits<String>::FieldType;

ColumnsWithTypeAndName data1{toVec<String>("col", std::vector<ColStringType>(data_num, ""))};
ColumnsWithTypeAndName data2{toVec<String>("col", std::vector<ColStringType>(data_num, "aaaaaaaaaa"))};
ColumnsWithTypeAndName data3{toVec<String>("col", std::vector<ColStringType>(data_num, "啊aaaaaaaa"))};

void SetUp(const benchmark::State &) override {}
};

/// Benchmarks FunctionLength::executeImpl on the three inputs provided by the
/// LengthBench fixture (data1, data2, data3). Each timed iteration runs the
/// function once per input block; 10 iterations are registered.
BENCHMARK_DEFINE_F(LengthBench, bench)
(benchmark::State & state)
try
{
    FunctionLength function_length;
    std::vector<Block> blocks{Block(data1), Block(data2), Block(data3)};

    // Append the result slot at position 1 of every block. executeImpl stores a
    // ColumnInt64 there, so the slot is declared as Int64 (not UInt8) to keep the
    // declared type consistent with the column that ends up in it.
    for (auto & block : blocks)
        block.insert({nullptr, std::make_shared<DataTypeNumber<Int64>>(), "res"});

    // Argument column is at position 0, result column at position 1.
    const ColumnNumbers arguments{0};
    for (auto _ : state)
    {
        for (auto & block : blocks)
            function_length.executeImpl(block, arguments, 1);
    }
}
CATCH
BENCHMARK_REGISTER_F(LengthBench, bench)->Iterations(10);

class ASCIIBench : public benchmark::Fixture
{
public:
using ColStringType = typename TypeTraits<String>::FieldType;

ColumnsWithTypeAndName data1{toVec<String>("col", std::vector<ColStringType>(data_num, ""))};
ColumnsWithTypeAndName data2{toVec<String>("col", std::vector<ColStringType>(data_num, "aaaaaaaaaa"))};
ColumnsWithTypeAndName data3{toVec<String>("col", std::vector<ColStringType>(data_num, "啊aaaaaaaa"))};

void SetUp(const benchmark::State &) override {}
};

/// Benchmarks FunctionASCII::executeImpl on the three inputs provided by the
/// ASCIIBench fixture (data1, data2, data3). Each timed iteration runs the
/// function once per input block; 10 iterations are registered.
BENCHMARK_DEFINE_F(ASCIIBench, bench)
(benchmark::State & state)
try
{
    FunctionASCII function_ascii;
    std::vector<Block> blocks{Block(data1), Block(data2), Block(data3)};

    // Append the result slot at position 1 of every block. executeImpl stores a
    // ColumnInt64 there, so the slot is declared as Int64 (not UInt8) to keep the
    // declared type consistent with the column that ends up in it.
    for (auto & block : blocks)
        block.insert({nullptr, std::make_shared<DataTypeNumber<Int64>>(), "res"});

    // Argument column is at position 0, result column at position 1.
    const ColumnNumbers arguments{0};
    for (auto _ : state)
    {
        for (auto & block : blocks)
            function_ascii.executeImpl(block, arguments, 1);
    }
}
CATCH
BENCHMARK_REGISTER_F(ASCIIBench, bench)->Iterations(10);

} // namespace tests
} // namespace DB
Loading

0 comments on commit b30c1f5

Please sign in to comment.