Skip to content

Commit

Permalink
Improve the performance of length and ascii functions (#9345) (#9399)
Browse files Browse the repository at this point in the history

close #9344

Signed-off-by: ti-chi-bot <[email protected]>

Co-authored-by: xzhangxian1008 <[email protected]>
  • Loading branch information
ti-chi-bot and xzhangxian1008 committed Sep 6, 2024
1 parent ba58bcd commit cb546d8
Show file tree
Hide file tree
Showing 8 changed files with 492 additions and 254 deletions.
2 changes: 1 addition & 1 deletion dbms/src/Functions/FunctionsNull.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class FunctionCoalesce : public IFunction
public:
static constexpr auto name = "coalesce";
static FunctionPtr create(const Context & context);
FunctionCoalesce(const Context & context)
explicit FunctionCoalesce(const Context & context)
: context(context)
{}

Expand Down
61 changes: 31 additions & 30 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/TargetSpecific.h>
#include <Common/UTF8Helpers.h>
#include <Common/Volnitsky.h>
Expand All @@ -36,7 +38,6 @@

#include <boost/algorithm/string/predicate.hpp>
#include <ext/range.h>
#include <thread>

namespace DB
{
Expand Down Expand Up @@ -4141,9 +4142,11 @@ class FunctionASCII : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Expand All @@ -4154,28 +4157,25 @@ class FunctionASCII : public IFunction
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

Field res_field;
int val_num = c0_col->size();
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
if (c0_const == nullptr && c0_string == nullptr)
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
Int64 res = handled_str.empty() ? 0 : static_cast<Int64>(handled_str[0]);
col_res->insert(res);
}
const auto & chars = c0_string->getChars();
const auto & offsets = c0_string->getOffsets();

for (ssize_t i = 0; i < val_num; i++)
data[i] = chars[offsets[i - 1]];

block.getByPosition(result).column = std::move(col_res);
}

private:
};

class FunctionLength : public IFunction
Expand All @@ -4192,9 +4192,11 @@ class FunctionLength : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Expand All @@ -4205,22 +4207,21 @@ class FunctionLength : public IFunction
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

Field res_field;
int val_num = c0_col->size();
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
if (c0_const == nullptr && c0_string == nullptr)
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
col_res->insert(static_cast<Int64>(handled_str.size()));
}
const auto & offsets = c0_string->getOffsets();

for (ssize_t i = 0; i < val_num; i++)
data[i] = offsets[i] - offsets[i - 1] - 1;

block.getByPosition(result).column = std::move(col_res);
}
Expand Down
33 changes: 17 additions & 16 deletions dbms/src/Functions/GatherUtils/Algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t sources_num = array_sources.size();
std::vector<char> is_const(sources_num);

auto checkAndGetSizeToReserve = [](auto source, IArraySource * array_source) {
auto check_and_get_size_to_reserve = [](auto source, IArraySource * array_source) {
if (source == nullptr)
throw Exception("Concat function expected " + demangle(typeid(Source).name()) + " or "
+ demangle(typeid(ConstSource<Source>).name()) + " but got "
Expand All @@ -199,17 +199,18 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t size_to_reserve = 0;
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
is_const[i] = source->isConst();
if (is_const[i])
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
size_to_reserve
+= check_and_get_size_to_reserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
else
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<Source *>(source.get()), source.get());
size_to_reserve += check_and_get_size_to_reserve(typeid_cast<Source *>(source.get()), source.get());
}

sink.reserve(size_to_reserve);

auto writeNext = [&sink](auto source) {
auto write_next = [&sink](auto source) {
writeSlice(source->getWhole(), sink);
source->next();
};
Expand All @@ -218,11 +219,11 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
{
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
if (is_const[i])
writeNext(static_cast<ConstSource<Source> *>(source.get()));
write_next(static_cast<ConstSource<Source> *>(source.get()));
else
writeNext(static_cast<Source *>(source.get()));
write_next(static_cast<Source *>(source.get()));
}
sink.next();
}
Expand Down Expand Up @@ -383,11 +384,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
size_t left = static_cast<size_t>(length) - slice.size;
if (is_left)
{
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand All @@ -396,11 +397,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
else
{
writeSlice(slice, sink);
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand Down
Loading

0 comments on commit cb546d8

Please sign in to comment.