Skip to content

Commit

Permalink
feat: pushdown get_format into TiFlash (#5269)
Browse files Browse the repository at this point in the history
close #5115
  • Loading branch information
wirybeaver authored Jul 11, 2022
1 parent 649919d commit 707fc6d
Show file tree
Hide file tree
Showing 5 changed files with 329 additions and 1 deletion.
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
//{tipb::ScalarFuncSig::YearWeekWithMode, "cast"},
//{tipb::ScalarFuncSig::YearWeekWithoutMode, "cast"},

//{tipb::ScalarFuncSig::GetFormat, "cast"},
{tipb::ScalarFuncSig::GetFormat, "getFormat"},
{tipb::ScalarFuncSig::SysDateWithFsp, "sysDateWithFsp"},
{tipb::ScalarFuncSig::SysDateWithoutFsp, "sysDateWithoutFsp"},
//{tipb::ScalarFuncSig::CurrentDate, "cast"},
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ void registerFunctionsConversion(FunctionFactory & factory)

factory.registerFunction<FunctionFromUnixTime>();
factory.registerFunction<FunctionDateFormat>();
factory.registerFunction<FunctionGetFormat>();
factory.registerFunction<FunctionTiDBUnixTimeStamp<NameTiDBUnixTimeStampInt>>();
factory.registerFunction<FunctionTiDBUnixTimeStamp<NameTiDBUnixTimeStampDec>>();
factory.registerFunction<FunctionStrToDate<NameStrToDateDate>>();
Expand Down
114 changes: 114 additions & 0 deletions dbms/src/Functions/FunctionsConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -1751,6 +1751,120 @@ class FunctionDateFormat : public IFunction
}
};

class FunctionGetFormat : public IFunction
{
private:
static String get_format(const StringRef & time_type, const StringRef & location)
{
if (time_type == "DATE")
{
if (location == "USA")
return "%m.%d.%Y";
else if (location == "JIS")
return "%Y-%m-%d";
else if (location == "ISO")
return "%Y-%m-%d";
else if (location == "EUR")
return "%d.%m.%Y";
else if (location == "INTERNAL")
return "%Y%m%d";
}
else if (time_type == "DATETIME" || time_type == "TIMESTAMP")
{
if (location == "USA")
return "%Y-%m-%d %H.%i.%s";
else if (location == "JIS")
return "%Y-%m-%d %H:%i:%s";
else if (location == "ISO")
return "%Y-%m-%d %H:%i:%s";
else if (location == "EUR")
return "%Y-%m-%d %H.%i.%s";
else if (location == "INTERNAL")
return "%Y%m%d%H%i%s";
}
else if (time_type == "TIME")
{
if (location == "USA")
return "%h:%i:%s %p";
else if (location == "JIS")
return "%H:%i:%s";
else if (location == "ISO")
return "%H:%i:%s";
else if (location == "EUR")
return "%H.%i.%s";
else if (location == "INTERNAL")
return "%H%i%s";
}
return "";
}

public:
static constexpr auto name = "getFormat";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionGetFormat>(); };

String getName() const override { return name; }

size_t getNumberOfArguments() const override { return 2; }

DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!arguments[0].type->isString())
throw Exception("First argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN);
if (!arguments[1].type->isString())
throw Exception("Second argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN);

return std::make_shared<DataTypeString>();
}

bool useDefaultImplementationForConstants() const override { return true; }

/**
* @brief The first argument is designed as a MySQL reserved word. You would encounter a syntax error when wrap it around with quote in SQL.
* For example, select GET_FORMAT("DATE", "USA") will fail. Removing the quote can solve the problem.
* Thus the first argument should always be a ColumnConst. See details in the link below:
* https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html#function_get-format
*
* @return ColumnNumbers
*/
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }

void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) const override
{
const auto * location_col = checkAndGetColumn<ColumnString>(block.getByPosition(arguments[1]).column.get());
assert(location_col);
size_t size = location_col->size();
const auto & time_type_col = block.getByPosition(arguments[0]).column;
auto col_to = ColumnString::create();

if (time_type_col->isColumnConst())
{
const auto & time_type_col_const = checkAndGetColumnConst<ColumnString>(time_type_col.get());
const auto & time_type = time_type_col_const->getValue<String>();

ColumnString::Chars_t & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
auto max_length = 18;
data_to.resize(size * max_length);
offsets_to.resize(size);
WriteBufferFromVector<ColumnString::Chars_t> write_buffer(data_to);
for (size_t i = 0; i < size; ++i)
{
const auto & location = location_col->getDataAt(i);
const auto & result = get_format(StringRef(time_type), location);
write_buffer.write(result.c_str(), result.size());
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}
data_to.resize(write_buffer.count());
block.getByPosition(result).column = std::move(col_to);
}
else
{
throw Exception("First argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN);
}
}
};

struct NameStrToDateDate
{
static constexpr auto name = "strToDateDate";
Expand Down
153 changes: 153 additions & 0 deletions dbms/src/Functions/tests/gtest_get_format.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>

#include <string>
#include <vector>


#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-compare"
#include <Poco/Types.h>

#pragma GCC diagnostic pop

namespace DB::tests
{
// Test fixture for the "getFormat" function; derives from the shared FunctionTest fixture.
class GetFormatTest : public DB::tests::FunctionTest
{
public:
// Name under which the function is looked up; passed as the first argument to executeFunction in the tests.
static constexpr auto funcName = "getFormat";
};

// Boundary cases for getFormat: combinations of const / vector columns and null / non-null values
// for the two arguments, plus the rejection of a non-constant time_type column.
TEST_F(GetFormatTest, testBoundary)
try
{
// const(non-null), vector
// time_type is a const with non null value
// location is a vector containing null
// Expect a null location row to produce a null result row.
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"%m.%d.%Y", {}}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, "DATE"),
createColumn<Nullable<String>>({"USA", {}})));

// const(null), vector
// time_type is a const with null value
// location is a vector containing null
// Expect a constant null result.
ASSERT_COLUMN_EQ(
createConstColumn<Nullable<String>>(2, {}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, {}),
createColumn<Nullable<String>>({"USA", {}})));

// const(non-null), const(non-null)
// time_type is a const with non null value
// location is a const with non null value
// NOTE(review): the expected column is declared as String while both inputs are Nullable<String> —
// confirm this matches what ASSERT_COLUMN_EQ compares for constant columns.
ASSERT_COLUMN_EQ(
createConstColumn<String>(2, "%m.%d.%Y"),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, "DATE"),
createConstColumn<Nullable<String>>(2, "USA")));

// const(non-null), const(null)
// time_type is a const with non null value
// location is a const with null value
// Expect a constant null result.
ASSERT_COLUMN_EQ(
createConstColumn<Nullable<String>>(2, {}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, "DATE"),
createConstColumn<Nullable<String>>(2, {})));

// The time_type is a system pre-defined keyword, thus the time_type column is assumed to be const.
// Throw an exception if time_type is not ColumnConst.
ASSERT_THROW(
executeFunction(
funcName,
createColumn<Nullable<String>>({"DATE", "TIME"}),
createColumn<Nullable<String>>({"USA", {}})),
DB::Exception);
}
CATCH

// Value coverage for getFormat: every supported time_type against every supported location,
// followed by unsupported locations and an unsupported time_type, both of which yield empty strings.
TEST_F(GetFormatTest, testMoreCases)
try
{
// time_type: DATE
// all locations
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"%m.%d.%Y", "%Y-%m-%d", "%Y-%m-%d", "%d.%m.%Y", "%Y%m%d"}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(5, "DATE"),
createColumn<Nullable<String>>({"USA", "JIS", "ISO", "EUR", "INTERNAL"})));

// time_type: DATETIME
// all locations
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(5, "DATETIME"),
createColumn<Nullable<String>>({"USA", "JIS", "ISO", "EUR", "INTERNAL"})));

// time_type: TIMESTAMP
// all locations; the expected formats are the same as for DATETIME
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(5, "TIMESTAMP"),
createColumn<Nullable<String>>({"USA", "JIS", "ISO", "EUR", "INTERNAL"})));

// time_type: TIME
// all locations
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"%h:%i:%s %p", "%H:%i:%s", "%H:%i:%s", "%H.%i.%s", "%H%i%s"}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(5, "TIME"),
createColumn<Nullable<String>>({"USA", "JIS", "ISO", "EUR", "INTERNAL"})));

// the location is not in ("USA", "JIS", "ISO", "EUR", "INTERNAL")
// (an unknown name and an empty string): expect empty strings, not nulls
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"", ""}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, "TIME"),
createColumn<Nullable<String>>({"CAN", ""})));

// the time_type is not in ("DATE", "DATETIME", "TIMESTAMP", "TIME")
// expect empty strings even though the locations are valid
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"", ""}),
executeFunction(
funcName,
createConstColumn<Nullable<String>>(2, "TIMEINUTC"),
createColumn<Nullable<String>>({"USA", "ISO"})));
}
CATCH

} // namespace DB::tests
60 changes: 60 additions & 0 deletions tests/fullstack-test/expr/get_format.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

mysql> drop table if exists test.t;
mysql> create table test.t(location varchar(10));
mysql> insert into test.t values('USA'), ('JIS'), ('ISO'), ('EUR'), ('INTERNAL');
mysql> alter table test.t set tiflash replica 1;
func> wait_table test t
mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATE, location) from test.t;
+----------------------------+
| GET_FORMAT(DATE, location) |
+----------------------------+
| %m.%d.%Y |
| %Y-%m-%d |
| %Y-%m-%d |
| %d.%m.%Y |
| %Y%m%d |
+----------------------------+
mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATETIME, location) from test.t;
+--------------------------------+
| GET_FORMAT(DATETIME, location) |
+--------------------------------+
| %Y-%m-%d %H.%i.%s |
| %Y-%m-%d %H:%i:%s |
| %Y-%m-%d %H:%i:%s |
| %Y-%m-%d %H.%i.%s |
| %Y%m%d%H%i%s |
+--------------------------------+
mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIMESTAMP, location) from test.t;
+---------------------------------+
| GET_FORMAT(TIMESTAMP, location) |
+---------------------------------+
| %Y-%m-%d %H.%i.%s |
| %Y-%m-%d %H:%i:%s |
| %Y-%m-%d %H:%i:%s |
| %Y-%m-%d %H.%i.%s |
| %Y%m%d%H%i%s |
+---------------------------------+
mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIME, location) from test.t;
+----------------------------+
| GET_FORMAT(TIME, location) |
+----------------------------+
| %h:%i:%s %p |
| %H:%i:%s |
| %H:%i:%s |
| %H.%i.%s |
| %H%i%s |
+----------------------------+
mysql> drop table if exists test.t;

0 comments on commit 707fc6d

Please sign in to comment.