From aa92f4edb3af8e94c57baf6e934c0b0543183a4e Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 30 Jul 2019 09:32:23 +0800 Subject: [PATCH 01/79] basic framework for coprocessor support in tiflash --- .gitmodules | 4 + CMakeLists.txt | 1 + cmake/find_tipb.cmake | 10 + contrib/CMakeLists.txt | 1 + contrib/tipb | 1 + dbms/CMakeLists.txt | 2 + dbms/src/Coprocessor/CoprocessorHandler.cpp | 61 +++ dbms/src/Coprocessor/CoprocessorHandler.h | 43 ++ dbms/src/Core/Defines.h | 2 + .../DataStreams/TidbCopBlockOutputStream.cpp | 74 ++++ .../DataStreams/TidbCopBlockOutputStream.h | 43 ++ dbms/src/Interpreters/ClientInfo.h | 1 + .../Interpreters/CoprocessorBuilderUtils.cpp | 403 ++++++++++++++++++ .../Interpreters/CoprocessorBuilderUtils.h | 10 + .../Interpreters/InterpreterDagRequestV1.cpp | 217 ++++++++++ .../Interpreters/InterpreterDagRequestV1.h | 45 ++ .../Interpreters/InterpreterDagRequestV2.cpp | 160 +++++++ .../Interpreters/InterpreterDagRequestV2.h | 51 +++ dbms/src/Interpreters/Settings.h | 2 + dbms/src/Server/CMakeLists.txt | 7 +- dbms/src/Server/FlashService.cpp | 118 +++++ dbms/src/Server/FlashService.h | 39 ++ dbms/src/Server/Server.cpp | 15 + dbms/src/Server/cop_test.cpp | 125 ++++++ dbms/src/Storages/Transaction/RegionTable.cpp | 16 + dbms/src/Storages/Transaction/RegionTable.h | 1 + dbms/src/Storages/Transaction/TypeMapping.cpp | 17 +- dbms/src/Storages/Transaction/TypeMapping.h | 2 + 28 files changed, 1469 insertions(+), 2 deletions(-) create mode 100644 cmake/find_tipb.cmake create mode 160000 contrib/tipb create mode 100644 dbms/src/Coprocessor/CoprocessorHandler.cpp create mode 100644 dbms/src/Coprocessor/CoprocessorHandler.h create mode 100644 dbms/src/DataStreams/TidbCopBlockOutputStream.cpp create mode 100644 dbms/src/DataStreams/TidbCopBlockOutputStream.h create mode 100644 dbms/src/Interpreters/CoprocessorBuilderUtils.cpp create mode 100644 dbms/src/Interpreters/CoprocessorBuilderUtils.h create mode 100644 dbms/src/Interpreters/InterpreterDagRequestV1.cpp create mode 100644 dbms/src/Interpreters/InterpreterDagRequestV1.h create mode 100644 dbms/src/Interpreters/InterpreterDagRequestV2.cpp create mode 100644 dbms/src/Interpreters/InterpreterDagRequestV2.h create mode 100644 dbms/src/Server/FlashService.cpp create mode 100644 dbms/src/Server/FlashService.h create mode 100644 dbms/src/Server/cop_test.cpp diff --git a/.gitmodules b/.gitmodules index 073d97f217a..505fe3ed19a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -37,3 +37,7 @@ [submodule "contrib/kvproto"] path = contrib/kvproto url = https://github.com/pingcap/kvproto.git +[submodule "contrib/tipb"] + path = contrib/tipb + url = https://github.com/pingcap/tipb.git + branch = tipb_cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5628975d5d6..c4be8c63368 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -266,6 +266,7 @@ include (cmake/find_llvm.cmake) include (cmake/find_grpc.cmake) include (cmake/find_kvproto.cmake) include (cmake/find_curl.cmake) +include (cmake/find_tipb.cmake) include (cmake/find_contrib_lib.cmake) diff --git a/cmake/find_tipb.cmake b/cmake/find_tipb.cmake new file mode 100644 index 00000000000..7e8bfff5092 --- /dev/null +++ b/cmake/find_tipb.cmake @@ -0,0 +1,10 @@ + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/tipb/cpp/tipb/select.pb.h") + if (EXISTS "${ClickHouse_SOURCE_DIR}/contrib/tipb/proto/select.proto") + message (FATAL_ERROR "tipb cpp files in contrib/tipb is missing. 
Try going to contrib/tipb and run ./generate_cpp.sh")
+    else()
+        message (FATAL_ERROR "tipb submodule in contrib/tipb is missing. Try running 'git submodule update --init --recursive', then go to contrib/tipb and run ./generate_cpp.sh")
+    endif()
+endif ()
+
+message(STATUS "Using tipb: ${ClickHouse_SOURCE_DIR}/contrib/tipb/cpp")
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 6b87cce2701..13c2f861b5b 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_subdirectory (kvproto/cpp)
 add_subdirectory (client-c)
+add_subdirectory (tipb/cpp)
 
 if (NOT MSVC)
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast")
diff --git a/contrib/tipb b/contrib/tipb
new file mode 160000
index 00000000000..961b01c984e
--- /dev/null
+++ b/contrib/tipb
@@ -0,0 +1 @@
+Subproject commit 961b01c984ebbdc9723ee4dc928b6102d0ee2b88
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index d8abac2fac7..d2f4f62f4b2 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -60,6 +60,7 @@ add_headers_and_sources(dbms src/Storages/Page)
 add_headers_and_sources(dbms src/Raft)
 add_headers_and_sources(dbms src/TiDB)
 add_headers_and_sources(dbms src/Client)
+add_headers_and_sources(dbms src/Coprocessor)
 add_headers_only(dbms src/Server)
 
 list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
@@ -151,6 +152,7 @@ target_link_libraries (dbms
     clickhouse_common_io
     kvproto
     kv_client
+    tipb
     ${Protobuf_LIBRARIES}
     gRPC::grpc++_unsecure
     ${CURL_LIBRARIES}
diff --git a/dbms/src/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Coprocessor/CoprocessorHandler.cpp
new file mode 100644
index 00000000000..ce849385a86
--- /dev/null
+++ b/dbms/src/Coprocessor/CoprocessorHandler.cpp
@@ -0,0 +1,61 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+CoprocessorHandler::CoprocessorHandler(const coprocessor::Request * cop_request_, coprocessor::Response * cop_response_, CoprocessorContext & context_)
+    : cop_request(cop_request_), cop_response(cop_response_), context(context_)
+{
+    if(!dag_request.ParseFromString(cop_request->data())) {
+        throw Exception("Could not extract dag request from coprocessor request");
+    }
+}
+
+CoprocessorHandler::~CoprocessorHandler()
+{
+}
+
+BlockIO CoprocessorHandler::buildCHPlan() {
+    String builder_version = context.ch_context.getSettings().coprocessor_plan_builder_version;
+    if(builder_version == "v1") {
+        InterpreterDagRequestV1 builder(context, dag_request);
+        return builder.execute();
+    } else if (builder_version == "v2") {
+        //throw Exception("coprocessor plan builder version v2 is not supported yet");
+        InterpreterDagRequestV2 builder(context, dag_request);
+        return builder.execute();
+    } else {
+        throw Exception("coprocessor plan builder version should be set to v1 or v2");
+    }
+}
+
+bool CoprocessorHandler::execute() {
+    context.ch_context.setSetting("read_tso", UInt64(dag_request.start_ts()));
+    //todo set region related info
+    BlockIO streams = buildCHPlan();
+    if(!streams.in || streams.out) {
+        // only query is allowed, so streams.in must not be null and streams.out must be null
+        return false;
+    }
+    tipb::SelectResponse select_response;
+    BlockOutputStreamPtr outputStreamPtr = std::make_shared<TidbCopBlockOutputStream>(
+        &select_response, context.ch_context.getSettings().records_per_chunk, dag_request.encode_type(), streams.in->getHeader()
+    );
+    copyData(*streams.in, *outputStreamPtr);
+    cop_response->set_data(select_response.SerializeAsString());
+    return true;
+}
+
+}
diff --git a/dbms/src/Coprocessor/CoprocessorHandler.h b/dbms/src/Coprocessor/CoprocessorHandler.h
new file mode 100644
index 00000000000..841adff4276
--- /dev/null
+++ b/dbms/src/Coprocessor/CoprocessorHandler.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include
+#include
+#pragma GCC diagnostic pop
+
+#include
+#include
+
+namespace DB {
+
+struct CoprocessorContext {
+    Context & ch_context;
+    const kvrpcpb::Context & kv_context;
+    grpc::ServerContext & grpc_server_context;
+    CoprocessorContext(Context & ch_context_, const kvrpcpb::Context & kv_context_,
+        grpc::ServerContext & grpc_server_context_)
+        : ch_context(ch_context_), kv_context(kv_context_), grpc_server_context(grpc_server_context_) {
+    }
+};
+
+/** handle coprocessor request, this is used by tiflash coprocessor.
+ */
+class CoprocessorHandler {
+public:
+    CoprocessorHandler(const coprocessor::Request *cop_request, coprocessor::Response *response, CoprocessorContext &context);
+
+    ~CoprocessorHandler();
+
+    bool execute();
+
+private:
+    String buildSqlString();
+    BlockIO buildCHPlan();
+    const coprocessor::Request *cop_request;
+    coprocessor::Response *cop_response;
+    CoprocessorContext &context;
+    tipb::DAGRequest dag_request;
+
+};
+}
diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h
index 85d6da06994..aa01d84aaad 100644
--- a/dbms/src/Core/Defines.h
+++ b/dbms/src/Core/Defines.h
@@ -28,6 +28,8 @@
 
 #define DEFAULT_MAX_READ_TSO 0xFFFFFFFFFFFFFFFF
 
+#define DEFAULT_RECORDS_PER_CHUNK 64L
+
 /** Which blocks by default read the data (by number of rows).
   * Smaller values give better cache locality, less consumption of RAM, but more overhead to process the query.
   */
diff --git a/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp b/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp
new file mode 100644
index 00000000000..5993d0d4443
--- /dev/null
+++ b/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp
@@ -0,0 +1,74 @@
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes {
+    extern const int UNSUPPORTED_PARAMETER;
+}
+
+struct TypeMapping;
+
+TidbCopBlockOutputStream::TidbCopBlockOutputStream(
+    tipb::SelectResponse *response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, Block header_)
+    : response(response_), records_per_chunk(records_per_chunk_), encodeType(encodeType_), header(header_)
+{
+    if(encodeType == tipb::EncodeType::TypeArrow) {
+        throw Exception("Encode type TypeArrow is not supported yet in TidbCopBlockOutputStream.", ErrorCodes::UNSUPPORTED_PARAMETER);
+    }
+    current_chunk = nullptr;
+    current_records_num = 0;
+    total_rows = 0;
+}
+
+
+void TidbCopBlockOutputStream::writePrefix()
+{
+    //something to do here?
+}
+
+void TidbCopBlockOutputStream::writeSuffix()
+{
+    // error handle,
+    if(current_chunk != nullptr && records_per_chunk > 0) {
+        current_chunk->set_rows_data(current_ss.str());
+    }
+}
+
+
+void TidbCopBlockOutputStream::write(const Block & block)
+{
+    // encode data to chunk
+    size_t rows = block.rows();
+    for(size_t i = 0; i < rows; i++) {
+        if(current_chunk == nullptr || current_records_num >= records_per_chunk) {
+            if(current_chunk) {
+                // set the current ss to current chunk
+                current_chunk->set_rows_data(current_ss.str());
+            }
+            current_chunk = response->add_chunks();
+            current_ss.str("");
+            current_records_num = 0;
+        }
+        for(size_t j = 0; j < block.columns(); j++) {
+            auto field = (*block.getByPosition(j).column.get())[i];
+            const DataTypePtr & dataTypePtr = block.getByPosition(j).type;
+            if(dataTypePtr->isNullable()) {
+                const DataTypePtr real = dynamic_cast<const DataTypeNullable *>(dataTypePtr.get())->getNestedType();
+                EncodeDatum(field, getCodecFlagByDataType(real), current_ss);
+            } else {
+                EncodeDatum(field, getCodecFlagByDataType(block.getByPosition(j).type), current_ss);
+            }
+        }
+        //encode current row
+        current_records_num++;
+        total_rows++;
+    }
+}
+
+}
diff --git a/dbms/src/DataStreams/TidbCopBlockOutputStream.h b/dbms/src/DataStreams/TidbCopBlockOutputStream.h
new file mode 100644
index 00000000000..731cba83d89
--- /dev/null
+++ b/dbms/src/DataStreams/TidbCopBlockOutputStream.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include
+#include
+#include
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include
+
+#pragma GCC diagnostic pop
+
+namespace DB
+{
+
+
+/** Serializes the stream of blocks in tidb coprocessor format.
+  * Designed for communication with tidb via coprocessor.
+  */
+class TidbCopBlockOutputStream : public IBlockOutputStream
+{
+public:
+    TidbCopBlockOutputStream(
+        tipb::SelectResponse *response, Int64 records_per_chunk, tipb::EncodeType encodeType, Block header);
+
+    Block getHeader() const override { return header; }
+    void write(const Block & block) override;
+    void writePrefix() override;
+    void writeSuffix() override;
+
+private:
+    tipb::SelectResponse *response;
+    Int64 records_per_chunk;
+    tipb::EncodeType encodeType;
+    Block header;
+    tipb::Chunk *current_chunk;
+    Int64 current_records_num;
+    std::stringstream current_ss;
+    Int64 total_rows;
+
+};
+
+}
diff --git a/dbms/src/Interpreters/ClientInfo.h b/dbms/src/Interpreters/ClientInfo.h
index 58a6c250b55..f890c13851b 100644
--- a/dbms/src/Interpreters/ClientInfo.h
+++ b/dbms/src/Interpreters/ClientInfo.h
@@ -24,6 +24,7 @@ class ClientInfo
     {
         TCP = 1,
         HTTP = 2,
+        GRPC = 3,
     };
 
     enum class HTTPMethod : UInt8
diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp
new file mode 100644
index 00000000000..d8058b5c1bc
--- /dev/null
+++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp
@@ -0,0 +1,403 @@
+
+#include
+#include
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include
+#pragma GCC diagnostic pop
+#include
+
+namespace DB {
+
+    std::unordered_map<tipb::ExprType, String> aggFunMap(
+        {
+            {tipb::ExprType::Count, "count"},
+            {tipb::ExprType::Sum, "sum"},
+            {tipb::ExprType::Avg, "avg"},
+            {tipb::ExprType::Min, "min"},
+            {tipb::ExprType::Max, "max"},
+            {tipb::ExprType::First, "any"},
+            //{tipb::ExprType::GroupConcat, ""},
+            //{tipb::ExprType::Agg_BitAnd, ""},
+            //{tipb::ExprType::Agg_BitOr, ""},
+            //{tipb::ExprType::Agg_BitXor, ""},
+            //{tipb::ExprType::Std, ""},
+            //{tipb::ExprType::Stddev, ""},
+            //{tipb::ExprType::StddevPop,
""}, + //{tipb::ExprType::StddevSamp, ""}, + //{tipb::ExprType::VarPop, ""}, + //{tipb::ExprType::VarSamp, ""}, + //{tipb::ExprType::Variance, ""}, + //{tipb::ExprType::JsonArrayAgg, ""}, + //{tipb::ExprType::JsonObjectAgg, ""}, + } + ); + + std::unordered_map scalarFunMap( + { + {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, + {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsString, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsTime, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastIntAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastRealAsInt, "cast"}, + {tipb::ScalarFuncSig::CastRealAsReal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsString, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsTime, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastRealAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDecimalAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsString, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastStringAsInt, "cast"}, + {tipb::ScalarFuncSig::CastStringAsReal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsString, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsTime, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastStringAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastTimeAsInt, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsReal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsString, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsTime, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsString, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsString, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, + + {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceString, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDecimal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceTime, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDuration, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceJson, "coalesce"}, + + {tipb::ScalarFuncSig::LTInt, "less"}, + {tipb::ScalarFuncSig::LTReal, "less"}, + {tipb::ScalarFuncSig::LTString, "less"}, + {tipb::ScalarFuncSig::LTDecimal, "less"}, + {tipb::ScalarFuncSig::LTTime, "less"}, + {tipb::ScalarFuncSig::LTDuration, "less"}, + {tipb::ScalarFuncSig::LTJson, "less"}, + + {tipb::ScalarFuncSig::LEInt, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEReal, 
"lessOrEquals"}, + {tipb::ScalarFuncSig::LEString, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEDecimal, "lessOrEquals"}, + {tipb::ScalarFuncSig::LETime, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEDuration, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEJson, "lessOrEquals"}, + + {tipb::ScalarFuncSig::GTInt, "greater"}, + {tipb::ScalarFuncSig::GTReal, "greater"}, + {tipb::ScalarFuncSig::GTString, "greater"}, + {tipb::ScalarFuncSig::GTDecimal, "greater"}, + {tipb::ScalarFuncSig::GTTime, "greater"}, + {tipb::ScalarFuncSig::GTDuration, "greater"}, + {tipb::ScalarFuncSig::GTJson, "greater"}, + + {tipb::ScalarFuncSig::GreatestInt, "greatest"}, + {tipb::ScalarFuncSig::GreatestReal, "greatest"}, + {tipb::ScalarFuncSig::GreatestString, "greatest"}, + {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, + {tipb::ScalarFuncSig::GreatestTime, "greatest"}, + + {tipb::ScalarFuncSig::LeastInt, "least"}, + {tipb::ScalarFuncSig::LeastReal, "least"}, + {tipb::ScalarFuncSig::LeastString, "least"}, + {tipb::ScalarFuncSig::LeastDecimal, "least"}, + {tipb::ScalarFuncSig::LeastTime, "least"}, + + //{tipb::ScalarFuncSig::IntervalInt, "cast"}, + //{tipb::ScalarFuncSig::IntervalReal, "cast"}, + + {tipb::ScalarFuncSig::GEInt, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEReal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEString, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDecimal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GETime, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDuration, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEJson, "greaterOrEquals"}, + + {tipb::ScalarFuncSig::EQInt, "equals"}, + {tipb::ScalarFuncSig::EQReal, "equals"}, + {tipb::ScalarFuncSig::EQString, "equals"}, + {tipb::ScalarFuncSig::EQDecimal, "equals"}, + {tipb::ScalarFuncSig::EQTime, "equals"}, + {tipb::ScalarFuncSig::EQDuration, "equals"}, + {tipb::ScalarFuncSig::EQJson, "equals"}, + + {tipb::ScalarFuncSig::NEInt, "notEquals"}, + {tipb::ScalarFuncSig::NEReal, "notEquals"}, + {tipb::ScalarFuncSig::NEString, "notEquals"}, + {tipb::ScalarFuncSig::NEDecimal, "notEquals"}, + {tipb::ScalarFuncSig::NETime, "notEquals"}, + {tipb::ScalarFuncSig::NEDuration, "notEquals"}, + {tipb::ScalarFuncSig::NEJson, "notEquals"}, + + //{tipb::ScalarFuncSig::NullEQInt, "cast"}, + //{tipb::ScalarFuncSig::NullEQReal, "cast"}, + //{tipb::ScalarFuncSig::NullEQString, "cast"}, + //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, + //{tipb::ScalarFuncSig::NullEQTime, "cast"}, + //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, + //{tipb::ScalarFuncSig::NullEQJson, "cast"}, + + {tipb::ScalarFuncSig::PlusReal, "plus"}, + {tipb::ScalarFuncSig::PlusDecimal, "plus"}, + {tipb::ScalarFuncSig::PlusInt, "plus"}, + + {tipb::ScalarFuncSig::MinusReal, "minus"}, + {tipb::ScalarFuncSig::MinusDecimal, "minus"}, + {tipb::ScalarFuncSig::MinusInt, "minus"}, + + {tipb::ScalarFuncSig::MultiplyReal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyDecimal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyInt, "multiply"}, + + {tipb::ScalarFuncSig::DivideReal, "divide"}, + {tipb::ScalarFuncSig::DivideDecimal, "divide"}, + {tipb::ScalarFuncSig::IntDivideInt, "intDiv"}, + {tipb::ScalarFuncSig::IntDivideDecimal, "divide"}, + + {tipb::ScalarFuncSig::ModReal, "modulo"}, + {tipb::ScalarFuncSig::ModDecimal, "modulo"}, + {tipb::ScalarFuncSig::ModInt, "modulo"}, + + {tipb::ScalarFuncSig::MultiplyIntUnsigned, "multiply"}, + + {tipb::ScalarFuncSig::AbsInt, "abs"}, + {tipb::ScalarFuncSig::AbsUInt, "abs"}, + {tipb::ScalarFuncSig::AbsReal, "abs"}, + {tipb::ScalarFuncSig::AbsDecimal, "abs"}, + + 
{tipb::ScalarFuncSig::CeilIntToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilIntToInt, "ceil"}, + {tipb::ScalarFuncSig::CeilDecToInt, "ceil"}, + {tipb::ScalarFuncSig::CeilDecToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilReal, "ceil"}, + + {tipb::ScalarFuncSig::FloorIntToDec, "floor"}, + {tipb::ScalarFuncSig::FloorIntToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToDec, "floor"}, + {tipb::ScalarFuncSig::FloorReal, "floor"}, + + {tipb::ScalarFuncSig::RoundReal, "round"}, + {tipb::ScalarFuncSig::RoundInt, "round"}, + {tipb::ScalarFuncSig::RoundDec, "round"}, + //{tipb::ScalarFuncSig::RoundWithFracReal, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracInt, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracDec, "cast"}, + + {tipb::ScalarFuncSig::Log1Arg, "log"}, + //{tipb::ScalarFuncSig::Log2Args, "cast"}, + {tipb::ScalarFuncSig::Log2, "log2"}, + {tipb::ScalarFuncSig::Log10, "log10"}, + + {tipb::ScalarFuncSig::Rand, "rand"}, + //{tipb::ScalarFuncSig::RandWithSeed, "cast"}, + + {tipb::ScalarFuncSig::Pow, "pow"}, + //{tipb::ScalarFuncSig::Conv, "cast"}, + //{tipb::ScalarFuncSig::CRC32, "cast"}, + //{tipb::ScalarFuncSig::Sign, "cast"}, + + {tipb::ScalarFuncSig::Sqrt, "sqrt"}, + {tipb::ScalarFuncSig::Acos, "acos"}, + {tipb::ScalarFuncSig::Asin, "asin"}, + {tipb::ScalarFuncSig::Atan1Arg, "atan"}, + //{tipb::ScalarFuncSig::Atan2Args, "cast"}, + {tipb::ScalarFuncSig::Cos, "cos"}, + //{tipb::ScalarFuncSig::Cot, "cast"}, + //{tipb::ScalarFuncSig::Degrees, "cast"}, + {tipb::ScalarFuncSig::Exp, "exp"}, + //{tipb::ScalarFuncSig::PI, "cast"}, + //{tipb::ScalarFuncSig::Radians, "cast"}, + {tipb::ScalarFuncSig::Sin, "sin"}, + {tipb::ScalarFuncSig::Tan, "tan"}, + {tipb::ScalarFuncSig::TruncateInt, "trunc"}, + {tipb::ScalarFuncSig::TruncateReal, "trunc"}, + //{tipb::ScalarFuncSig::TruncateDecimal, "cast"}, + + {tipb::ScalarFuncSig::LogicalAnd, "and"}, + {tipb::ScalarFuncSig::LogicalOr, "or"}, + {tipb::ScalarFuncSig::LogicalXor, "xor"}, + {tipb::ScalarFuncSig::UnaryNot, "not"}, + {tipb::ScalarFuncSig::UnaryMinusInt, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusReal, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusDecimal, "negate"}, + {tipb::ScalarFuncSig::DecimalIsNull, "isNull"}, + {tipb::ScalarFuncSig::DurationIsNull, "isNull"}, + {tipb::ScalarFuncSig::RealIsNull, "isNull"}, + {tipb::ScalarFuncSig::StringIsNull, "isNull"}, + {tipb::ScalarFuncSig::TimeIsNull, "isNull"}, + {tipb::ScalarFuncSig::IntIsNull, "isNull"}, + {tipb::ScalarFuncSig::JsonIsNull, "isNull"}, + + //{tipb::ScalarFuncSig::BitAndSig, "cast"}, + //{tipb::ScalarFuncSig::BitOrSig, "cast"}, + //{tipb::ScalarFuncSig::BitXorSig, "cast"}, + //{tipb::ScalarFuncSig::BitNegSig, "cast"}, + //{tipb::ScalarFuncSig::IntIsTrue, "cast"}, + //{tipb::ScalarFuncSig::RealIsTrue, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsTrue, "cast"}, + //{tipb::ScalarFuncSig::IntIsFalse, "cast"}, + //{tipb::ScalarFuncSig::RealIsFalse, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsFalse, "cast"}, + + //{tipb::ScalarFuncSig::LeftShift, "cast"}, + //{tipb::ScalarFuncSig::RightShift, "cast"}, + + //{tipb::ScalarFuncSig::BitCount, "cast"}, + //{tipb::ScalarFuncSig::GetParamString, "cast"}, + //{tipb::ScalarFuncSig::GetVar, "cast"}, + //{tipb::ScalarFuncSig::RowSig, "cast"}, + //{tipb::ScalarFuncSig::SetVar, "cast"}, + //{tipb::ScalarFuncSig::ValuesDecimal, "cast"}, + //{tipb::ScalarFuncSig::ValuesDuration, "cast"}, + //{tipb::ScalarFuncSig::ValuesInt, "cast"}, + //{tipb::ScalarFuncSig::ValuesJSON, "cast"}, + //{tipb::ScalarFuncSig::ValuesReal, 
"cast"}, + //{tipb::ScalarFuncSig::ValuesString, "cast"}, + //{tipb::ScalarFuncSig::ValuesTime, "cast"}, + + {tipb::ScalarFuncSig::InInt, "in"}, + {tipb::ScalarFuncSig::InReal, "in"}, + {tipb::ScalarFuncSig::InString, "in"}, + {tipb::ScalarFuncSig::InDecimal, "in"}, + {tipb::ScalarFuncSig::InTime, "in"}, + {tipb::ScalarFuncSig::InDuration, "in"}, + {tipb::ScalarFuncSig::InJson, "in"}, + + {tipb::ScalarFuncSig::IfNullInt, "ifNull"}, + {tipb::ScalarFuncSig::IfNullReal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullString, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDecimal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullTime, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDuration, "ifNull"}, + {tipb::ScalarFuncSig::IfNullJson, "ifNull"}, + + {tipb::ScalarFuncSig::IfInt, "if"}, + {tipb::ScalarFuncSig::IfReal, "if"}, + {tipb::ScalarFuncSig::IfString, "if"}, + {tipb::ScalarFuncSig::IfDecimal, "if"}, + {tipb::ScalarFuncSig::IfTime, "if"}, + {tipb::ScalarFuncSig::IfDuration, "if"}, + {tipb::ScalarFuncSig::IfJson, "if"}, + + //todo need further check for caseWithExpression and multiIf + {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, + + //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, + //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, + //{tipb::ScalarFuncSig::Compress, "cast"}, + //{tipb::ScalarFuncSig::MD5, "cast"}, + //{tipb::ScalarFuncSig::Password, "cast"}, + //{tipb::ScalarFuncSig::RandomBytes, "cast"}, + //{tipb::ScalarFuncSig::SHA1, "cast"}, + //{tipb::ScalarFuncSig::SHA2, "cast"}, + //{tipb::ScalarFuncSig::Uncompress, "cast"}, + //{tipb::ScalarFuncSig::UncompressedLength, "cast"}, + + //{tipb::ScalarFuncSig::Database, "cast"}, + //{tipb::ScalarFuncSig::FoundRows, "cast"}, + //{tipb::ScalarFuncSig::CurrentUser, "cast"}, + //{tipb::ScalarFuncSig::User, "cast"}, + //{tipb::ScalarFuncSig::ConnectionID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertIDWithID, "cast"}, + //{tipb::ScalarFuncSig::Version, "cast"}, + //{tipb::ScalarFuncSig::TiDBVersion, "cast"}, + //{tipb::ScalarFuncSig::RowCount, "cast"}, + + //{tipb::ScalarFuncSig::Sleep, "cast"}, + //{tipb::ScalarFuncSig::Lock, "cast"}, + //{tipb::ScalarFuncSig::ReleaseLock, "cast"}, + //{tipb::ScalarFuncSig::DecimalAnyValue, "cast"}, + //{tipb::ScalarFuncSig::DurationAnyValue, "cast"}, + //{tipb::ScalarFuncSig::IntAnyValue, "cast"}, + //{tipb::ScalarFuncSig::JSONAnyValue, "cast"}, + //{tipb::ScalarFuncSig::RealAnyValue, "cast"}, + //{tipb::ScalarFuncSig::StringAnyValue, "cast"}, + //{tipb::ScalarFuncSig::TimeAnyValue, "cast"}, + //{tipb::ScalarFuncSig::InetAton, "cast"}, + //{tipb::ScalarFuncSig::InetNtoa, "cast"}, + //{tipb::ScalarFuncSig::Inet6Aton, "cast"}, + //{tipb::ScalarFuncSig::Inet6Ntoa, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"}, + //{tipb::ScalarFuncSig::IsIPv6, "cast"}, + //{tipb::ScalarFuncSig::UUID, "cast"}, + + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, 
"cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + } + ); +} diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.h b/dbms/src/Interpreters/CoprocessorBuilderUtils.h new file mode 100644 index 00000000000..908a8638c77 --- /dev/null +++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB { + + extern std::unordered_map aggFunMap; + extern std::unordered_map scalarFunMap; + +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV1.cpp b/dbms/src/Interpreters/InterpreterDagRequestV1.cpp new file mode 100644 index 00000000000..109dba6aaab --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDagRequestV1.cpp @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB { + + bool InterpreterDagRequestV1::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { + TableID id; + if(ts.has_table_id()) { + id = ts.table_id(); + } else { + // do not have table id + return false; + } + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if(storage == nullptr) { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); + } + if(storage == nullptr) { + return false; + } + const auto * merge_tree = dynamic_cast(storage.get()); + if (!merge_tree) { + return false; + } + + for(const tipb::ColumnInfo &ci : ts.columns()) { + ColumnID cid = ci.column_id(); + String name = merge_tree->getTableInfo().columns[cid-1].name; + column_name_from_ts.emplace(std::make_pair(cid, name)); + } + if(column_name_from_ts.empty()) { + // no column selected, must be something wrong + return false; + } + ss << "FROM " << merge_tree->getTableInfo().db_name << "." 
<< merge_tree->getTableInfo().name << " "; + return true; + } + + String InterpreterDagRequestV1::exprToString(const tipb::Expr & expr, bool &succ) { + std::stringstream ss; + succ = true; + size_t cursor = 1; + Int64 columnId = 0; + String func_name; + Field f; + switch (expr.tp()) { + case tipb::ExprType::Null: + return "NULL"; + case tipb::ExprType::Int64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Uint64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + return std::to_string(DecodeFloat64(cursor, expr.val())); + case tipb::ExprType::String: + // + return expr.val(); + case tipb::ExprType::Bytes: + return DecodeBytes(cursor, expr.val()); + case tipb::ExprType::ColumnRef: + columnId = DecodeInt(cursor, expr.val()); + if(getCurrentColumnNames().count(columnId) == 0) { + succ = false; + return ""; + } + return getCurrentColumnNames().find(columnId)->second; + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + if(!aggFunMap.count(expr.tp())) { + succ = false; + return ""; + } + func_name = aggFunMap.find(expr.tp())->second; + break; + case tipb::ExprType::ScalarFunc: + if(!scalarFunMap.count(expr.sig())) { + succ = false; + return ""; + } + func_name = scalarFunMap.find(expr.sig())->second; + break; + default: + succ = false; + return ""; + } + // build function expr + if(func_name == "in") { + // for in, we could not represent the function expr using func_name(param1, param2, ...) + succ = false; + return ""; + } else { + ss << func_name << "("; + bool first = true; + bool sub_succ = true; + for(const tipb::Expr &child : expr.children()) { + String s = exprToString(child, sub_succ); + if(!sub_succ) { + succ = false; + return ""; + } + if(first) { + first = false; + } else { + ss << ", "; + } + ss << s; + } + ss << ") "; + return ss.str(); + } + } + + bool InterpreterDagRequestV1::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { + bool first = true; + for(const tipb::Expr & expr : sel.conditions()) { + bool succ = true; + auto s = exprToString(expr, succ); + if(!succ) { + return false; + } + if(first) { + ss << "WHERE "; + first = false; + } else { + ss << "AND "; + } + ss << s << " "; + } + return true; + } + + bool InterpreterDagRequestV1::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { + ss << "LIMIT " << limit.limit() << " "; + return true; + } + + //todo return the error message + bool InterpreterDagRequestV1::buildString(const tipb::Executor & executor, std::stringstream & ss) { + switch (executor.tp()) { + case tipb::ExecType::TypeTableScan: + return buildTSString(executor.tbl_scan(), ss); + case tipb::ExecType::TypeIndexScan: + // index scan not supported + return false; + case tipb::ExecType::TypeSelection: + return buildSelString(executor.selection(), ss); + case tipb::ExecType::TypeAggregation: + // stream agg is not supported, treated as normal agg + case tipb::ExecType::TypeStreamAgg: + //todo support agg + return false; + case tipb::ExecType::TypeTopN: + // todo support top n + return false; + case tipb::ExecType::TypeLimit: + return buildLimitString(executor.limit(), ss); + } + } + + bool isProject(const tipb::Executor &) { + // currently, project is not pushed so always return false + return false; + } + InterpreterDagRequestV1::InterpreterDagRequestV1(CoprocessorContext & context_, tipb::DAGRequest & 
dag_request_) + : context(context_), dag_request(dag_request_) { + afterAgg = false; + } + + BlockIO InterpreterDagRequestV1::execute() { + String query = buildSqlString(); + return executeQuery(query, context.ch_context, false, QueryProcessingStage::Complete); + } + + String InterpreterDagRequestV1::buildSqlString() { + std::stringstream query_buf; + std::stringstream project; + for(const tipb::Executor & executor : dag_request.executors()) { + if(!buildString(executor, query_buf)) { + return ""; + } + } + if(!isProject(dag_request.executors(dag_request.executors_size()-1))) { + //append final project + project << "SELECT "; + bool first = true; + for(UInt32 index : dag_request.output_offsets()) { + if(first) { + first = false; + } else { + project << ", "; + } + project << getCurrentColumnNames()[index+1]; + } + project << " "; + } + return project.str() + query_buf.str(); + } + + InterpreterDagRequestV1::~InterpreterDagRequestV1() { + + } +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV1.h b/dbms/src/Interpreters/InterpreterDagRequestV1.h new file mode 100644 index 00000000000..7b58b2dd164 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDagRequestV1.h @@ -0,0 +1,45 @@ +#pragma once + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#pragma GCC diagnostic pop + +#include +#include + +namespace DB { + +/** build ch plan from dag request: dag executors -> query_string -> ch plan + */ +class InterpreterDagRequestV1 { +public: + InterpreterDagRequestV1(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); + + ~InterpreterDagRequestV1(); + + BlockIO execute(); + +private: + String buildSqlString(); + bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); + String exprToString(const tipb::Expr & expr, bool &succ); + bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); + bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss); + bool buildString(const tipb::Executor & executor, std::stringstream & ss); + CoprocessorContext & context; + tipb::DAGRequest & dag_request; + std::unordered_map column_name_from_ts; + std::unordered_map column_name_from_agg; + bool afterAgg; + std::unordered_map & getCurrentColumnNames() { + if(afterAgg) { + return column_name_from_agg; + } + return column_name_from_ts; + } + +}; + +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.cpp b/dbms/src/Interpreters/InterpreterDagRequestV2.cpp new file mode 100644 index 00000000000..8733d0d0aa5 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDagRequestV2.cpp @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "CoprocessorBuilderUtils.h" + +namespace DB { + + namespace ErrorCodes + { + extern const int TOO_MANY_COLUMNS; + } + + InterpreterDagRequestV2::InterpreterDagRequestV2(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + : context(context_), dag_request(dag_request_) { + (void)dag_request; + } + + bool InterpreterDagRequestV2::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { + if(!ts.has_table_id()) { + // do not have table id + return false; + } + TableID id = ts.table_id(); + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if(storage == nullptr) { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); + } + if(storage == nullptr) { + return false; + } + 
auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); + const auto * merge_tree = dynamic_cast(storage.get()); + if(!merge_tree) { + return false; + } + + Names required_columns; + for(const tipb::ColumnInfo & ci : ts.columns()) { + ColumnID cid = ci.column_id(); + if(cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) { + // cid out of bound + return false; + } + String name = merge_tree->getTableInfo().columns[cid - 1].name; + //todo handle output_offset + required_columns.push_back(name); + } + if(required_columns.empty()) { + // no column selected, must be something wrong + return false; + } + // todo handle alias column + const Settings & settings = context.ch_context.getSettingsRef(); + + if(settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) { + throw Exception("Limit for number of columns to read exceeded. " + "Requested: " + toString(required_columns.size()) + + ", maximum: " + settings.max_columns_to_read.toString(), + ErrorCodes::TOO_MANY_COLUMNS); + } + + size_t max_block_size = settings.max_block_size; + size_t max_streams = settings.max_threads; + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; + if(max_streams > 1) { + max_streams *= settings.max_streams_to_max_threads_ratio; + } + + //todo support index in + SelectQueryInfo query_info; + query_info.query = std::make_unique(); + query_info.mvcc_query_info = std::make_unique(); + query_info.mvcc_query_info->resolve_locks = true; + query_info.mvcc_query_info->read_tso = settings.read_tso; + RegionQueryInfo info; + info.region_id = context.kv_context.region_id(); + info.conf_version = context.kv_context.region_epoch().conf_ver(); + info.version = context.kv_context.region_epoch().version(); + auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); + if(!current_region) { + return false; + } + info.range_in_table = current_region->getHandleRangeByTable(id); + query_info.mvcc_query_info->regions_query_info.push_back(info); + query_info.mvcc_query_info->concurrent = 0.0; + pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams); + /// Set the limits and quota for reading data, the speed and time of the query. + { + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; + + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + * because the initiating server has a summary of the execution of the request on all servers. + * + * But limits on data size to read and maximum execution time are reasonable to check both on initiator and + * additionally on each remote server, because these limits are checked per block of data processed, + * and remote servers may process way more blocks of data than are received by initiator. 
+ */ + limits.min_execution_speed = settings.min_execution_speed; + limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + + QuotaForIntervals & quota = context.ch_context.getQuota(); + + pipeline.transform([&](auto & stream) + { + if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + { + p_stream->setLimits(limits); + p_stream->setQuota(quota); + } + }); + } + return true; + } + + //todo return the error message + bool InterpreterDagRequestV2::buildPlan(const tipb::Executor & executor, Pipeline & pipeline) { + switch (executor.tp()) { + case tipb::ExecType::TypeTableScan: + return buildTSPlan(executor.tbl_scan(), pipeline); + case tipb::ExecType::TypeIndexScan: + // index scan is not supported + return false; + case tipb::ExecType::TypeSelection: + return false; + case tipb::ExecType::TypeAggregation: + case tipb::ExecType::TypeStreamAgg: + return false; + case tipb::ExecType::TypeTopN: + return false; + case tipb::ExecType::TypeLimit: + return false; + } + } + + BlockIO InterpreterDagRequestV2::execute() { + Pipeline pipeline; + for(const tipb::Executor & executor : dag_request.executors()) { + if(!buildPlan(executor, pipeline)) { + return BlockIO(); + } + } + return BlockIO(); + } + InterpreterDagRequestV2::~InterpreterDagRequestV2() { + + } +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.h b/dbms/src/Interpreters/InterpreterDagRequestV2.h new file mode 100644 index 00000000000..62ec0236f7e --- /dev/null +++ b/dbms/src/Interpreters/InterpreterDagRequestV2.h @@ -0,0 +1,51 @@ +#pragma once + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#pragma GCC diagnostic pop + +#include +#include +#include "CoprocessorBuilderUtils.h" + +namespace DB { + +/** build ch plan from dag request: dag executors -> ch plan + */ +class InterpreterDagRequestV2 { +public: + InterpreterDagRequestV2(CoprocessorContext & context_, tipb::DAGRequest & dag_request); + + ~InterpreterDagRequestV2(); + + BlockIO execute(); + +private: + CoprocessorContext & context; + tipb::DAGRequest & dag_request; + struct Pipeline + { + BlockInputStreams streams; + + BlockInputStreamPtr & firstStream() { return streams.at(0); } + + template + void transform(Transform && transform) + { + for (auto & stream : streams) + transform(stream); + } + + bool hasMoreThanOneStream() const + { + return streams.size() > 1; + } + }; + + bool buildPlan(const tipb::Executor & executor, Pipeline & streams); + bool buildTSPlan(const tipb::TableScan & ts, Pipeline & streams); + +}; +} diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 53ca87ee9bc..4c967747808 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -29,6 +29,8 @@ struct Settings M(SettingString, regions, "", "the region need to be read.") \ M(SettingBool, resolve_locks, false, "tmt read tso.") \ M(SettingUInt64, read_tso, DEFAULT_MAX_READ_TSO, "tmt read tso.") \ + M(SettingInt64, records_per_chunk, DEFAULT_RECORDS_PER_CHUNK, "default chunk size for coprocessor.") \ + M(SettingString, coprocessor_plan_builder_version, "v1", "how to build ch plan in coprocessor handler, v1 means build the plan based on string, v2 means build the plan based on cop executor") \ M(SettingUInt64, min_compress_block_size, DEFAULT_MIN_COMPRESS_BLOCK_SIZE, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume 
of data for one mark.") \ M(SettingUInt64, max_compress_block_size, DEFAULT_MAX_COMPRESS_BLOCK_SIZE, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading") \ diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index be452e85d65..614ae9c6a9f 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -24,7 +24,9 @@ add_library (clickhouse-server-lib RootRequestHandler.cpp Server.cpp StatusFile.cpp - TCPHandler.cpp) + TCPHandler.cpp + FlashService.cpp + cop_test.cpp) target_link_libraries (clickhouse-server-lib clickhouse_common_io daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions) target_include_directories (clickhouse-server-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) @@ -105,6 +107,9 @@ else () target_include_directories (theflash BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) target_include_directories (theflash PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + add_executable (copClient cop_test.cpp) + target_link_libraries (copClient clickhouse-server-lib) + if (USE_EMBEDDED_COMPILER) target_link_libraries (theflash clickhouse-compiler-lib) endif () diff --git a/dbms/src/Server/FlashService.cpp b/dbms/src/Server/FlashService.cpp new file mode 100644 index 00000000000..b2f65aa17a0 --- /dev/null +++ b/dbms/src/Server/FlashService.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +const Int64 REQ_TYPE_DAG = 103; +//const Int64 REQ_TYPE_ANALYZE = 104; +//const Int64 REQ_TYPE_CHECKSUM = 105; + +FlashService::FlashService(const std::string & address_, IServer & server_) + : server(server_), + address(address_), + log(&Logger::get("FlashService")) +{ + grpc::ServerBuilder builder; + builder.AddListeningPort(address, grpc::InsecureServerCredentials()); + builder.RegisterService(this); + + // todo should set a reasonable value?? 
+    builder.SetMaxReceiveMessageSize(-1);
+    builder.SetMaxSendMessageSize(-1);
+
+    grpc_server = builder.BuildAndStart();
+
+    LOG_INFO(log, "Flash service listening on [" << address << "]");
+}
+
+FlashService::~FlashService()
+{
+    // wait 5 seconds for pending rpcs to gracefully stop
+    gpr_timespec deadline{5, 0, GPR_TIMESPAN};
+    LOG_DEBUG(log, "Begin shutting down grpc server");
+    grpc_server->Shutdown(deadline);
+    grpc_server->Wait();
+}
+
+String getClientMetaVar(grpc::ServerContext * grpc_context, String name, String default_val) {
+    if(grpc_context->client_metadata().count(name) != 1) {
+        return default_val;
+    } else {
+        // grpc::string_ref is not null-terminated, so pass the length explicitly
+        auto metadata_entry = grpc_context->client_metadata().find(name);
+        return String(metadata_entry->second.data(), metadata_entry->second.length());
+    }
+}
+
+::grpc::Status setClientInfo(grpc::ServerContext * grpc_context, Context & server_context) {
+    auto client_meta = grpc_context->client_metadata();
+    String query_id = getClientMetaVar(grpc_context, "query_id", "");
+    server_context.setCurrentQueryId(query_id);
+    ClientInfo & client_info = server_context.getClientInfo();
+    client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY;
+    client_info.interface = ClientInfo::Interface::GRPC;
+    std::string peer = grpc_context->peer();
+    auto pos = peer.find(':');
+    if(pos == std::string::npos) {
+        return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "invalid peer address");
+    }
+    std::string client_ip = peer.substr(pos + 1);
+    Poco::Net::SocketAddress client_address(client_ip);
+    client_info.current_address = client_address;
+    client_info.current_user = getClientMetaVar(grpc_context, "user", "");
+    std::string records_per_chunk_str = getClientMetaVar(grpc_context, "records_per_chunk", "");
+    if(!records_per_chunk_str.empty()) {
+        server_context.setSetting("records_per_chunk", records_per_chunk_str);
+    }
+    std::string builder_version = getClientMetaVar(grpc_context, "builder_version", "v1");
+    server_context.setSetting("coprocessor_plan_builder_version", builder_version);
+    return ::grpc::Status::OK;
+}
+
+grpc::Status FlashService::Coprocessor(grpc::ServerContext * grpc_context, const coprocessor::Request * request,
+    coprocessor::Response * response)
+{
+    LOG_DEBUG(log, "receive coprocessor request");
+    LOG_DEBUG(log, request->DebugString());
+    Context context = server.context();
+    context.setGlobalContext(server.context());
+    auto client_info_status = setClientInfo(grpc_context, context);
+    if(!client_info_status.ok()) {
+        return client_info_status;
+    }
+    if(request->tp() != REQ_TYPE_DAG) {
+        LOG_ERROR(log, "Flash service Coprocessor requests other than DAG are not implemented yet");
+        return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "Only DAG request is supported");
+    }
+    try {
+        CoprocessorContext cop_context(context, request->context(), *grpc_context);
+        CoprocessorHandler coprocessorHandler(request, response, cop_context);
+        if (coprocessorHandler.execute()) {
+            LOG_DEBUG(log, "Flash service Coprocessor finished");
+            return ::grpc::Status(::grpc::StatusCode::OK, "");
+        } else {
+            LOG_ERROR(log, "Flash service Coprocessor meet internal error");
+            return ::grpc::Status(::grpc::StatusCode::INTERNAL, "");
+        }
+    } catch (LockException & e) {
+        //todo set lock error info
+        LOG_ERROR(log, "meet lock exception");
+        // clear the data to avoid sending partial data
+        response->set_data("");
+    } catch (RegionException & e) {
+        // todo set region error info
+        LOG_ERROR(log, "meet region exception");
+        response->set_data("");
+    } catch (Exception & e) {
+        // todo return exception message
+        LOG_ERROR(log, "meet exception, errmsg: " + e.message());
+        response->set_data("");
+    } catch (...)
{ + LOG_ERROR(log, "meet unknown exception"); + response->set_data(""); + } + return ::grpc::Status(::grpc::StatusCode::INTERNAL, ""); +} + +} // namespace DB diff --git a/dbms/src/Server/FlashService.h b/dbms/src/Server/FlashService.h new file mode 100644 index 00000000000..baeac200657 --- /dev/null +++ b/dbms/src/Server/FlashService.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include "IServer.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + +namespace DB +{ + +using GRPCServerPtr = std::unique_ptr; +class FlashService; +using FlashServicePtr = std::shared_ptr; + +class FlashService final : public tikvpb::Tikv::Service, public std::enable_shared_from_this, private boost::noncopyable +{ +public: + FlashService(const std::string & address_, IServer & server_); + + ~FlashService() final; + + grpc::Status Coprocessor(grpc::ServerContext* context, const coprocessor::Request* request, coprocessor::Response* response); +private: + + IServer &server; + + std::string address; + + GRPCServerPtr grpc_server; + + Logger * log; + +}; + +} // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 030e0019f1c..537f3173936 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -37,6 +37,7 @@ #include "MetricsTransmitter.h" #include "StatusFile.h" #include "TCPHandlerFactory.h" +#include "FlashService.h" #if Poco_NetSSL_FOUND #include @@ -429,6 +430,20 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Shutted down raft service."); }); + FlashServicePtr flash_service = nullptr; + if(config().has("flash")) { + String flash_service_addr = config().getString("flash.service_addr"); + flash_service = std::make_shared(flash_service_addr, *this); + } + + SCOPE_EXIT({ + if (flash_service != nullptr) { + LOG_INFO(log, "Shutting down flash service."); + flash_service.reset(); + LOG_INFO(log, "Shutted down flash service."); + } + }); + { Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Server/cop_test.cpp new file mode 100644 index 00000000000..c3910fef2af --- /dev/null +++ b/dbms/src/Server/cop_test.cpp @@ -0,0 +1,125 @@ +#include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#include +#include +#pragma GCC diagnostic pop +#include +#include + + +using ChannelPtr = std::shared_ptr; +using SubPtr = std::shared_ptr; +static const int DAGREQUEST = 103; +class FlashClient { +private: + SubPtr sp; +public: + FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)){ + } + grpc::Status coprocessor(coprocessor::Request* rqst) { + grpc::ClientContext clientContext; + clientContext.AddMetadata("user_name",""); + clientContext.AddMetadata("builder_version","v1"); + coprocessor::Response response; + grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); + size_t column_num = 3; + if(status.ok()) { + // if status is ok, try to decode the result + tipb::SelectResponse selectResponse; + if(selectResponse.ParseFromString(response.data())) { + for(tipb::Chunk chunk : selectResponse.chunks()) { + size_t cursor = 0; + std::vector row_result; + const std::string &data = chunk.rows_data(); + while (cursor < data.size()) { + row_result.push_back(DB::DecodeDatum(cursor, data)); + if(row_result.size() == column_num) { + //print the result + std::cout << row_result[0].get() + << " "<< row_result[1].get() + << " "<< 
row_result[2].get() << std::endl; + row_result.clear(); + } + } + + } + } + } + return status; + } +}; + +using ClientPtr = std::shared_ptr; +grpc::Status rpcTest() { + ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); + ClientPtr clientPtr = std::make_shared(cp); + // construct a dag request + tipb::DAGRequest dagRequest; + dagRequest.set_start_ts(18446744073709551615uL); + tipb::Executor *executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeTableScan); + tipb::TableScan *ts = executor->mutable_tbl_scan(); + ts->set_table_id(41); + tipb::ColumnInfo * ci = ts->add_columns(); + ci->set_column_id(1); + ci = ts->add_columns(); + ci->set_column_id(2); + dagRequest.add_output_offsets(1); + dagRequest.add_output_offsets(0); + dagRequest.add_output_offsets(1); + executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeSelection); + tipb::Selection *selection = executor->mutable_selection(); + tipb::Expr *expr = selection->add_conditions(); + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(tipb::ScalarFuncSig::LTInt); + tipb::Expr *col = expr->add_children(); + tipb::Expr *value = expr->add_children(); + col->set_tp(tipb::ExprType::ColumnRef); + std::stringstream ss; + DB::EncodeNumber(2, ss); + col->set_val(ss.str()); + value->set_tp(tipb::ExprType::Int64); + ss.str(""); + DB::EncodeNumber(289,ss); + value->set_val(std::string(ss.str())); + + + // construct a coprocessor request + coprocessor::Request request; + //todo add context info + kvrpcpb::Context *ctx = request.mutable_context(); + ctx->set_region_id(2); + auto region_epoch = ctx->mutable_region_epoch(); + region_epoch->set_version(20); + region_epoch->set_conf_ver(2); + request.set_tp(DAGREQUEST); + request.set_data(dagRequest.SerializeAsString()); + //request.add_ranges(); + return clientPtr->coprocessor(&request); +} + +void codecTest() { + Int64 i = 123; + std::stringstream ss; + DB::EncodeNumber(i, ss); + std::string val = ss.str(); + std::stringstream decode_ss; + size_t cursor = 0; + DB::Field f = DB::DecodeDatum(cursor, val); + Int64 r = f.get(); + r++; +} + +int main() { +// std::cout << "Before rpcTest"<< std::endl; + grpc::Status ret = rpcTest(); +// codecTest(); +// std::cout << "End rpcTest " << std::endl; +// std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() +// << " " << ret.error_message() << std::endl; + return 0; +} diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp index 445a8bda57c..7c433c10dff 100644 --- a/dbms/src/Storages/Transaction/RegionTable.cpp +++ b/dbms/src/Storages/Transaction/RegionTable.cpp @@ -512,6 +512,22 @@ void RegionTable::traverseInternalRegionsByTable(const TableID table_id, std::fu callback(region_info.second); } +RegionPtr RegionTable::getRegionById(const TableID table_id, const RegionID region_id) { + auto & kvstore = context.getTMTContext().getKVStore(); + { + std::lock_guard lock(mutex); + auto & table = getOrCreateTable(table_id); + + for (const auto & region_info : table.regions) + { + if(region_info.second.region_id == region_id) { + return kvstore->getRegion(region_info.second.region_id); + } + } + } + return nullptr; +} + std::vector> RegionTable::getRegionsByTable(const TableID table_id) { auto & kvstore = context.getTMTContext().getKVStore(); diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index af9247ccb50..a13ae2ab0ce 100644 --- 
a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -177,6 +177,7 @@ class RegionTable : private boost::noncopyable void traverseInternalRegions(std::function && callback); void traverseInternalRegionsByTable(const TableID table_id, std::function && callback); std::vector> getRegionsByTable(const TableID table_id); + RegionPtr getRegionById(const TableID table_id, const RegionID region_id); static std::tuple, RegionReadStatus> getBlockInputStreamByRegion(TableID table_id, RegionPtr region, diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 1a436b0c3a5..91161b787a4 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -37,11 +37,14 @@ class TypeMapping : public ext::singleton public: using Creator = std::function; using TypeMap = std::unordered_map; + using CodecFlagMap = std::unordered_map; DataTypePtr getSigned(const ColumnInfo & column_info); DataTypePtr getUnsigned(const ColumnInfo & column_info); + TiDB::CodecFlag getCodecFlag(const DataTypePtr & dataTypePtr); + private: TypeMapping(); @@ -49,6 +52,8 @@ class TypeMapping : public ext::singleton TypeMap unsigned_type_map; + CodecFlagMap codec_flag_map; + friend class ext::singleton; }; @@ -61,7 +66,9 @@ TypeMapping::TypeMapping() #define M(tt, v, cf, cfu, ct, ctu) \ signed_type_map[TiDB::Type##tt] = getDataTypeByColumnInfoBase; \ - unsigned_type_map[TiDB::Type##tt] = getDataTypeByColumnInfoBase; + unsigned_type_map[TiDB::Type##tt] = getDataTypeByColumnInfoBase; \ + codec_flag_map[#ctu] = TiDB::CodecFlag##cfu; \ + codec_flag_map[#ct] = TiDB::CodecFlag##cf; COLUMN_TYPES(M) #undef M } @@ -78,6 +85,14 @@ DataTypePtr TypeMapping::getUnsigned(const ColumnInfo & column_info) return unsigned_type_map[column_info.tp](column_info); } +TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) { + // fixme: String's CodecFlag will be CodecFlagCompactBytes, which is wrong for Json type + return codec_flag_map[dataTypePtr->getFamilyName()]; +} + +TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr) { + return TypeMapping::instance().getCodecFlag(dataTypePtr); +} DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info) { diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index 50caf68e83a..d8b2fc32357 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -11,4 +11,6 @@ using ColumnInfo = TiDB::ColumnInfo; DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info); +TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr); + } From 4f37218ac42eedaf4633fd60e916e147741cdd91 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 30 Jul 2019 13:47:07 +0800 Subject: [PATCH 02/79] basic support for InterpreterDagRequestV2 --- dbms/src/Coprocessor/CoprocessorHandler.h | 1 - dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 ++ .../Interpreters/InterpreterDagRequestV2.cpp | 48 ++++++++++++++++++- .../Interpreters/InterpreterDagRequestV2.h | 6 +++ dbms/src/Parsers/ASTSelectQuery.h | 1 + dbms/src/Server/cop_test.cpp | 5 +- .../MergeTreeDataSelectExecutorCommon.hpp | 22 +++++---- 7 files changed, 71 insertions(+), 15 deletions(-) diff --git a/dbms/src/Coprocessor/CoprocessorHandler.h b/dbms/src/Coprocessor/CoprocessorHandler.h index 841adff4276..3a61233a939 100644 --- a/dbms/src/Coprocessor/CoprocessorHandler.h +++ 
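// Hedged usage sketch for the new TypeMapping helper above: map a ClickHouse
// data type to the TiDB codec flag used when encoding result datums. The
// lookup is keyed by the type's family name, so per the fixme above a String
// column currently resolves to CodecFlagCompactBytes. DataTypeInt64 is the
// standard ClickHouse type from DataTypes/DataTypesNumber.h.
DataTypePtr int64_type = std::make_shared<DataTypeInt64>();
TiDB::CodecFlag flag = getCodecFlagByDataType(int64_type); // keyed by getFamilyName(), here "Int64"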
b/dbms/src/Coprocessor/CoprocessorHandler.h @@ -32,7 +32,6 @@ class CoprocessorHandler { bool execute(); private: - String buildSqlString(); BlockIO buildCHPlan(); const coprocessor::Request *cop_request; coprocessor::Response *cop_response; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 02faff83e91..21225f2756b 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -173,6 +173,9 @@ ExpressionAnalyzer::ExpressionAnalyzer( do_global(do_global_), subqueries_for_sets(subqueries_for_set_) { select_query = typeid_cast(ast.get()); + if(select_query && select_query->is_fake_sel) { + return; + } if (!storage && select_query) { diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.cpp b/dbms/src/Interpreters/InterpreterDagRequestV2.cpp index 8733d0d0aa5..a930fd971c1 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV2.cpp +++ b/dbms/src/Interpreters/InterpreterDagRequestV2.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "CoprocessorBuilderUtils.h" namespace DB { @@ -19,7 +20,24 @@ namespace DB { InterpreterDagRequestV2::InterpreterDagRequestV2(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { - (void)dag_request; + for(const tipb::Executor & executor : dag_request.executors()) { + switch (executor.tp()) { + case tipb::ExecType::TypeSelection: + has_where = true; + break; + case tipb::ExecType::TypeStreamAgg: + case tipb::ExecType::TypeAggregation: + has_agg = true; + break; + case tipb::ExecType::TypeTopN: + has_orderby = true; + case tipb::ExecType::TypeLimit: + has_limit = true; + break; + default: + break; + } + } } bool InterpreterDagRequestV2::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { @@ -58,6 +76,19 @@ namespace DB { // no column selected, must be something wrong return false; } + + if(!has_agg) { + // if the dag request does not contain agg, then the final output is + // based on the output of table scan + for (auto i : dag_request.output_offsets()) { + if (i < 0 || i >= required_columns.size()) { + // array index out of bound + return false; + } + // do not have alias + final_project.emplace_back(required_columns[i], ""); + } + } // todo handle alias column const Settings & settings = context.ch_context.getSettingsRef(); @@ -78,6 +109,7 @@ namespace DB { //todo support index in SelectQueryInfo query_info; query_info.query = std::make_unique(); + ((ASTSelectQuery*)query_info.query.get())->is_fake_sel = true; query_info.mvcc_query_info = std::make_unique(); query_info.mvcc_query_info->resolve_locks = true; query_info.mvcc_query_info->read_tso = settings.read_tso; @@ -152,7 +184,19 @@ namespace DB { return BlockIO(); } } - return BlockIO(); + // add final project + auto stream_before_project = pipeline.firstStream(); + auto columns = stream_before_project->getHeader(); + NamesAndTypesList input_column; + for(auto column : columns.getColumnsWithTypeAndName()) { + input_column.emplace_back(column.name, column.type); + } + ExpressionActionsPtr project = std::make_shared(input_column, context.ch_context.getSettingsRef()); + project->add(ExpressionAction::project(final_project)); + auto final_stream = std::make_shared(stream_before_project, project); + BlockIO res; + res.in = final_stream; + return res; } InterpreterDagRequestV2::~InterpreterDagRequestV2() { diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.h b/dbms/src/Interpreters/InterpreterDagRequestV2.h 
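// Hedged, condensed sketch of the final-projection step that execute() adds
// above. `upstream` and `settings` stand for the pipeline's last stream and
// the current query settings, and the column names are invented; an empty
// alias in NamesWithAliases keeps the original column name.
NamesAndTypesList input_columns = upstream->getHeader().getNamesAndTypesList();
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(input_columns, settings);
actions->add(ExpressionAction::project({{"col_2", ""}, {"col_1", ""}}));
BlockInputStreamPtr projected = std::make_shared<ExpressionBlockInputStream>(upstream, actions);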
index 62ec0236f7e..d167fdcfc80 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV2.h +++ b/dbms/src/Interpreters/InterpreterDagRequestV2.h @@ -9,6 +9,7 @@ #include #include #include "CoprocessorBuilderUtils.h" +#include "ExpressionActions.h" namespace DB { @@ -25,6 +26,11 @@ class InterpreterDagRequestV2 { private: CoprocessorContext & context; tipb::DAGRequest & dag_request; + NamesWithAliases final_project; + bool has_where; + bool has_agg; + bool has_orderby; + bool has_limit; struct Pipeline { BlockInputStreams streams; diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index 96508073e17..fe1e64b43b4 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -20,6 +20,7 @@ class ASTSelectQuery : public IAST ASTPtr clone() const override; + bool is_fake_sel = false; bool raw_for_mutable = false; bool distinct = false; bool no_kvstore = false; diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Server/cop_test.cpp index c3910fef2af..deda54a4480 100644 --- a/dbms/src/Server/cop_test.cpp +++ b/dbms/src/Server/cop_test.cpp @@ -22,7 +22,7 @@ class FlashClient { grpc::Status coprocessor(coprocessor::Request* rqst) { grpc::ClientContext clientContext; clientContext.AddMetadata("user_name",""); - clientContext.AddMetadata("builder_version","v1"); + clientContext.AddMetadata("builder_version","v2"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); size_t column_num = 3; @@ -70,7 +70,7 @@ grpc::Status rpcTest() { dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); - executor = dagRequest.add_executors(); + /*executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); tipb::Selection *selection = executor->mutable_selection(); tipb::Expr *expr = selection->add_conditions(); @@ -86,6 +86,7 @@ grpc::Status rpcTest() { ss.str(""); DB::EncodeNumber(289,ss); value->set_val(std::string(ss.str())); + */ // construct a coprocessor request diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp index 9e8d4474c22..2b379fedf55 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp @@ -18,22 +18,24 @@ static inline void extendMutableEngineColumnNames(Names & column_names_to_read, /// make pk, version, delmark is always the first 3 columns, maybe some sample column will be added later. static inline void extendMutableEngineColumnNames(Names & column_names_to_read, const std::string & handle_col_name) { - // use std::set to make order same. 
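// Worked example (invented column names) for the reordering contract that the
// rewrite below implements: the handle/version/delete-mark columns are forced
// to the front, and, unlike the old std::set based code, the relative order of
// the remaining columns is preserved instead of being re-sorted.
Names cols = {"b", MutableSupport::version_column_name, "a", "pk"};
extendMutableEngineColumnNames(cols, /* handle_col_name = */ "pk");
// cols is now: "pk", version_column_name, delmark_column_name, "b", "a"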
- std::set names; - - for (auto & name : column_names_to_read) - names.emplace(std::move(name)); + std::set reserved_names; + reserved_names.insert(handle_col_name); + reserved_names.insert(MutableSupport::version_column_name); + reserved_names.insert(MutableSupport::delmark_column_name); + Names org_names; + + for (auto & name : column_names_to_read) { + if(reserved_names.count(name) == 0) { + org_names.emplace_back(std::move(name)); + } + } column_names_to_read.clear(); column_names_to_read.push_back(handle_col_name); column_names_to_read.push_back(MutableSupport::version_column_name); column_names_to_read.push_back(MutableSupport::delmark_column_name); - names.erase(MutableSupport::version_column_name); - names.erase(MutableSupport::delmark_column_name); - names.erase(handle_col_name); - - for (auto & name : names) + for (auto & name : org_names) column_names_to_read.emplace_back(std::move(name)); } From 85bfd5c9213ea400f7d7edaa9342e47ea7853d77 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 30 Jul 2019 18:52:18 +0800 Subject: [PATCH 03/79] code refine --- dbms/src/Coprocessor/CoprocessorHandler.cpp | 14 ++--- dbms/src/Interpreters/DagQueryInfo.cpp | 27 ++++++++++ dbms/src/Interpreters/DagQueryInfo.h | 32 ++++++++++++ ...agRequestV1.cpp => DagStringConverter.cpp} | 24 +++------ ...terDagRequestV1.h => DagStringConverter.h} | 12 ++--- dbms/src/Interpreters/IQueryInfo.h | 24 +++++++++ ...equestV2.cpp => InterpreterDagRequest.cpp} | 14 ++--- ...DagRequestV2.h => InterpreterDagRequest.h} | 13 ++--- dbms/src/Interpreters/StringQueryInfo.cpp | 32 ++++++++++++ dbms/src/Interpreters/StringQueryInfo.h | 32 ++++++++++++ dbms/src/Interpreters/executeQuery.cpp | 51 ++++++++++--------- dbms/src/Interpreters/executeQuery.h | 4 ++ dbms/src/Server/cop_test.cpp | 5 +- 13 files changed, 214 insertions(+), 70 deletions(-) create mode 100644 dbms/src/Interpreters/DagQueryInfo.cpp create mode 100644 dbms/src/Interpreters/DagQueryInfo.h rename dbms/src/Interpreters/{InterpreterDagRequestV1.cpp => DagStringConverter.cpp} (87%) rename dbms/src/Interpreters/{InterpreterDagRequestV1.h => DagStringConverter.h} (80%) create mode 100644 dbms/src/Interpreters/IQueryInfo.h rename dbms/src/Interpreters/{InterpreterDagRequestV2.cpp => InterpreterDagRequest.cpp} (94%) rename dbms/src/Interpreters/{InterpreterDagRequestV2.h => InterpreterDagRequest.h} (75%) create mode 100644 dbms/src/Interpreters/StringQueryInfo.cpp create mode 100644 dbms/src/Interpreters/StringQueryInfo.h diff --git a/dbms/src/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Coprocessor/CoprocessorHandler.cpp index ce849385a86..80edd485209 100644 --- a/dbms/src/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Coprocessor/CoprocessorHandler.cpp @@ -8,8 +8,9 @@ #include #include #include -#include -#include +#include +#include +#include namespace DB { @@ -29,12 +30,11 @@ CoprocessorHandler::~CoprocessorHandler() BlockIO CoprocessorHandler::buildCHPlan() { String builder_version = context.ch_context.getSettings().coprocessor_plan_builder_version; if(builder_version == "v1") { - InterpreterDagRequestV1 builder(context, dag_request); - return builder.execute(); + DagStringConverter converter(context, dag_request); + String query = converter.buildSqlString(); + return executeQuery(query, context.ch_context, false, QueryProcessingStage::Complete); } else if (builder_version == "v2"){ - //throw Exception("coprocessor plan builder version v2 is not supported yet"); - InterpreterDagRequestV2 builder(context, dag_request); - return builder.execute(); + return 
executeQuery(dag_request, context, QueryProcessingStage::Complete); } else { throw Exception("coprocessor plan builder version should be set to v1 or v2"); } diff --git a/dbms/src/Interpreters/DagQueryInfo.cpp b/dbms/src/Interpreters/DagQueryInfo.cpp new file mode 100644 index 00000000000..f491c16ecc4 --- /dev/null +++ b/dbms/src/Interpreters/DagQueryInfo.cpp @@ -0,0 +1,27 @@ + +#include +#include +#include + + +namespace DB +{ + + DagQueryInfo::DagQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) + : dag_request(dag_request_), coprocessorContext(coprocessorContext_) {} + + std::tuple DagQueryInfo::parse(size_t ) { + query = String("cop query"); + ast = std::make_shared(); + ((ASTSelectQuery*)ast.get())->is_fake_sel = true; + return std::make_tuple(query, ast); + } + + String DagQueryInfo::get_query_ignore_error(size_t ) { + return query; + } + + std::unique_ptr DagQueryInfo::getInterpreter(Context & , QueryProcessingStage::Enum ) { + return std::make_unique(coprocessorContext, dag_request); + } +} diff --git a/dbms/src/Interpreters/DagQueryInfo.h b/dbms/src/Interpreters/DagQueryInfo.h new file mode 100644 index 00000000000..c476bf93912 --- /dev/null +++ b/dbms/src/Interpreters/DagQueryInfo.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** IQueryInfo interface for different source of queries. + */ +class DagQueryInfo : public IQueryInfo +{ +public: + + DagQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_); + bool isInternalQuery() { return false;}; + virtual std::tuple parse(size_t max_query_size); + virtual String get_query_ignore_error(size_t max_query_size); + virtual std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage); + +private: + const tipb::DAGRequest & dag_request; + CoprocessorContext & coprocessorContext; + String query; + ASTPtr ast; +}; + +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV1.cpp b/dbms/src/Interpreters/DagStringConverter.cpp similarity index 87% rename from dbms/src/Interpreters/InterpreterDagRequestV1.cpp rename to dbms/src/Interpreters/DagStringConverter.cpp index 109dba6aaab..0d085fc6975 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV1.cpp +++ b/dbms/src/Interpreters/DagStringConverter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -11,7 +11,7 @@ namespace DB { - bool InterpreterDagRequestV1::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { + bool DagStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { TableID id; if(ts.has_table_id()) { id = ts.table_id(); @@ -46,7 +46,7 @@ namespace DB { return true; } - String InterpreterDagRequestV1::exprToString(const tipb::Expr & expr, bool &succ) { + String DagStringConverter::exprToString(const tipb::Expr & expr, bool &succ) { std::stringstream ss; succ = true; size_t cursor = 1; @@ -125,7 +125,7 @@ namespace DB { } } - bool InterpreterDagRequestV1::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { + bool DagStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { bool first = true; for(const tipb::Expr & expr : sel.conditions()) { bool succ = true; @@ -144,13 +144,13 @@ namespace DB { return true; } - bool InterpreterDagRequestV1::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { + bool DagStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { 
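// Hedged sketch tying the v2 pieces above together. `dag_request` and
// `cop_context` stand for a parsed tipb::DAGRequest and its surrounding
// CoprocessorContext; the template argument stripped from getInterpreter's
// return type is assumed to be std::unique_ptr<IInterpreter>.
DagQueryInfo query_info(dag_request, cop_context);
String query;
ASTPtr ast;
std::tie(query, ast) = query_info.parse(0);  // fabricates the fake SELECT; query == "cop query"
auto interpreter = query_info.getInterpreter(cop_context.ch_context, QueryProcessingStage::Complete);
BlockIO streams = interpreter->execute();    // runs InterpreterDagRequest under the hood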
ss << "LIMIT " << limit.limit() << " "; return true; } //todo return the error message - bool InterpreterDagRequestV1::buildString(const tipb::Executor & executor, std::stringstream & ss) { + bool DagStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { switch (executor.tp()) { case tipb::ExecType::TypeTableScan: return buildTSString(executor.tbl_scan(), ss); @@ -176,17 +176,12 @@ namespace DB { // currently, project is not pushed so always return false return false; } - InterpreterDagRequestV1::InterpreterDagRequestV1(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + DagStringConverter::DagStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { afterAgg = false; } - BlockIO InterpreterDagRequestV1::execute() { - String query = buildSqlString(); - return executeQuery(query, context.ch_context, false, QueryProcessingStage::Complete); - } - - String InterpreterDagRequestV1::buildSqlString() { + String DagStringConverter::buildSqlString() { std::stringstream query_buf; std::stringstream project; for(const tipb::Executor & executor : dag_request.executors()) { @@ -211,7 +206,4 @@ namespace DB { return project.str() + query_buf.str(); } - InterpreterDagRequestV1::~InterpreterDagRequestV1() { - - } } diff --git a/dbms/src/Interpreters/InterpreterDagRequestV1.h b/dbms/src/Interpreters/DagStringConverter.h similarity index 80% rename from dbms/src/Interpreters/InterpreterDagRequestV1.h rename to dbms/src/Interpreters/DagStringConverter.h index 7b58b2dd164..566e42b8ba1 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV1.h +++ b/dbms/src/Interpreters/DagStringConverter.h @@ -11,18 +11,14 @@ namespace DB { -/** build ch plan from dag request: dag executors -> query_string -> ch plan - */ -class InterpreterDagRequestV1 { +class DagStringConverter { public: - InterpreterDagRequestV1(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); + DagStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); - ~InterpreterDagRequestV1(); + ~DagStringConverter() = default; - BlockIO execute(); - -private: String buildSqlString(); +private: bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); String exprToString(const tipb::Expr & expr, bool &succ); bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); diff --git a/dbms/src/Interpreters/IQueryInfo.h b/dbms/src/Interpreters/IQueryInfo.h new file mode 100644 index 00000000000..5ef5c60dc33 --- /dev/null +++ b/dbms/src/Interpreters/IQueryInfo.h @@ -0,0 +1,24 @@ +#pragma once + + +#include +#include +#include + +namespace DB +{ + +/** IQueryInfo interface for different source of queries. 
+ */ +class IQueryInfo +{ +public: + + virtual bool isInternalQuery() = 0; + virtual std::tuple parse(size_t max_query_size) = 0; + virtual String get_query_ignore_error(size_t max_query_size) = 0; + virtual std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage) = 0; + virtual ~IQueryInfo() {} +}; + +} diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.cpp b/dbms/src/Interpreters/InterpreterDagRequest.cpp similarity index 94% rename from dbms/src/Interpreters/InterpreterDagRequestV2.cpp rename to dbms/src/Interpreters/InterpreterDagRequest.cpp index a930fd971c1..9a593b5887b 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV2.cpp +++ b/dbms/src/Interpreters/InterpreterDagRequest.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -9,7 +9,7 @@ #include #include #include -#include "CoprocessorBuilderUtils.h" +#include namespace DB { @@ -18,7 +18,7 @@ namespace DB { extern const int TOO_MANY_COLUMNS; } - InterpreterDagRequestV2::InterpreterDagRequestV2(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + InterpreterDagRequest::InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { for(const tipb::Executor & executor : dag_request.executors()) { switch (executor.tp()) { @@ -40,7 +40,7 @@ namespace DB { } } - bool InterpreterDagRequestV2::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { + bool InterpreterDagRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { if(!ts.has_table_id()) { // do not have table id return false; @@ -158,7 +158,7 @@ namespace DB { } //todo return the error message - bool InterpreterDagRequestV2::buildPlan(const tipb::Executor & executor, Pipeline & pipeline) { + bool InterpreterDagRequest::buildPlan(const tipb::Executor & executor, Pipeline & pipeline) { switch (executor.tp()) { case tipb::ExecType::TypeTableScan: return buildTSPlan(executor.tbl_scan(), pipeline); @@ -177,7 +177,7 @@ namespace DB { } } - BlockIO InterpreterDagRequestV2::execute() { + BlockIO InterpreterDagRequest::execute() { Pipeline pipeline; for(const tipb::Executor & executor : dag_request.executors()) { if(!buildPlan(executor, pipeline)) { @@ -198,7 +198,7 @@ namespace DB { res.in = final_stream; return res; } - InterpreterDagRequestV2::~InterpreterDagRequestV2() { + InterpreterDagRequest::~InterpreterDagRequest() { } } diff --git a/dbms/src/Interpreters/InterpreterDagRequestV2.h b/dbms/src/Interpreters/InterpreterDagRequest.h similarity index 75% rename from dbms/src/Interpreters/InterpreterDagRequestV2.h rename to dbms/src/Interpreters/InterpreterDagRequest.h index d167fdcfc80..02ecdfbcb6b 100644 --- a/dbms/src/Interpreters/InterpreterDagRequestV2.h +++ b/dbms/src/Interpreters/InterpreterDagRequest.h @@ -8,24 +8,25 @@ #include #include -#include "CoprocessorBuilderUtils.h" -#include "ExpressionActions.h" +#include +#include +#include namespace DB { /** build ch plan from dag request: dag executors -> ch plan */ -class InterpreterDagRequestV2 { +class InterpreterDagRequest : public IInterpreter { public: - InterpreterDagRequestV2(CoprocessorContext & context_, tipb::DAGRequest & dag_request); + InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request); - ~InterpreterDagRequestV2(); + ~InterpreterDagRequest(); BlockIO execute(); private: CoprocessorContext & context; - tipb::DAGRequest & dag_request; + const tipb::DAGRequest & dag_request; NamesWithAliases 
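// Hedged sketch of a third query source plugged into the IQueryInfo interface
// above. The template arguments stripped from the declarations are assumed to
// be std::tuple<String, ASTPtr> and std::unique_ptr<IInterpreter>, matching
// how executeQueryImpl consumes them later in this patch.
class ConstQueryInfo : public IQueryInfo
{
public:
    bool isInternalQuery() override { return true; } // internal: skip query log / process list
    std::tuple<String, ASTPtr> parse(size_t /*max_query_size*/) override
    {
        // a real source would build its AST here; DagQueryInfo's fake-SELECT
        // trick is one way to satisfy the downstream pipeline
        return std::make_tuple(query, ast);
    }
    String get_query_ignore_error(size_t /*max_query_size*/) override { return query; }
    std::unique_ptr<IInterpreter> getInterpreter(Context &, QueryProcessingStage::Enum) override;

private:
    String query = "const query";
    ASTPtr ast;
};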
final_project; bool has_where; bool has_agg; diff --git a/dbms/src/Interpreters/StringQueryInfo.cpp b/dbms/src/Interpreters/StringQueryInfo.cpp new file mode 100644 index 00000000000..d326f51dead --- /dev/null +++ b/dbms/src/Interpreters/StringQueryInfo.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include + + +namespace DB +{ + + StringQueryInfo::StringQueryInfo(const char *begin_, const char *end_, bool internal_) + : begin(begin_), end(end_), internal(internal_){} + std::tuple StringQueryInfo::parse(size_t max_query_size) { + ParserQuery parser(end); + size_t query_size; + /// TODO Parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size); + + /// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion. + if (!(begin <= ast->range.first && ast->range.second <= end)) + throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR); + query_size = ast->range.second - begin; + query = String(begin, begin + query_size); + return std::make_tuple(query, ast); + } + String StringQueryInfo::get_query_ignore_error(size_t max_query_size) { + return String(begin, begin + std::min(end - begin, static_cast(max_query_size))); + } + + std::unique_ptr StringQueryInfo::getInterpreter(Context & context, QueryProcessingStage::Enum stage) { + return InterpreterFactory::get(ast, context, stage); + } +} diff --git a/dbms/src/Interpreters/StringQueryInfo.h b/dbms/src/Interpreters/StringQueryInfo.h new file mode 100644 index 00000000000..b82e3b8f884 --- /dev/null +++ b/dbms/src/Interpreters/StringQueryInfo.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** IQueryInfo interface for different source of queries. + */ +class StringQueryInfo : public IQueryInfo +{ +public: + + StringQueryInfo(const char * begin_, const char * end_, bool internal_); + std::tuple parse(size_t max_query_size); + String get_query_ignore_error(size_t max_query_size); + std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage); + bool isInternalQuery() {return internal;}; + +private: + const char * begin; + const char * end; + bool internal; + String query; + ASTPtr ast; +}; + +} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index bac8cef33c5..89fd6c92073 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -23,6 +23,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace ProfileEvents @@ -132,10 +137,8 @@ static void onExceptionBeforeStart(const String & query, Context & context, time static std::tuple executeQueryImpl( - const char * begin, - const char * end, + IQueryInfo & queryInfo, Context & context, - bool internal, QueryProcessingStage::Enum stage) { ProfileEvents::increment(ProfileEvents::Query); @@ -145,31 +148,24 @@ static std::tuple executeQueryImpl( const Settings & settings = context.getSettingsRef(); - ParserQuery parser(end); ASTPtr ast; - size_t query_size; + String query; /// Don't limit the size of internal queries. size_t max_query_size = 0; - if (!internal) + if (!queryInfo.isInternalQuery()) max_query_size = settings.max_query_size; try { - /// TODO Parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size); - - /// Copy query into string. 
It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion. - if (!(begin <= ast->range.first && ast->range.second <= end)) - throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR); - query_size = ast->range.second - begin; + std::tie(query, ast) = queryInfo.parse(max_query_size); } catch (...) { - if (!internal) + if (!queryInfo.isInternalQuery()) { /// Anyway log the query. - String query = String(begin, begin + std::min(end - begin, static_cast(max_query_size))); + String q = queryInfo.get_query_ignore_error(max_query_size); logQuery(query.substr(0, settings.log_queries_cut_to_length), context); onExceptionBeforeStart(query, context, current_time); } @@ -177,12 +173,11 @@ static std::tuple executeQueryImpl( throw; } - String query(begin, query_size); BlockIO res; try { - if (!internal) + if (!queryInfo.isInternalQuery()) logQuery(query.substr(0, settings.log_queries_cut_to_length), context); /// Check the limits. @@ -195,7 +190,7 @@ static std::tuple executeQueryImpl( /// Put query to process list. But don't put SHOW PROCESSLIST query itself. ProcessList::EntryPtr process_list_entry; - if (!internal && nullptr == typeid_cast(&*ast)) + if (!queryInfo.isInternalQuery() && nullptr == typeid_cast(&*ast)) { process_list_entry = context.getProcessList().insert( query, @@ -206,7 +201,7 @@ static std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } - auto interpreter = InterpreterFactory::get(ast, context, stage); + auto interpreter = queryInfo.getInterpreter(context, stage); res = interpreter->execute(); /// Delayed initialization of query streams (required for KILL QUERY purposes) @@ -258,7 +253,7 @@ static std::tuple executeQueryImpl( elem.client_info = context.getClientInfo(); - bool log_queries = settings.log_queries && !internal; + bool log_queries = settings.log_queries && !queryInfo.isInternalQuery(); /// Log into system table start of query execution, if need. if (log_queries) @@ -363,7 +358,7 @@ static std::tuple executeQueryImpl( } }; - if (!internal && res.in) + if (!queryInfo.isInternalQuery() && res.in) { std::stringstream log_str; log_str << "Query pipeline:\n"; @@ -374,7 +369,7 @@ static std::tuple executeQueryImpl( } catch (...) 
{ - if (!internal) + if (!queryInfo.isInternalQuery()) onExceptionBeforeStart(query, context, current_time); throw; @@ -391,10 +386,17 @@ BlockIO executeQuery( QueryProcessingStage::Enum stage) { BlockIO streams; - std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage); + StringQueryInfo queryInfo(query.data(), query.data() + query.size(), internal); + std::tie(std::ignore, streams) = executeQueryImpl(queryInfo, context, stage); return streams; } +BlockIO executeQuery(const tipb::DAGRequest & dag_request, CoprocessorContext & context, QueryProcessingStage::Enum stage) { + BlockIO streams; + DagQueryInfo queryInfo(dag_request, context); + std::tie(std::ignore, streams) = executeQueryImpl(queryInfo, context.ch_context, stage); + return streams; +} void executeQuery( ReadBuffer & istr, @@ -432,7 +434,8 @@ void executeQuery( ASTPtr ast; BlockIO streams; - std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete); + StringQueryInfo queryInfo(begin, end, false); + std::tie(ast, streams) = executeQueryImpl(queryInfo, context, QueryProcessingStage::Complete); try { diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index cc333ea8cb9..db8c93e68cd 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB @@ -39,4 +41,6 @@ BlockIO executeQuery( QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// To which stage the query must be executed. ); +BlockIO executeQuery(const tipb::DAGRequest & dag_request, CoprocessorContext & context, QueryProcessingStage::Enum stage); + } diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Server/cop_test.cpp index deda54a4480..13559193ad0 100644 --- a/dbms/src/Server/cop_test.cpp +++ b/dbms/src/Server/cop_test.cpp @@ -70,7 +70,8 @@ grpc::Status rpcTest() { dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); - /*executor = dagRequest.add_executors(); + /* + executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); tipb::Selection *selection = executor->mutable_selection(); tipb::Expr *expr = selection->add_conditions(); @@ -86,7 +87,7 @@ grpc::Status rpcTest() { ss.str(""); DB::EncodeNumber(289,ss); value->set_val(std::string(ss.str())); - */ + */ // construct a coprocessor request From e1700c3558decdd21adab7578640be19378c1db9 Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 31 Jul 2019 10:54:13 +0800 Subject: [PATCH 04/79] tipb submodule use tipb master branch --- .gitmodules | 2 +- contrib/tipb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 505fe3ed19a..1fb6f88a2b1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,4 +40,4 @@ [submodule "contrib/tipb"] path = contrib/tipb url = https://github.com/pingcap/tipb.git - branch = tipb_cpp + branch = master diff --git a/contrib/tipb b/contrib/tipb index 961b01c984e..3a69b884cc9 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit 961b01c984ebbdc9723ee4dc928b6102d0ee2b88 +Subproject commit 3a69b884cc9793da55d7d4ef38dc79459d17583f From 0f82665e21c2b9649d8d97f6b30cd3eb904b9a3c Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 31 Jul 2019 12:52:12 +0800 Subject: [PATCH 05/79] rewrite build flow in InterpreterDagRequest --- dbms/src/Coprocessor/CoprocessorHandler.cpp | 4 +- .../Interpreters/InterpreterDagRequest.cpp | 
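// Hedged recap of the two executeQuery entry points after this refactor: SQL
// text is wrapped in a StringQueryInfo, a coprocessor DAG in a DagQueryInfo,
// and both funnel into the shared executeQueryImpl.
BlockIO runSql(const String & sql, Context & ctx)
{
    return executeQuery(sql, ctx, /* internal = */ false, QueryProcessingStage::Complete);
}
BlockIO runDag(const tipb::DAGRequest & req, CoprocessorContext & cop_ctx)
{
    return executeQuery(req, cop_ctx, QueryProcessingStage::Complete);
}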
77 +++++++++++-------- dbms/src/Interpreters/InterpreterDagRequest.h | 14 ++-- 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/dbms/src/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Coprocessor/CoprocessorHandler.cpp index 80edd485209..1470613d766 100644 --- a/dbms/src/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Coprocessor/CoprocessorHandler.cpp @@ -32,6 +32,9 @@ BlockIO CoprocessorHandler::buildCHPlan() { if(builder_version == "v1") { DagStringConverter converter(context, dag_request); String query = converter.buildSqlString(); + if(query.empty()) { + return BlockIO(); + } return executeQuery(query, context.ch_context, false, QueryProcessingStage::Complete); } else if (builder_version == "v2"){ return executeQuery(dag_request, context, QueryProcessingStage::Complete); @@ -42,7 +45,6 @@ BlockIO CoprocessorHandler::buildCHPlan() { bool CoprocessorHandler::execute() { context.ch_context.setSetting("read_tso", UInt64(dag_request.start_ts())); - //todo set region related info BlockIO streams = buildCHPlan(); if(!streams.in || streams.out) { // only query is allowed, so streams.in must not be null and streams.out must be null diff --git a/dbms/src/Interpreters/InterpreterDagRequest.cpp b/dbms/src/Interpreters/InterpreterDagRequest.cpp index 9a593b5887b..82e96411a6a 100644 --- a/dbms/src/Interpreters/InterpreterDagRequest.cpp +++ b/dbms/src/Interpreters/InterpreterDagRequest.cpp @@ -18,28 +18,43 @@ namespace DB { extern const int TOO_MANY_COLUMNS; } + static void assignOrThrowException(Int32 & index, Int32 value, String name) { + if(index != -1) { + throw Exception("Duplicated " + name + " in DAG request"); + } + index = value; + } + InterpreterDagRequest::InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { - for(const tipb::Executor & executor : dag_request.executors()) { - switch (executor.tp()) { + for(int i = 0; i < dag_request.executors_size(); i++) { + switch (dag_request.executors(i).tp()) { + case tipb::ExecType::TypeTableScan: + assignOrThrowException(ts_index, i, "TableScan"); + break; case tipb::ExecType::TypeSelection: - has_where = true; + assignOrThrowException(sel_index, i, "Selection"); break; case tipb::ExecType::TypeStreamAgg: case tipb::ExecType::TypeAggregation: - has_agg = true; + assignOrThrowException(agg_index, i, "Aggregation"); break; case tipb::ExecType::TypeTopN: - has_orderby = true; + assignOrThrowException(order_index, i, "Order"); case tipb::ExecType::TypeLimit: - has_limit = true; + assignOrThrowException(limit_index, i, "Limit"); break; default: - break; + throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString()); } } } + bool InterpreterDagRequest::buildSelPlan(const tipb::Selection & , Pipeline & ) { + return false; + } + + // the flow is the same as executeFetchcolumns bool InterpreterDagRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { if(!ts.has_table_id()) { // do not have table id @@ -77,7 +92,7 @@ namespace DB { return false; } - if(!has_agg) { + if(agg_index == -1) { // if the dag request does not contain agg, then the final output is // based on the output of table scan for (auto i : dag_request.output_offsets()) { @@ -158,32 +173,35 @@ namespace DB { } //todo return the error message - bool InterpreterDagRequest::buildPlan(const tipb::Executor & executor, Pipeline & pipeline) { - switch (executor.tp()) { - case tipb::ExecType::TypeTableScan: - return 
buildTSPlan(executor.tbl_scan(), pipeline);
-        case tipb::ExecType::TypeIndexScan:
-            // index scan is not supported
-            return false;
-        case tipb::ExecType::TypeSelection:
-            return false;
-        case tipb::ExecType::TypeAggregation:
-        case tipb::ExecType::TypeStreamAgg:
-            return false;
-        case tipb::ExecType::TypeTopN:
-            return false;
-        case tipb::ExecType::TypeLimit:
+    bool InterpreterDagRequest::buildPlan(Pipeline & pipeline) {
+        // step 1. build table scan
+        if(!buildTSPlan(dag_request.executors(ts_index).tbl_scan(), pipeline)) {
+            return false;
+        }
+        // step 2. build selection if needed
+        if(sel_index != -1) {
+            if(!buildSelPlan(dag_request.executors(sel_index).selection(), pipeline)) {
+                return false;
+            }
+        }
+        // step 3. build agg if needed
+        if(agg_index != -1) {
+            return false;
+        }
+        // step 4. build order by if needed
+        if(order_index != -1) {
+            return false;
+        }
+        // step 5. build limit if needed
+        if(limit_index != -1) {
+            return false;
+        }
+        return true;
+    }

     BlockIO InterpreterDagRequest::execute() {
         Pipeline pipeline;
-        for(const tipb::Executor & executor : dag_request.executors()) {
-            if(!buildPlan(executor, pipeline)) {
-                return BlockIO();
-            }
-        }
+        if(!buildPlan(pipeline)) {
+            // plan building failed, return empty streams
+            return BlockIO();
+        }
         // add final project
         auto stream_before_project = pipeline.firstStream();
         auto columns = stream_before_project->getHeader();
         NamesAndTypesList input_column;
         for(auto column : columns.getColumnsWithTypeAndName()) {
             input_column.emplace_back(column.name, column.type);
         }
         ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.ch_context.getSettingsRef());
         project->add(ExpressionAction::project(final_project));
         auto final_stream = std::make_shared<ExpressionBlockInputStream>(stream_before_project, project);
         BlockIO res;
         res.in = final_stream;
         return res;
     }
-    InterpreterDagRequest::~InterpreterDagRequest() {
-
-    }
 }
diff --git a/dbms/src/Interpreters/InterpreterDagRequest.h b/dbms/src/Interpreters/InterpreterDagRequest.h
index 02ecdfbcb6b..ac62c967277 100644
--- a/dbms/src/Interpreters/InterpreterDagRequest.h
+++ b/dbms/src/Interpreters/InterpreterDagRequest.h
@@ -20,7 +20,7 @@ class InterpreterDagRequest : public IInterpreter {
 public:
     InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request);

-    ~InterpreterDagRequest();
+    ~InterpreterDagRequest() = default;

     BlockIO execute();

@@ -28,10 +28,11 @@ class InterpreterDagRequest : public IInterpreter {
     CoprocessorContext & context;
     const tipb::DAGRequest & dag_request;
     NamesWithAliases final_project;
-    bool has_where;
-    bool has_agg;
-    bool has_orderby;
-    bool has_limit;
+    Int32 ts_index = -1;
+    Int32 sel_index = -1;
+    Int32 agg_index = -1;
+    Int32 order_index = -1;
+    Int32 limit_index = -1;

     struct Pipeline {
         BlockInputStreams streams;
@@ -51,8 +52,9 @@ class InterpreterDagRequest : public IInterpreter {
         }
     };

-    bool buildPlan(const tipb::Executor & executor, Pipeline & streams);
+    bool buildPlan(Pipeline & streams);
     bool buildTSPlan(const tipb::TableScan & ts, Pipeline & streams);
+    bool buildSelPlan(const tipb::Selection & sel, Pipeline & streams);
 };
 }

From a7655bcd5335359e0ba0095f8f38fcb209a8e62e Mon Sep 17 00:00:00 2001
From: xufei
Date: Wed, 31 Jul 2019 13:40:39 +0800
Subject: [PATCH 06/79] rename Dag to DAG

---
 dbms/src/Coprocessor/CoprocessorHandler.cpp | 6 +++---
 .../{DagQueryInfo.cpp => DAGQueryInfo.cpp} | 14 +++++++-------
 .../{DagQueryInfo.h => DAGQueryInfo.h} | 6 +++---
 ...tringConverter.cpp => DAGStringConverter.cpp} | 16 ++++++++--------
 ...DagStringConverter.h => DAGStringConverter.h} | 6 +++---
 ...rDagRequest.cpp => InterpreterDAGRequest.cpp} | 12 ++++++------
 ...reterDagRequest.h => InterpreterDAGRequest.h} | 6 +++---
 dbms/src/Interpreters/StringQueryInfo.h | 2 +-
 dbms/src/Interpreters/executeQuery.cpp | 4 ++--
 9 files changed, 36 insertions(+), 36 deletions(-)
 rename dbms/src/Interpreters/{DagQueryInfo.cpp => DAGQueryInfo.cpp} (54%)
 rename dbms/src/Interpreters/{DagQueryInfo.h =>
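// Hedged illustration of the executor-index scheme introduced above. In the
// constructor's switch the TypeTopN case falls through to TypeLimit, so a
// single TopN executor fills both order_index and limit_index:
tipb::DAGRequest req;
req.add_executors()->set_tp(tipb::ExecType::TypeTableScan); // index 0 -> ts_index
req.add_executors()->set_tp(tipb::ExecType::TypeSelection); // index 1 -> sel_index
req.add_executors()->set_tp(tipb::ExecType::TypeTopN);      // index 2 -> order_index and limit_index
// buildPlan() then assembles the pipeline in the fixed order
// scan -> selection -> aggregation -> order by -> limit.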
DAGQueryInfo.h} (81%) rename dbms/src/Interpreters/{DagStringConverter.cpp => DAGStringConverter.cpp} (93%) rename dbms/src/Interpreters/{DagStringConverter.h => DAGStringConverter.h} (89%) rename dbms/src/Interpreters/{InterpreterDagRequest.cpp => InterpreterDAGRequest.cpp} (96%) rename dbms/src/Interpreters/{InterpreterDagRequest.h => InterpreterDAGRequest.h} (90%) diff --git a/dbms/src/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Coprocessor/CoprocessorHandler.cpp index 1470613d766..272a28111cb 100644 --- a/dbms/src/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Coprocessor/CoprocessorHandler.cpp @@ -8,8 +8,8 @@ #include #include #include -#include -#include +#include +#include #include namespace DB @@ -30,7 +30,7 @@ CoprocessorHandler::~CoprocessorHandler() BlockIO CoprocessorHandler::buildCHPlan() { String builder_version = context.ch_context.getSettings().coprocessor_plan_builder_version; if(builder_version == "v1") { - DagStringConverter converter(context, dag_request); + DAGStringConverter converter(context, dag_request); String query = converter.buildSqlString(); if(query.empty()) { return BlockIO(); diff --git a/dbms/src/Interpreters/DagQueryInfo.cpp b/dbms/src/Interpreters/DAGQueryInfo.cpp similarity index 54% rename from dbms/src/Interpreters/DagQueryInfo.cpp rename to dbms/src/Interpreters/DAGQueryInfo.cpp index f491c16ecc4..77c6b2daef1 100644 --- a/dbms/src/Interpreters/DagQueryInfo.cpp +++ b/dbms/src/Interpreters/DAGQueryInfo.cpp @@ -1,27 +1,27 @@ #include -#include -#include +#include +#include namespace DB { - DagQueryInfo::DagQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) + DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) : dag_request(dag_request_), coprocessorContext(coprocessorContext_) {} - std::tuple DagQueryInfo::parse(size_t ) { + std::tuple DAGQueryInfo::parse(size_t ) { query = String("cop query"); ast = std::make_shared(); ((ASTSelectQuery*)ast.get())->is_fake_sel = true; return std::make_tuple(query, ast); } - String DagQueryInfo::get_query_ignore_error(size_t ) { + String DAGQueryInfo::get_query_ignore_error(size_t ) { return query; } - std::unique_ptr DagQueryInfo::getInterpreter(Context & , QueryProcessingStage::Enum ) { - return std::make_unique(coprocessorContext, dag_request); + std::unique_ptr DAGQueryInfo::getInterpreter(Context & , QueryProcessingStage::Enum ) { + return std::make_unique(coprocessorContext, dag_request); } } diff --git a/dbms/src/Interpreters/DagQueryInfo.h b/dbms/src/Interpreters/DAGQueryInfo.h similarity index 81% rename from dbms/src/Interpreters/DagQueryInfo.h rename to dbms/src/Interpreters/DAGQueryInfo.h index c476bf93912..826a07cfc33 100644 --- a/dbms/src/Interpreters/DagQueryInfo.h +++ b/dbms/src/Interpreters/DAGQueryInfo.h @@ -10,13 +10,13 @@ namespace DB { -/** IQueryInfo interface for different source of queries. +/** DAGQueryInfo for query represented by DAG request. 
*/ -class DagQueryInfo : public IQueryInfo +class DAGQueryInfo : public IQueryInfo { public: - DagQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_); + DAGQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_); bool isInternalQuery() { return false;}; virtual std::tuple parse(size_t max_query_size); virtual String get_query_ignore_error(size_t max_query_size); diff --git a/dbms/src/Interpreters/DagStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp similarity index 93% rename from dbms/src/Interpreters/DagStringConverter.cpp rename to dbms/src/Interpreters/DAGStringConverter.cpp index 0d085fc6975..f06f92704e4 100644 --- a/dbms/src/Interpreters/DagStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -11,7 +11,7 @@ namespace DB { - bool DagStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { + bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { TableID id; if(ts.has_table_id()) { id = ts.table_id(); @@ -46,7 +46,7 @@ namespace DB { return true; } - String DagStringConverter::exprToString(const tipb::Expr & expr, bool &succ) { + String DAGStringConverter::exprToString(const tipb::Expr & expr, bool &succ) { std::stringstream ss; succ = true; size_t cursor = 1; @@ -125,7 +125,7 @@ namespace DB { } } - bool DagStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { + bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { bool first = true; for(const tipb::Expr & expr : sel.conditions()) { bool succ = true; @@ -144,13 +144,13 @@ namespace DB { return true; } - bool DagStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { + bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { ss << "LIMIT " << limit.limit() << " "; return true; } //todo return the error message - bool DagStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { + bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { switch (executor.tp()) { case tipb::ExecType::TypeTableScan: return buildTSString(executor.tbl_scan(), ss); @@ -176,12 +176,12 @@ namespace DB { // currently, project is not pushed so always return false return false; } - DagStringConverter::DagStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { afterAgg = false; } - String DagStringConverter::buildSqlString() { + String DAGStringConverter::buildSqlString() { std::stringstream query_buf; std::stringstream project; for(const tipb::Executor & executor : dag_request.executors()) { diff --git a/dbms/src/Interpreters/DagStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h similarity index 89% rename from dbms/src/Interpreters/DagStringConverter.h rename to dbms/src/Interpreters/DAGStringConverter.h index 566e42b8ba1..cae42a54f19 100644 --- a/dbms/src/Interpreters/DagStringConverter.h +++ b/dbms/src/Interpreters/DAGStringConverter.h @@ -11,11 +11,11 @@ namespace DB { -class DagStringConverter { +class DAGStringConverter { public: - DagStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); + 
DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); - ~DagStringConverter() = default; + ~DAGStringConverter() = default; String buildSqlString(); private: diff --git a/dbms/src/Interpreters/InterpreterDagRequest.cpp b/dbms/src/Interpreters/InterpreterDAGRequest.cpp similarity index 96% rename from dbms/src/Interpreters/InterpreterDagRequest.cpp rename to dbms/src/Interpreters/InterpreterDAGRequest.cpp index 82e96411a6a..8a8e6fe4698 100644 --- a/dbms/src/Interpreters/InterpreterDagRequest.cpp +++ b/dbms/src/Interpreters/InterpreterDAGRequest.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -25,7 +25,7 @@ namespace DB { index = value; } - InterpreterDagRequest::InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) + InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { for(int i = 0; i < dag_request.executors_size(); i++) { switch (dag_request.executors(i).tp()) { @@ -50,12 +50,12 @@ namespace DB { } } - bool InterpreterDagRequest::buildSelPlan(const tipb::Selection & , Pipeline & ) { + bool InterpreterDAGRequest::buildSelPlan(const tipb::Selection & , Pipeline & ) { return false; } // the flow is the same as executeFetchcolumns - bool InterpreterDagRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { + bool InterpreterDAGRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { if(!ts.has_table_id()) { // do not have table id return false; @@ -173,7 +173,7 @@ namespace DB { } //todo return the error message - bool InterpreterDagRequest::buildPlan(Pipeline & pipeline) { + bool InterpreterDAGRequest::buildPlan(Pipeline & pipeline) { // step 1. build table scan if(!buildTSPlan(dag_request.executors(ts_index).tbl_scan(), pipeline)) { return false; @@ -199,7 +199,7 @@ namespace DB { return true; } - BlockIO InterpreterDagRequest::execute() { + BlockIO InterpreterDAGRequest::execute() { Pipeline pipeline; buildPlan(pipeline); // add final project diff --git a/dbms/src/Interpreters/InterpreterDagRequest.h b/dbms/src/Interpreters/InterpreterDAGRequest.h similarity index 90% rename from dbms/src/Interpreters/InterpreterDagRequest.h rename to dbms/src/Interpreters/InterpreterDAGRequest.h index ac62c967277..13a542b597a 100644 --- a/dbms/src/Interpreters/InterpreterDagRequest.h +++ b/dbms/src/Interpreters/InterpreterDAGRequest.h @@ -16,11 +16,11 @@ namespace DB { /** build ch plan from dag request: dag executors -> ch plan */ -class InterpreterDagRequest : public IInterpreter { +class InterpreterDAGRequest : public IInterpreter { public: - InterpreterDagRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request); + InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request); - ~InterpreterDagRequest() = default; + ~InterpreterDAGRequest() = default; BlockIO execute(); diff --git a/dbms/src/Interpreters/StringQueryInfo.h b/dbms/src/Interpreters/StringQueryInfo.h index b82e3b8f884..d5031c5d4f9 100644 --- a/dbms/src/Interpreters/StringQueryInfo.h +++ b/dbms/src/Interpreters/StringQueryInfo.h @@ -9,7 +9,7 @@ namespace DB { -/** IQueryInfo interface for different source of queries. +/** StringQueryInfo for query represented by string. 
*/ class StringQueryInfo : public IQueryInfo { diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 89fd6c92073..c06de5e6d2d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include namespace ProfileEvents @@ -393,7 +393,7 @@ BlockIO executeQuery( BlockIO executeQuery(const tipb::DAGRequest & dag_request, CoprocessorContext & context, QueryProcessingStage::Enum stage) { BlockIO streams; - DagQueryInfo queryInfo(dag_request, context); + DAGQueryInfo queryInfo(dag_request, context); std::tie(std::ignore, streams) = executeQueryImpl(queryInfo, context.ch_context, stage); return streams; } From f516f0053a496f7ff65b4dbfcec671169921fc88 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Thu, 1 Aug 2019 13:27:34 +0800 Subject: [PATCH 07/79] Update tipb submodule --- contrib/tipb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/tipb b/contrib/tipb index 3a69b884cc9..b2d318af5e8 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit 3a69b884cc9793da55d7d4ef38dc79459d17583f +Subproject commit b2d318af5e8af28f54a2c6422bc18631f65a8506 From 3b520c9cec743e786b48492ca7addeafb743efde Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 2 Aug 2019 13:54:56 +0800 Subject: [PATCH 08/79] basic support for selection/limit/topn executor in InterpreterDAGRequest --- .../Interpreters/CoprocessorBuilderUtils.cpp | 996 +++++++++++------- .../Interpreters/CoprocessorBuilderUtils.h | 26 +- .../Interpreters/DAGExpressionAnalyzer.cpp | 171 +++ dbms/src/Interpreters/DAGExpressionAnalyzer.h | 40 + dbms/src/Interpreters/DAGQueryInfo.cpp | 66 +- dbms/src/Interpreters/DAGQueryInfo.h | 61 +- dbms/src/Interpreters/DAGStringConverter.cpp | 318 +++--- dbms/src/Interpreters/DAGStringConverter.h | 40 +- .../Interpreters/InterpreterDAGRequest.cpp | 502 +++++---- dbms/src/Interpreters/InterpreterDAGRequest.h | 59 +- dbms/src/Server/cop_test.cpp | 104 +- dbms/src/Storages/Transaction/TypeMapping.cpp | 17 +- dbms/src/Storages/Transaction/TypeMapping.h | 7 + 13 files changed, 1552 insertions(+), 855 deletions(-) create mode 100644 dbms/src/Interpreters/DAGExpressionAnalyzer.cpp create mode 100644 dbms/src/Interpreters/DAGExpressionAnalyzer.h diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp index d8058b5c1bc..de720e0c7b4 100644 --- a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp +++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp @@ -1,403 +1,615 @@ #include + #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#pragma GCC diagnostic pop #include +#include +#include + +namespace DB +{ + +bool isFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::ScalarFunc: + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + case tipb::ExprType::GroupConcat: + case tipb::ExprType::Agg_BitAnd: + case tipb::ExprType::Agg_BitOr: + case tipb::ExprType::Agg_BitXor: + case tipb::ExprType::Std: + case tipb::ExprType::Stddev: + case tipb::ExprType::StddevPop: + case tipb::ExprType::StddevSamp: + case tipb::ExprType::VarPop: + case tipb::ExprType::VarSamp: + case tipb::ExprType::Variance: + case tipb::ExprType::JsonArrayAgg: + case tipb::ExprType::JsonObjectAgg: + return true; + default: + return 
false; + } +} -namespace DB { +const String & getFunctionName(const tipb::Expr & expr) +{ + if (isAggFunctionExpr(expr)) + { + if (!aggFunMap.count(expr.tp())) + { + throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported."); + } + return aggFunMap[expr.tp()]; + } + else + { + if (!scalarFunMap.count(expr.sig())) + { + throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported."); + } + return scalarFunMap[expr.sig()]; + } +} - std::unordered_map aggFunMap( +String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col) +{ + std::stringstream ss; + size_t cursor = 1; + Int64 columnId = 0; + String func_name; + Field f; + switch (expr.tp()) + { + case tipb::ExprType::Null: + return "NULL"; + case tipb::ExprType::Int64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Uint64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + return std::to_string(DecodeFloat64(cursor, expr.val())); + case tipb::ExprType::String: + // + return expr.val(); + case tipb::ExprType::Bytes: + return DecodeBytes(cursor, expr.val()); + case tipb::ExprType::ColumnRef: + columnId = DecodeInt(cursor, expr.val()); + if (columnId < 1 || columnId > (ColumnID)input_col.size()) { - {tipb::ExprType::Count, "count"}, - {tipb::ExprType::Sum, "sum"}, - {tipb::ExprType::Avg, "avg"}, - {tipb::ExprType::Min, "min"}, - {tipb::ExprType::Max, "max"}, - {tipb::ExprType::First, "any"}, - //{tipb::ExprType::GroupConcat, ""}, - //{tipb::ExprType::Agg_BitAnd, ""}, - //{tipb::ExprType::Agg_BitOr, ""}, - //{tipb::ExprType::Agg_BitXor, ""}, - //{tipb::ExprType::Std, ""}, - //{tipb::ExprType::Stddev, ""}, - //{tipb::ExprType::StddevPop, ""}, - //{tipb::ExprType::StddevSamp, ""}, - //{tipb::ExprType::VarPop, ""}, - //{tipb::ExprType::VarSamp, ""}, - //{tipb::ExprType::Variance, ""}, - //{tipb::ExprType::JsonArrayAgg, ""}, - //{tipb::ExprType::JsonObjectAgg, ""}, + throw Exception("out of bound"); } - ); - - std::unordered_map scalarFunMap( + return input_col.getNames()[columnId - 1]; + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + if (!aggFunMap.count(expr.tp())) + { + throw Exception("not supported"); + } + func_name = aggFunMap.find(expr.tp())->second; + break; + case tipb::ExprType::ScalarFunc: + if (!scalarFunMap.count(expr.sig())) { - {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, - {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, - {tipb::ScalarFuncSig::CastIntAsString, "cast"}, - {tipb::ScalarFuncSig::CastIntAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastIntAsTime, "cast"}, - {tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastIntAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastRealAsInt, "cast"}, - {tipb::ScalarFuncSig::CastRealAsReal, "cast"}, - {tipb::ScalarFuncSig::CastRealAsString, "cast"}, - {tipb::ScalarFuncSig::CastRealAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastRealAsTime, "cast"}, - {tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastRealAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastDecimalAsInt, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsReal, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsString, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsTime, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, - 
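// Hedged note on the ColumnRef decoding in exprToString above: the cursor
// starts at offset 1 (presumably skipping a leading flag byte in Expr.val) and
// the decoded integer is a 1-based position into the input schema. A minimal
// helper mirroring that contract, with a hypothetical name:
String resolveColumn(Int64 column_id, const NamesAndTypesList & input_col)
{
    if (column_id < 1 || column_id > (Int64)input_col.size())
        throw Exception("column id out of bound");
    return input_col.getNames()[column_id - 1];
}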
{tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastStringAsInt, "cast"}, - {tipb::ScalarFuncSig::CastStringAsReal, "cast"}, - {tipb::ScalarFuncSig::CastStringAsString, "cast"}, - {tipb::ScalarFuncSig::CastStringAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastStringAsTime, "cast"}, - {tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastStringAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastTimeAsInt, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsReal, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsString, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsTime, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsString, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsString, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, - - {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceString, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceDecimal, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceTime, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceDuration, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceJson, "coalesce"}, - - {tipb::ScalarFuncSig::LTInt, "less"}, - {tipb::ScalarFuncSig::LTReal, "less"}, - {tipb::ScalarFuncSig::LTString, "less"}, - {tipb::ScalarFuncSig::LTDecimal, "less"}, - {tipb::ScalarFuncSig::LTTime, "less"}, - {tipb::ScalarFuncSig::LTDuration, "less"}, - {tipb::ScalarFuncSig::LTJson, "less"}, - - {tipb::ScalarFuncSig::LEInt, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEReal, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEString, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEDecimal, "lessOrEquals"}, - {tipb::ScalarFuncSig::LETime, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEDuration, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEJson, "lessOrEquals"}, - - {tipb::ScalarFuncSig::GTInt, "greater"}, - {tipb::ScalarFuncSig::GTReal, "greater"}, - {tipb::ScalarFuncSig::GTString, "greater"}, - {tipb::ScalarFuncSig::GTDecimal, "greater"}, - {tipb::ScalarFuncSig::GTTime, "greater"}, - {tipb::ScalarFuncSig::GTDuration, "greater"}, - {tipb::ScalarFuncSig::GTJson, "greater"}, - - {tipb::ScalarFuncSig::GreatestInt, "greatest"}, - {tipb::ScalarFuncSig::GreatestReal, "greatest"}, - {tipb::ScalarFuncSig::GreatestString, "greatest"}, - {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, - {tipb::ScalarFuncSig::GreatestTime, "greatest"}, - - {tipb::ScalarFuncSig::LeastInt, "least"}, - {tipb::ScalarFuncSig::LeastReal, "least"}, - {tipb::ScalarFuncSig::LeastString, "least"}, - {tipb::ScalarFuncSig::LeastDecimal, "least"}, - {tipb::ScalarFuncSig::LeastTime, "least"}, - - //{tipb::ScalarFuncSig::IntervalInt, "cast"}, - //{tipb::ScalarFuncSig::IntervalReal, "cast"}, - - {tipb::ScalarFuncSig::GEInt, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEReal, "greaterOrEquals"}, - 
{tipb::ScalarFuncSig::GEString, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEDecimal, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GETime, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEDuration, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEJson, "greaterOrEquals"}, - - {tipb::ScalarFuncSig::EQInt, "equals"}, - {tipb::ScalarFuncSig::EQReal, "equals"}, - {tipb::ScalarFuncSig::EQString, "equals"}, - {tipb::ScalarFuncSig::EQDecimal, "equals"}, - {tipb::ScalarFuncSig::EQTime, "equals"}, - {tipb::ScalarFuncSig::EQDuration, "equals"}, - {tipb::ScalarFuncSig::EQJson, "equals"}, - - {tipb::ScalarFuncSig::NEInt, "notEquals"}, - {tipb::ScalarFuncSig::NEReal, "notEquals"}, - {tipb::ScalarFuncSig::NEString, "notEquals"}, - {tipb::ScalarFuncSig::NEDecimal, "notEquals"}, - {tipb::ScalarFuncSig::NETime, "notEquals"}, - {tipb::ScalarFuncSig::NEDuration, "notEquals"}, - {tipb::ScalarFuncSig::NEJson, "notEquals"}, - - //{tipb::ScalarFuncSig::NullEQInt, "cast"}, - //{tipb::ScalarFuncSig::NullEQReal, "cast"}, - //{tipb::ScalarFuncSig::NullEQString, "cast"}, - //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, - //{tipb::ScalarFuncSig::NullEQTime, "cast"}, - //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, - //{tipb::ScalarFuncSig::NullEQJson, "cast"}, - - {tipb::ScalarFuncSig::PlusReal, "plus"}, - {tipb::ScalarFuncSig::PlusDecimal, "plus"}, - {tipb::ScalarFuncSig::PlusInt, "plus"}, - - {tipb::ScalarFuncSig::MinusReal, "minus"}, - {tipb::ScalarFuncSig::MinusDecimal, "minus"}, - {tipb::ScalarFuncSig::MinusInt, "minus"}, - - {tipb::ScalarFuncSig::MultiplyReal, "multiply"}, - {tipb::ScalarFuncSig::MultiplyDecimal, "multiply"}, - {tipb::ScalarFuncSig::MultiplyInt, "multiply"}, - - {tipb::ScalarFuncSig::DivideReal, "divide"}, - {tipb::ScalarFuncSig::DivideDecimal, "divide"}, - {tipb::ScalarFuncSig::IntDivideInt, "intDiv"}, - {tipb::ScalarFuncSig::IntDivideDecimal, "divide"}, - - {tipb::ScalarFuncSig::ModReal, "modulo"}, - {tipb::ScalarFuncSig::ModDecimal, "modulo"}, - {tipb::ScalarFuncSig::ModInt, "modulo"}, - - {tipb::ScalarFuncSig::MultiplyIntUnsigned, "multiply"}, - - {tipb::ScalarFuncSig::AbsInt, "abs"}, - {tipb::ScalarFuncSig::AbsUInt, "abs"}, - {tipb::ScalarFuncSig::AbsReal, "abs"}, - {tipb::ScalarFuncSig::AbsDecimal, "abs"}, - - {tipb::ScalarFuncSig::CeilIntToDec, "ceil"}, - {tipb::ScalarFuncSig::CeilIntToInt, "ceil"}, - {tipb::ScalarFuncSig::CeilDecToInt, "ceil"}, - {tipb::ScalarFuncSig::CeilDecToDec, "ceil"}, - {tipb::ScalarFuncSig::CeilReal, "ceil"}, - - {tipb::ScalarFuncSig::FloorIntToDec, "floor"}, - {tipb::ScalarFuncSig::FloorIntToInt, "floor"}, - {tipb::ScalarFuncSig::FloorDecToInt, "floor"}, - {tipb::ScalarFuncSig::FloorDecToDec, "floor"}, - {tipb::ScalarFuncSig::FloorReal, "floor"}, - - {tipb::ScalarFuncSig::RoundReal, "round"}, - {tipb::ScalarFuncSig::RoundInt, "round"}, - {tipb::ScalarFuncSig::RoundDec, "round"}, - //{tipb::ScalarFuncSig::RoundWithFracReal, "cast"}, - //{tipb::ScalarFuncSig::RoundWithFracInt, "cast"}, - //{tipb::ScalarFuncSig::RoundWithFracDec, "cast"}, - - {tipb::ScalarFuncSig::Log1Arg, "log"}, - //{tipb::ScalarFuncSig::Log2Args, "cast"}, - {tipb::ScalarFuncSig::Log2, "log2"}, - {tipb::ScalarFuncSig::Log10, "log10"}, - - {tipb::ScalarFuncSig::Rand, "rand"}, - //{tipb::ScalarFuncSig::RandWithSeed, "cast"}, - - {tipb::ScalarFuncSig::Pow, "pow"}, - //{tipb::ScalarFuncSig::Conv, "cast"}, - //{tipb::ScalarFuncSig::CRC32, "cast"}, - //{tipb::ScalarFuncSig::Sign, "cast"}, - - {tipb::ScalarFuncSig::Sqrt, "sqrt"}, - {tipb::ScalarFuncSig::Acos, "acos"}, - {tipb::ScalarFuncSig::Asin, "asin"}, 
- {tipb::ScalarFuncSig::Atan1Arg, "atan"}, - //{tipb::ScalarFuncSig::Atan2Args, "cast"}, - {tipb::ScalarFuncSig::Cos, "cos"}, - //{tipb::ScalarFuncSig::Cot, "cast"}, - //{tipb::ScalarFuncSig::Degrees, "cast"}, - {tipb::ScalarFuncSig::Exp, "exp"}, - //{tipb::ScalarFuncSig::PI, "cast"}, - //{tipb::ScalarFuncSig::Radians, "cast"}, - {tipb::ScalarFuncSig::Sin, "sin"}, - {tipb::ScalarFuncSig::Tan, "tan"}, - {tipb::ScalarFuncSig::TruncateInt, "trunc"}, - {tipb::ScalarFuncSig::TruncateReal, "trunc"}, - //{tipb::ScalarFuncSig::TruncateDecimal, "cast"}, - - {tipb::ScalarFuncSig::LogicalAnd, "and"}, - {tipb::ScalarFuncSig::LogicalOr, "or"}, - {tipb::ScalarFuncSig::LogicalXor, "xor"}, - {tipb::ScalarFuncSig::UnaryNot, "not"}, - {tipb::ScalarFuncSig::UnaryMinusInt, "negate"}, - {tipb::ScalarFuncSig::UnaryMinusReal, "negate"}, - {tipb::ScalarFuncSig::UnaryMinusDecimal, "negate"}, - {tipb::ScalarFuncSig::DecimalIsNull, "isNull"}, - {tipb::ScalarFuncSig::DurationIsNull, "isNull"}, - {tipb::ScalarFuncSig::RealIsNull, "isNull"}, - {tipb::ScalarFuncSig::StringIsNull, "isNull"}, - {tipb::ScalarFuncSig::TimeIsNull, "isNull"}, - {tipb::ScalarFuncSig::IntIsNull, "isNull"}, - {tipb::ScalarFuncSig::JsonIsNull, "isNull"}, - - //{tipb::ScalarFuncSig::BitAndSig, "cast"}, - //{tipb::ScalarFuncSig::BitOrSig, "cast"}, - //{tipb::ScalarFuncSig::BitXorSig, "cast"}, - //{tipb::ScalarFuncSig::BitNegSig, "cast"}, - //{tipb::ScalarFuncSig::IntIsTrue, "cast"}, - //{tipb::ScalarFuncSig::RealIsTrue, "cast"}, - //{tipb::ScalarFuncSig::DecimalIsTrue, "cast"}, - //{tipb::ScalarFuncSig::IntIsFalse, "cast"}, - //{tipb::ScalarFuncSig::RealIsFalse, "cast"}, - //{tipb::ScalarFuncSig::DecimalIsFalse, "cast"}, - - //{tipb::ScalarFuncSig::LeftShift, "cast"}, - //{tipb::ScalarFuncSig::RightShift, "cast"}, - - //{tipb::ScalarFuncSig::BitCount, "cast"}, - //{tipb::ScalarFuncSig::GetParamString, "cast"}, - //{tipb::ScalarFuncSig::GetVar, "cast"}, - //{tipb::ScalarFuncSig::RowSig, "cast"}, - //{tipb::ScalarFuncSig::SetVar, "cast"}, - //{tipb::ScalarFuncSig::ValuesDecimal, "cast"}, - //{tipb::ScalarFuncSig::ValuesDuration, "cast"}, - //{tipb::ScalarFuncSig::ValuesInt, "cast"}, - //{tipb::ScalarFuncSig::ValuesJSON, "cast"}, - //{tipb::ScalarFuncSig::ValuesReal, "cast"}, - //{tipb::ScalarFuncSig::ValuesString, "cast"}, - //{tipb::ScalarFuncSig::ValuesTime, "cast"}, - - {tipb::ScalarFuncSig::InInt, "in"}, - {tipb::ScalarFuncSig::InReal, "in"}, - {tipb::ScalarFuncSig::InString, "in"}, - {tipb::ScalarFuncSig::InDecimal, "in"}, - {tipb::ScalarFuncSig::InTime, "in"}, - {tipb::ScalarFuncSig::InDuration, "in"}, - {tipb::ScalarFuncSig::InJson, "in"}, - - {tipb::ScalarFuncSig::IfNullInt, "ifNull"}, - {tipb::ScalarFuncSig::IfNullReal, "ifNull"}, - {tipb::ScalarFuncSig::IfNullString, "ifNull"}, - {tipb::ScalarFuncSig::IfNullDecimal, "ifNull"}, - {tipb::ScalarFuncSig::IfNullTime, "ifNull"}, - {tipb::ScalarFuncSig::IfNullDuration, "ifNull"}, - {tipb::ScalarFuncSig::IfNullJson, "ifNull"}, - - {tipb::ScalarFuncSig::IfInt, "if"}, - {tipb::ScalarFuncSig::IfReal, "if"}, - {tipb::ScalarFuncSig::IfString, "if"}, - {tipb::ScalarFuncSig::IfDecimal, "if"}, - {tipb::ScalarFuncSig::IfTime, "if"}, - {tipb::ScalarFuncSig::IfDuration, "if"}, - {tipb::ScalarFuncSig::IfJson, "if"}, - - //todo need further check for caseWithExpression and multiIf - {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDecimal, 
"caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, - - //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, - //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, - //{tipb::ScalarFuncSig::Compress, "cast"}, - //{tipb::ScalarFuncSig::MD5, "cast"}, - //{tipb::ScalarFuncSig::Password, "cast"}, - //{tipb::ScalarFuncSig::RandomBytes, "cast"}, - //{tipb::ScalarFuncSig::SHA1, "cast"}, - //{tipb::ScalarFuncSig::SHA2, "cast"}, - //{tipb::ScalarFuncSig::Uncompress, "cast"}, - //{tipb::ScalarFuncSig::UncompressedLength, "cast"}, - - //{tipb::ScalarFuncSig::Database, "cast"}, - //{tipb::ScalarFuncSig::FoundRows, "cast"}, - //{tipb::ScalarFuncSig::CurrentUser, "cast"}, - //{tipb::ScalarFuncSig::User, "cast"}, - //{tipb::ScalarFuncSig::ConnectionID, "cast"}, - //{tipb::ScalarFuncSig::LastInsertID, "cast"}, - //{tipb::ScalarFuncSig::LastInsertIDWithID, "cast"}, - //{tipb::ScalarFuncSig::Version, "cast"}, - //{tipb::ScalarFuncSig::TiDBVersion, "cast"}, - //{tipb::ScalarFuncSig::RowCount, "cast"}, - - //{tipb::ScalarFuncSig::Sleep, "cast"}, - //{tipb::ScalarFuncSig::Lock, "cast"}, - //{tipb::ScalarFuncSig::ReleaseLock, "cast"}, - //{tipb::ScalarFuncSig::DecimalAnyValue, "cast"}, - //{tipb::ScalarFuncSig::DurationAnyValue, "cast"}, - //{tipb::ScalarFuncSig::IntAnyValue, "cast"}, - //{tipb::ScalarFuncSig::JSONAnyValue, "cast"}, - //{tipb::ScalarFuncSig::RealAnyValue, "cast"}, - //{tipb::ScalarFuncSig::StringAnyValue, "cast"}, - //{tipb::ScalarFuncSig::TimeAnyValue, "cast"}, - //{tipb::ScalarFuncSig::InetAton, "cast"}, - //{tipb::ScalarFuncSig::InetNtoa, "cast"}, - //{tipb::ScalarFuncSig::Inet6Aton, "cast"}, - //{tipb::ScalarFuncSig::Inet6Ntoa, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"}, - //{tipb::ScalarFuncSig::IsIPv6, "cast"}, - //{tipb::ScalarFuncSig::UUID, "cast"}, - - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, 
"cast"}, + throw Exception("not supported"); } - ); + func_name = scalarFunMap.find(expr.sig())->second; + break; + default: + throw Exception("not supported"); + } + // build function expr + if (func_name == "in") + { + // for in, we could not represent the function expr using func_name(param1, param2, ...) + throw Exception("not supported"); + } + else + { + ss << func_name << "("; + bool first = true; + for (const tipb::Expr & child : expr.children()) + { + String s = exprToString(child, input_col); + if (first) + { + first = false; + } + else + { + ss << ", "; + } + ss << s; + } + ss << ") "; + return ss.str(); + } +} + +const String & getTypeName(const tipb::Expr & expr) { return tipb::ExprType_Name(expr.tp()); } + +String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns) +{ + return exprToString(expr, current_input_columns); +} + +bool isAggFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + case tipb::ExprType::GroupConcat: + case tipb::ExprType::Agg_BitAnd: + case tipb::ExprType::Agg_BitOr: + case tipb::ExprType::Agg_BitXor: + case tipb::ExprType::Std: + case tipb::ExprType::Stddev: + case tipb::ExprType::StddevPop: + case tipb::ExprType::StddevSamp: + case tipb::ExprType::VarPop: + case tipb::ExprType::VarSamp: + case tipb::ExprType::Variance: + case tipb::ExprType::JsonArrayAgg: + case tipb::ExprType::JsonObjectAgg: + return true; + default: + return false; + } +} + +bool isLiteralExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::Null: + case tipb::ExprType::Int64: + case tipb::ExprType::Uint64: + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + case tipb::ExprType::String: + case tipb::ExprType::Bytes: + case tipb::ExprType::MysqlBit: + case tipb::ExprType::MysqlDecimal: + case tipb::ExprType::MysqlDuration: + case tipb::ExprType::MysqlEnum: + case tipb::ExprType::MysqlHex: + case tipb::ExprType::MysqlSet: + case tipb::ExprType::MysqlTime: + case tipb::ExprType::MysqlJson: + case tipb::ExprType::ValueList: + return true; + default: + return false; + } } + +bool isColumnExpr(const tipb::Expr & expr) { return expr.tp() == tipb::ExprType::ColumnRef; } + +Field decodeLiteral(const tipb::Expr & expr) +{ + size_t cursor = 0; + switch (expr.tp()) + { + case tipb::ExprType::MysqlBit: + case tipb::ExprType::MysqlDecimal: + case tipb::ExprType::MysqlDuration: + case tipb::ExprType::MysqlEnum: + case tipb::ExprType::MysqlHex: + case tipb::ExprType::MysqlSet: + case tipb::ExprType::MysqlTime: + case tipb::ExprType::MysqlJson: + case tipb::ExprType::ValueList: + throw Exception("mysql type literal is not supported yet"); + default: + return DecodeDatum(cursor, expr.val()); + } +} + +ColumnID getColumnID(const tipb::Expr & expr) +{ + size_t cursor = 1; + return DecodeInt(cursor, expr.val()); +} + +std::unordered_map aggFunMap({ + {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Avg, "avg"}, {tipb::ExprType::Min, "min"}, + {tipb::ExprType::Max, "max"}, {tipb::ExprType::First, "any"}, + //{tipb::ExprType::GroupConcat, ""}, + //{tipb::ExprType::Agg_BitAnd, ""}, + //{tipb::ExprType::Agg_BitOr, ""}, + //{tipb::ExprType::Agg_BitXor, ""}, + //{tipb::ExprType::Std, ""}, + //{tipb::ExprType::Stddev, ""}, + //{tipb::ExprType::StddevPop, ""}, + //{tipb::ExprType::StddevSamp, ""}, + //{tipb::ExprType::VarPop, 
""}, + //{tipb::ExprType::VarSamp, ""}, + //{tipb::ExprType::Variance, ""}, + //{tipb::ExprType::JsonArrayAgg, ""}, + //{tipb::ExprType::JsonObjectAgg, ""}, +}); + +std::unordered_map scalarFunMap({ + {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, + {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsString, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsTime, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastIntAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastRealAsInt, "cast"}, + {tipb::ScalarFuncSig::CastRealAsReal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsString, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsTime, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastRealAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDecimalAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsString, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastStringAsInt, "cast"}, + {tipb::ScalarFuncSig::CastStringAsReal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsString, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsTime, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastStringAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastTimeAsInt, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsReal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsString, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsTime, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsString, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsString, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, + + {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceString, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDecimal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceTime, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDuration, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceJson, "coalesce"}, + + {tipb::ScalarFuncSig::LTInt, "less"}, + {tipb::ScalarFuncSig::LTReal, "less"}, + {tipb::ScalarFuncSig::LTString, "less"}, + {tipb::ScalarFuncSig::LTDecimal, "less"}, + {tipb::ScalarFuncSig::LTTime, "less"}, + {tipb::ScalarFuncSig::LTDuration, "less"}, + {tipb::ScalarFuncSig::LTJson, "less"}, + + {tipb::ScalarFuncSig::LEInt, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEReal, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEString, "lessOrEquals"}, + 
{tipb::ScalarFuncSig::LEDecimal, "lessOrEquals"}, + {tipb::ScalarFuncSig::LETime, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEDuration, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEJson, "lessOrEquals"}, + + {tipb::ScalarFuncSig::GTInt, "greater"}, + {tipb::ScalarFuncSig::GTReal, "greater"}, + {tipb::ScalarFuncSig::GTString, "greater"}, + {tipb::ScalarFuncSig::GTDecimal, "greater"}, + {tipb::ScalarFuncSig::GTTime, "greater"}, + {tipb::ScalarFuncSig::GTDuration, "greater"}, + {tipb::ScalarFuncSig::GTJson, "greater"}, + + {tipb::ScalarFuncSig::GreatestInt, "greatest"}, + {tipb::ScalarFuncSig::GreatestReal, "greatest"}, + {tipb::ScalarFuncSig::GreatestString, "greatest"}, + {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, + {tipb::ScalarFuncSig::GreatestTime, "greatest"}, + + {tipb::ScalarFuncSig::LeastInt, "least"}, + {tipb::ScalarFuncSig::LeastReal, "least"}, + {tipb::ScalarFuncSig::LeastString, "least"}, + {tipb::ScalarFuncSig::LeastDecimal, "least"}, + {tipb::ScalarFuncSig::LeastTime, "least"}, + + //{tipb::ScalarFuncSig::IntervalInt, "cast"}, + //{tipb::ScalarFuncSig::IntervalReal, "cast"}, + + {tipb::ScalarFuncSig::GEInt, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEReal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEString, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDecimal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GETime, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDuration, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEJson, "greaterOrEquals"}, + + {tipb::ScalarFuncSig::EQInt, "equals"}, + {tipb::ScalarFuncSig::EQReal, "equals"}, + {tipb::ScalarFuncSig::EQString, "equals"}, + {tipb::ScalarFuncSig::EQDecimal, "equals"}, + {tipb::ScalarFuncSig::EQTime, "equals"}, + {tipb::ScalarFuncSig::EQDuration, "equals"}, + {tipb::ScalarFuncSig::EQJson, "equals"}, + + {tipb::ScalarFuncSig::NEInt, "notEquals"}, + {tipb::ScalarFuncSig::NEReal, "notEquals"}, + {tipb::ScalarFuncSig::NEString, "notEquals"}, + {tipb::ScalarFuncSig::NEDecimal, "notEquals"}, + {tipb::ScalarFuncSig::NETime, "notEquals"}, + {tipb::ScalarFuncSig::NEDuration, "notEquals"}, + {tipb::ScalarFuncSig::NEJson, "notEquals"}, + + //{tipb::ScalarFuncSig::NullEQInt, "cast"}, + //{tipb::ScalarFuncSig::NullEQReal, "cast"}, + //{tipb::ScalarFuncSig::NullEQString, "cast"}, + //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, + //{tipb::ScalarFuncSig::NullEQTime, "cast"}, + //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, + //{tipb::ScalarFuncSig::NullEQJson, "cast"}, + + {tipb::ScalarFuncSig::PlusReal, "plus"}, + {tipb::ScalarFuncSig::PlusDecimal, "plus"}, + {tipb::ScalarFuncSig::PlusInt, "plus"}, + + {tipb::ScalarFuncSig::MinusReal, "minus"}, + {tipb::ScalarFuncSig::MinusDecimal, "minus"}, + {tipb::ScalarFuncSig::MinusInt, "minus"}, + + {tipb::ScalarFuncSig::MultiplyReal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyDecimal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyInt, "multiply"}, + + {tipb::ScalarFuncSig::DivideReal, "divide"}, + {tipb::ScalarFuncSig::DivideDecimal, "divide"}, + {tipb::ScalarFuncSig::IntDivideInt, "intDiv"}, + {tipb::ScalarFuncSig::IntDivideDecimal, "divide"}, + + {tipb::ScalarFuncSig::ModReal, "modulo"}, + {tipb::ScalarFuncSig::ModDecimal, "modulo"}, + {tipb::ScalarFuncSig::ModInt, "modulo"}, + + {tipb::ScalarFuncSig::MultiplyIntUnsigned, "multiply"}, + + {tipb::ScalarFuncSig::AbsInt, "abs"}, + {tipb::ScalarFuncSig::AbsUInt, "abs"}, + {tipb::ScalarFuncSig::AbsReal, "abs"}, + {tipb::ScalarFuncSig::AbsDecimal, "abs"}, + + {tipb::ScalarFuncSig::CeilIntToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilIntToInt, 
"ceil"}, + {tipb::ScalarFuncSig::CeilDecToInt, "ceil"}, + {tipb::ScalarFuncSig::CeilDecToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilReal, "ceil"}, + + {tipb::ScalarFuncSig::FloorIntToDec, "floor"}, + {tipb::ScalarFuncSig::FloorIntToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToDec, "floor"}, + {tipb::ScalarFuncSig::FloorReal, "floor"}, + + {tipb::ScalarFuncSig::RoundReal, "round"}, + {tipb::ScalarFuncSig::RoundInt, "round"}, + {tipb::ScalarFuncSig::RoundDec, "round"}, + //{tipb::ScalarFuncSig::RoundWithFracReal, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracInt, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracDec, "cast"}, + + {tipb::ScalarFuncSig::Log1Arg, "log"}, + //{tipb::ScalarFuncSig::Log2Args, "cast"}, + {tipb::ScalarFuncSig::Log2, "log2"}, + {tipb::ScalarFuncSig::Log10, "log10"}, + + {tipb::ScalarFuncSig::Rand, "rand"}, + //{tipb::ScalarFuncSig::RandWithSeed, "cast"}, + + {tipb::ScalarFuncSig::Pow, "pow"}, + //{tipb::ScalarFuncSig::Conv, "cast"}, + //{tipb::ScalarFuncSig::CRC32, "cast"}, + //{tipb::ScalarFuncSig::Sign, "cast"}, + + {tipb::ScalarFuncSig::Sqrt, "sqrt"}, + {tipb::ScalarFuncSig::Acos, "acos"}, + {tipb::ScalarFuncSig::Asin, "asin"}, + {tipb::ScalarFuncSig::Atan1Arg, "atan"}, + //{tipb::ScalarFuncSig::Atan2Args, "cast"}, + {tipb::ScalarFuncSig::Cos, "cos"}, + //{tipb::ScalarFuncSig::Cot, "cast"}, + //{tipb::ScalarFuncSig::Degrees, "cast"}, + {tipb::ScalarFuncSig::Exp, "exp"}, + //{tipb::ScalarFuncSig::PI, "cast"}, + //{tipb::ScalarFuncSig::Radians, "cast"}, + {tipb::ScalarFuncSig::Sin, "sin"}, + {tipb::ScalarFuncSig::Tan, "tan"}, + {tipb::ScalarFuncSig::TruncateInt, "trunc"}, + {tipb::ScalarFuncSig::TruncateReal, "trunc"}, + //{tipb::ScalarFuncSig::TruncateDecimal, "cast"}, + + {tipb::ScalarFuncSig::LogicalAnd, "and"}, + {tipb::ScalarFuncSig::LogicalOr, "or"}, + {tipb::ScalarFuncSig::LogicalXor, "xor"}, + {tipb::ScalarFuncSig::UnaryNot, "not"}, + {tipb::ScalarFuncSig::UnaryMinusInt, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusReal, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusDecimal, "negate"}, + {tipb::ScalarFuncSig::DecimalIsNull, "isNull"}, + {tipb::ScalarFuncSig::DurationIsNull, "isNull"}, + {tipb::ScalarFuncSig::RealIsNull, "isNull"}, + {tipb::ScalarFuncSig::StringIsNull, "isNull"}, + {tipb::ScalarFuncSig::TimeIsNull, "isNull"}, + {tipb::ScalarFuncSig::IntIsNull, "isNull"}, + {tipb::ScalarFuncSig::JsonIsNull, "isNull"}, + + //{tipb::ScalarFuncSig::BitAndSig, "cast"}, + //{tipb::ScalarFuncSig::BitOrSig, "cast"}, + //{tipb::ScalarFuncSig::BitXorSig, "cast"}, + //{tipb::ScalarFuncSig::BitNegSig, "cast"}, + //{tipb::ScalarFuncSig::IntIsTrue, "cast"}, + //{tipb::ScalarFuncSig::RealIsTrue, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsTrue, "cast"}, + //{tipb::ScalarFuncSig::IntIsFalse, "cast"}, + //{tipb::ScalarFuncSig::RealIsFalse, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsFalse, "cast"}, + + //{tipb::ScalarFuncSig::LeftShift, "cast"}, + //{tipb::ScalarFuncSig::RightShift, "cast"}, + + //{tipb::ScalarFuncSig::BitCount, "cast"}, + //{tipb::ScalarFuncSig::GetParamString, "cast"}, + //{tipb::ScalarFuncSig::GetVar, "cast"}, + //{tipb::ScalarFuncSig::RowSig, "cast"}, + //{tipb::ScalarFuncSig::SetVar, "cast"}, + //{tipb::ScalarFuncSig::ValuesDecimal, "cast"}, + //{tipb::ScalarFuncSig::ValuesDuration, "cast"}, + //{tipb::ScalarFuncSig::ValuesInt, "cast"}, + //{tipb::ScalarFuncSig::ValuesJSON, "cast"}, + //{tipb::ScalarFuncSig::ValuesReal, "cast"}, + //{tipb::ScalarFuncSig::ValuesString, "cast"}, + 
//{tipb::ScalarFuncSig::ValuesTime, "cast"}, + + {tipb::ScalarFuncSig::InInt, "in"}, + {tipb::ScalarFuncSig::InReal, "in"}, + {tipb::ScalarFuncSig::InString, "in"}, + {tipb::ScalarFuncSig::InDecimal, "in"}, + {tipb::ScalarFuncSig::InTime, "in"}, + {tipb::ScalarFuncSig::InDuration, "in"}, + {tipb::ScalarFuncSig::InJson, "in"}, + + {tipb::ScalarFuncSig::IfNullInt, "ifNull"}, + {tipb::ScalarFuncSig::IfNullReal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullString, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDecimal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullTime, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDuration, "ifNull"}, + {tipb::ScalarFuncSig::IfNullJson, "ifNull"}, + + {tipb::ScalarFuncSig::IfInt, "if"}, + {tipb::ScalarFuncSig::IfReal, "if"}, + {tipb::ScalarFuncSig::IfString, "if"}, + {tipb::ScalarFuncSig::IfDecimal, "if"}, + {tipb::ScalarFuncSig::IfTime, "if"}, + {tipb::ScalarFuncSig::IfDuration, "if"}, + {tipb::ScalarFuncSig::IfJson, "if"}, + + //todo need further check for caseWithExpression and multiIf + {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, + + //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, + //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, + //{tipb::ScalarFuncSig::Compress, "cast"}, + //{tipb::ScalarFuncSig::MD5, "cast"}, + //{tipb::ScalarFuncSig::Password, "cast"}, + //{tipb::ScalarFuncSig::RandomBytes, "cast"}, + //{tipb::ScalarFuncSig::SHA1, "cast"}, + //{tipb::ScalarFuncSig::SHA2, "cast"}, + //{tipb::ScalarFuncSig::Uncompress, "cast"}, + //{tipb::ScalarFuncSig::UncompressedLength, "cast"}, + + //{tipb::ScalarFuncSig::Database, "cast"}, + //{tipb::ScalarFuncSig::FoundRows, "cast"}, + //{tipb::ScalarFuncSig::CurrentUser, "cast"}, + //{tipb::ScalarFuncSig::User, "cast"}, + //{tipb::ScalarFuncSig::ConnectionID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertIDWithID, "cast"}, + //{tipb::ScalarFuncSig::Version, "cast"}, + //{tipb::ScalarFuncSig::TiDBVersion, "cast"}, + //{tipb::ScalarFuncSig::RowCount, "cast"}, + + //{tipb::ScalarFuncSig::Sleep, "cast"}, + //{tipb::ScalarFuncSig::Lock, "cast"}, + //{tipb::ScalarFuncSig::ReleaseLock, "cast"}, + //{tipb::ScalarFuncSig::DecimalAnyValue, "cast"}, + //{tipb::ScalarFuncSig::DurationAnyValue, "cast"}, + //{tipb::ScalarFuncSig::IntAnyValue, "cast"}, + //{tipb::ScalarFuncSig::JSONAnyValue, "cast"}, + //{tipb::ScalarFuncSig::RealAnyValue, "cast"}, + //{tipb::ScalarFuncSig::StringAnyValue, "cast"}, + //{tipb::ScalarFuncSig::TimeAnyValue, "cast"}, + //{tipb::ScalarFuncSig::InetAton, "cast"}, + //{tipb::ScalarFuncSig::InetNtoa, "cast"}, + //{tipb::ScalarFuncSig::Inet6Aton, "cast"}, + //{tipb::ScalarFuncSig::Inet6Ntoa, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"}, + //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"}, + //{tipb::ScalarFuncSig::IsIPv6, "cast"}, + //{tipb::ScalarFuncSig::UUID, "cast"}, + + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + 
{tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, + {tipb::ScalarFuncSig::Uncompress, "cast"}, +}); +} // namespace DB diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.h b/dbms/src/Interpreters/CoprocessorBuilderUtils.h index 908a8638c77..22cf460141b 100644 --- a/dbms/src/Interpreters/CoprocessorBuilderUtils.h +++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.h @@ -2,9 +2,27 @@ #include -namespace DB { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop - extern std::unordered_map aggFunMap; - extern std::unordered_map scalarFunMap; +#include -} +namespace DB +{ + +bool isLiteralExpr(const tipb::Expr & expr); +Field decodeLiteral(const tipb::Expr & expr); +bool isFunctionExpr(const tipb::Expr & expr); +bool isAggFunctionExpr(const tipb::Expr & expr); +const String & getFunctionName(const tipb::Expr & expr); +bool isColumnExpr(const tipb::Expr & expr); +ColumnID getColumnID(const tipb::Expr & expr); +String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns); +const String & getTypeName(const tipb::Expr & expr); +String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col); +extern std::unordered_map aggFunMap; +extern std::unordered_map scalarFunMap; + +} // namespace DB diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp new file mode 100644 index 00000000000..2cc8ce0b9c2 --- /dev/null +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -0,0 +1,171 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_) + : source_columns(source_columns_), context(context_) +{ + settings = context.getSettings(); +} + +bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name) +{ + if (sel.conditions_size() == 0) + { + return false; + } + tipb::Expr final_condition; + if (sel.conditions_size() > 1) + { + final_condition.set_tp(tipb::ExprType::ScalarFunc); + final_condition.set_sig(tipb::ScalarFuncSig::LogicalAnd); + + for (auto & condition : sel.conditions()) + { + auto c = final_condition.add_children(); + 
c->ParseFromString(condition.SerializeAsString()); + } + } + + const tipb::Expr & filter = sel.conditions_size() > 1 ? final_condition : sel.conditions(0); + initChain(chain, source_columns); + filter_column_name = getActions(filter, chain.steps.back().actions); + chain.steps.back().required_output.push_back(filter_column_name); + return true; +} + +bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names) +{ + if (topN.order_by_size() == 0) + { + return false; + } + initChain(chain, aggregated_columns); + ExpressionActionsChain::Step & step = chain.steps.back(); + for (const tipb::ByItem & byItem : topN.order_by()) + { + String name = getActions(byItem.expr(), step.actions); + step.required_output.push_back(name); + order_column_names.push_back(name); + } + return true; +} + +const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return source_columns; } + +String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions) +{ + String expr_name = getName(expr, getCurrentInputColumns()); + if ((isLiteralExpr(expr) || isFunctionExpr(expr)) && actions->getSampleBlock().has(expr_name)) + { + return expr_name; + } + if (isLiteralExpr(expr)) + { + Field value = decodeLiteral(expr); + DataTypePtr type = expr.has_field_type() ? getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), value); + + ColumnWithTypeAndName column; + column.column = type->createColumnConst(1, convertFieldToType(value, *type)); + column.name = expr_name; + column.type = type; + + actions->add(ExpressionAction::addColumn(column)); + return column.name; + } + else if (isColumnExpr(expr)) + { + ColumnID columnId = getColumnID(expr); + if (columnId < 1 || columnId > (ColumnID)getCurrentInputColumns().size()) + { + throw Exception("column id out of bound"); + } + //todo check if the column type need to be cast to field type + return expr_name; + } + else if (isFunctionExpr(expr)) + { + if (isAggFunctionExpr(expr)) + { + throw Exception("agg function is not supported yet"); + } + const String & func_name = getFunctionName(expr); + if (func_name == "in" || func_name == "notIn" || func_name == "globalIn" || func_name == "globalNotIn") + { + // todo support in + throw Exception(func_name + " is not supported yet"); + } + + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); + Names argument_names; + DataTypes argument_types; + for (auto & child : expr.children()) + { + String name = getActions(child, actions); + if (actions->getSampleBlock().has(name)) + { + argument_names.push_back(name); + argument_types.push_back(actions->getSampleBlock().getByName(name).type); + } + else + { + throw Exception("Unknown expr: " + child.DebugString()); + } + } + + const ExpressionAction & applyFunction = ExpressionAction::applyFunction(function_builder, argument_names, expr_name); + actions->add(applyFunction); + // add cast if needed + if (expr.has_field_type()) + { + DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); + DataTypePtr actual_type = applyFunction.result_type; + //todo maybe use a more decent compare method + if (expected_type->getName() != actual_type->getName()) + { + // need to add cast function + // first construct the second argument + tipb::Expr type_expr; + type_expr.set_tp(tipb::ExprType::String); + std::stringstream ss; + EncodeCompactBytes(expected_type->getName(), ss); + type_expr.set_val(ss.str()); + auto 
type_field_type = type_expr.field_type(); + type_field_type.set_tp(0xfe); + type_field_type.set_flag(1); + String name = getActions(type_expr, actions); + String cast_name = "cast"; + const FunctionBuilderPtr & cast_func_builder = FunctionFactory::instance().get(cast_name, context); + String cast_expr_name = cast_name + "_" + expr_name + "_" + getName(type_expr, getCurrentInputColumns()); + Names cast_argument_names; + cast_argument_names.push_back(expr_name); + cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns())); + const ExpressionAction & apply_cast_function + = ExpressionAction::applyFunction(cast_func_builder, argument_names, cast_expr_name); + actions->add(apply_cast_function); + return cast_expr_name; + } + else + { + return expr_name; + } + } + else + { + return expr_name; + } + } + else + { + throw Exception("Unsupported expr type: " + getTypeName(expr)); + } +} +} // namespace DB diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h new file mode 100644 index 00000000000..6a63600fb12 --- /dev/null +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/** Transforms an expression from DAG expression into a sequence of actions to execute it. + * + */ +class DAGExpressionAnalyzer : private boost::noncopyable +{ +private: + using ExpressionActionsPtr = std::shared_ptr; + // all columns from table scan + NamesAndTypesList source_columns; + // all columns after aggregation + NamesAndTypesList aggregated_columns; + Settings settings; + const Context & context; + +public: + DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_); + bool appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); + bool appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); + void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const + { + if (chain.steps.empty()) + { + chain.settings = settings; + chain.steps.emplace_back(std::make_shared(columns, settings)); + } + } + String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); + const NamesAndTypesList & getCurrentInputColumns(); +}; + +} // namespace DB diff --git a/dbms/src/Interpreters/DAGQueryInfo.cpp b/dbms/src/Interpreters/DAGQueryInfo.cpp index 77c6b2daef1..ad03da917d6 100644 --- a/dbms/src/Interpreters/DAGQueryInfo.cpp +++ b/dbms/src/Interpreters/DAGQueryInfo.cpp @@ -1,27 +1,67 @@ -#include #include #include +#include namespace DB { - DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) - : dag_request(dag_request_), coprocessorContext(coprocessorContext_) {} +const String DAGQueryInfo::TS_NAME("tablescan"); +const String DAGQueryInfo::SEL_NAME("selection"); +const String DAGQueryInfo::AGG_NAME("aggregation"); +const String DAGQueryInfo::TOPN_NAME("topN"); +const String DAGQueryInfo::LIMIT_NAME("limit"); - std::tuple DAGQueryInfo::parse(size_t ) { - query = String("cop query"); - ast = std::make_shared(); - ((ASTSelectQuery*)ast.get())->is_fake_sel = true; - return std::make_tuple(query, ast); +static void assignOrThrowException(Int32 & index, Int32 value, const String & name) +{ + if (index != -1) + { + throw Exception("Duplicated " + name + " in DAG request"); } + index = value; +} - String DAGQueryInfo::get_query_ignore_error(size_t ) { - return query; 
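getActions above is the core of the translation: a literal becomes a constant column, a ColumnRef resolves through its bounds-checked 1-based id, and a scalar call first recurses over its children, then appends an applyFunction action, wrapping the result in an extra cast whenever the declared field_type disagrees with the type ClickHouse infers. A hypothetical driver, assuming the scan's source_columns, a Context named ch_context, and a tipb::Selection named selection are in scope:

    // Sketch of the intended call pattern, not code from this patch.
    DAGExpressionAnalyzer analyzer(source_columns, ch_context);
    ExpressionActionsChain chain;
    String filter_column;
    if (analyzer.appendWhere(chain, selection, filter_column))
    {
        // filter_column now names a UInt8 column computed by the chain's
        // actions; a filter stream over it can sit on top of the scan.
    }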
+DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) + : dag_request(dag_request_), coprocessorContext(coprocessorContext_) +{ + for (int i = 0; i < dag_request.executors_size(); i++) + { + switch (dag_request.executors(i).tp()) + { + case tipb::ExecType::TypeTableScan: + assignOrThrowException(ts_index, i, TS_NAME); + break; + case tipb::ExecType::TypeSelection: + assignOrThrowException(sel_index, i, SEL_NAME); + break; + case tipb::ExecType::TypeStreamAgg: + case tipb::ExecType::TypeAggregation: + assignOrThrowException(agg_index, i, AGG_NAME); + break; + case tipb::ExecType::TypeTopN: + assignOrThrowException(order_index, i, TOPN_NAME); + case tipb::ExecType::TypeLimit: + assignOrThrowException(limit_index, i, LIMIT_NAME); + break; + default: + throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString()); + } } +} - std::unique_ptr DAGQueryInfo::getInterpreter(Context & , QueryProcessingStage::Enum ) { - return std::make_unique(coprocessorContext, dag_request); - } +std::tuple DAGQueryInfo::parse(size_t) +{ + query = String("cop query"); + ast = std::make_shared(); + ((ASTSelectQuery *)ast.get())->is_fake_sel = true; + return std::make_tuple(query, ast); +} + +String DAGQueryInfo::get_query_ignore_error(size_t) { return query; } + +std::unique_ptr DAGQueryInfo::getInterpreter(Context &, QueryProcessingStage::Enum) +{ + return std::make_unique(coprocessorContext, *this); } +} // namespace DB diff --git a/dbms/src/Interpreters/DAGQueryInfo.h b/dbms/src/Interpreters/DAGQueryInfo.h index 826a07cfc33..aa2baa833c9 100644 --- a/dbms/src/Interpreters/DAGQueryInfo.h +++ b/dbms/src/Interpreters/DAGQueryInfo.h @@ -1,10 +1,14 @@ #pragma once -#include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" #include -#include -#include +#pragma GCC diagnostic pop + #include +#include +#include +#include namespace DB @@ -15,18 +19,65 @@ namespace DB class DAGQueryInfo : public IQueryInfo { public: + static const String TS_NAME; + static const String SEL_NAME; + static const String AGG_NAME; + static const String TOPN_NAME; + static const String LIMIT_NAME; DAGQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_); - bool isInternalQuery() { return false;}; + bool isInternalQuery() { return false; }; virtual std::tuple parse(size_t max_query_size); virtual String get_query_ignore_error(size_t max_query_size); virtual std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage); + void assertValid(Int32 index, const String & name) + { + if (index < 0 || index > dag_request.executors_size()) + { + throw Exception("Access invalid executor: " + name); + } + } + bool has_selection() { return sel_index != -1; }; + bool has_aggregation() { return agg_index != -1; }; + bool has_topN() { return order_index != -1; }; + bool has_limit() { return order_index == -1 && limit_index != -1; }; + const tipb::TableScan & get_ts() + { + assertValid(ts_index, TS_NAME); + return dag_request.executors(ts_index).tbl_scan(); + }; + const tipb::Selection & get_sel() + { + assertValid(sel_index, SEL_NAME); + return dag_request.executors(sel_index).selection(); + }; + const tipb::Aggregation & get_agg() + { + assertValid(agg_index, AGG_NAME); + return dag_request.executors(agg_index).aggregation(); + }; + const tipb::TopN & get_topN() + { + assertValid(order_index, TOPN_NAME); + return dag_request.executors(order_index).topn(); + }; + const tipb::Limit & 
get_limit() + { + assertValid(limit_index, LIMIT_NAME); + return dag_request.executors(limit_index).limit(); + }; + const tipb::DAGRequest & get_dag_request() { return dag_request; }; private: const tipb::DAGRequest & dag_request; CoprocessorContext & coprocessorContext; String query; ASTPtr ast; + Int32 ts_index = -1; + Int32 sel_index = -1; + Int32 agg_index = -1; + Int32 order_index = -1; + Int32 limit_index = -1; }; -} +} // namespace DB diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp index f06f92704e4..aa49500d274 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -1,209 +1,159 @@ +#include #include +#include #include -#include #include -#include -#include -#include -#include #include #include +#include +#include +#include -namespace DB { +namespace DB +{ - bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) { - TableID id; - if(ts.has_table_id()) { - id = ts.table_id(); - } else { - // do not have table id - return false; - } - auto & tmt_ctx = context.ch_context.getTMTContext(); - auto storage = tmt_ctx.getStorages().get(id); - if(storage == nullptr) { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); - storage = tmt_ctx.getStorages().get(id); - } - if(storage == nullptr) { - return false; - } - const auto * merge_tree = dynamic_cast(storage.get()); - if (!merge_tree) { - return false; - } +bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) +{ + TableID id; + if (ts.has_table_id()) + { + id = ts.table_id(); + } + else + { + // do not have table id + return false; + } + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if (storage == nullptr) + { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); + } + if (storage == nullptr) + { + return false; + } + const auto * merge_tree = dynamic_cast(storage.get()); + if (!merge_tree) + { + return false; + } - for(const tipb::ColumnInfo &ci : ts.columns()) { - ColumnID cid = ci.column_id(); - String name = merge_tree->getTableInfo().columns[cid-1].name; - column_name_from_ts.emplace(std::make_pair(cid, name)); - } - if(column_name_from_ts.empty()) { - // no column selected, must be something wrong - return false; + if (ts.columns_size() == 0) + { + // no column selected, must be something wrong + return false; + } + columns_from_ts = storage->getColumns().getAllPhysical(); + for (const tipb::ColumnInfo & ci : ts.columns()) + { + ColumnID cid = ci.column_id(); + if (cid <= 0 || cid > (ColumnID)columns_from_ts.size()) + { + throw Exception("column id out of bound"); } - ss << "FROM " << merge_tree->getTableInfo().db_name << "." << merge_tree->getTableInfo().name << " "; - return true; + String name = merge_tree->getTableInfo().columns[cid - 1].name; + output_from_ts.push_back(std::move(name)); } + ss << "FROM " << merge_tree->getTableInfo().db_name << "." 
<< merge_tree->getTableInfo().name << " "; + return true; +} - String DAGStringConverter::exprToString(const tipb::Expr & expr, bool &succ) { - std::stringstream ss; - succ = true; - size_t cursor = 1; - Int64 columnId = 0; - String func_name; - Field f; - switch (expr.tp()) { - case tipb::ExprType::Null: - return "NULL"; - case tipb::ExprType::Int64: - return std::to_string(DecodeInt(cursor, expr.val())); - case tipb::ExprType::Uint64: - return std::to_string(DecodeInt(cursor, expr.val())); - case tipb::ExprType::Float32: - case tipb::ExprType::Float64: - return std::to_string(DecodeFloat64(cursor, expr.val())); - case tipb::ExprType::String: - // - return expr.val(); - case tipb::ExprType::Bytes: - return DecodeBytes(cursor, expr.val()); - case tipb::ExprType::ColumnRef: - columnId = DecodeInt(cursor, expr.val()); - if(getCurrentColumnNames().count(columnId) == 0) { - succ = false; - return ""; - } - return getCurrentColumnNames().find(columnId)->second; - case tipb::ExprType::Count: - case tipb::ExprType::Sum: - case tipb::ExprType::Avg: - case tipb::ExprType::Min: - case tipb::ExprType::Max: - case tipb::ExprType::First: - if(!aggFunMap.count(expr.tp())) { - succ = false; - return ""; - } - func_name = aggFunMap.find(expr.tp())->second; - break; - case tipb::ExprType::ScalarFunc: - if(!scalarFunMap.count(expr.sig())) { - succ = false; - return ""; - } - func_name = scalarFunMap.find(expr.sig())->second; - break; - default: - succ = false; - return ""; +bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) +{ + bool first = true; + for (const tipb::Expr & expr : sel.conditions()) + { + auto s = exprToString(expr, getCurrentColumns()); + if (first) + { + ss << "WHERE "; + first = false; } - // build function expr - if(func_name == "in") { - // for in, we could not represent the function expr using func_name(param1, param2, ...) 
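Taken together, buildTSString and buildSelString reconstruct a textual query from the executor chain; buildSqlString below prepends the projection once all executors are consumed. For a scan over db.t on columns (a, b) with one pushed-down condition and a limit, the converter would emit something like the following (spacing and function rendering follow exprToString, so this is only indicative):

    SELECT a, b FROM db.t WHERE greater(b, 10)  LIMIT 100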
- succ = false; - return ""; - } else { - ss << func_name << "("; - bool first = true; - bool sub_succ = true; - for(const tipb::Expr &child : expr.children()) { - String s = exprToString(child, sub_succ); - if(!sub_succ) { - succ = false; - return ""; - } - if(first) { - first = false; - } else { - ss << ", "; - } - ss << s; - } - ss << ") "; - return ss.str(); + else + { + ss << "AND "; } + ss << s << " "; } + return true; +} - bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { - bool first = true; - for(const tipb::Expr & expr : sel.conditions()) { - bool succ = true; - auto s = exprToString(expr, succ); - if(!succ) { - return false; - } - if(first) { - ss << "WHERE "; - first = false; - } else { - ss << "AND "; - } - ss << s << " "; - } - return true; - } +bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) +{ + ss << "LIMIT " << limit.limit() << " "; + return true; +} - bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { - ss << "LIMIT " << limit.limit() << " "; - return true; +//todo return the error message +bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) +{ + switch (executor.tp()) + { + case tipb::ExecType::TypeTableScan: + return buildTSString(executor.tbl_scan(), ss); + case tipb::ExecType::TypeIndexScan: + // index scan not supported + return false; + case tipb::ExecType::TypeSelection: + return buildSelString(executor.selection(), ss); + case tipb::ExecType::TypeAggregation: + // stream agg is not supported, treated as normal agg + case tipb::ExecType::TypeStreamAgg: + //todo support agg + return false; + case tipb::ExecType::TypeTopN: + // todo support top n + return false; + case tipb::ExecType::TypeLimit: + return buildLimitString(executor.limit(), ss); } +} - //todo return the error message - bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { - switch (executor.tp()) { - case tipb::ExecType::TypeTableScan: - return buildTSString(executor.tbl_scan(), ss); - case tipb::ExecType::TypeIndexScan: - // index scan not supported - return false; - case tipb::ExecType::TypeSelection: - return buildSelString(executor.selection(), ss); - case tipb::ExecType::TypeAggregation: - // stream agg is not supported, treated as normal agg - case tipb::ExecType::TypeStreamAgg: - //todo support agg - return false; - case tipb::ExecType::TypeTopN: - // todo support top n - return false; - case tipb::ExecType::TypeLimit: - return buildLimitString(executor.limit(), ss); - } - } +bool isProject(const tipb::Executor &) +{ + // currently, project is not pushed so always return false + return false; +} +DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + : context(context_), dag_request(dag_request_) +{ + afterAgg = false; +} - bool isProject(const tipb::Executor &) { - // currently, project is not pushed so always return false - return false; - } - DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) - : context(context_), dag_request(dag_request_) { - afterAgg = false; +String DAGStringConverter::buildSqlString() +{ + std::stringstream query_buf; + std::stringstream project; + for (const tipb::Executor & executor : dag_request.executors()) + { + if (!buildString(executor, query_buf)) + { + return ""; + } } - - String DAGStringConverter::buildSqlString() { - std::stringstream 
query_buf; - std::stringstream project; - for(const tipb::Executor & executor : dag_request.executors()) { - if(!buildString(executor, query_buf)) { - return ""; + if (!isProject(dag_request.executors(dag_request.executors_size() - 1))) + { + //append final project + project << "SELECT "; + bool first = true; + for (UInt32 index : dag_request.output_offsets()) + { + if (first) + { + first = false; } - } - if(!isProject(dag_request.executors(dag_request.executors_size()-1))) { - //append final project - project << "SELECT "; - bool first = true; - for(UInt32 index : dag_request.output_offsets()) { - if(first) { - first = false; - } else { - project << ", "; - } - project << getCurrentColumnNames()[index+1]; + else + { + project << ", "; } - project << " "; + project << getCurrentOutputColumns()[index]; } - return project.str() + query_buf.str(); + project << " "; } - + return project.str() + query_buf.str(); } + +} // namespace DB diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h index cae42a54f19..2fa200e0f8e 100644 --- a/dbms/src/Interpreters/DAGStringConverter.h +++ b/dbms/src/Interpreters/DAGStringConverter.h @@ -2,40 +2,56 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include #include +#include #pragma GCC diagnostic pop -#include #include +#include -namespace DB { +namespace DB +{ -class DAGStringConverter { +class DAGStringConverter +{ public: DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); ~DAGStringConverter() = default; String buildSqlString(); + private: bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); - String exprToString(const tipb::Expr & expr, bool &succ); bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss); bool buildString(const tipb::Executor & executor, std::stringstream & ss); CoprocessorContext & context; tipb::DAGRequest & dag_request; - std::unordered_map column_name_from_ts; - std::unordered_map column_name_from_agg; + // used by columnRef, which starts with 1, and refs column index in the original ts/agg output + NamesAndTypesList columns_from_ts; + NamesAndTypesList columns_from_agg; + // used by output_offset, which starts with 0, and refs the index in the selected output of ts/agg operater + Names output_from_ts; + Names output_from_agg; bool afterAgg; - std::unordered_map & getCurrentColumnNames() { - if(afterAgg) { - return column_name_from_agg; + const NamesAndTypesList & getCurrentColumns() + { + if (afterAgg) + { + return columns_from_agg; } - return column_name_from_ts; + return columns_from_ts; } + const Names & getCurrentOutputColumns() + { + if (afterAgg) + { + return output_from_agg; + } + return output_from_ts; + } }; -} +} // namespace DB diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.cpp b/dbms/src/Interpreters/InterpreterDAGRequest.cpp index 8a8e6fe4698..483ef96fa2e 100644 --- a/dbms/src/Interpreters/InterpreterDAGRequest.cpp +++ b/dbms/src/Interpreters/InterpreterDAGRequest.cpp @@ -1,219 +1,349 @@ #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include #include +#include #include -#include -#include -#include -#include -#include -#include +#include -namespace DB { +namespace DB +{ + +namespace ErrorCodes +{ +extern const int TOO_MANY_COLUMNS; +} - namespace ErrorCodes 
+InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info_) + : context(context_), dag_query_info(dag_query_info_) +{} + +// the flow is the same as executeFetchcolumns +bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) +{ + if (!ts.has_table_id()) { - extern const int TOO_MANY_COLUMNS; + // do not have table id + return false; } - - static void assignOrThrowException(Int32 & index, Int32 value, String name) { - if(index != -1) { - throw Exception("Duplicated " + name + " in DAG request"); - } - index = value; - } - - InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) - : context(context_), dag_request(dag_request_) { - for(int i = 0; i < dag_request.executors_size(); i++) { - switch (dag_request.executors(i).tp()) { - case tipb::ExecType::TypeTableScan: - assignOrThrowException(ts_index, i, "TableScan"); - break; - case tipb::ExecType::TypeSelection: - assignOrThrowException(sel_index, i, "Selection"); - break; - case tipb::ExecType::TypeStreamAgg: - case tipb::ExecType::TypeAggregation: - assignOrThrowException(agg_index, i, "Aggregation"); - break; - case tipb::ExecType::TypeTopN: - assignOrThrowException(order_index, i, "Order"); - case tipb::ExecType::TypeLimit: - assignOrThrowException(limit_index, i, "Limit"); - break; - default: - throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString()); - } - } + TableID id = ts.table_id(); + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if (storage == nullptr) + { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); } - - bool InterpreterDAGRequest::buildSelPlan(const tipb::Selection & , Pipeline & ) { + if (storage == nullptr) + { + return false; + } + auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); + const auto * merge_tree = dynamic_cast(storage.get()); + if (!merge_tree) + { return false; } - // the flow is the same as executeFetchcolumns - bool InterpreterDAGRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { - if(!ts.has_table_id()) { - // do not have table id - return false; - } - TableID id = ts.table_id(); - auto & tmt_ctx = context.ch_context.getTMTContext(); - auto storage = tmt_ctx.getStorages().get(id); - if(storage == nullptr) { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); - storage = tmt_ctx.getStorages().get(id); - } - if(storage == nullptr) { - return false; - } - auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - const auto * merge_tree = dynamic_cast(storage.get()); - if(!merge_tree) { + Names required_columns; + for (const tipb::ColumnInfo & ci : ts.columns()) + { + ColumnID cid = ci.column_id(); + if (cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) + { + // cid out of bound return false; } + String name = merge_tree->getTableInfo().columns[cid - 1].name; + //todo handle output_offset + required_columns.push_back(name); + } + if (required_columns.empty()) + { + // no column selected, must be something wrong + return false; + } - Names required_columns; - for(const tipb::ColumnInfo & ci : ts.columns()) { - ColumnID cid = ci.column_id(); - if(cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) { - // cid out of bound + if (!dag_query_info.has_aggregation()) + { + // if the dag request does not contain 
agg, then the final output is + // based on the output of table scan + for (auto i : dag_query_info.get_dag_request().output_offsets()) + { + if (i < 0 || i >= required_columns.size()) + { + // array index out of bound return false; } - String name = merge_tree->getTableInfo().columns[cid - 1].name; - //todo handle output_offset - required_columns.push_back(name); - } - if(required_columns.empty()) { - // no column selected, must be something wrong - return false; + // do not have alias + final_project.emplace_back(required_columns[i], ""); } + } + // todo handle alias column + const Settings & settings = context.ch_context.getSettingsRef(); - if(agg_index == -1) { - // if the dag request does not contain agg, then the final output is - // based on the output of table scan - for (auto i : dag_request.output_offsets()) { - if (i < 0 || i >= required_columns.size()) { - // array index out of bound - return false; - } - // do not have alias - final_project.emplace_back(required_columns[i], ""); - } - } - // todo handle alias column - const Settings & settings = context.ch_context.getSettingsRef(); - - if(settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) { - throw Exception("Limit for number of columns to read exceeded. " - "Requested: " + toString(required_columns.size()) - + ", maximum: " + settings.max_columns_to_read.toString(), - ErrorCodes::TOO_MANY_COLUMNS); - } + if (settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) + { + throw Exception("Limit for number of columns to read exceeded. " + "Requested: " + + toString(required_columns.size()) + ", maximum: " + settings.max_columns_to_read.toString(), + ErrorCodes::TOO_MANY_COLUMNS); + } - size_t max_block_size = settings.max_block_size; - size_t max_streams = settings.max_threads; - QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; - if(max_streams > 1) { - max_streams *= settings.max_streams_to_max_threads_ratio; - } + size_t max_block_size = settings.max_block_size; + max_streams = settings.max_threads; + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; + if (max_streams > 1) + { + max_streams *= settings.max_streams_to_max_threads_ratio; + } - //todo support index in - SelectQueryInfo query_info; - query_info.query = std::make_unique(); - ((ASTSelectQuery*)query_info.query.get())->is_fake_sel = true; - query_info.mvcc_query_info = std::make_unique(); - query_info.mvcc_query_info->resolve_locks = true; - query_info.mvcc_query_info->read_tso = settings.read_tso; - RegionQueryInfo info; - info.region_id = context.kv_context.region_id(); - info.conf_version = context.kv_context.region_epoch().conf_ver(); - info.version = context.kv_context.region_epoch().version(); - auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); - if(!current_region) { - return false; - } - info.range_in_table = current_region->getHandleRangeByTable(id); - query_info.mvcc_query_info->regions_query_info.push_back(info); - query_info.mvcc_query_info->concurrent = 0.0; - pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams); - /// Set the limits and quota for reading data, the speed and time of the query. 
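For reference, the stream fan-out configured above is multiplicative: max_streams starts at settings.max_threads and, when greater than one, is scaled by settings.max_streams_to_max_threads_ratio before being passed to storage->read(). A tiny sketch of that arithmetic (the concrete values are illustrative, not defaults):

    #include <cstddef>
    #include <iostream>

    int main()
    {
        std::size_t max_threads = 8;                   // stands in for settings.max_threads
        double max_streams_to_max_threads_ratio = 2.0; // stands in for the ratio setting

        std::size_t max_streams = max_threads;
        if (max_streams > 1)
            max_streams *= max_streams_to_max_threads_ratio;

        // the storage layer may return up to this many parallel input streams
        std::cout << max_streams << '\n'; // prints 16
    }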
-        {
-            IProfilingBlockInputStream::LocalLimits limits;
-            limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
-            limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode);
-            limits.max_execution_time = settings.max_execution_time;
-            limits.timeout_overflow_mode = settings.timeout_overflow_mode;

+    //todo support index in
+    SelectQueryInfo query_info;
+    query_info.query = std::make_unique<ASTSelectQuery>();
+    ((ASTSelectQuery *)query_info.query.get())->is_fake_sel = true;
+    query_info.mvcc_query_info = std::make_unique<MvccQueryInfo>();
+    query_info.mvcc_query_info->resolve_locks = true;
+    query_info.mvcc_query_info->read_tso = settings.read_tso;
+    RegionQueryInfo info;
+    info.region_id = context.kv_context.region_id();
+    info.conf_version = context.kv_context.region_epoch().conf_ver();
+    info.version = context.kv_context.region_epoch().version();
+    auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id);
+    if (!current_region)
+    {
+        return false;
+    }
+    info.range_in_table = current_region->getHandleRangeByTable(id);
+    query_info.mvcc_query_info->regions_query_info.push_back(info);
+    query_info.mvcc_query_info->concurrent = 0.0;
+    pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams);
+    /// Set the limits and quota for reading data, the speed and time of the query.
+    {
+        IProfilingBlockInputStream::LocalLimits limits;
+        limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
+        limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode);
+        limits.max_execution_time = settings.max_execution_time;
+        limits.timeout_overflow_mode = settings.timeout_overflow_mode;

-            /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers,
+        /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers,
          * because the initiating server has a summary of the execution of the request on all servers.
          *
          * But limits on data size to read and maximum execution time are reasonable to check both on initiator and
          * additionally on each remote server, because these limits are checked per block of data processed,
          * and remote servers may process way more blocks of data than are received by initiator.
          */
-            limits.min_execution_speed = settings.min_execution_speed;
-            limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
-
-            QuotaForIntervals & quota = context.ch_context.getQuota();
-
-            pipeline.transform([&](auto & stream)
-            {
-                if (IProfilingBlockInputStream * p_stream = dynamic_cast<IProfilingBlockInputStream *>(stream.get()))
-                {
-                    p_stream->setLimits(limits);
-                    p_stream->setQuota(quota);
-                }
-            });
-        }
-        return true;
-    }

-    //todo return the error message
-    bool InterpreterDAGRequest::buildPlan(Pipeline & pipeline) {
-        // step 1. build table scan
-        if(!buildTSPlan(dag_request.executors(ts_index).tbl_scan(), pipeline)) {
-            return false;
-        }
-        // step 2. 
build selection if needed
-        if(sel_index != -1) {
-            if(buildSelPlan(dag_request.executors(sel_index).selection(), pipeline)) {
-                return false;
+        QuotaForIntervals & quota = context.ch_context.getQuota();
+
+        pipeline.transform([&](auto & stream) {
+            if (IProfilingBlockInputStream * p_stream = dynamic_cast<IProfilingBlockInputStream *>(stream.get()))
+            {
+                p_stream->setLimits(limits);
+                p_stream->setQuota(quota);
             }
+        });
+    }
+    ColumnsWithTypeAndName columnsWithTypeAndName = pipeline.firstStream()->getHeader().getColumnsWithTypeAndName();
+    source_columns = storage->getColumns().getAllPhysical();
+    return true;
+}
+
+InterpreterDAGRequest::AnalysisResult InterpreterDAGRequest::analyzeExpressions()
+{
+    AnalysisResult res;
+    ExpressionActionsChain chain;
+    res.need_aggregate = dag_query_info.has_aggregation();
+    DAGExpressionAnalyzer expressionAnalyzer(source_columns, context.ch_context);
+    if (dag_query_info.has_selection())
+    {
+        if (expressionAnalyzer.appendWhere(chain, dag_query_info.get_sel(), res.filter_column_name))
+        {
+            res.has_where = true;
+            res.before_where = chain.getLastActions();
+            res.filter_column_name = chain.steps.back().required_output[0];
+            chain.addStep();
         }
-        // step 3. build agg if needed
-        if(agg_index != -1) {
-            return false;
-        }
-        // step 3. build order by if needed
-        if(order_index != -1) {
-            return false;
-        }
-        // step 3. build limit if needed
-        if(limit_index != -1) {
-            return false;
-        }
-        return true;
-    }
-
-    BlockIO InterpreterDAGRequest::execute() {
-        Pipeline pipeline;
-        buildPlan(pipeline);
-        // add final project
-        auto stream_before_project = pipeline.firstStream();
-        auto columns = stream_before_project->getHeader();
-        NamesAndTypesList input_column;
-        for(auto column : columns.getColumnsWithTypeAndName()) {
-            input_column.emplace_back(column.name, column.type);
-        }
-        ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.ch_context.getSettingsRef());
-        project->add(ExpressionAction::project(final_project));
-        auto final_stream = std::make_shared<ExpressionBlockInputStream>(stream_before_project, project);
-        BlockIO res;
-        res.in = final_stream;
-        return res;
     }
+    if (res.need_aggregate)
+    {
+        throw Exception("agg not supported");
+    }
+    if (dag_query_info.has_topN())
+    {
+        res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_info.get_topN(), res.order_column_names);
+    }
+    // append final project results
+    for (auto & name : final_project)
+    {
+        chain.steps.back().required_output.push_back(name.first);
+    }
+    res.before_order_and_select = chain.getLastActions();
+    chain.finalize();
+    chain.clear();
+    //todo need call prependProjectInput?? 
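The required_output bookkeeping above matters because ExpressionActionsChain::finalize() prunes every column that no later step asks for, so the final projection names must be pinned explicitly. A toy model of that pruning rule in plain C++ (deliberately not the ClickHouse types):

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        // columns the pipeline produces so far
        std::vector<std::string> produced = {"id", "name", "less(score, 123)"};
        // the filter column was only needed by the WHERE step, which already ran
        std::vector<std::string> required_output = {"name"};

        // pinning the final projection keeps "id" from being pruned as well
        required_output.push_back("id");

        // "finalize": drop produced columns nobody downstream requires
        produced.erase(std::remove_if(produced.begin(), produced.end(),
                           [&](const std::string & col) {
                               return std::find(required_output.begin(), required_output.end(), col)
                                   == required_output.end();
                           }),
            produced.end());

        for (const auto & col : produced)
            std::cout << col << '\n'; // prints "id" then "name"; the spent filter column is gone
    }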
+    return res;
+}
+
+void InterpreterDAGRequest::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column)
+{
+    pipeline.transform(
+        [&](auto & stream) { stream = std::make_shared<FilterBlockInputStream>(stream, expressionActionsPtr, filter_column); });
+}
+
+void InterpreterDAGRequest::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr)
+{
+    if (expressionActionsPtr->getActions().size() > 0)
+    {
+        pipeline.transform([&](auto & stream) { stream = std::make_shared<ExpressionBlockInputStream>(stream, expressionActionsPtr); });
+    }
+}
+
+SortDescription InterpreterDAGRequest::getSortDescription(Strings & order_column_names)
+{
+    // construct SortDescription
+    SortDescription order_descr;
+    const tipb::TopN & topN = dag_query_info.get_topN();
+    order_descr.reserve(topN.order_by_size());
+    for (int i = 0; i < topN.order_by_size(); i++)
+    {
+        String name = order_column_names[i];
+        int direction = topN.order_by(i).desc() ? -1 : 1;
+        // todo get this information from DAGRequest
+        // currently use NULLS LAST
+        int nulls_direction = direction;
+        // todo get this information from DAGRequest
+        // currently use the default value
+        std::shared_ptr<Collator> collator;
+
+        order_descr.emplace_back(name, direction, nulls_direction, collator);
+    }
+    return order_descr;
+}
+
+void InterpreterDAGRequest::executeUnion(Pipeline & pipeline)
+{
+    if (pipeline.hasMoreThanOneStream())
+    {
+        pipeline.firstStream() = std::make_shared<UnionBlockInputStream<>>(pipeline.streams, nullptr, max_streams);
+        pipeline.streams.resize(1);
+    }
+}
+
+void InterpreterDAGRequest::executeOrder(Pipeline & pipeline, Strings & order_column_names)
+{
+    SortDescription order_descr = getSortDescription(order_column_names);
+    const Settings & settings = context.ch_context.getSettingsRef();
+    Int64 limit = dag_query_info.get_topN().limit();
+
+    pipeline.transform([&](auto & stream) {
+        auto sorting_stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);
+
+        /// Limits on sorting
+        IProfilingBlockInputStream::LocalLimits limits;
+        limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
+        limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
+        sorting_stream->setLimits(limits);
+
+        stream = sorting_stream;
+    });
+
+    /// If there are several streams, we merge them into one
+    executeUnion(pipeline);
+
+    /// Merge the sorted blocks. 
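The ordering above is the classic two-phase sort: every stream first sorts its own blocks (bounded by the TopN limit), the streams are unioned into one, and a single merging stream yields the globally ordered prefix. The same idea over plain vectors, as an illustration rather than the actual stream classes:

    #include <cstddef>
    #include <iostream>
    #include <queue>
    #include <tuple>
    #include <vector>

    int main()
    {
        // each inner vector models one stream's already-sorted output (descending)
        std::vector<std::vector<int>> streams = {{9, 4, 1}, {8, 6, 2}, {7, 5, 3}};

        // k-way merge via a max-heap of (value, stream index, position)
        using Entry = std::tuple<int, std::size_t, std::size_t>;
        std::priority_queue<Entry> heap;
        for (std::size_t s = 0; s < streams.size(); ++s)
            heap.emplace(streams[s][0], s, 0);

        std::size_t limit = 3; // like topN.limit(): stop once enough rows are merged
        while (limit-- > 0 && !heap.empty())
        {
            auto [value, s, pos] = heap.top();
            heap.pop();
            std::cout << value << ' '; // prints: 9 8 7
            if (pos + 1 < streams[s].size())
                heap.emplace(streams[s][pos + 1], s, pos + 1);
        }
        std::cout << '\n';
    }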
+    pipeline.firstStream() = std::make_shared<MergeSortingBlockInputStream>(pipeline.firstStream(), order_descr, settings.max_block_size,
+        limit, settings.max_bytes_before_external_sort, context.ch_context.getTemporaryPath());
+}
+
+//todo return the error message
+bool InterpreterDAGRequest::executeImpl(Pipeline & pipeline)
+{
+    if (!executeTS(dag_query_info.get_ts(), pipeline))
+    {
+        return false;
+    }
+
+    auto res = analyzeExpressions();
+    // execute selection
+    if (res.has_where)
+    {
+        executeWhere(pipeline, res.before_where, res.filter_column_name);
+    }
+    if (res.need_aggregate)
+    {
+        // execute aggregation
+        throw Exception("agg not supported");
+    }
+    else
+    {
+        executeExpression(pipeline, res.before_order_and_select);
+    }
+
+    if (res.has_order_by)
+    {
+        // execute topN
+        executeOrder(pipeline, res.order_column_names);
+    }
+
+    // execute projection
+    executeFinalProject(pipeline);
+
+    // execute limit
+    if (dag_query_info.has_limit() && !dag_query_info.has_topN())
+    {
+        executeLimit(pipeline);
+    }
+    return true;
+}
+
+void InterpreterDAGRequest::executeFinalProject(Pipeline & pipeline)
+{
+    auto columns = pipeline.firstStream()->getHeader();
+    NamesAndTypesList input_column;
+    for (auto column : columns.getColumnsWithTypeAndName())
+    {
+        input_column.emplace_back(column.name, column.type);
+    }
+    ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.ch_context.getSettingsRef());
+    project->add(ExpressionAction::project(final_project));
+    // add final project
+    pipeline.transform([&](auto & stream) { stream = std::make_shared<ExpressionBlockInputStream>(stream, project); });
+}
+
+void InterpreterDAGRequest::executeLimit(Pipeline & pipeline)
+{
+    pipeline.transform(
+        [&](auto & stream) { stream = std::make_shared<LimitBlockInputStream>(stream, dag_query_info.get_limit().limit(), 0, false); });
+    if (pipeline.hasMoreThanOneStream())
+    {
+        executeUnion(pipeline);
+        pipeline.transform(
+            [&](auto & stream) { stream = std::make_shared<LimitBlockInputStream>(stream, dag_query_info.get_limit().limit(), 0, false); });
+    }
+}
+
+BlockIO InterpreterDAGRequest::execute()
+{
+    Pipeline pipeline;
+    executeImpl(pipeline);
+    executeUnion(pipeline);
+
+    BlockIO res;
+    res.in = pipeline.firstStream();
+    return res;
 }
+} // namespace DB
diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.h b/dbms/src/Interpreters/InterpreterDAGRequest.h
index 13a542b597a..7cfe18c9374 100644
--- a/dbms/src/Interpreters/InterpreterDAGRequest.h
+++ b/dbms/src/Interpreters/InterpreterDAGRequest.h
@@ -2,23 +2,26 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include
 #include
+#include
 #pragma GCC diagnostic pop
-#include
 #include
+#include
 #include
+#include
 #include
 #include
-namespace DB {
+namespace DB
+{
 /** build ch plan from dag request: dag executors -> ch plan
 */
-class InterpreterDAGRequest : public IInterpreter {
+class InterpreterDAGRequest : public IInterpreter
+{
 public:
-    InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request);
+    InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info);
 
     ~InterpreterDAGRequest() = default;
 
@@ -26,13 +29,11 @@ class InterpreterDAGRequest : public IInterpreter {
 private:
     CoprocessorContext & context;
-    const tipb::DAGRequest & dag_request;
     NamesWithAliases final_project;
-    Int32 ts_index = -1;
-    Int32 sel_index = -1;
-    Int32 agg_index = -1;
-    Int32 order_index = -1;
-    Int32 limit_index = -1;
+    DAGQueryInfo & dag_query_info;
+    NamesAndTypesList source_columns;
+    size_t max_streams = 1;
+
     struct Pipeline
     {
         BlockInputStreams streams;
@@ -46,15 +47,35 @@ class 
InterpreterDAGRequest : public IInterpreter { transform(stream); } - bool hasMoreThanOneStream() const - { - return streams.size() > 1; - } + bool hasMoreThanOneStream() const { return streams.size() > 1; } }; - bool buildPlan(Pipeline & streams); - bool buildTSPlan(const tipb::TableScan & ts, Pipeline & streams); - bool buildSelPlan(const tipb::Selection & sel, Pipeline & streams); + struct AnalysisResult + { + bool has_where = false; + bool need_aggregate = false; + bool has_order_by = false; + + ExpressionActionsPtr before_where; + ExpressionActionsPtr before_aggregation; + ExpressionActionsPtr before_order_and_select; + ExpressionActionsPtr final_projection; + + String filter_column_name; + Strings order_column_names; + /// Columns from the SELECT list, before renaming them to aliases. + Names selected_columns; + }; + bool executeImpl(Pipeline & pipeline); + bool executeTS(const tipb::TableScan & ts, Pipeline & pipeline); + void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column); + void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr); + void executeOrder(Pipeline & pipeline, Strings & order_column_names); + void executeUnion(Pipeline & pipeline); + void executeLimit(Pipeline & pipeline); + void executeFinalProject(Pipeline & pipeline); + SortDescription getSortDescription(Strings & order_column_names); + AnalysisResult analyzeExpressions(); }; -} +} // namespace DB diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Server/cop_test.cpp index 13559193ad0..d039d90465d 100644 --- a/dbms/src/Server/cop_test.cpp +++ b/dbms/src/Server/cop_test.cpp @@ -1,50 +1,57 @@ +#include #include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include #include +#include #include -#include #pragma GCC diagnostic pop -#include + #include using ChannelPtr = std::shared_ptr; using SubPtr = std::shared_ptr; static const int DAGREQUEST = 103; -class FlashClient { +class FlashClient +{ private: SubPtr sp; + public: - FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)){ - } - grpc::Status coprocessor(coprocessor::Request* rqst) { + FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)) {} + grpc::Status coprocessor(coprocessor::Request * rqst) + { grpc::ClientContext clientContext; - clientContext.AddMetadata("user_name",""); - clientContext.AddMetadata("builder_version","v2"); + clientContext.AddMetadata("user_name", ""); + clientContext.AddMetadata("builder_version", "v2"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); size_t column_num = 3; - if(status.ok()) { + if (status.ok()) + { // if status is ok, try to decode the result tipb::SelectResponse selectResponse; - if(selectResponse.ParseFromString(response.data())) { - for(tipb::Chunk chunk : selectResponse.chunks()) { + if (selectResponse.ParseFromString(response.data())) + { + for (tipb::Chunk chunk : selectResponse.chunks()) + { size_t cursor = 0; std::vector row_result; - const std::string &data = chunk.rows_data(); - while (cursor < data.size()) { + const std::string & data = chunk.rows_data(); + while (cursor < data.size()) + { row_result.push_back(DB::DecodeDatum(cursor, data)); - if(row_result.size() == column_num) { + if (row_result.size() == column_num) + { //print the result - std::cout << row_result[0].get() - << " "<< row_result[1].get() - << " "<< row_result[2].get() << std::endl; + std::cout << row_result[0].get() << " " << 
row_result[1].get() << " " + << row_result[2].get() << std::endl; row_result.clear(); } } - } } } @@ -53,15 +60,16 @@ class FlashClient { }; using ClientPtr = std::shared_ptr; -grpc::Status rpcTest() { - ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); +grpc::Status rpcTest() +{ + ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); ClientPtr clientPtr = std::make_shared(cp); // construct a dag request tipb::DAGRequest dagRequest; dagRequest.set_start_ts(18446744073709551615uL); - tipb::Executor *executor = dagRequest.add_executors(); + tipb::Executor * executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTableScan); - tipb::TableScan *ts = executor->mutable_tbl_scan(); + tipb::TableScan * ts = executor->mutable_tbl_scan(); ts->set_table_id(41); tipb::ColumnInfo * ci = ts->add_columns(); ci->set_column_id(1); @@ -70,30 +78,48 @@ grpc::Status rpcTest() { dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); - /* executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); - tipb::Selection *selection = executor->mutable_selection(); - tipb::Expr *expr = selection->add_conditions(); + tipb::Selection * selection = executor->mutable_selection(); + tipb::Expr * expr = selection->add_conditions(); expr->set_tp(tipb::ExprType::ScalarFunc); expr->set_sig(tipb::ScalarFuncSig::LTInt); - tipb::Expr *col = expr->add_children(); - tipb::Expr *value = expr->add_children(); + tipb::Expr * col = expr->add_children(); + tipb::Expr * value = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; DB::EncodeNumber(2, ss); col->set_val(ss.str()); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(289,ss); + DB::EncodeNumber(123, ss); value->set_val(std::string(ss.str())); - */ + + // topn + executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeTopN); + tipb::TopN * topN = executor->mutable_topn(); + topN->set_limit(3); + tipb::ByItem * byItem = topN->add_order_by(); + byItem->set_desc(true); + tipb::Expr * expr1 = byItem->mutable_expr(); + expr1->set_tp(tipb::ExprType::ColumnRef); + ss.str(""); + DB::EncodeNumber(2, ss); + expr1->set_val(ss.str()); + // limit + /* + executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeLimit); + tipb::Limit *limit = executor->mutable_limit(); + limit->set_limit(1); + */ // construct a coprocessor request coprocessor::Request request; //todo add context info - kvrpcpb::Context *ctx = request.mutable_context(); + kvrpcpb::Context * ctx = request.mutable_context(); ctx->set_region_id(2); auto region_epoch = ctx->mutable_region_epoch(); region_epoch->set_version(20); @@ -104,7 +130,8 @@ grpc::Status rpcTest() { return clientPtr->coprocessor(&request); } -void codecTest() { +void codecTest() +{ Int64 i = 123; std::stringstream ss; DB::EncodeNumber(i, ss); @@ -116,12 +143,13 @@ void codecTest() { r++; } -int main() { -// std::cout << "Before rpcTest"<< std::endl; +int main() +{ + // std::cout << "Before rpcTest"<< std::endl; grpc::Status ret = rpcTest(); -// codecTest(); -// std::cout << "End rpcTest " << std::endl; -// std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() -// << " " << ret.error_message() << std::endl; + // codecTest(); + // std::cout << "End rpcTest " << std::endl; + // std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() + // << " " << 
ret.error_message() << std::endl; return 0; } diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 91161b787a4..706f98322f7 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -85,15 +86,27 @@ DataTypePtr TypeMapping::getUnsigned(const ColumnInfo & column_info) return unsigned_type_map[column_info.tp](column_info); } -TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) { +TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) +{ // fixme: String's CodecFlag will be CodecFlagCompactBytes, which is wrong for Json type return codec_flag_map[dataTypePtr->getFamilyName()]; } -TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr) { +TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr) +{ return TypeMapping::instance().getCodecFlag(dataTypePtr); } +DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type) +{ + ColumnInfo mock_ci; + mock_ci.tp = static_cast(field_type.tp()); + mock_ci.flag = field_type.flag(); + mock_ci.flen = field_type.flen(); + mock_ci.decimal = field_type.decimal(); + return getDataTypeByColumnInfo(mock_ci); +} + DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info) { DataTypePtr base; diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index d8b2fc32357..db05d27ff84 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -1,5 +1,10 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + #include #include @@ -11,6 +16,8 @@ using ColumnInfo = TiDB::ColumnInfo; DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info); +DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type); + TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr); } From ead960966b736386c1f70ece4f4bcb156386f851 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 2 Aug 2019 13:58:33 +0800 Subject: [PATCH 09/79] basic support for selection/limit/topn executor in InterpreterDAGRequest (#150) --- .../Interpreters/CoprocessorBuilderUtils.cpp | 996 +++++++++++------- .../Interpreters/CoprocessorBuilderUtils.h | 26 +- .../Interpreters/DAGExpressionAnalyzer.cpp | 171 +++ dbms/src/Interpreters/DAGExpressionAnalyzer.h | 40 + dbms/src/Interpreters/DAGQueryInfo.cpp | 66 +- dbms/src/Interpreters/DAGQueryInfo.h | 61 +- dbms/src/Interpreters/DAGStringConverter.cpp | 318 +++--- dbms/src/Interpreters/DAGStringConverter.h | 40 +- .../Interpreters/InterpreterDAGRequest.cpp | 502 +++++---- dbms/src/Interpreters/InterpreterDAGRequest.h | 59 +- dbms/src/Server/cop_test.cpp | 104 +- dbms/src/Storages/Transaction/TypeMapping.cpp | 17 +- dbms/src/Storages/Transaction/TypeMapping.h | 7 + 13 files changed, 1552 insertions(+), 855 deletions(-) create mode 100644 dbms/src/Interpreters/DAGExpressionAnalyzer.cpp create mode 100644 dbms/src/Interpreters/DAGExpressionAnalyzer.h diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp index d8058b5c1bc..de720e0c7b4 100644 --- a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp +++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp @@ -1,403 +1,615 @@ #include + #include -#pragma GCC diagnostic push -#pragma GCC diagnostic 
ignored "-Wunused-parameter" -#include -#pragma GCC diagnostic pop #include +#include +#include + +namespace DB +{ + +bool isFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::ScalarFunc: + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + case tipb::ExprType::GroupConcat: + case tipb::ExprType::Agg_BitAnd: + case tipb::ExprType::Agg_BitOr: + case tipb::ExprType::Agg_BitXor: + case tipb::ExprType::Std: + case tipb::ExprType::Stddev: + case tipb::ExprType::StddevPop: + case tipb::ExprType::StddevSamp: + case tipb::ExprType::VarPop: + case tipb::ExprType::VarSamp: + case tipb::ExprType::Variance: + case tipb::ExprType::JsonArrayAgg: + case tipb::ExprType::JsonObjectAgg: + return true; + default: + return false; + } +} -namespace DB { +const String & getFunctionName(const tipb::Expr & expr) +{ + if (isAggFunctionExpr(expr)) + { + if (!aggFunMap.count(expr.tp())) + { + throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported."); + } + return aggFunMap[expr.tp()]; + } + else + { + if (!scalarFunMap.count(expr.sig())) + { + throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported."); + } + return scalarFunMap[expr.sig()]; + } +} - std::unordered_map aggFunMap( +String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col) +{ + std::stringstream ss; + size_t cursor = 1; + Int64 columnId = 0; + String func_name; + Field f; + switch (expr.tp()) + { + case tipb::ExprType::Null: + return "NULL"; + case tipb::ExprType::Int64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Uint64: + return std::to_string(DecodeInt(cursor, expr.val())); + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + return std::to_string(DecodeFloat64(cursor, expr.val())); + case tipb::ExprType::String: + // + return expr.val(); + case tipb::ExprType::Bytes: + return DecodeBytes(cursor, expr.val()); + case tipb::ExprType::ColumnRef: + columnId = DecodeInt(cursor, expr.val()); + if (columnId < 1 || columnId > (ColumnID)input_col.size()) { - {tipb::ExprType::Count, "count"}, - {tipb::ExprType::Sum, "sum"}, - {tipb::ExprType::Avg, "avg"}, - {tipb::ExprType::Min, "min"}, - {tipb::ExprType::Max, "max"}, - {tipb::ExprType::First, "any"}, - //{tipb::ExprType::GroupConcat, ""}, - //{tipb::ExprType::Agg_BitAnd, ""}, - //{tipb::ExprType::Agg_BitOr, ""}, - //{tipb::ExprType::Agg_BitXor, ""}, - //{tipb::ExprType::Std, ""}, - //{tipb::ExprType::Stddev, ""}, - //{tipb::ExprType::StddevPop, ""}, - //{tipb::ExprType::StddevSamp, ""}, - //{tipb::ExprType::VarPop, ""}, - //{tipb::ExprType::VarSamp, ""}, - //{tipb::ExprType::Variance, ""}, - //{tipb::ExprType::JsonArrayAgg, ""}, - //{tipb::ExprType::JsonObjectAgg, ""}, + throw Exception("out of bound"); } - ); - - std::unordered_map scalarFunMap( + return input_col.getNames()[columnId - 1]; + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + if (!aggFunMap.count(expr.tp())) + { + throw Exception("not supported"); + } + func_name = aggFunMap.find(expr.tp())->second; + break; + case tipb::ExprType::ScalarFunc: + if (!scalarFunMap.count(expr.sig())) { - {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, - {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, - {tipb::ScalarFuncSig::CastIntAsString, "cast"}, - {tipb::ScalarFuncSig::CastIntAsDecimal, 
"cast"}, - {tipb::ScalarFuncSig::CastIntAsTime, "cast"}, - {tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastIntAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastRealAsInt, "cast"}, - {tipb::ScalarFuncSig::CastRealAsReal, "cast"}, - {tipb::ScalarFuncSig::CastRealAsString, "cast"}, - {tipb::ScalarFuncSig::CastRealAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastRealAsTime, "cast"}, - {tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastRealAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastDecimalAsInt, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsReal, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsString, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsTime, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastStringAsInt, "cast"}, - {tipb::ScalarFuncSig::CastStringAsReal, "cast"}, - {tipb::ScalarFuncSig::CastStringAsString, "cast"}, - {tipb::ScalarFuncSig::CastStringAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastStringAsTime, "cast"}, - {tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastStringAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastTimeAsInt, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsReal, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsString, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsTime, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsString, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, - - {tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsString, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, - {tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, - - {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceString, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceDecimal, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceTime, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceDuration, "coalesce"}, - {tipb::ScalarFuncSig::CoalesceJson, "coalesce"}, - - {tipb::ScalarFuncSig::LTInt, "less"}, - {tipb::ScalarFuncSig::LTReal, "less"}, - {tipb::ScalarFuncSig::LTString, "less"}, - {tipb::ScalarFuncSig::LTDecimal, "less"}, - {tipb::ScalarFuncSig::LTTime, "less"}, - {tipb::ScalarFuncSig::LTDuration, "less"}, - {tipb::ScalarFuncSig::LTJson, "less"}, - - {tipb::ScalarFuncSig::LEInt, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEReal, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEString, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEDecimal, "lessOrEquals"}, - {tipb::ScalarFuncSig::LETime, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEDuration, "lessOrEquals"}, - {tipb::ScalarFuncSig::LEJson, "lessOrEquals"}, - - {tipb::ScalarFuncSig::GTInt, "greater"}, - {tipb::ScalarFuncSig::GTReal, "greater"}, - {tipb::ScalarFuncSig::GTString, "greater"}, - {tipb::ScalarFuncSig::GTDecimal, "greater"}, - 
{tipb::ScalarFuncSig::GTTime, "greater"}, - {tipb::ScalarFuncSig::GTDuration, "greater"}, - {tipb::ScalarFuncSig::GTJson, "greater"}, - - {tipb::ScalarFuncSig::GreatestInt, "greatest"}, - {tipb::ScalarFuncSig::GreatestReal, "greatest"}, - {tipb::ScalarFuncSig::GreatestString, "greatest"}, - {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, - {tipb::ScalarFuncSig::GreatestTime, "greatest"}, - - {tipb::ScalarFuncSig::LeastInt, "least"}, - {tipb::ScalarFuncSig::LeastReal, "least"}, - {tipb::ScalarFuncSig::LeastString, "least"}, - {tipb::ScalarFuncSig::LeastDecimal, "least"}, - {tipb::ScalarFuncSig::LeastTime, "least"}, - - //{tipb::ScalarFuncSig::IntervalInt, "cast"}, - //{tipb::ScalarFuncSig::IntervalReal, "cast"}, - - {tipb::ScalarFuncSig::GEInt, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEReal, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEString, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEDecimal, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GETime, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEDuration, "greaterOrEquals"}, - {tipb::ScalarFuncSig::GEJson, "greaterOrEquals"}, - - {tipb::ScalarFuncSig::EQInt, "equals"}, - {tipb::ScalarFuncSig::EQReal, "equals"}, - {tipb::ScalarFuncSig::EQString, "equals"}, - {tipb::ScalarFuncSig::EQDecimal, "equals"}, - {tipb::ScalarFuncSig::EQTime, "equals"}, - {tipb::ScalarFuncSig::EQDuration, "equals"}, - {tipb::ScalarFuncSig::EQJson, "equals"}, - - {tipb::ScalarFuncSig::NEInt, "notEquals"}, - {tipb::ScalarFuncSig::NEReal, "notEquals"}, - {tipb::ScalarFuncSig::NEString, "notEquals"}, - {tipb::ScalarFuncSig::NEDecimal, "notEquals"}, - {tipb::ScalarFuncSig::NETime, "notEquals"}, - {tipb::ScalarFuncSig::NEDuration, "notEquals"}, - {tipb::ScalarFuncSig::NEJson, "notEquals"}, - - //{tipb::ScalarFuncSig::NullEQInt, "cast"}, - //{tipb::ScalarFuncSig::NullEQReal, "cast"}, - //{tipb::ScalarFuncSig::NullEQString, "cast"}, - //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, - //{tipb::ScalarFuncSig::NullEQTime, "cast"}, - //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, - //{tipb::ScalarFuncSig::NullEQJson, "cast"}, - - {tipb::ScalarFuncSig::PlusReal, "plus"}, - {tipb::ScalarFuncSig::PlusDecimal, "plus"}, - {tipb::ScalarFuncSig::PlusInt, "plus"}, - - {tipb::ScalarFuncSig::MinusReal, "minus"}, - {tipb::ScalarFuncSig::MinusDecimal, "minus"}, - {tipb::ScalarFuncSig::MinusInt, "minus"}, - - {tipb::ScalarFuncSig::MultiplyReal, "multiply"}, - {tipb::ScalarFuncSig::MultiplyDecimal, "multiply"}, - {tipb::ScalarFuncSig::MultiplyInt, "multiply"}, - - {tipb::ScalarFuncSig::DivideReal, "divide"}, - {tipb::ScalarFuncSig::DivideDecimal, "divide"}, - {tipb::ScalarFuncSig::IntDivideInt, "intDiv"}, - {tipb::ScalarFuncSig::IntDivideDecimal, "divide"}, - - {tipb::ScalarFuncSig::ModReal, "modulo"}, - {tipb::ScalarFuncSig::ModDecimal, "modulo"}, - {tipb::ScalarFuncSig::ModInt, "modulo"}, - - {tipb::ScalarFuncSig::MultiplyIntUnsigned, "multiply"}, - - {tipb::ScalarFuncSig::AbsInt, "abs"}, - {tipb::ScalarFuncSig::AbsUInt, "abs"}, - {tipb::ScalarFuncSig::AbsReal, "abs"}, - {tipb::ScalarFuncSig::AbsDecimal, "abs"}, - - {tipb::ScalarFuncSig::CeilIntToDec, "ceil"}, - {tipb::ScalarFuncSig::CeilIntToInt, "ceil"}, - {tipb::ScalarFuncSig::CeilDecToInt, "ceil"}, - {tipb::ScalarFuncSig::CeilDecToDec, "ceil"}, - {tipb::ScalarFuncSig::CeilReal, "ceil"}, - - {tipb::ScalarFuncSig::FloorIntToDec, "floor"}, - {tipb::ScalarFuncSig::FloorIntToInt, "floor"}, - {tipb::ScalarFuncSig::FloorDecToInt, "floor"}, - {tipb::ScalarFuncSig::FloorDecToDec, "floor"}, - {tipb::ScalarFuncSig::FloorReal, "floor"}, - - 
{tipb::ScalarFuncSig::RoundReal, "round"}, - {tipb::ScalarFuncSig::RoundInt, "round"}, - {tipb::ScalarFuncSig::RoundDec, "round"}, - //{tipb::ScalarFuncSig::RoundWithFracReal, "cast"}, - //{tipb::ScalarFuncSig::RoundWithFracInt, "cast"}, - //{tipb::ScalarFuncSig::RoundWithFracDec, "cast"}, - - {tipb::ScalarFuncSig::Log1Arg, "log"}, - //{tipb::ScalarFuncSig::Log2Args, "cast"}, - {tipb::ScalarFuncSig::Log2, "log2"}, - {tipb::ScalarFuncSig::Log10, "log10"}, - - {tipb::ScalarFuncSig::Rand, "rand"}, - //{tipb::ScalarFuncSig::RandWithSeed, "cast"}, - - {tipb::ScalarFuncSig::Pow, "pow"}, - //{tipb::ScalarFuncSig::Conv, "cast"}, - //{tipb::ScalarFuncSig::CRC32, "cast"}, - //{tipb::ScalarFuncSig::Sign, "cast"}, - - {tipb::ScalarFuncSig::Sqrt, "sqrt"}, - {tipb::ScalarFuncSig::Acos, "acos"}, - {tipb::ScalarFuncSig::Asin, "asin"}, - {tipb::ScalarFuncSig::Atan1Arg, "atan"}, - //{tipb::ScalarFuncSig::Atan2Args, "cast"}, - {tipb::ScalarFuncSig::Cos, "cos"}, - //{tipb::ScalarFuncSig::Cot, "cast"}, - //{tipb::ScalarFuncSig::Degrees, "cast"}, - {tipb::ScalarFuncSig::Exp, "exp"}, - //{tipb::ScalarFuncSig::PI, "cast"}, - //{tipb::ScalarFuncSig::Radians, "cast"}, - {tipb::ScalarFuncSig::Sin, "sin"}, - {tipb::ScalarFuncSig::Tan, "tan"}, - {tipb::ScalarFuncSig::TruncateInt, "trunc"}, - {tipb::ScalarFuncSig::TruncateReal, "trunc"}, - //{tipb::ScalarFuncSig::TruncateDecimal, "cast"}, - - {tipb::ScalarFuncSig::LogicalAnd, "and"}, - {tipb::ScalarFuncSig::LogicalOr, "or"}, - {tipb::ScalarFuncSig::LogicalXor, "xor"}, - {tipb::ScalarFuncSig::UnaryNot, "not"}, - {tipb::ScalarFuncSig::UnaryMinusInt, "negate"}, - {tipb::ScalarFuncSig::UnaryMinusReal, "negate"}, - {tipb::ScalarFuncSig::UnaryMinusDecimal, "negate"}, - {tipb::ScalarFuncSig::DecimalIsNull, "isNull"}, - {tipb::ScalarFuncSig::DurationIsNull, "isNull"}, - {tipb::ScalarFuncSig::RealIsNull, "isNull"}, - {tipb::ScalarFuncSig::StringIsNull, "isNull"}, - {tipb::ScalarFuncSig::TimeIsNull, "isNull"}, - {tipb::ScalarFuncSig::IntIsNull, "isNull"}, - {tipb::ScalarFuncSig::JsonIsNull, "isNull"}, - - //{tipb::ScalarFuncSig::BitAndSig, "cast"}, - //{tipb::ScalarFuncSig::BitOrSig, "cast"}, - //{tipb::ScalarFuncSig::BitXorSig, "cast"}, - //{tipb::ScalarFuncSig::BitNegSig, "cast"}, - //{tipb::ScalarFuncSig::IntIsTrue, "cast"}, - //{tipb::ScalarFuncSig::RealIsTrue, "cast"}, - //{tipb::ScalarFuncSig::DecimalIsTrue, "cast"}, - //{tipb::ScalarFuncSig::IntIsFalse, "cast"}, - //{tipb::ScalarFuncSig::RealIsFalse, "cast"}, - //{tipb::ScalarFuncSig::DecimalIsFalse, "cast"}, - - //{tipb::ScalarFuncSig::LeftShift, "cast"}, - //{tipb::ScalarFuncSig::RightShift, "cast"}, - - //{tipb::ScalarFuncSig::BitCount, "cast"}, - //{tipb::ScalarFuncSig::GetParamString, "cast"}, - //{tipb::ScalarFuncSig::GetVar, "cast"}, - //{tipb::ScalarFuncSig::RowSig, "cast"}, - //{tipb::ScalarFuncSig::SetVar, "cast"}, - //{tipb::ScalarFuncSig::ValuesDecimal, "cast"}, - //{tipb::ScalarFuncSig::ValuesDuration, "cast"}, - //{tipb::ScalarFuncSig::ValuesInt, "cast"}, - //{tipb::ScalarFuncSig::ValuesJSON, "cast"}, - //{tipb::ScalarFuncSig::ValuesReal, "cast"}, - //{tipb::ScalarFuncSig::ValuesString, "cast"}, - //{tipb::ScalarFuncSig::ValuesTime, "cast"}, - - {tipb::ScalarFuncSig::InInt, "in"}, - {tipb::ScalarFuncSig::InReal, "in"}, - {tipb::ScalarFuncSig::InString, "in"}, - {tipb::ScalarFuncSig::InDecimal, "in"}, - {tipb::ScalarFuncSig::InTime, "in"}, - {tipb::ScalarFuncSig::InDuration, "in"}, - {tipb::ScalarFuncSig::InJson, "in"}, - - {tipb::ScalarFuncSig::IfNullInt, "ifNull"}, - {tipb::ScalarFuncSig::IfNullReal, 
"ifNull"}, - {tipb::ScalarFuncSig::IfNullString, "ifNull"}, - {tipb::ScalarFuncSig::IfNullDecimal, "ifNull"}, - {tipb::ScalarFuncSig::IfNullTime, "ifNull"}, - {tipb::ScalarFuncSig::IfNullDuration, "ifNull"}, - {tipb::ScalarFuncSig::IfNullJson, "ifNull"}, - - {tipb::ScalarFuncSig::IfInt, "if"}, - {tipb::ScalarFuncSig::IfReal, "if"}, - {tipb::ScalarFuncSig::IfString, "if"}, - {tipb::ScalarFuncSig::IfDecimal, "if"}, - {tipb::ScalarFuncSig::IfTime, "if"}, - {tipb::ScalarFuncSig::IfDuration, "if"}, - {tipb::ScalarFuncSig::IfJson, "if"}, - - //todo need further check for caseWithExpression and multiIf - {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, - - //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, - //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, - //{tipb::ScalarFuncSig::Compress, "cast"}, - //{tipb::ScalarFuncSig::MD5, "cast"}, - //{tipb::ScalarFuncSig::Password, "cast"}, - //{tipb::ScalarFuncSig::RandomBytes, "cast"}, - //{tipb::ScalarFuncSig::SHA1, "cast"}, - //{tipb::ScalarFuncSig::SHA2, "cast"}, - //{tipb::ScalarFuncSig::Uncompress, "cast"}, - //{tipb::ScalarFuncSig::UncompressedLength, "cast"}, - - //{tipb::ScalarFuncSig::Database, "cast"}, - //{tipb::ScalarFuncSig::FoundRows, "cast"}, - //{tipb::ScalarFuncSig::CurrentUser, "cast"}, - //{tipb::ScalarFuncSig::User, "cast"}, - //{tipb::ScalarFuncSig::ConnectionID, "cast"}, - //{tipb::ScalarFuncSig::LastInsertID, "cast"}, - //{tipb::ScalarFuncSig::LastInsertIDWithID, "cast"}, - //{tipb::ScalarFuncSig::Version, "cast"}, - //{tipb::ScalarFuncSig::TiDBVersion, "cast"}, - //{tipb::ScalarFuncSig::RowCount, "cast"}, - - //{tipb::ScalarFuncSig::Sleep, "cast"}, - //{tipb::ScalarFuncSig::Lock, "cast"}, - //{tipb::ScalarFuncSig::ReleaseLock, "cast"}, - //{tipb::ScalarFuncSig::DecimalAnyValue, "cast"}, - //{tipb::ScalarFuncSig::DurationAnyValue, "cast"}, - //{tipb::ScalarFuncSig::IntAnyValue, "cast"}, - //{tipb::ScalarFuncSig::JSONAnyValue, "cast"}, - //{tipb::ScalarFuncSig::RealAnyValue, "cast"}, - //{tipb::ScalarFuncSig::StringAnyValue, "cast"}, - //{tipb::ScalarFuncSig::TimeAnyValue, "cast"}, - //{tipb::ScalarFuncSig::InetAton, "cast"}, - //{tipb::ScalarFuncSig::InetNtoa, "cast"}, - //{tipb::ScalarFuncSig::Inet6Aton, "cast"}, - //{tipb::ScalarFuncSig::Inet6Ntoa, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"}, - //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"}, - //{tipb::ScalarFuncSig::IsIPv6, "cast"}, - //{tipb::ScalarFuncSig::UUID, "cast"}, - - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - 
{tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, + throw Exception("not supported"); } - ); + func_name = scalarFunMap.find(expr.sig())->second; + break; + default: + throw Exception("not supported"); + } + // build function expr + if (func_name == "in") + { + // for in, we could not represent the function expr using func_name(param1, param2, ...) + throw Exception("not supported"); + } + else + { + ss << func_name << "("; + bool first = true; + for (const tipb::Expr & child : expr.children()) + { + String s = exprToString(child, input_col); + if (first) + { + first = false; + } + else + { + ss << ", "; + } + ss << s; + } + ss << ") "; + return ss.str(); + } +} + +const String & getTypeName(const tipb::Expr & expr) { return tipb::ExprType_Name(expr.tp()); } + +String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns) +{ + return exprToString(expr, current_input_columns); +} + +bool isAggFunctionExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::Count: + case tipb::ExprType::Sum: + case tipb::ExprType::Avg: + case tipb::ExprType::Min: + case tipb::ExprType::Max: + case tipb::ExprType::First: + case tipb::ExprType::GroupConcat: + case tipb::ExprType::Agg_BitAnd: + case tipb::ExprType::Agg_BitOr: + case tipb::ExprType::Agg_BitXor: + case tipb::ExprType::Std: + case tipb::ExprType::Stddev: + case tipb::ExprType::StddevPop: + case tipb::ExprType::StddevSamp: + case tipb::ExprType::VarPop: + case tipb::ExprType::VarSamp: + case tipb::ExprType::Variance: + case tipb::ExprType::JsonArrayAgg: + case tipb::ExprType::JsonObjectAgg: + return true; + default: + return false; + } +} + +bool isLiteralExpr(const tipb::Expr & expr) +{ + switch (expr.tp()) + { + case tipb::ExprType::Null: + case tipb::ExprType::Int64: + case tipb::ExprType::Uint64: + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + case tipb::ExprType::String: + case tipb::ExprType::Bytes: + case tipb::ExprType::MysqlBit: + case tipb::ExprType::MysqlDecimal: + case tipb::ExprType::MysqlDuration: + case tipb::ExprType::MysqlEnum: + case tipb::ExprType::MysqlHex: + case tipb::ExprType::MysqlSet: + case tipb::ExprType::MysqlTime: + case tipb::ExprType::MysqlJson: + case tipb::ExprType::ValueList: + return true; + default: + return false; + } } + +bool isColumnExpr(const tipb::Expr & expr) { return expr.tp() == tipb::ExprType::ColumnRef; } + +Field decodeLiteral(const tipb::Expr & expr) +{ + size_t cursor = 0; + switch (expr.tp()) + { + case tipb::ExprType::MysqlBit: + case tipb::ExprType::MysqlDecimal: + case tipb::ExprType::MysqlDuration: + case tipb::ExprType::MysqlEnum: + case tipb::ExprType::MysqlHex: + case tipb::ExprType::MysqlSet: + case tipb::ExprType::MysqlTime: + case 
tipb::ExprType::MysqlJson: + case tipb::ExprType::ValueList: + throw Exception("mysql type literal is not supported yet"); + default: + return DecodeDatum(cursor, expr.val()); + } +} + +ColumnID getColumnID(const tipb::Expr & expr) +{ + size_t cursor = 1; + return DecodeInt(cursor, expr.val()); +} + +std::unordered_map aggFunMap({ + {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Avg, "avg"}, {tipb::ExprType::Min, "min"}, + {tipb::ExprType::Max, "max"}, {tipb::ExprType::First, "any"}, + //{tipb::ExprType::GroupConcat, ""}, + //{tipb::ExprType::Agg_BitAnd, ""}, + //{tipb::ExprType::Agg_BitOr, ""}, + //{tipb::ExprType::Agg_BitXor, ""}, + //{tipb::ExprType::Std, ""}, + //{tipb::ExprType::Stddev, ""}, + //{tipb::ExprType::StddevPop, ""}, + //{tipb::ExprType::StddevSamp, ""}, + //{tipb::ExprType::VarPop, ""}, + //{tipb::ExprType::VarSamp, ""}, + //{tipb::ExprType::Variance, ""}, + //{tipb::ExprType::JsonArrayAgg, ""}, + //{tipb::ExprType::JsonObjectAgg, ""}, +}); + +std::unordered_map scalarFunMap({ + {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, + {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsString, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastIntAsTime, "cast"}, + {tipb::ScalarFuncSig::CastIntAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastIntAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastRealAsInt, "cast"}, + {tipb::ScalarFuncSig::CastRealAsReal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsString, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastRealAsTime, "cast"}, + {tipb::ScalarFuncSig::CastRealAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastRealAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDecimalAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsString, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDecimalAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastStringAsInt, "cast"}, + {tipb::ScalarFuncSig::CastStringAsReal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsString, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastStringAsTime, "cast"}, + {tipb::ScalarFuncSig::CastStringAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastStringAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastTimeAsInt, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsReal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsString, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsTime, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastTimeAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastDurationAsInt, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsReal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsString, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsTime, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsDuration, "cast"}, + {tipb::ScalarFuncSig::CastDurationAsJson, "cast"}, + + {tipb::ScalarFuncSig::CastJsonAsInt, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsReal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsString, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDecimal, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, + {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, + 
{tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, + + {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceString, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDecimal, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceTime, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceDuration, "coalesce"}, + {tipb::ScalarFuncSig::CoalesceJson, "coalesce"}, + + {tipb::ScalarFuncSig::LTInt, "less"}, + {tipb::ScalarFuncSig::LTReal, "less"}, + {tipb::ScalarFuncSig::LTString, "less"}, + {tipb::ScalarFuncSig::LTDecimal, "less"}, + {tipb::ScalarFuncSig::LTTime, "less"}, + {tipb::ScalarFuncSig::LTDuration, "less"}, + {tipb::ScalarFuncSig::LTJson, "less"}, + + {tipb::ScalarFuncSig::LEInt, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEReal, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEString, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEDecimal, "lessOrEquals"}, + {tipb::ScalarFuncSig::LETime, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEDuration, "lessOrEquals"}, + {tipb::ScalarFuncSig::LEJson, "lessOrEquals"}, + + {tipb::ScalarFuncSig::GTInt, "greater"}, + {tipb::ScalarFuncSig::GTReal, "greater"}, + {tipb::ScalarFuncSig::GTString, "greater"}, + {tipb::ScalarFuncSig::GTDecimal, "greater"}, + {tipb::ScalarFuncSig::GTTime, "greater"}, + {tipb::ScalarFuncSig::GTDuration, "greater"}, + {tipb::ScalarFuncSig::GTJson, "greater"}, + + {tipb::ScalarFuncSig::GreatestInt, "greatest"}, + {tipb::ScalarFuncSig::GreatestReal, "greatest"}, + {tipb::ScalarFuncSig::GreatestString, "greatest"}, + {tipb::ScalarFuncSig::GreatestDecimal, "greatest"}, + {tipb::ScalarFuncSig::GreatestTime, "greatest"}, + + {tipb::ScalarFuncSig::LeastInt, "least"}, + {tipb::ScalarFuncSig::LeastReal, "least"}, + {tipb::ScalarFuncSig::LeastString, "least"}, + {tipb::ScalarFuncSig::LeastDecimal, "least"}, + {tipb::ScalarFuncSig::LeastTime, "least"}, + + //{tipb::ScalarFuncSig::IntervalInt, "cast"}, + //{tipb::ScalarFuncSig::IntervalReal, "cast"}, + + {tipb::ScalarFuncSig::GEInt, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEReal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEString, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDecimal, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GETime, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEDuration, "greaterOrEquals"}, + {tipb::ScalarFuncSig::GEJson, "greaterOrEquals"}, + + {tipb::ScalarFuncSig::EQInt, "equals"}, + {tipb::ScalarFuncSig::EQReal, "equals"}, + {tipb::ScalarFuncSig::EQString, "equals"}, + {tipb::ScalarFuncSig::EQDecimal, "equals"}, + {tipb::ScalarFuncSig::EQTime, "equals"}, + {tipb::ScalarFuncSig::EQDuration, "equals"}, + {tipb::ScalarFuncSig::EQJson, "equals"}, + + {tipb::ScalarFuncSig::NEInt, "notEquals"}, + {tipb::ScalarFuncSig::NEReal, "notEquals"}, + {tipb::ScalarFuncSig::NEString, "notEquals"}, + {tipb::ScalarFuncSig::NEDecimal, "notEquals"}, + {tipb::ScalarFuncSig::NETime, "notEquals"}, + {tipb::ScalarFuncSig::NEDuration, "notEquals"}, + {tipb::ScalarFuncSig::NEJson, "notEquals"}, + + //{tipb::ScalarFuncSig::NullEQInt, "cast"}, + //{tipb::ScalarFuncSig::NullEQReal, "cast"}, + //{tipb::ScalarFuncSig::NullEQString, "cast"}, + //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, + //{tipb::ScalarFuncSig::NullEQTime, "cast"}, + //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, + //{tipb::ScalarFuncSig::NullEQJson, "cast"}, + + {tipb::ScalarFuncSig::PlusReal, "plus"}, + {tipb::ScalarFuncSig::PlusDecimal, "plus"}, + {tipb::ScalarFuncSig::PlusInt, "plus"}, + + {tipb::ScalarFuncSig::MinusReal, "minus"}, + {tipb::ScalarFuncSig::MinusDecimal, 
"minus"}, + {tipb::ScalarFuncSig::MinusInt, "minus"}, + + {tipb::ScalarFuncSig::MultiplyReal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyDecimal, "multiply"}, + {tipb::ScalarFuncSig::MultiplyInt, "multiply"}, + + {tipb::ScalarFuncSig::DivideReal, "divide"}, + {tipb::ScalarFuncSig::DivideDecimal, "divide"}, + {tipb::ScalarFuncSig::IntDivideInt, "intDiv"}, + {tipb::ScalarFuncSig::IntDivideDecimal, "divide"}, + + {tipb::ScalarFuncSig::ModReal, "modulo"}, + {tipb::ScalarFuncSig::ModDecimal, "modulo"}, + {tipb::ScalarFuncSig::ModInt, "modulo"}, + + {tipb::ScalarFuncSig::MultiplyIntUnsigned, "multiply"}, + + {tipb::ScalarFuncSig::AbsInt, "abs"}, + {tipb::ScalarFuncSig::AbsUInt, "abs"}, + {tipb::ScalarFuncSig::AbsReal, "abs"}, + {tipb::ScalarFuncSig::AbsDecimal, "abs"}, + + {tipb::ScalarFuncSig::CeilIntToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilIntToInt, "ceil"}, + {tipb::ScalarFuncSig::CeilDecToInt, "ceil"}, + {tipb::ScalarFuncSig::CeilDecToDec, "ceil"}, + {tipb::ScalarFuncSig::CeilReal, "ceil"}, + + {tipb::ScalarFuncSig::FloorIntToDec, "floor"}, + {tipb::ScalarFuncSig::FloorIntToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToInt, "floor"}, + {tipb::ScalarFuncSig::FloorDecToDec, "floor"}, + {tipb::ScalarFuncSig::FloorReal, "floor"}, + + {tipb::ScalarFuncSig::RoundReal, "round"}, + {tipb::ScalarFuncSig::RoundInt, "round"}, + {tipb::ScalarFuncSig::RoundDec, "round"}, + //{tipb::ScalarFuncSig::RoundWithFracReal, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracInt, "cast"}, + //{tipb::ScalarFuncSig::RoundWithFracDec, "cast"}, + + {tipb::ScalarFuncSig::Log1Arg, "log"}, + //{tipb::ScalarFuncSig::Log2Args, "cast"}, + {tipb::ScalarFuncSig::Log2, "log2"}, + {tipb::ScalarFuncSig::Log10, "log10"}, + + {tipb::ScalarFuncSig::Rand, "rand"}, + //{tipb::ScalarFuncSig::RandWithSeed, "cast"}, + + {tipb::ScalarFuncSig::Pow, "pow"}, + //{tipb::ScalarFuncSig::Conv, "cast"}, + //{tipb::ScalarFuncSig::CRC32, "cast"}, + //{tipb::ScalarFuncSig::Sign, "cast"}, + + {tipb::ScalarFuncSig::Sqrt, "sqrt"}, + {tipb::ScalarFuncSig::Acos, "acos"}, + {tipb::ScalarFuncSig::Asin, "asin"}, + {tipb::ScalarFuncSig::Atan1Arg, "atan"}, + //{tipb::ScalarFuncSig::Atan2Args, "cast"}, + {tipb::ScalarFuncSig::Cos, "cos"}, + //{tipb::ScalarFuncSig::Cot, "cast"}, + //{tipb::ScalarFuncSig::Degrees, "cast"}, + {tipb::ScalarFuncSig::Exp, "exp"}, + //{tipb::ScalarFuncSig::PI, "cast"}, + //{tipb::ScalarFuncSig::Radians, "cast"}, + {tipb::ScalarFuncSig::Sin, "sin"}, + {tipb::ScalarFuncSig::Tan, "tan"}, + {tipb::ScalarFuncSig::TruncateInt, "trunc"}, + {tipb::ScalarFuncSig::TruncateReal, "trunc"}, + //{tipb::ScalarFuncSig::TruncateDecimal, "cast"}, + + {tipb::ScalarFuncSig::LogicalAnd, "and"}, + {tipb::ScalarFuncSig::LogicalOr, "or"}, + {tipb::ScalarFuncSig::LogicalXor, "xor"}, + {tipb::ScalarFuncSig::UnaryNot, "not"}, + {tipb::ScalarFuncSig::UnaryMinusInt, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusReal, "negate"}, + {tipb::ScalarFuncSig::UnaryMinusDecimal, "negate"}, + {tipb::ScalarFuncSig::DecimalIsNull, "isNull"}, + {tipb::ScalarFuncSig::DurationIsNull, "isNull"}, + {tipb::ScalarFuncSig::RealIsNull, "isNull"}, + {tipb::ScalarFuncSig::StringIsNull, "isNull"}, + {tipb::ScalarFuncSig::TimeIsNull, "isNull"}, + {tipb::ScalarFuncSig::IntIsNull, "isNull"}, + {tipb::ScalarFuncSig::JsonIsNull, "isNull"}, + + //{tipb::ScalarFuncSig::BitAndSig, "cast"}, + //{tipb::ScalarFuncSig::BitOrSig, "cast"}, + //{tipb::ScalarFuncSig::BitXorSig, "cast"}, + //{tipb::ScalarFuncSig::BitNegSig, "cast"}, + //{tipb::ScalarFuncSig::IntIsTrue, "cast"}, + 
//{tipb::ScalarFuncSig::RealIsTrue, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsTrue, "cast"}, + //{tipb::ScalarFuncSig::IntIsFalse, "cast"}, + //{tipb::ScalarFuncSig::RealIsFalse, "cast"}, + //{tipb::ScalarFuncSig::DecimalIsFalse, "cast"}, + + //{tipb::ScalarFuncSig::LeftShift, "cast"}, + //{tipb::ScalarFuncSig::RightShift, "cast"}, + + //{tipb::ScalarFuncSig::BitCount, "cast"}, + //{tipb::ScalarFuncSig::GetParamString, "cast"}, + //{tipb::ScalarFuncSig::GetVar, "cast"}, + //{tipb::ScalarFuncSig::RowSig, "cast"}, + //{tipb::ScalarFuncSig::SetVar, "cast"}, + //{tipb::ScalarFuncSig::ValuesDecimal, "cast"}, + //{tipb::ScalarFuncSig::ValuesDuration, "cast"}, + //{tipb::ScalarFuncSig::ValuesInt, "cast"}, + //{tipb::ScalarFuncSig::ValuesJSON, "cast"}, + //{tipb::ScalarFuncSig::ValuesReal, "cast"}, + //{tipb::ScalarFuncSig::ValuesString, "cast"}, + //{tipb::ScalarFuncSig::ValuesTime, "cast"}, + + {tipb::ScalarFuncSig::InInt, "in"}, + {tipb::ScalarFuncSig::InReal, "in"}, + {tipb::ScalarFuncSig::InString, "in"}, + {tipb::ScalarFuncSig::InDecimal, "in"}, + {tipb::ScalarFuncSig::InTime, "in"}, + {tipb::ScalarFuncSig::InDuration, "in"}, + {tipb::ScalarFuncSig::InJson, "in"}, + + {tipb::ScalarFuncSig::IfNullInt, "ifNull"}, + {tipb::ScalarFuncSig::IfNullReal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullString, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDecimal, "ifNull"}, + {tipb::ScalarFuncSig::IfNullTime, "ifNull"}, + {tipb::ScalarFuncSig::IfNullDuration, "ifNull"}, + {tipb::ScalarFuncSig::IfNullJson, "ifNull"}, + + {tipb::ScalarFuncSig::IfInt, "if"}, + {tipb::ScalarFuncSig::IfReal, "if"}, + {tipb::ScalarFuncSig::IfString, "if"}, + {tipb::ScalarFuncSig::IfDecimal, "if"}, + {tipb::ScalarFuncSig::IfTime, "if"}, + {tipb::ScalarFuncSig::IfDuration, "if"}, + {tipb::ScalarFuncSig::IfJson, "if"}, + + //todo need further check for caseWithExpression and multiIf + {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, + {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, + + //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, + //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, + //{tipb::ScalarFuncSig::Compress, "cast"}, + //{tipb::ScalarFuncSig::MD5, "cast"}, + //{tipb::ScalarFuncSig::Password, "cast"}, + //{tipb::ScalarFuncSig::RandomBytes, "cast"}, + //{tipb::ScalarFuncSig::SHA1, "cast"}, + //{tipb::ScalarFuncSig::SHA2, "cast"}, + //{tipb::ScalarFuncSig::Uncompress, "cast"}, + //{tipb::ScalarFuncSig::UncompressedLength, "cast"}, + + //{tipb::ScalarFuncSig::Database, "cast"}, + //{tipb::ScalarFuncSig::FoundRows, "cast"}, + //{tipb::ScalarFuncSig::CurrentUser, "cast"}, + //{tipb::ScalarFuncSig::User, "cast"}, + //{tipb::ScalarFuncSig::ConnectionID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertID, "cast"}, + //{tipb::ScalarFuncSig::LastInsertIDWithID, "cast"}, + //{tipb::ScalarFuncSig::Version, "cast"}, + //{tipb::ScalarFuncSig::TiDBVersion, "cast"}, + //{tipb::ScalarFuncSig::RowCount, "cast"}, + + //{tipb::ScalarFuncSig::Sleep, "cast"}, + //{tipb::ScalarFuncSig::Lock, "cast"}, + //{tipb::ScalarFuncSig::ReleaseLock, "cast"}, + //{tipb::ScalarFuncSig::DecimalAnyValue, "cast"}, + //{tipb::ScalarFuncSig::DurationAnyValue, "cast"}, + //{tipb::ScalarFuncSig::IntAnyValue, "cast"}, + 
//{tipb::ScalarFuncSig::JSONAnyValue, "cast"},
+    //{tipb::ScalarFuncSig::RealAnyValue, "cast"},
+    //{tipb::ScalarFuncSig::StringAnyValue, "cast"},
+    //{tipb::ScalarFuncSig::TimeAnyValue, "cast"},
+    //{tipb::ScalarFuncSig::InetAton, "cast"},
+    //{tipb::ScalarFuncSig::InetNtoa, "cast"},
+    //{tipb::ScalarFuncSig::Inet6Aton, "cast"},
+    //{tipb::ScalarFuncSig::Inet6Ntoa, "cast"},
+    //{tipb::ScalarFuncSig::IsIPv4, "cast"},
+    //{tipb::ScalarFuncSig::IsIPv4Compat, "cast"},
+    //{tipb::ScalarFuncSig::IsIPv4Mapped, "cast"},
+    //{tipb::ScalarFuncSig::IsIPv6, "cast"},
+    //{tipb::ScalarFuncSig::UUID, "cast"},
+});
+} // namespace DB
diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.h b/dbms/src/Interpreters/CoprocessorBuilderUtils.h
index 908a8638c77..22cf460141b 100644
--- a/dbms/src/Interpreters/CoprocessorBuilderUtils.h
+++ b/dbms/src/Interpreters/CoprocessorBuilderUtils.h
@@ -2,9 +2,27 @@

 #include

-namespace DB {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include
+#pragma GCC diagnostic pop

-    extern std::unordered_map aggFunMap;
-    extern std::unordered_map scalarFunMap;
+#include

-}
+namespace DB
+{
+
+bool isLiteralExpr(const tipb::Expr & expr);
+Field decodeLiteral(const tipb::Expr & expr);
+bool isFunctionExpr(const tipb::Expr & expr);
+bool isAggFunctionExpr(const tipb::Expr & expr);
+const String & getFunctionName(const tipb::Expr & expr);
+bool isColumnExpr(const tipb::Expr & expr);
+ColumnID getColumnID(const tipb::Expr & expr);
+String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns);
+const String & getTypeName(const tipb::Expr & expr);
+String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col);
+extern std::unordered_map aggFunMap;
+extern std::unordered_map scalarFunMap;
+
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp
new file mode 100644
index 00000000000..2cc8ce0b9c2
--- /dev/null
+++ 
b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -0,0 +1,171 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_) + : source_columns(source_columns_), context(context_) +{ + settings = context.getSettings(); +} + +bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name) +{ + if (sel.conditions_size() == 0) + { + return false; + } + tipb::Expr final_condition; + if (sel.conditions_size() > 1) + { + final_condition.set_tp(tipb::ExprType::ScalarFunc); + final_condition.set_sig(tipb::ScalarFuncSig::LogicalAnd); + + for (auto & condition : sel.conditions()) + { + auto c = final_condition.add_children(); + c->ParseFromString(condition.SerializeAsString()); + } + } + + const tipb::Expr & filter = sel.conditions_size() > 1 ? final_condition : sel.conditions(0); + initChain(chain, source_columns); + filter_column_name = getActions(filter, chain.steps.back().actions); + chain.steps.back().required_output.push_back(filter_column_name); + return true; +} + +bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names) +{ + if (topN.order_by_size() == 0) + { + return false; + } + initChain(chain, aggregated_columns); + ExpressionActionsChain::Step & step = chain.steps.back(); + for (const tipb::ByItem & byItem : topN.order_by()) + { + String name = getActions(byItem.expr(), step.actions); + step.required_output.push_back(name); + order_column_names.push_back(name); + } + return true; +} + +const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return source_columns; } + +String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions) +{ + String expr_name = getName(expr, getCurrentInputColumns()); + if ((isLiteralExpr(expr) || isFunctionExpr(expr)) && actions->getSampleBlock().has(expr_name)) + { + return expr_name; + } + if (isLiteralExpr(expr)) + { + Field value = decodeLiteral(expr); + DataTypePtr type = expr.has_field_type() ? 
getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), value);
+
+        ColumnWithTypeAndName column;
+        column.column = type->createColumnConst(1, convertFieldToType(value, *type));
+        column.name = expr_name;
+        column.type = type;
+
+        actions->add(ExpressionAction::addColumn(column));
+        return column.name;
+    }
+    else if (isColumnExpr(expr))
+    {
+        ColumnID columnId = getColumnID(expr);
+        if (columnId < 1 || columnId > (ColumnID)getCurrentInputColumns().size())
+        {
+            throw Exception("column id out of bound");
+        }
+        //todo check if the column type need to be cast to field type
+        return expr_name;
+    }
+    else if (isFunctionExpr(expr))
+    {
+        if (isAggFunctionExpr(expr))
+        {
+            throw Exception("agg function is not supported yet");
+        }
+        const String & func_name = getFunctionName(expr);
+        if (func_name == "in" || func_name == "notIn" || func_name == "globalIn" || func_name == "globalNotIn")
+        {
+            // todo support in
+            throw Exception(func_name + " is not supported yet");
+        }
+
+        const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context);
+        Names argument_names;
+        DataTypes argument_types;
+        for (auto & child : expr.children())
+        {
+            String name = getActions(child, actions);
+            if (actions->getSampleBlock().has(name))
+            {
+                argument_names.push_back(name);
+                argument_types.push_back(actions->getSampleBlock().getByName(name).type);
+            }
+            else
+            {
+                throw Exception("Unknown expr: " + child.DebugString());
+            }
+        }
+
+        const ExpressionAction & applyFunction = ExpressionAction::applyFunction(function_builder, argument_names, expr_name);
+        actions->add(applyFunction);
+        // add cast if needed
+        if (expr.has_field_type())
+        {
+            DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type());
+            DataTypePtr actual_type = applyFunction.result_type;
+            //todo maybe use a more decent compare method
+            if (expected_type->getName() != actual_type->getName())
+            {
+                // need to add cast function
+                // first construct the second argument
+                tipb::Expr type_expr;
+                type_expr.set_tp(tipb::ExprType::String);
+                std::stringstream ss;
+                EncodeCompactBytes(expected_type->getName(), ss);
+                type_expr.set_val(ss.str());
+                // mutate the field type in place; field_type() returns a copy/const ref whose setters would be lost
+                auto * type_field_type = type_expr.mutable_field_type();
+                type_field_type->set_tp(0xfe); // 0xfe is the MySQL/TiDB type code for string
+                type_field_type->set_flag(1);
+                String name = getActions(type_expr, actions);
+                String cast_name = "cast";
+                const FunctionBuilderPtr & cast_func_builder = FunctionFactory::instance().get(cast_name, context);
+                String cast_expr_name = cast_name + "_" + expr_name + "_" + getName(type_expr, getCurrentInputColumns());
+                Names cast_argument_names;
+                cast_argument_names.push_back(expr_name);
+                cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns()));
+                // apply the cast with the cast arguments, not the original function's arguments
+                const ExpressionAction & apply_cast_function
+                    = ExpressionAction::applyFunction(cast_func_builder, cast_argument_names, cast_expr_name);
+                actions->add(apply_cast_function);
+                return cast_expr_name;
+            }
+            else
+            {
+                return expr_name;
+            }
+        }
+        else
+        {
+            return expr_name;
+        }
+    }
+    else
+    {
+        throw Exception("Unsupported expr type: " + getTypeName(expr));
+    }
+}
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h
new file mode 100644
index 00000000000..6a63600fb12
--- /dev/null
+++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+/** Transforms an expression from DAG expression into a sequence of actions to execute it.
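+ *
+ * A rough usage sketch (an assumption inferred from how InterpreterDAGRequest wires
+ * this class up, not a verbatim excerpt):
+ *
+ *   DAGExpressionAnalyzer analyzer(source_columns, context);
+ *   ExpressionActionsChain chain;
+ *   String filter_column;
+ *   if (analyzer.appendWhere(chain, selection, filter_column))
+ *       ... // wrap the input streams in a filter stream on filter_column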
+ *
+ */
+class DAGExpressionAnalyzer : private boost::noncopyable
+{
+private:
+    using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+    // all columns from table scan
+    NamesAndTypesList source_columns;
+    // all columns after aggregation
+    NamesAndTypesList aggregated_columns;
+    Settings settings;
+    const Context & context;
+
+public:
+    DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_);
+    bool appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name);
+    bool appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names);
+    void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
+    {
+        if (chain.steps.empty())
+        {
+            chain.settings = settings;
+            chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, settings));
+        }
+    }
+    String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions);
+    const NamesAndTypesList & getCurrentInputColumns();
+};
+
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGQueryInfo.cpp b/dbms/src/Interpreters/DAGQueryInfo.cpp
index 77c6b2daef1..ad03da917d6 100644
--- a/dbms/src/Interpreters/DAGQueryInfo.cpp
+++ b/dbms/src/Interpreters/DAGQueryInfo.cpp
@@ -1,27 +1,67 @@
-#include
 #include
 #include
+#include

 namespace DB
 {
-    DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_)
-    : dag_request(dag_request_), coprocessorContext(coprocessorContext_) {}
+const String DAGQueryInfo::TS_NAME("tablescan");
+const String DAGQueryInfo::SEL_NAME("selection");
+const String DAGQueryInfo::AGG_NAME("aggregation");
+const String DAGQueryInfo::TOPN_NAME("topN");
+const String DAGQueryInfo::LIMIT_NAME("limit");

-    std::tuple DAGQueryInfo::parse(size_t ) {
-        query = String("cop query");
-        ast = std::make_shared<ASTSelectQuery>();
-        ((ASTSelectQuery*)ast.get())->is_fake_sel = true;
-        return std::make_tuple(query, ast);
+static void assignOrThrowException(Int32 & index, Int32 value, const String & name)
+{
+    if (index != -1)
+    {
+        throw Exception("Duplicated " + name + " in DAG request");
     }
+    index = value;
+}

-    String DAGQueryInfo::get_query_ignore_error(size_t ) {
-        return query;
+DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_)
+    : dag_request(dag_request_), coprocessorContext(coprocessorContext_)
+{
+    for (int i = 0; i < dag_request.executors_size(); i++)
+    {
+        switch (dag_request.executors(i).tp())
+        {
+        case tipb::ExecType::TypeTableScan:
+            assignOrThrowException(ts_index, i, TS_NAME);
+            break;
+        case tipb::ExecType::TypeSelection:
+            assignOrThrowException(sel_index, i, SEL_NAME);
+            break;
+        case tipb::ExecType::TypeStreamAgg:
+        case tipb::ExecType::TypeAggregation:
+            assignOrThrowException(agg_index, i, AGG_NAME);
+            break;
+        case tipb::ExecType::TypeTopN:
+            assignOrThrowException(order_index, i, TOPN_NAME);
+            // a TopN executor carries its own limit; do not fall through into TypeLimit
+            break;
+        case tipb::ExecType::TypeLimit:
+            assignOrThrowException(limit_index, i, LIMIT_NAME);
+            break;
+        default:
+            throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString());
+        }
     }
+}

-    std::unique_ptr DAGQueryInfo::getInterpreter(Context & , QueryProcessingStage::Enum ) {
-        return std::make_unique(coprocessorContext, dag_request);
-    }
+std::tuple<String, ASTPtr> DAGQueryInfo::parse(size_t)
+{
+    query = String("cop query");
+    ast = std::make_shared<ASTSelectQuery>();
+    ((ASTSelectQuery *)ast.get())->is_fake_sel = true;
+    return std::make_tuple(query, ast);
+}
+
+String DAGQueryInfo::get_query_ignore_error(size_t) { return 
query; }
+
+std::unique_ptr<IInterpreter> DAGQueryInfo::getInterpreter(Context &, QueryProcessingStage::Enum)
+{
+    return std::make_unique<InterpreterDAGRequest>(coprocessorContext, *this);
 }
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGQueryInfo.h b/dbms/src/Interpreters/DAGQueryInfo.h
index 826a07cfc33..aa2baa833c9 100644
--- a/dbms/src/Interpreters/DAGQueryInfo.h
+++ b/dbms/src/Interpreters/DAGQueryInfo.h
@@ -1,10 +1,14 @@
 #pragma once

-#include
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
 #include
-#include
-#include
+#pragma GCC diagnostic pop

 #include
+#include
+#include
+#include

 namespace DB

@@ -15,18 +19,65 @@ namespace DB
 class DAGQueryInfo : public IQueryInfo
 {
 public:
+    static const String TS_NAME;
+    static const String SEL_NAME;
+    static const String AGG_NAME;
+    static const String TOPN_NAME;
+    static const String LIMIT_NAME;
     DAGQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_);
-    bool isInternalQuery() { return false;};
+    bool isInternalQuery() { return false; };
     virtual std::tuple<String, ASTPtr> parse(size_t max_query_size);
     virtual String get_query_ignore_error(size_t max_query_size);
     virtual std::unique_ptr<IInterpreter> getInterpreter(Context & context, QueryProcessingStage::Enum stage);
+    void assertValid(Int32 index, const String & name)
+    {
+        // index == executors_size() is already out of range, hence >=
+        if (index < 0 || index >= dag_request.executors_size())
+        {
+            throw Exception("Access invalid executor: " + name);
+        }
+    }
+    bool has_selection() { return sel_index != -1; };
+    bool has_aggregation() { return agg_index != -1; };
+    bool has_topN() { return order_index != -1; };
+    bool has_limit() { return order_index == -1 && limit_index != -1; };
+    const tipb::TableScan & get_ts()
+    {
+        assertValid(ts_index, TS_NAME);
+        return dag_request.executors(ts_index).tbl_scan();
+    };
+    const tipb::Selection & get_sel()
+    {
+        assertValid(sel_index, SEL_NAME);
+        return dag_request.executors(sel_index).selection();
+    };
+    const tipb::Aggregation & get_agg()
+    {
+        assertValid(agg_index, AGG_NAME);
+        return dag_request.executors(agg_index).aggregation();
+    };
+    const tipb::TopN & get_topN()
+    {
+        assertValid(order_index, TOPN_NAME);
+        return dag_request.executors(order_index).topn();
+    };
+    const tipb::Limit & get_limit()
+    {
+        assertValid(limit_index, LIMIT_NAME);
+        return dag_request.executors(limit_index).limit();
+    };
+    const tipb::DAGRequest & get_dag_request() { return dag_request; };

 private:
     const tipb::DAGRequest & dag_request;
     CoprocessorContext & coprocessorContext;
     String query;
     ASTPtr ast;
+    Int32 ts_index = -1;
+    Int32 sel_index = -1;
+    Int32 agg_index = -1;
+    Int32 order_index = -1;
+    Int32 limit_index = -1;
 };

-}
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp
index f06f92704e4..aa49500d274 100644
--- a/dbms/src/Interpreters/DAGStringConverter.cpp
+++ b/dbms/src/Interpreters/DAGStringConverter.cpp
@@ -1,209 +1,159 @@
+#include
 #include
+#include
 #include
-#include
 #include
-#include
-#include
-#include
-#include
 #include
 #include
+#include
+#include
+#include

-namespace DB {
+namespace DB
+{

-    bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) {
-        TableID id;
-        if(ts.has_table_id()) {
-            id = ts.table_id();
-        } else {
-            // do not have table id
-            return false;
-        }
-        auto & tmt_ctx = context.ch_context.getTMTContext();
-        auto storage = tmt_ctx.getStorages().get(id);
-        if(storage == nullptr) {
-            tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false);
-            storage = 
tmt_ctx.getStorages().get(id); - } - if(storage == nullptr) { - return false; - } - const auto * merge_tree = dynamic_cast(storage.get()); - if (!merge_tree) { - return false; - } +bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) +{ + TableID id; + if (ts.has_table_id()) + { + id = ts.table_id(); + } + else + { + // do not have table id + return false; + } + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if (storage == nullptr) + { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); + } + if (storage == nullptr) + { + return false; + } + const auto * merge_tree = dynamic_cast(storage.get()); + if (!merge_tree) + { + return false; + } - for(const tipb::ColumnInfo &ci : ts.columns()) { - ColumnID cid = ci.column_id(); - String name = merge_tree->getTableInfo().columns[cid-1].name; - column_name_from_ts.emplace(std::make_pair(cid, name)); - } - if(column_name_from_ts.empty()) { - // no column selected, must be something wrong - return false; + if (ts.columns_size() == 0) + { + // no column selected, must be something wrong + return false; + } + columns_from_ts = storage->getColumns().getAllPhysical(); + for (const tipb::ColumnInfo & ci : ts.columns()) + { + ColumnID cid = ci.column_id(); + if (cid <= 0 || cid > (ColumnID)columns_from_ts.size()) + { + throw Exception("column id out of bound"); } - ss << "FROM " << merge_tree->getTableInfo().db_name << "." << merge_tree->getTableInfo().name << " "; - return true; + String name = merge_tree->getTableInfo().columns[cid - 1].name; + output_from_ts.push_back(std::move(name)); } + ss << "FROM " << merge_tree->getTableInfo().db_name << "." << merge_tree->getTableInfo().name << " "; + return true; +} - String DAGStringConverter::exprToString(const tipb::Expr & expr, bool &succ) { - std::stringstream ss; - succ = true; - size_t cursor = 1; - Int64 columnId = 0; - String func_name; - Field f; - switch (expr.tp()) { - case tipb::ExprType::Null: - return "NULL"; - case tipb::ExprType::Int64: - return std::to_string(DecodeInt(cursor, expr.val())); - case tipb::ExprType::Uint64: - return std::to_string(DecodeInt(cursor, expr.val())); - case tipb::ExprType::Float32: - case tipb::ExprType::Float64: - return std::to_string(DecodeFloat64(cursor, expr.val())); - case tipb::ExprType::String: - // - return expr.val(); - case tipb::ExprType::Bytes: - return DecodeBytes(cursor, expr.val()); - case tipb::ExprType::ColumnRef: - columnId = DecodeInt(cursor, expr.val()); - if(getCurrentColumnNames().count(columnId) == 0) { - succ = false; - return ""; - } - return getCurrentColumnNames().find(columnId)->second; - case tipb::ExprType::Count: - case tipb::ExprType::Sum: - case tipb::ExprType::Avg: - case tipb::ExprType::Min: - case tipb::ExprType::Max: - case tipb::ExprType::First: - if(!aggFunMap.count(expr.tp())) { - succ = false; - return ""; - } - func_name = aggFunMap.find(expr.tp())->second; - break; - case tipb::ExprType::ScalarFunc: - if(!scalarFunMap.count(expr.sig())) { - succ = false; - return ""; - } - func_name = scalarFunMap.find(expr.sig())->second; - break; - default: - succ = false; - return ""; +bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) +{ + bool first = true; + for (const tipb::Expr & expr : sel.conditions()) + { + auto s = exprToString(expr, getCurrentColumns()); + if (first) + { + ss << "WHERE "; + first = false; } - // build function expr - 
if(func_name == "in") { - // for in, we could not represent the function expr using func_name(param1, param2, ...) - succ = false; - return ""; - } else { - ss << func_name << "("; - bool first = true; - bool sub_succ = true; - for(const tipb::Expr &child : expr.children()) { - String s = exprToString(child, sub_succ); - if(!sub_succ) { - succ = false; - return ""; - } - if(first) { - first = false; - } else { - ss << ", "; - } - ss << s; - } - ss << ") "; - return ss.str(); + else + { + ss << "AND "; } + ss << s << " "; } + return true; +} - bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { - bool first = true; - for(const tipb::Expr & expr : sel.conditions()) { - bool succ = true; - auto s = exprToString(expr, succ); - if(!succ) { - return false; - } - if(first) { - ss << "WHERE "; - first = false; - } else { - ss << "AND "; - } - ss << s << " "; - } - return true; - } +bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) +{ + ss << "LIMIT " << limit.limit() << " "; + return true; +} - bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { - ss << "LIMIT " << limit.limit() << " "; - return true; +//todo return the error message +bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) +{ + switch (executor.tp()) + { + case tipb::ExecType::TypeTableScan: + return buildTSString(executor.tbl_scan(), ss); + case tipb::ExecType::TypeIndexScan: + // index scan not supported + return false; + case tipb::ExecType::TypeSelection: + return buildSelString(executor.selection(), ss); + case tipb::ExecType::TypeAggregation: + // stream agg is not supported, treated as normal agg + case tipb::ExecType::TypeStreamAgg: + //todo support agg + return false; + case tipb::ExecType::TypeTopN: + // todo support top n + return false; + case tipb::ExecType::TypeLimit: + return buildLimitString(executor.limit(), ss); } +} - //todo return the error message - bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { - switch (executor.tp()) { - case tipb::ExecType::TypeTableScan: - return buildTSString(executor.tbl_scan(), ss); - case tipb::ExecType::TypeIndexScan: - // index scan not supported - return false; - case tipb::ExecType::TypeSelection: - return buildSelString(executor.selection(), ss); - case tipb::ExecType::TypeAggregation: - // stream agg is not supported, treated as normal agg - case tipb::ExecType::TypeStreamAgg: - //todo support agg - return false; - case tipb::ExecType::TypeTopN: - // todo support top n - return false; - case tipb::ExecType::TypeLimit: - return buildLimitString(executor.limit(), ss); - } - } +bool isProject(const tipb::Executor &) +{ + // currently, project is not pushed so always return false + return false; +} +DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) + : context(context_), dag_request(dag_request_) +{ + afterAgg = false; +} - bool isProject(const tipb::Executor &) { - // currently, project is not pushed so always return false - return false; - } - DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) - : context(context_), dag_request(dag_request_) { - afterAgg = false; +String DAGStringConverter::buildSqlString() +{ + std::stringstream query_buf; + std::stringstream project; + for (const tipb::Executor & executor : dag_request.executors()) + { + if (!buildString(executor, 
query_buf))
+        {
+            return "";
+        }
     }
-        if(!isProject(dag_request.executors(dag_request.executors_size()-1))) {
-            //append final project
-            project << "SELECT ";
-            bool first = true;
-            for(UInt32 index : dag_request.output_offsets()) {
-                if(first) {
-                    first = false;
+    if (!isProject(dag_request.executors(dag_request.executors_size() - 1)))
+    {
+        //append final project
+        project << "SELECT ";
+        bool first = true;
+        for (UInt32 index : dag_request.output_offsets())
+        {
+            if (first)
+            {
+                first = false;
             }
-                } else {
-                    project << ", ";
-                }
-                project << getCurrentColumnNames()[index+1];
+            else
+            {
+                project << ", ";
             }
-            project << " ";
+            project << getCurrentOutputColumns()[index];
         }
-        return project.str() + query_buf.str();
+        project << " ";
     }
-
+    return project.str() + query_buf.str();
 }
+
+} // namespace DB
diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h
index cae42a54f19..2fa200e0f8e 100644
--- a/dbms/src/Interpreters/DAGStringConverter.h
+++ b/dbms/src/Interpreters/DAGStringConverter.h
@@ -2,40 +2,56 @@

 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include
 #include
+#include
 #pragma GCC diagnostic pop

-#include
 #include
+#include

-namespace DB {
+namespace DB
+{

-class DAGStringConverter {
+class DAGStringConverter
+{
 public:
     DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_);

     ~DAGStringConverter() = default;

     String buildSqlString();
+
 private:
     bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss);
-    String exprToString(const tipb::Expr & expr, bool &succ);
     bool buildSelString(const tipb::Selection & sel, std::stringstream & ss);
     bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss);
     bool buildString(const tipb::Executor & executor, std::stringstream & ss);

     CoprocessorContext & context;
     tipb::DAGRequest & dag_request;
-    std::unordered_map column_name_from_ts;
-    std::unordered_map column_name_from_agg;
+    // used by columnRef, which starts with 1, and refs column index in the original ts/agg output
+    NamesAndTypesList columns_from_ts;
+    NamesAndTypesList columns_from_agg;
+    // used by output_offset, which starts with 0, and refs the index in the selected output of ts/agg operator
+    Names output_from_ts;
+    Names output_from_agg;
     bool afterAgg;

-    std::unordered_map & getCurrentColumnNames() {
-        if(afterAgg) {
-            return column_name_from_agg;
+    const NamesAndTypesList & getCurrentColumns()
+    {
+        if (afterAgg)
+        {
+            return columns_from_agg;
         }
-        return column_name_from_ts;
+        return columns_from_ts;
     }
+    const Names & getCurrentOutputColumns()
+    {
+        if (afterAgg)
+        {
+            return output_from_agg;
+        }
+        return output_from_ts;
+    }
 };

-}
+} // namespace DB
diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.cpp b/dbms/src/Interpreters/InterpreterDAGRequest.cpp
index 8a8e6fe4698..483ef96fa2e 100644
--- a/dbms/src/Interpreters/InterpreterDAGRequest.cpp
+++ b/dbms/src/Interpreters/InterpreterDAGRequest.cpp
@@ -1,219 +1,349 @@
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
-#include
-#include
+#include
+#include
+#include
+#include
 #include
+#include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
+#include

-namespace DB {
+namespace DB
+{
+
+namespace 
ErrorCodes +{ +extern const int TOO_MANY_COLUMNS; +} - namespace ErrorCodes +InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info_) + : context(context_), dag_query_info(dag_query_info_) +{} + +// the flow is the same as executeFetchcolumns +bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) +{ + if (!ts.has_table_id()) { - extern const int TOO_MANY_COLUMNS; + // do not have table id + return false; } - - static void assignOrThrowException(Int32 & index, Int32 value, String name) { - if(index != -1) { - throw Exception("Duplicated " + name + " in DAG request"); - } - index = value; - } - - InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request_) - : context(context_), dag_request(dag_request_) { - for(int i = 0; i < dag_request.executors_size(); i++) { - switch (dag_request.executors(i).tp()) { - case tipb::ExecType::TypeTableScan: - assignOrThrowException(ts_index, i, "TableScan"); - break; - case tipb::ExecType::TypeSelection: - assignOrThrowException(sel_index, i, "Selection"); - break; - case tipb::ExecType::TypeStreamAgg: - case tipb::ExecType::TypeAggregation: - assignOrThrowException(agg_index, i, "Aggregation"); - break; - case tipb::ExecType::TypeTopN: - assignOrThrowException(order_index, i, "Order"); - case tipb::ExecType::TypeLimit: - assignOrThrowException(limit_index, i, "Limit"); - break; - default: - throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString()); - } - } + TableID id = ts.table_id(); + auto & tmt_ctx = context.ch_context.getTMTContext(); + auto storage = tmt_ctx.getStorages().get(id); + if (storage == nullptr) + { + tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + storage = tmt_ctx.getStorages().get(id); } - - bool InterpreterDAGRequest::buildSelPlan(const tipb::Selection & , Pipeline & ) { + if (storage == nullptr) + { + return false; + } + auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); + const auto * merge_tree = dynamic_cast(storage.get()); + if (!merge_tree) + { return false; } - // the flow is the same as executeFetchcolumns - bool InterpreterDAGRequest::buildTSPlan(const tipb::TableScan & ts, Pipeline & pipeline) { - if(!ts.has_table_id()) { - // do not have table id - return false; - } - TableID id = ts.table_id(); - auto & tmt_ctx = context.ch_context.getTMTContext(); - auto storage = tmt_ctx.getStorages().get(id); - if(storage == nullptr) { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); - storage = tmt_ctx.getStorages().get(id); - } - if(storage == nullptr) { - return false; - } - auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - const auto * merge_tree = dynamic_cast(storage.get()); - if(!merge_tree) { + Names required_columns; + for (const tipb::ColumnInfo & ci : ts.columns()) + { + ColumnID cid = ci.column_id(); + if (cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) + { + // cid out of bound return false; } + String name = merge_tree->getTableInfo().columns[cid - 1].name; + //todo handle output_offset + required_columns.push_back(name); + } + if (required_columns.empty()) + { + // no column selected, must be something wrong + return false; + } - Names required_columns; - for(const tipb::ColumnInfo & ci : ts.columns()) { - ColumnID cid = ci.column_id(); - if(cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) { - // cid out of bound + if 
(!dag_query_info.has_aggregation()) + { + // if the dag request does not contain agg, then the final output is + // based on the output of table scan + for (auto i : dag_query_info.get_dag_request().output_offsets()) + { + if (i < 0 || i >= required_columns.size()) + { + // array index out of bound return false; } - String name = merge_tree->getTableInfo().columns[cid - 1].name; - //todo handle output_offset - required_columns.push_back(name); - } - if(required_columns.empty()) { - // no column selected, must be something wrong - return false; + // do not have alias + final_project.emplace_back(required_columns[i], ""); } + } + // todo handle alias column + const Settings & settings = context.ch_context.getSettingsRef(); - if(agg_index == -1) { - // if the dag request does not contain agg, then the final output is - // based on the output of table scan - for (auto i : dag_request.output_offsets()) { - if (i < 0 || i >= required_columns.size()) { - // array index out of bound - return false; - } - // do not have alias - final_project.emplace_back(required_columns[i], ""); - } - } - // todo handle alias column - const Settings & settings = context.ch_context.getSettingsRef(); - - if(settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) { - throw Exception("Limit for number of columns to read exceeded. " - "Requested: " + toString(required_columns.size()) - + ", maximum: " + settings.max_columns_to_read.toString(), - ErrorCodes::TOO_MANY_COLUMNS); - } + if (settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) + { + throw Exception("Limit for number of columns to read exceeded. " + "Requested: " + + toString(required_columns.size()) + ", maximum: " + settings.max_columns_to_read.toString(), + ErrorCodes::TOO_MANY_COLUMNS); + } - size_t max_block_size = settings.max_block_size; - size_t max_streams = settings.max_threads; - QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; - if(max_streams > 1) { - max_streams *= settings.max_streams_to_max_threads_ratio; - } + size_t max_block_size = settings.max_block_size; + max_streams = settings.max_threads; + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; + if (max_streams > 1) + { + max_streams *= settings.max_streams_to_max_threads_ratio; + } - //todo support index in - SelectQueryInfo query_info; - query_info.query = std::make_unique(); - ((ASTSelectQuery*)query_info.query.get())->is_fake_sel = true; - query_info.mvcc_query_info = std::make_unique(); - query_info.mvcc_query_info->resolve_locks = true; - query_info.mvcc_query_info->read_tso = settings.read_tso; - RegionQueryInfo info; - info.region_id = context.kv_context.region_id(); - info.conf_version = context.kv_context.region_epoch().conf_ver(); - info.version = context.kv_context.region_epoch().version(); - auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); - if(!current_region) { - return false; - } - info.range_in_table = current_region->getHandleRangeByTable(id); - query_info.mvcc_query_info->regions_query_info.push_back(info); - query_info.mvcc_query_info->concurrent = 0.0; - pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams); - /// Set the limits and quota for reading data, the speed and time of the query. 
- { - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; - limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); - limits.max_execution_time = settings.max_execution_time; - limits.timeout_overflow_mode = settings.timeout_overflow_mode; + //todo support index in + SelectQueryInfo query_info; + query_info.query = std::make_unique(); + ((ASTSelectQuery *)query_info.query.get())->is_fake_sel = true; + query_info.mvcc_query_info = std::make_unique(); + query_info.mvcc_query_info->resolve_locks = true; + query_info.mvcc_query_info->read_tso = settings.read_tso; + RegionQueryInfo info; + info.region_id = context.kv_context.region_id(); + info.conf_version = context.kv_context.region_epoch().conf_ver(); + info.version = context.kv_context.region_epoch().version(); + auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); + if (!current_region) + { + return false; + } + info.range_in_table = current_region->getHandleRangeByTable(id); + query_info.mvcc_query_info->regions_query_info.push_back(info); + query_info.mvcc_query_info->concurrent = 0.0; + pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams); + /// Set the limits and quota for reading data, the speed and time of the query. + { + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; - /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, * because the initiating server has a summary of the execution of the request on all servers. * * But limits on data size to read and maximum execution time are reasonable to check both on initiator and * additionally on each remote server, because these limits are checked per block of data processed, * and remote servers may process way more blocks of data than are received by initiator. */ - limits.min_execution_speed = settings.min_execution_speed; - limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; - - QuotaForIntervals & quota = context.ch_context.getQuota(); - - pipeline.transform([&](auto & stream) - { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) - { - p_stream->setLimits(limits); - p_stream->setQuota(quota); - } - }); - } - return true; - } + limits.min_execution_speed = settings.min_execution_speed; + limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; - //todo return the error message - bool InterpreterDAGRequest::buildPlan(Pipeline & pipeline) { - // step 1. build table scan - if(!buildTSPlan(dag_request.executors(ts_index).tbl_scan(), pipeline)) { - return false; - } - // step 2. 
build selection if needed
-        if(sel_index != -1) {
-            if(buildSelPlan(dag_request.executors(sel_index).selection(), pipeline)) {
-                return false;
+        QuotaForIntervals & quota = context.ch_context.getQuota();
+
+        pipeline.transform([&](auto & stream) {
+            if (IProfilingBlockInputStream * p_stream = dynamic_cast<IProfilingBlockInputStream *>(stream.get()))
+            {
+                p_stream->setLimits(limits);
+                p_stream->setQuota(quota);
             }
+        });
+    }
+    source_columns = storage->getColumns().getAllPhysical();
+    return true;
+}
+
+InterpreterDAGRequest::AnalysisResult InterpreterDAGRequest::analyzeExpressions()
+{
+    AnalysisResult res;
+    ExpressionActionsChain chain;
+    res.need_aggregate = dag_query_info.has_aggregation();
+    DAGExpressionAnalyzer expressionAnalyzer(source_columns, context.ch_context);
+    if (dag_query_info.has_selection())
+    {
+        if (expressionAnalyzer.appendWhere(chain, dag_query_info.get_sel(), res.filter_column_name))
+        {
+            res.has_where = true;
+            res.before_where = chain.getLastActions();
+            res.filter_column_name = chain.steps.back().required_output[0];
+            chain.addStep();
         }
-        // step 3. build agg if needed
-        if(agg_index != -1) {
-            return false;
-        }
-        // step 4. build order by if needed
-        if(order_index != -1) {
-            return false;
-        }
-        // step 5. build limit if needed
-        if(limit_index != -1) {
-            return false;
-        }
-        return true;
-    }
-
-    BlockIO InterpreterDAGRequest::execute() {
-        Pipeline pipeline;
-        buildPlan(pipeline);
-        // add final project
-        auto stream_before_project = pipeline.firstStream();
-        auto columns = stream_before_project->getHeader();
-        NamesAndTypesList input_column;
-        for(auto column : columns.getColumnsWithTypeAndName()) {
-            input_column.emplace_back(column.name, column.type);
-        }
-        ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.ch_context.getSettingsRef());
-        project->add(ExpressionAction::project(final_project));
-        auto final_stream = std::make_shared<ExpressionBlockInputStream>(stream_before_project, project);
-        BlockIO res;
-        res.in = final_stream;
-        return res;
     }
+    if (res.need_aggregate)
+    {
+        throw Exception("agg not supported");
+    }
+    if (dag_query_info.has_topN())
+    {
+        res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_info.get_topN(), res.order_column_names);
+    }
+    // append final project results
+    for (auto & name : final_project)
+    {
+        chain.steps.back().required_output.push_back(name.first);
+    }
+    res.before_order_and_select = chain.getLastActions();
+    chain.finalize();
+    chain.clear();
+    //todo need call prependProjectInput??
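+    // Note on the chain mechanics: appendWhere/appendOrderBy accumulate actions in the
+    // last chain step, and finalize() prunes every column that no step listed in
+    // required_output; the final-project names pushed above keep the output columns alive.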
+    return res;
+}
+
+void InterpreterDAGRequest::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column)
+{
+    pipeline.transform(
+        [&](auto & stream) { stream = std::make_shared<FilterBlockInputStream>(stream, expressionActionsPtr, filter_column); });
+}
+
+void InterpreterDAGRequest::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr)
+{
+    if (expressionActionsPtr->getActions().size() > 0)
+    {
+        pipeline.transform([&](auto & stream) { stream = std::make_shared<ExpressionBlockInputStream>(stream, expressionActionsPtr); });
+    }
+}
+
+SortDescription InterpreterDAGRequest::getSortDescription(Strings & order_column_names)
+{
+    // construct SortDescription
+    SortDescription order_descr;
+    const tipb::TopN & topN = dag_query_info.get_topN();
+    order_descr.reserve(topN.order_by_size());
+    for (int i = 0; i < topN.order_by_size(); i++)
+    {
+        String name = order_column_names[i];
+        int direction = topN.order_by(i).desc() ? -1 : 1;
+        // todo get this information from DAGRequest
+        // currently use NULLS LAST
+        int nulls_direction = direction;
+        // todo get this information from DAGRequest
+        // currently use the default value
+        std::shared_ptr<Collator> collator;
+
+        order_descr.emplace_back(name, direction, nulls_direction, collator);
+    }
+    return order_descr;
+}
+
+void InterpreterDAGRequest::executeUnion(Pipeline & pipeline)
+{
+    if (pipeline.hasMoreThanOneStream())
+    {
+        pipeline.firstStream() = std::make_shared<UnionBlockInputStream<>>(pipeline.streams, nullptr, max_streams);
+        pipeline.streams.resize(1);
+    }
+}
+
+void InterpreterDAGRequest::executeOrder(Pipeline & pipeline, Strings & order_column_names)
+{
+    SortDescription order_descr = getSortDescription(order_column_names);
+    const Settings & settings = context.ch_context.getSettingsRef();
+    Int64 limit = dag_query_info.get_topN().limit();
+
+    pipeline.transform([&](auto & stream) {
+        auto sorting_stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);
+
+        /// Limits on sorting
+        IProfilingBlockInputStream::LocalLimits limits;
+        limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
+        limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
+        sorting_stream->setLimits(limits);
+
+        stream = sorting_stream;
+    });
+
+    /// If there are several streams, we merge them into one
+    executeUnion(pipeline);
+
+    /// Merge the sorted blocks.
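+    /// Each stream above is only sorted block-by-block (partial sort), so after the
+    /// union a final merge pass is still needed to produce one totally ordered,
+    /// limited result.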
+    pipeline.firstStream() = std::make_shared<MergeSortingBlockInputStream>(pipeline.firstStream(), order_descr, settings.max_block_size,
+        limit, settings.max_bytes_before_external_sort, context.ch_context.getTemporaryPath());
+}
+
+//todo return the error message
+bool InterpreterDAGRequest::executeImpl(Pipeline & pipeline)
+{
+    if (!executeTS(dag_query_info.get_ts(), pipeline))
+    {
+        return false;
+    }
+
+    auto res = analyzeExpressions();
+    // execute selection
+    if (res.has_where)
+    {
+        executeWhere(pipeline, res.before_where, res.filter_column_name);
+    }
+    if (res.need_aggregate)
+    {
+        // execute aggregation
+        throw Exception("agg not supported");
+    }
+    else
+    {
+        executeExpression(pipeline, res.before_order_and_select);
+    }
+
+    if (res.has_order_by)
+    {
+        // execute topN
+        executeOrder(pipeline, res.order_column_names);
+    }
+
+    // execute projection
+    executeFinalProject(pipeline);
+
+    // execute limit
+    if (dag_query_info.has_limit() && !dag_query_info.has_topN())
+    {
+        executeLimit(pipeline);
+    }
+    return true;
+}
+
+void InterpreterDAGRequest::executeFinalProject(Pipeline & pipeline)
+{
+    auto columns = pipeline.firstStream()->getHeader();
+    NamesAndTypesList input_column;
+    for (auto column : columns.getColumnsWithTypeAndName())
+    {
+        input_column.emplace_back(column.name, column.type);
+    }
+    ExpressionActionsPtr project = std::make_shared<ExpressionActions>(input_column, context.ch_context.getSettingsRef());
+    project->add(ExpressionAction::project(final_project));
+    // add final project
+    pipeline.transform([&](auto & stream) { stream = std::make_shared<ExpressionBlockInputStream>(stream, project); });
+}
+
+void InterpreterDAGRequest::executeLimit(Pipeline & pipeline)
+{
+    pipeline.transform(
+        [&](auto & stream) { stream = std::make_shared<LimitBlockInputStream>(stream, dag_query_info.get_limit().limit(), 0, false); });
+    if (pipeline.hasMoreThanOneStream())
+    {
+        executeUnion(pipeline);
+        pipeline.transform(
+            [&](auto & stream) { stream = std::make_shared<LimitBlockInputStream>(stream, dag_query_info.get_limit().limit(), 0, false); });
+    }
+}
+
+BlockIO InterpreterDAGRequest::execute()
+{
+    Pipeline pipeline;
+    executeImpl(pipeline);
+    executeUnion(pipeline);
+
+    BlockIO res;
+    res.in = pipeline.firstStream();
+    return res;
 }
+} // namespace DB
diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.h b/dbms/src/Interpreters/InterpreterDAGRequest.h
index 13a542b597a..7cfe18c9374 100644
--- a/dbms/src/Interpreters/InterpreterDAGRequest.h
+++ b/dbms/src/Interpreters/InterpreterDAGRequest.h
@@ -2,23 +2,26 @@

 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include
 #include
+#include
 #pragma GCC diagnostic pop

-#include
 #include
+#include
 #include
+#include
 #include
 #include

-namespace DB {
+namespace DB
+{

 /** build ch plan from dag request: dag executors -> ch plan
 */
-class InterpreterDAGRequest : public IInterpreter {
+class InterpreterDAGRequest : public IInterpreter
+{
 public:
-    InterpreterDAGRequest(CoprocessorContext & context_, const tipb::DAGRequest & dag_request);
+    InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info);

     ~InterpreterDAGRequest() = default;

@@ -26,13 +29,11 @@ class InterpreterDAGRequest : public IInterpreter {

 private:
     CoprocessorContext & context;
-    const tipb::DAGRequest & dag_request;
     NamesWithAliases final_project;
-    Int32 ts_index = -1;
-    Int32 sel_index = -1;
-    Int32 agg_index = -1;
-    Int32 order_index = -1;
-    Int32 limit_index = -1;
+    DAGQueryInfo & dag_query_info;
+    NamesAndTypesList source_columns;
+    size_t max_streams = 1;
+
     struct Pipeline
     {
         BlockInputStreams streams;
@@ -46,15 +47,35 @@ class 
InterpreterDAGRequest : public IInterpreter { transform(stream); } - bool hasMoreThanOneStream() const - { - return streams.size() > 1; - } + bool hasMoreThanOneStream() const { return streams.size() > 1; } }; - bool buildPlan(Pipeline & streams); - bool buildTSPlan(const tipb::TableScan & ts, Pipeline & streams); - bool buildSelPlan(const tipb::Selection & sel, Pipeline & streams); + struct AnalysisResult + { + bool has_where = false; + bool need_aggregate = false; + bool has_order_by = false; + + ExpressionActionsPtr before_where; + ExpressionActionsPtr before_aggregation; + ExpressionActionsPtr before_order_and_select; + ExpressionActionsPtr final_projection; + + String filter_column_name; + Strings order_column_names; + /// Columns from the SELECT list, before renaming them to aliases. + Names selected_columns; + }; + bool executeImpl(Pipeline & pipeline); + bool executeTS(const tipb::TableScan & ts, Pipeline & pipeline); + void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column); + void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr); + void executeOrder(Pipeline & pipeline, Strings & order_column_names); + void executeUnion(Pipeline & pipeline); + void executeLimit(Pipeline & pipeline); + void executeFinalProject(Pipeline & pipeline); + SortDescription getSortDescription(Strings & order_column_names); + AnalysisResult analyzeExpressions(); }; -} +} // namespace DB diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Server/cop_test.cpp index 13559193ad0..d039d90465d 100644 --- a/dbms/src/Server/cop_test.cpp +++ b/dbms/src/Server/cop_test.cpp @@ -1,50 +1,57 @@ +#include #include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#include +#include #include +#include #include -#include #pragma GCC diagnostic pop -#include + #include using ChannelPtr = std::shared_ptr; using SubPtr = std::shared_ptr; static const int DAGREQUEST = 103; -class FlashClient { +class FlashClient +{ private: SubPtr sp; + public: - FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)){ - } - grpc::Status coprocessor(coprocessor::Request* rqst) { + FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)) {} + grpc::Status coprocessor(coprocessor::Request * rqst) + { grpc::ClientContext clientContext; - clientContext.AddMetadata("user_name",""); - clientContext.AddMetadata("builder_version","v2"); + clientContext.AddMetadata("user_name", ""); + clientContext.AddMetadata("builder_version", "v2"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); size_t column_num = 3; - if(status.ok()) { + if (status.ok()) + { // if status is ok, try to decode the result tipb::SelectResponse selectResponse; - if(selectResponse.ParseFromString(response.data())) { - for(tipb::Chunk chunk : selectResponse.chunks()) { + if (selectResponse.ParseFromString(response.data())) + { + for (tipb::Chunk chunk : selectResponse.chunks()) + { size_t cursor = 0; std::vector row_result; - const std::string &data = chunk.rows_data(); - while (cursor < data.size()) { + const std::string & data = chunk.rows_data(); + while (cursor < data.size()) + { row_result.push_back(DB::DecodeDatum(cursor, data)); - if(row_result.size() == column_num) { + if (row_result.size() == column_num) + { //print the result - std::cout << row_result[0].get() - << " "<< row_result[1].get() - << " "<< row_result[2].get() << std::endl; + std::cout << row_result[0].get() << " " << 
row_result[1].get() << " " + << row_result[2].get() << std::endl; row_result.clear(); } } - } } } @@ -53,15 +60,16 @@ class FlashClient { }; using ClientPtr = std::shared_ptr; -grpc::Status rpcTest() { - ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); +grpc::Status rpcTest() +{ + ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); ClientPtr clientPtr = std::make_shared(cp); // construct a dag request tipb::DAGRequest dagRequest; dagRequest.set_start_ts(18446744073709551615uL); - tipb::Executor *executor = dagRequest.add_executors(); + tipb::Executor * executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTableScan); - tipb::TableScan *ts = executor->mutable_tbl_scan(); + tipb::TableScan * ts = executor->mutable_tbl_scan(); ts->set_table_id(41); tipb::ColumnInfo * ci = ts->add_columns(); ci->set_column_id(1); @@ -70,30 +78,48 @@ grpc::Status rpcTest() { dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); - /* executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); - tipb::Selection *selection = executor->mutable_selection(); - tipb::Expr *expr = selection->add_conditions(); + tipb::Selection * selection = executor->mutable_selection(); + tipb::Expr * expr = selection->add_conditions(); expr->set_tp(tipb::ExprType::ScalarFunc); expr->set_sig(tipb::ScalarFuncSig::LTInt); - tipb::Expr *col = expr->add_children(); - tipb::Expr *value = expr->add_children(); + tipb::Expr * col = expr->add_children(); + tipb::Expr * value = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; DB::EncodeNumber(2, ss); col->set_val(ss.str()); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(289,ss); + DB::EncodeNumber(123, ss); value->set_val(std::string(ss.str())); - */ + + // topn + executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeTopN); + tipb::TopN * topN = executor->mutable_topn(); + topN->set_limit(3); + tipb::ByItem * byItem = topN->add_order_by(); + byItem->set_desc(true); + tipb::Expr * expr1 = byItem->mutable_expr(); + expr1->set_tp(tipb::ExprType::ColumnRef); + ss.str(""); + DB::EncodeNumber(2, ss); + expr1->set_val(ss.str()); + // limit + /* + executor = dagRequest.add_executors(); + executor->set_tp(tipb::ExecType::TypeLimit); + tipb::Limit *limit = executor->mutable_limit(); + limit->set_limit(1); + */ // construct a coprocessor request coprocessor::Request request; //todo add context info - kvrpcpb::Context *ctx = request.mutable_context(); + kvrpcpb::Context * ctx = request.mutable_context(); ctx->set_region_id(2); auto region_epoch = ctx->mutable_region_epoch(); region_epoch->set_version(20); @@ -104,7 +130,8 @@ grpc::Status rpcTest() { return clientPtr->coprocessor(&request); } -void codecTest() { +void codecTest() +{ Int64 i = 123; std::stringstream ss; DB::EncodeNumber(i, ss); @@ -116,12 +143,13 @@ void codecTest() { r++; } -int main() { -// std::cout << "Before rpcTest"<< std::endl; +int main() +{ + // std::cout << "Before rpcTest"<< std::endl; grpc::Status ret = rpcTest(); -// codecTest(); -// std::cout << "End rpcTest " << std::endl; -// std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() -// << " " << ret.error_message() << std::endl; + // codecTest(); + // std::cout << "End rpcTest " << std::endl; + // std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() + // << " " << 
ret.error_message() << std::endl; return 0; } diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 91161b787a4..706f98322f7 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -85,15 +86,27 @@ DataTypePtr TypeMapping::getUnsigned(const ColumnInfo & column_info) return unsigned_type_map[column_info.tp](column_info); } -TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) { +TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) +{ // fixme: String's CodecFlag will be CodecFlagCompactBytes, which is wrong for Json type return codec_flag_map[dataTypePtr->getFamilyName()]; } -TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr) { +TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr) +{ return TypeMapping::instance().getCodecFlag(dataTypePtr); } +DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type) +{ + ColumnInfo mock_ci; + mock_ci.tp = static_cast(field_type.tp()); + mock_ci.flag = field_type.flag(); + mock_ci.flen = field_type.flen(); + mock_ci.decimal = field_type.decimal(); + return getDataTypeByColumnInfo(mock_ci); +} + DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info) { DataTypePtr base; diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index d8b2fc32357..db05d27ff84 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -1,5 +1,10 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + #include #include @@ -11,6 +16,8 @@ using ColumnInfo = TiDB::ColumnInfo; DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info); +DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type); + TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & dataTypePtr); } From bed0bd4cd5ae1f58d7c35f6fc8e3e4e4949287c3 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 2 Aug 2019 14:00:06 +0800 Subject: [PATCH 10/79] merge pingcap/cop branch --- contrib/tipb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/tipb b/contrib/tipb index b2d318af5e8..3a69b884cc9 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit b2d318af5e8af28f54a2c6422bc18631f65a8506 +Subproject commit 3a69b884cc9793da55d7d4ef38dc79459d17583f From 526cad94a535e8f4c3dae67fdd23d8c5e9600214 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 5 Aug 2019 01:31:07 +0800 Subject: [PATCH 11/79] Code reorg --- dbms/CMakeLists.txt | 3 +- dbms/src/CMakeLists.txt | 3 +- dbms/src/Coprocessor/CoprocessorHandler.cpp | 63 -------------- dbms/src/Coprocessor/CoprocessorHandler.h | 42 ---------- dbms/src/DataStreams/DAGBlockOutputStream.cpp | 84 +++++++++++++++++++ ...kOutputStream.h => DAGBlockOutputStream.h} | 24 +++--- .../DataStreams/TidbCopBlockOutputStream.cpp | 74 ---------------- dbms/src/Flash/CMakeLists.txt | 3 + .../Flash/Coprocessor/CoprocessorHandler.cpp | 56 +++++++++++++ .../Flash/Coprocessor/CoprocessorHandler.h | 56 +++++++++++++ dbms/src/Flash/Coprocessor/DAGDriver.cpp | 61 ++++++++++++++ dbms/src/Flash/Coprocessor/DAGDriver.h | 34 ++++++++ .../Flash/Coprocessor/tests/CMakeLists.txt | 4 + .../Coprocessor/tests}/cop_test.cpp | 0 dbms/src/{Server => Flash}/FlashService.cpp | 81 
++++++++++-------- dbms/src/{Server => Flash}/FlashService.h | 11 ++- dbms/src/Interpreters/ClientInfo.h | 2 +- .../Interpreters/DAGExpressionAnalyzer.cpp | 2 +- dbms/src/Interpreters/DAGExpressionAnalyzer.h | 2 +- .../{DAGQueryInfo.cpp => DAGQuerySource.cpp} | 36 ++++---- .../{DAGQueryInfo.h => DAGQuerySource.h} | 38 ++++++--- dbms/src/Interpreters/DAGStringConverter.cpp | 14 ++-- dbms/src/Interpreters/DAGStringConverter.h | 36 ++++---- ...processorBuilderUtils.cpp => DAGUtils.cpp} | 2 +- .../{CoprocessorBuilderUtils.h => DAGUtils.h} | 0 dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 - .../{IQueryInfo.h => IQuerySource.h} | 13 ++- ...reterDAGRequest.cpp => InterpreterDAG.cpp} | 83 +++++++++--------- ...terpreterDAGRequest.h => InterpreterDAG.h} | 20 +++-- dbms/src/Interpreters/SQLQuerySource.cpp | 36 ++++++++ dbms/src/Interpreters/SQLQuerySource.h | 29 +++++++ dbms/src/Interpreters/StringQueryInfo.cpp | 32 ------- dbms/src/Interpreters/StringQueryInfo.h | 32 ------- dbms/src/Interpreters/executeQuery.cpp | 53 ++++++------ dbms/src/Interpreters/executeQuery.h | 11 ++- dbms/src/Parsers/ASTSelectQuery.h | 1 - dbms/src/Server/CMakeLists.txt | 7 +- dbms/src/Server/Server.cpp | 30 +++---- 38 files changed, 616 insertions(+), 465 deletions(-) delete mode 100644 dbms/src/Coprocessor/CoprocessorHandler.cpp delete mode 100644 dbms/src/Coprocessor/CoprocessorHandler.h create mode 100644 dbms/src/DataStreams/DAGBlockOutputStream.cpp rename dbms/src/DataStreams/{TidbCopBlockOutputStream.h => DAGBlockOutputStream.h} (55%) delete mode 100644 dbms/src/DataStreams/TidbCopBlockOutputStream.cpp create mode 100644 dbms/src/Flash/CMakeLists.txt create mode 100644 dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp create mode 100644 dbms/src/Flash/Coprocessor/CoprocessorHandler.h create mode 100644 dbms/src/Flash/Coprocessor/DAGDriver.cpp create mode 100644 dbms/src/Flash/Coprocessor/DAGDriver.h create mode 100644 dbms/src/Flash/Coprocessor/tests/CMakeLists.txt rename dbms/src/{Server => Flash/Coprocessor/tests}/cop_test.cpp (100%) rename dbms/src/{Server => Flash}/FlashService.cpp (68%) rename dbms/src/{Server => Flash}/FlashService.h (80%) rename dbms/src/Interpreters/{DAGQueryInfo.cpp => DAGQuerySource.cpp} (55%) rename dbms/src/Interpreters/{DAGQueryInfo.h => DAGQuerySource.h} (71%) rename dbms/src/Interpreters/{CoprocessorBuilderUtils.cpp => DAGUtils.cpp} (99%) rename dbms/src/Interpreters/{CoprocessorBuilderUtils.h => DAGUtils.h} (100%) rename dbms/src/Interpreters/{IQueryInfo.h => IQuerySource.h} (51%) rename dbms/src/Interpreters/{InterpreterDAGRequest.cpp => InterpreterDAG.cpp} (78%) rename dbms/src/Interpreters/{InterpreterDAGRequest.h => InterpreterDAG.h} (84%) create mode 100644 dbms/src/Interpreters/SQLQuerySource.cpp create mode 100644 dbms/src/Interpreters/SQLQuerySource.h delete mode 100644 dbms/src/Interpreters/StringQueryInfo.cpp delete mode 100644 dbms/src/Interpreters/StringQueryInfo.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d2f4f62f4b2..af2eda4461a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -60,7 +60,8 @@ add_headers_and_sources(dbms src/Storages/Page) add_headers_and_sources(dbms src/Raft) add_headers_and_sources(dbms src/TiDB) add_headers_and_sources(dbms src/Client) -add_headers_and_sources(dbms src/Coprocessor) +add_headers_and_sources(dbms src/Flash) +add_headers_and_sources(dbms src/Flash/Coprocessor) add_headers_only(dbms src/Server) list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) diff --git a/dbms/src/CMakeLists.txt 
b/dbms/src/CMakeLists.txt index 7ee6bf08cc6..4e8ddf750d9 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -13,4 +13,5 @@ add_subdirectory (AggregateFunctions) add_subdirectory (Server) add_subdirectory (Client) add_subdirectory (TableFunctions) -add_subdirectory (Analyzers) \ No newline at end of file +add_subdirectory (Analyzers) +add_subdirectory (Flash) diff --git a/dbms/src/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Coprocessor/CoprocessorHandler.cpp deleted file mode 100644 index 272a28111cb..00000000000 --- a/dbms/src/Coprocessor/CoprocessorHandler.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -CoprocessorHandler::CoprocessorHandler(const coprocessor::Request * cop_request_, coprocessor::Response * cop_response_, CoprocessorContext & context_) - : cop_request(cop_request_), cop_response(cop_response_), context(context_) -{ - if(!dag_request.ParseFromString(cop_request->data())) { - throw Exception("Could not extract dag request from coprocessor request"); - } -} - -CoprocessorHandler::~CoprocessorHandler() -{ -} - -BlockIO CoprocessorHandler::buildCHPlan() { - String builder_version = context.ch_context.getSettings().coprocessor_plan_builder_version; - if(builder_version == "v1") { - DAGStringConverter converter(context, dag_request); - String query = converter.buildSqlString(); - if(query.empty()) { - return BlockIO(); - } - return executeQuery(query, context.ch_context, false, QueryProcessingStage::Complete); - } else if (builder_version == "v2"){ - return executeQuery(dag_request, context, QueryProcessingStage::Complete); - } else { - throw Exception("coprocessor plan builder version should be set to v1 or v2"); - } -} - -bool CoprocessorHandler::execute() { - context.ch_context.setSetting("read_tso", UInt64(dag_request.start_ts())); - BlockIO streams = buildCHPlan(); - if(!streams.in || streams.out) { - // only query is allowed, so streams.in must not be null and streams.out must be null - return false; - } - tipb::SelectResponse select_response; - BlockOutputStreamPtr outputStreamPtr = std::make_shared( - &select_response, context.ch_context.getSettings().records_per_chunk, dag_request.encode_type(), streams.in->getHeader() - ); - copyData(*streams.in, *outputStreamPtr); - cop_response->set_data(select_response.SerializeAsString()); - return true; -} - -} - diff --git a/dbms/src/Coprocessor/CoprocessorHandler.h b/dbms/src/Coprocessor/CoprocessorHandler.h deleted file mode 100644 index 3a61233a939..00000000000 --- a/dbms/src/Coprocessor/CoprocessorHandler.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#include -#pragma GCC diagnostic pop - -#include -#include - -namespace DB { - -struct CoprocessorContext { - Context & ch_context; - const kvrpcpb::Context & kv_context; - grpc::ServerContext & grpc_server_context; - CoprocessorContext(Context & ch_context_, const kvrpcpb::Context & kv_context_, - grpc::ServerContext & grpc_server_context_) - : ch_context(ch_context_), kv_context(kv_context_), grpc_server_context(grpc_server_context_) { - } -}; - -/** handle coprocesssor request, this is used by tiflash coprocessor. 
- */
-class CoprocessorHandler {
-public:
-    CoprocessorHandler(const coprocessor::Request *cop_request, coprocessor::Response *response, CoprocessorContext &context);
-
-    ~CoprocessorHandler();
-
-    bool execute();
-
-private:
-    BlockIO buildCHPlan();
-    const coprocessor::Request *cop_request;
-    coprocessor::Response *cop_response;
-    CoprocessorContext &context;
-    tipb::DAGRequest dag_request;
-
-};
-}
diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.cpp b/dbms/src/DataStreams/DAGBlockOutputStream.cpp
new file mode 100644
index 00000000000..683e0c27809
--- /dev/null
+++ b/dbms/src/DataStreams/DAGBlockOutputStream.cpp
@@ -0,0 +1,84 @@
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int UNSUPPORTED_PARAMETER;
+}
+
+struct TypeMapping;
+
+DAGBlockOutputStream::DAGBlockOutputStream(
+    tipb::SelectResponse & dag_response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, Block header_)
+    : dag_response(dag_response_), records_per_chunk(records_per_chunk_), encodeType(encodeType_), header(header_)
+{
+    if (encodeType == tipb::EncodeType::TypeArrow)
+    {
+        throw Exception("Encode type TypeArrow is not supported yet in DAGBlockOutputStream.", ErrorCodes::UNSUPPORTED_PARAMETER);
+    }
+    current_chunk = nullptr;
+    current_records_num = 0;
+    total_rows = 0;
+}
+
+
+void DAGBlockOutputStream::writePrefix()
+{
+    // Nothing to do here yet.
+}
+
+void DAGBlockOutputStream::writeSuffix()
+{
+    // Flush the last, possibly partially filled, chunk.
+    if (current_chunk != nullptr && current_records_num > 0)
+    {
+        current_chunk->set_rows_data(current_ss.str());
+    }
+}
+
+
+void DAGBlockOutputStream::write(const Block & block)
+{
+    // Encode data to chunk
+    size_t rows = block.rows();
+    for (size_t i = 0; i < rows; i++)
+    {
+        if (current_chunk == nullptr || current_records_num >= records_per_chunk)
+        {
+            if (current_chunk)
+            {
+                // set the current ss to current chunk
+                current_chunk->set_rows_data(current_ss.str());
+            }
+            current_chunk = dag_response.add_chunks();
+            current_ss.str("");
+            current_records_num = 0;
+        }
+        for (size_t j = 0; j < block.columns(); j++)
+        {
+            auto field = (*block.getByPosition(j).column.get())[i];
+            const DataTypePtr & data_type = block.getByPosition(j).type;
+            if (data_type->isNullable())
+            {
+                const DataTypePtr nested = dynamic_cast<const DataTypeNullable *>(data_type.get())->getNestedType();
+                EncodeDatum(field, getCodecFlagByDataType(nested), current_ss);
+            }
+            else
+            {
+                EncodeDatum(field, getCodecFlagByDataType(block.getByPosition(j).type), current_ss);
+            }
+        }
+        // Encode current row
+        current_records_num++;
+        total_rows++;
+    }
+}
+
+} // namespace DB
diff --git a/dbms/src/DataStreams/TidbCopBlockOutputStream.h b/dbms/src/DataStreams/DAGBlockOutputStream.h
similarity index 55%
rename from dbms/src/DataStreams/TidbCopBlockOutputStream.h
rename to dbms/src/DataStreams/DAGBlockOutputStream.h
index 731cba83d89..e14fbdc929a 100644
--- a/dbms/src/DataStreams/TidbCopBlockOutputStream.h
+++ b/dbms/src/DataStreams/DAGBlockOutputStream.h
@@ -1,7 +1,7 @@
 #pragma once

-#include
 #include
+#include
 #include
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #include
 #pragma GCC diagnostic pop
@@ -12,16 +12,13 @@
 namespace DB
 {
-
-
-/** Serializes the stream of blocks in tidb coprocessor format.
- * Designed for communication with tidb via coprocessor.
- */
-class TidbCopBlockOutputStream : public IBlockOutputStream
+/// Serializes the stream of blocks in TiDB DAG response format.
+/// TODO: May consider using some parallelism.
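+///
+/// Encoding sketch (a rough illustration; assumes the default row-wise encode
+/// type rather than TypeArrow): each row of an input block is serialized
+/// column-by-column with EncodeDatum() into the current tipb::Chunk, and a
+/// fresh chunk is appended to the SelectResponse once records_per_chunk rows
+/// accumulate. Given some BlockInputStreamPtr `stream`:
+///
+///     tipb::SelectResponse dag_response;
+///     DAGBlockOutputStream out(dag_response, /*records_per_chunk=*/64,
+///         tipb::EncodeType::TypeDefault, stream->getHeader());
+///     copyData(*stream, out);
+///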
+/// TODO: Consider using output schema in DAG request, do some conversion or checking between DAG schema and block schema. +class DAGBlockOutputStream : public IBlockOutputStream { public: - TidbCopBlockOutputStream( - tipb::SelectResponse *response, Int64 records_per_chunk, tipb::EncodeType encodeType, Block header); + DAGBlockOutputStream(tipb::SelectResponse & response, Int64 records_per_chunk, tipb::EncodeType encodeType, Block header); Block getHeader() const override { return header; } void write(const Block & block) override; @@ -29,15 +26,16 @@ class TidbCopBlockOutputStream : public IBlockOutputStream void writeSuffix() override; private: - tipb::SelectResponse *response; + tipb::SelectResponse & dag_response; + Int64 records_per_chunk; tipb::EncodeType encodeType; Block header; - tipb::Chunk *current_chunk; + + tipb::Chunk * current_chunk; Int64 current_records_num; std::stringstream current_ss; Int64 total_rows; - }; -} +} // namespace DB diff --git a/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp b/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp deleted file mode 100644 index 5993d0d4443..00000000000 --- a/dbms/src/DataStreams/TidbCopBlockOutputStream.cpp +++ /dev/null @@ -1,74 +0,0 @@ - -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes { - extern const int UNSUPPORTED_PARAMETER; -} - -struct TypeMapping; - -TidbCopBlockOutputStream::TidbCopBlockOutputStream( - tipb::SelectResponse *response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, Block header_) - : response(response_), records_per_chunk(records_per_chunk_), encodeType(encodeType_), header(header_) -{ - if(encodeType == tipb::EncodeType::TypeArrow) { - throw Exception("Encode type TypeArrow is not supported yet in TidbCopBlockOutputStream.", ErrorCodes::UNSUPPORTED_PARAMETER); - } - current_chunk = nullptr; - current_records_num = 0; - total_rows = 0; -} - - -void TidbCopBlockOutputStream::writePrefix() -{ - //something to do here? 
-}
-
-void TidbCopBlockOutputStream::writeSuffix()
-{
-    // error handle,
-    if(current_chunk != nullptr && records_per_chunk > 0) {
-        current_chunk->set_rows_data(current_ss.str());
-    }
-}
-
-
-void TidbCopBlockOutputStream::write(const Block & block)
-{
-    // encode data to chunk
-    size_t rows = block.rows();
-    for(size_t i = 0; i < rows; i++) {
-        if(current_chunk == nullptr || current_records_num >= records_per_chunk) {
-            if(current_chunk) {
-                // set the current ss to current chunk
-                current_chunk->set_rows_data(current_ss.str());
-            }
-            current_chunk = response->add_chunks();
-            current_ss.str("");
-            records_per_chunk = 0;
-        }
-        for(size_t j = 0; j < block.columns(); j++) {
-            auto field = (*block.getByPosition(j).column.get())[i];
-            const DataTypePtr & dataTypePtr = block.getByPosition(j).type;
-            if(dataTypePtr->isNullable()) {
-                const DataTypePtr real = dynamic_cast<const DataTypeNullable *>(dataTypePtr.get())->getNestedType();
-                EncodeDatum(field, getCodecFlagByDataType(real), current_ss);
-            } else {
-                EncodeDatum(field, getCodecFlagByDataType(block.getByPosition(j).type), current_ss);
-            }
-        }
-        //encode current row
-        records_per_chunk++;
-        total_rows++;
-    }
-}
-
-}
diff --git a/dbms/src/Flash/CMakeLists.txt b/dbms/src/Flash/CMakeLists.txt
new file mode 100644
index 00000000000..16b3a6d519b
--- /dev/null
+++ b/dbms/src/Flash/CMakeLists.txt
@@ -0,0 +1,3 @@
+if (ENABLE_TESTS)
+    add_subdirectory (Coprocessor/tests)
+endif ()
diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp
new file mode 100644
index 00000000000..b39d1339869
--- /dev/null
+++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp
@@ -0,0 +1,56 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+CoprocessorHandler::CoprocessorHandler(
+    CoprocessorContext & cop_context_, const coprocessor::Request * cop_request_, coprocessor::Response * cop_response_)
+    : cop_context(cop_context_), cop_request(cop_request_), cop_response(cop_response_), log(&Logger::get("CoprocessorHandler"))
+{}
+
+CoprocessorHandler::~CoprocessorHandler() {}
+
+bool CoprocessorHandler::execute()
+{
+    switch (cop_request->tp())
+    {
+        case REQ_TYPE_DAG:
+        {
+            tipb::DAGRequest dag_request;
+            if (!dag_request.ParseFromString(cop_request->data()))
+                throw Exception("Could not parse DAG request from coprocessor request");
+            tipb::SelectResponse dag_response;
+            DAGDriver driver(cop_context.db_context, dag_request, cop_context.kv_context.region_id(),
+                cop_context.kv_context.region_epoch().version(), cop_context.kv_context.region_epoch().conf_ver(), dag_response);
+            if (driver.execute())
+            {
+                cop_response->set_data(dag_response.SerializeAsString());
+                return true;
+            }
+            return false;
+        }
+        case REQ_TYPE_ANALYZE:
+        case REQ_TYPE_CHECKSUM:
+        default:
+            LOG_ERROR(log, "Flash service Coprocessor request other than DAG is not implemented yet");
+            // return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "Only DAG request is supported");
+            return false;
+    }
+
+    return true;
+}
+
+} // namespace DB
diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h
new file mode 100644
index 00000000000..c704c0ede36
--- /dev/null
+++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include
+#include
+#pragma GCC diagnostic pop
+
+#include
+#include
+
+namespace DB
+{
+
+struct CoprocessorContext
+{
+    Context & db_context;
+    const
kvrpcpb::Context & kv_context; + grpc::ServerContext & grpc_server_context; + + CoprocessorContext(Context & db_context_, const kvrpcpb::Context & kv_context_, grpc::ServerContext & grpc_server_context_) + : db_context(db_context_), kv_context(kv_context_), grpc_server_context(grpc_server_context_) + {} +}; + +/// Coprocessor request handler, deals with: +/// 1. DAG request: WIP; +/// 2. Analyze request: NOT IMPLEMENTED; +/// 3. Checksum request: NOT IMPLEMENTED; +class CoprocessorHandler +{ +public: + CoprocessorHandler(CoprocessorContext & cop_context_, const coprocessor::Request * cop_request_, coprocessor::Response * response_); + + ~CoprocessorHandler(); + + bool execute(); + +protected: + enum + { + REQ_TYPE_DAG = 103, + REQ_TYPE_ANALYZE = 104, + REQ_TYPE_CHECKSUM = 105, + }; + + CoprocessorContext & cop_context; + const coprocessor::Request * cop_request; + coprocessor::Response * cop_response; + + Logger * log; +}; + +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp new file mode 100644 index 00000000000..3f51e93f8e5 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, + UInt64 region_conf_version_, tipb::SelectResponse & dag_response_) + : context(context_), + dag_request(dag_request_), + region_id(region_id_), + region_version(region_version_), + region_conf_version(region_conf_version_), + dag_response(dag_response_) +{} + +bool DAGDriver::execute() +{ + context.setSetting("read_tso", UInt64(dag_request.start_ts())); + BlockIO streams = executeDAG(); + if (!streams.in || streams.out) + { + // only query is allowed, so streams.in must not be null and streams.out must be null + return false; + } + BlockOutputStreamPtr outputStreamPtr = std::make_shared( + dag_response, context.getSettings().records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); + copyData(*streams.in, *outputStreamPtr); + return true; +} + +BlockIO DAGDriver::executeDAG() +{ + String builder_version = context.getSettings().coprocessor_plan_builder_version; + if (builder_version == "v1") + { + DAGStringConverter converter(context, dag_request); + String query = converter.buildSqlString(); + if (query.empty()) + { + return BlockIO(); + } + return executeQuery(query, context, false, QueryProcessingStage::Complete); + } + else if (builder_version == "v2") + { + return executeQuery(dag_request, region_id, region_version, region_conf_version, context, QueryProcessingStage::Complete); + } + else + { + throw Exception("coprocessor plan builder version should be set to v1 or v2"); + } +} + +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h new file mode 100644 index 00000000000..89f01e140a3 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +/// An abstraction of driver running DAG request. +/// Now is a naive native executor. Might get evolved to drive MPP-like computation. 
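+///
+/// Rough usage sketch (mirroring CoprocessorHandler above; the region numbers
+/// are illustrative placeholders, not meaningful values):
+///
+///     tipb::SelectResponse dag_response;
+///     DAGDriver driver(db_context, dag_request, /*region_id=*/2,
+///         /*region_version=*/20, /*region_conf_version=*/2, dag_response);
+///     if (driver.execute())
+///         cop_response->set_data(dag_response.SerializeAsString());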
+class DAGDriver +{ +public: + DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, + UInt64 region_conf_version_, tipb::SelectResponse & dag_response_); + bool execute(); + +private: + BlockIO executeDAG(); + +private: + Context & context; + + const tipb::DAGRequest & dag_request; + + RegionID region_id; + UInt64 region_version; + UInt64 region_conf_version; + + tipb::SelectResponse & dag_response; +}; +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt b/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt new file mode 100644 index 00000000000..c236d367c5d --- /dev/null +++ b/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories (${CMAKE_CURRENT_BINARY_DIR}) + +add_executable (cop_test cop_test.cpp) +target_link_libraries (cop_test dbms) diff --git a/dbms/src/Server/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp similarity index 100% rename from dbms/src/Server/cop_test.cpp rename to dbms/src/Flash/Coprocessor/tests/cop_test.cpp diff --git a/dbms/src/Server/FlashService.cpp b/dbms/src/Flash/FlashService.cpp similarity index 68% rename from dbms/src/Server/FlashService.cpp rename to dbms/src/Flash/FlashService.cpp index b2f65aa17a0..970b46aa638 100644 --- a/dbms/src/Server/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -1,22 +1,16 @@ -#include -#include -#include #include -#include -#include +#include +#include #include #include +#include +#include namespace DB { -const Int64 REQ_TYPE_DAG = 103; -//const Int64 REQ_TYPE_ANALYZE = 104; -//const Int64 REQ_TYPE_CHECKSUM = 105; FlashService::FlashService(const std::string & address_, IServer & server_) - : server(server_), - address(address_), - log(&Logger::get("FlashService")) + : server(server_), address(address_), log(&Logger::get("FlashService")) { grpc::ServerBuilder builder; builder.AddListeningPort(address, grpc::InsecureServerCredentials()); @@ -40,75 +34,90 @@ FlashService::~FlashService() grpc_server->Wait(); } -String getClientMetaVar(grpc::ServerContext * grpc_context, String name, String default_val) { - if(grpc_context->client_metadata().count(name) != 1) { +String getClientMetaVar(grpc::ServerContext * grpc_context, String name, String default_val) +{ + if (grpc_context->client_metadata().count(name) != 1) + { return default_val; - } else { + } + else + { return String(grpc_context->client_metadata().find(name)->second.data()); } } -::grpc::Status setClientInfo(grpc::ServerContext * grpc_context, Context & server_context) { +::grpc::Status setClientInfo(grpc::ServerContext * grpc_context, Context & context) +{ auto client_meta = grpc_context->client_metadata(); String query_id = getClientMetaVar(grpc_context, "query_id", ""); - server_context.setCurrentQueryId(query_id); - ClientInfo & client_info = server_context.getClientInfo(); + context.setCurrentQueryId(query_id); + ClientInfo & client_info = context.getClientInfo(); client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; client_info.interface = ClientInfo::Interface::GRPC; std::string peer = grpc_context->peer(); Int64 pos = peer.find(':'); - if(pos == -1) { + if (pos == -1) + { return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "invalid peer address"); } - std::string client_ip = peer.substr(pos+1); + std::string client_ip = peer.substr(pos + 1); Poco::Net::SocketAddress client_address(client_ip); client_info.current_address = client_address; client_info.current_user = getClientMetaVar(grpc_context, "user", ""); 
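    // Client-side sketch (illustrative only, assuming a plain gRPC C++ client):
    // these per-query settings arrive as ordinary request metadata, e.g.
    //     grpc::ClientContext client_ctx;
    //     client_ctx.AddMetadata("user", "root");
    //     client_ctx.AddMetadata("records_per_chunk", "64");
    //     client_ctx.AddMetadata("builder_version", "v2");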
std::string records_per_chunk_str = getClientMetaVar(grpc_context, "records_per_chunk", ""); - if(!records_per_chunk_str.empty()) { - server_context.setSetting("records_per_chunk", records_per_chunk_str); + if (!records_per_chunk_str.empty()) + { + context.setSetting("records_per_chunk", records_per_chunk_str); } std::string builder_version = getClientMetaVar(grpc_context, "builder_version", "v1"); - server_context.setSetting("coprocessor_plan_builder_version", builder_version); + context.setSetting("coprocessor_plan_builder_version", builder_version); return ::grpc::Status::OK; } -grpc::Status FlashService::Coprocessor(grpc::ServerContext * grpc_context, const coprocessor::Request * request, - coprocessor::Response * response) +grpc::Status FlashService::Coprocessor( + grpc::ServerContext * grpc_context, const coprocessor::Request * request, coprocessor::Response * response) { LOG_DEBUG(log, "receive coprocessor request"); LOG_DEBUG(log, request->DebugString()); Context context = server.context(); context.setGlobalContext(server.context()); setClientInfo(grpc_context, context); - if(request->tp() != REQ_TYPE_DAG) { - LOG_ERROR(log, "Flash service Coprocessor other than dag request not implement yet"); - return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "Only DAG request is supported"); - } - try { + try + { CoprocessorContext cop_context(context, request->context(), *grpc_context); - CoprocessorHandler coprocessorHandler(request, response, cop_context); - if (coprocessorHandler.execute()) { + CoprocessorHandler cop_handler(cop_context, request, response); + if (cop_handler.execute()) + { LOG_DEBUG(log, "Flash service Coprocessor finished"); return ::grpc::Status(::grpc::StatusCode::OK, ""); - } else { + } + else + { LOG_ERROR(log, "Flash service Coprocessor meet internal error"); return ::grpc::Status(::grpc::StatusCode::INTERNAL, ""); } - } catch (LockException & e) { + } + catch (LockException & e) + { //todo set lock error info LOG_ERROR(log, "meet lock exception"); // clear the data to avoid sending partial data response->set_data(""); - } catch (RegionException & e) { + } + catch (RegionException & e) + { // todo set region error info LOG_ERROR(log, "meet region exception"); response->set_data(""); - } catch (Exception & e) { + } + catch (Exception & e) + { // todo return exception message LOG_ERROR(log, "meet unknown exception, errmsg: " + e.message()); response->set_data(""); - } catch (...) { + } + catch (...) 
+ { LOG_ERROR(log, "meet unknown exception"); response->set_data(""); } diff --git a/dbms/src/Server/FlashService.h b/dbms/src/Flash/FlashService.h similarity index 80% rename from dbms/src/Server/FlashService.h rename to dbms/src/Flash/FlashService.h index baeac200657..c0885c9c9a4 100644 --- a/dbms/src/Server/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -1,9 +1,9 @@ #pragma once -#include #include +#include #include -#include "IServer.h" +#include "Server/IServer.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include @@ -23,17 +23,16 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shar ~FlashService() final; - grpc::Status Coprocessor(grpc::ServerContext* context, const coprocessor::Request* request, coprocessor::Response* response); -private: + grpc::Status Coprocessor(grpc::ServerContext * context, const coprocessor::Request * request, coprocessor::Response * response); - IServer &server; +private: + IServer & server; std::string address; GRPCServerPtr grpc_server; Logger * log; - }; } // namespace DB diff --git a/dbms/src/Interpreters/ClientInfo.h b/dbms/src/Interpreters/ClientInfo.h index f890c13851b..bf4562bec8f 100644 --- a/dbms/src/Interpreters/ClientInfo.h +++ b/dbms/src/Interpreters/ClientInfo.h @@ -24,7 +24,7 @@ class ClientInfo { TCP = 1, HTTP = 2, - GRPC = 2, + GRPC = 3, }; enum class HTTPMethod : UInt8 diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp index 2cc8ce0b9c2..720f11d85ea 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -2,8 +2,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h index 6a63600fb12..40e31540b61 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.h +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/dbms/src/Interpreters/DAGQueryInfo.cpp b/dbms/src/Interpreters/DAGQuerySource.cpp similarity index 55% rename from dbms/src/Interpreters/DAGQueryInfo.cpp rename to dbms/src/Interpreters/DAGQuerySource.cpp index ad03da917d6..abbcb1c6559 100644 --- a/dbms/src/Interpreters/DAGQueryInfo.cpp +++ b/dbms/src/Interpreters/DAGQuerySource.cpp @@ -1,17 +1,17 @@ -#include -#include +#include +#include #include namespace DB { -const String DAGQueryInfo::TS_NAME("tablescan"); -const String DAGQueryInfo::SEL_NAME("selection"); -const String DAGQueryInfo::AGG_NAME("aggregation"); -const String DAGQueryInfo::TOPN_NAME("topN"); -const String DAGQueryInfo::LIMIT_NAME("limit"); +const String DAGQuerySource::TS_NAME("tablescan"); +const String DAGQuerySource::SEL_NAME("selection"); +const String DAGQuerySource::AGG_NAME("aggregation"); +const String DAGQuerySource::TOPN_NAME("topN"); +const String DAGQuerySource::LIMIT_NAME("limit"); static void assignOrThrowException(Int32 & index, Int32 value, const String & name) { @@ -22,8 +22,13 @@ static void assignOrThrowException(Int32 & index, Int32 value, const String & na index = value; } -DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorContext & coprocessorContext_) - : dag_request(dag_request_), coprocessorContext(coprocessorContext_) +DAGQuerySource::DAGQuerySource( + Context & context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, const 
tipb::DAGRequest & dag_request_) + : context(context_), + region_id(region_id_), + region_version(region_version_), + region_conf_version(region_conf_version_), + dag_request(dag_request_) { for (int i = 0; i < dag_request.executors_size(); i++) { @@ -50,18 +55,17 @@ DAGQueryInfo::DAGQueryInfo(const tipb::DAGRequest & dag_request_, CoprocessorCon } } -std::tuple DAGQueryInfo::parse(size_t) +std::tuple DAGQuerySource::parse(size_t) { - query = String("cop query"); - ast = std::make_shared(); - ((ASTSelectQuery *)ast.get())->is_fake_sel = true; + auto query = dag_request.DebugString(); + auto ast = std::make_shared(); return std::make_tuple(query, ast); } -String DAGQueryInfo::get_query_ignore_error(size_t) { return query; } +String DAGQuerySource::str(size_t) { return dag_request.DebugString(); } -std::unique_ptr DAGQueryInfo::getInterpreter(Context &, QueryProcessingStage::Enum) +std::unique_ptr DAGQuerySource::interpreter(Context &, QueryProcessingStage::Enum) { - return std::make_unique(coprocessorContext, *this); + return std::make_unique(context, *this); } } // namespace DB diff --git a/dbms/src/Interpreters/DAGQueryInfo.h b/dbms/src/Interpreters/DAGQuerySource.h similarity index 71% rename from dbms/src/Interpreters/DAGQueryInfo.h rename to dbms/src/Interpreters/DAGQuerySource.h index aa2baa833c9..f7225ce46d7 100644 --- a/dbms/src/Interpreters/DAGQueryInfo.h +++ b/dbms/src/Interpreters/DAGQuerySource.h @@ -5,18 +5,20 @@ #include #pragma GCC diagnostic pop -#include #include -#include +#include #include +#include namespace DB { +class Context; + /** DAGQueryInfo for query represented by DAG request. */ -class DAGQueryInfo : public IQueryInfo +class DAGQuerySource : public IQuerySource { public: static const String TS_NAME; @@ -25,11 +27,13 @@ class DAGQueryInfo : public IQueryInfo static const String TOPN_NAME; static const String LIMIT_NAME; - DAGQueryInfo(const tipb::DAGRequest & dag_request, CoprocessorContext & coprocessorContext_); - bool isInternalQuery() { return false; }; - virtual std::tuple parse(size_t max_query_size); - virtual String get_query_ignore_error(size_t max_query_size); - virtual std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage); + DAGQuerySource(Context & context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, + const tipb::DAGRequest & dag_request_); + + virtual std::tuple parse(size_t max_query_size) override; + virtual String str(size_t max_query_size) override; + virtual std::unique_ptr interpreter(Context & context, QueryProcessingStage::Enum stage) override; + void assertValid(Int32 index, const String & name) { if (index < 0 || index > dag_request.executors_size()) @@ -37,10 +41,16 @@ class DAGQueryInfo : public IQueryInfo throw Exception("Access invalid executor: " + name); } } + + RegionID getRegionID() const { return region_id; } + UInt64 getRegionVersion() const { return region_version; } + UInt64 getRegionConfVersion() const { return region_conf_version; } + bool has_selection() { return sel_index != -1; }; bool has_aggregation() { return agg_index != -1; }; bool has_topN() { return order_index != -1; }; bool has_limit() { return order_index == -1 && limit_index != -1; }; + const tipb::TableScan & get_ts() { assertValid(ts_index, TS_NAME); @@ -68,11 +78,15 @@ class DAGQueryInfo : public IQueryInfo }; const tipb::DAGRequest & get_dag_request() { return dag_request; }; -private: +protected: + Context & context; + + const RegionID region_id; + const UInt64 region_version; + const UInt64 
region_conf_version; + const tipb::DAGRequest & dag_request; - CoprocessorContext & coprocessorContext; - String query; - ASTPtr ast; + Int32 ts_index = -1; Int32 sel_index = -1; Int32 agg_index = -1; diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp index aa49500d274..c99b1607fcd 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -1,8 +1,7 @@ -#include -#include -#include #include -#include + +#include +#include #include #include #include @@ -24,11 +23,11 @@ bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringst // do not have table id return false; } - auto & tmt_ctx = context.ch_context.getTMTContext(); + auto & tmt_ctx = context.getTMTContext(); auto storage = tmt_ctx.getStorages().get(id); if (storage == nullptr) { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + tmt_ctx.getSchemaSyncer()->syncSchema(id, context, false); storage = tmt_ctx.getStorages().get(id); } if (storage == nullptr) @@ -117,8 +116,7 @@ bool isProject(const tipb::Executor &) // currently, project is not pushed so always return false return false; } -DAGStringConverter::DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_) - : context(context_), dag_request(dag_request_) +DAGStringConverter::DAGStringConverter(Context & context_, const tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) { afterAgg = false; } diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h index 2fa200e0f8e..fc8006f8096 100644 --- a/dbms/src/Interpreters/DAGStringConverter.h +++ b/dbms/src/Interpreters/DAGStringConverter.h @@ -6,35 +6,22 @@ #include #pragma GCC diagnostic pop -#include #include namespace DB { +class Context; + class DAGStringConverter { public: - DAGStringConverter(CoprocessorContext & context_, tipb::DAGRequest & dag_request_); + DAGStringConverter(Context & context_, const tipb::DAGRequest & dag_request_); ~DAGStringConverter() = default; String buildSqlString(); -private: - bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); - bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); - bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss); - bool buildString(const tipb::Executor & executor, std::stringstream & ss); - CoprocessorContext & context; - tipb::DAGRequest & dag_request; - // used by columnRef, which starts with 1, and refs column index in the original ts/agg output - NamesAndTypesList columns_from_ts; - NamesAndTypesList columns_from_agg; - // used by output_offset, which starts with 0, and refs the index in the selected output of ts/agg operater - Names output_from_ts; - Names output_from_agg; - bool afterAgg; const NamesAndTypesList & getCurrentColumns() { if (afterAgg) @@ -52,6 +39,23 @@ class DAGStringConverter } return output_from_ts; } + +protected: + bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); + bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); + bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss); + bool buildString(const tipb::Executor & executor, std::stringstream & ss); + +protected: + Context & context; + const tipb::DAGRequest & dag_request; + // used by columnRef, which starts with 1, and refs column index in the original ts/agg output + NamesAndTypesList columns_from_ts; + NamesAndTypesList columns_from_agg; 
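+    // (For illustration: a ColumnRef whose encoded value is 2, as built via
+    // EncodeNumber(2, ss) in cop_test.cpp, refers to the 2nd entry of
+    // columns_from_ts / columns_from_agg.)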
+    // used by output_offset, which starts with 0, and refs the index in the selected output of ts/agg operator
+    Names output_from_ts;
+    Names output_from_agg;
+    bool afterAgg;
 };

 } // namespace DB
diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp b/dbms/src/Interpreters/DAGUtils.cpp
similarity index 99%
rename from dbms/src/Interpreters/CoprocessorBuilderUtils.cpp
rename to dbms/src/Interpreters/DAGUtils.cpp
index de720e0c7b4..4bc3b2df207 100644
--- a/dbms/src/Interpreters/CoprocessorBuilderUtils.cpp
+++ b/dbms/src/Interpreters/DAGUtils.cpp
@@ -3,7 +3,7 @@
 #include
 #include
-#include
+#include
 #include

 namespace DB
diff --git a/dbms/src/Interpreters/CoprocessorBuilderUtils.h b/dbms/src/Interpreters/DAGUtils.h
similarity index 100%
rename from dbms/src/Interpreters/CoprocessorBuilderUtils.h
rename to dbms/src/Interpreters/DAGUtils.h
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 21225f2756b..02faff83e91 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -173,9 +173,6 @@ ExpressionAnalyzer::ExpressionAnalyzer(
     do_global(do_global_), subqueries_for_sets(subqueries_for_set_)
 {
     select_query = typeid_cast<ASTSelectQuery *>(ast.get());
-    if(select_query && select_query->is_fake_sel) {
-        return;
-    }

     if (!storage && select_query)
     {
diff --git a/dbms/src/Interpreters/IQueryInfo.h b/dbms/src/Interpreters/IQuerySource.h
similarity index 51%
rename from dbms/src/Interpreters/IQueryInfo.h
rename to dbms/src/Interpreters/IQuerySource.h
index 5ef5c60dc33..b23290231d5 100644
--- a/dbms/src/Interpreters/IQueryInfo.h
+++ b/dbms/src/Interpreters/IQuerySource.h
@@ -1,24 +1,23 @@
 #pragma once

+#include
 #include
 #include
-#include

 namespace DB
 {

 /** IQueryInfo interface for different source of queries.
*/ -class IQueryInfo +class IQuerySource { public: + virtual ~IQuerySource() = default; - virtual bool isInternalQuery() = 0; virtual std::tuple parse(size_t max_query_size) = 0; - virtual String get_query_ignore_error(size_t max_query_size) = 0; - virtual std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage) = 0; - virtual ~IQueryInfo() {} + virtual String str(size_t max_query_size) = 0; + virtual std::unique_ptr interpreter(Context & context, QueryProcessingStage::Enum stage) = 0; }; -} +} // namespace DB diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp similarity index 78% rename from dbms/src/Interpreters/InterpreterDAGRequest.cpp rename to dbms/src/Interpreters/InterpreterDAG.cpp index 483ef96fa2e..55009e12777 100644 --- a/dbms/src/Interpreters/InterpreterDAGRequest.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -5,9 +5,9 @@ #include #include #include -#include #include -#include +#include +#include #include #include #include @@ -25,12 +25,10 @@ namespace ErrorCodes extern const int TOO_MANY_COLUMNS; } -InterpreterDAGRequest::InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info_) - : context(context_), dag_query_info(dag_query_info_) -{} +InterpreterDAG::InterpreterDAG(Context & context_, DAGQuerySource & dag_query_src_) : context(context_), dag_query_src(dag_query_src_) {} // the flow is the same as executeFetchcolumns -bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) +bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) { if (!ts.has_table_id()) { @@ -38,11 +36,11 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip return false; } TableID id = ts.table_id(); - auto & tmt_ctx = context.ch_context.getTMTContext(); + auto & tmt_ctx = context.getTMTContext(); auto storage = tmt_ctx.getStorages().get(id); if (storage == nullptr) { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context.ch_context, false); + tmt_ctx.getSchemaSyncer()->syncSchema(id, context, false); storage = tmt_ctx.getStorages().get(id); } if (storage == nullptr) @@ -75,11 +73,11 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip return false; } - if (!dag_query_info.has_aggregation()) + if (!dag_query_src.has_aggregation()) { // if the dag request does not contain agg, then the final output is // based on the output of table scan - for (auto i : dag_query_info.get_dag_request().output_offsets()) + for (auto i : dag_query_src.get_dag_request().output_offsets()) { if (i < 0 || i >= required_columns.size()) { @@ -91,7 +89,7 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip } } // todo handle alias column - const Settings & settings = context.ch_context.getSettingsRef(); + const Settings & settings = context.getSettingsRef(); if (settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) { @@ -112,15 +110,14 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip //todo support index in SelectQueryInfo query_info; query_info.query = std::make_unique(); - ((ASTSelectQuery *)query_info.query.get())->is_fake_sel = true; query_info.mvcc_query_info = std::make_unique(); query_info.mvcc_query_info->resolve_locks = true; query_info.mvcc_query_info->read_tso = settings.read_tso; RegionQueryInfo info; - info.region_id = context.kv_context.region_id(); - info.conf_version = 
context.kv_context.region_epoch().conf_ver(); - info.version = context.kv_context.region_epoch().version(); - auto current_region = context.ch_context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); + info.region_id = dag_query_src.getRegionID(); + info.version = dag_query_src.getRegionVersion(); + info.conf_version = dag_query_src.getRegionConfVersion(); + auto current_region = context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); if (!current_region) { return false; @@ -128,7 +125,7 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip info.range_in_table = current_region->getHandleRangeByTable(id); query_info.mvcc_query_info->regions_query_info.push_back(info); query_info.mvcc_query_info->concurrent = 0.0; - pipeline.streams = storage->read(required_columns, query_info, context.ch_context, from_stage, max_block_size, max_streams); + pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); /// Set the limits and quota for reading data, the speed and time of the query. { IProfilingBlockInputStream::LocalLimits limits; @@ -147,7 +144,7 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip limits.min_execution_speed = settings.min_execution_speed; limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; - QuotaForIntervals & quota = context.ch_context.getQuota(); + QuotaForIntervals & quota = context.getQuota(); pipeline.transform([&](auto & stream) { if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) @@ -162,15 +159,15 @@ bool InterpreterDAGRequest::executeTS(const tipb::TableScan & ts, Pipeline & pip return true; } -InterpreterDAGRequest::AnalysisResult InterpreterDAGRequest::analyzeExpressions() +InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() { AnalysisResult res; ExpressionActionsChain chain; - res.need_aggregate = dag_query_info.has_aggregation(); - DAGExpressionAnalyzer expressionAnalyzer(source_columns, context.ch_context); - if (dag_query_info.has_selection()) + res.need_aggregate = dag_query_src.has_aggregation(); + DAGExpressionAnalyzer expressionAnalyzer(source_columns, context); + if (dag_query_src.has_selection()) { - if (expressionAnalyzer.appendWhere(chain, dag_query_info.get_sel(), res.filter_column_name)) + if (expressionAnalyzer.appendWhere(chain, dag_query_src.get_sel(), res.filter_column_name)) { res.has_where = true; res.before_where = chain.getLastActions(); @@ -182,9 +179,9 @@ InterpreterDAGRequest::AnalysisResult InterpreterDAGRequest::analyzeExpressions( { throw Exception("agg not supported"); } - if (dag_query_info.has_topN()) + if (dag_query_src.has_topN()) { - res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_info.get_topN(), res.order_column_names); + res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_src.get_topN(), res.order_column_names); } // append final project results for (auto & name : final_project) @@ -198,13 +195,13 @@ InterpreterDAGRequest::AnalysisResult InterpreterDAGRequest::analyzeExpressions( return res; } -void InterpreterDAGRequest::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column) +void InterpreterDAG::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column) { pipeline.transform( [&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr, 
filter_column); }); } -void InterpreterDAGRequest::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr) +void InterpreterDAG::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr) { if (expressionActionsPtr->getActions().size() > 0) { @@ -212,11 +209,11 @@ void InterpreterDAGRequest::executeExpression(Pipeline & pipeline, const Express } } -SortDescription InterpreterDAGRequest::getSortDescription(Strings & order_column_names) +SortDescription InterpreterDAG::getSortDescription(Strings & order_column_names) { // construct SortDescription SortDescription order_descr; - const tipb::TopN & topN = dag_query_info.get_topN(); + const tipb::TopN & topN = dag_query_src.get_topN(); order_descr.reserve(topN.order_by_size()); for (int i = 0; i < topN.order_by_size(); i++) { @@ -234,7 +231,7 @@ SortDescription InterpreterDAGRequest::getSortDescription(Strings & order_column return order_descr; } -void InterpreterDAGRequest::executeUnion(Pipeline & pipeline) +void InterpreterDAG::executeUnion(Pipeline & pipeline) { if (pipeline.hasMoreThanOneStream()) { @@ -243,11 +240,11 @@ void InterpreterDAGRequest::executeUnion(Pipeline & pipeline) } } -void InterpreterDAGRequest::executeOrder(Pipeline & pipeline, Strings & order_column_names) +void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_names) { SortDescription order_descr = getSortDescription(order_column_names); - const Settings & settings = context.ch_context.getSettingsRef(); - Int64 limit = dag_query_info.get_topN().limit(); + const Settings & settings = context.getSettingsRef(); + Int64 limit = dag_query_src.get_topN().limit(); pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, limit); @@ -266,13 +263,13 @@ void InterpreterDAGRequest::executeOrder(Pipeline & pipeline, Strings & order_co /// Merge the sorted blocks. 
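    /// (Flow sketch, using the limit = 3 TopN from cop_test.cpp as an example:
    /// each stream is first partially sorted with its own limit above, the
    /// streams are then unioned, and this merge keeps the global top 3,
    /// spilling to disk once max_bytes_before_external_sort is exceeded.)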
pipeline.firstStream() = std::make_shared(pipeline.firstStream(), order_descr, settings.max_block_size, - limit, settings.max_bytes_before_external_sort, context.ch_context.getTemporaryPath()); + limit, settings.max_bytes_before_external_sort, context.getTemporaryPath()); } //todo return the error message -bool InterpreterDAGRequest::executeImpl(Pipeline & pipeline) +bool InterpreterDAG::executeImpl(Pipeline & pipeline) { - if (!executeTS(dag_query_info.get_ts(), pipeline)) + if (!executeTS(dag_query_src.get_ts(), pipeline)) { return false; } @@ -303,14 +300,14 @@ bool InterpreterDAGRequest::executeImpl(Pipeline & pipeline) executeFinalProject(pipeline); // execute limit - if (dag_query_info.has_limit() && !dag_query_info.has_topN()) + if (dag_query_src.has_limit() && !dag_query_src.has_topN()) { executeLimit(pipeline); } return true; } -void InterpreterDAGRequest::executeFinalProject(Pipeline & pipeline) +void InterpreterDAG::executeFinalProject(Pipeline & pipeline) { auto columns = pipeline.firstStream()->getHeader(); NamesAndTypesList input_column; @@ -318,25 +315,25 @@ void InterpreterDAGRequest::executeFinalProject(Pipeline & pipeline) { input_column.emplace_back(column.name, column.type); } - ExpressionActionsPtr project = std::make_shared(input_column, context.ch_context.getSettingsRef()); + ExpressionActionsPtr project = std::make_shared(input_column, context.getSettingsRef()); project->add(ExpressionAction::project(final_project)); // add final project pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, project); }); } -void InterpreterDAGRequest::executeLimit(Pipeline & pipeline) +void InterpreterDAG::executeLimit(Pipeline & pipeline) { pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_info.get_limit().limit(), 0, false); }); + [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.get_limit().limit(), 0, false); }); if (pipeline.hasMoreThanOneStream()) { executeUnion(pipeline); pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_info.get_limit().limit(), 0, false); }); + [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.get_limit().limit(), 0, false); }); } } -BlockIO InterpreterDAGRequest::execute() +BlockIO InterpreterDAG::execute() { Pipeline pipeline; executeImpl(pipeline); diff --git a/dbms/src/Interpreters/InterpreterDAGRequest.h b/dbms/src/Interpreters/InterpreterDAG.h similarity index 84% rename from dbms/src/Interpreters/InterpreterDAGRequest.h rename to dbms/src/Interpreters/InterpreterDAG.h index 7cfe18c9374..1b0ff8c02db 100644 --- a/dbms/src/Interpreters/InterpreterDAGRequest.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -6,31 +6,35 @@ #include #pragma GCC diagnostic pop -#include #include -#include -#include +#include +#include #include #include +#include namespace DB { +class Context; + /** build ch plan from dag request: dag executors -> ch plan */ -class InterpreterDAGRequest : public IInterpreter +class InterpreterDAG : public IInterpreter { public: - InterpreterDAGRequest(CoprocessorContext & context_, DAGQueryInfo & dag_query_info); + InterpreterDAG(Context & context_, DAGQuerySource & dag_query_src_); - ~InterpreterDAGRequest() = default; + ~InterpreterDAG() = default; BlockIO execute(); private: - CoprocessorContext & context; + Context & context; + + DAGQuerySource & dag_query_src; + NamesWithAliases final_project; - DAGQueryInfo & dag_query_info; NamesAndTypesList source_columns; size_t max_streams = 1; diff --git 
a/dbms/src/Interpreters/SQLQuerySource.cpp b/dbms/src/Interpreters/SQLQuerySource.cpp
new file mode 100644
index 00000000000..aeb1a64bcad
--- /dev/null
+++ b/dbms/src/Interpreters/SQLQuerySource.cpp
@@ -0,0 +1,36 @@
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+SQLQuerySource::SQLQuerySource(const char * begin_, const char * end_) : begin(begin_), end(end_) {}
+
+std::tuple<String, ASTPtr> SQLQuerySource::parse(size_t max_query_size)
+{
+    ParserQuery parser(end);
+    size_t query_size;
+    /// TODO Parser should fail early when max_query_size limit is reached.
+    ast = parseQuery(parser, begin, end, "", max_query_size);
+
+    /// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion.
+    if (!(begin <= ast->range.first && ast->range.second <= end))
+        throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR);
+    query_size = ast->range.second - begin;
+    query = String(begin, begin + query_size);
+    return std::make_tuple(query, ast);
+}
+
+String SQLQuerySource::str(size_t max_query_size)
+{
+    return String(begin, begin + std::min(end - begin, static_cast(max_query_size)));
+}
+
+std::unique_ptr<IInterpreter> SQLQuerySource::interpreter(Context & context, QueryProcessingStage::Enum stage)
+{
+    return InterpreterFactory::get(ast, context, stage);
+}
+} // namespace DB
diff --git a/dbms/src/Interpreters/SQLQuerySource.h b/dbms/src/Interpreters/SQLQuerySource.h
new file mode 100644
index 00000000000..64a56b30f02
--- /dev/null
+++ b/dbms/src/Interpreters/SQLQuerySource.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+/** SQLQuerySource for a query represented by an SQL string.
+ */
+class SQLQuerySource : public IQuerySource
+{
+public:
+    SQLQuerySource(const char * begin_, const char * end_);
+    std::tuple<String, ASTPtr> parse(size_t max_query_size) override;
+    String str(size_t max_query_size) override;
+    std::unique_ptr<IInterpreter> interpreter(Context & context, QueryProcessingStage::Enum stage) override;
+
+private:
+    const char * begin;
+    const char * end;
+    String query;
+    ASTPtr ast;
+};
+
+} // namespace DB
diff --git a/dbms/src/Interpreters/StringQueryInfo.cpp b/dbms/src/Interpreters/StringQueryInfo.cpp
deleted file mode 100644
index d326f51dead..00000000000
--- a/dbms/src/Interpreters/StringQueryInfo.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include
-#include
-#include
-#include
-
-
-namespace DB
-{
-
-    StringQueryInfo::StringQueryInfo(const char *begin_, const char *end_, bool internal_)
-    : begin(begin_), end(end_), internal(internal_){}
-    std::tuple StringQueryInfo::parse(size_t max_query_size) {
-        ParserQuery parser(end);
-        size_t query_size;
-        /// TODO Parser should fail early when max_query_size limit is reached.
-        ast = parseQuery(parser, begin, end, "", max_query_size);
-
-        /// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion.
- if (!(begin <= ast->range.first && ast->range.second <= end)) - throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR); - query_size = ast->range.second - begin; - query = String(begin, begin + query_size); - return std::make_tuple(query, ast); - } - String StringQueryInfo::get_query_ignore_error(size_t max_query_size) { - return String(begin, begin + std::min(end - begin, static_cast(max_query_size))); - } - - std::unique_ptr StringQueryInfo::getInterpreter(Context & context, QueryProcessingStage::Enum stage) { - return InterpreterFactory::get(ast, context, stage); - } -} diff --git a/dbms/src/Interpreters/StringQueryInfo.h b/dbms/src/Interpreters/StringQueryInfo.h deleted file mode 100644 index d5031c5d4f9..00000000000 --- a/dbms/src/Interpreters/StringQueryInfo.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -/** StringQueryInfo for query represented by string. - */ -class StringQueryInfo : public IQueryInfo -{ -public: - - StringQueryInfo(const char * begin_, const char * end_, bool internal_); - std::tuple parse(size_t max_query_size); - String get_query_ignore_error(size_t max_query_size); - std::unique_ptr getInterpreter(Context & context, QueryProcessingStage::Enum stage); - bool isInternalQuery() {return internal;}; - -private: - const char * begin; - const char * end; - bool internal; - String query; - ASTPtr ast; -}; - -} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index c06de5e6d2d..8d58a8531fc 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -18,16 +18,16 @@ #include #include -#include +#include +#include #include #include #include +#include +#include #include -#include #include #include -#include -#include namespace ProfileEvents @@ -137,8 +137,9 @@ static void onExceptionBeforeStart(const String & query, Context & context, time static std::tuple executeQueryImpl( - IQueryInfo & queryInfo, + IQuerySource & query_src, Context & context, + bool internal, QueryProcessingStage::Enum stage) { ProfileEvents::increment(ProfileEvents::Query); @@ -153,21 +154,21 @@ static std::tuple executeQueryImpl( /// Don't limit the size of internal queries. size_t max_query_size = 0; - if (!queryInfo.isInternalQuery()) + if (!internal) max_query_size = settings.max_query_size; try { - std::tie(query, ast) = queryInfo.parse(max_query_size); + std::tie(query, ast) = query_src.parse(max_query_size); } catch (...) { - if (!queryInfo.isInternalQuery()) + if (!internal) { /// Anyway log the query. - String q = queryInfo.get_query_ignore_error(max_query_size); - logQuery(query.substr(0, settings.log_queries_cut_to_length), context); - onExceptionBeforeStart(query, context, current_time); + String str = query_src.str(max_query_size); + logQuery(str.substr(0, settings.log_queries_cut_to_length), context); + onExceptionBeforeStart(str, context, current_time); } throw; @@ -177,7 +178,7 @@ static std::tuple executeQueryImpl( try { - if (!queryInfo.isInternalQuery()) + if (!internal) logQuery(query.substr(0, settings.log_queries_cut_to_length), context); /// Check the limits. @@ -190,7 +191,7 @@ static std::tuple executeQueryImpl( /// Put query to process list. But don't put SHOW PROCESSLIST query itself. 
ProcessList::EntryPtr process_list_entry; - if (!queryInfo.isInternalQuery() && nullptr == typeid_cast(&*ast)) + if (!internal && nullptr == typeid_cast(&*ast)) { process_list_entry = context.getProcessList().insert( query, @@ -201,7 +202,7 @@ static std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } - auto interpreter = queryInfo.getInterpreter(context, stage); + auto interpreter = query_src.interpreter(context, stage); res = interpreter->execute(); /// Delayed initialization of query streams (required for KILL QUERY purposes) @@ -253,7 +254,7 @@ static std::tuple executeQueryImpl( elem.client_info = context.getClientInfo(); - bool log_queries = settings.log_queries && !queryInfo.isInternalQuery(); + bool log_queries = settings.log_queries && !internal; /// Log into system table start of query execution, if need. if (log_queries) @@ -358,7 +359,7 @@ static std::tuple executeQueryImpl( } }; - if (!queryInfo.isInternalQuery() && res.in) + if (!internal && res.in) { std::stringstream log_str; log_str << "Query pipeline:\n"; @@ -369,7 +370,7 @@ static std::tuple executeQueryImpl( } catch (...) { - if (!queryInfo.isInternalQuery()) + if (!internal) onExceptionBeforeStart(query, context, current_time); throw; @@ -386,18 +387,22 @@ BlockIO executeQuery( QueryProcessingStage::Enum stage) { BlockIO streams; - StringQueryInfo queryInfo(query.data(), query.data() + query.size(), internal); - std::tie(std::ignore, streams) = executeQueryImpl(queryInfo, context, stage); + SQLQuerySource query_src(query.data(), query.data() + query.size()); + std::tie(std::ignore, streams) = executeQueryImpl(query_src, context, internal, stage); return streams; } -BlockIO executeQuery(const tipb::DAGRequest & dag_request, CoprocessorContext & context, QueryProcessingStage::Enum stage) { + +BlockIO executeQuery(const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version, + Context & context, QueryProcessingStage::Enum stage) +{ BlockIO streams; - DAGQueryInfo queryInfo(dag_request, context); - std::tie(std::ignore, streams) = executeQueryImpl(queryInfo, context.ch_context, stage); + DAGQuerySource query_src(context, region_id, region_version, region_conf_version, dag_request); + std::tie(std::ignore, streams) = executeQueryImpl(query_src, context, false, stage); return streams; } + void executeQuery( ReadBuffer & istr, WriteBuffer & ostr, @@ -434,8 +439,8 @@ void executeQuery( ASTPtr ast; BlockIO streams; - StringQueryInfo queryInfo(begin, end, false); - std::tie(ast, streams) = executeQueryImpl(queryInfo, context, QueryProcessingStage::Complete); + SQLQuerySource query_info(begin, end); + std::tie(ast, streams) = executeQueryImpl(query_info, context, false, QueryProcessingStage::Complete); try { diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index db8c93e68cd..48c061f484d 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -2,8 +2,8 @@ #include #include +#include #include -#include namespace DB @@ -41,6 +41,13 @@ BlockIO executeQuery( QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// To which stage the query must be executed. 
); -BlockIO executeQuery(const tipb::DAGRequest & dag_request, CoprocessorContext & context, QueryProcessingStage::Enum stage); + +BlockIO executeQuery( + const tipb::DAGRequest & dag_request, + RegionID region_id, + UInt64 region_version, + UInt64 region_conf_version, + Context & context, + QueryProcessingStage::Enum stage); } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index fe1e64b43b4..96508073e17 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -20,7 +20,6 @@ class ASTSelectQuery : public IAST ASTPtr clone() const override; - bool is_fake_sel = false; bool raw_for_mutable = false; bool distinct = false; bool no_kvstore = false; diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index 614ae9c6a9f..be452e85d65 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -24,9 +24,7 @@ add_library (clickhouse-server-lib RootRequestHandler.cpp Server.cpp StatusFile.cpp - TCPHandler.cpp - FlashService.cpp - cop_test.cpp) + TCPHandler.cpp) target_link_libraries (clickhouse-server-lib clickhouse_common_io daemon clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions) target_include_directories (clickhouse-server-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) @@ -107,9 +105,6 @@ else () target_include_directories (theflash BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) target_include_directories (theflash PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - add_executable (copClient cop_test.cpp) - target_link_libraries (copClient clickhouse-server-lib) - if (USE_EMBEDDED_COMPILER) target_link_libraries (theflash clickhouse-compiler-lib) endif () diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 537f3173936..d72e8adbb1a 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -1,16 +1,8 @@ #include "Server.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include #include #include @@ -20,24 +12,32 @@ #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include #include #include #include -#include -#include -#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include "Flash/FlashService.h" #include "HTTPHandlerFactory.h" #include "MetricsTransmitter.h" #include "StatusFile.h" #include "TCPHandlerFactory.h" -#include "FlashService.h" #if Poco_NetSSL_FOUND #include From be4d80c1e730bd528dcf05e27f954bf5ba1e5002 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 5 Aug 2019 01:38:01 +0800 Subject: [PATCH 12/79] Format --- dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp | 3 --- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 3 ++- dbms/src/Flash/Coprocessor/DAGDriver.h | 3 ++- dbms/src/Flash/FlashService.cpp | 3 ++- dbms/src/Flash/FlashService.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp index b39d1339869..2a0d6b078bf 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp @@ -1,13 +1,10 @@ #include #include -#include #include #include -#include #include #include -#include #include #include #include diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 
3f51e93f8e5..fab73dfa1d5 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -1,8 +1,9 @@ +#include + #include #include #include #include -#include #include #include #include diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index 89f01e140a3..77ffcdea66b 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -1,6 +1,7 @@ #pragma once -#include +#include +#include #include namespace DB diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 970b46aa638..51d7b1f2734 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -1,6 +1,7 @@ +#include + #include #include -#include #include #include #include diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index c0885c9c9a4..8dcb31e2bde 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include -#include "Server/IServer.h" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include From a76fdb33922c867b3b41bd80d6a8e4c2bfa41ba5 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 5 Aug 2019 09:42:49 +0800 Subject: [PATCH 13/79] merge pingcap/cop --- dbms/src/Interpreters/DAGStringConverter.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h index 39c4ad47fdb..fc8006f8096 100644 --- a/dbms/src/Interpreters/DAGStringConverter.h +++ b/dbms/src/Interpreters/DAGStringConverter.h @@ -6,7 +6,6 @@ #include #pragma GCC diagnostic pop -#include #include namespace DB From 0cfe045254a0b06d538c5f1ab7c5f70a15207d20 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 5 Aug 2019 16:18:04 +0800 Subject: [PATCH 14/79] Refine code --- dbms/src/Core/Defines.h | 2 +- .../Flash/Coprocessor/CoprocessorHandler.cpp | 34 ++--- .../Flash/Coprocessor/CoprocessorHandler.h | 10 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 21 +-- dbms/src/Flash/Coprocessor/DAGDriver.h | 2 +- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 4 +- dbms/src/Flash/FlashService.cpp | 142 ++++++++++-------- dbms/src/Flash/FlashService.h | 4 + dbms/src/Interpreters/DAGQuerySource.h | 13 +- dbms/src/Interpreters/IQuerySource.h | 4 +- dbms/src/Interpreters/SQLQuerySource.h | 8 +- dbms/src/Interpreters/Settings.h | 4 +- 12 files changed, 129 insertions(+), 119 deletions(-) diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index aa01d84aaad..b96737000f9 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -28,7 +28,7 @@ #define DEFAULT_MAX_READ_TSO 0xFFFFFFFFFFFFFFFF -#define DEFAULT_RECORDS_PER_CHUNK 64L +#define DEFAULT_DAG_RECORDS_PER_CHUNK 64L /** Which blocks by default read the data (by number of rows). * Smaller values give better cache locality, less consumption of RAM, but more overhead to process the query. 
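The renamed DEFAULT_DAG_RECORDS_PER_CHUNK caps how many rows are encoded into each chunk of a tipb::SelectResponse. A minimal sketch of the chunking loop, assuming a helper encodeRow() and the tipb-generated add_chunks()/set_rows_data() accessors; the real logic lives in DAGBlockOutputStream, which is not shown in this excerpt:

    // Cut encoded rows into chunks of at most records_per_chunk rows.
    void writeBlock(const Block & block, tipb::SelectResponse & response, Int64 records_per_chunk)
    {
        std::stringstream current_chunk; // rows encoded per the request's encode_type
        Int64 rows_in_chunk = 0;
        for (size_t row = 0; row < block.rows(); ++row)
        {
            encodeRow(block, row, current_chunk); // assumed helper
            if (++rows_in_chunk == records_per_chunk)
            {
                response.add_chunks()->set_rows_data(current_chunk.str());
                current_chunk.str("");
                rows_in_chunk = 0;
            }
        }
        if (rows_in_chunk > 0)
            response.add_chunks()->set_rows_data(current_chunk.str());
    }

Smaller chunks cost more protobuf overhead per row; larger chunks delay the first decodable result on the TiDB side, which is why this is exposed as a per-query setting rather than hard-coded.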
diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp index 2a0d6b078bf..028024d0b47 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -14,40 +13,39 @@ namespace DB { +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + CoprocessorHandler::CoprocessorHandler( CoprocessorContext & cop_context_, const coprocessor::Request * cop_request_, coprocessor::Response * cop_response_) : cop_context(cop_context_), cop_request(cop_request_), cop_response(cop_response_), log(&Logger::get("CoprocessorHandler")) {} -CoprocessorHandler::~CoprocessorHandler() {} - -bool CoprocessorHandler::execute() +void CoprocessorHandler::execute() { switch (cop_request->tp()) { - case REQ_TYPE_DAG: + case COP_REQ_TYPE_DAG: { tipb::DAGRequest dag_request; dag_request.ParseFromString(cop_request->data()); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString()); tipb::SelectResponse dag_response; DAGDriver driver(cop_context.db_context, dag_request, cop_context.kv_context.region_id(), cop_context.kv_context.region_epoch().version(), cop_context.kv_context.region_epoch().conf_ver(), dag_response); - if (driver.execute()) - { - cop_response->set_data(dag_response.SerializeAsString()); - return true; - } - return false; + driver.execute(); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); + cop_response->set_data(dag_response.SerializeAsString()); + break; } - case REQ_TYPE_ANALYZE: - case REQ_TYPE_CHECKSUM: + case COP_REQ_TYPE_ANALYZE: + case COP_REQ_TYPE_CHECKSUM: default: - LOG_ERROR(log, "Flash service Coprocessor other than dag request not implement yet"); - // return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "Only DAG request is supported"); - return false; + throw Exception( + "Coprocessor request type " + std::to_string(cop_request->tp()) + " is not implemented", ErrorCodes::NOT_IMPLEMENTED); } - - return true; } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h index c704c0ede36..2aafa8a71ff 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h @@ -34,16 +34,16 @@ class CoprocessorHandler public: CoprocessorHandler(CoprocessorContext & cop_context_, const coprocessor::Request * cop_request_, coprocessor::Response * response_); - ~CoprocessorHandler(); + ~CoprocessorHandler() = default; - bool execute(); + void execute(); protected: enum { - REQ_TYPE_DAG = 103, - REQ_TYPE_ANALYZE = 104, - REQ_TYPE_CHECKSUM = 105, + COP_REQ_TYPE_DAG = 103, + COP_REQ_TYPE_ANALYZE = 104, + COP_REQ_TYPE_CHECKSUM = 105, }; CoprocessorContext & cop_context; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index fab73dfa1d5..7fbf2408743 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -10,6 +10,10 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, tipb::SelectResponse & dag_response_) @@ -21,25 +25,24 @@ DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, dag_response(dag_response_) {} -bool DAGDriver::execute() +void 
DAGDriver::execute() { context.setSetting("read_tso", UInt64(dag_request.start_ts())); BlockIO streams = executeDAG(); if (!streams.in || streams.out) { - // only query is allowed, so streams.in must not be null and streams.out must be null - return false; + // Only query is allowed, so streams.in must not be null and streams.out must be null + throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); } BlockOutputStreamPtr outputStreamPtr = std::make_shared( - dag_response, context.getSettings().records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); + dag_response, context.getSettings().dag_records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); copyData(*streams.in, *outputStreamPtr); - return true; } BlockIO DAGDriver::executeDAG() { - String builder_version = context.getSettings().coprocessor_plan_builder_version; - if (builder_version == "v1") + String planner = context.getSettings().dag_planner; + if (planner == "sql") { DAGStringConverter converter(context, dag_request); String query = converter.buildSqlString(); @@ -49,13 +52,13 @@ BlockIO DAGDriver::executeDAG() } return executeQuery(query, context, false, QueryProcessingStage::Complete); } - else if (builder_version == "v2") + else if (planner == "optree") { return executeQuery(dag_request, region_id, region_version, region_conf_version, context, QueryProcessingStage::Complete); } else { - throw Exception("coprocessor plan builder version should be set to v1 or v2"); + throw Exception("Unknown DAG planner type " + planner, ErrorCodes::LOGICAL_ERROR); } } diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index 77ffcdea66b..3dce5abeb31 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -16,7 +16,7 @@ class DAGDriver public: DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, tipb::SelectResponse & dag_response_); - bool execute(); + void execute(); private: BlockIO executeDAG(); diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index d039d90465d..0aea9e6bc46 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -1,5 +1,5 @@ -#include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -26,7 +26,7 @@ class FlashClient { grpc::ClientContext clientContext; clientContext.AddMetadata("user_name", ""); - clientContext.AddMetadata("builder_version", "v2"); + clientContext.AddMetadata("dag_planner", "optree"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); size_t column_num = 3; diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 51d7b1f2734..1abb6189fa9 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -10,6 +10,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + FlashService::FlashService(const std::string & address_, IServer & server_) : server(server_), address(address_), log(&Logger::get("FlashService")) { @@ -35,22 +40,78 @@ FlashService::~FlashService() grpc_server->Wait(); } -String getClientMetaVar(grpc::ServerContext * grpc_context, String name, String default_val) +grpc::Status FlashService::Coprocessor( + grpc::ServerContext * grpc_context, const coprocessor::Request * request, 
coprocessor::Response * response) { - if (grpc_context->client_metadata().count(name) != 1) + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling coprocessor request: " << request->DebugString()); + + auto [context, status] = createDBContext(grpc_context); + if (!status.ok()) { - return default_val; + return status; } - else + + try { - return String(grpc_context->client_metadata().find(name)->second.data()); + CoprocessorContext cop_context(context, request->context(), *grpc_context); + CoprocessorHandler cop_handler(cop_context, request, response); + + cop_handler.execute(); + + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle coprocessor request done"); + return ::grpc::Status(::grpc::StatusCode::OK, ""); + } + catch (const LockException & e) + { + // TODO: handle lock error properly. + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": LockException: " << e.displayText()); + response->set_data(""); + return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); + } + catch (const RegionException & e) + { + // TODO: handle region error properly. + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": RegionException: " << e.displayText()); + response->set_data(""); + return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); + } + catch (const Exception & e) + { + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); + response->set_data(""); + + if (e.code() == ErrorCodes::NOT_IMPLEMENTED) + return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); + + // TODO: Map other DB error codes to grpc codes. + + return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.message()); + } + catch (const std::exception & e) + { + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.what()); + response->set_data(""); + return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.what()); } } -::grpc::Status setClientInfo(grpc::ServerContext * grpc_context, Context & context) +String getClientMetaVarWithDefault(grpc::ServerContext * grpc_context, const String & name, const String & default_val) { + if (grpc_context->client_metadata().count(name) != 1) + return default_val; + else + return String(grpc_context->client_metadata().find(name)->second.data()); +} + +std::tuple FlashService::createDBContext(grpc::ServerContext * grpc_context) +{ + /// Create DB context. + Context context = server.context(); + context.setGlobalContext(server.context()); + + /// Set a bunch of client information. 
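The knobs read below arrive as per-RPC gRPC metadata supplied by the caller. On the client side the pairing looks like this (the keys match what this function understands; the values are only examples, and the test client later in this series sets "dag_planner" the same way):

    grpc::ClientContext client_context;
    client_context.AddMetadata("query_id", "cop-query-1");
    client_context.AddMetadata("user", "root");
    client_context.AddMetadata("dag_records_per_chunk", "64");
    client_context.AddMetadata("dag_planner", "optree");

Any key the client omits falls back to the default passed to getClientMetaVarWithDefault().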
auto client_meta = grpc_context->client_metadata(); - String query_id = getClientMetaVar(grpc_context, "query_id", ""); + String query_id = getClientMetaVarWithDefault(grpc_context, "query_id", ""); context.setCurrentQueryId(query_id); ClientInfo & client_info = context.getClientInfo(); client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; @@ -59,70 +120,23 @@ ::grpc::Status setClientInfo(grpc::ServerContext * grpc_context, Context & conte Int64 pos = peer.find(':'); if (pos == -1) { - return ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "invalid peer address"); + return std::make_tuple(context, ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid peer address: " + peer)); } std::string client_ip = peer.substr(pos + 1); Poco::Net::SocketAddress client_address(client_ip); client_info.current_address = client_address; - client_info.current_user = getClientMetaVar(grpc_context, "user", ""); - std::string records_per_chunk_str = getClientMetaVar(grpc_context, "records_per_chunk", ""); - if (!records_per_chunk_str.empty()) - { - context.setSetting("records_per_chunk", records_per_chunk_str); - } - std::string builder_version = getClientMetaVar(grpc_context, "builder_version", "v1"); - context.setSetting("coprocessor_plan_builder_version", builder_version); - return ::grpc::Status::OK; -} + client_info.current_user = getClientMetaVarWithDefault(grpc_context, "user", ""); -grpc::Status FlashService::Coprocessor( - grpc::ServerContext * grpc_context, const coprocessor::Request * request, coprocessor::Response * response) -{ - LOG_DEBUG(log, "receive coprocessor request"); - LOG_DEBUG(log, request->DebugString()); - Context context = server.context(); - context.setGlobalContext(server.context()); - setClientInfo(grpc_context, context); - try - { - CoprocessorContext cop_context(context, request->context(), *grpc_context); - CoprocessorHandler cop_handler(cop_context, request, response); - if (cop_handler.execute()) - { - LOG_DEBUG(log, "Flash service Coprocessor finished"); - return ::grpc::Status(::grpc::StatusCode::OK, ""); - } - else - { - LOG_ERROR(log, "Flash service Coprocessor meet internal error"); - return ::grpc::Status(::grpc::StatusCode::INTERNAL, ""); - } - } - catch (LockException & e) - { - //todo set lock error info - LOG_ERROR(log, "meet lock exception"); - // clear the data to avoid sending partial data - response->set_data(""); - } - catch (RegionException & e) + /// Set DAG parameters. + std::string dag_records_per_chunk_str = getClientMetaVarWithDefault(grpc_context, "dag_records_per_chunk", ""); + if (!dag_records_per_chunk_str.empty()) { - // todo set region error info - LOG_ERROR(log, "meet region exception"); - response->set_data(""); - } - catch (Exception & e) - { - // todo return exception message - LOG_ERROR(log, "meet unknown exception, errmsg: " + e.message()); - response->set_data(""); + context.setSetting("dag_records_per_chunk", dag_records_per_chunk_str); } - catch (...) 
-    {
-        LOG_ERROR(log, "meet unknown exception");
-        response->set_data("");
-    }
-    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "");
+    std::string planner = getClientMetaVarWithDefault(grpc_context, "dag_planner", "sql");
+    context.setSetting("dag_planner", planner);
+
+    return std::make_tuple(context, ::grpc::Status::OK);
 }

 } // namespace DB
diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h
index 8dcb31e2bde..8ab123cc1fb 100644
--- a/dbms/src/Flash/FlashService.h
+++ b/dbms/src/Flash/FlashService.h
@@ -1,5 +1,6 @@
 #pragma once

+#include
 #include
 #include
 #include
@@ -25,6 +26,9 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shar
     grpc::Status Coprocessor(grpc::ServerContext * context, const coprocessor::Request * request, coprocessor::Response * response);

+private:
+    std::tuple<Context, ::grpc::Status> createDBContext(grpc::ServerContext * grpc_context);
+
 private:
     IServer & server;
diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Interpreters/DAGQuerySource.h
index f7225ce46d7..e162f5f1e87 100644
--- a/dbms/src/Interpreters/DAGQuerySource.h
+++ b/dbms/src/Interpreters/DAGQuerySource.h
@@ -5,19 +5,16 @@
 #include
 #pragma GCC diagnostic pop

-#include
 #include
-#include
 #include
-

 namespace DB
 {

 class Context;

-/** DAGQueryInfo for query represented by DAG request.
-  */
+/// Query source of a DAG request via gRPC.
+/// This is also an IR of a DAG.
 class DAGQuerySource : public IQuerySource
 {
 public:
@@ -30,9 +27,9 @@ class DAGQuerySource : public IQuerySource
     DAGQuerySource(Context & context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_,
         const tipb::DAGRequest & dag_request_);

-    virtual std::tuple<String, ASTPtr> parse(size_t max_query_size) override;
-    virtual String str(size_t max_query_size) override;
-    virtual std::unique_ptr<IInterpreter> interpreter(Context & context, QueryProcessingStage::Enum stage) override;
+    std::tuple<String, ASTPtr> parse(size_t max_query_size) override;
+    String str(size_t max_query_size) override;
+    std::unique_ptr<IInterpreter> interpreter(Context & context, QueryProcessingStage::Enum stage) override;

     void assertValid(Int32 index, const String & name)
     {
diff --git a/dbms/src/Interpreters/IQuerySource.h b/dbms/src/Interpreters/IQuerySource.h
index b23290231d5..0738f47f836 100644
--- a/dbms/src/Interpreters/IQuerySource.h
+++ b/dbms/src/Interpreters/IQuerySource.h
@@ -1,6 +1,5 @@
 #pragma once

-
 #include
 #include
 #include
@@ -8,8 +7,7 @@
 namespace DB
 {

-/** IQueryInfo interface for different source of queries.
-  */
+/// A tiny abstraction of the different sources a query comes from, i.e. a SQL string or a DAG request.
 class IQuerySource
 {
 public:
diff --git a/dbms/src/Interpreters/SQLQuerySource.h b/dbms/src/Interpreters/SQLQuerySource.h
index 64a56b30f02..cdffe438c66 100644
--- a/dbms/src/Interpreters/SQLQuerySource.h
+++ b/dbms/src/Interpreters/SQLQuerySource.h
@@ -1,20 +1,16 @@
 #pragma once

-#include
-#include
 #include
-#include
-

 namespace DB
 {

-/** StringQueryInfo for query represented by string.
-  */
+/// Regular query source of a SQL string.
class SQLQuerySource : public IQuerySource { public: SQLQuerySource(const char * begin_, const char * end_); + std::tuple parse(size_t max_query_size) override; String str(size_t max_query_size) override; std::unique_ptr interpreter(Context & context, QueryProcessingStage::Enum stage) override; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 4c967747808..2721fa5bacc 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -29,8 +29,8 @@ struct Settings M(SettingString, regions, "", "the region need to be read.") \ M(SettingBool, resolve_locks, false, "tmt read tso.") \ M(SettingUInt64, read_tso, DEFAULT_MAX_READ_TSO, "tmt read tso.") \ - M(SettingInt64, records_per_chunk, DEFAULT_RECORDS_PER_CHUNK, "default chunk size for coprocessor.") \ - M(SettingString, coprocessor_plan_builder_version, "v1", "how to build ch plan in coprocessor handler, v1 means build the plan based on string, v2 means build the plan based on cop executor") \ + M(SettingInt64, dag_records_per_chunk, DEFAULT_DAG_RECORDS_PER_CHUNK, "default chunk size of a DAG response.") \ + M(SettingString, dag_planner, "sql", "planner for DAG query, sql builds the SQL string, optree builds the internal operator(stream) tree.") \ M(SettingUInt64, min_compress_block_size, DEFAULT_MIN_COMPRESS_BLOCK_SIZE, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.") \ M(SettingUInt64, max_compress_block_size, DEFAULT_MAX_COMPRESS_BLOCK_SIZE, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading") \ From 3617a87f3f31d9e834a225576c8c2ee913779165 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 5 Aug 2019 17:19:46 +0800 Subject: [PATCH 15/79] basic support for dag agg executor --- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 25 ++ .../Interpreters/DAGExpressionAnalyzer.cpp | 227 ++++++++++++++---- dbms/src/Interpreters/DAGExpressionAnalyzer.h | 12 +- dbms/src/Interpreters/DAGQuerySource.h | 20 +- dbms/src/Interpreters/DAGUtils.cpp | 12 +- dbms/src/Interpreters/DAGUtils.h | 1 + dbms/src/Interpreters/InterpreterDAG.cpp | 114 +++++++-- dbms/src/Interpreters/InterpreterDAG.h | 6 + 8 files changed, 342 insertions(+), 75 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index 0aea9e6bc46..79e65a34349 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -67,6 +67,7 @@ grpc::Status rpcTest() // construct a dag request tipb::DAGRequest dagRequest; dagRequest.set_start_ts(18446744073709551615uL); + // table scan: s,i tipb::Executor * executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTableScan); tipb::TableScan * ts = executor->mutable_tbl_scan(); @@ -78,6 +79,8 @@ grpc::Status rpcTest() dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); + + // selection: less(i, 123) executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); tipb::Selection * selection = executor->mutable_selection(); @@ -95,6 +98,28 @@ grpc::Status rpcTest() DB::EncodeNumber(123, ss); value->set_val(std::string(ss.str())); + // agg: count(s) group by i; + /* + executor = dagRequest.add_executors(); + 
executor->set_tp(tipb::ExecType::TypeAggregation); + auto agg = executor->mutable_aggregation(); + auto agg_func = agg->add_agg_func(); + agg_func->set_tp(tipb::ExprType::Count); + auto child = agg_func->add_children(); + child->set_tp(tipb::ExprType::ColumnRef); + ss.str(""); + DB::EncodeNumber(1, ss); + child->set_val(ss.str()); + auto type = agg_func->mutable_field_type(); + type->set_tp(3); + type->set_flag(33); + auto group_col = agg->add_group_by(); + group_col->set_tp(tipb::ExprType::ColumnRef); + ss.str(""); + DB::EncodeNumber(2,ss); + group_col->set_val(ss.str()); + */ + // topn executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTopN); diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp index 720f11d85ea..ea824eb2a4d 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -1,4 +1,5 @@ +#include #include #include #include @@ -10,10 +11,90 @@ namespace DB { +static String genCastString(const String & org_name, const String & target_type_name) +{ + return "cast(" + org_name + ", " + target_type_name + ") "; +} + +static String genFuncString(const String & func_name, const Names & argument_names) +{ + std::stringstream ss; + ss << func_name << "("; + bool first = true; + for (const String & argument_name : argument_names) + { + if (first) + { + first = false; + } + else + { + ss << ", "; + } + ss << argument_name; + } + ss << ") "; + return ss.str(); +} + DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_) : source_columns(source_columns_), context(context_) { settings = context.getSettings(); + after_agg = false; +} + +bool DAGExpressionAnalyzer::appendAggregation( + ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregation_keys, AggregateDescriptions & aggregate_descriptions) +{ + if (agg.group_by_size() == 0 && agg.agg_func_size() == 0) + { + //should not reach here + return false; + } + initChain(chain, getCurrentInputColumns()); + ExpressionActionsChain::Step & step = chain.steps.back(); + + Names agg_argument_names; + for (const tipb::Expr & expr : agg.agg_func()) + { + const String & agg_func_name = getAggFunctionName(expr); + AggregateDescription aggregate; + DataTypes types(expr.children_size()); + aggregate.argument_names.resize(expr.children_size()); + for (Int32 i = 0; i < expr.children_size(); i++) + { + String arg_name = getActions(expr.children(i), step.actions); + agg_argument_names.push_back(arg_name); + types[i] = step.actions->getSampleBlock().getByName(arg_name).type; + aggregate.argument_names[i] = arg_name; + } + String func_string = genFuncString(agg_func_name, agg_argument_names); + aggregate.column_name = func_string; + //todo de-duplicate aggregation column + aggregate.parameters = Array(); + aggregate.function = AggregateFunctionFactory::instance().get(agg_func_name, types); + aggregate_descriptions.push_back(aggregate); + DataTypePtr result_type = aggregate.function->getReturnType(); + // this is a temp result since implicit cast maybe added on these aggregated_columns + aggregated_columns.emplace_back(func_string, result_type); + } + + for (auto name : agg_argument_names) + { + step.required_output.push_back(std::move(name)); + } + + for (const tipb::Expr & expr : agg.group_by()) + { + String name = getActions(expr, step.actions); + step.required_output.push_back(name); + // this is a temp result since implicit cast maybe added 
on these aggregated_columns + aggregated_columns.emplace_back(name, step.actions->getSampleBlock().getByName(name).type); + aggregation_keys.push_back(name); + } + after_agg = true; + return true; } bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name) @@ -36,7 +117,7 @@ bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const ti } const tipb::Expr & filter = sel.conditions_size() > 1 ? final_condition : sel.conditions(0); - initChain(chain, source_columns); + initChain(chain, getCurrentInputColumns()); filter_column_name = getActions(filter, chain.steps.back().actions); chain.steps.back().required_output.push_back(filter_column_name); return true; @@ -48,7 +129,7 @@ bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const { return false; } - initChain(chain, aggregated_columns); + initChain(chain, getCurrentInputColumns()); ExpressionActionsChain::Step & step = chain.steps.back(); for (const tipb::ByItem & byItem : topN.order_by()) { @@ -59,7 +140,99 @@ bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const return true; } -const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return source_columns; } +const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return after_agg ? aggregated_columns : source_columns; } + +bool DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & aggregation) +{ + initChain(chain, getCurrentInputColumns()); + bool need_update_aggregated_columns = false; + NamesAndTypesList updated_aggregated_columns; + ExpressionActionsChain::Step step = chain.steps.back(); + for (Int32 i = 0; i < aggregation.agg_func_size(); i++) + { + String & name = aggregated_columns.getNames()[i]; + String updated_name = appendCastIfNeeded(aggregation.agg_func(i), step.actions, name); + if (name != updated_name) + { + need_update_aggregated_columns = true; + DataTypePtr type = step.actions->getSampleBlock().getByName(updated_name).type; + updated_aggregated_columns.emplace_back(updated_name, type); + step.required_output.push_back(updated_name); + } + else + { + updated_aggregated_columns.emplace_back(name, aggregated_columns.getTypes()[i]); + step.required_output.push_back(name); + } + } + for (Int32 i = 0; i < aggregation.group_by_size(); i++) + { + String & name = aggregated_columns.getNames()[i + aggregation.agg_func_size()]; + String updated_name = appendCastIfNeeded(aggregation.group_by(i), step.actions, name); + if (name != updated_name) + { + need_update_aggregated_columns = true; + DataTypePtr type = step.actions->getSampleBlock().getByName(updated_name).type; + updated_aggregated_columns.emplace_back(updated_name, type); + step.required_output.push_back(updated_name); + } + else + { + updated_aggregated_columns.emplace_back(name, aggregated_columns.getTypes()[i]); + step.required_output.push_back(name); + } + } + + if (need_update_aggregated_columns) + { + aggregated_columns.clear(); + for (size_t i = 0; i < updated_aggregated_columns.size(); i++) + { + aggregated_columns.emplace_back(updated_aggregated_columns.getNames()[i], updated_aggregated_columns.getTypes()[i]); + } + } + return true; +} + +String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name) +{ + if (expr.has_field_type() && isFunctionExpr(expr)) + { + DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); 
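At this point expected_type is the type TiDB announced in the DAG request, and the lines that follow compare it against what the expression actually produced. A hypothetical trace of the renaming, with types chosen purely for illustration: for an aggregate count(s) that ClickHouse computes as UInt64 while the request's field type decodes as Int64, getActions() first names the column via genFuncString,

    count(s) 

and appendCastIfNeeded() then wraps it via genCastString into

    cast(count(s) , Int64) 

so every downstream step, including the final projection, refers to the casted column and the response matches the type TiDB expects. Note that both generators emit a trailing space, so the cast name above is exactly what genCastString produces.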
+ DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type; + //todo maybe use a more decent compare method + if (expected_type->getName() != actual_type->getName()) + { + // need to add cast function + // first construct the second argument + tipb::Expr type_expr; + type_expr.set_tp(tipb::ExprType::String); + std::stringstream ss; + EncodeCompactBytes(expected_type->getName(), ss); + type_expr.set_val(ss.str()); + auto type_field_type = type_expr.field_type(); + type_field_type.set_tp(0xfe); + type_field_type.set_flag(1); + String name = getActions(type_expr, actions); + String cast_name = "CAST"; + const FunctionBuilderPtr & cast_func_builder = FunctionFactory::instance().get(cast_name, context); + String cast_expr_name = genCastString(expr_name, getName(type_expr, getCurrentInputColumns())); + + Names cast_argument_names; + cast_argument_names.push_back(expr_name); + cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns())); + const ExpressionAction & apply_cast_function + = ExpressionAction::applyFunction(cast_func_builder, cast_argument_names, cast_expr_name); + actions->add(apply_cast_function); + return cast_expr_name; + } + else + { + return expr_name; + } + } + return expr_name; +} String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions) { @@ -121,47 +294,19 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } } + // re-construct expr_name, because expr_name generated previously is based on expr tree, + // but for function call, it's argument name may be changed as an implicit cast func maybe + // inserted(refer to the logic below), so we need to update the expr_name + // for example, for a expr and(arg1, arg2), the expr_name is and(arg1_name,arg2_name), but + // if the arg1 need to be casted to the type passed by dag request, then the expr_name + // should be updated to and(casted_arg1_name, arg2_name) + expr_name = genFuncString(func_name, argument_names); + const ExpressionAction & applyFunction = ExpressionAction::applyFunction(function_builder, argument_names, expr_name); actions->add(applyFunction); // add cast if needed - if (expr.has_field_type()) - { - DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); - DataTypePtr actual_type = applyFunction.result_type; - //todo maybe use a more decent compare method - if (expected_type->getName() != actual_type->getName()) - { - // need to add cast function - // first construct the second argument - tipb::Expr type_expr; - type_expr.set_tp(tipb::ExprType::String); - std::stringstream ss; - EncodeCompactBytes(expected_type->getName(), ss); - type_expr.set_val(ss.str()); - auto type_field_type = type_expr.field_type(); - type_field_type.set_tp(0xfe); - type_field_type.set_flag(1); - String name = getActions(type_expr, actions); - String cast_name = "cast"; - const FunctionBuilderPtr & cast_func_builder = FunctionFactory::instance().get(cast_name, context); - String cast_expr_name = cast_name + "_" + expr_name + "_" + getName(type_expr, getCurrentInputColumns()); - Names cast_argument_names; - cast_argument_names.push_back(expr_name); - cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns())); - const ExpressionAction & apply_cast_function - = ExpressionAction::applyFunction(cast_func_builder, argument_names, cast_expr_name); - actions->add(apply_cast_function); - return cast_expr_name; - } - else - { - return expr_name; - } - } - else - { - return expr_name; - } + expr_name = 
appendCastIfNeeded(expr, actions, expr_name); + return expr_name; } else { diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h index 40e31540b61..960bdcd4bd4 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.h +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.h @@ -1,8 +1,13 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + +#include #include #include -#include namespace DB { @@ -20,11 +25,16 @@ class DAGExpressionAnalyzer : private boost::noncopyable NamesAndTypesList aggregated_columns; Settings settings; const Context & context; + bool after_agg; public: DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_); bool appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); bool appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); + bool appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, + AggregateDescriptions & aggregate_descriptions); + bool appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg); + String appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name); void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const { if (chain.steps.empty()) diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Interpreters/DAGQuerySource.h index e162f5f1e87..f5c16292196 100644 --- a/dbms/src/Interpreters/DAGQuerySource.h +++ b/dbms/src/Interpreters/DAGQuerySource.h @@ -43,37 +43,37 @@ class DAGQuerySource : public IQuerySource UInt64 getRegionVersion() const { return region_version; } UInt64 getRegionConfVersion() const { return region_conf_version; } - bool has_selection() { return sel_index != -1; }; - bool has_aggregation() { return agg_index != -1; }; - bool has_topN() { return order_index != -1; }; - bool has_limit() { return order_index == -1 && limit_index != -1; }; + bool hasSelection() { return sel_index != -1; }; + bool hasAggregation() { return agg_index != -1; }; + bool hasTopN() { return order_index != -1; }; + bool hasLimit() { return order_index == -1 && limit_index != -1; }; - const tipb::TableScan & get_ts() + const tipb::TableScan & getTS() { assertValid(ts_index, TS_NAME); return dag_request.executors(ts_index).tbl_scan(); }; - const tipb::Selection & get_sel() + const tipb::Selection & getSelection() { assertValid(sel_index, SEL_NAME); return dag_request.executors(sel_index).selection(); }; - const tipb::Aggregation & get_agg() + const tipb::Aggregation & getAggregation() { assertValid(agg_index, AGG_NAME); return dag_request.executors(agg_index).aggregation(); }; - const tipb::TopN & get_topN() + const tipb::TopN & getTopN() { assertValid(order_index, TOPN_NAME); return dag_request.executors(order_index).topn(); }; - const tipb::Limit & get_limit() + const tipb::Limit & getLimit() { assertValid(limit_index, LIMIT_NAME); return dag_request.executors(limit_index).limit(); }; - const tipb::DAGRequest & get_dag_request() { return dag_request; }; + const tipb::DAGRequest & getDAGRequest() { return dag_request; }; protected: Context & context; diff --git a/dbms/src/Interpreters/DAGUtils.cpp b/dbms/src/Interpreters/DAGUtils.cpp index 4bc3b2df207..0cfa906cc02 100644 --- a/dbms/src/Interpreters/DAGUtils.cpp +++ b/dbms/src/Interpreters/DAGUtils.cpp @@ 
-39,6 +39,15 @@ bool isFunctionExpr(const tipb::Expr & expr) } } +const String & getAggFunctionName(const tipb::Expr & expr) +{ + if (!aggFunMap.count(expr.tp())) + { + throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported."); + } + return aggFunMap[expr.tp()]; +} + const String & getFunctionName(const tipb::Expr & expr) { if (isAggFunctionExpr(expr)) @@ -78,8 +87,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::Float64: return std::to_string(DecodeFloat64(cursor, expr.val())); case tipb::ExprType::String: - // - return expr.val(); + return DecodeCompactBytes(cursor, expr.val()); case tipb::ExprType::Bytes: return DecodeBytes(cursor, expr.val()); case tipb::ExprType::ColumnRef: diff --git a/dbms/src/Interpreters/DAGUtils.h b/dbms/src/Interpreters/DAGUtils.h index 22cf460141b..1048cf8375d 100644 --- a/dbms/src/Interpreters/DAGUtils.h +++ b/dbms/src/Interpreters/DAGUtils.h @@ -17,6 +17,7 @@ Field decodeLiteral(const tipb::Expr & expr); bool isFunctionExpr(const tipb::Expr & expr); bool isAggFunctionExpr(const tipb::Expr & expr); const String & getFunctionName(const tipb::Expr & expr); +const String & getAggFunctionName(const tipb::Expr & expr); bool isColumnExpr(const tipb::Expr & expr); ColumnID getColumnID(const tipb::Expr & expr); String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns); diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 55009e12777..22cd57aa831 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -1,10 +1,14 @@ +#include #include +#include #include #include #include #include +#include #include #include +#include #include #include #include @@ -64,7 +68,6 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) return false; } String name = merge_tree->getTableInfo().columns[cid - 1].name; - //todo handle output_offset required_columns.push_back(name); } if (required_columns.empty()) @@ -73,11 +76,11 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) return false; } - if (!dag_query_src.has_aggregation()) + if (!dag_query_src.hasAggregation()) { // if the dag request does not contain agg, then the final output is // based on the output of table scan - for (auto i : dag_query_src.get_dag_request().output_offsets()) + for (auto i : dag_query_src.getDAGRequest().output_offsets()) { if (i < 0 || i >= required_columns.size()) { @@ -163,11 +166,11 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() { AnalysisResult res; ExpressionActionsChain chain; - res.need_aggregate = dag_query_src.has_aggregation(); + res.need_aggregate = dag_query_src.hasAggregation(); DAGExpressionAnalyzer expressionAnalyzer(source_columns, context); - if (dag_query_src.has_selection()) + if (dag_query_src.hasSelection()) { - if (expressionAnalyzer.appendWhere(chain, dag_query_src.get_sel(), res.filter_column_name)) + if (expressionAnalyzer.appendWhere(chain, dag_query_src.getSelection(), res.filter_column_name)) { res.has_where = true; res.before_where = chain.getLastActions(); @@ -177,11 +180,24 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() } if (res.need_aggregate) { - throw Exception("agg not supported"); + res.need_aggregate + = expressionAnalyzer.appendAggregation(chain, dag_query_src.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); + res.before_aggregation = 
chain.getLastActions(); + + chain.finalize(); + chain.clear(); + + // add cast if type is not match + expressionAnalyzer.appendAggSelect(chain, dag_query_src.getAggregation()); + //todo use output_offset to pruner the final project columns + for (auto element : expressionAnalyzer.getCurrentInputColumns()) + { + final_project.emplace_back(element.name, ""); + } } - if (dag_query_src.has_topN()) + if (dag_query_src.hasTopN()) { - res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_src.get_topN(), res.order_column_names); + res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_src.getTopN(), res.order_column_names); } // append final project results for (auto & name : final_project) @@ -201,9 +217,68 @@ void InterpreterDAG::executeWhere(Pipeline & pipeline, const ExpressionActionsPt [&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr, filter_column); }); } +void InterpreterDAG::executeAggregation( + Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, Names & key_names, AggregateDescriptions & aggregates) +{ + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr); }); + + Block header = pipeline.firstStream()->getHeader(); + ColumnNumbers keys; + for (const auto & name : key_names) + { + keys.push_back(header.getPositionByName(name)); + } + for (auto & descr : aggregates) + { + if (descr.arguments.empty()) + { + for (const auto & name : descr.argument_names) + { + descr.arguments.push_back(header.getPositionByName(name)); + } + } + } + + const Settings & settings = context.getSettingsRef(); + + /** Two-level aggregation is useful in two cases: + * 1. Parallel aggregation is done, and the results should be merged in parallel. + * 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way. + */ + bool allow_to_use_two_level_group_by = pipeline.streams.size() > 1 || settings.max_bytes_before_external_group_by != 0; + + Aggregator::Params params(header, keys, aggregates, false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, + settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile, + allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), + allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0), + settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath()); + + /// If there are several sources, then we perform parallel aggregation + if (pipeline.streams.size() > 1) + { + pipeline.firstStream() = std::make_shared(pipeline.streams, nullptr, params, true, max_streams, + settings.aggregation_memory_efficient_merge_threads ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads)); + + pipeline.streams.resize(1); + } + else + { + BlockInputStreams inputs; + if (!pipeline.streams.empty()) + inputs.push_back(pipeline.firstStream()); + else + pipeline.streams.resize(1); + + pipeline.firstStream() + = std::make_shared(std::make_shared(inputs), params, true); + } + // add cast +} + void InterpreterDAG::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr) { - if (expressionActionsPtr->getActions().size() > 0) + if (!expressionActionsPtr->getActions().empty()) { pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr); }); } @@ -213,7 +288,7 @@ SortDescription InterpreterDAG::getSortDescription(Strings & order_column_names) { // construct SortDescription SortDescription order_descr; - const tipb::TopN & topN = dag_query_src.get_topN(); + const tipb::TopN & topN = dag_query_src.getTopN(); order_descr.reserve(topN.order_by_size()); for (int i = 0; i < topN.order_by_size(); i++) { @@ -244,7 +319,7 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na { SortDescription order_descr = getSortDescription(order_column_names); const Settings & settings = context.getSettingsRef(); - Int64 limit = dag_query_src.get_topN().limit(); + Int64 limit = dag_query_src.getTopN().limit(); pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, limit); @@ -269,7 +344,7 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na //todo return the error message bool InterpreterDAG::executeImpl(Pipeline & pipeline) { - if (!executeTS(dag_query_src.get_ts(), pipeline)) + if (!executeTS(dag_query_src.getTS(), pipeline)) { return false; } @@ -283,12 +358,9 @@ bool InterpreterDAG::executeImpl(Pipeline & pipeline) if (res.need_aggregate) { // execute aggregation - throw Exception("agg not supported"); - } - else - { - executeExpression(pipeline, res.before_order_and_select); + executeAggregation(pipeline, res.before_aggregation, res.aggregation_keys, res.aggregate_descriptions); } + executeExpression(pipeline, res.before_order_and_select); if (res.has_order_by) { @@ -300,7 +372,7 @@ bool InterpreterDAG::executeImpl(Pipeline & pipeline) executeFinalProject(pipeline); // execute limit - if (dag_query_src.has_limit() && !dag_query_src.has_topN()) + if (dag_query_src.hasLimit() && !dag_query_src.hasTopN()) { executeLimit(pipeline); } @@ -324,12 +396,12 @@ void InterpreterDAG::executeFinalProject(Pipeline & pipeline) void InterpreterDAG::executeLimit(Pipeline & pipeline) { pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.get_limit().limit(), 0, false); }); + [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.getLimit().limit(), 0, false); }); if (pipeline.hasMoreThanOneStream()) { executeUnion(pipeline); pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.get_limit().limit(), 0, false); }); + [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.getLimit().limit(), 0, false); }); } } diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Interpreters/InterpreterDAG.h index 1b0ff8c02db..c0302368945 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -7,6 +7,7 @@ #pragma GCC diagnostic pop #include +#include #include #include #include @@ -69,6 +70,9 @@ class 
InterpreterDAG : public IInterpreter Strings order_column_names; /// Columns from the SELECT list, before renaming them to aliases. Names selected_columns; + + Names aggregation_keys; + AggregateDescriptions aggregate_descriptions; }; bool executeImpl(Pipeline & pipeline); @@ -78,6 +82,8 @@ class InterpreterDAG : public IInterpreter void executeOrder(Pipeline & pipeline, Strings & order_column_names); void executeUnion(Pipeline & pipeline); void executeLimit(Pipeline & pipeline); + void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, Names & aggregation_keys, + AggregateDescriptions & aggregate_descriptions); void executeFinalProject(Pipeline & pipeline); SortDescription getSortDescription(Strings & order_column_names); AnalysisResult analyzeExpressions(); From cb55df474dd3014fbbd2f82416457f68c1870e43 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 5 Aug 2019 17:37:11 +0800 Subject: [PATCH 16/79] Code refine --- contrib/tipb | 2 +- dbms/src/Interpreters/InterpreterDAG.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/tipb b/contrib/tipb index 3a69b884cc9..b2d318af5e8 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit 3a69b884cc9793da55d7d4ef38dc79459d17583f +Subproject commit b2d318af5e8af28f54a2c6422bc18631f65a8506 diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 22cd57aa831..e7b5aace1f6 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -42,6 +42,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) TableID id = ts.table_id(); auto & tmt_ctx = context.getTMTContext(); auto storage = tmt_ctx.getStorages().get(id); + // TODO: Using new get storage in DDL branch. 
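The lookup pattern around this TODO, one attempt, one schema sync, then one retry, exists because a table freshly created in TiDB may not have been pulled into TiFlash's local schema yet. Factored out, the sketch looks like this (an illustrative helper, not code from this patch):

    StoragePtr getStorageWithSync(TMTContext & tmt_ctx, Context & context, TableID table_id)
    {
        auto storage = tmt_ctx.getStorages().get(table_id);
        if (storage == nullptr)
        {
            // Table unknown locally: sync the schema from TiDB once, then retry.
            tmt_ctx.getSchemaSyncer()->syncSchema(table_id, context, false);
            storage = tmt_ctx.getStorages().get(table_id);
        }
        return storage; // may still be nullptr if the table truly does not exist
    }
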
if (storage == nullptr) { tmt_ctx.getSchemaSyncer()->syncSchema(id, context, false); From 08b7142f8b0c32fb752bf2b74dd2095afa4b96e2 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 5 Aug 2019 22:22:01 +0800 Subject: [PATCH 17/79] Refine code --- dbms/src/DataStreams/DAGBlockOutputStream.h | 1 - dbms/src/Flash/Coprocessor/DAGDriver.cpp | 33 ++-- dbms/src/Flash/Coprocessor/DAGDriver.h | 4 +- dbms/src/Interpreters/InterpreterDAG.cpp | 155 +++++++++++++----- dbms/src/Interpreters/InterpreterDAG.h | 29 +++- dbms/src/Interpreters/executeQuery.cpp | 7 +- dbms/src/Interpreters/executeQuery.h | 10 +- dbms/src/Storages/Transaction/TypeMapping.cpp | 16 -- dbms/src/Storages/Transaction/TypeMapping.h | 3 - 9 files changed, 151 insertions(+), 107 deletions(-) diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.h b/dbms/src/DataStreams/DAGBlockOutputStream.h index e14fbdc929a..8e21161466e 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.h +++ b/dbms/src/DataStreams/DAGBlockOutputStream.h @@ -6,7 +6,6 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include - #pragma GCC diagnostic pop namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 7fbf2408743..4454d6fdbd8 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -28,38 +29,34 @@ DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, void DAGDriver::execute() { context.setSetting("read_tso", UInt64(dag_request.start_ts())); - BlockIO streams = executeDAG(); - if (!streams.in || streams.out) - { - // Only query is allowed, so streams.in must not be null and streams.out must be null - throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); - } - BlockOutputStreamPtr outputStreamPtr = std::make_shared( - dag_response, context.getSettings().dag_records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); - copyData(*streams.in, *outputStreamPtr); -} -BlockIO DAGDriver::executeDAG() -{ + DAGQuerySource dag(context, region_id, region_version, region_conf_version, dag_request); + BlockIO streams; + String planner = context.getSettings().dag_planner; if (planner == "sql") { DAGStringConverter converter(context, dag_request); String query = converter.buildSqlString(); - if (query.empty()) - { - return BlockIO(); - } - return executeQuery(query, context, false, QueryProcessingStage::Complete); + if (!query.empty()) + streams = executeQuery(query, context, false, QueryProcessingStage::Complete); } else if (planner == "optree") { - return executeQuery(dag_request, region_id, region_version, region_conf_version, context, QueryProcessingStage::Complete); + streams = executeQuery(dag, context, QueryProcessingStage::Complete); } else { throw Exception("Unknown DAG planner type " + planner, ErrorCodes::LOGICAL_ERROR); } + + if (!streams.in || streams.out) + // Only query is allowed, so streams.in must not be null and streams.out must be null + throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); + + BlockOutputStreamPtr outputStreamPtr = std::make_shared( + dag_response, context.getSettings().dag_records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); + copyData(*streams.in, *outputStreamPtr); } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index 3dce5abeb31..b0143591bd5 100644 --- 
a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -16,10 +16,8 @@ class DAGDriver public: DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, tipb::SelectResponse & dag_response_); - void execute(); -private: - BlockIO executeDAG(); + void execute(); private: Context & context; diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index e7b5aace1f6..1c41f12934e 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -26,10 +26,15 @@ namespace DB namespace ErrorCodes { +extern const int UNKNOWN_TABLE; extern const int TOO_MANY_COLUMNS; -} +extern const int SCHEMA_VERSION_ERROR; +extern const int UNKNOWN_EXCEPTION; +} // namespace ErrorCodes -InterpreterDAG::InterpreterDAG(Context & context_, DAGQuerySource & dag_query_src_) : context(context_), dag_query_src(dag_query_src_) {} +InterpreterDAG::InterpreterDAG(Context & context_, DAGQuerySource & dag_) + : context(context_), dag(dag_), log(&Logger::get("InterpreterDAG")) +{} // the flow is the same as executeFetchcolumns bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) @@ -39,36 +44,20 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) // do not have table id return false; } - TableID id = ts.table_id(); - auto & tmt_ctx = context.getTMTContext(); - auto storage = tmt_ctx.getStorages().get(id); - // TODO: Using new get storage in DDL branch. - if (storage == nullptr) - { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context, false); - storage = tmt_ctx.getStorages().get(id); - } - if (storage == nullptr) - { - return false; - } - auto table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - const auto * merge_tree = dynamic_cast(storage.get()); - if (!merge_tree) - { - return false; - } + TableID table_id = ts.table_id(); + // TODO: Get schema version from DAG request. 
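getAndLockStorageWithSchemaVersion is introduced by this patch but its definition falls outside this excerpt. Judging from the call site and the error codes declared above, a plausible shape is the following assumed sketch; the members storage and table_lock are guesses:

    void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 schema_version)
    {
        auto & tmt_ctx = context.getTMTContext();
        storage = tmt_ctx.getStorages().get(table_id); // assumed member
        if (storage == nullptr)
        {
            tmt_ctx.getSchemaSyncer()->syncSchema(table_id, context, false);
            storage = tmt_ctx.getStorages().get(table_id);
        }
        if (storage == nullptr)
            throw Exception("Table " + std::to_string(table_id) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
        // Once the DAG request carries a schema version (see the TODO above), it
        // would be compared here, throwing SCHEMA_VERSION_ERROR on a stale snapshot.
        table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); // assumed member
    }
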
+ getAndLockStorageWithSchemaVersion(table_id, DEFAULT_UNSPECIFIED_SCHEMA_VERSION); Names required_columns; for (const tipb::ColumnInfo & ci : ts.columns()) { ColumnID cid = ci.column_id(); - if (cid < 1 || cid > (Int64)merge_tree->getTableInfo().columns.size()) + if (cid < 1 || cid > (Int64)storage->getTableInfo().columns.size()) { // cid out of bound return false; } - String name = merge_tree->getTableInfo().columns[cid - 1].name; + String name = storage->getTableInfo().columns[cid - 1].name; required_columns.push_back(name); } if (required_columns.empty()) @@ -77,11 +66,11 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) return false; } - if (!dag_query_src.hasAggregation()) + if (!dag.hasAggregation()) { // if the dag request does not contain agg, then the final output is // based on the output of table scan - for (auto i : dag_query_src.getDAGRequest().output_offsets()) + for (auto i : dag.getDAGRequest().output_offsets()) { if (i < 0 || i >= required_columns.size()) { @@ -118,15 +107,15 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) query_info.mvcc_query_info->resolve_locks = true; query_info.mvcc_query_info->read_tso = settings.read_tso; RegionQueryInfo info; - info.region_id = dag_query_src.getRegionID(); - info.version = dag_query_src.getRegionVersion(); - info.conf_version = dag_query_src.getRegionConfVersion(); - auto current_region = context.getTMTContext().getRegionTable().getRegionById(id, info.region_id); + info.region_id = dag.getRegionID(); + info.version = dag.getRegionVersion(); + info.conf_version = dag.getRegionConfVersion(); + auto current_region = context.getTMTContext().getRegionTable().getRegionById(table_id, info.region_id); if (!current_region) { return false; } - info.range_in_table = current_region->getHandleRangeByTable(id); + info.range_in_table = current_region->getHandleRangeByTable(table_id); query_info.mvcc_query_info->regions_query_info.push_back(info); query_info.mvcc_query_info->concurrent = 0.0; pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); @@ -167,11 +156,11 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() { AnalysisResult res; ExpressionActionsChain chain; - res.need_aggregate = dag_query_src.hasAggregation(); + res.need_aggregate = dag.hasAggregation(); DAGExpressionAnalyzer expressionAnalyzer(source_columns, context); - if (dag_query_src.hasSelection()) + if (dag.hasSelection()) { - if (expressionAnalyzer.appendWhere(chain, dag_query_src.getSelection(), res.filter_column_name)) + if (expressionAnalyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name)) { res.has_where = true; res.before_where = chain.getLastActions(); @@ -182,23 +171,23 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() if (res.need_aggregate) { res.need_aggregate - = expressionAnalyzer.appendAggregation(chain, dag_query_src.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); + = expressionAnalyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); res.before_aggregation = chain.getLastActions(); chain.finalize(); chain.clear(); // add cast if type is not match - expressionAnalyzer.appendAggSelect(chain, dag_query_src.getAggregation()); + expressionAnalyzer.appendAggSelect(chain, dag.getAggregation()); //todo use output_offset to pruner the final project columns for (auto element : expressionAnalyzer.getCurrentInputColumns()) { 
final_project.emplace_back(element.name, ""); } } - if (dag_query_src.hasTopN()) + if (dag.hasTopN()) { - res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag_query_src.getTopN(), res.order_column_names); + res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); } // append final project results for (auto & name : final_project) @@ -285,11 +274,90 @@ void InterpreterDAG::executeExpression(Pipeline & pipeline, const ExpressionActi } } +void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 schema_version) +{ + /// Lambda for get storage, then align schema version under the read lock. + auto get_and_lock_storage = [&](bool schema_synced) -> std::tuple { + /// Get storage in case it's dropped then re-created. + // If schema synced, call getTable without try, leading to exception on table not existing. + auto storage_ = context.getTMTContext().getStorages().get(table_id); + if (!storage_) + { + if (schema_synced) + throw Exception("Table " + std::to_string(table_id) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); + else + return std::make_tuple(nullptr, nullptr, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, false); + } + + if (storage->getData().merging_params.mode != MergeTreeData::MergingParams::Txn) + throw Exception("Specifying schema_version for non-TMT storage: " + storage_->getName() + ", table: " + std::to_string(table_id) + + " is not allowed", + ErrorCodes::LOGICAL_ERROR); + + /// Lock storage. + auto lock = storage_->lockStructure(false, __PRETTY_FUNCTION__); + + /// Check schema version. + auto storage_schema_version = storage->getTableInfo().schema_version; + if (storage_schema_version > schema_version) + throw Exception("Table " + std::to_string(table_id) + " schema version " + std::to_string(storage_schema_version) + + " newer than query schema version " + std::to_string(schema_version), + ErrorCodes::SCHEMA_VERSION_ERROR); + + if ((schema_synced && storage_schema_version <= schema_version) || (!schema_synced && storage_schema_version == schema_version)) + return std::make_tuple(storage_, lock, storage_schema_version, true); + + return std::make_tuple(nullptr, nullptr, storage_schema_version, false); + }; + + /// Try get storage and lock once. + TMTStoragePtr storage_; + TableStructureReadLockPtr lock; + Int64 storage_schema_version; + bool ok; + { + std::tie(storage_, lock, storage_schema_version, ok) = get_and_lock_storage(false); + if (ok) + { + LOG_DEBUG(log, + __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version + << ", query schema version: " << schema_version << ", OK, no syncing required."); + storage = storage_; + table_lock = lock; + return; + } + } + + /// If first try failed, sync schema and try again. 
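// [editor's note] getAndLockStorageWithSchemaVersion implements an "optimistic
// check, then sync once and re-check" idiom. Condensed to its core using the
// same names as the patch (illustrative only, not a drop-in replacement):
//
//     for (bool synced : {false, true})
//     {
//         std::tie(storage_, lock, storage_schema_version, ok) = get_and_lock_storage(synced);
//         if (ok) { storage = storage_; table_lock = lock; return; }
//         if (!synced)
//             context.getTMTContext().getSchemaSyncer()->syncSchemas(context);
//     }
//     throw Exception("Shouldn't reach here", ErrorCodes::UNKNOWN_EXCEPTION);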
+ { + LOG_DEBUG(log, + __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version + << ", query schema version: " << schema_version << ", not OK, syncing schemas."); + auto start_time = Clock::now(); + context.getTMTContext().getSchemaSyncer()->syncSchemas(context); + auto schema_sync_cost = std::chrono::duration_cast(Clock::now() - start_time).count(); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << " Table " << table_id << " schema sync cost " << schema_sync_cost << "ms."); + + std::tie(storage_, lock, storage_schema_version, ok) = get_and_lock_storage(true); + if (ok) + { + LOG_DEBUG(log, + __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version + << ", query schema version: " << schema_version << ", OK after syncing."); + storage = storage_; + table_lock = lock; + return; + } + + throw Exception("Shouldn't reach here", ErrorCodes::UNKNOWN_EXCEPTION); + } +} + SortDescription InterpreterDAG::getSortDescription(Strings & order_column_names) { // construct SortDescription SortDescription order_descr; - const tipb::TopN & topN = dag_query_src.getTopN(); + const tipb::TopN & topN = dag.getTopN(); order_descr.reserve(topN.order_by_size()); for (int i = 0; i < topN.order_by_size(); i++) { @@ -320,7 +388,7 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na { SortDescription order_descr = getSortDescription(order_column_names); const Settings & settings = context.getSettingsRef(); - Int64 limit = dag_query_src.getTopN().limit(); + Int64 limit = dag.getTopN().limit(); pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, limit); @@ -345,7 +413,7 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na //todo return the error message bool InterpreterDAG::executeImpl(Pipeline & pipeline) { - if (!executeTS(dag_query_src.getTS(), pipeline)) + if (!executeTS(dag.getTS(), pipeline)) { return false; } @@ -373,7 +441,7 @@ bool InterpreterDAG::executeImpl(Pipeline & pipeline) executeFinalProject(pipeline); // execute limit - if (dag_query_src.hasLimit() && !dag_query_src.hasTopN()) + if (dag.hasLimit() && !dag.hasTopN()) { executeLimit(pipeline); } @@ -396,13 +464,12 @@ void InterpreterDAG::executeFinalProject(Pipeline & pipeline) void InterpreterDAG::executeLimit(Pipeline & pipeline) { - pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.getLimit().limit(), 0, false); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, dag.getLimit().limit(), 0, false); }); if (pipeline.hasMoreThanOneStream()) { executeUnion(pipeline); pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, dag_query_src.getLimit().limit(), 0, false); }); + [&](auto & stream) { stream = std::make_shared(stream, dag.getLimit().limit(), 0, false); }); } } diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Interpreters/InterpreterDAG.h index c0302368945..9531c37b82f 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -24,21 +25,13 @@ class Context; class InterpreterDAG : public IInterpreter { public: - InterpreterDAG(Context & context_, DAGQuerySource & dag_query_src_); + InterpreterDAG(Context & context_, DAGQuerySource & dag_); ~InterpreterDAG() = default; BlockIO execute(); private: - Context & context; - - DAGQuerySource & dag_query_src; - - 
NamesWithAliases final_project; - NamesAndTypesList source_columns; - size_t max_streams = 1; - struct Pipeline { BlockInputStreams streams; @@ -85,7 +78,25 @@ class InterpreterDAG : public IInterpreter void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, Names & aggregation_keys, AggregateDescriptions & aggregate_descriptions); void executeFinalProject(Pipeline & pipeline); + void getAndLockStorageWithSchemaVersion(TableID table_id, Int64 schema_version); SortDescription getSortDescription(Strings & order_column_names); AnalysisResult analyzeExpressions(); + +private: + Context & context; + + DAGQuerySource & dag; + + NamesWithAliases final_project; + NamesAndTypesList source_columns; + + /// How many streams we ask for storage to produce, and in how many threads we will do further processing. + size_t max_streams = 1; + + /// Table from where to read data, if not subquery. + TMTStoragePtr storage; + TableStructureReadLockPtr table_lock; + + Poco::Logger * log; }; } // namespace DB diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 8d58a8531fc..698da73c9b6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -393,12 +392,10 @@ BlockIO executeQuery( } -BlockIO executeQuery(const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version, - Context & context, QueryProcessingStage::Enum stage) +BlockIO executeQuery(DAGQuerySource & dag, Context & context, QueryProcessingStage::Enum stage) { BlockIO streams; - DAGQuerySource query_src(context, region_id, region_version, region_conf_version, dag_request); - std::tie(std::ignore, streams) = executeQueryImpl(query_src, context, false, stage); + std::tie(std::ignore, streams) = executeQueryImpl(dag, context, false, stage); return streams; } diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index 48c061f484d..48033a9cf2d 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -2,8 +2,8 @@ #include #include +#include #include -#include namespace DB @@ -42,12 +42,6 @@ BlockIO executeQuery( ); -BlockIO executeQuery( - const tipb::DAGRequest & dag_request, - RegionID region_id, - UInt64 region_version, - UInt64 region_conf_version, - Context & context, - QueryProcessingStage::Enum stage); +BlockIO executeQuery(DAGQuerySource & dag, Context & context, QueryProcessingStage::Enum stage); } diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 5b12a0eb87b..24413713c13 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -19,19 +19,14 @@ class TypeMapping : public ext::singleton public: using Creator = std::function; using TypeMap = std::unordered_map; - using CodecFlagMap = std::unordered_map; DataTypePtr getDataType(const ColumnInfo & column_info); - TiDB::CodecFlag getCodecFlag(const DB::DataTypePtr & data_type); - private: TypeMapping(); TypeMap type_map; - CodecFlagMap codec_flag_map; - friend class ext::singleton; }; @@ -152,23 +147,14 @@ TypeMapping::TypeMapping() #ifdef M #error "Please undefine macro M first." 
#endif - #define M(tt, v, cf, ct, w) \ type_map[TiDB::Type##tt] = std::bind(getDataTypeByColumnInfoBase, std::placeholders::_1, (DataType##ct *)nullptr); - codec_flag_map[#ctu] = TiDB::CodecFlag##cfu; - codec_flag_map[#ct] = TiDB::CodecFlag##cf; COLUMN_TYPES(M) #undef M } DataTypePtr TypeMapping::getDataType(const ColumnInfo & column_info) { return type_map[column_info.tp](column_info); } -TiDB::CodecFlag TypeMapping::getCodecFlag(const DB::DataTypePtr & dataTypePtr) -{ - // TODO: String's CodecFlag will be CodecFlagCompactBytes, which is wrong for Json type - return codec_flag_map[dataTypePtr->getFamilyName()]; -} - DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info) { DataTypePtr base = TypeMapping::instance().getDataType(column_info); @@ -192,6 +178,4 @@ DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type) return getDataTypeByColumnInfo(ci); } -TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & data_type) { return TypeMapping::instance().getCodecFlag(data_type); } - } // namespace DB diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index e323763fad2..d501be75330 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -7,7 +7,6 @@ #include #pragma GCC diagnostic pop - namespace DB { using ColumnInfo = TiDB::ColumnInfo; @@ -16,6 +15,4 @@ DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info); DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type); -TiDB::CodecFlag getCodecFlagByDataType(const DataTypePtr & data_type); - } // namespace DB From bc25942a2ea126a2aca935e883159e7f798a88bb Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 03:47:28 +0800 Subject: [PATCH 18/79] Another way of getting codec flag --- dbms/src/DataStreams/DAGBlockOutputStream.cpp | 37 +++++------ dbms/src/DataStreams/DAGBlockOutputStream.h | 6 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 4 +- dbms/src/Interpreters/DAGQuerySource.cpp | 23 ++++++- dbms/src/Interpreters/DAGQuerySource.h | 63 ++++++++++++------- dbms/src/Interpreters/DAGStringConverter.cpp | 8 +-- dbms/src/Interpreters/InterpreterDAG.cpp | 31 ++++----- dbms/src/Interpreters/InterpreterDAG.h | 4 +- 8 files changed, 107 insertions(+), 69 deletions(-) diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.cpp b/dbms/src/DataStreams/DAGBlockOutputStream.cpp index 683e0c27809..6de23407b95 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.cpp +++ b/dbms/src/DataStreams/DAGBlockOutputStream.cpp @@ -1,9 +1,7 @@ - #include + #include #include -#include - namespace DB { @@ -11,13 +9,16 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_PARAMETER; -} - -struct TypeMapping; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes -DAGBlockOutputStream::DAGBlockOutputStream( - tipb::SelectResponse & dag_response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, Block header_) - : dag_response(dag_response_), records_per_chunk(records_per_chunk_), encodeType(encodeType_), header(header_) +DAGBlockOutputStream::DAGBlockOutputStream(tipb::SelectResponse & dag_response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, + FieldTpAndFlags && field_tp_and_flags_, Block header_) + : dag_response(dag_response_), + records_per_chunk(records_per_chunk_), + encodeType(encodeType_), + field_tp_and_flags(field_tp_and_flags_), + header(header_) { if (encodeType == tipb::EncodeType::TypeArrow) { @@ -43,9 +44,13 @@ void 
DAGBlockOutputStream::writeSuffix() } } - void DAGBlockOutputStream::write(const Block & block) { + if (block.columns() != field_tp_and_flags.size()) + throw Exception("Output column size mismatch with field type size", ErrorCodes::LOGICAL_ERROR); + + // TODO: Check compatibility between field_tp_and_flags and block column types. + // Encode data to chunk size_t rows = block.rows(); for (size_t i = 0; i < rows; i++) @@ -63,17 +68,9 @@ void DAGBlockOutputStream::write(const Block & block) } for (size_t j = 0; j < block.columns(); j++) { + // TODO: No need to encode column id? auto field = (*block.getByPosition(j).column.get())[i]; - const DataTypePtr & data_type = block.getByPosition(j).type; - if (data_type->isNullable()) - { - const DataTypePtr nested = dynamic_cast(data_type.get())->getNestedType(); - EncodeDatum(field, getCodecFlagByDataType(nested), current_ss); - } - else - { - EncodeDatum(field, getCodecFlagByDataType(block.getByPosition(j).type), current_ss); - } + EncodeDatum(field, field_tp_and_flags[j].getCodecFlag(), current_ss); } // Encode current row records_per_chunk++; diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.h b/dbms/src/DataStreams/DAGBlockOutputStream.h index 8e21161466e..f51877a4487 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.h +++ b/dbms/src/DataStreams/DAGBlockOutputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include @@ -17,7 +18,8 @@ namespace DB class DAGBlockOutputStream : public IBlockOutputStream { public: - DAGBlockOutputStream(tipb::SelectResponse & response, Int64 records_per_chunk, tipb::EncodeType encodeType, Block header); + DAGBlockOutputStream(tipb::SelectResponse & response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, + FieldTpAndFlags && field_tp_and_flags_, Block header_); Block getHeader() const override { return header; } void write(const Block & block) override; @@ -29,6 +31,8 @@ class DAGBlockOutputStream : public IBlockOutputStream Int64 records_per_chunk; tipb::EncodeType encodeType; + FieldTpAndFlags field_tp_and_flags; + Block header; tipb::Chunk * current_chunk; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 4454d6fdbd8..2eb69d5c452 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -54,8 +54,8 @@ void DAGDriver::execute() // Only query is allowed, so streams.in must not be null and streams.out must be null throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); - BlockOutputStreamPtr outputStreamPtr = std::make_shared( - dag_response, context.getSettings().dag_records_per_chunk, dag_request.encode_type(), streams.in->getHeader()); + BlockOutputStreamPtr outputStreamPtr = std::make_shared(dag_response, context.getSettings().dag_records_per_chunk, + dag_request.encode_type(), dag.getOutputFieldTpAndFlags(), streams.in->getHeader()); copyData(*streams.in, *outputStreamPtr); } diff --git a/dbms/src/Interpreters/DAGQuerySource.cpp b/dbms/src/Interpreters/DAGQuerySource.cpp index abbcb1c6559..9938acaa34d 100644 --- a/dbms/src/Interpreters/DAGQuerySource.cpp +++ b/dbms/src/Interpreters/DAGQuerySource.cpp @@ -1,9 +1,8 @@ - #include + #include #include - namespace DB { @@ -68,4 +67,24 @@ std::unique_ptr DAGQuerySource::interpreter(Context &, QueryProces { return std::make_unique(context, *this); } + +FieldTpAndFlags DAGQuerySource::getOutputFieldTpAndFlags() const +{ + FieldTpAndFlags output; + + 
const auto & ts = getTS(); + const auto & column_infos = ts.columns(); + for (auto i : dag_request.output_offsets()) + { + // TODO: Checking bound. + auto & column_info = column_infos[i]; + output.emplace_back(FieldTpAndFlag{static_cast(column_info.tp()), static_cast(column_info.flag())}); + } + + // TODO: Add aggregation columns. + // We either write our own code to infer types that follows the convention between TiDB and TiKV, or ask TiDB to push down aggregation field types. + + return output; +} + } // namespace DB diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Interpreters/DAGQuerySource.h index f5c16292196..619152986da 100644 --- a/dbms/src/Interpreters/DAGQuerySource.h +++ b/dbms/src/Interpreters/DAGQuerySource.h @@ -1,18 +1,34 @@ #pragma once +#include +#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include #pragma GCC diagnostic pop -#include -#include - namespace DB { class Context; +/// A handy struct to get codec flag based on tp and flag. +struct FieldTpAndFlag +{ + TiDB::TP tp; + UInt32 flag; + + TiDB::CodecFlag getCodecFlag() const + { + TiDB::ColumnInfo ci; + ci.tp = tp; + ci.flag = flag; + return ci.getCodecFlag(); + } +}; +using FieldTpAndFlags = std::vector; + /// Query source of a DAG request via gRPC. /// This is also an IR of a DAG. class DAGQuerySource : public IQuerySource @@ -31,49 +47,54 @@ class DAGQuerySource : public IQuerySource String str(size_t max_query_size) override; std::unique_ptr interpreter(Context & context, QueryProcessingStage::Enum stage) override; - void assertValid(Int32 index, const String & name) - { - if (index < 0 || index > dag_request.executors_size()) - { - throw Exception("Access invalid executor: " + name); - } - } - RegionID getRegionID() const { return region_id; } UInt64 getRegionVersion() const { return region_version; } UInt64 getRegionConfVersion() const { return region_conf_version; } - bool hasSelection() { return sel_index != -1; }; - bool hasAggregation() { return agg_index != -1; }; - bool hasTopN() { return order_index != -1; }; - bool hasLimit() { return order_index == -1 && limit_index != -1; }; + bool hasSelection() const { return sel_index != -1; }; + bool hasAggregation() const { return agg_index != -1; }; + bool hasTopN() const { return order_index != -1; }; + bool hasLimit() const { return order_index == -1 && limit_index != -1; }; - const tipb::TableScan & getTS() + const tipb::TableScan & getTS() const { assertValid(ts_index, TS_NAME); return dag_request.executors(ts_index).tbl_scan(); }; - const tipb::Selection & getSelection() + const tipb::Selection & getSelection() const { assertValid(sel_index, SEL_NAME); return dag_request.executors(sel_index).selection(); }; - const tipb::Aggregation & getAggregation() + const tipb::Aggregation & getAggregation() const { assertValid(agg_index, AGG_NAME); return dag_request.executors(agg_index).aggregation(); }; - const tipb::TopN & getTopN() + const tipb::TopN & getTopN() const { assertValid(order_index, TOPN_NAME); return dag_request.executors(order_index).topn(); }; - const tipb::Limit & getLimit() + const tipb::Limit & getLimit() const { assertValid(limit_index, LIMIT_NAME); return dag_request.executors(limit_index).limit(); }; - const tipb::DAGRequest & getDAGRequest() { return dag_request; }; + const tipb::DAGRequest & getDAGRequest() const { return dag_request; }; + + /// Used to guide output stream to encode data, as we lost DAG field type during input streams. 
+ /// This will somewhat duplicate the planning logic, but we don't have a decent way to keep this information. + FieldTpAndFlags getOutputFieldTpAndFlags() const; + +protected: + void assertValid(Int32 index, const String & name) const + { + if (index < 0 || index > dag_request.executors_size()) + { + throw Exception("Access invalid executor: " + name); + } + } protected: Context & context; diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp index c99b1607fcd..c62da104121 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -26,11 +26,6 @@ bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringst auto & tmt_ctx = context.getTMTContext(); auto storage = tmt_ctx.getStorages().get(id); if (storage == nullptr) - { - tmt_ctx.getSchemaSyncer()->syncSchema(id, context, false); - storage = tmt_ctx.getStorages().get(id); - } - if (storage == nullptr) { return false; } @@ -116,7 +111,8 @@ bool isProject(const tipb::Executor &) // currently, project is not pushed so always return false return false; } -DAGStringConverter::DAGStringConverter(Context & context_, const tipb::DAGRequest & dag_request_) : context(context_), dag_request(dag_request_) +DAGStringConverter::DAGStringConverter(Context & context_, const tipb::DAGRequest & dag_request_) + : context(context_), dag_request(dag_request_) { afterAgg = false; } diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 1c41f12934e..8b235a245cd 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -11,14 +13,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include namespace DB @@ -32,7 +32,7 @@ extern const int SCHEMA_VERSION_ERROR; extern const int UNKNOWN_EXCEPTION; } // namespace ErrorCodes -InterpreterDAG::InterpreterDAG(Context & context_, DAGQuerySource & dag_) +InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) : context(context_), dag(dag_), log(&Logger::get("InterpreterDAG")) {} @@ -119,6 +119,9 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) query_info.mvcc_query_info->regions_query_info.push_back(info); query_info.mvcc_query_info->concurrent = 0.0; pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); + + pipeline.transform([&](auto & stream) { stream->addTableLock(table_lock); }); + /// Set the limits and quota for reading data, the speed and time of the query. 
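// [editor's note] The hunk below elides the body of this limits block. It is
// the stock ClickHouse pattern from InterpreterSelectQuery::executeFetchColumns;
// roughly as follows, with field names assumed from the upstream codebase of
// this vintage:
//
//     IProfilingBlockInputStream::LocalLimits limits;
//     limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
//     limits.max_execution_time = settings.max_execution_time;
//     limits.timeout_overflow_mode = settings.timeout_overflow_mode;
//     pipeline.transform([&](auto & stream) {
//         if (auto * p_stream = dynamic_cast<IProfilingBlockInputStream *>(stream.get()))
//             p_stream->setLimits(limits);
//     });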
{ IProfilingBlockInputStream::LocalLimits limits; @@ -157,10 +160,10 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() AnalysisResult res; ExpressionActionsChain chain; res.need_aggregate = dag.hasAggregation(); - DAGExpressionAnalyzer expressionAnalyzer(source_columns, context); + DAGExpressionAnalyzer analyzer(source_columns, context); if (dag.hasSelection()) { - if (expressionAnalyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name)) + if (analyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name)) { res.has_where = true; res.before_where = chain.getLastActions(); @@ -170,24 +173,23 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() } if (res.need_aggregate) { - res.need_aggregate - = expressionAnalyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); + res.need_aggregate = analyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); res.before_aggregation = chain.getLastActions(); chain.finalize(); chain.clear(); // add cast if type is not match - expressionAnalyzer.appendAggSelect(chain, dag.getAggregation()); + analyzer.appendAggSelect(chain, dag.getAggregation()); //todo use output_offset to pruner the final project columns - for (auto element : expressionAnalyzer.getCurrentInputColumns()) + for (auto element : analyzer.getCurrentInputColumns()) { final_project.emplace_back(element.name, ""); } } if (dag.hasTopN()) { - res.has_order_by = expressionAnalyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); + res.has_order_by = analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); } // append final project results for (auto & name : final_project) @@ -201,16 +203,15 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() return res; } -void InterpreterDAG::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column) +void InterpreterDAG::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expr, String & filter_column) { - pipeline.transform( - [&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr, filter_column); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, filter_column); }); } void InterpreterDAG::executeAggregation( - Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, Names & key_names, AggregateDescriptions & aggregates) + Pipeline & pipeline, const ExpressionActionsPtr & expr, Names & key_names, AggregateDescriptions & aggregates) { - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr); }); + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr); }); Block header = pipeline.firstStream()->getHeader(); ColumnNumbers keys; diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Interpreters/InterpreterDAG.h index 9531c37b82f..4ffc0b0067f 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -25,7 +25,7 @@ class Context; class InterpreterDAG : public IInterpreter { public: - InterpreterDAG(Context & context_, DAGQuerySource & dag_); + InterpreterDAG(Context & context_, const DAGQuerySource & dag_); ~InterpreterDAG() = default; @@ -85,7 +85,7 @@ class InterpreterDAG : public IInterpreter private: Context & context; - DAGQuerySource & dag; + const DAGQuerySource & dag; NamesWithAliases final_project; NamesAndTypesList 
source_columns; From 059f267385d65963005d84c57c98883574d6ddf2 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 6 Aug 2019 14:33:32 +0800 Subject: [PATCH 19/79] fix cop test regression (#157) * fix cop test regression * address comments * format code --- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 10 +++++++--- dbms/src/Interpreters/InterpreterDAG.cpp | 18 +++++++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index 79e65a34349..0ed89ec308a 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -71,11 +71,15 @@ grpc::Status rpcTest() tipb::Executor * executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTableScan); tipb::TableScan * ts = executor->mutable_tbl_scan(); - ts->set_table_id(41); + ts->set_table_id(44); tipb::ColumnInfo * ci = ts->add_columns(); ci->set_column_id(1); + ci->set_tp(0xfe); + ci->set_flag(1); ci = ts->add_columns(); ci->set_column_id(2); + ci->set_tp(8); + ci->set_flag(1); dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); @@ -95,7 +99,7 @@ grpc::Status rpcTest() col->set_val(ss.str()); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(123, ss); + DB::EncodeNumber(888, ss); value->set_val(std::string(ss.str())); // agg: count(s) group by i; @@ -147,7 +151,7 @@ grpc::Status rpcTest() kvrpcpb::Context * ctx = request.mutable_context(); ctx->set_region_id(2); auto region_epoch = ctx->mutable_region_epoch(); - region_epoch->set_version(20); + region_epoch->set_version(21); region_epoch->set_conf_ver(2); request.set_tp(DAGREQUEST); request.set_data(dagRequest.SerializeAsString()); diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 8b235a245cd..4e63ae5935c 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -46,7 +46,19 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } TableID table_id = ts.table_id(); // TODO: Get schema version from DAG request. - getAndLockStorageWithSchemaVersion(table_id, DEFAULT_UNSPECIFIED_SCHEMA_VERSION); + if (context.getSettingsRef().schema_version == DEFAULT_UNSPECIFIED_SCHEMA_VERSION) + { + storage = context.getTMTContext().getStorages().get(table_id); + if (storage == nullptr) + { + throw Exception("Table " + std::to_string(table_id) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); + } + table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); + } + else + { + getAndLockStorageWithSchemaVersion(table_id, DEFAULT_UNSPECIFIED_SCHEMA_VERSION); + } Names required_columns; for (const tipb::ColumnInfo & ci : ts.columns()) @@ -290,7 +302,7 @@ void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 return std::make_tuple(nullptr, nullptr, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, false); } - if (storage->getData().merging_params.mode != MergeTreeData::MergingParams::Txn) + if (storage_->getData().merging_params.mode != MergeTreeData::MergingParams::Txn) throw Exception("Specifying schema_version for non-TMT storage: " + storage_->getName() + ", table: " + std::to_string(table_id) + " is not allowed", ErrorCodes::LOGICAL_ERROR); @@ -299,7 +311,7 @@ void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 auto lock = storage_->lockStructure(false, __PRETTY_FUNCTION__); /// Check schema version. 
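// [editor's note] The version check below reduces to a single predicate. A
// hypothetical helper, extracted only to make the three outcomes explicit (it
// is not part of the patch):
//
//     // storage ahead of the query's version   -> throws SCHEMA_VERSION_ERROR
//     // after syncing, storage may still lag   -> accept storage_ver <= query_ver
//     // before syncing, demand an exact match  -> accept storage_ver == query_ver
//     static bool schemaVersionOk(Int64 storage_ver, Int64 query_ver, bool synced)
//     {
//         return synced ? storage_ver <= query_ver : storage_ver == query_ver;
//     }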
- auto storage_schema_version = storage->getTableInfo().schema_version; + auto storage_schema_version = storage_->getTableInfo().schema_version; if (storage_schema_version > schema_version) throw Exception("Table " + std::to_string(table_id) + " schema version " + std::to_string(storage_schema_version) + " newer than query schema version " + std::to_string(schema_version), From e59e8f3308aab53bda4a46a8378232c94083f786 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 6 Aug 2019 17:28:23 +0800 Subject: [PATCH 20/79] fix npe during dag execute (#160) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments --- dbms/src/DataStreams/DAGBlockOutputStream.cpp | 1 - dbms/src/Interpreters/DAGQuerySource.cpp | 13 +++++++++++-- dbms/src/Interpreters/DAGQuerySource.h | 4 ++++ dbms/src/Interpreters/InterpreterDAG.cpp | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.cpp b/dbms/src/DataStreams/DAGBlockOutputStream.cpp index 6de23407b95..96871dab631 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.cpp +++ b/dbms/src/DataStreams/DAGBlockOutputStream.cpp @@ -68,7 +68,6 @@ void DAGBlockOutputStream::write(const Block & block) } for (size_t j = 0; j < block.columns(); j++) { - // TODO: No need to encode column id? auto field = (*block.getByPosition(j).column.get())[i]; EncodeDatum(field, field_tp_and_flags[j].getCodecFlag(), current_ss); } diff --git a/dbms/src/Interpreters/DAGQuerySource.cpp b/dbms/src/Interpreters/DAGQuerySource.cpp index 9938acaa34d..7837a637c00 100644 --- a/dbms/src/Interpreters/DAGQuerySource.cpp +++ b/dbms/src/Interpreters/DAGQuerySource.cpp @@ -2,6 +2,9 @@ #include #include +#include +#include +#include namespace DB { @@ -54,10 +57,16 @@ DAGQuerySource::DAGQuerySource( } } -std::tuple DAGQuerySource::parse(size_t) +std::tuple DAGQuerySource::parse(size_t max_query_size) { + // this is a WAR to avoid NPE when the MergeTreeDataSelectExecutor trying + // to extract key range of the query. + // todo find a way to enable key range extraction for dag query + String tmp = "select 1"; + ParserQuery parser(tmp.data() + tmp.size()); + ASTPtr parent = parseQuery(parser, tmp.data(), tmp.data() + tmp.size(), "", max_query_size); auto query = dag_request.DebugString(); - auto ast = std::make_shared(); + ast = ((ASTSelectWithUnionQuery *)parent.get())->list_of_selects->children.at(0); return std::make_tuple(query, ast); } diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Interpreters/DAGQuerySource.h index 619152986da..3423df30c4c 100644 --- a/dbms/src/Interpreters/DAGQuerySource.h +++ b/dbms/src/Interpreters/DAGQuerySource.h @@ -87,6 +87,8 @@ class DAGQuerySource : public IQuerySource /// This will somewhat duplicate the planning logic, but we don't have a decent way to keep this information. 
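// [editor's note] Usage sketch for the accessor declared below: DAGDriver
// (PATCH 18 above) hands the tp/flag list to DAGBlockOutputStream so that each
// datum is encoded with the matching codec flag. The <DAGBlockOutputStream>
// template argument, lost in extraction of that diff, is restored here:
//
//     BlockOutputStreamPtr out = std::make_shared<DAGBlockOutputStream>(
//         dag_response, context.getSettings().dag_records_per_chunk,
//         dag_request.encode_type(), dag.getOutputFieldTpAndFlags(),
//         streams.in->getHeader());
//     copyData(*streams.in, *out);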
FieldTpAndFlags getOutputFieldTpAndFlags() const; + ASTPtr getAST() const { return ast; }; + protected: void assertValid(Int32 index, const String & name) const { @@ -110,6 +112,8 @@ class DAGQuerySource : public IQuerySource Int32 agg_index = -1; Int32 order_index = -1; Int32 limit_index = -1; + + ASTPtr ast; }; } // namespace DB diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 4e63ae5935c..a8bc26e25c5 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -114,7 +114,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) //todo support index in SelectQueryInfo query_info; - query_info.query = std::make_unique(); + query_info.query = dag.getAST(); query_info.mvcc_query_info = std::make_unique(); query_info.mvcc_query_info->resolve_locks = true; query_info.mvcc_query_info->read_tso = settings.read_tso; From a618cb57b66a76b75b6beba301a118b16e6e2277 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 17:30:24 +0800 Subject: [PATCH 21/79] Add tipb cpp gen in build script --- docker/builder/build.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker/builder/build.sh b/docker/builder/build.sh index 8db723b63ba..5e098870056 100755 --- a/docker/builder/build.sh +++ b/docker/builder/build.sh @@ -17,6 +17,13 @@ if [ -d "$SRCPATH/contrib/kvproto" ]; then cd - fi +if [ -d "$SRCPATH/contrib/tipb" ]; then + cd "$SRCPATH/contrib/tipb" + rm -rf cpp/tipb + ./generate-cpp.sh + cd - +fi + build_dir="$SRCPATH/build_docker" mkdir -p $build_dir && cd $build_dir cmake "$SRCPATH" -DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER -DENABLE_TESTS=$ENABLE_TEST -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE From bb5174915bfb6be8e478875fcdde2ddb64f4fee1 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 18:24:34 +0800 Subject: [PATCH 22/79] Fix build error and adjust some formats --- dbms/src/Interpreters/DAGQuerySource.cpp | 1 + dbms/src/Interpreters/InterpreterDAG.cpp | 2 +- dbms/src/Interpreters/executeQuery.h | 1 - dbms/src/Server/Server.cpp | 2 +- .../MergeTreeDataSelectExecutorCommon.hpp | 12 +++++----- dbms/src/Storages/Transaction/RegionTable.cpp | 22 ++++++++++--------- dbms/src/Storages/Transaction/RegionTable.h | 2 +- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/dbms/src/Interpreters/DAGQuerySource.cpp b/dbms/src/Interpreters/DAGQuerySource.cpp index 7837a637c00..ed1b09d6cd5 100644 --- a/dbms/src/Interpreters/DAGQuerySource.cpp +++ b/dbms/src/Interpreters/DAGQuerySource.cpp @@ -48,6 +48,7 @@ DAGQuerySource::DAGQuerySource( break; case tipb::ExecType::TypeTopN: assignOrThrowException(order_index, i, TOPN_NAME); + break; case tipb::ExecType::TypeLimit: assignOrThrowException(limit_index, i, LIMIT_NAME); break; diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index a8bc26e25c5..48842a6e33d 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -122,7 +122,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) info.region_id = dag.getRegionID(); info.version = dag.getRegionVersion(); info.conf_version = dag.getRegionConfVersion(); - auto current_region = context.getTMTContext().getRegionTable().getRegionById(table_id, info.region_id); + auto current_region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, info.region_id); if (!current_region) { return false; diff --git 
a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index 48033a9cf2d..55b9ea7306a 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 72bde79c85f..93ff0a41ba9 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include "Flash/FlashService.h" #include "HTTPHandlerFactory.h" #include "MetricsTransmitter.h" #include "StatusFile.h" diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp index 921154ebc05..39c7ce3e451 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp @@ -22,11 +22,13 @@ static inline void extendMutableEngineColumnNames(Names & column_names_to_read, reserved_names.insert(handle_col_name); reserved_names.insert(MutableSupport::version_column_name); reserved_names.insert(MutableSupport::delmark_column_name); - Names org_names; - for (auto & name : column_names_to_read) { - if(reserved_names.count(name) == 0) { - org_names.emplace_back(std::move(name)); + Names names; + for (auto & name : column_names_to_read) + { + if(reserved_names.count(name) == 0) + { + names.emplace_back(std::move(name)); } } column_names_to_read.clear(); @@ -35,7 +37,7 @@ static inline void extendMutableEngineColumnNames(Names & column_names_to_read, column_names_to_read.push_back(MutableSupport::version_column_name); column_names_to_read.push_back(MutableSupport::delmark_column_name); - for (auto & name : org_names) + for (auto & name : names) column_names_to_read.emplace_back(std::move(name)); } diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp index 247bb9b94df..92ce94b03a2 100644 --- a/dbms/src/Storages/Transaction/RegionTable.cpp +++ b/dbms/src/Storages/Transaction/RegionTable.cpp @@ -484,37 +484,39 @@ void RegionTable::traverseInternalRegionsByTable(const TableID table_id, std::fu callback(region_info.second); } -RegionPtr RegionTable::getRegionById(const TableID table_id, const RegionID region_id) { +std::vector> RegionTable::getRegionsByTable(const TableID table_id) +{ auto & kvstore = context.getTMTContext().getKVStore(); + std::vector> regions; { std::lock_guard lock(mutex); auto & table = getOrCreateTable(table_id); for (const auto & region_info : table.regions) { - if(region_info.second.region_id == region_id) { - return kvstore->getRegion(region_info.second.region_id); - } + auto region = kvstore->getRegion(region_info.second.region_id); + regions.emplace_back(region_info.second.region_id, region); } } - return nullptr; + return regions; } -std::vector> RegionTable::getRegionsByTable(const TableID table_id) +RegionPtr RegionTable::getRegionByTableAndID(const TableID table_id, const RegionID region_id) { auto & kvstore = context.getTMTContext().getKVStore(); - std::vector> regions; { std::lock_guard lock(mutex); auto & table = getOrCreateTable(table_id); for (const auto & region_info : table.regions) { - auto region = kvstore->getRegion(region_info.second.region_id); - regions.emplace_back(region_info.second.region_id, region); + if (region_info.second.region_id == region_id) + { + return 
kvstore->getRegion(region_info.second.region_id); + } } } - return regions; + return nullptr; } void RegionTable::mockDropRegionsInTable(TableID table_id) diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index 703b0a1c0c0..ab8620c0f47 100644 --- a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -180,7 +180,7 @@ class RegionTable : private boost::noncopyable void traverseInternalRegions(std::function && callback); void traverseInternalRegionsByTable(const TableID table_id, std::function && callback); std::vector> getRegionsByTable(const TableID table_id); - RegionPtr getRegionById(const TableID table_id, const RegionID region_id); + RegionPtr getRegionByTableAndID(const TableID table_id, const RegionID region_id); /// Write the data of the given region into the table with the given table ID, fill the data list for outer to remove. /// Will trigger schema sync on read error for only once, From da1cb0ed1d2600d43112f06c90f2e7539614c132 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 18:49:37 +0800 Subject: [PATCH 23/79] Fix build error --- dbms/src/Interpreters/DAGStringConverter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp index c62da104121..a66eeb927a6 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -104,6 +104,8 @@ bool DAGStringConverter::buildString(const tipb::Executor & executor, std::strin case tipb::ExecType::TypeLimit: return buildLimitString(executor.limit(), ss); } + + return false; } bool isProject(const tipb::Executor &) From 816ef4bc171943beaf3b05037ef03c0610f47002 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 20:05:28 +0800 Subject: [PATCH 24/79] Fix build error --- dbms/src/Interpreters/InterpreterDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 48842a6e33d..23f6b24e04d 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -84,7 +84,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) // based on the output of table scan for (auto i : dag.getDAGRequest().output_offsets()) { - if (i < 0 || i >= required_columns.size()) + if (i >= required_columns.size()) { // array index out of bound return false; From f18fcddd21b9f3a8cbca0e4a9a3ec1b929cf630e Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 21:06:16 +0800 Subject: [PATCH 25/79] Update flash configs --- tests/docker/config/config.xml | 4 ++++ tests/docker/config/tiflash.xml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/docker/config/config.xml b/tests/docker/config/config.xml index e6911f1227e..e514b6a418a 100644 --- a/tests/docker/config/config.xml +++ b/tests/docker/config/config.xml @@ -21,6 +21,10 @@ + + 0.0.0.0:9093 + + 8123 9000 9009 diff --git a/tests/docker/config/tiflash.xml b/tests/docker/config/tiflash.xml index 6ec67cee0e0..0e49966b82a 100644 --- a/tests/docker/config/tiflash.xml +++ b/tests/docker/config/tiflash.xml @@ -24,6 +24,10 @@ + + 0.0.0.0:9093 + + 8123 9000 9009 From 2ade1cbed67b8d3a2becdde71ab13350a995ea76 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 6 Aug 2019 21:06:27 +0800 Subject: [PATCH 26/79] Format --- dbms/src/Server/Server.cpp | 8 +++++--- 1 file changed, 5 
insertions(+), 3 deletions(-) diff --git a/dbms/src/Server/Server.cpp index 93ff0a41ba9..345adf5c538 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -451,14 +451,16 @@ int Server::main(const std::vector<std::string> & /*args*/) }); FlashServicePtr flash_service = nullptr; - if(config().has("flash")) { + if (config().has("flash")) + { String flash_service_addr = config().getString("flash.service_addr"); flash_service = std::make_shared<FlashService>(flash_service_addr, *this); } SCOPE_EXIT({ - if (flash_service != nullptr) { - LOG_INFO(log, "Shutting down flash service."); + if (flash_service) + { + LOG_INFO(log, "Shutting down Flash service."); flash_service.reset(); LOG_INFO(log, "Shutted down flash service."); } From 7cb9e71ad646adbd9bd9ba40951b11dc064aa087 Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 7 Aug 2019 15:34:20 +0800 Subject: [PATCH 27/79] throw exception when meet error during cop request handling (#162) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meet error during cop request handling * address comments * add error code * throw exception when meet error during cop request handling * address comments --- dbms/src/Common/ErrorCodes.cpp | 1 + .../Interpreters/DAGExpressionAnalyzer.cpp | 24 +++++---- dbms/src/Interpreters/DAGExpressionAnalyzer.h | 8 +-- dbms/src/Interpreters/DAGStringConverter.cpp | 50 +++++++++---------- dbms/src/Interpreters/DAGStringConverter.h | 8 +-- dbms/src/Interpreters/InterpreterDAG.cpp | 44 ++++++++-------- dbms/src/Interpreters/InterpreterDAG.h | 4 +- 7 files changed, 68 insertions(+), 71 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 920bc66257d..a5f373defa4 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -392,6 +392,7 @@ namespace ErrorCodes extern const int REGION_MISS = 10002; extern const int SCHEMA_SYNC_ERROR = 10003; extern const int SCHEMA_VERSION_ERROR = 10004; + extern const int COP_BAD_DAG_REQUEST = 10005; } } diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp index ea824eb2a4d..7ebf23309a4 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -11,6 +11,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int COP_BAD_DAG_REQUEST; +} // namespace ErrorCodes + static String genCastString(const String & org_name, const String & target_type_name) { return "cast(" + org_name + ", " + target_type_name + ") "; @@ -44,13 +50,13 @@ DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_co after_agg = false; } -bool DAGExpressionAnalyzer::appendAggregation( +void DAGExpressionAnalyzer::appendAggregation( ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregation_keys, AggregateDescriptions & aggregate_descriptions) { if (agg.group_by_size() == 0 && agg.agg_func_size() == 0) { //should not reach here - return false; + throw Exception("Aggregation executor without group by/agg exprs", ErrorCodes::COP_BAD_DAG_REQUEST); } initChain(chain, getCurrentInputColumns()); ExpressionActionsChain::Step & step = chain.steps.back(); @@ -94,14 +100,13 @@ bool DAGExpressionAnalyzer::appendAggregation( aggregation_keys.push_back(name); } after_agg = true; - return true; } -bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection
& sel, String & filter_column_name) +void DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name) { if (sel.conditions_size() == 0) { - return false; + throw Exception("Selection executor without condition exprs", ErrorCodes::COP_BAD_DAG_REQUEST); } tipb::Expr final_condition; if (sel.conditions_size() > 1) @@ -120,14 +125,13 @@ bool DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const ti initChain(chain, getCurrentInputColumns()); filter_column_name = getActions(filter, chain.steps.back().actions); chain.steps.back().required_output.push_back(filter_column_name); - return true; } -bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names) +void DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names) { if (topN.order_by_size() == 0) { - return false; + throw Exception("TopN executor without order by exprs", ErrorCodes::COP_BAD_DAG_REQUEST); } initChain(chain, getCurrentInputColumns()); ExpressionActionsChain::Step & step = chain.steps.back(); @@ -137,12 +141,11 @@ bool DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const step.required_output.push_back(name); order_column_names.push_back(name); } - return true; } const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return after_agg ? aggregated_columns : source_columns; } -bool DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & aggregation) +void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & aggregation) { initChain(chain, getCurrentInputColumns()); bool need_update_aggregated_columns = false; @@ -191,7 +194,6 @@ bool DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons aggregated_columns.emplace_back(updated_aggregated_columns.getNames()[i], updated_aggregated_columns.getTypes()[i]); } } - return true; } String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name) diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h index 960bdcd4bd4..c3fd9eca460 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.h +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.h @@ -29,11 +29,11 @@ class DAGExpressionAnalyzer : private boost::noncopyable public: DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_); - bool appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); - bool appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); - bool appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, + void appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); + void appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); + void appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, AggregateDescriptions & aggregate_descriptions); - bool appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg); + void appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg); String appendCastIfNeeded(const 
tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name); void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const { diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Interpreters/DAGStringConverter.cpp index a66eeb927a6..3cdc1b97692 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Interpreters/DAGStringConverter.cpp @@ -11,34 +11,41 @@ namespace DB { -bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) +namespace ErrorCodes { - TableID id; +extern const int UNKNOWN_TABLE; +extern const int COP_BAD_DAG_REQUEST; +extern const int NOT_IMPLEMENTED; +} // namespace ErrorCodes + +void DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringstream & ss) +{ + TableID table_id; if (ts.has_table_id()) { - id = ts.table_id(); + table_id = ts.table_id(); } else { // do not have table id - return false; + throw Exception("Table id not specified in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } auto & tmt_ctx = context.getTMTContext(); - auto storage = tmt_ctx.getStorages().get(id); + auto storage = tmt_ctx.getStorages().get(table_id); if (storage == nullptr) { - return false; + throw Exception("Table " + std::to_string(table_id) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); } const auto * merge_tree = dynamic_cast(storage.get()); if (!merge_tree) { - return false; + throw Exception("Only MergeTree table is supported in DAG request", ErrorCodes::COP_BAD_DAG_REQUEST); } if (ts.columns_size() == 0) { // no column selected, must be something wrong - return false; + throw Exception("No column is selected in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } columns_from_ts = storage->getColumns().getAllPhysical(); for (const tipb::ColumnInfo & ci : ts.columns()) @@ -46,16 +53,15 @@ bool DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringst ColumnID cid = ci.column_id(); if (cid <= 0 || cid > (ColumnID)columns_from_ts.size()) { - throw Exception("column id out of bound"); + throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } String name = merge_tree->getTableInfo().columns[cid - 1].name; output_from_ts.push_back(std::move(name)); } ss << "FROM " << merge_tree->getTableInfo().db_name << "." 
<< merge_tree->getTableInfo().name << " "; - return true; } -bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) +void DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) { bool first = true; for (const tipb::Expr & expr : sel.conditions()) @@ -72,17 +78,12 @@ bool DAGStringConverter::buildSelString(const tipb::Selection & sel, std::string } ss << s << " "; } - return true; } -bool DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) -{ - ss << "LIMIT " << limit.limit() << " "; - return true; -} +void DAGStringConverter::buildLimitString(const tipb::Limit & limit, std::stringstream & ss) { ss << "LIMIT " << limit.limit() << " "; } //todo return the error message -bool DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) +void DAGStringConverter::buildString(const tipb::Executor & executor, std::stringstream & ss) { switch (executor.tp()) { @@ -90,22 +91,20 @@ bool DAGStringConverter::buildString(const tipb::Executor & executor, std::strin return buildTSString(executor.tbl_scan(), ss); case tipb::ExecType::TypeIndexScan: // index scan not supported - return false; + throw Exception("IndexScan is not supported", ErrorCodes::NOT_IMPLEMENTED); case tipb::ExecType::TypeSelection: return buildSelString(executor.selection(), ss); case tipb::ExecType::TypeAggregation: // stream agg is not supported, treated as normal agg case tipb::ExecType::TypeStreamAgg: //todo support agg - return false; + throw Exception("Aggregation is not supported", ErrorCodes::NOT_IMPLEMENTED); case tipb::ExecType::TypeTopN: // todo support top n - return false; + throw Exception("TopN is not supported", ErrorCodes::NOT_IMPLEMENTED); case tipb::ExecType::TypeLimit: return buildLimitString(executor.limit(), ss); } - - return false; } bool isProject(const tipb::Executor &) @@ -125,10 +124,7 @@ String DAGStringConverter::buildSqlString() std::stringstream project; for (const tipb::Executor & executor : dag_request.executors()) { - if (!buildString(executor, query_buf)) - { - return ""; - } + buildString(executor, query_buf); } if (!isProject(dag_request.executors(dag_request.executors_size() - 1))) { diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Interpreters/DAGStringConverter.h index fc8006f8096..fa91a72c7b7 100644 --- a/dbms/src/Interpreters/DAGStringConverter.h +++ b/dbms/src/Interpreters/DAGStringConverter.h @@ -41,10 +41,10 @@ class DAGStringConverter } protected: - bool buildTSString(const tipb::TableScan & ts, std::stringstream & ss); - bool buildSelString(const tipb::Selection & sel, std::stringstream & ss); - bool buildLimitString(const tipb::Limit & limit, std::stringstream & ss); - bool buildString(const tipb::Executor & executor, std::stringstream & ss); + void buildTSString(const tipb::TableScan & ts, std::stringstream & ss); + void buildSelString(const tipb::Selection & sel, std::stringstream & ss); + void buildLimitString(const tipb::Limit & limit, std::stringstream & ss); + void buildString(const tipb::Executor & executor, std::stringstream & ss); protected: Context & context; diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 23f6b24e04d..52226c03de6 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ extern const int UNKNOWN_TABLE; extern const 
int TOO_MANY_COLUMNS; extern const int SCHEMA_VERSION_ERROR; extern const int UNKNOWN_EXCEPTION; +extern const int COP_BAD_DAG_REQUEST; } // namespace ErrorCodes InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) @@ -37,12 +39,12 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) {} // the flow is the same as executeFetchcolumns -bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) +void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) { if (!ts.has_table_id()) { // do not have table id - return false; + throw Exception("Table id not specified in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } TableID table_id = ts.table_id(); // TODO: Get schema version from DAG request. @@ -67,7 +69,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) if (cid < 1 || cid > (Int64)storage->getTableInfo().columns.size()) { // cid out of bound - return false; + throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } String name = storage->getTableInfo().columns[cid - 1].name; required_columns.push_back(name); @@ -75,7 +77,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) if (required_columns.empty()) { // no column selected, must be something wrong - return false; + throw Exception("No column is selected in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } if (!dag.hasAggregation()) @@ -87,7 +89,7 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) if (i >= required_columns.size()) { // array index out of bound - return false; + throw Exception("Output offset index is out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } // do not have alias final_project.emplace_back(required_columns[i], ""); @@ -125,7 +127,10 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) auto current_region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, info.region_id); if (!current_region) { - return false; + //todo add more region error info in RegionException + std::vector region_ids; + region_ids.push_back(info.region_id); + throw RegionException(region_ids); } info.range_in_table = current_region->getHandleRangeByTable(table_id); query_info.mvcc_query_info->regions_query_info.push_back(info); @@ -164,7 +169,6 @@ bool InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } ColumnsWithTypeAndName columnsWithTypeAndName = pipeline.firstStream()->getHeader().getColumnsWithTypeAndName(); source_columns = storage->getColumns().getAllPhysical(); - return true; } InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() @@ -175,17 +179,15 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() DAGExpressionAnalyzer analyzer(source_columns, context); if (dag.hasSelection()) { - if (analyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name)) - { - res.has_where = true; - res.before_where = chain.getLastActions(); - res.filter_column_name = chain.steps.back().required_output[0]; - chain.addStep(); - } + analyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name); + res.has_where = true; + res.before_where = chain.getLastActions(); + res.filter_column_name = chain.steps.back().required_output[0]; + chain.addStep(); } if (res.need_aggregate) { - res.need_aggregate = analyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, 
res.aggregate_descriptions); + analyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); res.before_aggregation = chain.getLastActions(); chain.finalize(); @@ -201,7 +203,8 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() } if (dag.hasTopN()) { - res.has_order_by = analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); + res.has_order_by = true; + analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); } // append final project results for (auto & name : final_project) @@ -423,13 +426,9 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na limit, settings.max_bytes_before_external_sort, context.getTemporaryPath()); } -//todo return the error message -bool InterpreterDAG::executeImpl(Pipeline & pipeline) +void InterpreterDAG::executeImpl(Pipeline & pipeline) { - if (!executeTS(dag.getTS(), pipeline)) - { - return false; - } + executeTS(dag.getTS(), pipeline); auto res = analyzeExpressions(); // execute selection @@ -458,7 +457,6 @@ bool InterpreterDAG::executeImpl(Pipeline & pipeline) { executeLimit(pipeline); } - return true; } void InterpreterDAG::executeFinalProject(Pipeline & pipeline) diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Interpreters/InterpreterDAG.h index 4ffc0b0067f..28c9a784108 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -68,8 +68,8 @@ class InterpreterDAG : public IInterpreter AggregateDescriptions aggregate_descriptions; }; - bool executeImpl(Pipeline & pipeline); - bool executeTS(const tipb::TableScan & ts, Pipeline & pipeline); + void executeImpl(Pipeline & pipeline); + void executeTS(const tipb::TableScan & ts, Pipeline & pipeline); void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column); void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr); void executeOrder(Pipeline & pipeline, Strings & order_column_names); From 0174b7e3ee3a418e9059ad8720188b160cd2b150 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 8 Aug 2019 18:22:11 +0800 Subject: [PATCH 28/79] add DAGContext so InterpreterDAG can exchange information with DAGDriver (#166) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meeting errors during cop request handling * address comments * add error code * throw exception when meeting errors during cop request handling * address comments * add DAGContext so InterpreterDAG can exchange information with DAGDriver * fix bug * 1. refine code, 2. 
address comments * update comments --- dbms/src/DataStreams/DAGBlockOutputStream.cpp | 9 +- dbms/src/DataStreams/DAGBlockOutputStream.h | 4 +- dbms/src/Flash/Coprocessor/DAGContext.h | 17 ++++ dbms/src/Flash/Coprocessor/DAGDriver.cpp | 26 +++++- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 2 +- .../Interpreters/DAGExpressionAnalyzer.cpp | 19 +++-- dbms/src/Interpreters/DAGExpressionAnalyzer.h | 3 +- dbms/src/Interpreters/DAGQuerySource.cpp | 84 +++++++++++++++---- dbms/src/Interpreters/DAGQuerySource.h | 38 ++++----- dbms/src/Interpreters/InterpreterDAG.cpp | 15 +++- dbms/src/Interpreters/InterpreterDAG.h | 1 + dbms/src/Storages/Transaction/TypeMapping.cpp | 10 +++ dbms/src/Storages/Transaction/TypeMapping.h | 2 + 13 files changed, 176 insertions(+), 54 deletions(-) create mode 100644 dbms/src/Flash/Coprocessor/DAGContext.h diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.cpp b/dbms/src/DataStreams/DAGBlockOutputStream.cpp index 96871dab631..2eb88f43bf7 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.cpp +++ b/dbms/src/DataStreams/DAGBlockOutputStream.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -13,11 +14,11 @@ extern const int LOGICAL_ERROR; } // namespace ErrorCodes DAGBlockOutputStream::DAGBlockOutputStream(tipb::SelectResponse & dag_response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, - FieldTpAndFlags && field_tp_and_flags_, Block header_) + std::vector && result_field_types_, Block header_) : dag_response(dag_response_), records_per_chunk(records_per_chunk_), encodeType(encodeType_), - field_tp_and_flags(field_tp_and_flags_), + result_field_types(result_field_types_), header(header_) { if (encodeType == tipb::EncodeType::TypeArrow) @@ -46,7 +47,7 @@ void DAGBlockOutputStream::writeSuffix() void DAGBlockOutputStream::write(const Block & block) { - if (block.columns() != field_tp_and_flags.size()) + if (block.columns() != result_field_types.size()) throw Exception("Output column size mismatch with field type size", ErrorCodes::LOGICAL_ERROR); // TODO: Check compatibility between field_tp_and_flags and block column types. 
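For reference, the write() path after this change boils down to the following per-row encoding; this is a minimal illustrative sketch only (the free function encodeRowSketch is invented here, header includes omitted), built from the EncodeDatum and getCodecFlagByFieldType helpers used in the hunk that follows:

// Encode one row of a Block the way DAGBlockOutputStream::write does after this
// commit: map each output column's pushed-down tipb::FieldType to a TiDB codec
// flag once, then serialize the Field with that flag.
void encodeRowSketch(const DB::Block & block, size_t row,
    const std::vector<tipb::FieldType> & result_field_types, std::stringstream & ss)
{
    for (size_t col = 0; col < block.columns(); ++col)
    {
        DB::Field field = (*block.getByPosition(col).column)[row];
        DB::EncodeDatum(field, DB::getCodecFlagByFieldType(result_field_types[col]), ss);
    }
}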
@@ -69,7 +70,7 @@ void DAGBlockOutputStream::write(const Block & block) for (size_t j = 0; j < block.columns(); j++) { auto field = (*block.getByPosition(j).column.get())[i]; - EncodeDatum(field, field_tp_and_flags[j].getCodecFlag(), current_ss); + EncodeDatum(field, getCodecFlagByFieldType(result_field_types[j]), current_ss); } // Encode current row records_per_chunk++; diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.h b/dbms/src/DataStreams/DAGBlockOutputStream.h index f51877a4487..e9e9b6ed626 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.h +++ b/dbms/src/DataStreams/DAGBlockOutputStream.h @@ -19,7 +19,7 @@ class DAGBlockOutputStream : public IBlockOutputStream { public: DAGBlockOutputStream(tipb::SelectResponse & response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, - FieldTpAndFlags && field_tp_and_flags_, Block header_); + std::vector && result_field_types, Block header_); Block getHeader() const override { return header; } void write(const Block & block) override; @@ -31,7 +31,7 @@ class DAGBlockOutputStream : public IBlockOutputStream Int64 records_per_chunk; tipb::EncodeType encodeType; - FieldTpAndFlags field_tp_and_flags; + std::vector result_field_types; Block header; diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h new file mode 100644 index 00000000000..9221dc38bef --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +class DAGContext +{ +public: + DAGContext(size_t profile_list_size) { profile_streams_list.resize(profile_list_size); }; + std::vector profile_streams_list; +}; +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 2eb69d5c452..091bd3234a9 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,8 @@ void DAGDriver::execute() { context.setSetting("read_tso", UInt64(dag_request.start_ts())); - DAGQuerySource dag(context, region_id, region_version, region_conf_version, dag_request); + DAGContext dag_context(dag_request.executors_size()); + DAGQuerySource dag(context, dag_context, region_id, region_version, region_conf_version, dag_request); BlockIO streams; String planner = context.getSettings().dag_planner; @@ -55,8 +57,28 @@ void DAGDriver::execute() throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); BlockOutputStreamPtr outputStreamPtr = std::make_shared(dag_response, context.getSettings().dag_records_per_chunk, - dag_request.encode_type(), dag.getOutputFieldTpAndFlags(), streams.in->getHeader()); + dag_request.encode_type(), dag.getResultFieldTypes(), streams.in->getHeader()); copyData(*streams.in, *outputStreamPtr); + // add ExecutorExecutionSummary info + for (auto & p_streams : dag_context.profile_streams_list) + { + auto * executeSummary = dag_response.add_execution_summaries(); + UInt64 time_processed_ns = 0; + UInt64 num_produced_rows = 0; + UInt64 num_iterations = 0; + for (auto & streamPtr : p_streams) + { + if (auto * p_stream = dynamic_cast(streamPtr.get())) + { + time_processed_ns += p_stream->getProfileInfo().total_stopwatch.elapsed(); + num_produced_rows += p_stream->getProfileInfo().rows; + num_iterations += p_stream->getProfileInfo().blocks; + } + } + executeSummary->set_time_processed_ns(time_processed_ns); + 
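// One ExecutorExecutionSummary is emitted per executor slot, and a single
// executor may have been split into several parallel streams; that is why the
// loop above accumulates elapsed time, produced rows and iteration (block)
// counts across every stream recorded for this slot before the remaining
// setters below are filled in.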
executeSummary->set_num_produced_rows(num_produced_rows); + executeSummary->set_num_iterations(num_iterations); + } } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index 0ed89ec308a..47071244d20 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -99,7 +99,7 @@ grpc::Status rpcTest() col->set_val(ss.str()); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(888, ss); + DB::EncodeNumber(10, ss); value->set_val(std::string(ss.str())); // agg: count(s) group by i; diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp index 7ebf23309a4..f8f230f5a79 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -14,7 +14,7 @@ namespace DB namespace ErrorCodes { - extern const int COP_BAD_DAG_REQUEST; +extern const int COP_BAD_DAG_REQUEST; } // namespace ErrorCodes static String genCastString(const String & org_name, const String & target_type_name) @@ -151,9 +151,10 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons bool need_update_aggregated_columns = false; NamesAndTypesList updated_aggregated_columns; ExpressionActionsChain::Step step = chain.steps.back(); + auto agg_col_names = aggregated_columns.getNames(); for (Int32 i = 0; i < aggregation.agg_func_size(); i++) { - String & name = aggregated_columns.getNames()[i]; + String & name = agg_col_names[i]; String updated_name = appendCastIfNeeded(aggregation.agg_func(i), step.actions, name); if (name != updated_name) { @@ -170,7 +171,7 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons } for (Int32 i = 0; i < aggregation.group_by_size(); i++) { - String & name = aggregated_columns.getNames()[i + aggregation.agg_func_size()]; + String & name = agg_col_names[i + aggregation.agg_func_size()]; String updated_name = appendCastIfNeeded(aggregation.group_by(i), step.actions, name); if (name != updated_name) { @@ -188,17 +189,23 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons if (need_update_aggregated_columns) { + auto updated_agg_col_names = updated_aggregated_columns.getNames(); + auto updated_agg_col_types = updated_aggregated_columns.getTypes(); aggregated_columns.clear(); for (size_t i = 0; i < updated_aggregated_columns.size(); i++) { - aggregated_columns.emplace_back(updated_aggregated_columns.getNames()[i], updated_aggregated_columns.getTypes()[i]); + aggregated_columns.emplace_back(updated_agg_col_names[i], updated_agg_col_types[i]); } } } -String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name) +String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String & expr_name) { - if (expr.has_field_type() && isFunctionExpr(expr)) + if (!expr.has_field_type()) + { + throw Exception("Expression without field type", ErrorCodes::COP_BAD_DAG_REQUEST); + } + if (isFunctionExpr(expr)) { DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type; diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Interpreters/DAGExpressionAnalyzer.h index c3fd9eca460..ab53485bd9f 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.h +++ 
b/dbms/src/Interpreters/DAGExpressionAnalyzer.h @@ -5,6 +5,7 @@ #include #pragma GCC diagnostic pop +#include #include #include #include @@ -34,7 +35,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable void appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, AggregateDescriptions & aggregate_descriptions); void appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg); - String appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String expr_name); + String appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String & expr_name); void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const { if (chain.steps.empty()) diff --git a/dbms/src/Interpreters/DAGQuerySource.cpp b/dbms/src/Interpreters/DAGQuerySource.cpp index ed1b09d6cd5..f453ca5e12a 100644 --- a/dbms/src/Interpreters/DAGQuerySource.cpp +++ b/dbms/src/Interpreters/DAGQuerySource.cpp @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int COP_BAD_DAG_REQUEST; +} // namespace ErrorCodes + const String DAGQuerySource::TS_NAME("tablescan"); const String DAGQuerySource::SEL_NAME("selection"); const String DAGQuerySource::AGG_NAME("aggregation"); @@ -24,9 +29,10 @@ static void assignOrThrowException(Int32 & index, Int32 value, const String & na index = value; } -DAGQuerySource::DAGQuerySource( - Context & context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, const tipb::DAGRequest & dag_request_) +DAGQuerySource::DAGQuerySource(Context & context_, DAGContext & dag_context_, RegionID region_id_, UInt64 region_version_, + UInt64 region_conf_version_, const tipb::DAGRequest & dag_request_) : context(context_), + dag_context(dag_context_), region_id(region_id_), region_version(region_version_), region_conf_version(region_conf_version_), @@ -48,6 +54,7 @@ DAGQuerySource::DAGQuerySource( break; case tipb::ExecType::TypeTopN: assignOrThrowException(order_index, i, TOPN_NAME); + assignOrThrowException(limit_index, i, TOPN_NAME); break; case tipb::ExecType::TypeLimit: assignOrThrowException(limit_index, i, LIMIT_NAME); @@ -78,23 +85,70 @@ std::unique_ptr DAGQuerySource::interpreter(Context &, QueryProces return std::make_unique(context, *this); } -FieldTpAndFlags DAGQuerySource::getOutputFieldTpAndFlags() const +bool fillExecutorOutputFieldTypes(const tipb::Executor & executor, std::vector & output_field_types) { - FieldTpAndFlags output; - - const auto & ts = getTS(); - const auto & column_infos = ts.columns(); - for (auto i : dag_request.output_offsets()) + tipb::FieldType field_type; + switch (executor.tp()) { - // TODO: Checking bound. 
- auto & column_info = column_infos[i]; - output.emplace_back(FieldTpAndFlag{static_cast(column_info.tp()), static_cast(column_info.flag())}); + case tipb::ExecType::TypeTableScan: + for (auto ci : executor.tbl_scan().columns()) + { + field_type.set_tp(ci.tp()); + field_type.set_flag(ci.flag()); + output_field_types.push_back(field_type); + } + return true; + case tipb::ExecType::TypeStreamAgg: + case tipb::ExecType::TypeAggregation: + for (auto & expr : executor.aggregation().agg_func()) + { + if (!expr.has_field_type()) + { + throw Exception("Agg expression without field type", ErrorCodes::COP_BAD_DAG_REQUEST); + } + output_field_types.push_back(expr.field_type()); + } + for (auto & expr : executor.aggregation().group_by()) + { + if (!expr.has_field_type()) + { + throw Exception("Group by expression without field type", ErrorCodes::COP_BAD_DAG_REQUEST); + } + output_field_types.push_back(expr.field_type()); + } + return true; + default: + return false; } +} - // TODO: Add aggregation columns. - // We either write our own code to infer types that follows the convention between TiDB and TiKV, or ask TiDB to push down aggregation field types. - - return output; +std::vector DAGQuerySource::getResultFieldTypes() const +{ + std::vector executor_output; + for (int i = dag_request.executors_size() - 1; i >= 0; i--) + { + if (fillExecutorOutputFieldTypes(dag_request.executors(i), executor_output)) + { + break; + } + } + if (executor_output.empty()) + { + throw Exception("Do not found result field type for current dag request", ErrorCodes::COP_BAD_DAG_REQUEST); + } + // tispark assumes that if there is a agg, the output offset is + // ignored and the request out put is the same as the agg's output. + // todo should always use output offset to re-construct the output field types + if (hasAggregation()) + { + return executor_output; + } + std::vector ret; + for (int i : dag_request.output_offsets()) + { + ret.push_back(executor_output[i]); + } + return ret; } } // namespace DB diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Interpreters/DAGQuerySource.h index 3423df30c4c..c0600620ba1 100644 --- a/dbms/src/Interpreters/DAGQuerySource.h +++ b/dbms/src/Interpreters/DAGQuerySource.h @@ -1,33 +1,20 @@ #pragma once -#include -#include -#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include #pragma GCC diagnostic pop +#include +#include +#include +#include + namespace DB { class Context; -/// A handy struct to get codec flag based on tp and flag. -struct FieldTpAndFlag -{ - TiDB::TP tp; - UInt32 flag; - - TiDB::CodecFlag getCodecFlag() const - { - TiDB::ColumnInfo ci; - ci.tp = tp; - ci.flag = flag; - return ci.getCodecFlag(); - } -}; -using FieldTpAndFlags = std::vector; /// Query source of a DAG request via gRPC. /// This is also an IR of a DAG. 
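A worked example of the new result-field-type derivation may help; illustrative only, using the two table-scan columns from the bundled cop_test and assuming a request without aggregation:

// executor_output is filled from the last executor (scanning backwards) that
// defines output field types; a table scan contributes its columns in order.
//   table scan columns : {s: tp=0xfe, i: tp=8}  =>  executor_output = {FT(s), FT(i)}
//   output_offsets     : {1, 0, 1}
//   getResultFieldTypes() => {FT(i), FT(s), FT(i)}
// With an aggregation present the offsets are ignored (the tispark convention
// noted in the code) and the {agg_func..., group_by...} field types are
// returned as-is.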
@@ -40,13 +27,15 @@ class DAGQuerySource : public IQuerySource static const String TOPN_NAME; static const String LIMIT_NAME; - DAGQuerySource(Context & context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, + DAGQuerySource(Context & context_, DAGContext & dag_context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, const tipb::DAGRequest & dag_request_); std::tuple parse(size_t max_query_size) override; String str(size_t max_query_size) override; std::unique_ptr interpreter(Context & context, QueryProcessingStage::Enum stage) override; + DAGContext & getDAGContext() const { return dag_context; }; + RegionID getRegionID() const { return region_id; } UInt64 getRegionVersion() const { return region_version; } UInt64 getRegionConfVersion() const { return region_conf_version; } @@ -56,6 +45,12 @@ class DAGQuerySource : public IQuerySource bool hasTopN() const { return order_index != -1; }; bool hasLimit() const { return order_index == -1 && limit_index != -1; }; + Int32 getTSIndex() const { return ts_index; }; + Int32 getSelectionIndex() const { return sel_index; }; + Int32 getAggregationIndex() const { return agg_index; }; + Int32 getTopNIndex() const { return order_index; }; + Int32 getLimitIndex() const { return limit_index; }; + const tipb::TableScan & getTS() const { assertValid(ts_index, TS_NAME); @@ -83,9 +78,7 @@ class DAGQuerySource : public IQuerySource }; const tipb::DAGRequest & getDAGRequest() const { return dag_request; }; - /// Used to guide output stream to encode data, as we lost DAG field type during input streams. - /// This will somewhat duplicate the planning logic, but we don't have a decent way to keep this information. - FieldTpAndFlags getOutputFieldTpAndFlags() const; + std::vector getResultFieldTypes() const; ASTPtr getAST() const { return ast; }; @@ -100,6 +93,7 @@ class DAGQuerySource : public IQuerySource protected: Context & context; + DAGContext & dag_context; const RegionID region_id; const UInt64 region_version; diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index 52226c03de6..d5d5c739508 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -195,7 +195,7 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() // add cast if type is not match analyzer.appendAggSelect(chain, dag.getAggregation()); - //todo use output_offset to pruner the final project columns + //todo use output_offset to reconstruct the final project columns for (auto element : analyzer.getCurrentInputColumns()) { final_project.emplace_back(element.name, ""); @@ -426,20 +426,31 @@ void InterpreterDAG::executeOrder(Pipeline & pipeline, Strings & order_column_na limit, settings.max_bytes_before_external_sort, context.getTemporaryPath()); } +void InterpreterDAG::recordProfileStreams(Pipeline & pipeline, Int32 index) +{ + for (auto & stream : pipeline.streams) + { + dag.getDAGContext().profile_streams_list[index].push_back(stream); + } +} + void InterpreterDAG::executeImpl(Pipeline & pipeline) { executeTS(dag.getTS(), pipeline); + recordProfileStreams(pipeline, dag.getTSIndex()); auto res = analyzeExpressions(); // execute selection if (res.has_where) { executeWhere(pipeline, res.before_where, res.filter_column_name); + recordProfileStreams(pipeline, dag.getSelectionIndex()); } if (res.need_aggregate) { // execute aggregation executeAggregation(pipeline, res.before_aggregation, res.aggregation_keys, res.aggregate_descriptions); 
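// recordProfileStreams (added below) snapshots every stream of the current
// pipeline into DAGContext::profile_streams_list at this executor's index, so
// that DAGDriver can later fold the IProfilingBlockInputStream counters of
// those streams into the response's execution summaries.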
+ recordProfileStreams(pipeline, dag.getAggregationIndex()); } executeExpression(pipeline, res.before_order_and_select); @@ -447,6 +458,7 @@ void InterpreterDAG::executeImpl(Pipeline & pipeline) { // execute topN executeOrder(pipeline, res.order_column_names); + recordProfileStreams(pipeline, dag.getTopNIndex()); } // execute projection @@ -456,6 +468,7 @@ void InterpreterDAG::executeImpl(Pipeline & pipeline) if (dag.hasLimit() && !dag.hasTopN()) { executeLimit(pipeline); + recordProfileStreams(pipeline, dag.getLimitIndex()); } } diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Interpreters/InterpreterDAG.h index 28c9a784108..222237952fc 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Interpreters/InterpreterDAG.h @@ -81,6 +81,7 @@ class InterpreterDAG : public IInterpreter void getAndLockStorageWithSchemaVersion(TableID table_id, Int64 schema_version); SortDescription getSortDescription(Strings & order_column_names); AnalysisResult analyzeExpressions(); + void recordProfileStreams(Pipeline & pipeline, Int32 index); private: Context & context; diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 24413713c13..bfce8ceb1db 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -178,4 +178,14 @@ DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type) return getDataTypeByColumnInfo(ci); } +TiDB::CodecFlag getCodecFlagByFieldType(const tipb::FieldType & field_type) +{ + ColumnInfo ci; + ci.tp = static_cast(field_type.tp()); + ci.flag = field_type.flag(); + ci.flen = field_type.flen(); + ci.decimal = field_type.decimal(); + return ci.getCodecFlag(); +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index d501be75330..2c44b38b0e7 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -15,4 +15,6 @@ DataTypePtr getDataTypeByColumnInfo(const ColumnInfo & column_info); DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type); +TiDB::CodecFlag getCodecFlagByFieldType(const tipb::FieldType & field_type); + } // namespace DB From 9a1dd23b67ca74b10732dcf6d5789916cf925a79 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 8 Aug 2019 18:56:20 +0800 Subject: [PATCH 29/79] columnref index is based on executor output schema (#167) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meeting errors during cop request handling * address comments * add error code * throw exception when meeting errors during cop request handling * address comments * add DAGContext so InterpreterDAG can exchange information with DAGDriver * fix bug * 1. refine code, 2. 
address comments * update comments * columnref index is based on executor output schema --- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 104 ++++++++++++++---- .../Interpreters/DAGExpressionAnalyzer.cpp | 23 ++-- dbms/src/Interpreters/DAGUtils.cpp | 4 +- dbms/src/Interpreters/InterpreterDAG.cpp | 5 +- dbms/src/Storages/Transaction/TiDB.cpp | 50 +++++++-- dbms/src/Storages/Transaction/TiDB.h | 1 + 6 files changed, 133 insertions(+), 54 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index 47071244d20..b82525eb901 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -21,40 +21,78 @@ class FlashClient SubPtr sp; public: + static std::string decodeDatumToString(size_t & cursor, const std::string & raw_data) + { + switch (raw_data[cursor++]) + { + case TiDB::CodecFlagNil: + return "NULL"; + case TiDB::CodecFlagInt: + return std::to_string(DB::DecodeInt(cursor, raw_data)); + case TiDB::CodecFlagUInt: + return std::to_string(DB::DecodeInt(cursor, raw_data)); + case TiDB::CodecFlagBytes: + return DB::DecodeBytes(cursor, raw_data); + case TiDB::CodecFlagCompactBytes: + return DB::DecodeCompactBytes(cursor, raw_data); + case TiDB::CodecFlagFloat: + return std::to_string(DB::DecodeFloat64(cursor, raw_data)); + case TiDB::CodecFlagVarUInt: + return std::to_string(DB::DecodeVarUInt(cursor, raw_data)); + case TiDB::CodecFlagVarInt: + return std::to_string(DB::DecodeVarInt(cursor, raw_data)); + case TiDB::CodecFlagDuration: + throw DB::Exception("Not implented yet. DecodeDatum: CodecFlagDuration"); + case TiDB::CodecFlagDecimal: + return DB::DecodeDecimal(cursor, raw_data).toString(); + default: + throw DB::Exception("Unknown Type:" + std::to_string(raw_data[cursor - 1])); + } + } + FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)) {} - grpc::Status coprocessor(coprocessor::Request * rqst) + grpc::Status coprocessor(coprocessor::Request * rqst, size_t output_column_num) { grpc::ClientContext clientContext; clientContext.AddMetadata("user_name", ""); clientContext.AddMetadata("dag_planner", "optree"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); - size_t column_num = 3; if (status.ok()) { // if status is ok, try to decode the result tipb::SelectResponse selectResponse; if (selectResponse.ParseFromString(response.data())) { - for (tipb::Chunk chunk : selectResponse.chunks()) + for (const tipb::Chunk & chunk : selectResponse.chunks()) { size_t cursor = 0; - std::vector row_result; const std::string & data = chunk.rows_data(); while (cursor < data.size()) { - row_result.push_back(DB::DecodeDatum(cursor, data)); - if (row_result.size() == column_num) + for (size_t i = 0; i < output_column_num; i++) { - //print the result - std::cout << row_result[0].get() << " " << row_result[1].get() << " " - << row_result[2].get() << std::endl; - row_result.clear(); + std::cout << decodeDatumToString(cursor, data) << " "; } + std::cout << std::endl; } } + std::cout << "Execute summary: " << std::endl; + for (int i = 0; i < selectResponse.execution_summaries_size(); i++) + { + auto & summary = selectResponse.execution_summaries(i); + std::cout << "Executor " << i; + std::cout << " time = " << summary.time_processed_ns() << " ns "; + std::cout << " rows = " << summary.num_produced_rows(); + std::cout << " iter nums = " << summary.num_iterations(); + std::cout << std::endl; + } } } + else + { + std::cout << "Coprocessor 
request failed, error code " << status.error_code() << " error msg " << status.error_message(); + } return status; } }; @@ -64,6 +102,7 @@ grpc::Status rpcTest() { ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); ClientPtr clientPtr = std::make_shared(cp); + size_t result_field_num = 0; // construct a dag request tipb::DAGRequest dagRequest; dagRequest.set_start_ts(18446744073709551615uL); @@ -75,14 +114,15 @@ grpc::Status rpcTest() tipb::ColumnInfo * ci = ts->add_columns(); ci->set_column_id(1); ci->set_tp(0xfe); - ci->set_flag(1); + ci->set_flag(0); ci = ts->add_columns(); ci->set_column_id(2); ci->set_tp(8); - ci->set_flag(1); + ci->set_flag(0); dagRequest.add_output_offsets(1); dagRequest.add_output_offsets(0); dagRequest.add_output_offsets(1); + result_field_num = 3; // selection: less(i, 123) executor = dagRequest.add_executors(); @@ -95,15 +135,23 @@ grpc::Status rpcTest() tipb::Expr * value = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; - DB::EncodeNumber(2, ss); + DB::EncodeNumber(1, ss); col->set_val(ss.str()); + auto * type = col->mutable_field_type(); + type->set_tp(8); + type->set_flag(0); value->set_tp(tipb::ExprType::Int64); ss.str(""); DB::EncodeNumber(10, ss); value->set_val(std::string(ss.str())); + type = value->mutable_field_type(); + type->set_tp(8); + type->set_flag(1); + type = expr->mutable_field_type(); + type->set_tp(1); + type->set_flag(1 << 5); // agg: count(s) group by i; - /* executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeAggregation); auto agg = executor->mutable_aggregation(); @@ -112,36 +160,44 @@ grpc::Status rpcTest() auto child = agg_func->add_children(); child->set_tp(tipb::ExprType::ColumnRef); ss.str(""); - DB::EncodeNumber(1, ss); + DB::EncodeNumber(0, ss); child->set_val(ss.str()); - auto type = agg_func->mutable_field_type(); - type->set_tp(3); - type->set_flag(33); + auto f_type = agg_func->mutable_field_type(); + f_type->set_tp(3); + f_type->set_flag(33); auto group_col = agg->add_group_by(); group_col->set_tp(tipb::ExprType::ColumnRef); ss.str(""); - DB::EncodeNumber(2,ss); + DB::EncodeNumber(1, ss); group_col->set_val(ss.str()); - */ + f_type = group_col->mutable_field_type(); + f_type->set_tp(8); + f_type->set_flag(1); + result_field_num = 2; // topn + /* executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeTopN); tipb::TopN * topN = executor->mutable_topn(); topN->set_limit(3); tipb::ByItem * byItem = topN->add_order_by(); - byItem->set_desc(true); + byItem->set_desc(false); tipb::Expr * expr1 = byItem->mutable_expr(); expr1->set_tp(tipb::ExprType::ColumnRef); ss.str(""); - DB::EncodeNumber(2, ss); + DB::EncodeNumber(1, ss); expr1->set_val(ss.str()); + type = expr1->mutable_field_type(); + type->set_tp(8); + type->set_tp(0); + */ // limit /* executor = dagRequest.add_executors(); executor->set_tp(tipb::ExecType::TypeLimit); tipb::Limit *limit = executor->mutable_limit(); - limit->set_limit(1); + limit->set_limit(5); */ @@ -156,7 +212,7 @@ grpc::Status rpcTest() request.set_tp(DAGREQUEST); request.set_data(dagRequest.SerializeAsString()); //request.add_ranges(); - return clientPtr->coprocessor(&request); + return clientPtr->coprocessor(&request, result_field_num); } void codecTest() diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp index f8f230f5a79..8d36848c82c 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ 
b/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int COP_BAD_DAG_REQUEST; +extern const int UNSUPPORTED_METHOD; } // namespace ErrorCodes static String genCastString(const String & org_name, const String & target_type_name) @@ -210,6 +211,7 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type; //todo maybe use a more decent compare method + // todo ignore nullable info?? if (expected_type->getName() != actual_type->getName()) { // need to add cast function @@ -266,9 +268,9 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi else if (isColumnExpr(expr)) { ColumnID columnId = getColumnID(expr); - if (columnId < 1 || columnId > (ColumnID)getCurrentInputColumns().size()) + if (columnId < 0 || columnId >= (ColumnID)getCurrentInputColumns().size()) { - throw Exception("column id out of bound"); + throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } //todo check if the column type need to be cast to field type return expr_name; @@ -277,13 +279,13 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi { if (isAggFunctionExpr(expr)) { - throw Exception("agg function is not supported yet"); + throw Exception("agg function is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); } const String & func_name = getFunctionName(expr); if (func_name == "in" || func_name == "notIn" || func_name == "globalIn" || func_name == "globalNotIn") { // todo support in - throw Exception(func_name + " is not supported yet"); + throw Exception(func_name + " is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); } const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); @@ -292,15 +294,8 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi for (auto & child : expr.children()) { String name = getActions(child, actions); - if (actions->getSampleBlock().has(name)) - { - argument_names.push_back(name); - argument_types.push_back(actions->getSampleBlock().getByName(name).type); - } - else - { - throw Exception("Unknown expr: " + child.DebugString()); - } + argument_names.push_back(name); + argument_types.push_back(actions->getSampleBlock().getByName(name).type); } // re-construct expr_name, because expr_name generated previously is based on expr tree, @@ -319,7 +314,7 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } else { - throw Exception("Unsupported expr type: " + getTypeName(expr)); + throw Exception("Unsupported expr type: " + getTypeName(expr), ErrorCodes::UNSUPPORTED_METHOD); } } } // namespace DB diff --git a/dbms/src/Interpreters/DAGUtils.cpp b/dbms/src/Interpreters/DAGUtils.cpp index 0cfa906cc02..ce1e88cd65b 100644 --- a/dbms/src/Interpreters/DAGUtils.cpp +++ b/dbms/src/Interpreters/DAGUtils.cpp @@ -92,11 +92,11 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col return DecodeBytes(cursor, expr.val()); case tipb::ExprType::ColumnRef: columnId = DecodeInt(cursor, expr.val()); - if (columnId < 1 || columnId > (ColumnID)input_col.size()) + if (columnId < 0 || columnId >= (ColumnID)input_col.size()) { throw Exception("out of bound"); } - return input_col.getNames()[columnId - 1]; + return input_col.getNames()[columnId]; case tipb::ExprType::Count: 
case tipb::ExprType::Sum: case tipb::ExprType::Avg: diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Interpreters/InterpreterDAG.cpp index d5d5c739508..5a12143d8bd 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Interpreters/InterpreterDAG.cpp @@ -71,8 +71,10 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) // cid out of bound throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } - String name = storage->getTableInfo().columns[cid - 1].name; + String name = storage->getTableInfo().getColumnName(cid); required_columns.push_back(name); + NameAndTypePair nameAndTypePair = storage->getColumns().getPhysical(name); + source_columns.push_back(nameAndTypePair); } if (required_columns.empty()) { @@ -168,7 +170,6 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) }); } ColumnsWithTypeAndName columnsWithTypeAndName = pipeline.firstStream()->getHeader().getColumnsWithTypeAndName(); - source_columns = storage->getColumns().getAllPhysical(); } InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 83715fd5bb4..5483b5ee26b 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -15,7 +15,8 @@ ColumnInfo::ColumnInfo(Poco::JSON::Object::Ptr json) { deserialize(json); } Field ColumnInfo::defaultValueToField() const { auto & value = origin_default_value; - if (value.isEmpty()) { + if (value.isEmpty()) + { return Field(); } switch (tp) @@ -55,7 +56,8 @@ Field ColumnInfo::defaultValueToField() const return Field(); } -Poco::JSON::Object::Ptr ColumnInfo::getJSONObject() const try +Poco::JSON::Object::Ptr ColumnInfo::getJSONObject() const +try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -98,7 +100,8 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (ColumnInfo): " + e.displayText(), DB::Exception(e)); } -void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) try +void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) +try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -132,7 +135,8 @@ catch (const Poco::Exception & e) PartitionDefinition::PartitionDefinition(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const try +Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const +try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); json->set("id", id); @@ -153,7 +157,8 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionDef): " + e.displayText(), DB::Exception(e)); } -void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) try +void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) +try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -168,7 +173,8 @@ catch (const Poco::Exception & e) PartitionInfo::PartitionInfo(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const try +Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const +try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -197,7 +203,8 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionInfo): " + e.displayText(), 
DB::Exception(e)); } -void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) try +void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) +try { type = static_cast(json->getValue("type")); expr = json->getValue("expr"); @@ -221,7 +228,8 @@ catch (const Poco::Exception & e) TableInfo::TableInfo(const String & table_info_json) { deserialize(table_info_json); } -String TableInfo::serialize(bool escaped) const try +String TableInfo::serialize(bool escaped) const +try { std::stringstream buf; @@ -279,7 +287,8 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (TableInfo): " + e.displayText(), DB::Exception(e)); } -void DBInfo::deserialize(const String & json_str) try +void DBInfo::deserialize(const String & json_str) +try { Poco::JSON::Parser parser; Poco::Dynamic::Var result = parser.parse(json_str); @@ -297,7 +306,8 @@ catch (const Poco::Exception & e) DB::Exception(e)); } -void TableInfo::deserialize(const String & json_str) try +void TableInfo::deserialize(const String & json_str) +try { if (json_str.empty()) { @@ -334,7 +344,8 @@ void TableInfo::deserialize(const String & json_str) try belonging_table_id = obj->getValue("belonging_table_id"); partition.deserialize(partition_obj); } - if (obj->has("schema_version")) { + if (obj->has("schema_version")) + { schema_version = obj->getValue("schema_version"); } } @@ -382,7 +393,7 @@ CodecFlag ColumnInfo::getCodecFlag() const ColumnID TableInfo::getColumnID(const String & name) const { - for (auto col : columns) + for (auto & col : columns) { if (name == col.name) { @@ -396,4 +407,19 @@ ColumnID TableInfo::getColumnID(const String & name) const throw DB::Exception(std::string(__PRETTY_FUNCTION__) + ": Unknown column name " + name, DB::ErrorCodes::LOGICAL_ERROR); } +String TableInfo::getColumnName(const ColumnID id) const +{ + for (auto & col : columns) + { + if (id == col.id) + { + return col.name; + } + } + + throw DB::Exception( + std::string(__PRETTY_FUNCTION__) + ": Invalidate column id " + std::to_string(id) + " for table " + db_name + "." 
+ name, + DB::ErrorCodes::LOGICAL_ERROR); +} + } // namespace TiDB diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index dfd38f294cb..cd6fc0651d9 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -263,6 +263,7 @@ struct TableInfo Int64 schema_version = -1; ColumnID getColumnID(const String & name) const; + String getColumnName(const ColumnID id) const; TableInfo producePartitionTableInfo(TableID table_or_partition_id) const { From 26e20d58530fb1390bf9c2eb6cb5be2fae1867ae Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Thu, 8 Aug 2019 19:04:29 +0800 Subject: [PATCH 30/79] Move flash/cop/dag to individual library --- dbms/CMakeLists.txt | 4 ++-- dbms/src/Flash/CMakeLists.txt | 8 ++++++++ dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp | 3 +-- .../Coprocessor}/DAGBlockOutputStream.cpp | 2 +- .../Coprocessor}/DAGBlockOutputStream.h | 2 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 6 +++--- .../Coprocessor}/DAGExpressionAnalyzer.cpp | 6 +++--- .../Coprocessor}/DAGExpressionAnalyzer.h | 2 +- .../Coprocessor}/DAGQuerySource.cpp | 5 ++--- .../{Interpreters => Flash/Coprocessor}/DAGQuerySource.h | 0 .../Coprocessor}/DAGStringConverter.cpp | 4 ++-- .../Coprocessor}/DAGStringConverter.h | 0 dbms/src/{Interpreters => Flash/Coprocessor}/DAGUtils.cpp | 6 +++--- dbms/src/{Interpreters => Flash/Coprocessor}/DAGUtils.h | 2 ++ .../Coprocessor}/InterpreterDAG.cpp | 6 +++--- .../{Interpreters => Flash/Coprocessor}/InterpreterDAG.h | 4 ++-- dbms/src/Interpreters/executeQuery.h | 2 +- 17 files changed, 35 insertions(+), 27 deletions(-) rename dbms/src/{DataStreams => Flash/Coprocessor}/DAGBlockOutputStream.cpp (97%) rename dbms/src/{DataStreams => Flash/Coprocessor}/DAGBlockOutputStream.h (96%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGExpressionAnalyzer.cpp (98%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGExpressionAnalyzer.h (97%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGQuerySource.cpp (96%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGQuerySource.h (100%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGStringConverter.cpp (98%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGStringConverter.h (100%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGUtils.cpp (99%) rename dbms/src/{Interpreters => Flash/Coprocessor}/DAGUtils.h (94%) rename dbms/src/{Interpreters => Flash/Coprocessor}/InterpreterDAG.cpp (99%) rename dbms/src/{Interpreters => Flash/Coprocessor}/InterpreterDAG.h (97%) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index e4ee47a0d45..018cf6ae0d6 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -60,8 +60,7 @@ add_headers_and_sources(dbms src/Storages/Page) add_headers_and_sources(dbms src/Raft) add_headers_and_sources(dbms src/TiDB) add_headers_and_sources(dbms src/Client) -add_headers_and_sources(dbms src/Flash) -add_headers_and_sources(dbms src/Flash/Coprocessor) +add_headers_only(dbms src/Flash/Coprocessor) add_headers_only(dbms src/Server) list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) @@ -151,6 +150,7 @@ target_link_libraries (dbms clickhouse_parsers clickhouse_common_config clickhouse_common_io + flash_service kvproto kv_client tipb diff --git a/dbms/src/Flash/CMakeLists.txt b/dbms/src/Flash/CMakeLists.txt index 16b3a6d519b..365716927af 100644 --- a/dbms/src/Flash/CMakeLists.txt +++ b/dbms/src/Flash/CMakeLists.txt @@ -1,3 +1,11 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) + 
+add_headers_and_sources(flash_service .) +add_headers_and_sources(flash_service ./Coprocessor) + +add_library(flash_service ${flash_service_headers} ${flash_service_sources}) +target_link_libraries(flash_service dbms) + if (ENABLE_TESTS) add_subdirectory (Coprocessor/tests) endif () diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp index 028024d0b47..aed26c39fb2 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp @@ -2,8 +2,7 @@ #include #include -#include -#include +#include #include #include #include diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.cpp b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp similarity index 97% rename from dbms/src/DataStreams/DAGBlockOutputStream.cpp rename to dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp index 96871dab631..add55ecad02 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.cpp +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/dbms/src/DataStreams/DAGBlockOutputStream.h b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h similarity index 96% rename from dbms/src/DataStreams/DAGBlockOutputStream.h rename to dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h index f51877a4487..ef85bceb4a9 100644 --- a/dbms/src/DataStreams/DAGBlockOutputStream.h +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 2eb69d5c452..be6a441c879 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -2,11 +2,11 @@ #include #include -#include #include +#include +#include +#include #include -#include -#include #include namespace DB diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp similarity index 98% rename from dbms/src/Interpreters/DAGExpressionAnalyzer.cpp rename to dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 7ebf23309a4..cbc6f8fd1b7 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1,10 +1,10 @@ +#include #include #include +#include #include #include -#include -#include #include #include #include @@ -14,7 +14,7 @@ namespace DB namespace ErrorCodes { - extern const int COP_BAD_DAG_REQUEST; +extern const int COP_BAD_DAG_REQUEST; } // namespace ErrorCodes static String genCastString(const String & org_name, const String & target_type_name) diff --git a/dbms/src/Interpreters/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h similarity index 97% rename from dbms/src/Interpreters/DAGExpressionAnalyzer.h rename to dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index c3fd9eca460..37906a3540c 100644 --- a/dbms/src/Interpreters/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -5,8 +5,8 @@ #include #pragma GCC diagnostic pop +#include #include -#include #include namespace DB diff --git a/dbms/src/Interpreters/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp similarity index 96% rename from dbms/src/Interpreters/DAGQuerySource.cpp rename to dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index ed1b09d6cd5..f1c49a51f88 100644 
--- a/dbms/src/Interpreters/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -1,7 +1,6 @@ -#include +#include -#include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/DAGQuerySource.h b/dbms/src/Flash/Coprocessor/DAGQuerySource.h similarity index 100% rename from dbms/src/Interpreters/DAGQuerySource.h rename to dbms/src/Flash/Coprocessor/DAGQuerySource.h diff --git a/dbms/src/Interpreters/DAGStringConverter.cpp b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp similarity index 98% rename from dbms/src/Interpreters/DAGStringConverter.cpp rename to dbms/src/Flash/Coprocessor/DAGStringConverter.cpp index 3cdc1b97692..36ab11801b9 100644 --- a/dbms/src/Interpreters/DAGStringConverter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp @@ -1,7 +1,7 @@ -#include +#include #include -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/DAGStringConverter.h b/dbms/src/Flash/Coprocessor/DAGStringConverter.h similarity index 100% rename from dbms/src/Interpreters/DAGStringConverter.h rename to dbms/src/Flash/Coprocessor/DAGStringConverter.h diff --git a/dbms/src/Interpreters/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp similarity index 99% rename from dbms/src/Interpreters/DAGUtils.cpp rename to dbms/src/Flash/Coprocessor/DAGUtils.cpp index 0cfa906cc02..4b859491df2 100644 --- a/dbms/src/Interpreters/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1,11 +1,11 @@ - -#include +#include #include #include -#include #include +#include + namespace DB { diff --git a/dbms/src/Interpreters/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h similarity index 94% rename from dbms/src/Interpreters/DAGUtils.h rename to dbms/src/Flash/Coprocessor/DAGUtils.h index 1048cf8375d..71a52533ea3 100644 --- a/dbms/src/Interpreters/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -7,6 +7,8 @@ #include #pragma GCC diagnostic pop +#include +#include #include namespace DB diff --git a/dbms/src/Interpreters/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp similarity index 99% rename from dbms/src/Interpreters/InterpreterDAG.cpp rename to dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 52226c03de6..7e8c37860f8 100644 --- a/dbms/src/Interpreters/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,9 +10,9 @@ #include #include #include +#include +#include #include -#include -#include #include #include #include diff --git a/dbms/src/Interpreters/InterpreterDAG.h b/dbms/src/Flash/Coprocessor/InterpreterDAG.h similarity index 97% rename from dbms/src/Interpreters/InterpreterDAG.h rename to dbms/src/Flash/Coprocessor/InterpreterDAG.h index 28c9a784108..93b167c2b89 100644 --- a/dbms/src/Interpreters/InterpreterDAG.h +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.h @@ -7,9 +7,9 @@ #pragma GCC diagnostic pop #include +#include +#include #include -#include -#include #include #include #include diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index 55b9ea7306a..e8f4bbd266b 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB From 62ced38d2a17b97020179a75bbfaa85328ee4bfb Mon Sep 17 00:00:00 2001 From: ruoxi Date: Fri, 9 Aug 2019 15:42:12 +0800 Subject: [PATCH 31/79] DAG planner fix and mock dag request (#169) * Enhance dbg invoke and add dag as schemaful function * Add 
basic sql parse to dag * Column id starts from 1 * Fix value to ref * Add basic dag test * Fix dag bugs and pass 1st mock test * Make dag go normal routine and add mock dag * Add todo * Add comment * Fix gcc compile error * Enhance dag test * Address comments --- dbms/src/Debug/DBGInvoker.cpp | 106 +++++---- dbms/src/Debug/DBGInvoker.h | 11 +- dbms/src/Debug/MockTiDB.cpp | 2 +- dbms/src/Debug/dbgFuncCoprocessor.cpp | 211 ++++++++++++++++++ dbms/src/Debug/dbgFuncCoprocessor.h | 23 ++ dbms/src/Flash/Coprocessor/DAGContext.h | 2 + dbms/src/Flash/Coprocessor/DAGDriver.cpp | 9 +- dbms/src/Flash/Coprocessor/DAGDriver.h | 4 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 5 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 24 +- dbms/src/Interpreters/executeQuery.cpp | 4 +- dbms/src/Interpreters/executeQuery.h | 2 +- tests/mutable-test/txn_dag/table_scan.test | 35 +++ 13 files changed, 373 insertions(+), 65 deletions(-) create mode 100644 dbms/src/Debug/dbgFuncCoprocessor.cpp create mode 100644 dbms/src/Debug/dbgFuncCoprocessor.h create mode 100644 tests/mutable-test/txn_dag/table_scan.test diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 512f1468c0d..dadb7285915 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -29,43 +30,46 @@ void dbgFuncSleep(Context &, const ASTs & args, DBGInvoker::Printer output) DBGInvoker::DBGInvoker() { - regFunc("echo", dbgFuncEcho); + regSchemalessFunc("echo", dbgFuncEcho); // TODO: remove this, use sleep in bash script - regFunc("sleep", dbgFuncSleep); - - regFunc("mock_tidb_table", MockTiDBTable::dbgFuncMockTiDBTable); - regFunc("mock_tidb_db", MockTiDBTable::dbgFuncMockTiDBDB); - regFunc("mock_tidb_partition", MockTiDBTable::dbgFuncMockTiDBPartition); - regFunc("rename_table_for_partition", MockTiDBTable::dbgFuncRenameTableForPartition); - regFunc("drop_tidb_table", MockTiDBTable::dbgFuncDropTiDBTable); - regFunc("drop_tidb_db", MockTiDBTable::dbgFuncDropTiDBDB); - regFunc("add_column_to_tidb_table", MockTiDBTable::dbgFuncAddColumnToTiDBTable); - regFunc("drop_column_from_tidb_table", MockTiDBTable::dbgFuncDropColumnFromTiDBTable); - regFunc("modify_column_in_tidb_table", MockTiDBTable::dbgFuncModifyColumnInTiDBTable); - regFunc("rename_tidb_table", MockTiDBTable::dbgFuncRenameTiDBTable); - regFunc("truncate_tidb_table", MockTiDBTable::dbgFuncTruncateTiDBTable); - - regFunc("set_flush_threshold", dbgFuncSetFlushThreshold); - - regFunc("raft_insert_row", dbgFuncRaftInsertRow); - regFunc("raft_insert_row_full", dbgFuncRaftInsertRowFull); - regFunc("raft_insert_rows", dbgFuncRaftInsertRows); - regFunc("raft_update_rows", dbgFuncRaftUpdateRows); - regFunc("raft_delete_rows", dbgFuncRaftDelRows); - regFunc("raft_delete_row", dbgFuncRaftDeleteRow); - - regFunc("put_region", dbgFuncPutRegion); - regFunc("region_snapshot", dbgFuncRegionSnapshot); - regFunc("region_snapshot_data", dbgFuncRegionSnapshotWithData); - - regFunc("try_flush", dbgFuncTryFlush); - regFunc("try_flush_region", dbgFuncTryFlushRegion); - - regFunc("dump_all_region", dbgFuncDumpAllRegion); - - regFunc("enable_schema_sync_service", dbgFuncEnableSchemaSyncService); - regFunc("refresh_schemas", dbgFuncRefreshSchemas); - regFunc("reset_schemas", dbgFuncResetSchemas); + regSchemalessFunc("sleep", dbgFuncSleep); + + regSchemalessFunc("mock_tidb_table", MockTiDBTable::dbgFuncMockTiDBTable); + regSchemalessFunc("mock_tidb_db", MockTiDBTable::dbgFuncMockTiDBDB); + 
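// Schemaless debug functions report results as plain strings through the
// Printer callback and are wrapped into a single-column stream by
// invokeSchemaless; schemaful functions (see regSchemafulFunc("dag", ...)
// below) instead return a BlockInputStreamPtr whose header carries the result
// schema, letting DAG results be checked column by column.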
regSchemalessFunc("mock_tidb_partition", MockTiDBTable::dbgFuncMockTiDBPartition); + regSchemalessFunc("rename_table_for_partition", MockTiDBTable::dbgFuncRenameTableForPartition); + regSchemalessFunc("drop_tidb_table", MockTiDBTable::dbgFuncDropTiDBTable); + regSchemalessFunc("drop_tidb_db", MockTiDBTable::dbgFuncDropTiDBDB); + regSchemalessFunc("add_column_to_tidb_table", MockTiDBTable::dbgFuncAddColumnToTiDBTable); + regSchemalessFunc("drop_column_from_tidb_table", MockTiDBTable::dbgFuncDropColumnFromTiDBTable); + regSchemalessFunc("modify_column_in_tidb_table", MockTiDBTable::dbgFuncModifyColumnInTiDBTable); + regSchemalessFunc("rename_tidb_table", MockTiDBTable::dbgFuncRenameTiDBTable); + regSchemalessFunc("truncate_tidb_table", MockTiDBTable::dbgFuncTruncateTiDBTable); + + regSchemalessFunc("set_flush_threshold", dbgFuncSetFlushThreshold); + + regSchemalessFunc("raft_insert_row", dbgFuncRaftInsertRow); + regSchemalessFunc("raft_insert_row_full", dbgFuncRaftInsertRowFull); + regSchemalessFunc("raft_insert_rows", dbgFuncRaftInsertRows); + regSchemalessFunc("raft_update_rows", dbgFuncRaftUpdateRows); + regSchemalessFunc("raft_delete_rows", dbgFuncRaftDelRows); + regSchemalessFunc("raft_delete_row", dbgFuncRaftDeleteRow); + + regSchemalessFunc("put_region", dbgFuncPutRegion); + regSchemalessFunc("region_snapshot", dbgFuncRegionSnapshot); + regSchemalessFunc("region_snapshot_data", dbgFuncRegionSnapshotWithData); + + regSchemalessFunc("try_flush", dbgFuncTryFlush); + regSchemalessFunc("try_flush_region", dbgFuncTryFlushRegion); + + regSchemalessFunc("dump_all_region", dbgFuncDumpAllRegion); + + regSchemalessFunc("enable_schema_sync_service", dbgFuncEnableSchemaSyncService); + regSchemalessFunc("refresh_schemas", dbgFuncRefreshSchemas); + regSchemalessFunc("reset_schemas", dbgFuncResetSchemas); + + regSchemafulFunc("dag", dbgFuncDAG); + regSchemafulFunc("mock_dag", dbgFuncMockDAG); } void replaceSubstr(std::string & str, const std::string & target, const std::string & replacement) @@ -97,10 +101,25 @@ BlockInputStreamPtr DBGInvoker::invoke(Context & context, const std::string & or name = ori_name.substr(prefix_not_print_res.size(), ori_name.size() - prefix_not_print_res.size()); } - auto it = funcs.find(name); - if (it == funcs.end()) - throw Exception("DBG function not found", ErrorCodes::BAD_ARGUMENTS); + BlockInputStreamPtr res; + auto it_schemaless = schemaless_funcs.find(name); + if (it_schemaless != schemaless_funcs.end()) + res = invokeSchemaless(context, name, it_schemaless->second, args); + else + { + auto it_schemaful = schemaful_funcs.find(name); + if (it_schemaful != schemaful_funcs.end()) + res = invokeSchemaful(context, name, it_schemaful->second, args); + if (it_schemaful == schemaful_funcs.end()) + throw Exception("DBG function not found", ErrorCodes::BAD_ARGUMENTS); + } + + return print_res ? res : std::shared_ptr(); +} +BlockInputStreamPtr DBGInvoker::invokeSchemaless( + Context & context, const std::string & name, const SchemalessDBGFunc & func, const ASTs & args) +{ std::stringstream col_name; col_name << name << "("; for (size_t i = 0; i < args.size(); ++i) @@ -113,9 +132,14 @@ BlockInputStreamPtr DBGInvoker::invoke(Context & context, const std::string & or std::shared_ptr res = std::make_shared(col_name.str()); Printer printer = [&](const std::string & s) { res->append(s); }; - (it->second)(context, args, printer); + func(context, args, printer); - return print_res ? 
res : std::shared_ptr(); + return res; +} + +BlockInputStreamPtr DBGInvoker::invokeSchemaful(Context & context, const std::string &, const SchemafulDBGFunc & func, const ASTs & args) +{ + return func(context, args); } } // namespace DB diff --git a/dbms/src/Debug/DBGInvoker.h b/dbms/src/Debug/DBGInvoker.h index 71e8487f1fa..95b2449fd9b 100644 --- a/dbms/src/Debug/DBGInvoker.h +++ b/dbms/src/Debug/DBGInvoker.h @@ -25,16 +25,21 @@ class DBGInvoker { public: using Printer = std::function; - using DBGFunc = std::function; + using SchemalessDBGFunc = std::function; + using SchemafulDBGFunc = std::function; DBGInvoker(); - void regFunc(const std::string & name, DBGFunc func) { funcs[name] = func; } + void regSchemalessFunc(const std::string & name, SchemalessDBGFunc func) { schemaless_funcs[name] = func; } + void regSchemafulFunc(const std::string & name, SchemafulDBGFunc func) { schemaful_funcs[name] = func; } BlockInputStreamPtr invoke(Context & context, const std::string & ori_name, const ASTs & args); + BlockInputStreamPtr invokeSchemaless(Context & context, const std::string & name, const SchemalessDBGFunc & func, const ASTs & args); + BlockInputStreamPtr invokeSchemaful(Context & context, const std::string & name, const SchemafulDBGFunc & func, const ASTs & args); private: - std::unordered_map funcs; + std::unordered_map schemaless_funcs; + std::unordered_map schemaful_funcs; }; } // namespace DB diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 70c25cec551..e32fead22d2 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -174,7 +174,7 @@ TableID MockTiDB::newTable(const String & database_name, const String & table_na table_info.id = table_id_allocator++; table_info.name = table_name; - int i = 0; + int i = 1; for (auto & column : columns.getAllPhysical()) { table_info.columns.emplace_back(getColumnInfoFromColumn(column, i++)); diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp new file mode 100644 index 00000000000..1277b3fe5e4 --- /dev/null +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} // namespace ErrorCodes + +using DAGField = std::pair; +using DAGSchema = std::vector; +using SchemaFetcher = std::function; +std::tuple compileQuery( + Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts); +tipb::SelectResponse executeDAGRequest( + Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version); +BlockInputStreamPtr outputDAGResponse(Context & context, const DAGSchema & schema, const tipb::SelectResponse & dag_response); + +BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) +{ + if (args.size() < 1 || args.size() > 2) + throw Exception("Args not matched, should be: query[, region-id]", ErrorCodes::BAD_ARGUMENTS); + + String query = safeGet(typeid_cast(*args[0]).value); + RegionID region_id = InvalidRegionID; + if (args.size() == 2) + region_id = safeGet(typeid_cast(*args[1]).value); + Timestamp start_ts = context.getTMTContext().getPDClient()->getTS(); + + auto [table_id, schema, dag_request] = compileQuery(context, query, + [&](const String & database_name, const String & table_name) { + auto storage = 
context.getTable(database_name, table_name); + auto mmt = std::dynamic_pointer_cast(storage); + if (!mmt || mmt->getData().merging_params.mode != MergeTreeData::MergingParams::Txn) + throw Exception("Not TMT", ErrorCodes::BAD_ARGUMENTS); + return mmt->getTableInfo(); + }, + start_ts); + + RegionPtr region; + if (region_id == InvalidRegionID) + { + auto regions = context.getTMTContext().getRegionTable().getRegionsByTable(table_id); + if (regions.empty()) + throw Exception("No region for table", ErrorCodes::BAD_ARGUMENTS); + region = context.getTMTContext().getRegionTable().getRegionsByTable(table_id).front().second; + } + else + { + region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, region_id); + if (!region) + throw Exception("No such region", ErrorCodes::BAD_ARGUMENTS); + } + tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region_id, region->version(), region->confVer()); + + return outputDAGResponse(context, schema, dag_response); +} + +BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) +{ + if (args.size() < 2 || args.size() > 3) + throw Exception("Args not matched, should be: query, region-id[, start-ts]", ErrorCodes::BAD_ARGUMENTS); + + String query = safeGet(typeid_cast(*args[0]).value); + RegionID region_id = safeGet(typeid_cast(*args[1]).value); + Timestamp start_ts = DEFAULT_MAX_READ_TSO; + if (args.size() == 3) + start_ts = safeGet(typeid_cast(*args[2]).value); + if (start_ts == 0) + start_ts = context.getTMTContext().getPDClient()->getTS(); + + auto [table_id, schema, dag_request] = compileQuery(context, query, + [&](const String & database_name, const String & table_name) { + return MockTiDB::instance().getTableByName(database_name, table_name)->table_info; + }, + start_ts); + std::ignore = table_id; + + RegionPtr region = context.getTMTContext().getKVStore()->getRegion(region_id); + tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region_id, region->version(), region->confVer()); + + return outputDAGResponse(context, schema, dag_response); +} + +std::tuple compileQuery( + Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts) +{ + DAGSchema schema; + tipb::DAGRequest dag_request; + + dag_request.set_start_ts(start_ts); + + ParserSelectQuery parser; + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "from DAG compiler", 0); + ASTSelectQuery & ast_query = typeid_cast(*ast); + + String database_name, table_name; + auto query_database = ast_query.database(); + auto query_table = ast_query.table(); + if (query_database) + database_name = typeid_cast(*query_database).name; + if (query_table) + table_name = typeid_cast(*query_table).name; + if (!query_table) + { + database_name = "system"; + table_name = "one"; + } + else if (!query_database) + { + database_name = context.getCurrentDatabase(); + } + auto table_info = schema_fetcher(database_name, table_name); + + tipb::Executor * executor = dag_request.add_executors(); + executor->set_tp(tipb::ExecType::TypeTableScan); + tipb::TableScan * ts = executor->mutable_tbl_scan(); + ts->set_table_id(table_info.id); + size_t i = 0; + for (const auto & column_info : table_info.columns) + { + tipb::ColumnInfo * ci = ts->add_columns(); + ci->set_column_id(column_info.id); + ci->set_tp(column_info.tp); + ci->set_flag(column_info.flag); + + tipb::FieldType field_type; + field_type.set_tp(column_info.tp); + field_type.set_flag(column_info.flag); + 
field_type.set_flen(column_info.flen); + field_type.set_decimal(column_info.decimal); + schema.emplace_back(std::make_pair(column_info.name, std::move(field_type))); + + dag_request.add_output_offsets(i); + + i++; + } + + // TODO: Other operator compile. + + return std::make_tuple(table_info.id, std::move(schema), std::move(dag_request)); +} + +tipb::SelectResponse executeDAGRequest( + Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version) +{ + tipb::SelectResponse dag_response; + DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, dag_response, true); + driver.execute(); + return dag_response; +} + +BlockInputStreamPtr outputDAGResponse(Context &, const DAGSchema & schema, const tipb::SelectResponse & dag_response) +{ + BlocksList blocks; + for (const auto & chunk : dag_response.chunks()) + { + std::vector> rows; + std::vector curr_row; + const std::string & data = chunk.rows_data(); + size_t cursor = 0; + while (cursor < data.size()) + { + curr_row.push_back(DB::DecodeDatum(cursor, data)); + if (curr_row.size() == schema.size()) + { + rows.emplace_back(std::move(curr_row)); + curr_row.clear(); + } + } + + ColumnsWithTypeAndName columns; + for (auto & field : schema) + { + const auto & name = field.first; + auto data_type = getDataTypeByFieldType(field.second); + ColumnWithTypeAndName col(data_type, name); + col.column->assumeMutable()->reserve(rows.size()); + columns.emplace_back(std::move(col)); + } + for (const auto & row : rows) + { + for (size_t i = 0; i < row.size(); i++) + { + columns[i].column->assumeMutable()->insert(row[i]); + } + } + + blocks.emplace_back(Block(columns)); + } + + return std::make_shared(std::move(blocks)); +} + +} // namespace DB diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h new file mode 100644 index 00000000000..eb8cc989fd5 --- /dev/null +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class Context; + +// Coprocessor debug tools + +// Run a DAG request using given query that will be compiled to DAG request, with the given (optional) region ID. +// Usage: +// ./storages-client.sh "DBGInvoke dag(query[, region_id])" +BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args); + +// Mock a DAG request using given query that will be compiled (with the metadata from MockTiDB) to DAG request, with the given region ID and (optional) start ts. +// Usage: +// ./storages-client.sh "DBGInvoke mock_dag(query, region_id[, start_ts])" +BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args); + +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 9221dc38bef..30e492f360f 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -8,10 +8,12 @@ namespace DB class Context; +/// A context used to track the information that needs to be passed around during DAG planning. 
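+/// Today that information is just one list of profiling streams per executor
+/// (the constructor sizes it by the executor count); after execution, DAGDriver
+/// walks this list to fill the per-executor ExecutorExecutionSummary of the response.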
class DAGContext { public: DAGContext(size_t profile_list_size) { profile_streams_list.resize(profile_list_size); }; std::vector profile_streams_list; }; + } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index a6c391e89e5..ed051b22119 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -18,13 +18,14 @@ extern const int LOGICAL_ERROR; } DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, - UInt64 region_conf_version_, tipb::SelectResponse & dag_response_) + UInt64 region_conf_version_, tipb::SelectResponse & dag_response_, bool internal_) : context(context_), dag_request(dag_request_), region_id(region_id_), region_version(region_version_), region_conf_version(region_conf_version_), - dag_response(dag_response_) + dag_response(dag_response_), + internal(internal_) {} void DAGDriver::execute() @@ -41,11 +42,11 @@ void DAGDriver::execute() DAGStringConverter converter(context, dag_request); String query = converter.buildSqlString(); if (!query.empty()) - streams = executeQuery(query, context, false, QueryProcessingStage::Complete); + streams = executeQuery(query, context, internal, QueryProcessingStage::Complete); } else if (planner == "optree") { - streams = executeQuery(dag, context, QueryProcessingStage::Complete); + streams = executeQuery(dag, context, internal, QueryProcessingStage::Complete); } else { diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index b0143591bd5..a9eda48b025 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -15,7 +15,7 @@ class DAGDriver { public: DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, - UInt64 region_conf_version_, tipb::SelectResponse & dag_response_); + UInt64 region_conf_version_, tipb::SelectResponse & dag_response_, bool internal_ = false); void execute(); @@ -29,5 +29,7 @@ class DAGDriver UInt64 region_conf_version; tipb::SelectResponse & dag_response; + + bool internal; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 0e3779363a0..e407d5c0d6b 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -87,10 +87,7 @@ void DAGExpressionAnalyzer::appendAggregation( aggregated_columns.emplace_back(func_string, result_type); } - for (auto name : agg_argument_names) - { - step.required_output.push_back(std::move(name)); - } + std::move(agg_argument_names.begin(), agg_argument_names.end(), std::back_inserter(step.required_output)); for (const tipb::Expr & expr : agg.group_by()) { diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 715d829a926..e8eea5a1491 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -176,19 +176,19 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() { AnalysisResult res; ExpressionActionsChain chain; - res.need_aggregate = dag.hasAggregation(); DAGExpressionAnalyzer analyzer(source_columns, context); if (dag.hasSelection()) { analyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name); res.has_where = true; res.before_where = chain.getLastActions(); - 
res.filter_column_name = chain.steps.back().required_output[0]; chain.addStep(); } - if (res.need_aggregate) + // There will be either Agg... + if (dag.hasAggregation()) { analyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); + res.need_aggregate = true; res.before_aggregation = chain.getLastActions(); chain.finalize(); @@ -202,17 +202,22 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() final_project.emplace_back(element.name, ""); } } + // Or TopN, not both. if (dag.hasTopN()) { res.has_order_by = true; analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); } - // append final project results - for (auto & name : final_project) + // Append final project results if needed. + // TODO: Refine this logic by an `analyzer.appendFinalProject()`-like call. + if (dag.hasSelection() || dag.hasAggregation() || dag.hasTopN()) { - chain.steps.back().required_output.push_back(name.first); + for (auto & name : final_project) + { + chain.steps.back().required_output.push_back(name.first); + } + res.before_order_and_select = chain.getLastActions(); } - res.before_order_and_select = chain.getLastActions(); chain.finalize(); chain.clear(); //todo need call prependProjectInput?? @@ -453,7 +458,10 @@ void InterpreterDAG::executeImpl(Pipeline & pipeline) executeAggregation(pipeline, res.before_aggregation, res.aggregation_keys, res.aggregate_descriptions); recordProfileStreams(pipeline, dag.getAggregationIndex()); } - executeExpression(pipeline, res.before_order_and_select); + if (res.before_order_and_select) + { + executeExpression(pipeline, res.before_order_and_select); + } if (res.has_order_by) { diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 698da73c9b6..6ebe2f72b50 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -392,10 +392,10 @@ BlockIO executeQuery( } -BlockIO executeQuery(DAGQuerySource & dag, Context & context, QueryProcessingStage::Enum stage) +BlockIO executeQuery(DAGQuerySource & dag, Context & context, bool internal, QueryProcessingStage::Enum stage) { BlockIO streams; - std::tie(std::ignore, streams) = executeQueryImpl(dag, context, false, stage); + std::tie(std::ignore, streams) = executeQueryImpl(dag, context, internal, stage); return streams; } diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index e8f4bbd266b..4f4ef136ed2 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -41,6 +41,6 @@ BlockIO executeQuery( ); -BlockIO executeQuery(DAGQuerySource & dag, Context & context, QueryProcessingStage::Enum stage); +BlockIO executeQuery(DAGQuerySource & dag, Context & context, bool internal, QueryProcessingStage::Enum stage); } diff --git a/tests/mutable-test/txn_dag/table_scan.test b/tests/mutable-test/txn_dag/table_scan.test new file mode 100644 index 00000000000..28d6599f6de --- /dev/null +++ b/tests/mutable-test/txn_dag/table_scan.test @@ -0,0 +1,35 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1') + +# DAG read by not specifying region id. 
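+# (With no region id given, dbgFuncDAG takes the first region that the
+# RegionTable currently holds for the table.)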
+=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌─col_1─┐ +│ test1 │ +└───────┘ + +# DAG read by explicitly specifying region id. +=> DBGInvoke dag('select * from default.test', 4) " --dag_planner="optree +┌─col_1─┐ +│ test1 │ +└───────┘ + +# Mock DAG read. +=> DBGInvoke mock_dag('select * from default.test', 4) " --dag_planner="optree +┌─col_1─┐ +│ test1 │ +└───────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test From 57cd3824dc89beef1c33f7831df1f044f37637e2 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Fri, 9 Aug 2019 16:34:54 +0800 Subject: [PATCH 32/79] Fix DAG get and lock storage --- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index e8eea5a1491..be5ad0df950 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -296,8 +296,11 @@ void InterpreterDAG::executeExpression(Pipeline & pipeline, const ExpressionActi } } -void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 schema_version) +void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 query_schema_version) { + /// Get current schema version in schema syncer for a chance to shortcut. + auto global_schema_version = context.getTMTContext().getSchemaSyncer()->getCurrentVersion(); + /// Lambda for get storage, then align schema version under the read lock. auto get_and_lock_storage = [&](bool schema_synced) -> std::tuple { /// Get storage in case it's dropped then re-created. @@ -321,12 +324,17 @@ void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 /// Check schema version. auto storage_schema_version = storage_->getTableInfo().schema_version; - if (storage_schema_version > schema_version) + // Not allow storage schema version greater than query schema version in any case. + if (storage_schema_version > query_schema_version) throw Exception("Table " + std::to_string(table_id) + " schema version " + std::to_string(storage_schema_version) - + " newer than query schema version " + std::to_string(schema_version), + + " newer than query schema version " + std::to_string(query_schema_version), ErrorCodes::SCHEMA_VERSION_ERROR); - if ((schema_synced && storage_schema_version <= schema_version) || (!schema_synced && storage_schema_version == schema_version)) + // If schema synced, we must be very recent so we are good as long as storage schema version is no greater than query schema version. + // If schema not synced, we are good if storage schema version is right on query schema version. + // Otherwise we are at the risk of out-of-date schema, but we still have a chance to be sure that we are good, if global schema version is greater than query schema version. 
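+        // As a table, with S = storage schema version, Q = query schema version,
+        // G = global schema version known to the syncer:
+        //   schema_synced == true : usable iff S <= Q
+        //   schema_synced == false: usable iff S == Q, or G > Q (a newer sync already covers the query)
+        //   otherwise             : fail this attempt so the caller syncs schemas and retries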
+ if ((schema_synced && storage_schema_version <= query_schema_version) + || (!schema_synced && (storage_schema_version == query_schema_version || global_schema_version > query_schema_version))) return std::make_tuple(storage_, lock, storage_schema_version, true); return std::make_tuple(nullptr, nullptr, storage_schema_version, false); @@ -336,14 +344,17 @@ void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 TMTStoragePtr storage_; TableStructureReadLockPtr lock; Int64 storage_schema_version; + auto log_schema_version = [&](const String & result) { + LOG_DEBUG(log, + __PRETTY_FUNCTION__ << " Table " << table_id << " schema " << result << " Schema version [storage, global, query]: " + << "[" << storage_schema_version << ", " << global_schema_version << ", " << query_schema_version << "]."); + }; bool ok; { std::tie(storage_, lock, storage_schema_version, ok) = get_and_lock_storage(false); if (ok) { - LOG_DEBUG(log, - __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version - << ", query schema version: " << schema_version << ", OK, no syncing required."); + log_schema_version("OK, no syncing required."); storage = storage_; table_lock = lock; return; @@ -352,9 +363,7 @@ void InterpreterDAG::getAndLockStorageWithSchemaVersion(TableID table_id, Int64 /// If first try failed, sync schema and try again. { - LOG_DEBUG(log, - __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version - << ", query schema version: " << schema_version << ", not OK, syncing schemas."); + log_schema_version("not OK, syncing schemas."); auto start_time = Clock::now(); context.getTMTContext().getSchemaSyncer()->syncSchemas(context); auto schema_sync_cost = std::chrono::duration_cast(Clock::now() - start_time).count(); @@ -363,9 +372,7 @@ std::tie(storage_, lock, storage_schema_version, ok) = get_and_lock_storage(true); if (ok) { - LOG_DEBUG(log, - __PRETTY_FUNCTION__ << " Table " << table_id << " schema version: " << storage_schema_version - << ", query schema version: " << schema_version << ", OK after syncing."); + log_schema_version("OK after syncing."); storage = storage_; table_lock = lock; return; From 4a76e912af09e1e48e3bc1ddc0a21991771dde6f Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 12 Aug 2019 15:04:42 +0800 Subject: [PATCH 33/79] handle error in cop request (#171) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meeting errors during cop request handling * address comments * add error code * throw exception when meeting errors during cop request handling * address comments * add DAGContext so InterpreterDAG can exchange information with DAGDriver * fix bug * 1. refine code, 2.
address comments * update comments * columnref index is based on executor output schema * handle error in coprocessor request * refine code * use Clear to clear a protobuf message completely * refine code --- .../Flash/Coprocessor/CoprocessorHandler.cpp | 61 ++++++++++++++++++- .../Flash/Coprocessor/CoprocessorHandler.h | 2 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 37 ++++++++++- dbms/src/Flash/Coprocessor/DAGDriver.h | 2 + dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 3 +- dbms/src/Flash/FlashService.cpp | 47 ++------------ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 18 +++--- .../Storages/Transaction/RegionException.h | 6 +- 8 files changed, 117 insertions(+), 59 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp index aed26c39fb2..a92f98b2945 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -22,7 +24,8 @@ CoprocessorHandler::CoprocessorHandler( : cop_context(cop_context_), cop_request(cop_request_), cop_response(cop_response_), log(&Logger::get("CoprocessorHandler")) {} -void CoprocessorHandler::execute() +grpc::Status CoprocessorHandler::execute() +try { switch (cop_request->tp()) { @@ -45,6 +48,62 @@ void CoprocessorHandler::execute() throw Exception( "Coprocessor request type " + std::to_string(cop_request->tp()) + " is not implemented", ErrorCodes::NOT_IMPLEMENTED); } + return ::grpc::Status(::grpc::StatusCode::OK, ""); +} +catch (const LockException & e) +{ + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": LockException: " << e.displayText()); + cop_response->Clear(); + kvrpcpb::LockInfo * lock_info = cop_response->mutable_locked(); + lock_info->set_key(e.lock_infos[0]->key); + lock_info->set_primary_lock(e.lock_infos[0]->primary_lock); + lock_info->set_lock_ttl(e.lock_infos[0]->lock_ttl); + lock_info->set_lock_version(e.lock_infos[0]->lock_version); + // return ok so TiDB has the chance to see the LockException + return ::grpc::Status(::grpc::StatusCode::OK, ""); +} +catch (const RegionException & e) +{ + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": RegionException: " << e.displayText()); + cop_response->Clear(); + errorpb::Error * region_err; + switch (e.status) + { + case RegionTable::RegionReadStatus::NOT_FOUND: + case RegionTable::RegionReadStatus::PENDING_REMOVE: + region_err = cop_response->mutable_region_error(); + region_err->mutable_region_not_found()->set_region_id(cop_request->context().region_id()); + break; + case RegionTable::RegionReadStatus::VERSION_ERROR: + region_err = cop_response->mutable_region_error(); + region_err->mutable_epoch_not_match(); + break; + default: + // should not happen + break; + } + // return ok so TiDB has the chance to see the RegionException + return ::grpc::Status(::grpc::StatusCode::OK, ""); +} +catch (const Exception & e) +{ + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); + cop_response->Clear(); + cop_response->set_other_error(e.message()); + + if (e.code() == ErrorCodes::NOT_IMPLEMENTED) + return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); + + // TODO: Map other DB error codes to grpc codes.
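+    // One possible mapping, sketched as a comment only (these pairings are an
+    // assumption, not part of this change):
+    //   ErrorCodes::COP_BAD_DAG_REQUEST -> grpc::StatusCode::INVALID_ARGUMENT
+    //   ErrorCodes::TIMEOUT_EXCEEDED    -> grpc::StatusCode::DEADLINE_EXCEEDED
+    // Everything unmapped keeps falling back to INTERNAL below.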
+ + return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.message()); +} +catch (const std::exception & e) +{ + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.what()); + cop_response->Clear(); + cop_response->set_other_error(e.what()); + return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.what()); } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h index 2aafa8a71ff..517875e9ace 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h +++ b/dbms/src/Flash/Coprocessor/CoprocessorHandler.h @@ -36,7 +36,7 @@ class CoprocessorHandler ~CoprocessorHandler() = default; - void execute(); + grpc::Status execute(); protected: enum diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index ed051b22119..6e25308f5ba 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -9,13 +9,16 @@ #include #include #include +#include +#include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; -} +extern const int UNKNOWN_EXCEPTION; +} // namespace ErrorCodes DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, tipb::SelectResponse & dag_response_, bool internal_) @@ -29,6 +32,7 @@ DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, {} void DAGDriver::execute() +try { context.setSetting("read_tso", UInt64(dag_request.start_ts())); @@ -57,8 +61,11 @@ void DAGDriver::execute() // Only query is allowed, so streams.in must not be null and streams.out must be null throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); - BlockOutputStreamPtr outputStreamPtr = std::make_shared(dag_response, context.getSettings().dag_records_per_chunk, - dag_request.encode_type(), dag.getResultFieldTypes(), streams.in->getHeader()); + BlockOutputStreamPtr outputStreamPtr = std::make_shared(dag_response, + context.getSettings().dag_records_per_chunk, + dag_request.encode_type(), + dag.getResultFieldTypes(), + streams.in->getHeader()); copyData(*streams.in, *outputStreamPtr); // add ExecutorExecutionSummary info for (auto & p_streams : dag_context.profile_streams_list) @@ -81,5 +88,29 @@ void DAGDriver::execute() executeSummary->set_num_iterations(num_iterations); } } +catch (const RegionException & e) +{ + e.rethrow(); +} +catch (const LockException & e) +{ + e.rethrow(); +} +catch (const Exception & e) +{ + recordError(e.code(), e.message()); +} +catch (const std::exception & e) +{ + recordError(ErrorCodes::UNKNOWN_EXCEPTION, e.what()); +} + +void DAGDriver::recordError(Int32 err_code, const String & err_msg) +{ + dag_response.Clear(); + tipb::Error * error = dag_response.mutable_error(); + error->set_code(err_code); + error->set_msg(err_msg); +} } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index a9eda48b025..4c84cfcb020 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -31,5 +31,7 @@ class DAGDriver tipb::SelectResponse & dag_response; bool internal; + + void recordError(Int32 err_code, const String & err_msg); }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index be5ad0df950..beb8c0a3bd0 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ 
-129,10 +129,9 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) auto current_region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, info.region_id); if (!current_region) { - //todo add more region error info in RegionException std::vector region_ids; region_ids.push_back(info.region_id); - throw RegionException(region_ids); + throw RegionException(std::move(region_ids), RegionTable::RegionReadStatus::NOT_FOUND); } info.range_in_table = current_region->getHandleRangeByTable(table_id); query_info.mvcc_query_info->regions_query_info.push_back(info); diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 1abb6189fa9..2b8941f0472 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -2,8 +2,6 @@ #include #include -#include -#include #include #include @@ -51,48 +49,13 @@ grpc::Status FlashService::Coprocessor( return status; } - try - { - CoprocessorContext cop_context(context, request->context(), *grpc_context); - CoprocessorHandler cop_handler(cop_context, request, response); + CoprocessorContext cop_context(context, request->context(), *grpc_context); + CoprocessorHandler cop_handler(cop_context, request, response); - cop_handler.execute(); + auto ret = cop_handler.execute(); - LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle coprocessor request done"); - return ::grpc::Status(::grpc::StatusCode::OK, ""); - } - catch (const LockException & e) - { - // TODO: handle lock error properly. - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": LockException: " << e.displayText()); - response->set_data(""); - return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); - } - catch (const RegionException & e) - { - // TODO: handle region error properly. - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": RegionException: " << e.displayText()); - response->set_data(""); - return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); - } - catch (const Exception & e) - { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); - response->set_data(""); - - if (e.code() == ErrorCodes::NOT_IMPLEMENTED) - return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, e.message()); - - // TODO: Map other DB error codes to grpc codes. - - return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.message()); - } - catch (const std::exception & e) - { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.what()); - response->set_data(""); - return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.what()); - } + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle coprocessor request done"); + return ret; } String getClientMetaVarWithDefault(grpc::ServerContext * grpc_context, const String & name, const String & default_val) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index b9d6773207a..8b1e15a95f3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -222,12 +222,12 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t // the index of column is constant after MergeTreeBlockInputStream is constructed. exception will be thrown if not found. 
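    // (the TMT read path is expected to order them as handle, version, delete mark, hence these fixed offsets)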
const size_t handle_column_index = 0, version_column_index = 1, delmark_column_index = 2; - const auto func_throw_retry_region = [&]() { + const auto func_throw_retry_region = [&](RegionTable::RegionReadStatus status) { std::vector region_ids; region_ids.reserve(regions_executor_data.size()); for (const auto & query_info : regions_executor_data) region_ids.push_back(query_info.info.region_id); - throw RegionException(region_ids); + throw RegionException(std::move(region_ids), status); }; /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. @@ -314,7 +314,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t if (region == nullptr) { LOG_WARNING(log, "[region " << query_info.info.region_id << "] is not found in KVStore, try again"); - func_throw_retry_region(); + func_throw_retry_region(RegionTable::RegionReadStatus::NOT_FOUND); } kvstore_region.emplace(query_info.info.region_id, std::move(region)); } @@ -331,13 +331,13 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t auto start_time = Clock::now(); const size_t mem_region_num = regions_executor_data.size(); const size_t batch_size = mem_region_num / concurrent_num; - std::atomic_bool need_retry = false; + std::atomic_uint8_t region_status = RegionTable::RegionReadStatus::OK; const auto func_run_learner_read = [&](const size_t region_begin) { const size_t region_end = std::min(region_begin + batch_size, mem_region_num); for (size_t region_index = region_begin; region_index < region_end; ++region_index) { - if (need_retry) + if (region_status != RegionTable::RegionReadStatus::OK) return; RegionQueryInfo & region_query_info = regions_executor_data[region_index].info; @@ -359,7 +359,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t << ", handle range [" << region_query_info.range_in_table.first.toString() << ", " << region_query_info.range_in_table.second.toString() << ") , status " << RegionTable::RegionReadStatusString(status)); - need_retry = true; + region_status = status; } else if (block) regions_executor_data[region_index].block = std::move(block); @@ -379,8 +379,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t func_run_learner_read(0); } - if (need_retry) - func_throw_retry_region(); + if (region_status != RegionTable::RegionReadStatus::OK) + func_throw_retry_region(static_cast(region_status.load())); auto end_time = Clock::now(); LOG_DEBUG(log, @@ -862,7 +862,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(const Names & column_names_t << region_query_info.range_in_table.second.toString() << ") , status " << RegionTable::RegionReadStatusString(status)); // throw exception and exit. 
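                        // (the concrete RegionReadStatus is attached below, so the retrying side can tell a missing region from a stale epoch)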
- func_throw_retry_region(); + func_throw_retry_region(status); } } } diff --git a/dbms/src/Storages/Transaction/RegionException.h b/dbms/src/Storages/Transaction/RegionException.h index 9e661d9eeb8..dfa20262dd4 100644 --- a/dbms/src/Storages/Transaction/RegionException.h +++ b/dbms/src/Storages/Transaction/RegionException.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB @@ -9,9 +10,12 @@ namespace DB class RegionException : public Exception { public: - explicit RegionException(std::vector region_ids_) : region_ids(region_ids_) {} + RegionException(std::vector && region_ids_, RegionTable::RegionReadStatus status_) + : Exception(RegionTable::RegionReadStatusString(status_)), region_ids(region_ids_), status(status_) + {} std::vector region_ids; + RegionTable::RegionReadStatus status; }; } // namespace DB From 2d093a82d4c1e8c2f57cd59880a0afc32eca27ab Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 12 Aug 2019 16:04:19 +0800 Subject: [PATCH 34/79] code refine && several minor bug fix (#174) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meeting errors during cop request handling * address comments * add error code * throw exception when meeting errors during cop request handling * address comments * add DAGContext so InterpreterDAG can exchange information with DAGDriver * fix bug * 1. refine code, 2. address comments * update comments * columnref index is based on executor output schema * handle error in coprocessor request * refine code * use Clear to clear a protobuf message completely * refine code * code refine && several minor bug fix * address comments * address comments --- .../Coprocessor/DAGBlockOutputStream.cpp | 2 + dbms/src/Flash/Coprocessor/DAGDriver.cpp | 2 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 13 +++- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 1 + dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 4 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 11 +-- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 75 ++++++++++++------- dbms/src/Flash/FlashService.cpp | 4 +- dbms/src/Interpreters/Settings.h | 1 + dbms/src/Storages/Transaction/Codec.h | 9 +++ 10 files changed, 82 insertions(+), 48 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp index 74cf847118f..0ef25b08700 100644 --- a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp @@ -42,6 +42,7 @@ void DAGBlockOutputStream::writeSuffix() if (current_chunk != nullptr && records_per_chunk > 0) { current_chunk->set_rows_data(current_ss.str()); + dag_response.add_output_counts(current_records_num); } } @@ -62,6 +63,7 @@ void DAGBlockOutputStream::write(const Block & block) { // set the current ss to current chunk current_chunk->set_rows_data(current_ss.str()); + dag_response.add_output_counts(current_records_num); } current_chunk = dag_response.add_chunks(); current_ss.str(""); diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 6e25308f5ba..62e6f861db0 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -78,7 +78,7 @@ try { if (auto * p_stream = dynamic_cast(streamPtr.get())) { - time_processed_ns += p_stream->getProfileInfo().total_stopwatch.elapsed(); + time_processed_ns = std::max(time_processed_ns, p_stream->getProfileInfo().total_stopwatch.elapsed());
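+                    // (max rather than a sum: presumably because the profiling streams of one
+                    // executor run concurrently, so their stopwatch times overlap instead of
+                    // accumulating; the commit message does not spell this out)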
num_produced_rows += p_stream->getProfileInfo().rows; num_iterations += p_stream->getProfileInfo().blocks; } diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index e407d5c0d6b..5b8b5fa9165 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -143,6 +143,15 @@ void DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return after_agg ? aggregated_columns : source_columns; } +void DAGExpressionAnalyzer::appendFinalProject(ExpressionActionsChain & chain, const NamesWithAliases & final_project) +{ + initChain(chain, getCurrentInputColumns()); + for (auto name : final_project) + { + chain.steps.back().required_output.push_back(name.first); + } +} + void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & aggregation) { initChain(chain, getCurrentInputColumns()); @@ -199,11 +208,11 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String & expr_name) { - if (!expr.has_field_type()) + if (!expr.has_field_type() && context.getSettingsRef().dag_expr_field_type_strict_check) { throw Exception("Expression without field type", ErrorCodes::COP_BAD_DAG_REQUEST); } - if (isFunctionExpr(expr)) + if (expr.has_field_type() && isFunctionExpr(expr)) { DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index b05b974d37a..cdc3acbac5b 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -44,6 +44,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable chain.steps.emplace_back(std::make_shared(columns, settings)); } } + void appendFinalProject(ExpressionActionsChain & chain, const NamesWithAliases & final_project); String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); const NamesAndTypesList & getCurrentInputColumns(); }; diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index d59e93401ea..3b2d3f4b8c3 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -90,7 +90,7 @@ bool fillExecutorOutputFieldTypes(const tipb::Executor & executor, std::vector DAGQuerySource::getResultFieldTypes() const throw Exception("Do not found result field type for current dag request", ErrorCodes::COP_BAD_DAG_REQUEST); } // tispark assumes that if there is a agg, the output offset is - // ignored and the request out put is the same as the agg's output. + // ignored and the request output is the same as the agg's output. 
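    // e.g. for `select count(s) from t group by i` the result field types are taken
    // as [count(s), i] straight from the aggregation, whatever the output offsets say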
// todo should always use output offset to re-construct the output field types if (hasAggregation()) { diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index beb8c0a3bd0..be2f8700a04 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -208,15 +208,8 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); } // Append final project results if needed. - // TODO: Refine this logic by an `analyzer.appendFinalProject()`-like call. - if (dag.hasSelection() || dag.hasAggregation() || dag.hasTopN()) - { - for (auto & name : final_project) - { - chain.steps.back().required_output.push_back(name.first); - } - res.before_order_and_select = chain.getLastActions(); - } + analyzer.appendFinalProject(chain, final_project); + res.before_order_and_select = chain.getLastActions(); chain.finalize(); chain.clear(); //todo need call prependProjectInput?? diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index b82525eb901..e18c3c4dd74 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -98,16 +98,11 @@ class FlashClient }; using ClientPtr = std::shared_ptr; -grpc::Status rpcTest() + +void appendTS(tipb::DAGRequest & dag_request, size_t & result_field_num) { - ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); - ClientPtr clientPtr = std::make_shared(cp); - size_t result_field_num = 0; - // construct a dag request - tipb::DAGRequest dagRequest; - dagRequest.set_start_ts(18446744073709551615uL); // table scan: s,i - tipb::Executor * executor = dagRequest.add_executors(); + tipb::Executor * executor = dag_request.add_executors(); executor->set_tp(tipb::ExecType::TypeTableScan); tipb::TableScan * ts = executor->mutable_tbl_scan(); ts->set_table_id(44); @@ -119,13 +114,16 @@ grpc::Status rpcTest() ci->set_column_id(2); ci->set_tp(8); ci->set_flag(0); - dagRequest.add_output_offsets(1); - dagRequest.add_output_offsets(0); - dagRequest.add_output_offsets(1); + dag_request.add_output_offsets(1); + dag_request.add_output_offsets(0); + dag_request.add_output_offsets(1); result_field_num = 3; +} +void appendSelection(tipb::DAGRequest & dag_request) +{ // selection: less(i, 123) - executor = dagRequest.add_executors(); + auto * executor = dag_request.add_executors(); executor->set_tp(tipb::ExecType::TypeSelection); tipb::Selection * selection = executor->mutable_selection(); tipb::Expr * expr = selection->add_conditions(); @@ -150,16 +148,19 @@ grpc::Status rpcTest() type = expr->mutable_field_type(); type->set_tp(1); type->set_flag(1 << 5); +} +void appendAgg(tipb::DAGRequest & dag_request, size_t & result_field_num) +{ // agg: count(s) group by i; - executor = dagRequest.add_executors(); + auto * executor = dag_request.add_executors(); executor->set_tp(tipb::ExecType::TypeAggregation); auto agg = executor->mutable_aggregation(); auto agg_func = agg->add_agg_func(); agg_func->set_tp(tipb::ExprType::Count); auto child = agg_func->add_children(); child->set_tp(tipb::ExprType::ColumnRef); - ss.str(""); + std::stringstream ss; DB::EncodeNumber(0, ss); child->set_val(ss.str()); auto f_type = agg_func->mutable_field_type(); @@ -174,10 +175,11 @@ grpc::Status rpcTest() f_type->set_tp(8); f_type->set_flag(1); result_field_num = 2; +} - // topn - /* - executor = 
dagRequest.add_executors(); +void appendTopN(tipb::DAGRequest & dag_request) +{ + auto * executor = dag_request.add_executors(); executor->set_tp(tipb::ExecType::TypeTopN); tipb::TopN * topN = executor->mutable_topn(); topN->set_limit(3); @@ -185,21 +187,44 @@ grpc::Status rpcTest() byItem->set_desc(false); tipb::Expr * expr1 = byItem->mutable_expr(); expr1->set_tp(tipb::ExprType::ColumnRef); - ss.str(""); + std::stringstream ss; DB::EncodeNumber(1, ss); expr1->set_val(ss.str()); - type = expr1->mutable_field_type(); + auto * type = expr1->mutable_field_type(); type->set_tp(8); type->set_tp(0); - */ - // limit - /* - executor = dagRequest.add_executors(); +} + +void appendLimit(tipb::DAGRequest & dag_request) +{ + auto * executor = dag_request.add_executors(); executor->set_tp(tipb::ExecType::TypeLimit); - tipb::Limit *limit = executor->mutable_limit(); + tipb::Limit * limit = executor->mutable_limit(); limit->set_limit(5); - */ +} + +grpc::Status rpcTest() +{ + ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); + ClientPtr clientPtr = std::make_shared(cp); + size_t result_field_num = 0; + bool has_selection = false; + bool has_agg = true; + bool has_topN = false; + bool has_limit = false; + // construct a dag request + tipb::DAGRequest dagRequest; + dagRequest.set_start_ts(18446744073709551615uL); + appendTS(dagRequest, result_field_num); + if (has_selection) + appendSelection(dagRequest); + if (has_agg) + appendAgg(dagRequest, result_field_num); + if (has_topN) + appendTopN(dagRequest); + if (has_limit) + appendLimit(dagRequest); // construct a coprocessor request coprocessor::Request request; diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 2b8941f0472..0489b6b8777 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -96,8 +96,10 @@ std::tuple FlashService::createDBContext(grpc::ServerCo { context.setSetting("dag_records_per_chunk", dag_records_per_chunk_str); } - std::string planner = getClientMetaVarWithDefault(grpc_context, "dag_planner", "sql"); + std::string planner = getClientMetaVarWithDefault(grpc_context, "dag_planner", "optree"); context.setSetting("dag_planner", planner); + std::string expr_field_type_check = getClientMetaVarWithDefault(grpc_context, "dag_expr_field_type_strict_check", "1"); + context.setSetting("dag_expr_field_type_strict_check", expr_field_type_check); return std::make_tuple(context, ::grpc::Status::OK); } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 1efd1dfcad3..86b42cf9ce2 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -31,6 +31,7 @@ struct Settings M(SettingUInt64, read_tso, DEFAULT_MAX_READ_TSO, "tmt read tso.") \ M(SettingInt64, dag_records_per_chunk, DEFAULT_DAG_RECORDS_PER_CHUNK, "default chunk size of a DAG response.") \ M(SettingString, dag_planner, "sql", "planner for DAG query, sql builds the SQL string, optree builds the internal operator(stream) tree.") \ + M(SettingBool, dag_expr_field_type_strict_check, true, "when set to true, every expr in the dag request must provide field type, otherwise only the result expr will be checked.") \ M(SettingInt64, schema_version, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, "tmt schema version.") \ M(SettingUInt64, min_compress_block_size, DEFAULT_MIN_COMPRESS_BLOCK_SIZE, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of 
data for one mark.") \ M(SettingUInt64, max_compress_block_size, DEFAULT_MAX_COMPRESS_BLOCK_SIZE, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ diff --git a/dbms/src/Storages/Transaction/Codec.h b/dbms/src/Storages/Transaction/Codec.h index ff32d38e39d..e41295a1e2e 100644 --- a/dbms/src/Storages/Transaction/Codec.h +++ b/dbms/src/Storages/Transaction/Codec.h @@ -324,6 +324,11 @@ inline void EncodeVarUInt(UInt64 num, std::stringstream & ss) TiKV::writeVarUInt(num, ss); } +inline void EncodeNull(std::stringstream &ss) +{ + writeIntBinary(UInt8(TiDB::CodecFlagNil), ss); +} + inline void EncodeDecimal(const Decimal & dec, std::stringstream & ss) { writeIntBinary(UInt8(TiDB::CodecFlagDecimal), ss); @@ -394,6 +399,10 @@ T getFieldValue(const Field & field) inline void EncodeDatum(const Field & field, TiDB::CodecFlag flag, std::stringstream & ss) { + if (field.isNull()) + { + return EncodeNull(ss); + } switch (flag) { case TiDB::CodecFlagDecimal: From c8cd3d777f860d90f810e525ae49ef027a8d587a Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Mon, 12 Aug 2019 17:00:07 +0800 Subject: [PATCH 35/79] Fix region id in mock dag --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 1277b3fe5e4..84882c4597d 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -67,7 +67,7 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) if (!region) throw Exception("No such region", ErrorCodes::BAD_ARGUMENTS); } - tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region_id, region->version(), region->confVer()); + tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region->id(), region->version(), region->confVer()); return outputDAGResponse(context, schema, dag_response); } From 0492af6a5ac0076ced5ef53f17efad04f9f54c25 Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 14 Aug 2019 14:04:16 +0800 Subject: [PATCH 36/79] support udf in (#175) * fix cop test regression * address comments * format code * fix npe for dag execute * format code * address comment * add some comments * throw exception when meet error duing cop request handling * address comments * add error code * throw exception when meet error duing cop request handling * address comments * add DAGContext so InterpreterDAG can exchange information with DAGDriver * fix bug * 1. refine code, 2. 
address comments * update comments * columnref index is based on executor output schema * handle error in coprocessor request * refine code * use Clear to clear a protobuf message completely * refine code * code refine && several minor bug fix * address comments * address comments * support udf in * refine code * address comments * address comments --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 60 +++++++++++++-- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 6 ++ .../Flash/Coprocessor/DAGStringConverter.cpp | 2 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 61 +++++++++------- dbms/src/Flash/Coprocessor/DAGUtils.h | 3 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 15 ++-- dbms/src/Flash/Coprocessor/InterpreterDAG.h | 3 + dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 73 ++++++++++++++++++- dbms/src/Interpreters/Set.cpp | 39 ++++++++++ dbms/src/Interpreters/Set.h | 10 +++ 10 files changed, 224 insertions(+), 48 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 5b8b5fa9165..d2dda6a5bb7 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1,10 +1,13 @@ #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -251,6 +254,32 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres return expr_name; } +void DAGExpressionAnalyzer::makeExplicitSet( + const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name) +{ + if (prepared_sets.count(&expr)) + { + return; + } + DataTypes set_element_types; + // todo support tuple in, i.e. (a,b) in ((1,2), (3,4)), currently TiDB convert tuple in into a series of or/and/eq exprs + // which means tuple in is never be pushed to coprocessor, but it is quite in-efficient + set_element_types.push_back(sample_block.getByName(left_arg_name).type); + + // todo if this is a single value in, then convert it to equal expr + SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode)); + set->createFromDAGExpr(set_element_types, expr, create_ordered_set); + prepared_sets[&expr] = std::move(set); +} + +static String getUniqueName(const Block & block, const String & prefix) +{ + int i = 1; + while (block.has(prefix + toString(i))) + ++i; + return prefix + toString(i); +} + String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions) { String expr_name = getName(expr, getCurrentInputColumns()); @@ -288,20 +317,35 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi throw Exception("agg function is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); } const String & func_name = getFunctionName(expr); - if (func_name == "in" || func_name == "notIn" || func_name == "globalIn" || func_name == "globalNotIn") - { - // todo support in - throw Exception(func_name + " is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); - } - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); Names argument_names; DataTypes argument_types; - for (auto & child : expr.children()) + + if (isInOrGlobalInOperator(func_name)) { - String name = getActions(child, actions); + String name = getActions(expr.children(0), actions); argument_names.push_back(name); argument_types.push_back(actions->getSampleBlock().getByName(name).type); + 
makeExplicitSet(expr, actions->getSampleBlock(), false, name); + ColumnWithTypeAndName column; + column.type = std::make_shared(); + + const SetPtr & set = prepared_sets[&expr]; + + column.name = getUniqueName(actions->getSampleBlock(), "___set"); + column.column = ColumnSet::create(1, set); + actions->add(ExpressionAction::addColumn(column)); + argument_names.push_back(column.name); + argument_types.push_back(column.type); + } + else + { + for (auto & child : expr.children()) + { + String name = getActions(child, actions); + argument_names.push_back(name); + argument_types.push_back(actions->getSampleBlock().getByName(name).type); + } } // re-construct expr_name, because expr_name generated previously is based on expr tree, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index cdc3acbac5b..959729886c7 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -13,6 +13,10 @@ namespace DB { +class Set; +using SetPtr = std::shared_ptr; +using DAGPreparedSets = std::unordered_map; + /** Transforms an expression from DAG expression into a sequence of actions to execute it. * */ @@ -24,6 +28,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable NamesAndTypesList source_columns; // all columns after aggregation NamesAndTypesList aggregated_columns; + DAGPreparedSets prepared_sets; Settings settings; const Context & context; bool after_agg; @@ -47,6 +52,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable void appendFinalProject(ExpressionActionsChain & chain, const NamesWithAliases & final_project); String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); const NamesAndTypesList & getCurrentInputColumns(); + void makeExplicitSet(const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name); }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp index 36ab11801b9..4a11d21f075 100644 --- a/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp @@ -58,7 +58,7 @@ void DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringst String name = merge_tree->getTableInfo().columns[cid - 1].name; output_from_ts.push_back(std::move(name)); } - ss << "FROM " << merge_tree->getTableInfo().db_name << "." << merge_tree->getTableInfo().name << " "; + ss << "FROM " << storage->getDatabaseName() << "." 
<< storage->getTableName() << " "; } void DAGStringConverter::buildSelString(const tipb::Selection & sel, std::stringstream & ss) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 0ed3db9dfc7..d46bf5acf5e 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -9,6 +9,12 @@ namespace DB { +namespace ErrorCodes +{ +extern const int COP_BAD_DAG_REQUEST; +extern const int UNSUPPORTED_METHOD; +} // namespace ErrorCodes + bool isFunctionExpr(const tipb::Expr & expr) { switch (expr.tp()) @@ -43,7 +49,7 @@ const String & getAggFunctionName(const tipb::Expr & expr) { if (!aggFunMap.count(expr.tp())) { - throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported."); + throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } return aggFunMap[expr.tp()]; } @@ -54,7 +60,7 @@ const String & getFunctionName(const tipb::Expr & expr) { if (!aggFunMap.count(expr.tp())) { - throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported."); + throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } return aggFunMap[expr.tp()]; } @@ -62,13 +68,13 @@ const String & getFunctionName(const tipb::Expr & expr) { if (!scalarFunMap.count(expr.sig())) { - throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported."); + throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } return scalarFunMap[expr.sig()]; } } -String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col) +String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser) { std::stringstream ss; size_t cursor = 1; @@ -94,7 +100,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col columnId = DecodeInt(cursor, expr.val()); if (columnId < 0 || columnId >= (ColumnID)input_col.size()) { - throw Exception("out of bound"); + throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } return input_col.getNames()[columnId]; case tipb::ExprType::Count: @@ -105,53 +111,50 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::First: if (!aggFunMap.count(expr.tp())) { - throw Exception("not supported"); + throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); } func_name = aggFunMap.find(expr.tp())->second; break; case tipb::ExprType::ScalarFunc: if (!scalarFunMap.count(expr.sig())) { - throw Exception("not supported"); + throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); } func_name = scalarFunMap.find(expr.sig())->second; break; default: - throw Exception("not supported"); + throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); } // build function expr - if (func_name == "in") + if (isInOrGlobalInOperator(func_name) && for_parser) { // for in, we could not represent the function expr using func_name(param1, param2, ...) 
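// (printing IN as in(col, v1, v2, ...) would not parse back, since SQL expects
// the form "col IN (v1, v2, ...)"; hence the check below only rejects IN when
// the output is destined for a parser, while getName(), which passes
// for_parser = false, still uses this rendering purely to derive a name)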
- throw Exception("not supported"); + throw Exception("Function " + func_name + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } - else + ss << func_name << "("; + bool first = true; + for (const tipb::Expr & child : expr.children()) { - ss << func_name << "("; - bool first = true; - for (const tipb::Expr & child : expr.children()) + String s = exprToString(child, input_col, for_parser); + if (first) { - String s = exprToString(child, input_col); - if (first) - { - first = false; - } - else - { - ss << ", "; - } - ss << s; + first = false; } - ss << ") "; - return ss.str(); + else + { + ss << ", "; + } + ss << s; } + ss << ") "; + return ss.str(); } const String & getTypeName(const tipb::Expr & expr) { return tipb::ExprType_Name(expr.tp()); } String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns) { - return exprToString(expr, current_input_columns); + return exprToString(expr, current_input_columns, false); } bool isAggFunctionExpr(const tipb::Expr & expr) @@ -225,7 +228,7 @@ Field decodeLiteral(const tipb::Expr & expr) case tipb::ExprType::MysqlTime: case tipb::ExprType::MysqlJson: case tipb::ExprType::ValueList: - throw Exception("mysql type literal is not supported yet"); + throw Exception(tipb::ExprType_Name(expr.tp()) + "is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); default: return DecodeDatum(cursor, expr.val()); } @@ -237,6 +240,8 @@ ColumnID getColumnID(const tipb::Expr & expr) return DecodeInt(cursor, expr.val()); } +bool isInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; } + std::unordered_map aggFunMap({ {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Avg, "avg"}, {tipb::ExprType::Min, "min"}, {tipb::ExprType::Max, "max"}, {tipb::ExprType::First, "any"}, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 71a52533ea3..ec6b96d2fbb 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -24,7 +24,8 @@ bool isColumnExpr(const tipb::Expr & expr); ColumnID getColumnID(const tipb::Expr & expr); String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns); const String & getTypeName(const tipb::Expr & expr); -String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col); +String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser = true); +bool isInOrGlobalInOperator(const String & name); extern std::unordered_map aggFunMap; extern std::unordered_map scalarFunMap; diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index be2f8700a04..cbc95e795e9 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -82,6 +82,8 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) throw Exception("No column is selected in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } + analyzer = std::make_unique(source_columns, context); + if (!dag.hasAggregation()) { // if the dag request does not contain agg, then the final output is @@ -175,10 +177,9 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() { AnalysisResult res; ExpressionActionsChain chain; - DAGExpressionAnalyzer analyzer(source_columns, context); if (dag.hasSelection()) { - analyzer.appendWhere(chain, dag.getSelection(), res.filter_column_name); + 
analyzer->appendWhere(chain, dag.getSelection(), res.filter_column_name); res.has_where = true; res.before_where = chain.getLastActions(); chain.addStep(); @@ -186,7 +187,7 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() // There will be either Agg... if (dag.hasAggregation()) { - analyzer.appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); + analyzer->appendAggregation(chain, dag.getAggregation(), res.aggregation_keys, res.aggregate_descriptions); res.need_aggregate = true; res.before_aggregation = chain.getLastActions(); @@ -194,9 +195,9 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() chain.clear(); // add cast if type is not match - analyzer.appendAggSelect(chain, dag.getAggregation()); + analyzer->appendAggSelect(chain, dag.getAggregation()); //todo use output_offset to reconstruct the final project columns - for (auto element : analyzer.getCurrentInputColumns()) + for (auto element : analyzer->getCurrentInputColumns()) { final_project.emplace_back(element.name, ""); } @@ -205,10 +206,10 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() if (dag.hasTopN()) { res.has_order_by = true; - analyzer.appendOrderBy(chain, dag.getTopN(), res.order_column_names); + analyzer->appendOrderBy(chain, dag.getTopN(), res.order_column_names); } // Append final project results if needed. - analyzer.appendFinalProject(chain, final_project); + analyzer->appendFinalProject(chain, final_project); res.before_order_and_select = chain.getLastActions(); chain.finalize(); chain.clear(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.h b/dbms/src/Flash/Coprocessor/InterpreterDAG.h index 099e1382e8d..22ba126df96 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.h +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.h @@ -7,6 +7,7 @@ #pragma GCC diagnostic pop #include +#include #include #include #include @@ -98,6 +99,8 @@ class InterpreterDAG : public IInterpreter TMTStoragePtr storage; TableStructureReadLockPtr table_lock; + std::unique_ptr analyzer; + Poco::Logger * log; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index e18c3c4dd74..267056e018c 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -56,6 +56,7 @@ class FlashClient grpc::ClientContext clientContext; clientContext.AddMetadata("user_name", ""); clientContext.AddMetadata("dag_planner", "optree"); + clientContext.AddMetadata("dag_expr_field_type_strict_check", "0"); coprocessor::Response response; grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); if (status.ok()) @@ -64,6 +65,12 @@ class FlashClient tipb::SelectResponse selectResponse; if (selectResponse.ParseFromString(response.data())) { + if (selectResponse.has_error()) + { + std::cout << "Coprocessor request failed, error code " << selectResponse.error().code() << " error msg " + << selectResponse.error().msg(); + return status; + } for (const tipb::Chunk & chunk : selectResponse.chunks()) { size_t cursor = 0; @@ -148,6 +155,66 @@ void appendSelection(tipb::DAGRequest & dag_request) type = expr->mutable_field_type(); type->set_tp(1); type->set_flag(1 << 5); + + // selection i in (5,10,11) + selection->clear_conditions(); + expr = selection->add_conditions(); + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(tipb::ScalarFuncSig::InInt); + col = expr->add_children(); + col->set_tp(tipb::ExprType::ColumnRef); 
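// A minimal sketch (editorial, not part of this patch): every Int64 literal
// child built below repeats the same setter sequence, so a test-only helper
// could factor the pattern out. addInt64Literal is a hypothetical name, and
// the value is taken pre-encoded because the EncodeNumber template arguments
// are elided in this rendering of the patch.
tipb::Expr * addInt64Literal(tipb::Expr * parent, const std::string & encoded_val)
{
    tipb::Expr * lit = parent->add_children();
    lit->set_tp(tipb::ExprType::Int64);
    lit->set_val(encoded_val);
    auto * ft = lit->mutable_field_type();
    ft->set_tp(8);   // MySQL LongLong, matching the hand-built children here
    ft->set_flag(1); // NOT NULL, matching the hand-built children here
    return lit;
}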
+ ss.str(""); + DB::EncodeNumber(1, ss); + col->set_val(ss.str()); + type = col->mutable_field_type(); + type->set_tp(8); + type->set_flag(0); + value = expr->add_children(); + value->set_tp(tipb::ExprType::Int64); + ss.str(""); + DB::EncodeNumber(10, ss); + value->set_val(std::string(ss.str())); + type = value->mutable_field_type(); + type->set_tp(8); + type->set_flag(1); + type = expr->mutable_field_type(); + type->set_tp(1); + type->set_flag(1 << 5); + value = expr->add_children(); + value->set_tp(tipb::ExprType::Int64); + ss.str(""); + DB::EncodeNumber(5, ss); + value->set_val(std::string(ss.str())); + type = value->mutable_field_type(); + type->set_tp(8); + type->set_flag(1); + type = expr->mutable_field_type(); + type->set_tp(1); + type->set_flag(1 << 5); + value = expr->add_children(); + value->set_tp(tipb::ExprType::Int64); + ss.str(""); + DB::EncodeNumber(11, ss); + value->set_val(std::string(ss.str())); + type = value->mutable_field_type(); + type->set_tp(8); + type->set_flag(1); + type = expr->mutable_field_type(); + type->set_tp(1); + type->set_flag(1 << 5); + + // selection i is null + /* + selection->clear_conditions(); + expr = selection->add_conditions(); + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(tipb::ScalarFuncSig::IntIsNull); + col = expr->add_children(); + col->set_tp(tipb::ExprType::ColumnRef); + ss.str(""); + DB::EncodeNumber(1, ss); + col->set_val(ss.str()); + */ } void appendAgg(tipb::DAGRequest & dag_request, size_t & result_field_num) @@ -208,9 +275,9 @@ grpc::Status rpcTest() ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); ClientPtr clientPtr = std::make_shared(cp); size_t result_field_num = 0; - bool has_selection = false; - bool has_agg = true; - bool has_topN = false; + bool has_selection = true; + bool has_agg = false; + bool has_topN = true; bool has_limit = false; // construct a dag request tipb::DAGRequest dagRequest; diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 925479e05e1..27e8757c658 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -12,6 +12,8 @@ #include #include +#include + #include #include #include @@ -22,6 +24,7 @@ #include #include +#include namespace DB @@ -34,6 +37,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int INCORRECT_ELEMENT_OF_SET; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int COP_BAD_DAG_REQUEST; } @@ -256,6 +260,41 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co insertFromBlock(block, fill_set_elements); } +void Set::createFromDAGExpr(const DataTypes & types, const tipb::Expr & expr, bool fill_set_elements) +{ + /// Will form a block with values from the set. 
+ + Block header; + size_t num_columns = types.size(); + if (num_columns != 1) + { + throw Exception("Incorrect element of set, tuple in is not supported yet", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + } + for (size_t i = 0; i < num_columns; ++i) + header.insert(ColumnWithTypeAndName(types[i]->createColumn(), types[i], "_" + toString(i))); + setHeader(header); + + MutableColumns columns = header.cloneEmptyColumns(); + + for (int i = 1; i < expr.children_size(); i++) + { + auto & child = expr.children(i); + // todo support constant expression by constant folding + if (!isLiteralExpr(child)) + { + throw Exception("Only literal is supported in children of expr `in`", ErrorCodes::COP_BAD_DAG_REQUEST); + } + Field value = decodeLiteral(child); + DataTypePtr type = child.has_field_type() ? getDataTypeByFieldType(child.field_type()) : types[0]; + value = convertFieldToType(value, *type); + + if (!value.isNull()) + columns[0]->insert(value); + } + + Block block = header.cloneWithColumns(std::move(columns)); + insertFromBlock(block, fill_set_elements); +} ColumnPtr Set::execute(const Block & block, bool negative) const { diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index e27bdf58ec6..9600ed2065f 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -1,5 +1,10 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + #include #include #include @@ -48,6 +53,11 @@ class Set */ void createFromAST(const DataTypes & types, ASTPtr node, const Context & context, bool fill_set_elements); + /** + * Create a Set from DAG Expr, used when processing DAG Request + */ + void createFromDAGExpr(const DataTypes & types, const tipb::Expr & expr, bool fill_set_elements); + /** Create a Set from stream. * Call setHeader, then call insertFromBlock for each block. */ From 8713ff27f753622fb68669c4126bca431d4d85e2 Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 14 Aug 2019 15:27:37 +0800 Subject: [PATCH 37/79] 1. fix decode literal expr error, 2. 
add all scalar function sig in scalar_func_map (#177) * add all scalar function sig in scalarFunMap * fix literal expr decode * enable ltrim && rtrim * code refine * use throw instead of rethrow in DAGDriver.cpp --- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 4 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 11 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 361 +++++++++++++----- dbms/src/Flash/Coprocessor/DAGUtils.h | 4 +- dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 20 +- dbms/src/Storages/Transaction/Codec.h | 2 +- 6 files changed, 288 insertions(+), 114 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 62e6f861db0..b5f72738ab0 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -90,11 +90,11 @@ try } catch (const RegionException & e) { - e.rethrow(); + throw; } catch (const LockException & e) { - e.rethrow(); + throw; } catch (const Exception & e) { diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index d2dda6a5bb7..45b05b7dc97 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -228,8 +228,7 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres tipb::Expr type_expr; type_expr.set_tp(tipb::ExprType::String); std::stringstream ss; - EncodeCompactBytes(expected_type->getName(), ss); - type_expr.set_val(ss.str()); + type_expr.set_val(expected_type->getName()); auto type_field_type = type_expr.field_type(); type_field_type.set_tp(0xfe); type_field_type.set_flag(1); @@ -302,8 +301,8 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } else if (isColumnExpr(expr)) { - ColumnID columnId = getColumnID(expr); - if (columnId < 0 || columnId >= (ColumnID)getCurrentInputColumns().size()) + ColumnID column_id = getColumnID(expr); + if (column_id < 0 || column_id >= (ColumnID)getCurrentInputColumns().size()) { throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } @@ -356,8 +355,8 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi // should be updated to and(casted_arg1_name, arg2_name) expr_name = genFuncString(func_name, argument_names); - const ExpressionAction & applyFunction = ExpressionAction::applyFunction(function_builder, argument_names, expr_name); - actions->add(applyFunction); + const ExpressionAction & apply_function = ExpressionAction::applyFunction(function_builder, argument_names, expr_name); + actions->add(apply_function); // add cast if needed expr_name = appendCastIfNeeded(expr, actions, expr_name); return expr_name; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index d46bf5acf5e..150238a4789 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -13,72 +13,45 @@ namespace ErrorCodes { extern const int COP_BAD_DAG_REQUEST; extern const int UNSUPPORTED_METHOD; +extern const int LOGICAL_ERROR; } // namespace ErrorCodes -bool isFunctionExpr(const tipb::Expr & expr) -{ - switch (expr.tp()) - { - case tipb::ExprType::ScalarFunc: - case tipb::ExprType::Count: - case tipb::ExprType::Sum: - case tipb::ExprType::Avg: - case tipb::ExprType::Min: - case tipb::ExprType::Max: - case tipb::ExprType::First: - case tipb::ExprType::GroupConcat: - case tipb::ExprType::Agg_BitAnd: - case tipb::ExprType::Agg_BitOr: - case 
tipb::ExprType::Agg_BitXor: - case tipb::ExprType::Std: - case tipb::ExprType::Stddev: - case tipb::ExprType::StddevPop: - case tipb::ExprType::StddevSamp: - case tipb::ExprType::VarPop: - case tipb::ExprType::VarSamp: - case tipb::ExprType::Variance: - case tipb::ExprType::JsonArrayAgg: - case tipb::ExprType::JsonObjectAgg: - return true; - default: - return false; - } -} +bool isFunctionExpr(const tipb::Expr & expr) { return expr.tp() == tipb::ExprType::ScalarFunc || isAggFunctionExpr(expr); } const String & getAggFunctionName(const tipb::Expr & expr) { - if (!aggFunMap.count(expr.tp())) + if (!agg_func_map.count(expr.tp())) { throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } - return aggFunMap[expr.tp()]; + return agg_func_map[expr.tp()]; } const String & getFunctionName(const tipb::Expr & expr) { if (isAggFunctionExpr(expr)) { - if (!aggFunMap.count(expr.tp())) + if (!agg_func_map.count(expr.tp())) { throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } - return aggFunMap[expr.tp()]; + return agg_func_map[expr.tp()]; } else { - if (!scalarFunMap.count(expr.sig())) + if (!scalar_func_map.count(expr.sig())) { throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } - return scalarFunMap[expr.sig()]; + return scalar_func_map[expr.sig()]; } } String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser) { std::stringstream ss; - size_t cursor = 1; - Int64 columnId = 0; + size_t cursor = 0; + Int64 column_id = 0; String func_name; Field f; switch (expr.tp()) @@ -93,34 +66,33 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::Float64: return std::to_string(DecodeFloat64(cursor, expr.val())); case tipb::ExprType::String: - return DecodeCompactBytes(cursor, expr.val()); case tipb::ExprType::Bytes: - return DecodeBytes(cursor, expr.val()); + return expr.val(); case tipb::ExprType::ColumnRef: - columnId = DecodeInt(cursor, expr.val()); - if (columnId < 0 || columnId >= (ColumnID)input_col.size()) + column_id = DecodeInt(cursor, expr.val()); + if (column_id < 0 || column_id >= (ColumnID)input_col.size()) { throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } - return input_col.getNames()[columnId]; + return input_col.getNames()[column_id]; case tipb::ExprType::Count: case tipb::ExprType::Sum: case tipb::ExprType::Avg: case tipb::ExprType::Min: case tipb::ExprType::Max: case tipb::ExprType::First: - if (!aggFunMap.count(expr.tp())) + if (!agg_func_map.count(expr.tp())) { throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); } - func_name = aggFunMap.find(expr.tp())->second; + func_name = agg_func_map.find(expr.tp())->second; break; case tipb::ExprType::ScalarFunc: - if (!scalarFunMap.count(expr.sig())) + if (!scalar_func_map.count(expr.sig())) { throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); } - func_name = scalarFunMap.find(expr.sig())->second; + func_name = scalar_func_map.find(expr.sig())->second; break; default: throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); @@ -219,6 +191,18 @@ Field decodeLiteral(const tipb::Expr & expr) size_t cursor = 0; switch (expr.tp()) { + case tipb::ExprType::Null: + return Field(); + case tipb::ExprType::Int64: + return DecodeInt(cursor, 
expr.val()); + case tipb::ExprType::Uint64: + return DecodeInt(cursor, expr.val()); + case tipb::ExprType::Float32: + case tipb::ExprType::Float64: + return DecodeFloat64(cursor, expr.val()); + case tipb::ExprType::String: + case tipb::ExprType::Bytes: + return expr.val(); case tipb::ExprType::MysqlBit: case tipb::ExprType::MysqlDecimal: case tipb::ExprType::MysqlDuration: @@ -230,21 +214,22 @@ Field decodeLiteral(const tipb::Expr & expr) case tipb::ExprType::ValueList: throw Exception(tipb::ExprType_Name(expr.tp()) + "is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); default: - return DecodeDatum(cursor, expr.val()); + throw Exception("Should not reach here: not a literal expression", ErrorCodes::LOGICAL_ERROR); } } ColumnID getColumnID(const tipb::Expr & expr) { - size_t cursor = 1; + size_t cursor = 0; return DecodeInt(cursor, expr.val()); } bool isInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; } -std::unordered_map aggFunMap({ - {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Avg, "avg"}, {tipb::ExprType::Min, "min"}, - {tipb::ExprType::Max, "max"}, {tipb::ExprType::First, "any"}, +std::unordered_map agg_func_map({ + {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Min, "min"}, {tipb::ExprType::Max, "max"}, + {tipb::ExprType::First, "any"}, + //{tipb::ExprType::Avg, ""}, //{tipb::ExprType::GroupConcat, ""}, //{tipb::ExprType::Agg_BitAnd, ""}, //{tipb::ExprType::Agg_BitOr, ""}, @@ -260,7 +245,8 @@ std::unordered_map aggFunMap({ //{tipb::ExprType::JsonObjectAgg, ""}, }); -std::unordered_map scalarFunMap({ +std::unordered_map scalar_func_map({ + /* {tipb::ScalarFuncSig::CastIntAsInt, "cast"}, {tipb::ScalarFuncSig::CastIntAsReal, "cast"}, {tipb::ScalarFuncSig::CastIntAsString, "cast"}, @@ -316,6 +302,7 @@ std::unordered_map scalarFunMap({ {tipb::ScalarFuncSig::CastJsonAsTime, "cast"}, {tipb::ScalarFuncSig::CastJsonAsDuration, "cast"}, {tipb::ScalarFuncSig::CastJsonAsJson, "cast"}, + */ {tipb::ScalarFuncSig::CoalesceInt, "coalesce"}, {tipb::ScalarFuncSig::CoalesceReal, "coalesce"}, @@ -540,13 +527,13 @@ std::unordered_map scalarFunMap({ {tipb::ScalarFuncSig::IfJson, "if"}, //todo need further check for caseWithExpression and multiIf - {tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, - {tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenInt, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenReal, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenString, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenDecimal, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenTime, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenDuration, "caseWithExpression"}, + //{tipb::ScalarFuncSig::CaseWhenJson, "caseWithExpression"}, //{tipb::ScalarFuncSig::AesDecrypt, "cast"}, //{tipb::ScalarFuncSig::AesEncrypt, "cast"}, @@ -590,39 +577,227 @@ std::unordered_map scalarFunMap({ //{tipb::ScalarFuncSig::IsIPv6, "cast"}, //{tipb::ScalarFuncSig::UUID, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - 
{tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, - {tipb::ScalarFuncSig::Uncompress, "cast"}, + //{tipb::ScalarFuncSig::LikeSig, "cast"}, + //{tipb::ScalarFuncSig::RegexpBinarySig, "cast"}, + //{tipb::ScalarFuncSig::RegexpSig, "cast"}, + + //{tipb::ScalarFuncSig::JsonExtractSig, "cast"}, + //{tipb::ScalarFuncSig::JsonUnquoteSig, "cast"}, + //{tipb::ScalarFuncSig::JsonTypeSig, "cast"}, + //{tipb::ScalarFuncSig::JsonSetSig, "cast"}, + //{tipb::ScalarFuncSig::JsonInsertSig, "cast"}, + //{tipb::ScalarFuncSig::JsonReplaceSig, "cast"}, + //{tipb::ScalarFuncSig::JsonRemoveSig, "cast"}, + //{tipb::ScalarFuncSig::JsonMergeSig, "cast"}, + //{tipb::ScalarFuncSig::JsonObjectSig, "cast"}, + //{tipb::ScalarFuncSig::JsonArraySig, "cast"}, + //{tipb::ScalarFuncSig::JsonValidJsonSig, "cast"}, + //{tipb::ScalarFuncSig::JsonContainsSig, "cast"}, + //{tipb::ScalarFuncSig::JsonArrayAppendSig, "cast"}, + //{tipb::ScalarFuncSig::JsonArrayInsertSig, "cast"}, + //{tipb::ScalarFuncSig::JsonMergePatchSig, "cast"}, + //{tipb::ScalarFuncSig::JsonMergePreserveSig, "cast"}, + //{tipb::ScalarFuncSig::JsonContainsPathSig, "cast"}, + //{tipb::ScalarFuncSig::JsonPrettySig, "cast"}, + //{tipb::ScalarFuncSig::JsonQuoteSig, "cast"}, + //{tipb::ScalarFuncSig::JsonSearchSig, "cast"}, + //{tipb::ScalarFuncSig::JsonStorageSizeSig, "cast"}, + //{tipb::ScalarFuncSig::JsonDepthSig, "cast"}, + //{tipb::ScalarFuncSig::JsonKeysSig, "cast"}, + //{tipb::ScalarFuncSig::JsonLengthSig, "cast"}, + //{tipb::ScalarFuncSig::JsonKeys2ArgsSig, "cast"}, + //{tipb::ScalarFuncSig::JsonValidStringSig, "cast"}, + + //{tipb::ScalarFuncSig::DateFormatSig, "cast"}, + //{tipb::ScalarFuncSig::DateLiteral, "cast"}, + //{tipb::ScalarFuncSig::DateDiff, "cast"}, + //{tipb::ScalarFuncSig::NullTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::TimeStringTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::DurationDurationTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::DurationDurationTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::StringTimeTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::StringDurationTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::StringStringTimeDiff, "cast"}, + //{tipb::ScalarFuncSig::TimeTimeTimeDiff, "cast"}, + + //{tipb::ScalarFuncSig::Date, 
"cast"}, + //{tipb::ScalarFuncSig::Hour, "cast"}, + //{tipb::ScalarFuncSig::Minute, "cast"}, + //{tipb::ScalarFuncSig::Second, "cast"}, + //{tipb::ScalarFuncSig::MicroSecond, "cast"}, + //{tipb::ScalarFuncSig::Month, "cast"}, + //{tipb::ScalarFuncSig::MonthName, "cast"}, + + //{tipb::ScalarFuncSig::NowWithArg, "cast"}, + //{tipb::ScalarFuncSig::NowWithoutArg, "cast"}, + + //{tipb::ScalarFuncSig::DayName, "cast"}, + //{tipb::ScalarFuncSig::DayOfMonth, "cast"}, + //{tipb::ScalarFuncSig::DayOfWeek, "cast"}, + //{tipb::ScalarFuncSig::DayOfYear, "cast"}, + + //{tipb::ScalarFuncSig::WeekWithMode, "cast"}, + //{tipb::ScalarFuncSig::WeekWithoutMode, "cast"}, + //{tipb::ScalarFuncSig::WeekDay, "cast"}, + //{tipb::ScalarFuncSig::WeekOfYear, "cast"}, + + //{tipb::ScalarFuncSig::Year, "cast"}, + //{tipb::ScalarFuncSig::YearWeekWithMode, "cast"}, + //{tipb::ScalarFuncSig::YearWeekWithoutMode, "cast"}, + + //{tipb::ScalarFuncSig::GetFormat, "cast"}, + //{tipb::ScalarFuncSig::SysDateWithFsp, "cast"}, + //{tipb::ScalarFuncSig::SysDateWithoutFsp, "cast"}, + //{tipb::ScalarFuncSig::CurrentDate, "cast"}, + //{tipb::ScalarFuncSig::CurrentTime0Arg, "cast"}, + //{tipb::ScalarFuncSig::CurrentTime1Arg, "cast"}, + + //{tipb::ScalarFuncSig::Time, "cast"}, + //{tipb::ScalarFuncSig::TimeLiteral, "cast"}, + //{tipb::ScalarFuncSig::UTCDate, "cast"}, + //{tipb::ScalarFuncSig::UTCTimestampWithArg, "cast"}, + //{tipb::ScalarFuncSig::UTCTimestampWithoutArg, "cast"}, + + //{tipb::ScalarFuncSig::AddDatetimeAndDuration, "cast"}, + //{tipb::ScalarFuncSig::AddDatetimeAndString, "cast"}, + //{tipb::ScalarFuncSig::AddTimeDateTimeNull, "cast"}, + //{tipb::ScalarFuncSig::AddStringAndDuration, "cast"}, + //{tipb::ScalarFuncSig::AddStringAndString, "cast"}, + //{tipb::ScalarFuncSig::AddTimeStringNull, "cast"}, + //{tipb::ScalarFuncSig::AddDurationAndDuration, "cast"}, + //{tipb::ScalarFuncSig::AddDurationAndString, "cast"}, + //{tipb::ScalarFuncSig::AddTimeDurationNull, "cast"}, + //{tipb::ScalarFuncSig::AddDateAndDuration, "cast"}, + //{tipb::ScalarFuncSig::AddDateAndString, "cast"}, + + //{tipb::ScalarFuncSig::SubDateAndDuration, "cast"}, + //{tipb::ScalarFuncSig::SubDateAndString, "cast"}, + //{tipb::ScalarFuncSig::SubTimeDateTimeNull, "cast"}, + //{tipb::ScalarFuncSig::SubStringAndDuration, "cast"}, + //{tipb::ScalarFuncSig::SubStringAndString, "cast"}, + //{tipb::ScalarFuncSig::SubTimeStringNull, "cast"}, + //{tipb::ScalarFuncSig::SubDurationAndDuration, "cast"}, + //{tipb::ScalarFuncSig::SubDurationAndString, "cast"}, + //{tipb::ScalarFuncSig::SubDateAndDuration, "cast"}, + //{tipb::ScalarFuncSig::SubDateAndString, "cast"}, + + //{tipb::ScalarFuncSig::UnixTimestampCurrent, "cast"}, + //{tipb::ScalarFuncSig::UnixTimestampInt, "cast"}, + //{tipb::ScalarFuncSig::UnixTimestampDec, "cast"}, + + //{tipb::ScalarFuncSig::ConvertTz, "cast"}, + //{tipb::ScalarFuncSig::MakeDate, "cast"}, + //{tipb::ScalarFuncSig::MakeTime, "cast"}, + //{tipb::ScalarFuncSig::PeriodAdd, "cast"}, + //{tipb::ScalarFuncSig::PeriodDiff, "cast"}, + //{tipb::ScalarFuncSig::Quarter, "cast"}, + + //{tipb::ScalarFuncSig::SecToTime, "cast"}, + //{tipb::ScalarFuncSig::TimeToSec, "cast"}, + //{tipb::ScalarFuncSig::TimestampAdd, "cast"}, + //{tipb::ScalarFuncSig::ToDays, "cast"}, + //{tipb::ScalarFuncSig::ToSeconds, "cast"}, + //{tipb::ScalarFuncSig::UTCTimeWithArg, "cast"}, + //{tipb::ScalarFuncSig::UTCTimestampWithoutArg, "cast"}, + //{tipb::ScalarFuncSig::Timestamp1Arg, "cast"}, + //{tipb::ScalarFuncSig::Timestamp2Args, "cast"}, + 
//{tipb::ScalarFuncSig::TimestampLiteral, "cast"}, + + //{tipb::ScalarFuncSig::LastDay, "cast"}, + //{tipb::ScalarFuncSig::StrToDateDate, "cast"}, + //{tipb::ScalarFuncSig::StrToDateDatetime, "cast"}, + //{tipb::ScalarFuncSig::StrToDateDuration, "cast"}, + //{tipb::ScalarFuncSig::FromUnixTime1Arg, "cast"}, + //{tipb::ScalarFuncSig::FromUnixTime2Arg, "cast"}, + //{tipb::ScalarFuncSig::ExtractDatetime, "cast"}, + //{tipb::ScalarFuncSig::ExtractDuration, "cast"}, + + //{tipb::ScalarFuncSig::AddDateStringString, "cast"}, + //{tipb::ScalarFuncSig::AddDateStringInt, "cast"}, + //{tipb::ScalarFuncSig::AddDateStringDecimal, "cast"}, + //{tipb::ScalarFuncSig::AddDateIntString, "cast"}, + //{tipb::ScalarFuncSig::AddDateIntInt, "cast"}, + //{tipb::ScalarFuncSig::AddDateDatetimeString, "cast"}, + //{tipb::ScalarFuncSig::AddDateDatetimeInt, "cast"}, + + //{tipb::ScalarFuncSig::SubDateStringString, "cast"}, + //{tipb::ScalarFuncSig::SubDateStringInt, "cast"}, + //{tipb::ScalarFuncSig::SubDateStringDecimal, "cast"}, + //{tipb::ScalarFuncSig::SubDateIntString, "cast"}, + //{tipb::ScalarFuncSig::SubDateIntInt, "cast"}, + //{tipb::ScalarFuncSig::SubDateDatetimeString, "cast"}, + //{tipb::ScalarFuncSig::SubDateDatetimeInt, "cast"}, + + //{tipb::ScalarFuncSig::FromDays, "cast"}, + //{tipb::ScalarFuncSig::TimeFormat, "cast"}, + //{tipb::ScalarFuncSig::TimestampDiff, "cast"}, + + //{tipb::ScalarFuncSig::BitLength, "cast"}, + //{tipb::ScalarFuncSig::Bin, "cast"}, + //{tipb::ScalarFuncSig::ASCII, "cast"}, + //{tipb::ScalarFuncSig::Char, "cast"}, + {tipb::ScalarFuncSig::CharLength, "lengthUTF8"}, + //{tipb::ScalarFuncSig::Concat, "cast"}, + //{tipb::ScalarFuncSig::ConcatWS, "cast"}, + //{tipb::ScalarFuncSig::Convert, "cast"}, + //{tipb::ScalarFuncSig::Elt, "cast"}, + //{tipb::ScalarFuncSig::ExportSet3Arg, "cast"}, + //{tipb::ScalarFuncSig::ExportSet4Arg, "cast"}, + //{tipb::ScalarFuncSig::ExportSet5Arg, "cast"}, + //{tipb::ScalarFuncSig::FieldInt, "cast"}, + //{tipb::ScalarFuncSig::FieldReal, "cast"}, + //{tipb::ScalarFuncSig::FieldString, "cast"}, + + //{tipb::ScalarFuncSig::FindInSet, "cast"}, + //{tipb::ScalarFuncSig::Format, "cast"}, + //{tipb::ScalarFuncSig::FormatWithLocale, "cast"}, + //{tipb::ScalarFuncSig::FromBase64, "cast"}, + //{tipb::ScalarFuncSig::HexIntArg, "cast"}, + //{tipb::ScalarFuncSig::HexStrArg, "cast"}, + //{tipb::ScalarFuncSig::Insert, "cast"}, + //{tipb::ScalarFuncSig::InsertBinary, "cast"}, + //{tipb::ScalarFuncSig::Instr, "cast"}, + //{tipb::ScalarFuncSig::InstrBinary, "cast"}, + + {tipb::ScalarFuncSig::LTrim, "ltrim"}, + //{tipb::ScalarFuncSig::Left, "cast"}, + //{tipb::ScalarFuncSig::LeftBinary, "cast"}, + {tipb::ScalarFuncSig::Length, "length"}, + //{tipb::ScalarFuncSig::Locate2Args, "cast"}, + //{tipb::ScalarFuncSig::Locate3Args, "cast"}, + //{tipb::ScalarFuncSig::LocateBinary2Args, "cast"}, + //{tipb::ScalarFuncSig::LocateBinary3Args, "cast"}, + + {tipb::ScalarFuncSig::Lower, "lower"}, + //{tipb::ScalarFuncSig::Lpad, "cast"}, + //{tipb::ScalarFuncSig::LpadBinary, "cast"}, + //{tipb::ScalarFuncSig::MakeSet, "cast"}, + //{tipb::ScalarFuncSig::OctInt, "cast"}, + //{tipb::ScalarFuncSig::OctString, "cast"}, + //{tipb::ScalarFuncSig::Ord, "cast"}, + //{tipb::ScalarFuncSig::Quote, "cast"}, + {tipb::ScalarFuncSig::RTrim, "rtrim"}, + //{tipb::ScalarFuncSig::Repeat, "cast"}, + //{tipb::ScalarFuncSig::Replace, "cast"}, + //{tipb::ScalarFuncSig::Reverse, "cast"}, + //{tipb::ScalarFuncSig::ReverseBinary, "cast"}, + //{tipb::ScalarFuncSig::Right, "cast"}, + //{tipb::ScalarFuncSig::RightBinary, 
"cast"}, + //{tipb::ScalarFuncSig::Rpad, "cast"}, + //{tipb::ScalarFuncSig::RpadBinary, "cast"}, + //{tipb::ScalarFuncSig::Space, "cast"}, + //{tipb::ScalarFuncSig::Strcmp, "cast"}, + //{tipb::ScalarFuncSig::Substring2Args, "cast"}, + //{tipb::ScalarFuncSig::Substring3Args, "cast"}, + //{tipb::ScalarFuncSig::SubstringBinary2Args, "cast"}, + //{tipb::ScalarFuncSig::SubstringBinary3Args, "cast"}, + //{tipb::ScalarFuncSig::SubstringIndex, "cast"}, + + //{tipb::ScalarFuncSig::ToBase64, "cast"}, + //{tipb::ScalarFuncSig::Trim1Arg, "cast"}, + //{tipb::ScalarFuncSig::Trim2Args, "cast"}, + //{tipb::ScalarFuncSig::Trim3Args, "cast"}, + //{tipb::ScalarFuncSig::UnHex, "cast"}, + {tipb::ScalarFuncSig::Upper, "upper"}, }); } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index ec6b96d2fbb..410c447d2fd 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -26,7 +26,7 @@ String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_ const String & getTypeName(const tipb::Expr & expr); String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser = true); bool isInOrGlobalInOperator(const String & name); -extern std::unordered_map aggFunMap; -extern std::unordered_map scalarFunMap; +extern std::unordered_map agg_func_map; +extern std::unordered_map scalar_func_map; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp index 267056e018c..4babeececd4 100644 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp @@ -140,14 +140,14 @@ void appendSelection(tipb::DAGRequest & dag_request) tipb::Expr * value = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; - DB::EncodeNumber(1, ss); + DB::EncodeNumber(1, ss); col->set_val(ss.str()); auto * type = col->mutable_field_type(); type->set_tp(8); type->set_flag(0); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(10, ss); + DB::EncodeNumber(10, ss); value->set_val(std::string(ss.str())); type = value->mutable_field_type(); type->set_tp(8); @@ -164,7 +164,7 @@ void appendSelection(tipb::DAGRequest & dag_request) col = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); ss.str(""); - DB::EncodeNumber(1, ss); + DB::EncodeNumber(1, ss); col->set_val(ss.str()); type = col->mutable_field_type(); type->set_tp(8); @@ -172,7 +172,7 @@ void appendSelection(tipb::DAGRequest & dag_request) value = expr->add_children(); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(10, ss); + DB::EncodeNumber(10, ss); value->set_val(std::string(ss.str())); type = value->mutable_field_type(); type->set_tp(8); @@ -183,7 +183,7 @@ void appendSelection(tipb::DAGRequest & dag_request) value = expr->add_children(); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(5, ss); + DB::EncodeNumber(5, ss); value->set_val(std::string(ss.str())); type = value->mutable_field_type(); type->set_tp(8); @@ -194,7 +194,7 @@ void appendSelection(tipb::DAGRequest & dag_request) value = expr->add_children(); value->set_tp(tipb::ExprType::Int64); ss.str(""); - DB::EncodeNumber(11, ss); + DB::EncodeNumber(11, ss); value->set_val(std::string(ss.str())); type = value->mutable_field_type(); type->set_tp(8); @@ -212,7 +212,7 @@ void appendSelection(tipb::DAGRequest & dag_request) col = expr->add_children(); col->set_tp(tipb::ExprType::ColumnRef); 
ss.str(""); - DB::EncodeNumber(1, ss); + DB::EncodeNumber(1, ss); col->set_val(ss.str()); */ } @@ -228,7 +228,7 @@ void appendAgg(tipb::DAGRequest & dag_request, size_t & result_field_num) auto child = agg_func->add_children(); child->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; - DB::EncodeNumber(0, ss); + DB::EncodeNumber(0, ss); child->set_val(ss.str()); auto f_type = agg_func->mutable_field_type(); f_type->set_tp(3); @@ -236,7 +236,7 @@ void appendAgg(tipb::DAGRequest & dag_request, size_t & result_field_num) auto group_col = agg->add_group_by(); group_col->set_tp(tipb::ExprType::ColumnRef); ss.str(""); - DB::EncodeNumber(1, ss); + DB::EncodeNumber(1, ss); group_col->set_val(ss.str()); f_type = group_col->mutable_field_type(); f_type->set_tp(8); @@ -255,7 +255,7 @@ void appendTopN(tipb::DAGRequest & dag_request) tipb::Expr * expr1 = byItem->mutable_expr(); expr1->set_tp(tipb::ExprType::ColumnRef); std::stringstream ss; - DB::EncodeNumber(1, ss); + DB::EncodeNumber(1, ss); expr1->set_val(ss.str()); auto * type = expr1->mutable_field_type(); type->set_tp(8); diff --git a/dbms/src/Storages/Transaction/Codec.h b/dbms/src/Storages/Transaction/Codec.h index e41295a1e2e..1bf09ef2625 100644 --- a/dbms/src/Storages/Transaction/Codec.h +++ b/dbms/src/Storages/Transaction/Codec.h @@ -243,7 +243,7 @@ inline Field DecodeDatum(size_t & cursor, const String & raw_value) case TiDB::CodecFlagVarInt: return DecodeVarInt(cursor, raw_value); case TiDB::CodecFlagDuration: - throw Exception("Not implented yet. DecodeDatum: CodecFlagDuration", ErrorCodes::LOGICAL_ERROR); + throw Exception("Not implemented yet. DecodeDatum: CodecFlagDuration", ErrorCodes::LOGICAL_ERROR); case TiDB::CodecFlagDecimal: return DecodeDecimal(cursor, raw_value); default: From b25d1cc04922bd0e046f58cd592fc548116bd1cd Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 15 Aug 2019 13:42:13 +0800 Subject: [PATCH 38/79] some bug fix (#179) * add all scalar function sig in scalarFunMap * fix literal expr decode * enable ltrim && rtrim * code refine * use throw instead of rethrow in DAGDriver.cpp * 1. fix decode UInt literal error, 2. 
support mysqlDecimal type * format code --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 150238a4789..ee125ce3f97 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -59,7 +60,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::Null: return "NULL"; case tipb::ExprType::Int64: - return std::to_string(DecodeInt(cursor, expr.val())); + return std::to_string(RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data()))); case tipb::ExprType::Uint64: return std::to_string(DecodeInt(cursor, expr.val())); case tipb::ExprType::Float32: @@ -68,8 +69,10 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::String: case tipb::ExprType::Bytes: return expr.val(); + case tipb::ExprType::MysqlDecimal: + return DecodeDecimal(cursor, expr.val()).toString(); case tipb::ExprType::ColumnRef: - column_id = DecodeInt(cursor, expr.val()); + column_id = RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); if (column_id < 0 || column_id >= (ColumnID)input_col.size()) { throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); @@ -83,19 +86,19 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::First: if (!agg_func_map.count(expr.tp())) { - throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); + throw Exception(tipb::ExprType_Name(expr.tp()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } func_name = agg_func_map.find(expr.tp())->second; break; case tipb::ExprType::ScalarFunc: if (!scalar_func_map.count(expr.sig())) { - throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); + throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } func_name = scalar_func_map.find(expr.sig())->second; break; default: - throw Exception(tipb::ExprType_Name(expr.tp()) + "not supported", ErrorCodes::UNSUPPORTED_METHOD); + throw Exception(tipb::ExprType_Name(expr.tp()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } // build function expr if (isInOrGlobalInOperator(func_name) && for_parser) @@ -194,7 +197,7 @@ Field decodeLiteral(const tipb::Expr & expr) case tipb::ExprType::Null: return Field(); case tipb::ExprType::Int64: - return DecodeInt(cursor, expr.val()); + return RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); case tipb::ExprType::Uint64: return DecodeInt(cursor, expr.val()); case tipb::ExprType::Float32: @@ -203,8 +206,9 @@ Field decodeLiteral(const tipb::Expr & expr) case tipb::ExprType::String: case tipb::ExprType::Bytes: return expr.val(); - case tipb::ExprType::MysqlBit: case tipb::ExprType::MysqlDecimal: + return DecodeDecimal(cursor, expr.val()); + case tipb::ExprType::MysqlBit: case tipb::ExprType::MysqlDuration: case tipb::ExprType::MysqlEnum: case tipb::ExprType::MysqlHex: @@ -212,7 +216,7 @@ Field decodeLiteral(const tipb::Expr & expr) case tipb::ExprType::MysqlTime: case tipb::ExprType::MysqlJson: case tipb::ExprType::ValueList: - throw Exception(tipb::ExprType_Name(expr.tp()) + "is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); + throw 
Exception(tipb::ExprType_Name(expr.tp()) + " is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); default: throw Exception("Should not reach here: not a literal expression", ErrorCodes::LOGICAL_ERROR); } } ColumnID getColumnID(const tipb::Expr & expr) { - size_t cursor = 0; - return DecodeInt(cursor, expr.val()); + auto column_id = RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); + return column_id; } bool isInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; } From 3d38b7b4d97dd7af15d656b3338baf84e122c8e3 Mon Sep 17 00:00:00 2001 From: ruoxi Date: Thu, 15 Aug 2019 16:36:10 +0800 Subject: [PATCH 39/79] Support all DAG operator types in mock SQL -> DAG parser (#176) * Enhance dbg invoke and add dag as schemaful function * Add basic sql parse to dag * Column id starts from 1 * Fix value to ref * Add basic dag test * Fix dag bugs and pass 1st mock test * Make dag go normal routine and add mock dag * Add todo * Add comment * Fix gcc compile error * Enhance dag test * Address comments * Enhance mock sql -> dag compiler and add project test * Mock sql dag compiler support more expression types and add filter test * Add topn and limit test * Add agg for sql -> dag parser and agg test * Add dag specific codec * type * Update codec accordingly * Remove cop-test --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 378 ++++++++++++++++-- dbms/src/Flash/Coprocessor/DAGCodec.cpp | 65 +++ dbms/src/Flash/Coprocessor/DAGCodec.h | 25 ++ dbms/src/Flash/Coprocessor/DAGUtils.cpp | 33 +- .../Flash/Coprocessor/tests/CMakeLists.txt | 3 - dbms/src/Flash/Coprocessor/tests/cop_test.cpp | 332 --------------- dbms/src/Storages/Transaction/TiDB.h | 10 + tests/mutable-test/txn_dag/aggregation.test | 32 ++ tests/mutable-test/txn_dag/filter.test | 37 ++ tests/mutable-test/txn_dag/limit.test | 31 ++ tests/mutable-test/txn_dag/project.test | 41 ++ tests/mutable-test/txn_dag/table_scan.test | 2 +- tests/mutable-test/txn_dag/topn.test | 30 ++ 13 files changed, 634 insertions(+), 385 deletions(-) create mode 100644 dbms/src/Flash/Coprocessor/DAGCodec.cpp create mode 100644 dbms/src/Flash/Coprocessor/DAGCodec.h delete mode 100644 dbms/src/Flash/Coprocessor/tests/cop_test.cpp create mode 100644 tests/mutable-test/txn_dag/aggregation.test create mode 100644 tests/mutable-test/txn_dag/filter.test create mode 100644 tests/mutable-test/txn_dag/limit.test create mode 100644 tests/mutable-test/txn_dag/project.test create mode 100644 tests/mutable-test/txn_dag/topn.test diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 84882c4597d..dbc3c9986f3 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -1,10 +1,15 @@ +#include #include #include #include #include +#include #include +#include +#include #include #include +#include #include #include #include @@ -21,6 +26,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; } // namespace ErrorCodes using DAGField = std::pair; @@ -98,6 +104,110 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) return outputDAGResponse(context, schema, dag_response); } +struct ExecutorCtx +{ + tipb::Executor * input; + DAGSchema output; + std::unordered_map col_ref_map; +}; + +void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set & referred_columns, 
std::unordered_map & col_ref_map) +{ + if (ASTIdentifier * id = typeid_cast(ast.get())) + { + auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { return field.first == id->getColumnName(); }); + if (ft == input.end()) + throw DB::Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + expr->set_tp(tipb::ColumnRef); + *(expr->mutable_field_type()) = (*ft).second; + + referred_columns.emplace((*ft).first); + col_ref_map.emplace((*ft).first, expr); + } + else if (ASTFunction * func = typeid_cast(ast.get())) + { + // TODO: Support agg functions. + for (const auto & child_ast : func->arguments->children) + { + tipb::Expr * child = expr->add_children(); + compileExpr(input, child_ast, child, referred_columns, col_ref_map); + } + + String func_name_lowercase = Poco::toLower(func->name); + // TODO: Support more functions. + // TODO: Support type inference. + if (func_name_lowercase == "equals") + { + expr->set_sig(tipb::ScalarFuncSig::EQInt); + auto * ft = expr->mutable_field_type(); + // TODO: TiDB will infer Int64. + ft->set_tp(TiDB::TypeTiny); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } + else if (func_name_lowercase == "and") + { + expr->set_sig(tipb::ScalarFuncSig::LogicalAnd); + auto * ft = expr->mutable_field_type(); + // TODO: TiDB will infer Int64. + ft->set_tp(TiDB::TypeTiny); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } + else if (func_name_lowercase == "or") + { + expr->set_sig(tipb::ScalarFuncSig::LogicalOr); + auto * ft = expr->mutable_field_type(); + // TODO: TiDB will infer Int64. + ft->set_tp(TiDB::TypeTiny); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } + else + { + throw DB::Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); + } + expr->set_tp(tipb::ExprType::ScalarFunc); + } + else if (ASTLiteral * lit = typeid_cast(ast.get())) + { + std::stringstream ss; + switch (lit->value.getType()) + { + case Field::Types::Which::Null: + expr->set_tp(tipb::Null); + // Null literal expr doesn't need value. + break; + case Field::Types::Which::UInt64: + expr->set_tp(tipb::Uint64); + encodeDAGUInt64(lit->value.get(), ss); + break; + case Field::Types::Which::Int64: + expr->set_tp(tipb::Int64); + encodeDAGInt64(lit->value.get(), ss); + break; + case Field::Types::Which::Float64: + expr->set_tp(tipb::Float64); + encodeDAGFloat64(lit->value.get(), ss); + break; + case Field::Types::Which::Decimal: + expr->set_tp(tipb::MysqlDecimal); + encodeDAGDecimal(lit->value.get(), ss); + break; + case Field::Types::Which::String: + expr->set_tp(tipb::String); + // TODO: Align with TiDB. 
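                // (The switch above pairs each Field type with a tipb literal
                // tag and an encoder from the new DAGCodec; decodeLiteral in
                // DAGUtils.cpp performs the inverse mapping, so the two
                // switches have to stay in sync.)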
+ encodeDAGBytes(lit->value.get(), ss); + break; + default: + throw DB::Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); + } + expr->set_val(ss.str()); + } + else + { + throw DB::Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + } +} + std::tuple compileQuery( Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts) { @@ -110,49 +220,244 @@ std::tuple compileQuery( ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "from DAG compiler", 0); ASTSelectQuery & ast_query = typeid_cast(*ast); - String database_name, table_name; - auto query_database = ast_query.database(); - auto query_table = ast_query.table(); - if (query_database) - database_name = typeid_cast(*query_database).name; - if (query_table) - table_name = typeid_cast(*query_table).name; - if (!query_table) + /// Get table metadata. + TiDB::TableInfo table_info; + { + String database_name, table_name; + auto query_database = ast_query.database(); + auto query_table = ast_query.table(); + if (query_database) + database_name = typeid_cast(*query_database).name; + if (query_table) + table_name = typeid_cast(*query_table).name; + if (!query_table) + { + database_name = "system"; + table_name = "one"; + } + else if (!query_database) + { + database_name = context.getCurrentDatabase(); + } + + table_info = schema_fetcher(database_name, table_name); + } + + std::unordered_map executor_ctx_map; + std::unordered_set referred_columns; + tipb::TableScan * ts = nullptr; + tipb::Executor * last_executor = nullptr; + + /// Table scan. + { + tipb::Executor * ts_exec = dag_request.add_executors(); + ts_exec->set_tp(tipb::ExecType::TypeTableScan); + ts = ts_exec->mutable_tbl_scan(); + ts->set_table_id(table_info.id); + DAGSchema ts_output; + for (const auto & column_info : table_info.columns) + { + tipb::FieldType field_type; + field_type.set_tp(column_info.tp); + field_type.set_flag(column_info.flag); + field_type.set_flen(column_info.flen); + field_type.set_decimal(column_info.decimal); + ts_output.emplace_back(std::make_pair(column_info.name, std::move(field_type))); + } + executor_ctx_map.emplace(ts_exec, ExecutorCtx{nullptr, std::move(ts_output), std::unordered_map{}}); + last_executor = ts_exec; + } + + /// Filter. + if (ast_query.where_expression) + { + tipb::Executor * filter_exec = dag_request.add_executors(); + filter_exec->set_tp(tipb::ExecType::TypeSelection); + tipb::Selection * filter = filter_exec->mutable_selection(); + tipb::Expr * cond = filter->add_conditions(); + std::unordered_map col_ref_map; + compileExpr(executor_ctx_map[last_executor].output, ast_query.where_expression, cond, referred_columns, col_ref_map); + executor_ctx_map.emplace(filter_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::move(col_ref_map)}); + last_executor = filter_exec; + } + + /// TopN. 
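    /// (ORDER BY and LIMIT together collapse into a single TopN executor in
    /// the branch below; a bare LIMIT becomes a Limit executor instead, and a
    /// bare ORDER BY without LIMIT adds no executor at all.)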
+ if (ast_query.order_expression_list && ast_query.limit_length) { - database_name = "system"; - table_name = "one"; + tipb::Executor * topn_exec = dag_request.add_executors(); + topn_exec->set_tp(tipb::ExecType::TypeTopN); + tipb::TopN * topn = topn_exec->mutable_topn(); + std::unordered_map col_ref_map; + for (const auto & child : ast_query.order_expression_list->children) + { + ASTOrderByElement * elem = typeid_cast(child.get()); + if (!elem) + throw DB::Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = topn->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + compileExpr(executor_ctx_map[last_executor].output, elem->children[0], expr, referred_columns, col_ref_map); + } + auto limit = safeGet(typeid_cast(*ast_query.limit_length).value); + topn->set_limit(limit); + executor_ctx_map.emplace(topn_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::move(col_ref_map)}); + last_executor = topn_exec; } - else if (!query_database) + else if (ast_query.limit_length) { - database_name = context.getCurrentDatabase(); + tipb::Executor * limit_exec = dag_request.add_executors(); + limit_exec->set_tp(tipb::ExecType::TypeLimit); + tipb::Limit * limit = limit_exec->mutable_limit(); + auto limit_length = safeGet(typeid_cast(*ast_query.limit_length).value); + limit->set_limit(limit_length); + executor_ctx_map.emplace( + limit_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::unordered_map{}}); + last_executor = limit_exec; } - auto table_info = schema_fetcher(database_name, table_name); - - tipb::Executor * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeTableScan); - tipb::TableScan * ts = executor->mutable_tbl_scan(); - ts->set_table_id(table_info.id); - size_t i = 0; - for (const auto & column_info : table_info.columns) + + /// Column pruner. + std::function column_pruner = [&](ExecutorCtx & executor_ctx) { + if (!executor_ctx.input) + { + executor_ctx.output.erase(std::remove_if(executor_ctx.output.begin(), executor_ctx.output.end(), + [&](const auto & field) { return referred_columns.count(field.first) == 0; }), + executor_ctx.output.end()); + + for (const auto & field : executor_ctx.output) + { + tipb::ColumnInfo * ci = ts->add_columns(); + ci->set_column_id(table_info.getColumnID(field.first)); + ci->set_tp(field.second.tp()); + ci->set_flag(field.second.flag()); + ci->set_columnlen(field.second.flen()); + ci->set_decimal(field.second.decimal()); + } + + return; + } + column_pruner(executor_ctx_map[executor_ctx.input]); + const auto & last_output = executor_ctx_map[executor_ctx.input].output; + for (const auto & pair : executor_ctx.col_ref_map) + { + auto iter = std::find_if(last_output.begin(), last_output.end(), [&](const auto & field) { return field.first == pair.first; }); + if (iter == last_output.end()) + throw DB::Exception("Column not found when pruning: " + pair.first, ErrorCodes::LOGICAL_ERROR); + std::stringstream ss; + encodeDAGInt64(iter - last_output.begin(), ss); + pair.second->set_val(ss.str()); + } + executor_ctx.output = last_output; + }; + + /// Aggregation finalize. 
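+    // Scans the select list for aggregate functions; if one is found, or a
+    // GROUP BY is present, an Aggregation executor is stacked on top of the
+    // chain and the column pruner above is run immediately, since the
+    // aggregation fixes the final output schema.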
{ - tipb::ColumnInfo * ci = ts->add_columns(); - ci->set_column_id(column_info.id); - ci->set_tp(column_info.tp); - ci->set_flag(column_info.flag); + bool has_gby = ast_query.group_expression_list != nullptr; + bool has_agg_func = false; + for (const auto & child : ast_query.select_expression_list->children) + { + const ASTFunction * func = typeid_cast(child.get()); + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + has_agg_func = true; + break; + } + } + + if (has_gby || has_agg_func) + { + if (last_executor->has_limit() || last_executor->has_topn()) + throw DB::Exception("Limit/TopN and Agg cannot co-exist.", ErrorCodes::LOGICAL_ERROR); + + tipb::Executor * agg_exec = dag_request.add_executors(); + agg_exec->set_tp(tipb::ExecType::TypeAggregation); + tipb::Aggregation * agg = agg_exec->mutable_aggregation(); + std::unordered_map col_ref_map; + for (const auto & expr : ast_query.select_expression_list->children) + { + const ASTFunction * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + throw DB::Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); + + tipb::Expr * agg_func = agg->add_agg_func(); - tipb::FieldType field_type; - field_type.set_tp(column_info.tp); - field_type.set_flag(column_info.flag); - field_type.set_flen(column_info.flen); - field_type.set_decimal(column_info.decimal); - schema.emplace_back(std::make_pair(column_info.name, std::move(field_type))); + for (const auto & arg : func->arguments->children) + { + tipb::Expr * arg_expr = agg_func->add_children(); + compileExpr(executor_ctx_map[last_executor].output, arg, arg_expr, referred_columns, col_ref_map); + } - dag_request.add_output_offsets(i); + if (func->name == "count") + { + agg_func->set_tp(tipb::Count); + auto ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); + } + // TODO: Other agg func. + else + { + throw DB::Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + } - i++; + schema.emplace_back(std::make_pair(func->getColumnName(), agg_func->field_type())); + } + + if (has_gby) + { + for (const auto & child : ast_query.group_expression_list->children) + { + tipb::Expr * gby = agg->add_group_by(); + compileExpr(executor_ctx_map[last_executor].output, child, gby, referred_columns, col_ref_map); + schema.emplace_back(std::make_pair(child->getColumnName(), gby->field_type())); + } + } + + executor_ctx_map.emplace(agg_exec, ExecutorCtx{last_executor, DAGSchema{}, std::move(col_ref_map)}); + last_executor = agg_exec; + + column_pruner(executor_ctx_map[last_executor]); + } } - // TODO: Other operator compile. + /// Non-aggregation finalize. 
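+    // Without aggregation the select list drives the output: identifiers
+    // are registered as referred columns, a bare '*' expands to every
+    // column produced by the last executor, and after pruning each final
+    // column becomes an output offset into the last executor's schema.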
+ if (!last_executor->has_aggregation()) + { + std::vector final_output; + for (const auto & expr : ast_query.select_expression_list->children) + { + if (ASTIdentifier * id = typeid_cast(expr.get())) + { + referred_columns.emplace(id->getColumnName()); + final_output.emplace_back(id->getColumnName()); + } + else if (typeid_cast(expr.get())) + { + const auto & last_output = executor_ctx_map[last_executor].output; + for (const auto & field : last_output) + { + referred_columns.emplace(field.first); + final_output.push_back(field.first); + } + } + else + { + throw DB::Exception("Unsupported expression type in select", ErrorCodes::LOGICAL_ERROR); + } + } + + column_pruner(executor_ctx_map[last_executor]); + + const auto & last_output = executor_ctx_map[last_executor].output; + for (const auto & field : final_output) + { + auto iter + = std::find_if(last_output.begin(), last_output.end(), [&](const auto & last_field) { return last_field.first == field; }); + if (iter == last_output.end()) + throw DB::Exception("Column not found after pruning: " + field, ErrorCodes::LOGICAL_ERROR); + dag_request.add_output_offsets(iter - last_output.begin()); + schema.push_back(*iter); + } + } return std::make_tuple(table_info.id, std::move(schema), std::move(dag_request)); } @@ -160,14 +465,21 @@ std::tuple compileQuery( tipb::SelectResponse executeDAGRequest( Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version) { + static Logger * log = &Logger::get("MockDAG"); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString()); + context.setSetting("dag_planner", "optree"); tipb::SelectResponse dag_response; DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, dag_response, true); driver.execute(); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); return dag_response; } BlockInputStreamPtr outputDAGResponse(Context &, const DAGSchema & schema, const tipb::SelectResponse & dag_response) { + if (dag_response.has_error()) + throw DB::Exception(dag_response.error().msg(), dag_response.error().code()); + BlocksList blocks; for (const auto & chunk : dag_response.chunks()) { diff --git a/dbms/src/Flash/Coprocessor/DAGCodec.cpp b/dbms/src/Flash/Coprocessor/DAGCodec.cpp new file mode 100644 index 00000000000..9d809cc1258 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGCodec.cpp @@ -0,0 +1,65 @@ +#include + +#include +#include + +namespace DB +{ + +void encodeDAGInt64(Int64 i, std::stringstream & ss) +{ + auto u = RecordKVFormat::encodeInt64(i); + ss.write(reinterpret_cast(&u), sizeof(u)); +} + +void encodeDAGUInt64(UInt64 i, std::stringstream & ss) +{ + auto u = RecordKVFormat::encodeUInt64(i); + ss.write(reinterpret_cast(&u), sizeof(u)); +} + +void encodeDAGFloat32(Float32 f, std::stringstream & ss) { EncodeFloat64(f, ss); } + +void encodeDAGFloat64(Float64 f, std::stringstream & ss) { EncodeFloat64(f, ss); } + +void encodeDAGString(const String & s, std::stringstream & ss) { ss << s; } + +void encodeDAGBytes(const String & bytes, std::stringstream & ss) { ss << bytes; } + +void encodeDAGDecimal(const Decimal & d, std::stringstream & ss) { EncodeDecimal(d, ss); } + +Int64 decodeDAGInt64(const String & s) +{ + auto u = *(reinterpret_cast(s.data())); + return RecordKVFormat::decodeInt64(u); +} + +UInt64 decodeDAGUInt64(const String & s) +{ + auto u = *(reinterpret_cast(s.data())); + return RecordKVFormat::decodeUInt64(u); +} + +Float32 decodeDAGFloat32(const 
String & s) +{ + size_t cursor = 0; + return DecodeFloat64(cursor, s); +} + +Float64 decodeDAGFloat64(const String & s) +{ + size_t cursor = 0; + return DecodeFloat64(cursor, s); +} + +String decodeDAGString(const String & s) { return s; } + +String decodeDAGBytes(const String & s) { return s; } + +Decimal decodeDAGDecimal(const String & s) +{ + size_t cursor = 0; + return DecodeDecimal(cursor, s); +} + +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGCodec.h b/dbms/src/Flash/Coprocessor/DAGCodec.h new file mode 100644 index 00000000000..faecf74df1f --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGCodec.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace DB +{ + +void encodeDAGInt64(Int64, std::stringstream &); +void encodeDAGUInt64(UInt64, std::stringstream &); +void encodeDAGFloat32(Float32, std::stringstream &); +void encodeDAGFloat64(Float64, std::stringstream &); +void encodeDAGString(const String &, std::stringstream &); +void encodeDAGBytes(const String &, std::stringstream &); +void encodeDAGDecimal(const Decimal &, std::stringstream &); + +Int64 decodeDAGInt64(const String &); +UInt64 decodeDAGUInt64(const String &); +Float32 decodeDAGFloat32(const String &); +Float64 decodeDAGFloat64(const String &); +String decodeDAGString(const String &); +String decodeDAGBytes(const String &); +Decimal decodeDAGDecimal(const String &); + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index ee125ce3f97..79720f0b37b 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1,9 +1,8 @@ #include #include +#include #include -#include -#include #include @@ -51,7 +50,6 @@ const String & getFunctionName(const tipb::Expr & expr) String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser) { std::stringstream ss; - size_t cursor = 0; Int64 column_id = 0; String func_name; Field f; @@ -60,19 +58,21 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::Null: return "NULL"; case tipb::ExprType::Int64: - return std::to_string(RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data()))); + return std::to_string(decodeDAGInt64(expr.val())); case tipb::ExprType::Uint64: - return std::to_string(DecodeInt(cursor, expr.val())); + return std::to_string(decodeDAGUInt64(expr.val())); case tipb::ExprType::Float32: + return std::to_string(decodeDAGFloat32(expr.val())); case tipb::ExprType::Float64: - return std::to_string(DecodeFloat64(cursor, expr.val())); + return std::to_string(decodeDAGFloat64(expr.val())); case tipb::ExprType::String: + return decodeDAGString(expr.val()); case tipb::ExprType::Bytes: - return expr.val(); + return decodeDAGBytes(expr.val()); case tipb::ExprType::MysqlDecimal: - return DecodeDecimal(cursor, expr.val()).toString(); + return decodeDAGDecimal(expr.val()).toString(); case tipb::ExprType::ColumnRef: - column_id = RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); + column_id = decodeDAGInt64(expr.val()); if (column_id < 0 || column_id >= (ColumnID)input_col.size()) { throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); @@ -191,23 +191,24 @@ bool isColumnExpr(const tipb::Expr & expr) { return expr.tp() == tipb::ExprType: Field decodeLiteral(const tipb::Expr & expr) { - size_t cursor = 0; switch (expr.tp()) { case tipb::ExprType::Null: return Field(); case tipb::ExprType::Int64: - 
return RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); + return decodeDAGInt64(expr.val()); case tipb::ExprType::Uint64: - return DecodeInt(cursor, expr.val()); + return decodeDAGUInt64(expr.val()); case tipb::ExprType::Float32: + return Float64(decodeDAGFloat32(expr.val())); case tipb::ExprType::Float64: - return DecodeFloat64(cursor, expr.val()); + return decodeDAGFloat64(expr.val()); case tipb::ExprType::String: + return decodeDAGString(expr.val()); case tipb::ExprType::Bytes: - return expr.val(); + return decodeDAGBytes(expr.val()); case tipb::ExprType::MysqlDecimal: - return DecodeDecimal(cursor, expr.val()); + return decodeDAGDecimal(expr.val()); case tipb::ExprType::MysqlBit: case tipb::ExprType::MysqlDuration: case tipb::ExprType::MysqlEnum: @@ -224,7 +225,7 @@ Field decodeLiteral(const tipb::Expr & expr) ColumnID getColumnID(const tipb::Expr & expr) { - auto column_id = RecordKVFormat::decodeInt64(RecordKVFormat::read(expr.val().data())); + auto column_id = decodeDAGInt64(expr.val()); return column_id; } diff --git a/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt b/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt index c236d367c5d..b8e4b57cbca 100644 --- a/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt +++ b/dbms/src/Flash/Coprocessor/tests/CMakeLists.txt @@ -1,4 +1 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR}) - -add_executable (cop_test cop_test.cpp) -target_link_libraries (cop_test dbms) diff --git a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp b/dbms/src/Flash/Coprocessor/tests/cop_test.cpp deleted file mode 100644 index 4babeececd4..00000000000 --- a/dbms/src/Flash/Coprocessor/tests/cop_test.cpp +++ /dev/null @@ -1,332 +0,0 @@ -#include -#include - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#include -#include -#include -#pragma GCC diagnostic pop - -#include - - -using ChannelPtr = std::shared_ptr; -using SubPtr = std::shared_ptr; -static const int DAGREQUEST = 103; -class FlashClient -{ -private: - SubPtr sp; - -public: - static std::string decodeDatumToString(size_t & cursor, const std::string & raw_data) - { - switch (raw_data[cursor++]) - { - case TiDB::CodecFlagNil: - return "NULL"; - case TiDB::CodecFlagInt: - return std::to_string(DB::DecodeInt(cursor, raw_data)); - case TiDB::CodecFlagUInt: - return std::to_string(DB::DecodeInt(cursor, raw_data)); - case TiDB::CodecFlagBytes: - return DB::DecodeBytes(cursor, raw_data); - case TiDB::CodecFlagCompactBytes: - return DB::DecodeCompactBytes(cursor, raw_data); - case TiDB::CodecFlagFloat: - return std::to_string(DB::DecodeFloat64(cursor, raw_data)); - case TiDB::CodecFlagVarUInt: - return std::to_string(DB::DecodeVarUInt(cursor, raw_data)); - case TiDB::CodecFlagVarInt: - return std::to_string(DB::DecodeVarInt(cursor, raw_data)); - case TiDB::CodecFlagDuration: - throw DB::Exception("Not implented yet. 
DecodeDatum: CodecFlagDuration"); - case TiDB::CodecFlagDecimal: - return DB::DecodeDecimal(cursor, raw_data).toString(); - default: - throw DB::Exception("Unknown Type:" + std::to_string(raw_data[cursor - 1])); - } - } - - FlashClient(ChannelPtr cp) : sp(tikvpb::Tikv::NewStub(cp)) {} - grpc::Status coprocessor(coprocessor::Request * rqst, size_t output_column_num) - { - grpc::ClientContext clientContext; - clientContext.AddMetadata("user_name", ""); - clientContext.AddMetadata("dag_planner", "optree"); - clientContext.AddMetadata("dag_expr_field_type_strict_check", "0"); - coprocessor::Response response; - grpc::Status status = sp->Coprocessor(&clientContext, *rqst, &response); - if (status.ok()) - { - // if status is ok, try to decode the result - tipb::SelectResponse selectResponse; - if (selectResponse.ParseFromString(response.data())) - { - if (selectResponse.has_error()) - { - std::cout << "Coprocessor request failed, error code " << selectResponse.error().code() << " error msg " - << selectResponse.error().msg(); - return status; - } - for (const tipb::Chunk & chunk : selectResponse.chunks()) - { - size_t cursor = 0; - const std::string & data = chunk.rows_data(); - while (cursor < data.size()) - { - for (size_t i = 0; i < output_column_num; i++) - { - std::cout << decodeDatumToString(cursor, data) << " "; - } - std::cout << std::endl; - } - } - std::cout << "Execute summary: " << std::endl; - for (int i = 0; i < selectResponse.execution_summaries_size(); i++) - { - auto & summary = selectResponse.execution_summaries(i); - std::cout << "Executor " << i; - std::cout << " time = " << summary.time_processed_ns() << " ns "; - std::cout << " rows = " << summary.num_produced_rows(); - std::cout << " iter nums = " << summary.num_iterations(); - std::cout << std::endl; - } - } - } - else - { - std::cout << "Coprocessor request failed, error code " << status.error_code() << " error msg " << status.error_message(); - } - return status; - } -}; - -using ClientPtr = std::shared_ptr; - -void appendTS(tipb::DAGRequest & dag_request, size_t & result_field_num) -{ - // table scan: s,i - tipb::Executor * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeTableScan); - tipb::TableScan * ts = executor->mutable_tbl_scan(); - ts->set_table_id(44); - tipb::ColumnInfo * ci = ts->add_columns(); - ci->set_column_id(1); - ci->set_tp(0xfe); - ci->set_flag(0); - ci = ts->add_columns(); - ci->set_column_id(2); - ci->set_tp(8); - ci->set_flag(0); - dag_request.add_output_offsets(1); - dag_request.add_output_offsets(0); - dag_request.add_output_offsets(1); - result_field_num = 3; -} - -void appendSelection(tipb::DAGRequest & dag_request) -{ - // selection: less(i, 123) - auto * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeSelection); - tipb::Selection * selection = executor->mutable_selection(); - tipb::Expr * expr = selection->add_conditions(); - expr->set_tp(tipb::ExprType::ScalarFunc); - expr->set_sig(tipb::ScalarFuncSig::LTInt); - tipb::Expr * col = expr->add_children(); - tipb::Expr * value = expr->add_children(); - col->set_tp(tipb::ExprType::ColumnRef); - std::stringstream ss; - DB::EncodeNumber(1, ss); - col->set_val(ss.str()); - auto * type = col->mutable_field_type(); - type->set_tp(8); - type->set_flag(0); - value->set_tp(tipb::ExprType::Int64); - ss.str(""); - DB::EncodeNumber(10, ss); - value->set_val(std::string(ss.str())); - type = value->mutable_field_type(); - type->set_tp(8); - type->set_flag(1); - type = expr->mutable_field_type(); - 
type->set_tp(1); - type->set_flag(1 << 5); - - // selection i in (5,10,11) - selection->clear_conditions(); - expr = selection->add_conditions(); - expr->set_tp(tipb::ExprType::ScalarFunc); - expr->set_sig(tipb::ScalarFuncSig::InInt); - col = expr->add_children(); - col->set_tp(tipb::ExprType::ColumnRef); - ss.str(""); - DB::EncodeNumber(1, ss); - col->set_val(ss.str()); - type = col->mutable_field_type(); - type->set_tp(8); - type->set_flag(0); - value = expr->add_children(); - value->set_tp(tipb::ExprType::Int64); - ss.str(""); - DB::EncodeNumber(10, ss); - value->set_val(std::string(ss.str())); - type = value->mutable_field_type(); - type->set_tp(8); - type->set_flag(1); - type = expr->mutable_field_type(); - type->set_tp(1); - type->set_flag(1 << 5); - value = expr->add_children(); - value->set_tp(tipb::ExprType::Int64); - ss.str(""); - DB::EncodeNumber(5, ss); - value->set_val(std::string(ss.str())); - type = value->mutable_field_type(); - type->set_tp(8); - type->set_flag(1); - type = expr->mutable_field_type(); - type->set_tp(1); - type->set_flag(1 << 5); - value = expr->add_children(); - value->set_tp(tipb::ExprType::Int64); - ss.str(""); - DB::EncodeNumber(11, ss); - value->set_val(std::string(ss.str())); - type = value->mutable_field_type(); - type->set_tp(8); - type->set_flag(1); - type = expr->mutable_field_type(); - type->set_tp(1); - type->set_flag(1 << 5); - - // selection i is null - /* - selection->clear_conditions(); - expr = selection->add_conditions(); - expr->set_tp(tipb::ExprType::ScalarFunc); - expr->set_sig(tipb::ScalarFuncSig::IntIsNull); - col = expr->add_children(); - col->set_tp(tipb::ExprType::ColumnRef); - ss.str(""); - DB::EncodeNumber(1, ss); - col->set_val(ss.str()); - */ -} - -void appendAgg(tipb::DAGRequest & dag_request, size_t & result_field_num) -{ - // agg: count(s) group by i; - auto * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeAggregation); - auto agg = executor->mutable_aggregation(); - auto agg_func = agg->add_agg_func(); - agg_func->set_tp(tipb::ExprType::Count); - auto child = agg_func->add_children(); - child->set_tp(tipb::ExprType::ColumnRef); - std::stringstream ss; - DB::EncodeNumber(0, ss); - child->set_val(ss.str()); - auto f_type = agg_func->mutable_field_type(); - f_type->set_tp(3); - f_type->set_flag(33); - auto group_col = agg->add_group_by(); - group_col->set_tp(tipb::ExprType::ColumnRef); - ss.str(""); - DB::EncodeNumber(1, ss); - group_col->set_val(ss.str()); - f_type = group_col->mutable_field_type(); - f_type->set_tp(8); - f_type->set_flag(1); - result_field_num = 2; -} - -void appendTopN(tipb::DAGRequest & dag_request) -{ - auto * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeTopN); - tipb::TopN * topN = executor->mutable_topn(); - topN->set_limit(3); - tipb::ByItem * byItem = topN->add_order_by(); - byItem->set_desc(false); - tipb::Expr * expr1 = byItem->mutable_expr(); - expr1->set_tp(tipb::ExprType::ColumnRef); - std::stringstream ss; - DB::EncodeNumber(1, ss); - expr1->set_val(ss.str()); - auto * type = expr1->mutable_field_type(); - type->set_tp(8); - type->set_tp(0); -} - -void appendLimit(tipb::DAGRequest & dag_request) -{ - auto * executor = dag_request.add_executors(); - executor->set_tp(tipb::ExecType::TypeLimit); - tipb::Limit * limit = executor->mutable_limit(); - limit->set_limit(5); -} - -grpc::Status rpcTest() -{ - ChannelPtr cp = grpc::CreateChannel("localhost:9093", grpc::InsecureChannelCredentials()); - ClientPtr clientPtr = 
std::make_shared(cp); - size_t result_field_num = 0; - bool has_selection = true; - bool has_agg = false; - bool has_topN = true; - bool has_limit = false; - // construct a dag request - tipb::DAGRequest dagRequest; - dagRequest.set_start_ts(18446744073709551615uL); - - appendTS(dagRequest, result_field_num); - if (has_selection) - appendSelection(dagRequest); - if (has_agg) - appendAgg(dagRequest, result_field_num); - if (has_topN) - appendTopN(dagRequest); - if (has_limit) - appendLimit(dagRequest); - - // construct a coprocessor request - coprocessor::Request request; - //todo add context info - kvrpcpb::Context * ctx = request.mutable_context(); - ctx->set_region_id(2); - auto region_epoch = ctx->mutable_region_epoch(); - region_epoch->set_version(21); - region_epoch->set_conf_ver(2); - request.set_tp(DAGREQUEST); - request.set_data(dagRequest.SerializeAsString()); - //request.add_ranges(); - return clientPtr->coprocessor(&request, result_field_num); -} - -void codecTest() -{ - Int64 i = 123; - std::stringstream ss; - DB::EncodeNumber(i, ss); - std::string val = ss.str(); - std::stringstream decode_ss; - size_t cursor = 0; - DB::Field f = DB::DecodeDatum(cursor, val); - Int64 r = f.get(); - r++; -} - -int main() -{ - // std::cout << "Before rpcTest"<< std::endl; - grpc::Status ret = rpcTest(); - // codecTest(); - // std::cout << "End rpcTest " << std::endl; - // std::cout << "The ret is " << ret.error_code() << " " << ret.error_details() - // << " " << ret.error_message() << std::endl; - return 0; -} diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 614f3df2703..fb83777423d 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -97,6 +97,16 @@ enum TP M(PartKey, (1 << 14)) \ M(Num, (1 << 15)) +enum ColumnFlag +{ +#ifdef M +#error "Please undefine macro M first." +#endif +#define M(cf, v) ColumnFlag##cf = v, + COLUMN_FLAGS(M) +#undef M +}; + // Codec flags. // In format: TiDB codec flag, int value. #ifdef M diff --git a/tests/mutable-test/txn_dag/aggregation.test b/tests/mutable-test/txn_dag/aggregation.test new file mode 100644 index 00000000000..0f8ec4c30e3 --- /dev/null +++ b/tests/mutable-test/txn_dag/aggregation.test @@ -0,0 +1,32 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666) +=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 666) +=> DBGInvoke __raft_insert_row(default, test, 4, 52, 'test3', 777) + +# DAG read by not specifying region id, group by. +=> DBGInvoke dag('select count(col_1) from default.test group by col_2') +┌─count(col_1)─┬─col_2─┐ +│ 2 │ 666 │ +│ 1 │ 777 │ +└──────────────┴───────┘ + +# DAG read by explicitly specifying region id, where + group by. +=> DBGInvoke dag('select count(col_1) from default.test where col_2 = 666 group by col_2', 4) +┌─count(col_1)─┬─col_2─┐ +│ 2 │ 666 │ +└──────────────┴───────┘ + +# Clean up. 
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
diff --git a/tests/mutable-test/txn_dag/filter.test b/tests/mutable-test/txn_dag/filter.test
new file mode 100644
index 00000000000..9045b9da1b4
--- /dev/null
+++ b/tests/mutable-test/txn_dag/filter.test
@@ -0,0 +1,37 @@
+# Preparation.
+=> DBGInvoke __enable_schema_sync_service('true')
+
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
+
+=> DBGInvoke __set_flush_threshold(1000000, 1000000)
+
+# Data.
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64')
+=> DBGInvoke __refresh_schemas()
+=> DBGInvoke __put_region(4, 0, 100, default, test)
+=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666)
+=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 777)
+
+# DAG read by not specifying region id, where col_2 = 666.
+=> DBGInvoke dag('select * from default.test where col_2 = 666')
+┌─col_1─┬─col_2─┐
+│ test1 │ 666 │
+└───────┴───────┘
+
+# DAG read by explicitly specifying region id, where col_1 = 'test2'.
+=> DBGInvoke dag('select col_2 from default.test where col_1 = \'test2\'', 4)
+┌─col_2─┐
+│ 777 │
+└───────┘
+
+# Mock DAG read, where or.
+=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' or col_2 = 666', 4)
+┌─col_2─┬─col_1─┬─col_2─┐
+│ 666 │ test1 │ 666 │
+│ 777 │ test2 │ 777 │
+└───────┴───────┴───────┘
+
+# Clean up.
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
diff --git a/tests/mutable-test/txn_dag/limit.test b/tests/mutable-test/txn_dag/limit.test
new file mode 100644
index 00000000000..ee8d97f75a7
--- /dev/null
+++ b/tests/mutable-test/txn_dag/limit.test
@@ -0,0 +1,31 @@
+# Preparation.
+=> DBGInvoke __enable_schema_sync_service('true')
+
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
+
+=> DBGInvoke __set_flush_threshold(1000000, 1000000)
+
+# Data.
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64')
+=> DBGInvoke __refresh_schemas()
+=> DBGInvoke __put_region(4, 0, 100, default, test)
+=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666)
+=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test1', 666)
+
+# DAG read by not specifying region id, full table scan.
+=> DBGInvoke dag('select * from default.test')
+┌─col_1─┬─col_2─┐
+│ test1 │ 666 │
+│ test1 │ 666 │
+└───────┴───────┘
+
+# Mock DAG read, where + limit.
+=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_2 = 666 limit 1', 4)
+┌─col_2─┬─col_1─┬─col_2─┐
+│ 666 │ test1 │ 666 │
+└───────┴───────┴───────┘
+
+# Clean up.
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
diff --git a/tests/mutable-test/txn_dag/project.test b/tests/mutable-test/txn_dag/project.test
new file mode 100644
index 00000000000..8b29b4a7a08
--- /dev/null
+++ b/tests/mutable-test/txn_dag/project.test
@@ -0,0 +1,41 @@
+# Preparation.
+=> DBGInvoke __enable_schema_sync_service('true')
+
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
+
+=> DBGInvoke __set_flush_threshold(1000000, 1000000)
+
+# Data.
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64')
+=> DBGInvoke __refresh_schemas()
+=> DBGInvoke __put_region(4, 0, 100, default, test)
+=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666)
+
+# DAG read by not specifying region id, select *.
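+# (The trailing '" --dag_planner="optree' on the queries below splices an
+# extra option into the client invocation; it appears to select the
+# operator-tree ("optree") DAG planner instead of the default one.)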
+=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌─col_1─┬─col_2─┐ +│ test1 │ 666 │ +└───────┴───────┘ + +# DAG read by not specifying region id, select col_1. +=> DBGInvoke dag('select col_1 from default.test') " --dag_planner="optree +┌─col_1─┐ +│ test1 │ +└───────┘ + +# DAG read by explicitly specifying region id, select col_2. +=> DBGInvoke dag('select col_2 from default.test', 4) " --dag_planner="optree +┌─col_2─┐ +│ 666 │ +└───────┘ + +# Mock DAG read, select col_2, col_1, col_2. +=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test', 4) " --dag_planner="optree +┌─col_2─┬─col_1─┬─col_2─┐ +│ 666 │ test1 │ 666 │ +└───────┴───────┴───────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test diff --git a/tests/mutable-test/txn_dag/table_scan.test b/tests/mutable-test/txn_dag/table_scan.test index 28d6599f6de..953af0cef9d 100644 --- a/tests/mutable-test/txn_dag/table_scan.test +++ b/tests/mutable-test/txn_dag/table_scan.test @@ -6,7 +6,7 @@ => DBGInvoke __set_flush_threshold(1000000, 1000000) -# Data +# Data. => DBGInvoke __mock_tidb_table(default, test, 'col_1 String') => DBGInvoke __refresh_schemas() => DBGInvoke __put_region(4, 0, 100, default, test) diff --git a/tests/mutable-test/txn_dag/topn.test b/tests/mutable-test/txn_dag/topn.test new file mode 100644 index 00000000000..1708402ca40 --- /dev/null +++ b/tests/mutable-test/txn_dag/topn.test @@ -0,0 +1,30 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666) +=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 777) + +# DAG read by not specifying region id, order by col_2 limit 1. +=> DBGInvoke dag('select * from default.test order by col_2 limit 1') +┌─col_1─┬─col_2─┐ +│ test1 │ 666 │ +└───────┴───────┘ + +# Mock DAG read, where + topn. +=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' or col_2 = 666 order by col_1 desc limit 1', 4) +┌─col_2─┬─col_1─┬─col_2─┐ +│ 777 │ test2 │ 777 │ +└───────┴───────┴───────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test From cbcfdb0ec57c1c205aa83b02bec0506024de16eb Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 16 Aug 2019 15:07:03 +0800 Subject: [PATCH 40/79] filter column must be uint8 in tiflash (#180) * add all scalar function sig in scalarFunMap * fix literal expr decode * enable ltrim && rtrim * code refine * use throw instead of rethrow in DAGDriver.cpp * 1. fix decode UInt literal error, 2. 
support mysqlDecimal type

* format code

* filter column must be uint8 in tiflash

* address comments

* address comments

* address comments

* remove useless include
---
 .../Coprocessor/DAGExpressionAnalyzer.cpp     | 27 ++++++++++++++++++-
 dbms/src/Flash/Coprocessor/DAGQuerySource.cpp |  2 ++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index 45b05b7dc97..9386f2af560 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -2,7 +2,9 @@
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -103,6 +105,12 @@ void DAGExpressionAnalyzer::appendAggregation(
     after_agg = true;
 }

+bool isUInt8Type(const DataTypePtr & type)
+{
+    auto non_nullable_type = type->isNullable() ? std::dynamic_pointer_cast(type)->getNestedType() : type;
+    return std::dynamic_pointer_cast(non_nullable_type) != nullptr;
+}
+
 void DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name)
 {
     if (sel.conditions_size() == 0)
@@ -124,7 +132,24 @@ void DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const ti
     const tipb::Expr & filter = sel.conditions_size() > 1 ? final_condition : sel.conditions(0);

     initChain(chain, getCurrentInputColumns());
-    filter_column_name = getActions(filter, chain.steps.back().actions);
+    ExpressionActionsChain::Step & last_step = chain.steps.back();
+    filter_column_name = getActions(filter, last_step.actions);
+    auto & filter_column_type = chain.steps.back().actions->getSampleBlock().getByName(filter_column_name).type;
+    if (!isUInt8Type(filter_column_type))
+    {
+        // find the original uint8 column
+        auto & last_actions = last_step.actions->getActions();
+        for (auto it = last_actions.rbegin(); it != last_actions.rend(); ++it)
+        {
+            if (it->type == ExpressionAction::Type::APPLY_FUNCTION && it->result_name == filter_column_name
+                && it->function->getName() == "CAST")
+            {
+                // for the cast function, the column being cast is the first argument
+                filter_column_name = it->argument_names[0];
+                break;
+            }
+        }
+    }
     chain.steps.back().required_output.push_back(filter_column_name);
 }

diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
index 3b2d3f4b8c3..8d67c97c348 100644
--- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
@@ -94,6 +94,8 @@ bool fillExecutorOutputFieldTypes(const tipb::Executor & executor, std::vector Date: Fri, 16 Aug 2019 16:37:18 +0800
Subject: [PATCH 41/79] 1. fix encode null error, 2. fix empty field type generated by TiFlash (#182)

* add all scalar function sig in scalarFunMap

* fix literal expr decode

* enable ltrim && rtrim

* code refine

* use throw instead of rethrow in DAGDriver.cpp

* 1. fix decode UInt literal error, 2. support mysqlDecimal type

* format code

* filter column must be uint8 in tiflash

* address comments

* address comments

* address comments

* remove useless include

* 1. fix encode null error, 2.
fix empty field type generated by TiFlash --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 60 +++++++++---------- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 1 + dbms/src/Storages/Transaction/Codec.cpp | 5 ++ 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 9386f2af560..37e259ed21e 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -23,11 +23,6 @@ extern const int COP_BAD_DAG_REQUEST; extern const int UNSUPPORTED_METHOD; } // namespace ErrorCodes -static String genCastString(const String & org_name, const String & target_type_name) -{ - return "cast(" + org_name + ", " + target_type_name + ") "; -} - static String genFuncString(const String & func_name, const Names & argument_names) { std::stringstream ss; @@ -111,29 +106,39 @@ bool isUInt8Type(const DataTypePtr & type) return std::dynamic_pointer_cast(non_nullable_type) != nullptr; } +String DAGExpressionAnalyzer::applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions) +{ + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); + String result_name = genFuncString(func_name, arg_names); + const ExpressionAction & apply_function = ExpressionAction::applyFunction(function_builder, arg_names, result_name); + actions->add(apply_function); + return result_name; +} + void DAGExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name) { if (sel.conditions_size() == 0) { throw Exception("Selection executor without condition exprs", ErrorCodes::COP_BAD_DAG_REQUEST); } - tipb::Expr final_condition; - if (sel.conditions_size() > 1) - { - final_condition.set_tp(tipb::ExprType::ScalarFunc); - final_condition.set_sig(tipb::ScalarFuncSig::LogicalAnd); - - for (auto & condition : sel.conditions()) - { - auto c = final_condition.add_children(); - c->ParseFromString(condition.SerializeAsString()); - } - } - const tipb::Expr & filter = sel.conditions_size() > 1 ? 
final_condition : sel.conditions(0); initChain(chain, getCurrentInputColumns()); ExpressionActionsChain::Step & last_step = chain.steps.back(); - filter_column_name = getActions(filter, last_step.actions); + Names arg_names; + for (auto & condition : sel.conditions()) + { + arg_names.push_back(getActions(condition, last_step.actions)); + } + if (arg_names.size() == 1) + { + filter_column_name = arg_names[0]; + } + else + { + // connect all the conditions by logical and + filter_column_name = applyFunction("and", arg_names, last_step.actions); + } + auto & filter_column_type = chain.steps.back().actions->getSampleBlock().getByName(filter_column_name).type; if (!isUInt8Type(filter_column_type)) { @@ -257,17 +262,12 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres auto type_field_type = type_expr.field_type(); type_field_type.set_tp(0xfe); type_field_type.set_flag(1); - String name = getActions(type_expr, actions); - String cast_name = "CAST"; - const FunctionBuilderPtr & cast_func_builder = FunctionFactory::instance().get(cast_name, context); - String cast_expr_name = genCastString(expr_name, getName(type_expr, getCurrentInputColumns())); + getActions(type_expr, actions); Names cast_argument_names; cast_argument_names.push_back(expr_name); cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns())); - const ExpressionAction & apply_cast_function - = ExpressionAction::applyFunction(cast_func_builder, cast_argument_names, cast_expr_name); - actions->add(apply_cast_function); + String cast_expr_name = applyFunction("CAST", cast_argument_names, actions); return cast_expr_name; } else @@ -341,7 +341,6 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi throw Exception("agg function is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); } const String & func_name = getFunctionName(expr); - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); Names argument_names; DataTypes argument_types; @@ -372,16 +371,13 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } } - // re-construct expr_name, because expr_name generated previously is based on expr tree, + // need to re-construct expr_name, because expr_name generated previously is based on expr tree, // but for function call, it's argument name may be changed as an implicit cast func maybe // inserted(refer to the logic below), so we need to update the expr_name // for example, for a expr and(arg1, arg2), the expr_name is and(arg1_name,arg2_name), but // if the arg1 need to be casted to the type passed by dag request, then the expr_name // should be updated to and(casted_arg1_name, arg2_name) - expr_name = genFuncString(func_name, argument_names); - - const ExpressionAction & apply_function = ExpressionAction::applyFunction(function_builder, argument_names, expr_name); - actions->add(apply_function); + expr_name = applyFunction(func_name, argument_names, actions); // add cast if needed expr_name = appendCastIfNeeded(expr, actions, expr_name); return expr_name; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 959729886c7..8931f2b42da 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -53,6 +53,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); const 
NamesAndTypesList & getCurrentInputColumns(); void makeExplicitSet(const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name); + String applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions); }; } // namespace DB diff --git a/dbms/src/Storages/Transaction/Codec.cpp b/dbms/src/Storages/Transaction/Codec.cpp index 0f8278c47cc..ab7ecd96c0b 100644 --- a/dbms/src/Storages/Transaction/Codec.cpp +++ b/dbms/src/Storages/Transaction/Codec.cpp @@ -347,6 +347,11 @@ inline T getFieldValue(const Field & field) void EncodeDatum(const Field & field, TiDB::CodecFlag flag, std::stringstream & ss) { + if (field.isNull()) + { + EncodeNumber(UInt8(TiDB::CodecFlagNil), ss); + return; + } EncodeNumber(UInt8(flag), ss); switch (flag) { From 5853b917e479091609aade54ee476324c78b0692 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 19 Aug 2019 13:45:11 +0800 Subject: [PATCH 42/79] check validation of dag exprs field type (#183) * check validation of dag exprs field type * format code * address comments --- dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp | 4 ++-- dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 8 ++++---- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 10 ++++++++++ dbms/src/Flash/Coprocessor/DAGUtils.h | 1 + 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 37e259ed21e..8aea31eb98f 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -245,7 +245,7 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres { throw Exception("Expression without field type", ErrorCodes::COP_BAD_DAG_REQUEST); } - if (expr.has_field_type() && isFunctionExpr(expr)) + if (exprHasValidFieldType(expr) && isFunctionExpr(expr)) { DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type()); DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type; @@ -314,7 +314,7 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi if (isLiteralExpr(expr)) { Field value = decodeLiteral(expr); - DataTypePtr type = expr.has_field_type() ? getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), value); + DataTypePtr type = exprHasValidFieldType(expr) ? getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), value); ColumnWithTypeAndName column; column.column = type->createColumnConst(1, convertFieldToType(value, *type)); diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 8d67c97c348..2a1fa961f56 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -103,17 +103,17 @@ bool fillExecutorOutputFieldTypes(const tipb::Executor & executor, std::vector #include #include +#include #include @@ -231,6 +232,15 @@ ColumnID getColumnID(const tipb::Expr & expr) bool isInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; } +// for some historical or unknown reasons, TiDB might set a invalid +// field type. This function checks if the expr has a valid field type +// so far the known invalid field types are: +// 1. 
decimal type with scale -1 +bool exprHasValidFieldType(const tipb::Expr & expr) +{ + return expr.has_field_type() && !(expr.field_type().tp() == TiDB::TP::TypeNewDecimal && expr.field_type().decimal() == -1); +} + std::unordered_map agg_func_map({ {tipb::ExprType::Count, "count"}, {tipb::ExprType::Sum, "sum"}, {tipb::ExprType::Min, "min"}, {tipb::ExprType::Max, "max"}, {tipb::ExprType::First, "any"}, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 410c447d2fd..2d05f4b5a29 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -26,6 +26,7 @@ String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_ const String & getTypeName(const tipb::Expr & expr); String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser = true); bool isInOrGlobalInOperator(const String & name); +bool exprHasValidFieldType(const tipb::Expr & expr); extern std::unordered_map agg_func_map; extern std::unordered_map scalar_func_map; From 5de0ec66ff6f908a5f45f75051e14dc147da2af5 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 20 Aug 2019 17:09:14 +0800 Subject: [PATCH 43/79] add more coprocessor mock tests (#185) * check validation of dag exprs field type * format code * address comments * add more filter test * add data type tests * remove useless comment * disable decimal test --- dbms/src/Debug/MockTiDB.cpp | 9 ++ dbms/src/Debug/dbgFuncCoprocessor.cpp | 91 ++++++++++++++----- .../txn_dag/data_type_number.test | 91 +++++++++++++++++++ .../txn_dag/data_type_others.test | 41 +++++++++ .../mutable-test/txn_dag/data_type_time.test | 24 +++++ tests/mutable-test/txn_dag/filter.test | 6 ++ 6 files changed, 240 insertions(+), 22 deletions(-) create mode 100644 tests/mutable-test/txn_dag/data_type_number.test create mode 100644 tests/mutable-test/txn_dag/data_type_others.test create mode 100644 tests/mutable-test/txn_dag/data_type_time.test diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 5cab3649cd2..4a175676b00 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -138,6 +138,15 @@ ColumnInfo getColumnInfoFromColumn(const NameAndTypePair & column, ColumnID id) column_info.flen = decimal_type->getPrec(); column_info.decimal = decimal_type->getScale(); } + if (checkDataType(nested_type)) + { + auto enum16_type = checkAndGetDataType(nested_type); + column_info.tp = TiDB::TypeEnum; + for (auto & element : enum16_type->getValues()) + { + column_info.elems.emplace_back(element.first, element.second); + } + } #ifdef M #error "Please undefine macro M first." 
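Note: the Enum16 branch added to MockTiDB above only copies the ClickHouse
enum's (name, value) pairs into TiDB::ColumnInfo::elems; TiDB then encodes
enum datums as 1-based indexes into that element list. A minimal sketch of
the mapping's shape, assuming a hypothetical trimmed-down MiniColumnInfo
stand-in (the real TiDB::ColumnInfo carries many more fields) and the MySQL
type code 247 for enum:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Hypothetical, trimmed-down stand-in for TiDB::ColumnInfo, keeping only
// the two fields the Enum16 branch above touches.
struct MiniColumnInfo
{
    uint8_t tp = 0;
    std::vector<std::pair<std::string, int16_t>> elems;
};

// Mirrors the new MockTiDB branch: copy each (name, value) pair of a
// ClickHouse Enum16 definition into the TiDB-side column info.
MiniColumnInfo enum16ToColumnInfo(const std::vector<std::pair<std::string, int16_t>> & values)
{
    MiniColumnInfo ci;
    ci.tp = 247; // MySQL type code for enum, i.e. TiDB::TypeEnum
    for (const auto & element : values)
        ci.elems.emplace_back(element.first, element.second);
    return ci;
}

For instance, enum16ToColumnInfo({{"male", 1}, {"female", 2}}) produces a
column whose enum datums decode as 1-based indexes into elems (1 -> "male",
2 -> "female"), which is what the data_type_others.test below relies on.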
diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index dbc3c9986f3..83828238803 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -29,8 +29,8 @@ extern const int BAD_ARGUMENTS; extern const int LOGICA_ERROR; } // namespace ErrorCodes -using DAGField = std::pair; -using DAGSchema = std::vector; +using DAGColumnInfo = std::pair; +using DAGSchema = std::vector; using SchemaFetcher = std::function; std::tuple compileQuery( Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts); @@ -49,7 +49,8 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) region_id = safeGet(typeid_cast(*args[1]).value); Timestamp start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery(context, query, + auto [table_id, schema, dag_request] = compileQuery( + context, query, [&](const String & database_name, const String & table_name) { auto storage = context.getTable(database_name, table_name); auto mmt = std::dynamic_pointer_cast(storage); @@ -91,7 +92,8 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) if (start_ts == 0) start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery(context, query, + auto [table_id, schema, dag_request] = compileQuery( + context, query, [&](const String & database_name, const String & table_name) { return MockTiDB::instance().getTableByName(database_name, table_name)->table_info; }, @@ -111,6 +113,26 @@ struct ExecutorCtx std::unordered_map col_ref_map; }; +tipb::FieldType columnInfoToFieldType(const ColumnInfo & ci) +{ + tipb::FieldType ret; + ret.set_tp(ci.tp); + ret.set_flag(ci.flag); + ret.set_flen(ci.flen); + ret.set_decimal(ci.decimal); + return ret; +} + +ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type) +{ + ColumnInfo ret; + ret.tp = static_cast(field_type.tp()); + ret.flag = field_type.flag(); + ret.flen = field_type.flen(); + ret.decimal = field_type.decimal(); + return ret; +} + void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set & referred_columns, std::unordered_map & col_ref_map) { @@ -120,7 +142,7 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un if (ft == input.end()) throw DB::Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); expr->set_tp(tipb::ColumnRef); - *(expr->mutable_field_type()) = (*ft).second; + *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); referred_columns.emplace((*ft).first); col_ref_map.emplace((*ft).first, expr); @@ -208,6 +230,24 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un } } +void compileFilter(const DAGSchema & input, ASTPtr ast, tipb::Selection * filter, std::unordered_set & referred_columns, + std::unordered_map & col_ref_map) +{ + if (auto * func = typeid_cast(ast.get())) + { + if (func->name == "and") + { + for (auto & child : func->arguments->children) + { + compileFilter(input, child, filter, referred_columns, col_ref_map); + } + return; + } + } + tipb::Expr * cond = filter->add_conditions(); + compileExpr(input, ast, cond, referred_columns, col_ref_map); +} + std::tuple compileQuery( Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts) { @@ -257,12 +297,13 @@ std::tuple compileQuery( DAGSchema ts_output; for (const auto & column_info : 
table_info.columns)
         {
-            tipb::FieldType field_type;
-            field_type.set_tp(column_info.tp);
-            field_type.set_flag(column_info.flag);
-            field_type.set_flen(column_info.flen);
-            field_type.set_decimal(column_info.decimal);
-            ts_output.emplace_back(std::make_pair(column_info.name, std::move(field_type)));
+            ColumnInfo ci;
+            ci.tp = column_info.tp;
+            ci.flag = column_info.flag;
+            ci.flen = column_info.flen;
+            ci.decimal = column_info.decimal;
+            ci.elems = column_info.elems;
+            ts_output.emplace_back(std::make_pair(column_info.name, std::move(ci)));
         }
         executor_ctx_map.emplace(ts_exec, ExecutorCtx{nullptr, std::move(ts_output), std::unordered_map{}});
         last_executor = ts_exec;
@@ -274,9 +315,8 @@ std::tuple compileQuery(
         tipb::Executor * filter_exec = dag_request.add_executors();
         filter_exec->set_tp(tipb::ExecType::TypeSelection);
         tipb::Selection * filter = filter_exec->mutable_selection();
-        tipb::Expr * cond = filter->add_conditions();
         std::unordered_map col_ref_map;
-        compileExpr(executor_ctx_map[last_executor].output, ast_query.where_expression, cond, referred_columns, col_ref_map);
+        compileFilter(executor_ctx_map[last_executor].output, ast_query.where_expression, filter, referred_columns, col_ref_map);
         executor_ctx_map.emplace(filter_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::move(col_ref_map)});
         last_executor = filter_exec;
     }
@@ -323,14 +363,21 @@ std::tuple compileQuery(
                 [&](const auto & field) { return referred_columns.count(field.first) == 0; }),
                 executor_ctx.output.end());

-            for (const auto & field : executor_ctx.output)
+            for (const auto & info : executor_ctx.output)
             {
                 tipb::ColumnInfo * ci = ts->add_columns();
-                ci->set_column_id(table_info.getColumnID(field.first));
-                ci->set_tp(field.second.tp());
-                ci->set_flag(field.second.flag());
-                ci->set_columnlen(field.second.flen());
-                ci->set_decimal(field.second.decimal());
+                ci->set_column_id(table_info.getColumnID(info.first));
+                ci->set_tp(info.second.tp);
+                ci->set_flag(info.second.flag);
+                ci->set_columnlen(info.second.flen);
+                ci->set_decimal(info.second.decimal);
+                if (info.second.elems.size() != 0)
+                {
+                    for (auto & pair : info.second.elems)
+                    {
+                        ci->add_elems(pair.first);
+                    }
+                }
             }

             return;
@@ -399,7 +446,7 @@
                     throw DB::Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR);
                 }

-                schema.emplace_back(std::make_pair(func->getColumnName(), agg_func->field_type()));
+                schema.emplace_back(std::make_pair(func->getColumnName(), fieldTypeToColumnInfo(agg_func->field_type())));
             }

             if (has_gby)
             {
                 for (const auto & child : ast_query.group_expression_list->children)
                 {
                     tipb::Expr * gby = agg->add_group_by();
                     compileExpr(executor_ctx_map[last_executor].output, child, gby, referred_columns, col_ref_map);
-                    schema.emplace_back(std::make_pair(child->getColumnName(), gby->field_type()));
+                    schema.emplace_back(std::make_pair(child->getColumnName(), fieldTypeToColumnInfo(gby->field_type())));
                 }
             }

@@ -501,7 +548,7 @@ BlockInputStreamPtr outputDAGResponse(Context &, const DAGSchema & schema, const
     for (auto & field : schema)
     {
         const auto & name = field.first;
-        auto data_type = getDataTypeByFieldType(field.second);
+        auto data_type = getDataTypeByColumnInfo(field.second);
         ColumnWithTypeAndName col(data_type, name);
         col.column->assumeMutable()->reserve(rows.size());
         columns.emplace_back(std::move(col));
diff --git a/tests/mutable-test/txn_dag/data_type_number.test b/tests/mutable-test/txn_dag/data_type_number.test
new file mode 100644
index 00000000000..95ec49b10f6
--- /dev/null
+++
b/tests/mutable-test/txn_dag/data_type_number.test @@ -0,0 +1,91 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. +#=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int8, col_2 UInt8, col_3 Int16, col_4 UInt16, col_5 Int32, col_6 UInt32, col_7 Int64, col_8 UInt64, col_9 Float32, col_10 Float64, col_11 Decimal(10,2)') +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int8, col_2 UInt8, col_3 Int16, col_4 UInt16, col_5 Int32, col_6 UInt32, col_7 Int64, col_8 UInt64, col_9 Float32, col_10 Float64') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +#=> DBGInvoke __raft_insert_row(default, test, 4, 50, -128, 255, -32768, 65535, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 12345.6789, 1234567.890123, 666.88) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, -128, 255, -32768, 65535, -2147483648, 4294967295, -9223372036854775808, 18446744073709551615, 12345.6789, 1234567.890123) + +# DAG read full table scan +=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Int8 column +=> DBGInvoke dag('select * from default.test where col_1 = -128') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by UInt8 column +=> DBGInvoke dag('select * from default.test where col_2 = 255') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Int16 column +=> DBGInvoke dag('select * from default.test where col_3 = -32768') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by UInt16 column +=> DBGInvoke dag('select * from default.test where col_4 = 65535') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 
│ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Int32 column +=> DBGInvoke dag('select * from default.test where col_5 = -2147483648') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by UInt32 column +=> DBGInvoke dag('select * from default.test where col_6 = 4294967295') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Int64 column +=> DBGInvoke dag('select * from default.test where col_7 = -9223372036854775808') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by UInt64 column +=> DBGInvoke dag('select * from default.test where col_8 = 18446744073709551615') " --dag_planner="optree +┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Float32 column +#=> DBGInvoke dag('select * from default.test where col_9 = 12345.679') " --dag_planner="optree +#┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +#│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ +#└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Float64 column +#=> DBGInvoke dag('select * from default.test where col_10 = 1234567.890123') " --dag_planner="optree +#┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┐ +#│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ 
+#└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┘ + +# DAG read filter by Decimal column +#=> DBGInvoke dag('select * from default.test where col_11 = 666.88') " --dag_planner="optree +#┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┬─col_11─┐ +#│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ 666.88 │ +#└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┴────────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test diff --git a/tests/mutable-test/txn_dag/data_type_others.test b/tests/mutable-test/txn_dag/data_type_others.test new file mode 100644 index 00000000000..3194f8e1ba9 --- /dev/null +++ b/tests/mutable-test/txn_dag/data_type_others.test @@ -0,0 +1,41 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Enum16(\'male\' = 1, \'female\' = 2, \'both\' = 3, \'unknown\' = 4)') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'data type test', 2) + +# DAG read full table scan +=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌─col_1──────────┬─col_2──┐ +│ data type test │ female │ +└────────────────┴────────┘ + +# DAG read filter string column +=> DBGInvoke dag('select * from default.test where col_1 = \'data type test\'') " --dag_planner="optree +┌─col_1──────────┬─col_2──┐ +│ data type test │ female │ +└────────────────┴────────┘ + +# DAG read filter enum column +=> DBGInvoke dag('select * from default.test where col_2 = \'female\'') " --dag_planner="optree +┌─col_1──────────┬─col_2──┐ +│ data type test │ female │ +└────────────────┴────────┘ + +# DAG read filter enum column +=> DBGInvoke dag('select * from default.test where col_2 = 2') " --dag_planner="optree +┌─col_1──────────┬─col_2──┐ +│ data type test │ female │ +└────────────────┴────────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test diff --git a/tests/mutable-test/txn_dag/data_type_time.test b/tests/mutable-test/txn_dag/data_type_time.test new file mode 100644 index 00000000000..e86de40cb4a --- /dev/null +++ b/tests/mutable-test/txn_dag/data_type_time.test @@ -0,0 +1,24 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. 
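+# The __raft_insert_row call and the full-table scan below are left commented
+# out: at this point in the series the debug row encoder cannot yet pack Date
+# and DateTime values, which is why the insert is marked "not supported". The
+# later patch in this series, "Fix date/datetime/bit encode error", reworks the
+# encoding and re-enables this test (see its changes to data_type_time.test).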
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Date, col_2 DateTime') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +# not supported +#=> DBGInvoke __raft_insert_row(default, test, 4, 50, '2019-06-10', '2019-06-10 09:00:00') + +# DAG read full table scan +#=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +#┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┬─col_11─┐ +#│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ 666.88 │ +#└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┴────────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test diff --git a/tests/mutable-test/txn_dag/filter.test b/tests/mutable-test/txn_dag/filter.test index 9045b9da1b4..529567a4969 100644 --- a/tests/mutable-test/txn_dag/filter.test +++ b/tests/mutable-test/txn_dag/filter.test @@ -32,6 +32,12 @@ │ 777 │ test2 │ 777 │ └───────┴───────┴───────┘ +# Mock DAG read, where and. +=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' and col_2 = 777', 4) +┌─col_2─┬─col_1─┬─col_2─┐ +│ 777 │ test2 │ 777 │ +└───────┴───────┴───────┘ + # Clean up. => DBGInvoke __drop_tidb_table(default, test) => drop table if exists default.test From 61961717996f131197990ed94fa3b37d2b1f2370 Mon Sep 17 00:00:00 2001 From: xufei Date: Wed, 21 Aug 2019 15:54:05 +0800 Subject: [PATCH 44/79] add some log about implicit cast (#188) * check validation of dag exprs field type * format code * address comments * add more filter test * add data type tests * remove useless comment * disable decimal test * add some log about implicit cast * address comment --- dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp | 10 ++++++++-- dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h | 3 +++ dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 8aea31eb98f..9041060a295 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -45,10 +45,13 @@ static String genFuncString(const String & func_name, const Names & argument_nam } DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_) - : source_columns(source_columns_), context(context_) + : source_columns(source_columns_), + context(context_), + after_agg(false), + implicit_cast_count(0), + log(&Logger::get("DAGExpressionAnalyzer")) { settings = context.getSettings(); - after_agg = false; } void DAGExpressionAnalyzer::appendAggregation( @@ -253,6 +256,9 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres // todo ignore nullable info?? 
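For orientation, a minimal sketch of the situation this hunk instruments (illustration only; expected_type and actual_type follow the surrounding code, while getDataTypeByFieldType and expr_name are hypothetical stand-ins for however the expected type and expression name are derived):

    // TiDB may declare a wider type than CH infers for the same expression,
    // e.g. an aggregate declared MysqlDecimal while CH computes UInt64.
    DataTypePtr expected_type = getDataTypeByFieldType(expr.field_type());          // hypothetical helper
    DataTypePtr actual_type = actions->getSampleBlock().getByName(expr_name).type;  // name assumed
    if (expected_type->getName() != actual_type->getName())
        ++implicit_cast_count; // a CAST is planned below; the total is logged via getImplicitCastCount()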
if (expected_type->getName() != actual_type->getName()) { + LOG_DEBUG( + log, __PRETTY_FUNCTION__ << " Add implicit cast: from " << actual_type->getName() << " to " << expected_type->getName()); + implicit_cast_count++; // need to add cast function // first construct the second argument tipb::Expr type_expr; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 8931f2b42da..24a4e775426 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -32,6 +32,8 @@ class DAGExpressionAnalyzer : private boost::noncopyable Settings settings; const Context & context; bool after_agg; + Int32 implicit_cast_count; + Poco::Logger * log; public: DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_); @@ -54,6 +56,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable const NamesAndTypesList & getCurrentInputColumns(); void makeExplicitSet(const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name); String applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions); + Int32 getImplicitCastCount() { return implicit_cast_count; }; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index cbc95e795e9..ba6f9e67748 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -514,6 +514,9 @@ BlockIO InterpreterDAG::execute() BlockIO res; res.in = pipeline.firstStream(); + + LOG_DEBUG( + log, __PRETTY_FUNCTION__ << " Convert DAG request to BlockIO, adding " << analyzer->getImplicitCastCount() << " implicit cast"); return res; } } // namespace DB From 08bacd71db716ce01449e272a6e3eb523d6bfb4c Mon Sep 17 00:00:00 2001 From: ruoxi Date: Sun, 25 Aug 2019 03:01:44 +0800 Subject: [PATCH 45/79] Pass DAG tests after merging master (#199) * Enhance dbg invoke and add dag as schemaful function * Add basic sql parse to dag * Column id starts from 1 * Fix value to ref * Add basic dag test * Fix dag bugs and pass 1st mock test * Make dag go normal routine and add mock dag * Add todo * Add comment * Fix gcc compile error * Enhance dag test * Address comments * Enhance mock sql -> dag compiler and add project test * Mock sql dag compiler support more expression types and add filter test * Add topn and limit test * Add agg for sql -> dag parser and agg test * Add dag specific codec * type * Update codec accordingly * Remove cop-test * Pass tests after merging master --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 13 +- dbms/src/Debug/dbgFuncRegion.cpp | 5 +- dbms/src/Debug/dbgTools.cpp | 130 ++++++++++++- dbms/src/Debug/dbgTools.h | 2 + .../Coprocessor/DAGBlockOutputStream.cpp | 9 +- dbms/src/Flash/Coprocessor/DAGCodec.cpp | 4 +- dbms/src/Flash/Coprocessor/DAGCodec.h | 4 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 14 +- dbms/src/Storages/Transaction/Codec.cpp | 77 ++++---- dbms/src/Storages/Transaction/Codec.h | 3 +- dbms/src/Storages/Transaction/Datum.cpp | 11 ++ .../src/Storages/Transaction/MyTimeParser.cpp | 172 ++++++++++++++++++ dbms/src/Storages/Transaction/MyTimeParser.h | 165 +---------------- dbms/src/Storages/Transaction/TiDB.cpp | 25 +-- dbms/src/Storages/Transaction/TiDB.h | 3 +- .../Storages/Transaction/TiKVRecordFormat.h | 15 -- 16 files changed, 396 insertions(+), 256 deletions(-) create mode 100644 
dbms/src/Storages/Transaction/MyTimeParser.cpp diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 83828238803..594a0c4ee1c 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -49,8 +49,7 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) region_id = safeGet<RegionID>(typeid_cast<const ASTLiteral &>(*args[1]).value); Timestamp start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery( - context, query, + auto [table_id, schema, dag_request] = compileQuery(context, query, [&](const String & database_name, const String & table_name) { auto storage = context.getTable(database_name, table_name); auto mmt = std::dynamic_pointer_cast<StorageMergeTree>(storage); @@ -92,8 +91,7 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) if (start_ts == 0) start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery( - context, query, + auto [table_id, schema, dag_request] = compileQuery(context, query, [&](const String & database_name, const String & table_name) { return MockTiDB::instance().getTableByName(database_name, table_name)->table_info; }, @@ -210,9 +208,12 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un expr->set_tp(tipb::Float64); encodeDAGFloat64(lit->value.get<Float64>(), ss); break; - case Field::Types::Which::Decimal: + case Field::Types::Which::Decimal32: + case Field::Types::Which::Decimal64: + case Field::Types::Which::Decimal128: + case Field::Types::Which::Decimal256: expr->set_tp(tipb::MysqlDecimal); - encodeDAGDecimal(lit->value.get<Decimal>(), ss); + encodeDAGDecimal(lit->value, ss); break; case Field::Types::Which::String: expr->set_tp(tipb::String); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index 1eefde4739a..257d7694d0a 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -140,7 +139,9 @@ void dbgFuncRegionSnapshotWithData(Context & context, const ASTs & args, DBGInvo } TiKVKey key = RecordKVFormat::genKey(table_id, handle_id); - TiKVValue value = RecordKVFormat::EncodeRow(table->table_info, fields); + std::stringstream ss; + RegionBench::encodeRow(table->table_info, fields, ss); + TiKVValue value(ss.str()); UInt64 commit_ts = tso; UInt64 prewrite_ts = tso; TiKVValue commit_value; diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 5e40faabd9a..305ac3f8260 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,9 @@ extern const int LOGICAL_ERROR; namespace RegionBench { +using TiDB::ColumnInfo; +using TiDB::TableInfo; + RegionPtr createRegion(TableID table_id, RegionID region_id, const HandleID & start, const HandleID & end) { enginepb::SnapshotRequest request; @@ -121,6 +125,128 @@ void addRequestsToRaftCmd(enginepb::CommandRequest * cmd, RegionID region_id, co } } +template <typename T> +T convertNumber(const Field & field) +{ + switch (field.getType()) + { + case Field::Types::Int64: + return static_cast<T>(field.get<Int64>()); + case Field::Types::UInt64: + return static_cast<T>(field.get<UInt64>()); + case Field::Types::Float64: + return static_cast<T>(field.get<Float64>()); + case Field::Types::Decimal32: + return static_cast<T>(field.get<DecimalField<Decimal32>>()); + case Field::Types::Decimal64: + return static_cast<T>(field.get<DecimalField<Decimal64>>()); + case
Field::Types::Decimal128: + return static_cast<T>(field.get<DecimalField<Decimal128>>()); + case Field::Types::Decimal256: + return static_cast<T>(field.get<DecimalField<Decimal256>>()); + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); + } +} + +Field convertDecimal(UInt32 scale, const Field & field) +{ + switch (field.getType()) + { + case Field::Types::Int64: + return DecimalField(ToDecimal(field.get<Int64>(), scale), scale); + case Field::Types::UInt64: + return DecimalField(ToDecimal(field.get<UInt64>(), scale), scale); + case Field::Types::Float64: + return DecimalField(ToDecimal(field.get<Float64>(), scale), scale); + case Field::Types::Decimal32: + case Field::Types::Decimal64: + case Field::Types::Decimal128: + case Field::Types::Decimal256: + return field; + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); + } +} + +Field convertEnum(const ColumnInfo & column_info, const Field & field) +{ + switch (field.getType()) + { + case Field::Types::Int64: + case Field::Types::UInt64: + return convertNumber<UInt64>(field); + case Field::Types::String: + return static_cast<UInt64>(column_info.getEnumIndex(field.get<String>())); + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Enum", ErrorCodes::LOGICAL_ERROR); + } +} + +Field convertField(const ColumnInfo & column_info, const Field & field) +{ + if (field.isNull()) + return field; + + switch (column_info.tp) + { + case TiDB::TypeTiny: + case TiDB::TypeShort: + case TiDB::TypeLong: + case TiDB::TypeLongLong: + case TiDB::TypeInt24: + case TiDB::TypeBit: + if (column_info.hasUnsignedFlag()) + return convertNumber<UInt64>(field); + else + return convertNumber<Int64>(field); + case TiDB::TypeFloat: + case TiDB::TypeDouble: + return convertNumber<Float64>(field); + case TiDB::TypeDate: + case TiDB::TypeDatetime: + case TiDB::TypeTimestamp: + return DB::parseMyDatetime(field.get<String>()); + case TiDB::TypeVarchar: + case TiDB::TypeTinyBlob: + case TiDB::TypeMediumBlob: + case TiDB::TypeLongBlob: + case TiDB::TypeBlob: + case TiDB::TypeVarString: + case TiDB::TypeString: + return field; + case TiDB::TypeEnum: + return convertEnum(column_info, field); + case TiDB::TypeNull: + return Field(); + case TiDB::TypeDecimal: + case TiDB::TypeNewDecimal: + return convertDecimal(column_info.decimal, field); + case TiDB::TypeTime: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Time", ErrorCodes::LOGICAL_ERROR); + case TiDB::TypeYear: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Year", ErrorCodes::LOGICAL_ERROR); + case TiDB::TypeSet: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Set", ErrorCodes::LOGICAL_ERROR); + default: + return Field(); + } +} + +void encodeRow(const TiDB::TableInfo & table_info, const std::vector<Field> & fields, std::stringstream & ss) +{ + if (table_info.columns.size() != fields.size()) + throw Exception("Encoding row has different sizes between columns and values", ErrorCodes::LOGICAL_ERROR); + for (size_t i = 0; i < fields.size(); i++) + { + const TiDB::ColumnInfo & column_info = table_info.columns[i]; + EncodeDatum(Field(column_info.id), TiDB::CodecFlagInt, ss); + Field field = convertField(column_info, fields[i]); + TiDB::DatumBumpy datum = TiDB::DatumBumpy(field, column_info.tp); + EncodeDatum(datum.field(), column_info.getCodecFlag(), ss); + } +} + void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID
handle_id, ASTs::const_iterator begin, ASTs::const_iterator end, Context & context, const std::optional> & tso_del) { @@ -142,7 +268,9 @@ void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID han TableID table_id = RecordKVFormat::getTableId(region->getRange().first); TiKVKey key = RecordKVFormat::genKey(table_id, handle_id); - TiKVValue value = RecordKVFormat::EncodeRow(table_info, fields); + std::stringstream ss; + encodeRow(table_info, fields, ss); + TiKVValue value(ss.str()); UInt64 prewrite_ts = pd_client->getTS(); UInt64 commit_ts = pd_client->getTS(); diff --git a/dbms/src/Debug/dbgTools.h b/dbms/src/Debug/dbgTools.h index 5ed25a4eb02..70372a2c765 100644 --- a/dbms/src/Debug/dbgTools.h +++ b/dbms/src/Debug/dbgTools.h @@ -23,6 +23,8 @@ RegionPtr createRegion(TableID table_id, RegionID region_id, const HandleID & st Regions createRegions(TableID table_id, size_t region_num, size_t key_num_each_region, HandleID handle_begin, RegionID new_region_id_begin); +void encodeRow(const TiDB::TableInfo & table_info, const std::vector & fields, std::stringstream & ss); + void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID handle_id, ASTs::const_iterator begin, ASTs::const_iterator end, Context & context, const std::optional> & tso_del = {}); diff --git a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp index 0ef25b08700..b475ac3cb24 100644 --- a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -13,6 +14,9 @@ extern const int UNSUPPORTED_PARAMETER; extern const int LOGICAL_ERROR; } // namespace ErrorCodes +using TiDB::DatumBumpy; +using TiDB::TP; + DAGBlockOutputStream::DAGBlockOutputStream(tipb::SelectResponse & dag_response_, Int64 records_per_chunk_, tipb::EncodeType encodeType_, std::vector && result_field_types_, Block header_) : dag_response(dag_response_), @@ -71,8 +75,9 @@ void DAGBlockOutputStream::write(const Block & block) } for (size_t j = 0; j < block.columns(); j++) { - auto field = (*block.getByPosition(j).column.get())[i]; - EncodeDatum(field, getCodecFlagByFieldType(result_field_types[j]), current_ss); + const auto & field = (*block.getByPosition(j).column.get())[i]; + DatumBumpy datum(field, static_cast(result_field_types[j].tp())); + EncodeDatum(datum.field(), getCodecFlagByFieldType(result_field_types[j]), current_ss); } // Encode current row records_per_chunk++; diff --git a/dbms/src/Flash/Coprocessor/DAGCodec.cpp b/dbms/src/Flash/Coprocessor/DAGCodec.cpp index 9d809cc1258..2316cdcad99 100644 --- a/dbms/src/Flash/Coprocessor/DAGCodec.cpp +++ b/dbms/src/Flash/Coprocessor/DAGCodec.cpp @@ -26,7 +26,7 @@ void encodeDAGString(const String & s, std::stringstream & ss) { ss << s; } void encodeDAGBytes(const String & bytes, std::stringstream & ss) { ss << bytes; } -void encodeDAGDecimal(const Decimal & d, std::stringstream & ss) { EncodeDecimal(d, ss); } +void encodeDAGDecimal(const Field & field, std::stringstream & ss) { EncodeDecimal(field, ss); } Int64 decodeDAGInt64(const String & s) { @@ -56,7 +56,7 @@ String decodeDAGString(const String & s) { return s; } String decodeDAGBytes(const String & s) { return s; } -Decimal decodeDAGDecimal(const String & s) +Field decodeDAGDecimal(const String & s) { size_t cursor = 0; return DecodeDecimal(cursor, s); diff --git a/dbms/src/Flash/Coprocessor/DAGCodec.h b/dbms/src/Flash/Coprocessor/DAGCodec.h index 
faecf74df1f..44fb9e5bc3f 100644 --- a/dbms/src/Flash/Coprocessor/DAGCodec.h +++ b/dbms/src/Flash/Coprocessor/DAGCodec.h @@ -12,7 +12,7 @@ void encodeDAGFloat32(Float32, std::stringstream &); void encodeDAGFloat64(Float64, std::stringstream &); void encodeDAGString(const String &, std::stringstream &); void encodeDAGBytes(const String &, std::stringstream &); -void encodeDAGDecimal(const Decimal &, std::stringstream &); +void encodeDAGDecimal(const Field &, std::stringstream &); Int64 decodeDAGInt64(const String &); @@ -20,6 +20,6 @@ Float32 decodeDAGFloat32(const String &); Float64 decodeDAGFloat64(const String &); String decodeDAGString(const String &); String decodeDAGBytes(const String &); -Decimal decodeDAGDecimal(const String &); +Field decodeDAGDecimal(const String &); } // namespace DB \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index be1bbb92aa9..2a334230573 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -71,7 +71,19 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col case tipb::ExprType::Bytes: return decodeDAGBytes(expr.val()); case tipb::ExprType::MysqlDecimal: - return decodeDAGDecimal(expr.val()).toString(); + { + auto field = decodeDAGDecimal(expr.val()); + if (field.getType() == Field::Types::Decimal32) + return field.get<DecimalField<Decimal32>>().toString(); + else if (field.getType() == Field::Types::Decimal64) + return field.get<DecimalField<Decimal64>>().toString(); + else if (field.getType() == Field::Types::Decimal128) + return field.get<DecimalField<Decimal128>>().toString(); + else if (field.getType() == Field::Types::Decimal256) + return field.get<DecimalField<Decimal256>>().toString(); + else + throw Exception("Not decimal literal" + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); + } case tipb::ExprType::ColumnRef: column_id = decodeDAGInt64(expr.val()); if (column_id < 0 || column_id >= (ColumnID)input_col.size()) diff --git a/dbms/src/Storages/Transaction/Codec.cpp b/dbms/src/Storages/Transaction/Codec.cpp index 7465a04b6b5..d57c5023d77 100644 --- a/dbms/src/Storages/Transaction/Codec.cpp +++ b/dbms/src/Storages/Transaction/Codec.cpp @@ -410,7 +410,7 @@ inline void writeWord(String & buf, Int32 word, int size) } template <typename T> -void EncodeDecimal(const T & dec, PrecType prec, ScaleType frac, std::stringstream & ss) +void EncodeDecimalImpl(const T & dec, PrecType prec, ScaleType frac, std::stringstream & ss) { static_assert(IsDecimal<T>); @@ -476,19 +476,31 @@ void EncodeDecimal(const T & dec, PrecType prec, ScaleType frac, std::stringstre ss.write(buf.c_str(), buf.size()); } -template <typename T> -inline T getFieldValue(const Field & field) +void EncodeDecimal(const Field & field, std::stringstream & ss) { - switch (field.getType()) + if (field.getType() == Field::Types::Decimal32) { - case Field::Types::UInt64: - return static_cast<T>(field.get<UInt64>()); - case Field::Types::Int64: - return static_cast<T>(field.get<Int64>()); - case Field::Types::Float64: - return static_cast<T>(field.get<Float64>()); - default: - throw Exception("Unsupport (getFieldValue): " + std::string(field.getTypeName()), ErrorCodes::LOGICAL_ERROR); + auto decimal_field = field.get<DecimalField<Decimal32>>(); + return EncodeDecimalImpl(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); + } + else if (field.getType() == Field::Types::Decimal64) + { + auto decimal_field = field.get<DecimalField<Decimal64>>(); + return EncodeDecimalImpl(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); + } + else if
(field.getType() == Field::Types::Decimal128) + { + auto decimal_field = field.get<DecimalField<Decimal128>>(); + return EncodeDecimalImpl(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); + } + else if (field.getType() == Field::Types::Decimal256) + { + auto decimal_field = field.get<DecimalField<Decimal256>>(); + return EncodeDecimalImpl(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); + } + else + { + throw Exception("Not a decimal when decoding decimal", ErrorCodes::LOGICAL_ERROR); } } @@ -503,46 +515,27 @@ void EncodeDatum(const Field & field, TiDB::CodecFlag flag, std::stringstream & switch (flag) { case TiDB::CodecFlagDecimal: - if (field.getType() == Field::Types::Decimal32) - { - auto decimal_field = field.get<DecimalField<Decimal32>>(); - return EncodeDecimal(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); - } - else if (field.getType() == Field::Types::Decimal64) - { - auto decimal_field = field.get<DecimalField<Decimal64>>(); - return EncodeDecimal(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); - } - else if (field.getType() == Field::Types::Decimal128) - { - auto decimal_field = field.get<DecimalField<Decimal128>>(); - return EncodeDecimal(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); - } - else - { - auto decimal_field = field.get<DecimalField<Decimal256>>(); - return EncodeDecimal(decimal_field.getValue(), decimal_field.getPrec(), decimal_field.getScale(), ss); - } + return EncodeDecimal(field, ss); case TiDB::CodecFlagCompactBytes: - return EncodeCompactBytes(field.get<String>(), ss); + return EncodeCompactBytes(field.safeGet<String>(), ss); case TiDB::CodecFlagFloat: - return EncodeFloat64(getFieldValue<Float64>(field), ss); + return EncodeFloat64(field.safeGet<Float64>(), ss); case TiDB::CodecFlagUInt: - return EncodeUInt(getFieldValue<UInt64>(field), ss); + return EncodeUInt(field.safeGet<UInt64>(), ss); case TiDB::CodecFlagInt: - return EncodeInt64(getFieldValue<Int64>(field), ss); + return EncodeInt64(field.safeGet<Int64>(), ss); case TiDB::CodecFlagVarInt: - return EncodeVarInt(getFieldValue<Int64>(field), ss); + return EncodeVarInt(field.safeGet<Int64>(), ss); case TiDB::CodecFlagVarUInt: - return EncodeVarUInt(getFieldValue<UInt64>(field), ss); + return EncodeVarUInt(field.safeGet<UInt64>(), ss); default: throw Exception("Not implemented codec flag: " + std::to_string(flag), ErrorCodes::LOGICAL_ERROR); } } -template void EncodeDecimal(const Decimal32 &, PrecType, ScaleType, std::stringstream & ss); -template void EncodeDecimal(const Decimal64 &, PrecType, ScaleType, std::stringstream & ss); -template void EncodeDecimal(const Decimal128 &, PrecType, ScaleType, std::stringstream & ss); -template void EncodeDecimal(const Decimal256 &, PrecType, ScaleType, std::stringstream & ss); +template void EncodeDecimalImpl(const Decimal32 &, PrecType, ScaleType, std::stringstream & ss); +template void EncodeDecimalImpl(const Decimal64 &, PrecType, ScaleType, std::stringstream & ss); +template void EncodeDecimalImpl(const Decimal128 &, PrecType, ScaleType, std::stringstream & ss); +template void EncodeDecimalImpl(const Decimal256 &, PrecType, ScaleType, std::stringstream & ss); } // namespace DB diff --git a/dbms/src/Storages/Transaction/Codec.h b/dbms/src/Storages/Transaction/Codec.h index 53265ca4d20..38998fd04a0 100644 --- a/dbms/src/Storages/Transaction/Codec.h +++ b/dbms/src/Storages/Transaction/Codec.h @@ -62,8 +62,7 @@ void EncodeVarUInt(UInt64 num, std::stringstream & ss); void EncodeVarInt(Int64 num, std::stringstream & ss); -template <typename T> -void EncodeDecimal(const T & dec, PrecType prec, ScaleType frac, std::stringstream & ss); +void
EncodeDecimal(const Field & field, std::stringstream & ss); void EncodeDatum(const Field & field, TiDB::CodecFlag flag, std::stringstream & ss); diff --git a/dbms/src/Storages/Transaction/Datum.cpp b/dbms/src/Storages/Transaction/Datum.cpp index 8dc4b357c67..72301b9fa9b 100644 --- a/dbms/src/Storages/Transaction/Datum.cpp +++ b/dbms/src/Storages/Transaction/Datum.cpp @@ -135,6 +135,17 @@ struct DatumOp +struct DatumOp::type> +{ + static void unflatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } + + static void flatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } + + static bool overflow(const Field &, const ColumnInfo &) { return false; } +}; + DatumFlat::DatumFlat(const DB::Field & field, TP tp) : DatumBase(field, tp) { switch (tp) diff --git a/dbms/src/Storages/Transaction/MyTimeParser.cpp b/dbms/src/Storages/Transaction/MyTimeParser.cpp new file mode 100644 index 00000000000..e183e587ceb --- /dev/null +++ b/dbms/src/Storages/Transaction/MyTimeParser.cpp @@ -0,0 +1,172 @@ +#include + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +int adjustYear(int year) +{ + if (year >= 0 && year <= 69) + return 2000 + year; + if (year >= 70 && year <= 99) + return 1900 + year; + return year; +} + +void scanTimeArgs(const std::vector & seps, std::initializer_list && list) +{ + int i = 0; + for (auto * ptr : list) + { + *ptr = std::stoi(seps[i]); + i++; + } +} + +int getFracIndex(const String & format) +{ + int idx = -1; + for (int i = int(format.size()) - 1; i >= 0; i--) + { + if (std::ispunct(format[i])) + { + if (format[i] == '.') + { + idx = i; + } + break; + } + } + return idx; +} + +std::vector parseDateFormat(String format) +{ + format = Poco::trimInPlace(format); + + std::vector seps; + size_t start = 0; + for (size_t i = 0; i < format.size(); i++) + { + if (i == 0 || i + 1 == format.size()) + { + if (!std::isdigit(format[i])) + return {}; + continue; + } + + if (!std::isdigit(format[i])) + { + if (!std::isdigit(format[i - 1])) + return {}; + seps.push_back(format.substr(start, i - start)); + start = i + 1; + } + } + seps.push_back(format.substr(start)); + return seps; +} + +std::vector splitDatetime(String format) +{ + int idx = getFracIndex(format); + if (idx > 0) + { + format = format.substr(0, idx); + } + return parseDateFormat(format); +} + +Field parseMyDatetime(const String & str) +{ + Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + + const auto & seps = splitDatetime(str); + + switch (seps.size()) + { + // No delimiter + case 1: + { + size_t l = seps[0].size(); + switch (l) + { + case 14: + // YYYYMMDDHHMMSS + { + std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + break; + } + case 12: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + year = adjustYear(year); + break; + } + case 11: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); + year = adjustYear(year); + break; + } + case 10: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); + year = adjustYear(year); + break; + } + case 9: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); + year = adjustYear(year); + break; + } + case 8: + { + std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); + break; + } + case 6: + case 5: + { + std::sscanf(seps[0].c_str(), 
"%2d%2d%2d", &year, &month, &day); + year = adjustYear(year); + break; + } + default: + { + throw Exception("Wrong datetime format"); + } + // TODO Process frac! + } + break; + } + case 3: + { + scanTimeArgs(seps, {&year, &month, &day}); + break; + } + case 6: + { + scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); + break; + } + default: + { + throw Exception("Wrong datetime format"); + } + } + + UInt64 ymd = ((year * 13 + month) << 5) | day; + UInt64 hms = (hour << 12) | (minute << 6) | second; + return Field((ymd << 17 | hms) << 24); +} + +} // namespace DB diff --git a/dbms/src/Storages/Transaction/MyTimeParser.h b/dbms/src/Storages/Transaction/MyTimeParser.h index 009bba29390..d58a4db258c 100644 --- a/dbms/src/Storages/Transaction/MyTimeParser.h +++ b/dbms/src/Storages/Transaction/MyTimeParser.h @@ -2,173 +2,10 @@ #include #include -#include -#include -#include -#include - -#include -#include namespace DB { -int adjustYear(int year) -{ - if (year >= 0 && year <= 69) - return 2000 + year; - if (year >= 70 && year <= 99) - return 1900 + year; - return year; -} - -void scanTimeArgs(const std::vector & seps, std::initializer_list && list) -{ - int i = 0; - for (auto * ptr : list) - { - *ptr = std::stoi(seps[i]); - i++; - } -} - -int getFracIndex(const String & format) -{ - int idx = -1; - for (int i = int(format.size()) - 1; i >= 0; i--) - { - if (std::ispunct(format[i])) - { - if (format[i] == '.') - { - idx = i; - } - break; - } - } - return idx; -} - -std::vector parseDateFormat(String format) -{ - format = Poco::trimInPlace(format); - - std::vector seps; - size_t start = 0; - for (size_t i = 0; i < format.size(); i++) - { - if (i == 0 || i + 1 == format.size()) - { - if (!std::isdigit(format[i])) - return {}; - continue; - } - - if (!std::isdigit(format[i])) - { - if (!std::isdigit(format[i - 1])) - return {}; - seps.push_back(format.substr(start, i - start)); - start = i + 1; - } - } - seps.push_back(format.substr(start)); - return seps; -} - -std::vector splitDatetime(String format) -{ - int idx = getFracIndex(format); - if (idx > 0) - { - format = format.substr(0, idx); - } - return parseDateFormat(format); -} - -Field parseMyDatetime(const String & str) -{ - Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; - - const auto & seps = splitDatetime(str); - - switch (seps.size()) - { - // No delimiter - case 1: - { - size_t l = seps[0].size(); - switch (l) - { - case 14: - // YYYYMMDDHHMMSS - { - std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); - break; - } - case 12: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); - year = adjustYear(year); - break; - } - case 11: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); - year = adjustYear(year); - break; - } - case 10: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); - year = adjustYear(year); - break; - } - case 9: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); - year = adjustYear(year); - break; - } - case 8: - { - std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); - break; - } - case 6: - case 5: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); - year = adjustYear(year); - break; - } - default: - { - throw Exception("Wrong datetime format"); - } - // TODO Process frac! 
- } - break; - } - case 3: - { - scanTimeArgs(seps, {&year, &month, &day}); - break; - } - case 6: - { - scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); - break; - } - default: - { - throw Exception("Wrong datetime format"); - } - } - - UInt64 ymd = ((year * 13 + month) << 5) | day; - UInt64 hms = (hour << 12) | (minute << 6) | second; - return Field((ymd << 17 | hms) << 24); -} +Field parseMyDatetime(const String & str); } // namespace DB diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index f1bd1d52d3e..bbdca6b2877 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -25,6 +25,7 @@ Field ColumnInfo::defaultValueToField() const } switch (tp) { + // TODO: Consider unsigned? // Integer Type. case TypeTiny: case TypeShort: @@ -157,8 +158,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (ColumnInfo): " + e.displayText(), DB::Exception(e)); } -void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) -try +void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -192,8 +192,7 @@ catch (const Poco::Exception & e) PartitionDefinition::PartitionDefinition(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const -try +Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); json->set("id", id); @@ -214,8 +213,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionDef): " + e.displayText(), DB::Exception(e)); } -void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) -try +void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -230,8 +228,7 @@ catch (const Poco::Exception & e) PartitionInfo::PartitionInfo(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const -try +Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -260,8 +257,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionInfo): " + e.displayText(), DB::Exception(e)); } -void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) -try +void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) try { type = static_cast(json->getValue("type")); expr = json->getValue("expr"); @@ -285,8 +281,7 @@ catch (const Poco::Exception & e) TableInfo::TableInfo(const String & table_info_json) { deserialize(table_info_json); } -String TableInfo::serialize(bool escaped) const -try +String TableInfo::serialize(bool escaped) const try { std::stringstream buf; @@ -344,8 +339,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (TableInfo): " + e.displayText(), DB::Exception(e)); } -void DBInfo::deserialize(const String & json_str) -try +void DBInfo::deserialize(const String & json_str) try { Poco::JSON::Parser parser; Poco::Dynamic::Var result = parser.parse(json_str); @@ -363,8 +357,7 @@ catch (const Poco::Exception & e) DB::Exception(e)); } -void TableInfo::deserialize(const String & json_str) -try +void TableInfo::deserialize(const 
String & json_str) try { if (json_str.empty()) { diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 128f5ddd13a..efa0e8af5ed 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -179,12 +179,13 @@ struct ColumnInfo COLUMN_FLAGS(M) #undef M + DB::Field convertField(const DB::Field &) const; DB::Field defaultValueToField() const; + Int64 getEnumIndex(const String &) const; CodecFlag getCodecFlag() const; private: DB::Field getDecimalDefaultValue(const String & str) const; - Int64 getEnumIndex(const String &) const; }; enum PartitionType diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h index 12b779be25e..5d27978f2f2 100644 --- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h +++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h @@ -57,21 +57,6 @@ inline UInt64 decodeUInt64Desc(const UInt64 x) { return ~decodeUInt64(x); } inline Int64 decodeInt64(const UInt64 x) { return static_cast(decodeUInt64(x) ^ SIGN_MASK); } -inline TiKVValue EncodeRow(const TiDB::TableInfo & table_info, const std::vector & fields) -{ - if (table_info.columns.size() != fields.size()) - throw Exception("Encoding row has different sizes between columns and values", ErrorCodes::LOGICAL_ERROR); - std::stringstream ss; - for (size_t i = 0; i < fields.size(); i++) - { - const TiDB::ColumnInfo & column_info = table_info.columns[i]; - EncodeDatum(Field(column_info.id), TiDB::CodecFlagInt, ss); - TiDB::DatumBumpy datum = TiDB::DatumBumpy(fields[i], column_info.tp); - EncodeDatum(datum.field(), column_info.getCodecFlag(), ss); - } - return TiKVValue(ss.str()); -} - template inline T read(const char * s) { From e8b41981c3ac2c387221868ffb85cc069740df4e Mon Sep 17 00:00:00 2001 From: ruoxi Date: Mon, 26 Aug 2019 15:49:58 +0800 Subject: [PATCH 46/79] Fix date/datetime/bit encode error (#200) * Enhance dbg invoke and add dag as schemaful function * Add basic sql parse to dag * Column id starts from 1 * Fix value to ref * Add basic dag test * Fix dag bugs and pass 1st mock test * Make dag go normal routine and add mock dag * Add todo * Add comment * Fix gcc compile error * Enhance dag test * Address comments * Enhance mock sql -> dag compiler and add project test * Mock sql dag compiler support more expression types and add filter test * Add topn and limit test * Add agg for sql -> dag parser and agg test * Add dag specific codec * type * Update codec accordingly * Remove cop-test * Pass tests after merging master * Copy some changes from xufei * Enable date/datetime test * Enable date/datetime test * Refine code * Adjust date/datetime tiflash rep to UInt * Fix datetime to Int * Typo --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 41 ++-- dbms/src/Debug/dbgTools.cpp | 30 ++- .../Coprocessor/DAGExpressionAnalyzer.cpp | 6 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 22 ++- dbms/src/Functions/FunctionsComparison.h | 176 +++++++++++++++++- dbms/src/Storages/Transaction/Datum.cpp | 6 +- dbms/src/Storages/Transaction/TiDB.cpp | 12 +- dbms/src/Storages/Transaction/TiDB.h | 15 +- .../mutable-test/txn_dag/data_type_time.test | 18 +- 9 files changed, 271 insertions(+), 55 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 594a0c4ee1c..d7e2fb5bedf 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -29,9 +30,12 @@ 
extern const int BAD_ARGUMENTS; extern const int LOGICA_ERROR; } // namespace ErrorCodes +using TiDB::DatumFlat; +using TiDB::TableInfo; + using DAGColumnInfo = std::pair; using DAGSchema = std::vector; -using SchemaFetcher = std::function; +using SchemaFetcher = std::function; std::tuple compileQuery( Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts); tipb::SelectResponse executeDAGRequest( @@ -138,7 +142,7 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un { auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { return field.first == id->getColumnName(); }); if (ft == input.end()) - throw DB::Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); expr->set_tp(tipb::ColumnRef); *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); @@ -183,7 +187,7 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un } else { - throw DB::Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); } expr->set_tp(tipb::ExprType::ScalarFunc); } @@ -221,13 +225,13 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un encodeDAGBytes(lit->value.get(), ss); break; default: - throw DB::Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); } expr->set_val(ss.str()); } else { - throw DB::Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); } } @@ -262,7 +266,7 @@ std::tuple compileQuery( ASTSelectQuery & ast_query = typeid_cast(*ast); /// Get table metadata. 
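As a usage reference, this is the shape of a call into compileQuery; a minimal sketch reusing the mock SchemaFetcher lambda that dbgFuncMockDAG passes in above (the query text and start_ts are placeholders):

    auto [table_id, schema, dag_request] = compileQuery(context, "select col_1 from default.test",
        [&](const String & database_name, const String & table_name) {
            return MockTiDB::instance().getTableByName(database_name, table_name)->table_info;
        },
        start_ts);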
- TiDB::TableInfo table_info; + TableInfo table_info; { String database_name, table_name; auto query_database = ast_query.database(); @@ -333,7 +337,7 @@ std::tuple compileQuery( { ASTOrderByElement * elem = typeid_cast(child.get()); if (!elem) - throw DB::Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); tipb::ByItem * by = topn->add_order_by(); by->set_desc(elem->direction < 0); tipb::Expr * expr = by->mutable_expr(); @@ -389,7 +393,7 @@ std::tuple compileQuery( { auto iter = std::find_if(last_output.begin(), last_output.end(), [&](const auto & field) { return field.first == pair.first; }); if (iter == last_output.end()) - throw DB::Exception("Column not found when pruning: " + pair.first, ErrorCodes::LOGICAL_ERROR); + throw Exception("Column not found when pruning: " + pair.first, ErrorCodes::LOGICAL_ERROR); std::stringstream ss; encodeDAGInt64(iter - last_output.begin(), ss); pair.second->set_val(ss.str()); @@ -414,7 +418,7 @@ std::tuple compileQuery( if (has_gby || has_agg_func) { if (last_executor->has_limit() || last_executor->has_topn()) - throw DB::Exception("Limit/TopN and Agg cannot co-exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Limit/TopN and Agg cannot co-exist.", ErrorCodes::LOGICAL_ERROR); tipb::Executor * agg_exec = dag_request.add_executors(); agg_exec->set_tp(tipb::ExecType::TypeAggregation); @@ -424,7 +428,7 @@ std::tuple compileQuery( { const ASTFunction * func = typeid_cast(expr.get()); if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - throw DB::Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); + throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); tipb::Expr * agg_func = agg->add_agg_func(); @@ -444,7 +448,7 @@ std::tuple compileQuery( // TODO: Other agg func. 
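One hypothetical way the TODO above could later be filled in, mirroring the shape of the agg-function branches that precede it (not shown in these hunks); tipb::Sum is assumed to be the matching tipb expression type, and the compileExpr call is abbreviated, so none of this is part of the patch:

    else if (func->name == "sum")
    {
        agg_func->set_tp(tipb::Sum);
        // Compile the single argument the same way the existing branches do.
        compileExpr(input, func->arguments->children[0], agg_func->add_children(), /* remaining arguments assumed */);
    }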
else { - throw DB::Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); } schema.emplace_back(std::make_pair(func->getColumnName(), fieldTypeToColumnInfo(agg_func->field_type()))); @@ -489,7 +493,7 @@ std::tuple compileQuery( } else { - throw DB::Exception("Unsupported expression type in select", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported expression type in select", ErrorCodes::LOGICAL_ERROR); } } @@ -501,7 +505,7 @@ std::tuple compileQuery( auto iter = std::find_if(last_output.begin(), last_output.end(), [&](const auto & last_field) { return last_field.first == field; }); if (iter == last_output.end()) - throw DB::Exception("Column not found after pruning: " + field, ErrorCodes::LOGICAL_ERROR); + throw Exception("Column not found after pruning: " + field, ErrorCodes::LOGICAL_ERROR); dag_request.add_output_offsets(iter - last_output.begin()); schema.push_back(*iter); } @@ -526,18 +530,18 @@ tipb::SelectResponse executeDAGRequest( BlockInputStreamPtr outputDAGResponse(Context &, const DAGSchema & schema, const tipb::SelectResponse & dag_response) { if (dag_response.has_error()) - throw DB::Exception(dag_response.error().msg(), dag_response.error().code()); + throw Exception(dag_response.error().msg(), dag_response.error().code()); BlocksList blocks; for (const auto & chunk : dag_response.chunks()) { - std::vector> rows; - std::vector curr_row; + std::vector> rows; + std::vector curr_row; const std::string & data = chunk.rows_data(); size_t cursor = 0; while (cursor < data.size()) { - curr_row.push_back(DB::DecodeDatum(cursor, data)); + curr_row.push_back(DecodeDatum(cursor, data)); if (curr_row.size() == schema.size()) { rows.emplace_back(std::move(curr_row)); @@ -558,7 +562,8 @@ BlockInputStreamPtr outputDAGResponse(Context &, const DAGSchema & schema, const { for (size_t i = 0; i < row.size(); i++) { - columns[i].column->assumeMutable()->insert(row[i]); + const Field & field = row[i]; + columns[i].column->assumeMutable()->insert(DatumFlat(field, schema[i].second.tp).field()); } } diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 305ac3f8260..47874e73772 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -149,21 +149,24 @@ T convertNumber(const Field & field) } } -Field convertDecimal(UInt32 scale, const Field & field) +Field convertDecimal(const ColumnInfo & column_info, const Field & field) { switch (field.getType()) { case Field::Types::Int64: - return DecimalField(ToDecimal(field.get(), scale), scale); + return column_info.getDecimalValue(std::to_string(field.get())); case Field::Types::UInt64: - return DecimalField(ToDecimal(field.get(), scale), scale); + return column_info.getDecimalValue(std::to_string(field.get())); case Field::Types::Float64: - return DecimalField(ToDecimal(field.get(), scale), scale); + return column_info.getDecimalValue(std::to_string(field.get())); case Field::Types::Decimal32: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); case Field::Types::Decimal64: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); case Field::Types::Decimal128: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); case Field::Types::Decimal256: - return field; + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); default: throw Exception(String("Unable to convert field type ") + 
field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); } @@ -204,9 +207,22 @@ Field convertField(const ColumnInfo & column_info, const Field & field) case TiDB::TypeDouble: return convertNumber(field); case TiDB::TypeDate: + { + auto text = field.get(); + ReadBufferFromMemory buf(text.data(), text.size()); + DayNum_t date; + readDateText(date, buf); + return static_cast(date); + } case TiDB::TypeDatetime: case TiDB::TypeTimestamp: - return DB::parseMyDatetime(field.get()); + { + auto text = field.get(); + ReadBufferFromMemory buf(text.data(), text.size()); + time_t dt; + readDateTimeText(dt, buf); + return static_cast(dt); + } case TiDB::TypeVarchar: case TiDB::TypeTinyBlob: case TiDB::TypeMediumBlob: @@ -221,7 +237,7 @@ Field convertField(const ColumnInfo & column_info, const Field & field) return Field(); case TiDB::TypeDecimal: case TiDB::TypeNewDecimal: - return convertDecimal(column_info.decimal, field); + return convertDecimal(column_info, field); case TiDB::TypeTime: throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Time", ErrorCodes::LOGICAL_ERROR); case TiDB::TypeYear: diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 9041060a295..155ce2b5ac0 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -265,9 +265,9 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres type_expr.set_tp(tipb::ExprType::String); std::stringstream ss; type_expr.set_val(expected_type->getName()); - auto type_field_type = type_expr.field_type(); - type_field_type.set_tp(0xfe); - type_field_type.set_flag(1); + auto * type_field_type = type_expr.mutable_field_type(); + type_field_type->set_tp(0xfe); + type_field_type->set_flag(1); getActions(type_expr, actions); Names cast_argument_names; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 2a334230573..9359bb7de06 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,16 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col else throw Exception("Not decimal literal" + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); } + case tipb::ExprType::MysqlTime: + { + if (!expr.has_field_type() + || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime + && expr.field_type().tp() != TiDB::TypeTimestamp)) + throw Exception("Invalid MySQL Time literal " + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); + auto t = decodeDAGUInt64(expr.val()); + // TODO: Use timezone in DAG request. 
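On the timezone TODO: tipb's DAGRequest is assumed here to carry a time_zone_offset field (not shown in this patch), so a timezone-aware version of this branch would presumably shift the flattened value before formatting, roughly:

    // Illustration only -- not part of this patch; the proto field and the
    // seconds-based flat representation are both assumptions.
    auto t = decodeDAGUInt64(expr.val());
    auto flat = TiDB::DatumFlat(t, static_cast<TiDB::TP>(expr.field_type().tp())).field().get<UInt64>();
    return std::to_string(flat + dag_request.time_zone_offset()); // offset in seconds, assumed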
+ return std::to_string(TiDB::DatumFlat(t, static_cast(expr.field_type().tp())).field().get()); + } case tipb::ExprType::ColumnRef: column_id = decodeDAGInt64(expr.val()); if (column_id < 0 || column_id >= (ColumnID)input_col.size()) @@ -222,12 +233,21 @@ Field decodeLiteral(const tipb::Expr & expr) return decodeDAGBytes(expr.val()); case tipb::ExprType::MysqlDecimal: return decodeDAGDecimal(expr.val()); + case tipb::ExprType::MysqlTime: + { + if (!expr.has_field_type() + || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime + && expr.field_type().tp() != TiDB::TypeTimestamp)) + throw Exception("Invalid MySQL Time literal " + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); + auto t = decodeDAGUInt64(expr.val()); + // TODO: Use timezone in DAG request. + return TiDB::DatumFlat(t, static_cast(expr.field_type().tp())).field(); + } case tipb::ExprType::MysqlBit: case tipb::ExprType::MysqlDuration: case tipb::ExprType::MysqlEnum: case tipb::ExprType::MysqlHex: case tipb::ExprType::MysqlSet: - case tipb::ExprType::MysqlTime: case tipb::ExprType::MysqlJson: case tipb::ExprType::ValueList: throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported yet", ErrorCodes::UNSUPPORTED_METHOD); diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 8c4a9e18113..f94fb6f6a2a 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -125,6 +125,116 @@ inline int memcmp16(const void * a, const void * b) } +inline time_t dateToDateTime(UInt32 date_data) +{ + DayNum_t day_num(date_data); + LocalDate local_date(day_num); + // todo use timezone info + return DateLUT::instance().makeDateTime(local_date.year(), local_date.month(), local_date.day(), 0, 0, 0); +} + + +template class Op, bool is_left_date> +struct DateDateTimeComparisonImpl +{ + static void NO_INLINE vector_vector(const PaddedPODArray & a, const PaddedPODArray & b, PaddedPODArray & c) + { + size_t size = a.size(); + const A * a_pos = &a[0]; + const B * b_pos = &b[0]; + UInt8 * c_pos = &c[0]; + const A * a_end = a_pos + size; + while (a_pos < a_end) + { + if (is_left_date) + { + using OpType = B; + time_t date_time = dateToDateTime(*a_pos); + *c_pos = Op::apply((OpType)date_time, *b_pos); + } + else + { + using OpType = A; + time_t date_time = dateToDateTime(*b_pos); + *c_pos = Op::apply(*a_pos, (OpType)date_time); + } + ++a_pos; + ++b_pos; + ++c_pos; + } + } + + static void NO_INLINE vector_constant(const PaddedPODArray & a, B b, PaddedPODArray & c) + { + if (!is_left_date) + { + // datetime vector with date constant + using OpType = A; + time_t date_time = dateToDateTime(b); + NumComparisonImpl>::vector_constant(a, (OpType) date_time, c); + } + else + { + using OpType = B; + size_t size = a.size(); + const A * a_pos = &a[0]; + UInt8 * c_pos = &c[0]; + const A * a_end = a_pos + size; + + while (a_pos < a_end) + { + time_t date_time = dateToDateTime(*a_pos); + *c_pos = Op::apply((OpType)date_time, b); + ++a_pos; + ++c_pos; + } + } + } + + static void constant_vector(A a, const PaddedPODArray & b, PaddedPODArray & c) + { + if (is_left_date) + { + // date constant with datetime vector + using OpType = B; + time_t date_time = dateToDateTime(a); + NumComparisonImpl>::constant_vector((OpType)date_time, b, c); + } + else + { + using OpType = A; + size_t size = b.size(); + const B * b_pos = &b[0]; + UInt8 * c_pos = &c[0]; + const B * b_end = b_pos + size; + + while (b_pos < b_end) + { + time_t date_time = 
dateToDateTime(*b_pos); + *c_pos = Op::apply(a, (OpType)date_time); + ++b_pos; + ++c_pos; + } + } + } + + static void constant_constant(A a, B b, UInt8 & c) { + if (is_left_date) + { + using OpType = B; + time_t date_time = dateToDateTime(a); + NumComparisonImpl>::constant_constant((OpType) date_time, b, c); + } + else + { + using OpType = A; + time_t date_time = dateToDateTime(b); + NumComparisonImpl>::constant_constant(a, (OpType) date_time, c); + } + } +}; + + template struct StringComparisonImpl { @@ -966,6 +1076,69 @@ class FunctionComparison : public IFunction } } + bool executeDateWithDateTimeOrDateTimeWithDate( + Block &block, size_t result, + const IColumn *col_left_untyped, const IColumn *col_right_untyped, + const DataTypePtr &left_type, const DataTypePtr &right_type) + { + if ((checkDataType(left_type.get()) && checkDataType(right_type.get())) + || (checkDataType(left_type.get()) && checkDataType(right_type.get()))) + { + bool is_left_date = checkDataType(left_type.get()); + if (is_left_date) + { + return executeDateAndDateTimeCompare(block, result, col_left_untyped, col_right_untyped); + } + else + { + return executeDateAndDateTimeCompare(block, result, col_left_untyped, col_right_untyped); + } + } + return false; + } + + template + bool executeDateAndDateTimeCompare(Block & block, size_t result, const IColumn * c0, const IColumn * c1) + { + bool c0_const = c0->isColumnConst(); + bool c1_const = c1->isColumnConst(); + + if (c0_const && c1_const) + { + UInt8 res = 0; + DateDateTimeComparisonImpl::constant_constant( + checkAndGetColumnConst>(c0)->template getValue(), + checkAndGetColumnConst>(c1)-> template getValue(), res); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(c0->size(), toField(res)); + } + else + { + auto c_res = ColumnUInt8::create(); + ColumnUInt8::Container & vec_res = c_res->getData(); + vec_res.resize(c0->size()); + if (c0_const) + { + DateDateTimeComparisonImpl::constant_vector( + checkAndGetColumnConst>(c0)-> template getValue(), + checkAndGetColumn>(c1)->getData(), vec_res); + } + else if (c1_const) + { + DateDateTimeComparisonImpl::vector_constant( + checkAndGetColumn>(c0)->getData(), + checkAndGetColumnConst>(c1)-> template getValue(), vec_res); + } + else + { + DateDateTimeComparisonImpl::vector_vector( + checkAndGetColumn>(c0)->getData(), + checkAndGetColumn>(c1)->getData(), vec_res); + } + block.getByPosition(result).column = std::move(c_res); + } + return true; + } + public: String getName() const override { @@ -1084,7 +1257,8 @@ class FunctionComparison : public IFunction if (left_is_num && right_is_num) { - if (!( executeNumLeftType(block, result, col_left_untyped, col_right_untyped) + if (!(executeDateWithDateTimeOrDateTimeWithDate(block, result, col_left_untyped, col_right_untyped, col_with_type_and_name_left.type, col_with_type_and_name_right.type) + || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) || executeNumLeftType(block, result, col_left_untyped, col_right_untyped) diff --git a/dbms/src/Storages/Transaction/Datum.cpp b/dbms/src/Storages/Transaction/Datum.cpp index 72301b9fa9b..0b7c9d775e2 100644 --- a/dbms/src/Storages/Transaction/Datum.cpp +++ b/dbms/src/Storages/Transaction/Datum.cpp @@ -44,7 +44,7 @@ struct DatumOp(date); + copy = static_cast(date); } else { @@ -86,7 +86,7 @@ struct DatumOp(orig.get())); + time_t date_time(orig.get()); values 
= date_lut.getValues(date_time); hour = date_lut.toHour(date_time); minute = date_lut.toMinute(date_time); @@ -100,7 +100,7 @@ struct DatumOp struct DatumOp::type> { diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index bbdca6b2877..6d3a8376789 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -56,7 +56,7 @@ Field ColumnInfo::defaultValueToField() const return Field(); case TypeDecimal: case TypeNewDecimal: - return getDecimalDefaultValue(value.convert()); + return getDecimalValue(value.convert()); case TypeTime: case TypeYear: case TypeSet: @@ -68,9 +68,9 @@ Field ColumnInfo::defaultValueToField() const return Field(); } -DB::Field ColumnInfo::getDecimalDefaultValue(const String & str) const +DB::Field ColumnInfo::getDecimalValue(const String & decimal_text) const { - DB::ReadBufferFromString buffer(str); + DB::ReadBufferFromString buffer(decimal_text); auto precision = flen; auto scale = decimal; @@ -102,16 +102,16 @@ DB::Field ColumnInfo::getDecimalDefaultValue(const String & str) const } // FIXME it still has bug: https://github.com/pingcap/tidb/issues/11435 -Int64 ColumnInfo::getEnumIndex(const String & default_str) const +Int64 ColumnInfo::getEnumIndex(const String & enum_id_or_text) const { for (const auto & elem : elems) { - if (elem.first == default_str) + if (elem.first == enum_id_or_text) { return elem.second; } } - int num = std::stoi(default_str); + int num = std::stoi(enum_id_or_text); return num; } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index efa0e8af5ed..7e0cb03996a 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -43,16 +43,16 @@ using DB::Timestamp; M(Float, 4, Float, Float32, false) \ M(Double, 5, Float, Float64, false) \ M(Null, 6, Nil, Nothing, false) \ - M(Timestamp, 7, Int, DateTime, false) \ + M(Timestamp, 7, UInt, DateTime, false) \ M(LongLong, 8, Int, Int64, false) \ M(Int24, 9, VarInt, Int32, true) \ - M(Date, 10, Int, Date, false) \ + M(Date, 10, UInt, Date, false) \ M(Time, 11, Duration, Int64, false) \ - M(Datetime, 12, Int, DateTime, false) \ + M(Datetime, 12, UInt, DateTime, false) \ M(Year, 13, Int, Int16, false) \ M(NewDate, 14, Int, Date, false) \ M(Varchar, 15, CompactBytes, String, false) \ - M(Bit, 16, CompactBytes, UInt64, false) \ + M(Bit, 16, VarInt, UInt64, false) \ M(JSON, 0xf5, Json, String, false) \ M(NewDecimal, 0xf6, Decimal, Decimal32, false) \ M(Enum, 0xf7, VarUInt, Enum16, false) \ @@ -179,13 +179,10 @@ struct ColumnInfo COLUMN_FLAGS(M) #undef M - DB::Field convertField(const DB::Field &) const; DB::Field defaultValueToField() const; - Int64 getEnumIndex(const String &) const; CodecFlag getCodecFlag() const; - -private: - DB::Field getDecimalDefaultValue(const String & str) const; + DB::Field getDecimalValue(const String &) const; + Int64 getEnumIndex(const String &) const; }; enum PartitionType diff --git a/tests/mutable-test/txn_dag/data_type_time.test b/tests/mutable-test/txn_dag/data_type_time.test index e86de40cb4a..ce43ca8c732 100644 --- a/tests/mutable-test/txn_dag/data_type_time.test +++ b/tests/mutable-test/txn_dag/data_type_time.test @@ -10,14 +10,18 @@ => DBGInvoke __mock_tidb_table(default, test, 'col_1 Date, col_2 DateTime') => DBGInvoke __refresh_schemas() => DBGInvoke __put_region(4, 0, 100, default, test) -# not supported -#=> DBGInvoke __raft_insert_row(default, test, 4, 50, '2019-06-10', '2019-06-10 09:00:00') +=> DBGInvoke 
__raft_insert_row(default, test, 4, 50, '2019-06-10', '2019-06-10 09:00:00') -# DAG read full table scan -#=> DBGInvoke dag('select * from default.test') " --dag_planner="optree -#┌─col_1─┬─col_2─┬──col_3─┬─col_4─┬───────col_5─┬──────col_6─┬────────────────col_7─┬────────────────col_8─┬─────col_9─┬─────────col_10─┬─col_11─┐ -#│ -128 │ 255 │ -32768 │ 65535 │ -2147483648 │ 4294967295 │ -9223372036854775808 │ 18446744073709551615 │ 12345.679 │ 1234567.890123 │ 666.88 │ -#└───────┴───────┴────────┴───────┴─────────────┴────────────┴──────────────────────┴──────────────────────┴───────────┴────────────────┴────────┘ +=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌──────col_1─┬───────────────col_2─┐ +│ 2019-06-10 │ 2019-06-10 09:00:00 │ +└────────────┴─────────────────────┘ + +# Mock DAG doesn't support date/datetime comparison with string, may need type inference and do implicit conversion to literal. +# => DBGInvoke dag('select * from default.test where col_1 = \'2019-06-06\' and col_2 = \'2019-06-10 09:00:00\'') " --dag_planner="optree +# ┌──────col_1─┬───────────────col_2─┐ +# │ 2019-06-10 │ 2019-06-10 09:00:00 │ +# └────────────┴─────────────────────┘ # Clean up. => DBGInvoke __drop_tidb_table(default, test) From 61cdc8f1ae33ab36bde3f1c7273b1cdbaf053078 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 26 Aug 2019 22:12:23 +0800 Subject: [PATCH 47/79] improve dag execution time collection (#202) * improve dag execution time collection * address comment * update comments * update comment * update comment --- dbms/src/DataStreams/BlockStreamProfileInfo.h | 7 +++++++ dbms/src/DataStreams/IProfilingBlockInputStream.cpp | 3 +++ dbms/src/Flash/Coprocessor/DAGDriver.cpp | 2 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 8 +------- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/BlockStreamProfileInfo.h b/dbms/src/DataStreams/BlockStreamProfileInfo.h index 578b0ed5b26..e546dd20e2a 100644 --- a/dbms/src/DataStreams/BlockStreamProfileInfo.h +++ b/dbms/src/DataStreams/BlockStreamProfileInfo.h @@ -29,6 +29,11 @@ struct BlockStreamProfileInfo size_t rows = 0; size_t blocks = 0; size_t bytes = 0; + // execution time is the total time spent on current stream and all its children streams + // note that it is different from total_stopwatch.elapsed(), which includes not only the + // time spent on current stream and all its children streams, but also the time of its + // parent streams + UInt64 execution_time = 0; using BlockStreamProfileInfos = std::vector; @@ -45,6 +50,8 @@ struct BlockStreamProfileInfo void update(Block & block); + void updateExecutionTime(UInt64 time) { execution_time += time; } + /// Binary serialization and deserialization of main fields. /// Writes only main fields i.e. fields that required by internal transmission protocol. 
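To make the execution_time comment above concrete: the field accumulates only the spans spent inside read(), so a child's work (invoked from within the parent's read()) is counted, while the time a parent spends between two of our read() calls is not. A toy model with simplified, illustrative names, mirroring the pattern IProfilingBlockInputStream::read() uses below:

    #include <chrono>
    #include <cstdint>

    struct ToyProfileInfo
    {
        // Wall clock running since stream construction, like total_stopwatch.
        std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
        uint64_t execution_time_ns = 0; // this stream plus its children only

        uint64_t elapsed() const
        {
            using namespace std::chrono;
            return duration_cast<nanoseconds>(steady_clock::now() - start).count();
        }
    };

    struct ToyStream
    {
        ToyProfileInfo info;
        ToyStream * child = nullptr;

        void read()
        {
            uint64_t begin = info.elapsed();     // snapshot on entry
            if (child)
                child->read();                   // children's time counts as ours
            // ... produce one block ...
            info.execution_time_ns += info.elapsed() - begin;
        }
    };

This is why DAGDriver below switches from total_stopwatch.elapsed() to execution_time when taking the per-stream maximum.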
void read(ReadBuffer & in); diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index 09eeff2225c..09bf609833f 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -42,6 +42,8 @@ Block IProfilingBlockInputStream::read() if (isCancelledOrThrowIfKilled()) return res; + auto start_time = info.total_stopwatch.elapsed(); + if (!checkTimeLimit()) limit_exceeded_need_break = true; @@ -83,6 +85,7 @@ Block IProfilingBlockInputStream::read() } #endif + info.updateExecutionTime(info.total_stopwatch.elapsed() - start_time); return res; } diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index b5f72738ab0..034139706ef 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -78,7 +78,7 @@ try { if (auto * p_stream = dynamic_cast(streamPtr.get())) { - time_processed_ns = std::max(time_processed_ns, p_stream->getProfileInfo().total_stopwatch.elapsed()); + time_processed_ns = std::max(time_processed_ns, p_stream->getProfileInfo().execution_time); num_produced_rows += p_stream->getProfileInfo().rows; num_iterations += p_stream->getProfileInfo().blocks; } diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index ba6f9e67748..64d39ed3728 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -269,14 +269,8 @@ void InterpreterDAG::executeAggregation( } else { - BlockInputStreams inputs; - if (!pipeline.streams.empty()) - inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - pipeline.firstStream() - = std::make_shared(std::make_shared(inputs), params, true); + = std::make_shared(pipeline.firstStream(), params, true); } // add cast } From 10e3883514c062367d07f7292498860c45241093 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 27 Aug 2019 18:15:04 +0800 Subject: [PATCH 48/79] column id in table scan operator may be -1 (#205) * improve dag execution time collection * address comment * update comments * update comment * update comment * column id in table scan operator may be -1 * column id in table scan operator may be -1 --- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 64d39ed3728..a46a0af02f0 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -68,6 +68,11 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) ColumnID cid = ci.column_id(); if (cid < 1 || cid > (Int64)storage->getTableInfo().columns.size()) { + if (cid == -1) + { + // for sql that do not need read any column(e.g. 
select count(*) from t), the column id will be -1 + continue; + } // cid out of bound throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } @@ -78,8 +83,11 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } if (required_columns.empty()) { - // no column selected, must be something wrong - throw Exception("No column is selected in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); + // if no column is selected, use the smallest column + String smallest_column_name = ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical()); + required_columns.push_back(smallest_column_name); + auto pair = storage->getColumns().getPhysical(smallest_column_name); + source_columns.push_back(pair); } analyzer = std::make_unique(source_columns, context); @@ -269,8 +277,7 @@ void InterpreterDAG::executeAggregation( } else { - pipeline.firstStream() - = std::make_shared(pipeline.firstStream(), params, true); + pipeline.firstStream() = std::make_shared(pipeline.firstStream(), params, true); } // add cast } From 39d199451ee97d09f680f756a790a986aed36e14 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 30 Aug 2019 16:51:01 +0800 Subject: [PATCH 49/79] quick fix for decimal encode (#210) * quick fix for decimal encode * address comments * update comments --- dbms/src/Storages/Transaction/Codec.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dbms/src/Storages/Transaction/Codec.cpp b/dbms/src/Storages/Transaction/Codec.cpp index d57c5023d77..8b451952699 100644 --- a/dbms/src/Storages/Transaction/Codec.cpp +++ b/dbms/src/Storages/Transaction/Codec.cpp @@ -414,6 +414,15 @@ void EncodeDecimalImpl(const T & dec, PrecType prec, ScaleType frac, std::string { static_assert(IsDecimal); + // Scale must (if not, then we have bugs) be the same as TiDB expected, but precision will be + // trimmed to as minimal as possible by TiFlash decimal implementation. TiDB doesn't allow + // decimal with precision less than scale, therefore in theory we should align value's precision + // according to data type. But TiDB somehow happens to allow precision not equal to data type, + // of which we take advantage to make such a handy fix. 
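// Worked example (numbers illustrative): a DECIMAL(10, 3) value such as 0.5 can
// reach this point with its precision trimmed below its scale of 3. Since the
// header emitted below is the byte pair (UInt8(prec), UInt8(frac)) and TiDB
// insists on precision >= scale when decoding, prec is first clamped up to frac: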
+ if (prec < frac) + { + prec = frac; + } constexpr Int32 decimal_mod = powers10[digitsPerWord]; ss << UInt8(prec) << UInt8(frac); From 8a0fb6612d6eeaa302b5c991e22b510088032a83 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 2 Sep 2019 16:45:05 +0800 Subject: [PATCH 50/79] support udf like with 3 arguments (#212) * support udf like with 3 arguments * address comments * add some comments --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 2 +- dbms/src/Functions/FunctionsStringSearch.cpp | 6 ++ dbms/src/Functions/FunctionsStringSearch.h | 102 ++++++++++++++++++- 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 9359bb7de06..90fe7cb1055 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -624,7 +624,7 @@ std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::IsIPv6, "cast"}, //{tipb::ScalarFuncSig::UUID, "cast"}, - //{tipb::ScalarFuncSig::LikeSig, "cast"}, + {tipb::ScalarFuncSig::LikeSig, "like3Args"}, //{tipb::ScalarFuncSig::RegexpBinarySig, "cast"}, //{tipb::ScalarFuncSig::RegexpSig, "cast"}, diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 2b356923152..37f905f6606 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -1025,6 +1025,10 @@ struct NameLike { static constexpr auto name = "like"; }; +struct NameLike3Args +{ + static constexpr auto name = "like3Args"; +}; struct NameNotLike { static constexpr auto name = "notLike"; @@ -1058,6 +1062,7 @@ using FunctionPositionCaseInsensitiveUTF8 using FunctionMatch = FunctionsStringSearch, NameMatch>; using FunctionLike = FunctionsStringSearch, NameLike>; +using FunctionLike3Args = FunctionsStringSearch, NameLike3Args, 3>; using FunctionNotLike = FunctionsStringSearch, NameNotLike>; using FunctionExtract = FunctionsStringSearchToString; using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; @@ -1078,6 +1083,7 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); } diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 9de117464a2..c132d2a1bd3 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -38,12 +38,14 @@ namespace DB * Warning! At this point, the arguments needle, pattern, n, replacement must be constants. 
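 * For the 3-argument variant introduced here (like3Args), the third argument is the
 * escape character; it must likewise be a constant, an integer between 0 and 255.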
*/ +static const UInt8 CH_ESCAPE_CHAR = '\\'; -template +template class FunctionsStringSearch : public IFunction { public: static constexpr auto name = Name::name; + static constexpr auto has_3_args = (num_args == 3); static FunctionPtr create(const Context &) { return std::make_shared(); @@ -56,7 +58,7 @@ class FunctionsStringSearch : public IFunction size_t getNumberOfArguments() const override { - return 2; + return num_args; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -68,10 +70,60 @@ class FunctionsStringSearch : public IFunction if (!arguments[1]->isString()) throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (has_3_args && !arguments[2]->isInteger()) + throw Exception( + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared>(); } + // replace the escape_char in orig_string with '\\' + // this function does not check the validation of the orig_string + // for example, for string "abcd" and escape char 'd', it will + // return "abc\\" + String replaceEscapeChar(String & orig_string, UInt8 escape_char) + { + std::stringstream ss; + for (size_t i = 0; i < orig_string.size(); i++) + { + auto c = orig_string[i]; + if (c == escape_char) + { + if (i+1 != orig_string.size() && orig_string[i+1] == escape_char) + { + // two successive escape char, which means it is trying to escape itself, just remove one + i++; + ss << escape_char; + } + else + { + // https://github.com/pingcap/tidb/blob/master/util/stringutil/string_util.go#L154 + // if any char following escape char that is not [escape_char,'_','%'], it is invalid escape. + // mysql will treat escape character as the origin value even + // the escape sequence is invalid in Go or C. + // e.g., \m is invalid in Go, but in MySQL we will get "m" for select '\m'. + // Following case is correct just for escape \, not for others like +. + // TODO: Add more checks for other escapes. 
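// Illustrative end-to-end mappings for this routine, taking '|' as the custom
// escape character (worked examples traced from the logic here, not TiDB test vectors):
//   b|%   becomes  b\%     the custom escape is rewritten to '\', so '%' matches literally
//   a||b  becomes  a|b     a doubled escape collapses into one literal escape char
//   a\b   becomes  a\\b    a pre-existing backslash is escaped so it stays literal
//   x|\y  becomes  x\\y    the escape before '\' is dropped just below, then '\' is re-escaped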
+ if (i+1 != orig_string.size() && orig_string[i+1] == CH_ESCAPE_CHAR) + { + continue; + } + ss << CH_ESCAPE_CHAR; + } + } + else if (c == CH_ESCAPE_CHAR) + { + // need to escape this '\\' + ss << CH_ESCAPE_CHAR << CH_ESCAPE_CHAR; + } + else + { + ss << c; + } + } + return ss.str(); + } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { using ResultType = typename Impl::ResultType; @@ -82,10 +134,44 @@ class FunctionsStringSearch : public IFunction const ColumnConst * col_haystack_const = typeid_cast(&*column_haystack); const ColumnConst * col_needle_const = typeid_cast(&*column_needle); + UInt8 escape_char = CH_ESCAPE_CHAR; + if (has_3_args) + { + auto * col_escape_const = typeid_cast(&*block.getByPosition(arguments[2]).column); + bool valid_args = true; + if (col_needle_const == nullptr || col_escape_const == nullptr) + { + valid_args = false; + } + else + { + auto c = col_escape_const->getValue(); + if (c < 0 || c > 255) + { + // todo maybe use more strict constraint + valid_args = false; + } + else + { + escape_char = (UInt8) c; + } + } + if (!valid_args) + { + throw Exception("2nd and 3rd arguments of function " + getName() + " must " + "be constants, and the 3rd argument must between 0 and 255."); + } + } + if (col_haystack_const && col_needle_const) { ResultType res{}; - Impl::constant_constant(col_haystack_const->getValue(), col_needle_const->getValue(), res); + String needle_string = col_needle_const->getValue(); + if (has_3_args && escape_char != CH_ESCAPE_CHAR) + { + needle_string = replaceEscapeChar(needle_string, escape_char); + } + Impl::constant_constant(col_haystack_const->getValue(), needle_string, res); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res)); return; } @@ -105,7 +191,15 @@ class FunctionsStringSearch : public IFunction col_needle_vector->getOffsets(), vec_res); else if (col_haystack_vector && col_needle_const) - Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getValue(), vec_res); + { + String needle_string = col_needle_const->getValue(); + if (has_3_args && escape_char != CH_ESCAPE_CHAR) + { + needle_string = replaceEscapeChar(needle_string, escape_char); + } + Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), + needle_string, vec_res); + } else if (col_haystack_const && col_needle_vector) Impl::constant_vector(col_haystack_const->getValue(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res); else From ff9a1dea21cc0983f3b3b96fc22360ca145db901 Mon Sep 17 00:00:00 2001 From: xufei Date: Thu, 5 Sep 2019 14:05:24 +0800 Subject: [PATCH 51/79] Flash-473 optimize date and datetime comparison (#221) * support udf like with 3 arguments * address comments * add some comments * Flash-473 optimize date and datetime comparison * address comments --- dbms/src/Functions/FunctionsComparison.h | 79 +++++++++++++++++------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index f94fb6f6a2a..4ddef605914 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -133,6 +133,16 @@ inline time_t dateToDateTime(UInt32 date_data) return DateLUT::instance().makeDateTime(local_date.year(), local_date.month(), local_date.day(), 0, 0, 0); } +inline std::tuple dateTimeToDate(time_t time_data) +{ + // todo 
use timezone info + auto & date_lut = DateLUT::instance(); + auto truncated = date_lut.toHour(time_data) != 0 || date_lut.toMinute(time_data) != 0 || date_lut.toSecond(time_data) != 0; + auto values = date_lut.getValues(time_data); + auto day_num = date_lut.makeDayNum(values.year, values.month, values.day_of_month); + return std::make_tuple(day_num, truncated); +} + template class Op, bool is_left_date> struct DateDateTimeComparisonImpl @@ -175,18 +185,31 @@ struct DateDateTimeComparisonImpl } else { - using OpType = B; - size_t size = a.size(); - const A * a_pos = &a[0]; - UInt8 * c_pos = &c[0]; - const A * a_end = a_pos + size; - - while (a_pos < a_end) + // date vector with datetime constant + // first check if datetime constant can be convert to date constant + bool truncated; + DayNum_t date_num; + std::tie(date_num, truncated) = dateTimeToDate((time_t) b); + if (!truncated) { - time_t date_time = dateToDateTime(*a_pos); - *c_pos = Op::apply((OpType)date_time, b); - ++a_pos; - ++c_pos; + using OpType = A; + NumComparisonImpl>::vector_constant(a, (OpType) date_num, c); + } + else + { + using OpType = B; + size_t size = a.size(); + const A *a_pos = &a[0]; + UInt8 *c_pos = &c[0]; + const A *a_end = a_pos + size; + + while (a_pos < a_end) + { + time_t date_time = dateToDateTime(*a_pos); + *c_pos = Op::apply((OpType) date_time, b); + ++a_pos; + ++c_pos; + } } } } @@ -202,18 +225,30 @@ struct DateDateTimeComparisonImpl } else { - using OpType = A; - size_t size = b.size(); - const B * b_pos = &b[0]; - UInt8 * c_pos = &c[0]; - const B * b_end = b_pos + size; - - while (b_pos < b_end) + // datetime constant with date vector + bool truncated; + DayNum_t date_num; + std::tie(date_num, truncated) = dateTimeToDate((time_t) a); + if (!truncated) { - time_t date_time = dateToDateTime(*b_pos); - *c_pos = Op::apply(a, (OpType)date_time); - ++b_pos; - ++c_pos; + using OpType = B; + NumComparisonImpl>::vector_constant((OpType)a, date_num, c); + } + else + { + using OpType = A; + size_t size = b.size(); + const B *b_pos = &b[0]; + UInt8 *c_pos = &c[0]; + const B *b_end = b_pos + size; + + while (b_pos < b_end) + { + time_t date_time = dateToDateTime(*b_pos); + *c_pos = Op::apply(a, (OpType) date_time); + ++b_pos; + ++c_pos; + } } } } From 6b14b3856cf9fb55bdc3400a3049c73ee894f7a4 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 6 Sep 2019 17:53:43 +0800 Subject: [PATCH 52/79] FLASH-479 select from empty table throw error in tiflash (#223) * 1. select from empty table throw error in tiflash, 2. add some logs, 3. 
disable timestamp literal in DAG request * revert unrelated change --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 2 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 5 ++++- dbms/src/Flash/Coprocessor/DAGDriver.h | 2 ++ dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 10 ++++++++-- dbms/src/Storages/Transaction/RegionTable.cpp | 18 ------------------ dbms/src/Storages/Transaction/RegionTable.h | 1 - 6 files changed, 15 insertions(+), 23 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index d7e2fb5bedf..1e30bf31a9a 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -73,7 +73,7 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) } else { - region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, region_id); + region = context.getTMTContext().getKVStore()->getRegion(region_id); if (!region) throw Exception("No such region", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 034139706ef..a35be6c8aa3 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -28,7 +28,8 @@ DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, region_version(region_version_), region_conf_version(region_conf_version_), dag_response(dag_response_), - internal(internal_) + internal(internal_), + log(&Logger::get("DAGDriver")) {} void DAGDriver::execute() @@ -98,10 +99,12 @@ catch (const LockException & e) } catch (const Exception & e) { + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); recordError(e.code(), e.message()); } catch (const std::exception & e) { + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.what()); recordError(ErrorCodes::UNKNOWN_EXCEPTION, e.what()); } diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index 4c84cfcb020..a74341db550 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -32,6 +32,8 @@ class DAGDriver bool internal; + Poco::Logger * log; + void recordError(Int32 err_code, const String & err_msg); }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index a46a0af02f0..af763aeeab3 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +138,7 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) info.region_id = dag.getRegionID(); info.version = dag.getRegionVersion(); info.conf_version = dag.getRegionConfVersion(); - auto current_region = context.getTMTContext().getRegionTable().getRegionByTableAndID(table_id, info.region_id); + auto current_region = context.getTMTContext().getKVStore()->getRegion(info.region_id); if (!current_region) { std::vector region_ids; @@ -148,6 +150,11 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) query_info.mvcc_query_info->concurrent = 0.0; pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); + if (pipeline.streams.empty()) + { + 
pipeline.streams.emplace_back(std::make_shared(storage->getSampleBlockForColumns(required_columns))); + } + pipeline.transform([&](auto & stream) { stream->addTableLock(table_lock); }); /// Set the limits and quota for reading data, the speed and time of the query. @@ -178,7 +185,6 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } }); } - ColumnsWithTypeAndName columnsWithTypeAndName = pipeline.firstStream()->getHeader().getColumnsWithTypeAndName(); } InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp index a438c4de8f8..0545c34206f 100644 --- a/dbms/src/Storages/Transaction/RegionTable.cpp +++ b/dbms/src/Storages/Transaction/RegionTable.cpp @@ -529,24 +529,6 @@ std::vector> RegionTable::getRegionsByTable(const return regions; } -RegionPtr RegionTable::getRegionByTableAndID(const TableID table_id, const RegionID region_id) -{ - auto & kvstore = context.getTMTContext().getKVStore(); - { - std::lock_guard lock(mutex); - auto & table = getOrCreateTable(table_id); - - for (const auto & region_info : table.regions) - { - if (region_info.second.region_id == region_id) - { - return kvstore->getRegion(region_info.second.region_id); - } - } - } - return nullptr; -} - void RegionTable::mockDropRegionsInTable(TableID table_id) { std::lock_guard lock(mutex); diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index 9775f64dbd9..05f197908bf 100644 --- a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -183,7 +183,6 @@ class RegionTable : private boost::noncopyable void traverseInternalRegions(std::function && callback); void traverseInternalRegionsByTable(const TableID table_id, std::function && callback); std::vector> getRegionsByTable(const TableID table_id); - RegionPtr getRegionByTableAndID(const TableID table_id, const RegionID region_id); /// Write the data of the given region into the table with the given table ID, fill the data list for outer to remove. 
/// Will trigger schema sync on read error for only once, From 548e5196f6c6af03db84982b3f2a99b80cf60d0d Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Sat, 7 Sep 2019 01:48:01 +0800 Subject: [PATCH 53/79] Update flash service port --- tests/docker/config/config.xml | 2 +- tests/docker/config/tiflash.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/docker/config/config.xml b/tests/docker/config/config.xml index 6d4b1c8ab37..0e5629a8004 100644 --- a/tests/docker/config/config.xml +++ b/tests/docker/config/config.xml @@ -24,7 +24,7 @@ - 0.0.0.0:9093 + 0.0.0.0:20171 8123 diff --git a/tests/docker/config/tiflash.xml b/tests/docker/config/tiflash.xml index a10e547c5d1..beed683db12 100644 --- a/tests/docker/config/tiflash.xml +++ b/tests/docker/config/tiflash.xml @@ -27,7 +27,7 @@ - 0.0.0.0:9093 + 0.0.0.0:20171 8123 From fce36767c5595329750f93665be99771abc10c05 Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 10 Sep 2019 17:29:22 +0800 Subject: [PATCH 54/79] fix bug in DAGBlockOutputStream (#230) --- dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp | 10 ++++------ dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp index b475ac3cb24..3514c1d006d 100644 --- a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.cpp @@ -31,7 +31,6 @@ DAGBlockOutputStream::DAGBlockOutputStream(tipb::SelectResponse & dag_response_, } current_chunk = nullptr; current_records_num = 0; - total_rows = 0; } @@ -43,10 +42,10 @@ void DAGBlockOutputStream::writePrefix() void DAGBlockOutputStream::writeSuffix() { // error handle, - if (current_chunk != nullptr && records_per_chunk > 0) + if (current_chunk != nullptr && current_records_num > 0) { current_chunk->set_rows_data(current_ss.str()); - dag_response.add_output_counts(records_per_chunk); + dag_response.add_output_counts(current_records_num); } } @@ -71,7 +70,7 @@ void DAGBlockOutputStream::write(const Block & block) } current_chunk = dag_response.add_chunks(); current_ss.str(""); - records_per_chunk = 0; + current_records_num = 0; } for (size_t j = 0; j < block.columns(); j++) { @@ -80,8 +79,7 @@ void DAGBlockOutputStream::write(const Block & block) EncodeDatum(datum.field(), getCodecFlagByFieldType(result_field_types[j]), current_ss); } // Encode current row - records_per_chunk++; - total_rows++; + current_records_num++; } } diff --git a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h index 7f9b09dd0f3..9ac6c5495fa 100644 --- a/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h +++ b/dbms/src/Flash/Coprocessor/DAGBlockOutputStream.h @@ -29,7 +29,7 @@ class DAGBlockOutputStream : public IBlockOutputStream private: tipb::SelectResponse & dag_response; - Int64 records_per_chunk; + const Int64 records_per_chunk; tipb::EncodeType encodeType; std::vector result_field_types; @@ -38,7 +38,6 @@ class DAGBlockOutputStream : public IBlockOutputStream tipb::Chunk * current_chunk; Int64 current_records_num; std::stringstream current_ss; - Int64 total_rows; }; } // namespace DB From a9f9b488fc275c49a5b7674c65a32d0a1ddd74e8 Mon Sep 17 00:00:00 2001 From: ruoxi Date: Thu, 12 Sep 2019 13:53:02 +0800 Subject: [PATCH 55/79] FLASH-475: Support BATCH COMMANDS in flash service (#232) * Initial batch command support * Add config to control thread pool size * Address comments --- 
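The dispatch shape introduced here: a single sub-request is handled in place, otherwise every sub-request goes onto a thread pool sized by the new batch_commands_threads setting (0 falls back to max_threads), and the first non-OK status is returned. A rough self-contained sketch of that shape, with std::thread standing in for the pool and Status/Request as placeholders for the gRPC types:

    #include <functional>
    #include <thread>
    #include <vector>

    struct Status { bool ok = true; };
    struct Request { /* sub-command payload */ };

    Status dispatchBatch(const std::vector<Request> & requests,
                         const std::function<Status(const Request &)> & handle_one)
    {
        if (requests.size() == 1)                  // shortcut: no pool for one request
            return handle_one(requests[0]);

        std::vector<Status> rets(requests.size()); // one pre-sized slot per request, so
        std::vector<std::thread> workers;          // workers never resize shared state
        for (size_t i = 0; i < requests.size(); ++i)
            workers.emplace_back([&, i] { rets[i] = handle_one(requests[i]); });
        for (auto & w : workers)
            w.join();

        for (const auto & ret : rets)              // first failure is reported
            if (!ret.ok)
                return ret;
        return {};
    }

Pre-sizing rets and capturing the slot index keeps each worker writing to a stable address, a safe way to collect per-command results concurrently.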
dbms/src/Flash/BatchCommandsHandler.cpp | 94 +++++++++++++++++++ dbms/src/Flash/BatchCommandsHandler.h | 50 ++++++++++ .../{Coprocessor => }/CoprocessorHandler.cpp | 17 ++-- .../{Coprocessor => }/CoprocessorHandler.h | 3 +- dbms/src/Flash/FlashService.cpp | 47 +++++++++- dbms/src/Flash/FlashService.h | 6 +- dbms/src/Interpreters/Settings.h | 1 + 7 files changed, 201 insertions(+), 17 deletions(-) create mode 100644 dbms/src/Flash/BatchCommandsHandler.cpp create mode 100644 dbms/src/Flash/BatchCommandsHandler.h rename dbms/src/Flash/{Coprocessor => }/CoprocessorHandler.cpp (88%) rename dbms/src/Flash/{Coprocessor => }/CoprocessorHandler.h (99%) diff --git a/dbms/src/Flash/BatchCommandsHandler.cpp b/dbms/src/Flash/BatchCommandsHandler.cpp new file mode 100644 index 00000000000..e6768f03c13 --- /dev/null +++ b/dbms/src/Flash/BatchCommandsHandler.cpp @@ -0,0 +1,94 @@ +#include +#include +#include + +namespace DB +{ + +BatchCommandsHandler::BatchCommandsHandler(BatchCommandsContext & batch_commands_context_, const tikvpb::BatchCommandsRequest & request_, + tikvpb::BatchCommandsResponse & response_) + : batch_commands_context(batch_commands_context_), request(request_), response(response_), log(&Logger::get("BatchCommandsHandler")) +{} + +grpc::Status BatchCommandsHandler::execute() +{ + if (request.requests_size() == 0) + return grpc::Status::OK; + + // TODO: Fill transport_layer_load into BatchCommandsResponse. + + auto command_handler_func + = [](BatchCommandsContext::DBContextCreationFunc db_context_creation_func, grpc::ServerContext * grpc_server_context, + const tikvpb::BatchCommandsRequest::Request & req, tikvpb::BatchCommandsResponse::Response & resp, grpc::Status & ret) { + if (!req.has_coprocessor()) + { + ret = grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); + return; + } + + const auto & cop_req = req.coprocessor(); + auto cop_resp = resp.mutable_coprocessor(); + + auto [context, status] = db_context_creation_func(grpc_server_context); + if (!status.ok()) + { + ret = status; + return; + } + + CoprocessorContext cop_context(context, cop_req.context(), *grpc_server_context); + CoprocessorHandler cop_handler(cop_context, &cop_req, cop_resp); + + ret = cop_handler.execute(); + }; + + /// Shortcut for only one request by not going to thread pool. + if (request.requests_size() == 1) + { + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling the only batch command in place."); + + const auto & req = request.requests(0); + auto resp = response.add_responses(); + response.add_request_ids(request.request_ids(0)); + auto ret = grpc::Status::OK; + command_handler_func(batch_commands_context.db_context_creation_func, &batch_commands_context.grpc_server_context, req, *resp, ret); + return ret; + } + + /// Use thread pool to handle requests concurrently. + const Settings & settings = batch_commands_context.db_context.getSettingsRef(); + size_t max_threads = settings.batch_commands_threads ? 
static_cast(settings.batch_commands_threads) + : static_cast(settings.max_threads); + + LOG_DEBUG( + log, __PRETTY_FUNCTION__ << ": Handling " << request.requests_size() << " batch commands using " << max_threads << " threads."); + + ThreadPool thread_pool(max_threads); + + std::vector rets; + size_t i = 0; + + for (const auto & req : request.requests()) + { + auto resp = response.add_responses(); + response.add_request_ids(request.request_ids(i++)); + rets.emplace_back(grpc::Status::OK); + thread_pool.schedule([&]() { + command_handler_func( + batch_commands_context.db_context_creation_func, &batch_commands_context.grpc_server_context, req, *resp, rets.back()); + }); + } + + thread_pool.wait(); + + // Iterate all return values of each individual commands, returns the first non-OK one if any. + for (const auto & ret : rets) + { + if (!ret.ok()) + return ret; + } + + return grpc::Status::OK; +} + +} // namespace DB diff --git a/dbms/src/Flash/BatchCommandsHandler.h b/dbms/src/Flash/BatchCommandsHandler.h new file mode 100644 index 00000000000..800318be39b --- /dev/null +++ b/dbms/src/Flash/BatchCommandsHandler.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + +namespace DB +{ + +struct BatchCommandsContext +{ + /// Context for this batch commands. + Context & db_context; + + /// Context creation function for each individual command - they should be handled isolated, + /// given that context is being used to pass arguments regarding queries. + using DBContextCreationFunc = std::function(grpc::ServerContext *)>; + DBContextCreationFunc db_context_creation_func; + + grpc::ServerContext & grpc_server_context; + + BatchCommandsContext( + Context & db_context_, DBContextCreationFunc && db_context_creation_func_, grpc::ServerContext & grpc_server_context_) + : db_context(db_context_), db_context_creation_func(std::move(db_context_creation_func_)), grpc_server_context(grpc_server_context_) + {} +}; + +class BatchCommandsHandler +{ +public: + BatchCommandsHandler(BatchCommandsContext & batch_commands_context_, const tikvpb::BatchCommandsRequest & request_, + tikvpb::BatchCommandsResponse & response_); + + ~BatchCommandsHandler() = default; + + grpc::Status execute(); + +protected: + BatchCommandsContext & batch_commands_context; + const tikvpb::BatchCommandsRequest & request; + tikvpb::BatchCommandsResponse & response; + + Logger * log; +}; + +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp similarity index 88% rename from dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp rename to dbms/src/Flash/CoprocessorHandler.cpp index a92f98b2945..2d72823f3af 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -1,11 +1,9 @@ -#include +#include -#include #include #include #include #include -#include #include #include #include @@ -24,8 +22,7 @@ CoprocessorHandler::CoprocessorHandler( : cop_context(cop_context_), cop_request(cop_request_), cop_response(cop_response_), log(&Logger::get("CoprocessorHandler")) {} -grpc::Status CoprocessorHandler::execute() -try +grpc::Status CoprocessorHandler::execute() try { switch (cop_request->tp()) { @@ -48,7 +45,7 @@ try throw Exception( "Coprocessor request type " + std::to_string(cop_request->tp()) + " is not implemented", ErrorCodes::NOT_IMPLEMENTED); } - return ::grpc::Status(::grpc::StatusCode::OK, ""); + 
return grpc::Status::OK; } catch (const LockException & e) { @@ -60,7 +57,7 @@ catch (const LockException & e) lock_info->set_lock_ttl(e.lock_infos[0]->lock_ttl); lock_info->set_lock_version(e.lock_infos[0]->lock_version); // return ok so TiDB has the chance to see the LockException - return ::grpc::Status(::grpc::StatusCode::OK, ""); + return grpc::Status::OK; } catch (const RegionException & e) { @@ -83,7 +80,7 @@ catch (const RegionException & e) break; } // return ok so TiDB has the chance to see the LockException - return ::grpc::Status(::grpc::StatusCode::OK, ""); + return grpc::Status::OK; } catch (const Exception & e) { @@ -96,14 +93,14 @@ catch (const Exception & e) // TODO: Map other DB error codes to grpc codes. - return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.message()); + return grpc::Status(grpc::StatusCode::INTERNAL, e.message()); } catch (const std::exception & e) { LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.what()); cop_response->Clear(); cop_response->set_other_error(e.what()); - return ::grpc::Status(::grpc::StatusCode::INTERNAL, e.what()); + return grpc::Status(grpc::StatusCode::INTERNAL, e.what()); } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h b/dbms/src/Flash/CoprocessorHandler.h similarity index 99% rename from dbms/src/Flash/Coprocessor/CoprocessorHandler.h rename to dbms/src/Flash/CoprocessorHandler.h index 517875e9ace..477daeeb636 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorHandler.h +++ b/dbms/src/Flash/CoprocessorHandler.h @@ -2,13 +2,12 @@ #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include #include #pragma GCC diagnostic pop - -#include #include namespace DB diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 0489b6b8777..b2a473d083a 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -1,7 +1,8 @@ #include #include -#include +#include +#include #include #include @@ -54,10 +55,48 @@ grpc::Status FlashService::Coprocessor( auto ret = cop_handler.execute(); - LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle coprocessor request done"); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle coprocessor request done: " << ret.error_code() << ", " << ret.error_message()); return ret; } +grpc::Status FlashService::BatchCommands( + grpc::ServerContext * grpc_context, grpc::ServerReaderWriter<::tikvpb::BatchCommandsResponse, tikvpb::BatchCommandsRequest> * stream) +{ + auto [context, status] = createDBContext(grpc_context); + if (!status.ok()) + { + return status; + } + + tikvpb::BatchCommandsRequest request; + while (stream->Read(&request)) + { + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling batch commands: " << request.DebugString()); + + tikvpb::BatchCommandsResponse response; + BatchCommandsContext batch_commands_context( + context, [this](grpc::ServerContext * grpc_server_context) { return createDBContext(grpc_server_context); }, *grpc_context); + BatchCommandsHandler batch_commands_handler(batch_commands_context, request, response); + auto ret = batch_commands_handler.execute(); + if (!ret.ok()) + { + LOG_DEBUG( + log, __PRETTY_FUNCTION__ << ": Handle batch commands request done: " << ret.error_code() << ", " << ret.error_message()); + return ret; + } + + if (!stream->Write(response)) + { + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Write response failed for unknown reason."); + return grpc::Status(grpc::StatusCode::UNKNOWN, "Write response failed for unknown reason."); + } + + 
LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle batch commands request done: " << ret.error_code() << ", " << ret.error_message()); + } + + return grpc::Status::OK; +} + String getClientMetaVarWithDefault(grpc::ServerContext * grpc_context, const String & name, const String & default_val) { if (grpc_context->client_metadata().count(name) != 1) @@ -66,7 +105,7 @@ String getClientMetaVarWithDefault(grpc::ServerContext * grpc_context, const Str return String(grpc_context->client_metadata().find(name)->second.data()); } -std::tuple FlashService::createDBContext(grpc::ServerContext * grpc_context) +std::tuple FlashService::createDBContext(grpc::ServerContext * grpc_context) { /// Create DB context. Context context = server.context(); @@ -101,7 +140,7 @@ std::tuple FlashService::createDBContext(grpc::ServerCo std::string expr_field_type_check = getClientMetaVarWithDefault(grpc_context, "dag_expr_field_type_strict_check", "1"); context.setSetting("dag_expr_field_type_strict_check", expr_field_type_check); - return std::make_tuple(context, ::grpc::Status::OK); + return std::make_tuple(context, grpc::Status::OK); } } // namespace DB diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index 8ab123cc1fb..f208ec04db0 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -24,7 +24,11 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shar ~FlashService() final; - grpc::Status Coprocessor(grpc::ServerContext * context, const coprocessor::Request * request, coprocessor::Response * response); + grpc::Status Coprocessor( + grpc::ServerContext * grpc_context, const coprocessor::Request * request, coprocessor::Response * response) override; + + grpc::Status BatchCommands(grpc::ServerContext * grpc_context, + grpc::ServerReaderWriter * stream) override; private: std::tuple createDBContext(grpc::ServerContext * grpc_contex); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 86b42cf9ce2..6a2e619c50d 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -33,6 +33,7 @@ struct Settings M(SettingString, dag_planner, "sql", "planner for DAG query, sql builds the SQL string, optree builds the internal operator(stream) tree.") \ M(SettingBool, dag_expr_field_type_strict_check, true, "when set to true, every expr in the dag request must provide field type, otherwise only the result expr will be checked.") \ M(SettingInt64, schema_version, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, "tmt schema version.") \ + M(SettingUInt64, batch_commands_threads, 0, "Number of threads to use for handling batch commands concurrently. 
0 means - same as 'max_threads'.") \ M(SettingUInt64, min_compress_block_size, DEFAULT_MIN_COMPRESS_BLOCK_SIZE, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.") \ M(SettingUInt64, max_compress_block_size, DEFAULT_MAX_COMPRESS_BLOCK_SIZE, "The maximum size of blocks of uncompressed data before compressing for writing to a table.") \ M(SettingUInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading") \ From df079398d63895fa06444b9101bde63ab522ecbc Mon Sep 17 00:00:00 2001 From: ruoxi Date: Mon, 16 Sep 2019 18:42:10 +0800 Subject: [PATCH 56/79] FLASH-483: Combine raft service and flash service (#235) * Combine raft service and flash service * Address comment and fix build error * Update configs --- dbms/src/Flash/FlashService.cpp | 7 ++++--- dbms/src/Flash/FlashService.h | 4 +--- dbms/src/Interpreters/Context.cpp | 4 ++-- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Raft/RaftService.cpp | 27 ++++----------------------- dbms/src/Raft/RaftService.h | 6 +----- dbms/src/Server/Server.cpp | 5 +---- dbms/src/Server/config.xml | 5 ++++- tests/docker/config/config.xml | 3 +-- tests/docker/config/tiflash.xml | 3 +-- 10 files changed, 20 insertions(+), 46 deletions(-) diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index b2a473d083a..a5521b9eca9 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace DB @@ -15,13 +15,14 @@ extern const int NOT_IMPLEMENTED; } FlashService::FlashService(const std::string & address_, IServer & server_) - : server(server_), address(address_), log(&Logger::get("FlashService")) + : address(address_), server(server_), log(&Logger::get("FlashService")) { grpc::ServerBuilder builder; builder.AddListeningPort(address, grpc::InsecureServerCredentials()); builder.RegisterService(this); + builder.RegisterService(&server.context().getRaftService()); - // todo should set a reasonable value?? + // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error. 
builder.SetMaxReceiveMessageSize(-1); builder.SetMaxSendMessageSize(-1); diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index f208ec04db0..92b895f0137 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -34,10 +34,8 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shar std::tuple createDBContext(grpc::ServerContext * grpc_contex); private: - IServer & server; - std::string address; - + IServer & server; GRPCServerPtr grpc_server; Logger * log; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 2247dd3f04f..67a2b035caf 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1402,12 +1402,12 @@ DDLWorker & Context::getDDLWorker() const return *shared->ddl_worker; } -void Context::initializeRaftService(const std::string & service_addr) +void Context::initializeRaftService() { auto lock = getLock(); if (shared->raft_service) throw Exception("Raft Service has already been initialized.", ErrorCodes::LOGICAL_ERROR); - shared->raft_service = std::make_shared(service_addr, *this); + shared->raft_service = std::make_shared(*this); } void Context::shutdownRaftService() diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 61d97e7fefa..ee7772deafb 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -360,7 +360,7 @@ class Context void setDDLWorker(std::shared_ptr ddl_worker); DDLWorker & getDDLWorker() const; - void initializeRaftService(const std::string & service_addr); + void initializeRaftService(); void shutdownRaftService(); void createTMTContext(const std::vector & pd_addrs, const std::string & learner_key, diff --git a/dbms/src/Raft/RaftService.cpp b/dbms/src/Raft/RaftService.cpp index 651690b7489..1fdc8ce05e9 100644 --- a/dbms/src/Raft/RaftService.cpp +++ b/dbms/src/Raft/RaftService.cpp @@ -8,9 +8,8 @@ namespace DB { -RaftService::RaftService(const std::string & address_, DB::Context & db_context_) - : address(address_), - db_context(db_context_), +RaftService::RaftService(DB::Context & db_context_) + : db_context(db_context_), kvstore(db_context.getTMTContext().getKVStore()), background_pool(db_context.getBackgroundPool()), log(&Logger::get("RaftService")) @@ -18,16 +17,6 @@ RaftService::RaftService(const std::string & address_, DB::Context & db_context_ if (!db_context.getTMTContext().isInitialized()) throw Exception("TMTContext is not initialized", ErrorCodes::LOGICAL_ERROR); - grpc::ServerBuilder builder; - builder.AddListeningPort(address, grpc::InsecureServerCredentials()); - builder.RegisterService(this); - - // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error. 
- builder.SetMaxReceiveMessageSize(-1); - builder.SetMaxSendMessageSize(-1); - - grpc_server = builder.BuildAndStart(); - persist_handle = background_pool.addTask([this] { return kvstore->tryPersist(); }, false); table_flush_handle = background_pool.addTask([this] { @@ -63,8 +52,6 @@ RaftService::RaftService(const std::string & address_, DB::Context & db_context_ return true; }); - LOG_INFO(log, "Raft service listening on [" << address << "]"); - { std::vector regions; kvstore->traverseRegions([®ions](RegionID, const RegionPtr & region) { @@ -119,12 +106,6 @@ RaftService::~RaftService() background_pool.removeTask(region_decode_handle); region_decode_handle = nullptr; } - - // wait 5 seconds for pending rpcs to gracefully stop - gpr_timespec deadline{5, 0, GPR_TIMESPAN}; - LOG_DEBUG(log, "Begin to shutting down grpc server"); - grpc_server->Shutdown(deadline); - grpc_server->Wait(); } grpc::Status RaftService::ApplyCommandBatch(grpc::ServerContext * grpc_context, CommandServerReaderWriter * stream) @@ -143,7 +124,7 @@ grpc::Status RaftService::ApplyCommandBatch(grpc::ServerContext * grpc_context, } catch (...) { - tryLogCurrentException(log, "gRPC ApplyCommandBatch on " + address + " error"); + tryLogCurrentException(log, "gRPC ApplyCommandBatch error"); } return grpc::Status::CANCELLED; @@ -158,7 +139,7 @@ grpc::Status RaftService::ApplySnapshot(grpc::ServerContext *, CommandServerRead } catch (...) { - tryLogCurrentException(log, "gRPC ApplyCommandBatch on " + address + " error"); + tryLogCurrentException(log, "gRPC ApplyCommandBatch error"); return grpc::Status(grpc::StatusCode::UNKNOWN, "Runtime error, check theflash log for detail."); } } diff --git a/dbms/src/Raft/RaftService.h b/dbms/src/Raft/RaftService.h index 787c5f3e035..38421f0c727 100644 --- a/dbms/src/Raft/RaftService.h +++ b/dbms/src/Raft/RaftService.h @@ -23,7 +23,7 @@ using RegionMap = std::unordered_map; class RaftService final : public enginepb::Engine::Service, public std::enable_shared_from_this, private boost::noncopyable { public: - RaftService(const std::string & address_, Context & db_context); + RaftService(Context & db_context); ~RaftService() final; @@ -37,10 +37,6 @@ class RaftService final : public enginepb::Engine::Service, public std::enable_s grpc::ServerContext * grpc_context, CommandServerReader * reader, enginepb::SnapshotDone * response) override; private: - std::string address; - - GRPCServerPtr grpc_server; - Context & db_context; KVStorePtr kvstore; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index db3e458c724..0a57e8f8ec1 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -452,10 +452,7 @@ int Server::main(const std::vector & /*args*/) } if (need_raft_service) - { - String raft_service_addr = config().getString("raft.service_addr"); - global_context->initializeRaftService(raft_service_addr); - } + global_context->initializeRaftService(); SCOPE_EXIT({ LOG_INFO(log, "Shutting down raft service."); diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml index dc6fc34f507..ced2c54b2c1 100644 --- a/dbms/src/Server/config.xml +++ b/dbms/src/Server/config.xml @@ -313,12 +313,15 @@ - 0.0.0.0:20021 /var/lib/clickhouse/kvstore /var/lib/clickhouse/regmap http://127.0.0.1:13579 + + 0.0.0.0:3930 + + @@ -24,7 +23,7 @@ - 0.0.0.0:20171 + 0.0.0.0:3930 8123 diff --git a/tests/docker/config/tiflash.xml b/tests/docker/config/tiflash.xml index beed683db12..afc6ea001e4 100644 --- a/tests/docker/config/tiflash.xml +++ b/tests/docker/config/tiflash.xml @@ -16,7 
+16,6 @@ the flash - 0.0.0.0:3930 /data/kvstore pd0:2379 @@ -27,7 +26,7 @@ - 0.0.0.0:20171 + 0.0.0.0:3930 8123 From 0bb7991764b222042ec50acfd162fa83ea9b1e6e Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 17 Sep 2019 02:43:26 +0800 Subject: [PATCH 57/79] Fix build error --- dbms/src/Debug/dbgTools.cpp | 1 - .../src/Storages/Transaction/MyTimeParser.cpp | 172 ------------------ 2 files changed, 173 deletions(-) delete mode 100644 dbms/src/Storages/Transaction/MyTimeParser.cpp diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index e65cfaf4000..7684cd6ff6e 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/Transaction/MyTimeParser.cpp b/dbms/src/Storages/Transaction/MyTimeParser.cpp deleted file mode 100644 index e183e587ceb..00000000000 --- a/dbms/src/Storages/Transaction/MyTimeParser.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include - -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -int adjustYear(int year) -{ - if (year >= 0 && year <= 69) - return 2000 + year; - if (year >= 70 && year <= 99) - return 1900 + year; - return year; -} - -void scanTimeArgs(const std::vector & seps, std::initializer_list && list) -{ - int i = 0; - for (auto * ptr : list) - { - *ptr = std::stoi(seps[i]); - i++; - } -} - -int getFracIndex(const String & format) -{ - int idx = -1; - for (int i = int(format.size()) - 1; i >= 0; i--) - { - if (std::ispunct(format[i])) - { - if (format[i] == '.') - { - idx = i; - } - break; - } - } - return idx; -} - -std::vector parseDateFormat(String format) -{ - format = Poco::trimInPlace(format); - - std::vector seps; - size_t start = 0; - for (size_t i = 0; i < format.size(); i++) - { - if (i == 0 || i + 1 == format.size()) - { - if (!std::isdigit(format[i])) - return {}; - continue; - } - - if (!std::isdigit(format[i])) - { - if (!std::isdigit(format[i - 1])) - return {}; - seps.push_back(format.substr(start, i - start)); - start = i + 1; - } - } - seps.push_back(format.substr(start)); - return seps; -} - -std::vector splitDatetime(String format) -{ - int idx = getFracIndex(format); - if (idx > 0) - { - format = format.substr(0, idx); - } - return parseDateFormat(format); -} - -Field parseMyDatetime(const String & str) -{ - Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; - - const auto & seps = splitDatetime(str); - - switch (seps.size()) - { - // No delimiter - case 1: - { - size_t l = seps[0].size(); - switch (l) - { - case 14: - // YYYYMMDDHHMMSS - { - std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); - break; - } - case 12: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); - year = adjustYear(year); - break; - } - case 11: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); - year = adjustYear(year); - break; - } - case 10: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); - year = adjustYear(year); - break; - } - case 9: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); - year = adjustYear(year); - break; - } - case 8: - { - std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); - break; - } - case 6: - case 5: - { - std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); - year = adjustYear(year); - break; - } 
- default: - { - throw Exception("Wrong datetime format"); - } - // TODO Process frac! - } - break; - } - case 3: - { - scanTimeArgs(seps, {&year, &month, &day}); - break; - } - case 6: - { - scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); - break; - } - default: - { - throw Exception("Wrong datetime format"); - } - } - - UInt64 ymd = ((year * 13 + month) << 5) | day; - UInt64 hms = (hour << 12) | (minute << 6) | second; - return Field((ymd << 17 | hms) << 24); -} - -} // namespace DB From f41f853f4835e350be78e07db2485383ad05d8bd Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 17 Sep 2019 03:10:43 +0800 Subject: [PATCH 58/79] Fix test regression --- dbms/src/Debug/dbgTools.cpp | 21 +------------------ dbms/src/Storages/Transaction/Datum.cpp | 11 ++++++++++ .../mutable-test/txn_dag/data_type_time.test | 2 +- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 7684cd6ff6e..2ac2479b225 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -206,22 +206,9 @@ Field convertField(const ColumnInfo & column_info, const Field & field) case TiDB::TypeDouble: return convertNumber(field); case TiDB::TypeDate: - { - auto text = field.get(); - ReadBufferFromMemory buf(text.data(), text.size()); - DayNum_t date; - readDateText(date, buf); - return static_cast(date); - } case TiDB::TypeDatetime: case TiDB::TypeTimestamp: - { - auto text = field.get(); - ReadBufferFromMemory buf(text.data(), text.size()); - time_t dt; - readDateTimeText(dt, buf); - return static_cast(dt); - } + return parseMyDateTime(field.safeGet()); case TiDB::TypeVarchar: case TiDB::TypeTinyBlob: case TiDB::TypeMediumBlob: @@ -262,8 +249,6 @@ void encodeRow(const TiDB::TableInfo & table_info, const std::vector & fi } } -bool isDateTimeType(TiDB::TP tp) { return tp == TiDB::TypeTimestamp || tp == TiDB::TypeDate || tp == TiDB::TypeDatetime; } - void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID handle_id, ASTs::const_iterator begin, ASTs::const_iterator end, Context & context, const std::optional> & tso_del) { @@ -273,10 +258,6 @@ void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID han while ((it = begin++) != end) { auto field = typeid_cast((*it).get())->value; - if (isDateTimeType(table_info.columns[idx].tp)) - { - field = parseMyDateTime(field.safeGet()); - } fields.emplace_back(field); idx++; } diff --git a/dbms/src/Storages/Transaction/Datum.cpp b/dbms/src/Storages/Transaction/Datum.cpp index 6ed099d8889..b4b3e5a360a 100644 --- a/dbms/src/Storages/Transaction/Datum.cpp +++ b/dbms/src/Storages/Transaction/Datum.cpp @@ -57,6 +57,17 @@ struct DatumOp +struct DatumOp::type> +{ + static void unflatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } + + static void flatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } + + static bool overflow(const Field &, const ColumnInfo &) { return false; } +}; + /// Specialized for Enum, using unflatten/flatten to transform UInt to Int back and forth. template struct DatumOp::type> diff --git a/tests/mutable-test/txn_dag/data_type_time.test b/tests/mutable-test/txn_dag/data_type_time.test index ce43ca8c732..de813bbb71a 100644 --- a/tests/mutable-test/txn_dag/data_type_time.test +++ b/tests/mutable-test/txn_dag/data_type_time.test @@ -7,7 +7,7 @@ => DBGInvoke __set_flush_threshold(1000000, 1000000) # Data. 
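
For reference, the packed UInt64 layout produced at the end of the removed `parseMyDatetime`, and still used by the MyDate/MyDateTime types this test now switches to, can be traced by hand. A minimal sketch assuming the same bit layout as the deleted code (`((year * 13 + month) << 5 | day) << 17 | hour << 12 | minute << 6 | second`, shifted left by 24 bits to leave room for microseconds):

```cpp
#include <cstdint>
#include <iostream>

// Pack a datetime the way the removed parseMyDatetime did: ymd uses 13
// "months" per year so year/month stay ordered, hms packs hour/minute/second
// into 17 bits, and the final << 24 reserves the low bits for microseconds.
uint64_t packMyDateTime(int year, int month, int day, int hour, int minute, int second)
{
    uint64_t ymd = ((uint64_t(year) * 13 + month) << 5) | uint64_t(day);
    uint64_t hms = (uint64_t(hour) << 12) | (uint64_t(minute) << 6) | uint64_t(second);
    return (ymd << 17 | hms) << 24;
}

int main()
{
    // The timestamp inserted by this test: '2019-06-10 09:00:00'.
    std::cout << packMyDateTime(2019, 6, 10, 9, 0, 0) << '\n';
    // Ordering is preserved: a later timestamp packs to a larger integer.
    std::cout << (packMyDateTime(2019, 6, 10, 9, 0, 1)
                  > packMyDateTime(2019, 6, 10, 9, 0, 0)) << '\n'; // 1
    return 0;
}
```
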
-=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Date, col_2 DateTime') +=> DBGInvoke __mock_tidb_table(default, test, 'col_1 MyDate, col_2 MyDateTime') => DBGInvoke __refresh_schemas() => DBGInvoke __put_region(4, 0, 100, default, test) => DBGInvoke __raft_insert_row(default, test, 4, 50, '2019-06-10', '2019-06-10 09:00:00') From 259ec7708baa98297beac365131d114565b57652 Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Tue, 17 Sep 2019 15:40:08 +0800 Subject: [PATCH 59/79] Fix null value bug in datum --- dbms/src/Storages/Transaction/Datum.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/Transaction/Datum.cpp b/dbms/src/Storages/Transaction/Datum.cpp index b4b3e5a360a..e5b15ff8c17 100644 --- a/dbms/src/Storages/Transaction/Datum.cpp +++ b/dbms/src/Storages/Transaction/Datum.cpp @@ -31,9 +31,6 @@ struct DatumOp(field, column_info); @@ -59,7 +56,7 @@ struct DatumOp -struct DatumOp::type> +struct DatumOp::type> { static void unflatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } @@ -81,6 +78,9 @@ struct DatumOp::type> DatumFlat::DatumFlat(const DB::Field & field, TP tp) : DatumBase(field, tp) { + if (orig.isNull()) + return; + switch (tp) { #ifdef M @@ -99,6 +99,9 @@ bool DatumFlat::invalidNull(const ColumnInfo & column_info) { return column_info bool DatumFlat::overflow(const ColumnInfo & column_info) { + if (orig.isNull()) + return false; + switch (tp) { #ifdef M @@ -116,6 +119,9 @@ bool DatumFlat::overflow(const ColumnInfo & column_info) DatumBumpy::DatumBumpy(const DB::Field & field, TP tp) : DatumBase(field, tp) { + if (orig.isNull()) + return; + switch (tp) { #ifdef M From 708d52f98aafc370c524310e2be78819319d103d Mon Sep 17 00:00:00 2001 From: ruoxi Date: Mon, 23 Sep 2019 12:18:03 +0800 Subject: [PATCH 60/79] FLASH-490: Fix table scan with -1 column ID and no agg (#240) * Fix table scan with -1 column ID and no agg * Add break * Remove useless includes * Use dag context to store void ft instead of dag query source * Fix decimal type reverse get * Change adding smallest column to adding handle column, address comments --- dbms/src/Debug/MockTiDB.cpp | 102 +------------- dbms/src/Debug/dbgFuncCoprocessor.cpp | 63 +++++---- dbms/src/Flash/Coprocessor/DAGContext.h | 5 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 8 +- dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 4 + dbms/src/Flash/Coprocessor/DAGUtils.cpp | 21 +++ dbms/src/Flash/Coprocessor/DAGUtils.h | 4 + dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 44 ++++-- dbms/src/Storages/Transaction/TiDB.cpp | 42 +++--- dbms/src/Storages/Transaction/TiDB.h | 2 + dbms/src/Storages/Transaction/TypeMapping.cpp | 126 ++++++++++++++++++ dbms/src/Storages/Transaction/TypeMapping.h | 7 + tests/mutable-test/txn_dag/table_scan.test | 6 + 13 files changed, 270 insertions(+), 164 deletions(-) diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 523047a5026..56b65d15707 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -10,10 +9,10 @@ #include #include #include -#include #include #include #include +#include namespace DB { @@ -114,99 +113,6 @@ void MockTiDB::dropTable(Context & context, const String & database_name, const version_diff[version] = diff; } -template -bool tryGetDecimalType(const IDataType * nested_type, ColumnInfo & column_info) -{ - using TypeDec = DataTypeDecimal; - if (checkDataType(nested_type)) - { - auto decimal_type = 
checkAndGetDataType(nested_type); - column_info.flen = decimal_type->getPrec(); - column_info.decimal = decimal_type->getScale(); - column_info.tp = TiDB::TypeNewDecimal; - return true; - } - return false; -} - -ColumnInfo getColumnInfoFromColumn(const NameAndTypePair & column, ColumnID id, const Field & default_value) -{ - ColumnInfo column_info; - column_info.id = id; - column_info.name = column.name; - const IDataType * nested_type = column.type.get(); - if (!column.type->isNullable()) - { - column_info.setNotNullFlag(); - } - else - { - auto nullable_type = checkAndGetDataType(nested_type); - nested_type = nullable_type->getNestedType().get(); - } - if (nested_type->isUnsignedInteger()) - { - column_info.setUnsignedFlag(); - } - else if (tryGetDecimalType(nested_type, column_info)) - { - } - else if (tryGetDecimalType(nested_type, column_info)) - { - } - else if (tryGetDecimalType(nested_type, column_info)) - { - } - else if (tryGetDecimalType(nested_type, column_info)) - { - } - if (checkDataType(nested_type)) - { - auto enum16_type = checkAndGetDataType(nested_type); - column_info.tp = TiDB::TypeEnum; - for (auto & element : enum16_type->getValues()) - { - column_info.elems.emplace_back(element.first, element.second); - } - } - -#ifdef M -#error "Please undefine macro M first." -#endif -#define M(tt, v, cf, ct, w) \ - if (checkDataType(nested_type)) \ - column_info.tp = TiDB::Type##tt; \ - else - COLUMN_TYPES(M) -#undef M - if (checkDataType(nested_type)) - column_info.tp = TiDB::TypeTiny; - else if (checkDataType(nested_type)) - column_info.tp = TiDB::TypeShort; - else if (checkDataType(nested_type)) - column_info.tp = TiDB::TypeLong; - - if (auto type = checkAndGetDataType(nested_type)) - column_info.decimal = type->getFraction(); - // UInt64 is hijacked by the macro expansion, we check it again. - if (checkDataType(nested_type)) - column_info.tp = TiDB::TypeLongLong; - - // Default value, currently we only support int. - if (!default_value.isNull()) - { - // convert any type to string , this is TiDB's style. 
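
The `COLUMN_TYPES(M)` expansion in the deleted code above is an X-macro: each `M(tt, v, cf, ct, w)` row of a centrally maintained type table becomes one branch of an `if (checkDataType<ct>(...)) column_info.tp = TiDB::Type##tt; else` chain. A self-contained sketch of the pattern; the two-column type table here is hypothetical, not TiFlash's actual COLUMN_TYPES list:

```cpp
#include <iostream>
#include <string>
#include <typeinfo>

// Hypothetical two-column type table; the real COLUMN_TYPES rows carry
// five columns (TiDB type tag, codec flag, CH type, width, ...).
#define EXAMPLE_TYPES(M) \
    M(Long, int)         \
    M(Double, double)    \
    M(String, std::string)

template <typename T, typename U>
bool checkType(const U &) { return typeid(T) == typeid(U); }

template <typename U>
const char * mapToTag(const U & value)
{
// Expands to an if/else-if chain, one branch per table row, with the
// trailing `else` binding to the fallback return below.
#define M(tag, cpp_type) \
    if (checkType<cpp_type>(value)) return #tag; else
    EXAMPLE_TYPES(M)
#undef M
    return "Unknown";
}

int main()
{
    std::cout << mapToTag(42) << '\n';               // Long
    std::cout << mapToTag(std::string("x")) << '\n'; // String
    std::cout << mapToTag(3.5f) << '\n';             // Unknown (float not in table)
    return 0;
}
```
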
- - column_info.origin_default_value = applyVisitor(FieldVisitorToString(), default_value); - } - else - { - column_info.setNoDefaultValueFlag(); - } - - return column_info; -} - DatabaseID MockTiDB::newDataBase(const String & database_name) { DatabaseID schema_id = 0; @@ -251,7 +157,7 @@ TableID MockTiDB::newTable(const String & database_name, const String & table_na int i = 1; for (auto & column : columns.getAllPhysical()) { - table_info.columns.emplace_back(getColumnInfoFromColumn(column, i++, Field())); + table_info.columns.emplace_back(reverseGetColumnInfo(column, i++, Field())); } table_info.pk_is_handle = false; @@ -346,7 +252,7 @@ void MockTiDB::addColumnToTable( != columns.end()) throw Exception("Column " + column.name + " already exists in TiDB table " + qualified_name, ErrorCodes::LOGICAL_ERROR); - ColumnInfo column_info = getColumnInfoFromColumn(column, table->allocColumnID(), default_value); + ColumnInfo column_info = reverseGetColumnInfo(column, table->allocColumnID(), default_value); columns.emplace_back(column_info); version++; @@ -393,7 +299,7 @@ void MockTiDB::modifyColumnInTable(const String & database_name, const String & if (it == columns.end()) throw Exception("Column " + column.name + " does not exist in TiDB table " + qualified_name, ErrorCodes::LOGICAL_ERROR); - ColumnInfo column_info = getColumnInfoFromColumn(column, 0, Field()); + ColumnInfo column_info = reverseGetColumnInfo(column, 0, Field()); if (it->hasUnsignedFlag() != column_info.hasUnsignedFlag()) throw Exception("Modify column " + column.name + " UNSIGNED flag is not allowed", ErrorCodes::LOGICAL_ERROR); if (it->tp == column_info.tp && it->hasNotNullFlag() == column_info.hasNotNullFlag()) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 1e30bf31a9a..3f6a7d7bde2 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,8 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) return outputDAGResponse(context, schema, dag_response); } +const String VOID_COL_NAME = "_void"; + struct ExecutorCtx { tipb::Executor * input; @@ -115,26 +118,6 @@ struct ExecutorCtx std::unordered_map col_ref_map; }; -tipb::FieldType columnInfoToFieldType(const ColumnInfo & ci) -{ - tipb::FieldType ret; - ret.set_tp(ci.tp); - ret.set_flag(ci.flag); - ret.set_flen(ci.flen); - ret.set_decimal(ci.decimal); - return ret; -} - -ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type) -{ - ColumnInfo ret; - ret.tp = static_cast(field_type.tp()); - ret.flag = field_type.flag(); - ret.flen = field_type.flen(); - ret.decimal = field_type.decimal(); - return ret; -} - void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set & referred_columns, std::unordered_map & col_ref_map) { @@ -376,7 +359,7 @@ std::tuple compileQuery( ci->set_flag(info.second.flag); ci->set_columnlen(info.second.flen); ci->set_decimal(info.second.decimal); - if (info.second.elems.size() != 0) + if (!info.second.elems.empty()) { for (auto & pair : info.second.elems) { @@ -500,14 +483,38 @@ std::tuple compileQuery( column_pruner(executor_ctx_map[last_executor]); const auto & last_output = executor_ctx_map[last_executor].output; - for (const auto & field : final_output) + + // For testing VOID column, ignore any other select expressions, unless table contains it. 
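
TiDB stores `origin_default_value` as a string regardless of the column's type, which is why the code funnels every `Field` through `FieldVisitorToString`. A rough standalone illustration of the idea, using a hand-rolled visitor over a tagged union as a stand-in for ClickHouse's actual `Field`/`applyVisitor` machinery:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <variant>

// Stand-in for ClickHouse's Field: a tagged union of value types
// a default value might carry.
using Field = std::variant<int64_t, uint64_t, double, std::string>;

// Stand-in for FieldVisitorToString: render any alternative as text,
// quoting strings the way a SQL-ish dump would.
struct FieldToString
{
    std::string operator()(int64_t v) const { return std::to_string(v); }
    std::string operator()(uint64_t v) const { return std::to_string(v); }
    std::string operator()(double v) const { return std::to_string(v); }
    std::string operator()(const std::string & v) const { return "'" + v + "'"; }
};

int main()
{
    // Whatever the column type, the stored default becomes a string.
    std::cout << std::visit(FieldToString{}, Field(int64_t(42))) << '\n';       // 42
    std::cout << std::visit(FieldToString{}, Field(std::string("ab"))) << '\n'; // 'ab'
    return 0;
}
```
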
+ if (std::find(final_output.begin(), final_output.end(), VOID_COL_NAME) != final_output.end() + && std::find_if( + last_output.begin(), last_output.end(), [&](const auto & last_field) { return last_field.first == VOID_COL_NAME; }) + == last_output.end()) { - auto iter - = std::find_if(last_output.begin(), last_output.end(), [&](const auto & last_field) { return last_field.first == field; }); - if (iter == last_output.end()) - throw Exception("Column not found after pruning: " + field, ErrorCodes::LOGICAL_ERROR); - dag_request.add_output_offsets(iter - last_output.begin()); - schema.push_back(*iter); + dag_request.add_output_offsets(0); + + // Set column ID to -1 to trigger `void` column in DAG processing. + tipb::ColumnInfo * ci = ts->add_columns(); + ci->set_column_id(-1); + + // Set column name to VOID and tp to Nullable(UInt64), + // as chunk decoding doesn't do strict field type check so Nullable(UInt64) should be enough. + ColumnInfo ti_ci; + ti_ci.name = VOID_COL_NAME; + ti_ci.tp = TiDB::TypeLongLong; + ti_ci.setNotNullFlag(); + schema.emplace_back(DAGColumnInfo{VOID_COL_NAME, std::move(ti_ci)}); + } + else + { + for (const auto & field : final_output) + { + auto iter = std::find_if( + last_output.begin(), last_output.end(), [&](const auto & last_field) { return last_field.first == field; }); + if (iter == last_output.end()) + throw Exception("Column not found after pruning: " + field, ErrorCodes::LOGICAL_ERROR); + dag_request.add_output_offsets(iter - last_output.begin()); + schema.push_back(*iter); + } } } diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 30e492f360f..453e6df219e 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -9,11 +9,12 @@ namespace DB class Context; /// A context used to track the information that needs to be passed around during DAG planning. 
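
The `_void` handling in `compileQuery` reproduces what TiDB does for queries like `select count(*)`: the pushed-down table scan carries a single column with ID -1 rather than any real column. The offset-mapping logic reads roughly as follows; names mirror the surrounding patch, but this is a condensed sketch, not the TiFlash code:

```cpp
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Map each requested output column to its index in the executor's output
// schema, except for the special "_void" test column, which short-circuits
// to offset 0 (standing in for the -1 column ID added to the table scan).
std::vector<int> computeOutputOffsets(const std::vector<std::string> & final_output,
                                      const std::vector<std::string> & last_output)
{
    const std::string VOID_COL_NAME = "_void";
    bool wants_void = std::find(final_output.begin(), final_output.end(), VOID_COL_NAME)
        != final_output.end();
    bool has_void = std::find(last_output.begin(), last_output.end(), VOID_COL_NAME)
        != last_output.end();
    if (wants_void && !has_void)
        return {0};

    std::vector<int> offsets;
    for (const auto & name : final_output)
    {
        auto it = std::find(last_output.begin(), last_output.end(), name);
        if (it == last_output.end())
            throw std::runtime_error("Column not found after pruning: " + name);
        offsets.push_back(int(it - last_output.begin()));
    }
    return offsets;
}

int main()
{
    // select col_2, col_1 -> offsets into the scan output {col_1, col_2}.
    for (int off : computeOutputOffsets({"col_2", "col_1"}, {"col_1", "col_2"}))
        std::cout << off << ' '; // 1 0
    std::cout << '\n';
    // select _void -> a single offset 0, regardless of the real schema.
    std::cout << computeOutputOffsets({"_void"}, {"col_1"}).size() << '\n'; // 1
    return 0;
}
```
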
-class DAGContext +struct DAGContext { -public: DAGContext(size_t profile_list_size) { profile_streams_list.resize(profile_list_size); }; std::vector profile_streams_list; + + tipb::FieldType void_result_ft; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index a35be6c8aa3..301bcdf7cc0 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -32,8 +32,7 @@ DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, log(&Logger::get("DAGDriver")) {} -void DAGDriver::execute() -try +void DAGDriver::execute() try { context.setSetting("read_tso", UInt64(dag_request.start_ts())); @@ -62,12 +61,13 @@ try // Only query is allowed, so streams.in must not be null and streams.out must be null throw Exception("DAG is not query.", ErrorCodes::LOGICAL_ERROR); - BlockOutputStreamPtr outputStreamPtr = std::make_shared(dag_response, + BlockOutputStreamPtr dag_output_stream = std::make_shared(dag_response, context.getSettings().dag_records_per_chunk, dag_request.encode_type(), dag.getResultFieldTypes(), streams.in->getHeader()); - copyData(*streams.in, *outputStreamPtr); + copyData(*streams.in, *dag_output_stream); + // add ExecutorExecutionSummary info for (auto & p_streams : dag_context.profile_streams_list) { diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 2a1fa961f56..4532b6a8180 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -92,6 +92,8 @@ bool fillExecutorOutputFieldTypes(const tipb::Executor & executor, std::vector DAGQuerySource::getResultFieldTypes() const { if (fillExecutorOutputFieldTypes(dag_request.executors(i), executor_output)) { + if (executor_output.empty()) + executor_output.push_back(dag_context.void_result_ft); break; } } diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 90fe7cb1055..dd4497530fb 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -847,4 +847,25 @@ std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::UnHex, "cast"}, {tipb::ScalarFuncSig::Upper, "upper"}, }); + +tipb::FieldType columnInfoToFieldType(const TiDB::ColumnInfo & ci) +{ + tipb::FieldType ret; + ret.set_tp(ci.tp); + ret.set_flag(ci.flag); + ret.set_flen(ci.flen); + ret.set_decimal(ci.decimal); + return ret; +} + +TiDB::ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type) +{ + TiDB::ColumnInfo ret; + ret.tp = static_cast(field_type.tp()); + ret.flag = field_type.flag(); + ret.flen = field_type.flen(); + ret.decimal = field_type.decimal(); + return ret; +} + } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 2d05f4b5a29..99768ed060e 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -9,6 +9,7 @@ #include #include +#include #include namespace DB @@ -30,4 +31,7 @@ bool exprHasValidFieldType(const tipb::Expr & expr); extern std::unordered_map agg_func_map; extern std::unordered_map scalar_func_map; +tipb::FieldType columnInfoToFieldType(const TiDB::ColumnInfo & ci); +TiDB::ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type); + } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index af763aeeab3..35a5600b93f 100644 --- 
a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include namespace DB @@ -68,28 +70,42 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) for (const tipb::ColumnInfo & ci : ts.columns()) { ColumnID cid = ci.column_id(); + + if (cid == -1) + // Column ID -1 means TiDB expects no specific column, mostly it is for cases like `select count(*)`. + // This means we can return whatever column, we'll choose it later if no other columns are specified either. + continue; + if (cid < 1 || cid > (Int64)storage->getTableInfo().columns.size()) - { - if (cid == -1) - { - // for sql that do not need read any column(e.g. select count(*) from t), the column id will be -1 - continue; - } // cid out of bound throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); - } + String name = storage->getTableInfo().getColumnName(cid); required_columns.push_back(name); - NameAndTypePair nameAndTypePair = storage->getColumns().getPhysical(name); - source_columns.push_back(nameAndTypePair); + auto pair = storage->getColumns().getPhysical(name); + source_columns.emplace_back(std::move(pair)); } if (required_columns.empty()) { - // if no column is selected, use the smallest column - String smallest_column_name = ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical()); - required_columns.push_back(smallest_column_name); - auto pair = storage->getColumns().getPhysical(smallest_column_name); - source_columns.push_back(pair); + // No column specified, we choose the handle column as it will be emitted by storage read anyhow. + // Set `void` column field type correspondingly for further needs, i.e. encoding results. + if (auto pk_handle_col = storage->getTableInfo().getPKHandleColumn()) + { + required_columns.push_back(pk_handle_col->get().name); + auto pair = storage->getColumns().getPhysical(pk_handle_col->get().name); + source_columns.push_back(pair); + // For PK handle, use original column info of itself. + dag.getDAGContext().void_result_ft = columnInfoToFieldType(pk_handle_col->get()); + } + else + { + required_columns.push_back(MutableSupport::tidb_pk_column_name); + auto pair = storage->getColumns().getPhysical(MutableSupport::tidb_pk_column_name); + source_columns.push_back(pair); + // For implicit handle, reverse get a column info. 
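
The fallback in `executeTS` replaces the old "smallest column" trick: when the scan requests no real column, pick the handle column, whose values the storage layer emits anyway. A condensed sketch of the decision, with types simplified; `getPKHandleColumn` is the helper added to `TableInfo` later in this patch:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct ColumnDef { std::string name; bool is_pk_handle = false; };

struct TableDef
{
    bool pk_is_handle = false;
    std::vector<ColumnDef> columns;

    // Simplified getPKHandleColumn: only meaningful when pk_is_handle is set.
    std::optional<ColumnDef> getPKHandleColumn() const
    {
        if (!pk_is_handle)
            return std::nullopt;
        for (const auto & col : columns)
            if (col.is_pk_handle)
                return col;
        return std::nullopt;
    }
};

// When no column is required by the scan, choose the user PK if it doubles
// as the handle, otherwise fall back to the implicit rowid column.
std::string chooseHandleColumn(const TableDef & table)
{
    if (auto pk = table.getPKHandleColumn())
        return pk->name;
    return "_tidb_rowid"; // MutableSupport::tidb_pk_column_name in TiFlash
}

int main()
{
    TableDef t1{true, {{"id", true}, {"val", false}}};
    TableDef t2{false, {{"a", false}}};
    std::cout << chooseHandleColumn(t1) << '\n'; // id
    std::cout << chooseHandleColumn(t2) << '\n'; // _tidb_rowid
    return 0;
}
```
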
+ auto column_info = reverseGetColumnInfo(pair, -1, Field()); + dag.getDAGContext().void_result_ft = columnInfoToFieldType(column_info); + } } analyzer = std::make_unique(source_columns, context); diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 6c413000244..6175080a98c 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -117,8 +117,7 @@ Int64 ColumnInfo::getEnumIndex(const String & enum_id_or_text) const return num; } -Poco::JSON::Object::Ptr ColumnInfo::getJSONObject() const -try +Poco::JSON::Object::Ptr ColumnInfo::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -161,8 +160,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (ColumnInfo): " + e.displayText(), DB::Exception(e)); } -void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) -try +void ColumnInfo::deserialize(Poco::JSON::Object::Ptr json) try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -196,8 +194,7 @@ catch (const Poco::Exception & e) PartitionDefinition::PartitionDefinition(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const -try +Poco::JSON::Object::Ptr PartitionDefinition::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); json->set("id", id); @@ -218,8 +215,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionDef): " + e.displayText(), DB::Exception(e)); } -void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) -try +void PartitionDefinition::deserialize(Poco::JSON::Object::Ptr json) try { id = json->getValue("id"); name = json->getObject("name")->getValue("L"); @@ -234,8 +230,7 @@ catch (const Poco::Exception & e) PartitionInfo::PartitionInfo(Poco::JSON::Object::Ptr json) { deserialize(json); } -Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const -try +Poco::JSON::Object::Ptr PartitionInfo::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -264,8 +259,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (PartitionInfo): " + e.displayText(), DB::Exception(e)); } -void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) -try +void PartitionInfo::deserialize(Poco::JSON::Object::Ptr json) try { type = static_cast(json->getValue("type")); expr = json->getValue("expr"); @@ -289,8 +283,7 @@ catch (const Poco::Exception & e) TableInfo::TableInfo(const String & table_info_json) { deserialize(table_info_json); } -String TableInfo::serialize(bool escaped) const -try +String TableInfo::serialize(bool escaped) const try { std::stringstream buf; @@ -348,8 +341,7 @@ catch (const Poco::Exception & e) std::string(__PRETTY_FUNCTION__) + ": Serialize TiDB schema JSON failed (TableInfo): " + e.displayText(), DB::Exception(e)); } -void DBInfo::deserialize(const String & json_str) -try +void DBInfo::deserialize(const String & json_str) try { Poco::JSON::Parser parser; Poco::Dynamic::Var result = parser.parse(json_str); @@ -367,8 +359,7 @@ catch (const Poco::Exception & e) DB::Exception(e)); } -void TableInfo::deserialize(const String & json_str) -try +void TableInfo::deserialize(const String & json_str) try { if (json_str.empty()) { @@ -483,6 +474,21 @@ String TableInfo::getColumnName(const ColumnID id) const 
DB::ErrorCodes::LOGICAL_ERROR); } +std::optional> TableInfo::getPKHandleColumn() const +{ + if (!pk_is_handle) + return std::nullopt; + + for (auto & col : columns) + { + if (col.hasPriKeyFlag()) + return std::optional>(col); + } + + throw DB::Exception( + std::string(__PRETTY_FUNCTION__) + ": Cannot get handle column for table " + db_name + "." + name, DB::ErrorCodes::LOGICAL_ERROR); +} + TableInfo TableInfo::producePartitionTableInfo(TableID table_or_partition_id) const { // Some sanity checks for partition table. diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 7882ead5cab..7b98ee286e5 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -277,6 +277,8 @@ struct TableInfo ColumnID getColumnID(const String & name) const; String getColumnName(const ColumnID id) const; + std::optional> getPKHandleColumn() const; + TableInfo producePartitionTableInfo(TableID table_or_partition_id) const; bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == -1 && partition.enable; } diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 830e7d00921..343bc30d4c9 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -8,6 +10,7 @@ #include #include #include +#include #include #include @@ -203,4 +206,127 @@ TiDB::CodecFlag getCodecFlagByFieldType(const tipb::FieldType & field_type) return ci.getCodecFlag(); } +template +void setDecimalPrecScale(const T * decimal_type, ColumnInfo & column_info) +{ + column_info.flen = decimal_type->getPrec(); + column_info.decimal = decimal_type->getScale(); +} + +ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, const Field & default_value) +{ + ColumnInfo column_info; + column_info.id = id; + column_info.name = column.name; + const IDataType * nested_type = column.type.get(); + + // Fill not null. + if (!column.type->isNullable()) + { + column_info.setNotNullFlag(); + } + else + { + auto nullable_type = checkAndGetDataType(nested_type); + nested_type = nullable_type->getNestedType().get(); + } + + // Fill tp. 
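
The first step of `reverseGetColumnInfo` encodes the nullability convention: TiFlash expresses nullability in the type (`Nullable(T)`), while TiDB instead marks non-nullable columns with a NOT NULL flag, so the reverse mapping sets the flag exactly when the CH type is not `Nullable`. A toy illustration of that inversion; the flag bit value here is an assumption, not TiDB's actual bit assignment:

```cpp
#include <iostream>
#include <string>

// Hypothetical flag bit; TiDB defines its own ColumnFlagNotNull value.
constexpr unsigned FLAG_NOT_NULL = 1u;

struct CHType { std::string name; bool nullable; };

// Set the NOT NULL flag exactly when the TiFlash type is *not* Nullable(...).
unsigned reverseMapNullability(const CHType & type)
{
    unsigned flag = 0;
    if (!type.nullable)
        flag |= FLAG_NOT_NULL;
    return flag;
}

int main()
{
    std::cout << reverseMapNullability({"Int32", false}) << '\n';          // 1
    std::cout << reverseMapNullability({"Nullable(Int32)", true}) << '\n'; // 0
    return 0;
}
```
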
+ switch (nested_type->getTypeId()) + { + case TypeIndex::Nothing: + column_info.tp = TiDB::TypeNull; + break; + case TypeIndex::UInt8: + case TypeIndex::Int8: + column_info.tp = TiDB::TypeTiny; + break; + case TypeIndex::UInt16: + case TypeIndex::Int16: + column_info.tp = TiDB::TypeShort; + break; + case TypeIndex::UInt32: + case TypeIndex::Int32: + column_info.tp = TiDB::TypeLong; + break; + case TypeIndex::UInt64: + case TypeIndex::Int64: + column_info.tp = TiDB::TypeLongLong; + break; + case TypeIndex::Float32: + column_info.tp = TiDB::TypeFloat; + break; + case TypeIndex::Float64: + column_info.tp = TiDB::TypeDouble; + break; + case TypeIndex::Date: + case TypeIndex::MyDate: + column_info.tp = TiDB::TypeDate; + break; + case TypeIndex::DateTime: + case TypeIndex::MyDateTime: + column_info.tp = TiDB::TypeDatetime; + break; + case TypeIndex::MyTimeStamp: + column_info.tp = TiDB::TypeTimestamp; + break; + case TypeIndex::MyTime: + column_info.tp = TiDB::TypeTime; + break; + case TypeIndex::String: + case TypeIndex::FixedString: + column_info.tp = TiDB::TypeString; + break; + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + case TypeIndex::Decimal256: + column_info.tp = TiDB::TypeNewDecimal; + break; + case TypeIndex::Enum8: + case TypeIndex::Enum16: + column_info.tp = TiDB::TypeEnum; + break; + default: + throw DB::Exception("Unable reverse map TiFlash type " + nested_type->getName() + " to TiDB type", ErrorCodes::LOGICAL_ERROR); + } + + // Fill unsigned flag. + if (nested_type->isUnsignedInteger()) + column_info.setUnsignedFlag(); + + // Fill flen and decimal for decimal. + if (auto decimal_type32 = checkAndGetDataType>(nested_type)) + setDecimalPrecScale(decimal_type32, column_info); + else if (auto decimal_type64 = checkAndGetDataType>(nested_type)) + setDecimalPrecScale(decimal_type64, column_info); + else if (auto decimal_type128 = checkAndGetDataType>(nested_type)) + setDecimalPrecScale(decimal_type128, column_info); + else if (auto decimal_type256 = checkAndGetDataType>(nested_type)) + setDecimalPrecScale(decimal_type256, column_info); + + // Fill decimal for date time. + if (auto type = checkAndGetDataType(nested_type)) + column_info.decimal = type->getFraction(); + + // Fill elems for enum. + if (checkDataType(nested_type)) + { + auto enum16_type = checkAndGetDataType(nested_type); + for (auto & element : enum16_type->getValues()) + { + column_info.elems.emplace_back(element.first, element.second); + } + } + + // Fill default value, currently we only support int. + if (!default_value.isNull()) + // convert any type to string , this is TiDB's style. + column_info.origin_default_value = applyVisitor(FieldVisitorToString(), default_value); + else + column_info.setNoDefaultValueFlag(); + + return column_info; +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/TypeMapping.h b/dbms/src/Storages/Transaction/TypeMapping.h index 2c44b38b0e7..e0ee9a0198b 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.h +++ b/dbms/src/Storages/Transaction/TypeMapping.h @@ -17,4 +17,11 @@ DataTypePtr getDataTypeByFieldType(const tipb::FieldType & field_type); TiDB::CodecFlag getCodecFlagByFieldType(const tipb::FieldType & field_type); +// Try best to reverse get TiDB's column info from TiFlash info. +// Used for cases that has absolute need to create a TiDB structure from insufficient knowledge, +// such as mock TiDB table using TiFlash SQL parser, and getting field type for `void` column in DAG. 
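
The decimal branch copies precision into `flen` and scale into `decimal`, matching how TiDB describes a `DECIMAL(P, S)` column. A small sketch of that mapping, assuming a simplified decimal type; TiFlash's real `DataTypeDecimal<T>` carries the same two numbers:

```cpp
#include <iostream>

// Simplified stand-in for DataTypeDecimal<T>: only precision/scale matter here.
struct DecimalType
{
    int precision;
    int scale;
    int getPrec() const { return precision; }
    int getScale() const { return scale; }
};

struct ColumnInfoSketch
{
    int tp = 0;       // TiDB type tag; 0xf6 is TypeNewDecimal in MySQL's numbering
    int flen = -1;    // display length; for decimals, the precision P
    int decimal = -1; // for decimals, the scale S
};

// Mirrors setDecimalPrecScale in the patch: DECIMAL(P, S) -> flen = P, decimal = S.
void setDecimalPrecScale(const DecimalType & t, ColumnInfoSketch & ci)
{
    ci.flen = t.getPrec();
    ci.decimal = t.getScale();
    ci.tp = 0xf6;
}

int main()
{
    ColumnInfoSketch ci;
    setDecimalPrecScale(DecimalType{10, 2}, ci); // a DECIMAL(10,2) column
    std::cout << ci.flen << ' ' << ci.decimal << '\n'; // 10 2
    return 0;
}
```
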
+// Note that not every TiFlash type has a corresponding TiDB type, +// caller should make sure the source type is valid, otherwise exception will be thrown. +ColumnInfo reverseGetColumnInfo(const NameAndTypePair & column, ColumnID id, const Field & default_value); + } // namespace DB diff --git a/tests/mutable-test/txn_dag/table_scan.test b/tests/mutable-test/txn_dag/table_scan.test index 953af0cef9d..ab824845d71 100644 --- a/tests/mutable-test/txn_dag/table_scan.test +++ b/tests/mutable-test/txn_dag/table_scan.test @@ -30,6 +30,12 @@ │ test1 │ └───────┘ +# TiDB may push down table scan with -1 column, use keyword _void testing this case. +=> DBGInvoke dag('select _void from default.test') " --dag_planner="optree +┌─_void─┐ +│ 50 │ +└───────┘ + # Clean up. => DBGInvoke __drop_tidb_table(default, test) => drop table if exists default.test From a4c10742effa0b232cff67c0ff73d45ad630ffed Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 24 Sep 2019 17:03:38 +0800 Subject: [PATCH 61/79] throw error if the cop request is not based on full region scan (#247) * throw error if the cop request is not based on full region scan * format code --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 8 ++- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 9 ++- dbms/src/Flash/Coprocessor/DAGDriver.h | 5 +- dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 4 +- dbms/src/Flash/Coprocessor/DAGQuerySource.h | 5 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 59 +++++++++++++++++++ dbms/src/Flash/CoprocessorHandler.cpp | 15 ++++- dbms/src/Storages/StorageMergeTree.h | 3 + 8 files changed, 97 insertions(+), 11 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 3f6a7d7bde2..29dca0ae7aa 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -54,7 +54,8 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) region_id = safeGet(typeid_cast(*args[1]).value); Timestamp start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery(context, query, + auto [table_id, schema, dag_request] = compileQuery( + context, query, [&](const String & database_name, const String & table_name) { auto storage = context.getTable(database_name, table_name); auto mmt = std::dynamic_pointer_cast(storage); @@ -96,7 +97,8 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) if (start_ts == 0) start_ts = context.getTMTContext().getPDClient()->getTS(); - auto [table_id, schema, dag_request] = compileQuery(context, query, + auto [table_id, schema, dag_request] = compileQuery( + context, query, [&](const String & database_name, const String & table_name) { return MockTiDB::instance().getTableByName(database_name, table_name)->table_info; }, @@ -528,7 +530,7 @@ tipb::SelectResponse executeDAGRequest( LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString()); context.setSetting("dag_planner", "optree"); tipb::SelectResponse dag_response; - DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, dag_response, true); + DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, {}, dag_response, true); driver.execute(); LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); return dag_response; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 301bcdf7cc0..15baa4a698b 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp 
+++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -21,23 +21,26 @@ extern const int UNKNOWN_EXCEPTION; } // namespace ErrorCodes DAGDriver::DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, - UInt64 region_conf_version_, tipb::SelectResponse & dag_response_, bool internal_) + UInt64 region_conf_version_, std::vector> && key_ranges_, + tipb::SelectResponse & dag_response_, bool internal_) : context(context_), dag_request(dag_request_), region_id(region_id_), region_version(region_version_), region_conf_version(region_conf_version_), + key_ranges(std::move(key_ranges_)), dag_response(dag_response_), internal(internal_), log(&Logger::get("DAGDriver")) {} -void DAGDriver::execute() try +void DAGDriver::execute() +try { context.setSetting("read_tso", UInt64(dag_request.start_ts())); DAGContext dag_context(dag_request.executors_size()); - DAGQuerySource dag(context, dag_context, region_id, region_version, region_conf_version, dag_request); + DAGQuerySource dag(context, dag_context, region_id, region_version, region_conf_version, key_ranges, dag_request); BlockIO streams; String planner = context.getSettings().dag_planner; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.h b/dbms/src/Flash/Coprocessor/DAGDriver.h index a74341db550..38be627dc18 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.h +++ b/dbms/src/Flash/Coprocessor/DAGDriver.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -15,7 +16,8 @@ class DAGDriver { public: DAGDriver(Context & context_, const tipb::DAGRequest & dag_request_, RegionID region_id_, UInt64 region_version_, - UInt64 region_conf_version_, tipb::SelectResponse & dag_response_, bool internal_ = false); + UInt64 region_conf_version_, std::vector> && key_ranges_, + tipb::SelectResponse & dag_response_, bool internal_ = false); void execute(); @@ -27,6 +29,7 @@ class DAGDriver RegionID region_id; UInt64 region_version; UInt64 region_conf_version; + std::vector> key_ranges; tipb::SelectResponse & dag_response; diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 4532b6a8180..2cf291d08dd 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -29,12 +29,14 @@ static void assignOrThrowException(Int32 & index, Int32 value, const String & na } DAGQuerySource::DAGQuerySource(Context & context_, DAGContext & dag_context_, RegionID region_id_, UInt64 region_version_, - UInt64 region_conf_version_, const tipb::DAGRequest & dag_request_) + UInt64 region_conf_version_, const std::vector> & key_ranges_, + const tipb::DAGRequest & dag_request_) : context(context_), dag_context(dag_context_), region_id(region_id_), region_version(region_version_), region_conf_version(region_conf_version_), + key_ranges(key_ranges_), dag_request(dag_request_) { for (int i = 0; i < dag_request.executors_size(); i++) diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.h b/dbms/src/Flash/Coprocessor/DAGQuerySource.h index c0600620ba1..b7f4791ad56 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.h +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -28,7 +29,7 @@ class DAGQuerySource : public IQuerySource static const String LIMIT_NAME; DAGQuerySource(Context & context_, DAGContext & dag_context_, RegionID region_id_, UInt64 region_version_, UInt64 region_conf_version_, - const tipb::DAGRequest & dag_request_); + const 
std::vector> & key_ranges_, const tipb::DAGRequest & dag_request_); std::tuple parse(size_t max_query_size) override; String str(size_t max_query_size) override; @@ -39,6 +40,7 @@ class DAGQuerySource : public IQuerySource RegionID getRegionID() const { return region_id; } UInt64 getRegionVersion() const { return region_version; } UInt64 getRegionConfVersion() const { return region_conf_version; } + const std::vector> & getKeyRanges() const { return key_ranges; } bool hasSelection() const { return sel_index != -1; }; bool hasAggregation() const { return agg_index != -1; }; @@ -98,6 +100,7 @@ class DAGQuerySource : public IQuerySource const RegionID region_id; const UInt64 region_version; const UInt64 region_conf_version; + const std::vector> & key_ranges; const tipb::DAGRequest & dag_request; diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 35a5600b93f..05fcc78ee48 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -18,11 +19,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -42,6 +45,59 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) : context(context_), dag(dag_), log(&Logger::get("InterpreterDAG")) {} +template +bool isAllValueCoveredByRanges(std::vector> & ranges) +{ + if (ranges.empty()) + return false; + std::sort(ranges.begin(), ranges.end(), + [](const HandleRange & a, const HandleRange & b) { return a.first < b.first; }); + + HandleRange merged_range; + merged_range.first = ranges[0].first; + merged_range.second = ranges[0].second; + + for (size_t i = 1; i < ranges.size(); i++) + { + if (merged_range.second >= ranges[i].first) + merged_range.second = merged_range.second >= ranges[i].second ? 
merged_range.second : ranges[i].second; + else + break; + } + + return merged_range.first == TiKVHandle::Handle::normal_min && merged_range.second == TiKVHandle::Handle::max; +} + +bool checkKeyRanges(const std::vector> & key_ranges, TableID table_id, bool pk_is_uint64) +{ + if (key_ranges.empty()) + return true; + + std::vector> scan_ranges; + for (auto & range : key_ranges) + { + TiKVRange::Handle start = TiKVRange::getRangeHandle(range.first, table_id); + TiKVRange::Handle end = TiKVRange::getRangeHandle(range.second, table_id); + scan_ranges.emplace_back(std::make_pair(start, end)); + } + + if (pk_is_uint64) + { + std::vector> update_ranges; + for (auto & range : scan_ranges) + { + const auto [n, new_range] = CHTableHandle::splitForUInt64TableHandle(range); + + for (int i = 0; i < n; i++) + { + update_ranges.emplace_back(new_range[i]); + } + } + return isAllValueCoveredByRanges(update_ranges); + } + else + return isAllValueCoveredByRanges(scan_ranges); +} // the flow is the same as executeFetchcolumns void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) { @@ -144,6 +200,9 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) max_streams *= settings.max_streams_to_max_threads_ratio; } + if (!checkKeyRanges(dag.getKeyRanges(), table_id, storage->pkIsUInt64())) + throw Exception("Cop request only support full range scan for given region", ErrorCodes::COP_BAD_DAG_REQUEST); + //todo support index in SelectQueryInfo query_info; query_info.query = dag.getAST(); diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index 2d72823f3af..ccb354e5b5c 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -22,18 +22,29 @@ CoprocessorHandler::CoprocessorHandler( : cop_context(cop_context_), cop_request(cop_request_), cop_response(cop_response_), log(&Logger::get("CoprocessorHandler")) {} -grpc::Status CoprocessorHandler::execute() try +grpc::Status CoprocessorHandler::execute() +try { switch (cop_request->tp()) { case COP_REQ_TYPE_DAG: { + std::vector> key_ranges; + for (auto & range : cop_request->ranges()) + { + std::string start_key(range.start()); + DecodedTiKVKey start(std::move(start_key)); + std::string end_key(range.end()); + DecodedTiKVKey end(std::move(end_key)); + key_ranges.emplace_back(std::make_pair(std::move(start), std::move(end))); + } tipb::DAGRequest dag_request; dag_request.ParseFromString(cop_request->data()); LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString()); tipb::SelectResponse dag_response; DAGDriver driver(cop_context.db_context, dag_request, cop_context.kv_context.region_id(), - cop_context.kv_context.region_epoch().version(), cop_context.kv_context.region_epoch().conf_ver(), dag_response); + cop_context.kv_context.region_epoch().version(), cop_context.kv_context.region_epoch().conf_ver(), std::move(key_ranges), + dag_response); driver.execute(); LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); cop_response->set_data(dag_response.SerializeAsString()); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 6cdfe9ea03d..484a202554d 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -93,6 +94,8 @@ class StorageMergeTree : public ext::shared_ptr_helper, public String getDataPath() const override { return full_path; } + bool 
pkIsUInt64() const { return getTMTPKType(*data.primary_key_data_types[0]) == TMTPKType::UINT64; } + private: String path; String database_name; From 3a439425d374258bdc4c40f561ca2a3a385550e3 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 27 Sep 2019 21:23:34 +0800 Subject: [PATCH 62/79] FLASH-437 Support time zone in coprocessor (#259) * do not allow timestamp literal in DAG request * refine code * fix cop date type encode error * support tz info in DAG request * address comments --- dbms/src/Common/MyTime.cpp | 11 ++ dbms/src/Common/MyTime.h | 2 + dbms/src/Debug/dbgFuncCoprocessor.cpp | 57 ++++++-- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 2 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 131 ++++++++++++++++-- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 22 ++- .../Flash/Coprocessor/DAGStringConverter.cpp | 6 +- .../Flash/Coprocessor/DAGStringConverter.h | 6 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 14 +- dbms/src/Flash/Coprocessor/DAGUtils.h | 4 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 25 +++- dbms/src/Flash/Coprocessor/InterpreterDAG.h | 2 +- dbms/src/Flash/CoprocessorHandler.cpp | 6 +- dbms/src/Functions/FunctionsDateTime.cpp | 1 + dbms/src/Functions/FunctionsDateTime.h | 65 +++++++++ dbms/src/Storages/Transaction/Datum.cpp | 11 -- dbms/src/Storages/Transaction/TiDB.h | 6 +- tests/mutable-test/txn_dag/time_zone.test | 64 +++++++++ 18 files changed, 369 insertions(+), 66 deletions(-) create mode 100644 tests/mutable-test/txn_dag/time_zone.test diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index bef7a8d608a..e8b12450e3b 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -473,4 +473,15 @@ void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & tim to_time = to_my_time.toPackedUInt(); } +void convertTimeZoneByOffset(UInt64 from_time, UInt64 & to_time, Int64 offset, const DateLUTImpl & time_zone) +{ + MyDateTime from_my_time(from_time); + time_t epoch = time_zone.makeDateTime( + from_my_time.year, from_my_time.month, from_my_time.day, from_my_time.hour, from_my_time.minute, from_my_time.second); + epoch += offset; + MyDateTime to_my_time(time_zone.toYear(epoch), time_zone.toMonth(epoch), time_zone.toDayOfMonth(epoch), + time_zone.toHour(epoch), time_zone.toMinute(epoch), time_zone.toSecond(epoch), from_my_time.micro_second); + to_time = to_my_time.toPackedUInt(); +} + } // namespace DB diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index bcca1324f8a..f8e12f3a825 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -65,4 +65,6 @@ Field parseMyDateTime(const String & str); void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to); +void convertTimeZoneByOffset(UInt64 from_time, UInt64 & to_time, Int64 offset, const DateLUTImpl & time_zone); + } // namespace DB diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 29dca0ae7aa..8b84a373963 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -38,20 +38,27 @@ using DAGColumnInfo = std::pair; using DAGSchema = std::vector; using SchemaFetcher = std::function; std::tuple compileQuery( - Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts); + Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts, + Int64 tz_offset, const String & tz_name); tipb::SelectResponse executeDAGRequest( Context & context, 
const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version); BlockInputStreamPtr outputDAGResponse(Context & context, const DAGSchema & schema, const tipb::SelectResponse & dag_response); BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) { - if (args.size() < 1 || args.size() > 2) - throw Exception("Args not matched, should be: query[, region-id]", ErrorCodes::BAD_ARGUMENTS); + if (args.size() < 1 || args.size() > 4) + throw Exception("Args not matched, should be: query[, region-id, tz_offset, tz_name]", ErrorCodes::BAD_ARGUMENTS); String query = safeGet(typeid_cast(*args[0]).value); RegionID region_id = InvalidRegionID; - if (args.size() == 2) + if (args.size() >= 2) region_id = safeGet(typeid_cast(*args[1]).value); + Int64 tz_offset = 0; + String tz_name = ""; + if (args.size() >= 3) + tz_offset = get(typeid_cast(*args[2]).value); + if (args.size() >= 4) + tz_name = safeGet(typeid_cast(*args[3]).value); Timestamp start_ts = context.getTMTContext().getPDClient()->getTS(); auto [table_id, schema, dag_request] = compileQuery( @@ -63,7 +70,7 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) throw Exception("Not TMT", ErrorCodes::BAD_ARGUMENTS); return mmt->getTableInfo(); }, - start_ts); + start_ts, tz_offset, tz_name); RegionPtr region; if (region_id == InvalidRegionID) @@ -86,23 +93,29 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args) BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args) { - if (args.size() < 2 || args.size() > 3) - throw Exception("Args not matched, should be: query, region-id[, start-ts]", ErrorCodes::BAD_ARGUMENTS); + if (args.size() < 2 || args.size() > 5) + throw Exception("Args not matched, should be: query, region-id[, start-ts, tz_offset, tz_name]", ErrorCodes::BAD_ARGUMENTS); String query = safeGet(typeid_cast(*args[0]).value); RegionID region_id = safeGet(typeid_cast(*args[1]).value); Timestamp start_ts = DEFAULT_MAX_READ_TSO; - if (args.size() == 3) + if (args.size() >= 3) start_ts = safeGet(typeid_cast(*args[2]).value); if (start_ts == 0) start_ts = context.getTMTContext().getPDClient()->getTS(); + Int64 tz_offset = 0; + String tz_name = ""; + if (args.size() >= 3) + tz_offset = safeGet(typeid_cast(*args[2]).value); + if (args.size() >= 4) + tz_name = safeGet(typeid_cast(*args[3]).value); auto [table_id, schema, dag_request] = compileQuery( context, query, [&](const String & database_name, const String & table_name) { return MockTiDB::instance().getTableByName(database_name, table_name)->table_info; }, - start_ts); + start_ts, tz_offset, tz_name); std::ignore = table_id; RegionPtr region = context.getTMTContext().getKVStore()->getRegion(region_id); @@ -170,6 +183,14 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un ft->set_tp(TiDB::TypeTiny); ft->set_flag(TiDB::ColumnFlagUnsigned); } + else if (func_name_lowercase == "greater") + { + expr->set_sig(tipb::ScalarFuncSig::GTInt); + auto * ft = expr->mutable_field_type(); + // TODO: TiDB will infer Int64. 
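
Like the branches before it, the new `greater` case maps a parsed function name onto a tipb scalar signature plus a result field type. A condensed sketch of that dispatch; only the `greater`/`GTInt` pairing appears in the hunk above, so the other names here are illustrative assumptions, and the enum is a local stand-in rather than the tipb one:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Local stand-in for the tipb signature enum values.
enum class ScalarFuncSig { EQInt, GTInt, InInt };

// Map a lower-cased parser function name to a coprocessor signature,
// the way compileExpr's if/else chain does (argument-type fixup omitted).
ScalarFuncSig resolveSig(const std::string & func_name_lowercase)
{
    if (func_name_lowercase == "equals")
        return ScalarFuncSig::EQInt; // assumed earlier branch
    if (func_name_lowercase == "greater")
        return ScalarFuncSig::GTInt; // the branch added in this patch
    if (func_name_lowercase == "in")
        return ScalarFuncSig::InInt; // assumed earlier branch
    throw std::runtime_error("Unsupported function: " + func_name_lowercase);
}

int main()
{
    std::cout << (resolveSig("greater") == ScalarFuncSig::GTInt) << '\n'; // 1
    return 0;
}
```
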
+ ft->set_tp(TiDB::TypeTiny); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } else { throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); @@ -239,10 +260,13 @@ void compileFilter(const DAGSchema & input, ASTPtr ast, tipb::Selection * filter } std::tuple compileQuery( - Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts) + Context & context, const String & query, SchemaFetcher schema_fetcher, + Timestamp start_ts, Int64 tz_offset, const String & tz_name) { DAGSchema schema; tipb::DAGRequest dag_request; + dag_request.set_time_zone_name(tz_name); + dag_request.set_time_zone_offset(tz_offset); dag_request.set_start_ts(start_ts); @@ -291,8 +315,11 @@ std::tuple compileQuery( ci.tp = column_info.tp; ci.flag = column_info.flag; ci.flen = column_info.flen; - ci.decimal = column_info.flen; + ci.decimal = column_info.decimal; ci.elems = column_info.elems; + // a hack to test timestamp type in mock test + if (column_info.tp == TiDB::TypeDatetime && ci.decimal == 5) + ci.tp = TiDB::TypeTimestamp; ts_output.emplace_back(std::make_pair(column_info.name, std::move(ci))); } executor_ctx_map.emplace(ts_exec, ExecutorCtx{nullptr, std::move(ts_output), std::unordered_map{}}); @@ -430,6 +457,14 @@ std::tuple compileQuery( ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); } + else if (func->name == "max") + { + agg_func->set_tp(tipb::Max); + if (agg_func->children_size() != 1) + throw Exception("udaf max only accept 1 argument"); + auto ft = agg_func->mutable_field_type(); + ft->set_tp(agg_func->children(0).field_type().tp()); + } // TODO: Other agg func. else { diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 15baa4a698b..fdc40506743 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -102,7 +102,7 @@ catch (const LockException & e) } catch (const Exception & e) { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.getStackTrace().toString()); recordError(e.code(), e.message()); } catch (const std::exception & e) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 155ce2b5ac0..fe0eb24c081 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -44,7 +45,7 @@ static String genFuncString(const String & func_name, const Names & argument_nam return ss.str(); } -DAGExpressionAnalyzer::DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_) +DAGExpressionAnalyzer::DAGExpressionAnalyzer(const std::vector && source_columns_, const Context & context_) : source_columns(source_columns_), context(context_), after_agg(false), @@ -177,28 +178,129 @@ void DAGExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, const } } -const NamesAndTypesList & DAGExpressionAnalyzer::getCurrentInputColumns() { return after_agg ? aggregated_columns : source_columns; } +const std::vector & DAGExpressionAnalyzer::getCurrentInputColumns() +{ + return after_agg ? 
aggregated_columns : source_columns; +} void DAGExpressionAnalyzer::appendFinalProject(ExpressionActionsChain & chain, const NamesWithAliases & final_project) { initChain(chain, getCurrentInputColumns()); - for (auto name : final_project) + for (const auto & name : final_project) { chain.steps.back().required_output.push_back(name.first); } } -void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & aggregation) +void constructTZExpr(tipb::Expr & tz_expr, const tipb::DAGRequest & rqst, bool from_utc) +{ + if (rqst.has_time_zone_name() && rqst.time_zone_name().length() > 0) + { + tz_expr.set_tp(tipb::ExprType::String); + tz_expr.set_val(rqst.time_zone_name()); + auto * field_type = tz_expr.mutable_field_type(); + field_type->set_tp(TiDB::TypeString); + field_type->set_flag(TiDB::ColumnFlagNotNull); + } + else + { + tz_expr.set_tp(tipb::ExprType::Int64); + std::stringstream ss; + encodeDAGInt64(from_utc ? rqst.time_zone_offset() : -rqst.time_zone_offset(), ss); + tz_expr.set_val(ss.str()); + auto * field_type = tz_expr.mutable_field_type(); + field_type->set_tp(TiDB::TypeLongLong); + field_type->set_flag(TiDB::ColumnFlagNotNull); + } +} + +bool hasMeaningfulTZInfo(const tipb::DAGRequest &rqst) +{ + if (rqst.has_time_zone_name() && rqst.time_zone_name().length() > 0) + return rqst.time_zone_name() != "UTC"; + if (rqst.has_time_zone_offset()) + return rqst.has_time_zone_offset() != 0; + return false; +} + +String DAGExpressionAnalyzer::appendTimeZoneCast( + const String & tz_col, const String & ts_col, const String & func_name, ExpressionActionsPtr & actions) +{ + Names cast_argument_names; + cast_argument_names.push_back(ts_col); + cast_argument_names.push_back(tz_col); + String cast_expr_name = applyFunction(func_name, cast_argument_names, actions); + return cast_expr_name; +} + +// add timezone cast after table scan, this is used for session level timezone support +// the basic idea of supporting session level timezone is that: +// 1. for every timestamp column used in the dag request, after reading it from table scan, we add +// cast function to convert its timezone to the timezone specified in DAG request +// 2. for every timestamp column that will be returned to TiDB, we add cast function to convert its +// timezone to UTC +// for timestamp columns without any transformation or calculation(e.g. select ts_col from table), +// this will introduce two useless casts, in order to avoid these redundant cast, when cast the ts +// column to the columns with session-level timezone info, the original ts columns with UTC +// timezone are still kept +// for DAG request that does not contain agg, the final project will select the ts column with UTC +// timezone, which is exactly what TiDB want +// for DAG request that contains agg, any ts column after agg has session-level timezone info(since the ts +// column with UTC timezone will never be used in during agg), all the column with ts datatype will +// convert back to UTC timezone +bool DAGExpressionAnalyzer::appendTimeZoneCastsAfterTS( + ExpressionActionsChain &chain, std::vector is_ts_column, const tipb::DAGRequest &rqst) +{ + if (!hasMeaningfulTZInfo(rqst)) + return false; + + bool ret = false; + initChain(chain, getCurrentInputColumns()); + ExpressionActionsPtr actions = chain.getLastActions(); + tipb::Expr tz_expr; + constructTZExpr(tz_expr, rqst, true); + String tz_col; + String func_name + = rqst.has_time_zone_name() && rqst.time_zone_name().length() > 0 ? 
"ConvertTimeZoneFromUTC" : "ConvertTimeZoneByOffset"; + for (size_t i = 0; i < is_ts_column.size(); i++) + { + if (is_ts_column[i]) + { + if (tz_col.length() == 0) + tz_col = getActions(tz_expr, actions); + String casted_name = appendTimeZoneCast(tz_col, source_columns[i].name, func_name, actions); + source_columns.emplace_back(source_columns[i].name, source_columns[i].type); + source_columns[i].name = casted_name; + ret = true; + } + } + return ret; +} + +void DAGExpressionAnalyzer::appendAggSelect( + ExpressionActionsChain & chain, const tipb::Aggregation & aggregation, const tipb::DAGRequest & rqst) { initChain(chain, getCurrentInputColumns()); bool need_update_aggregated_columns = false; NamesAndTypesList updated_aggregated_columns; ExpressionActionsChain::Step step = chain.steps.back(); - auto agg_col_names = aggregated_columns.getNames(); + bool need_append_timezone_cast = hasMeaningfulTZInfo(rqst); + tipb::Expr tz_expr; + if (need_append_timezone_cast) + constructTZExpr(tz_expr, rqst, false); + String tz_col; + String tz_cast_func_name + = rqst.has_time_zone_name() && rqst.time_zone_name().length() > 0 ? "ConvertTimeZoneToUTC" : "ConvertTimeZoneByOffset"; for (Int32 i = 0; i < aggregation.agg_func_size(); i++) { - String & name = agg_col_names[i]; + String & name = aggregated_columns[i].name; String updated_name = appendCastIfNeeded(aggregation.agg_func(i), step.actions, name); + if (need_append_timezone_cast && aggregation.agg_func(i).field_type().tp() == TiDB::TypeTimestamp) + { + if (tz_col.length() == 0) + tz_col = getActions(tz_expr, step.actions); + updated_name = appendTimeZoneCast(tz_col, updated_name, tz_cast_func_name, step.actions); + } if (name != updated_name) { need_update_aggregated_columns = true; @@ -208,14 +310,20 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons } else { - updated_aggregated_columns.emplace_back(name, aggregated_columns.getTypes()[i]); + updated_aggregated_columns.emplace_back(name, aggregated_columns[i].type); step.required_output.push_back(name); } } for (Int32 i = 0; i < aggregation.group_by_size(); i++) { - String & name = agg_col_names[i + aggregation.agg_func_size()]; + String & name = aggregated_columns[i + aggregation.agg_func_size()].name; String updated_name = appendCastIfNeeded(aggregation.group_by(i), step.actions, name); + if (need_append_timezone_cast && aggregation.group_by(i).field_type().tp() == TiDB::TypeTimestamp) + { + if (tz_col.length() == 0) + tz_col = getActions(tz_expr, step.actions); + updated_name = appendTimeZoneCast(tz_col, updated_name, tz_cast_func_name, step.actions); + } if (name != updated_name) { need_update_aggregated_columns = true; @@ -225,7 +333,7 @@ void DAGExpressionAnalyzer::appendAggSelect(ExpressionActionsChain & chain, cons } else { - updated_aggregated_columns.emplace_back(name, aggregated_columns.getTypes()[i]); + updated_aggregated_columns.emplace_back(name, aggregated_columns[i].type); step.required_output.push_back(name); } } @@ -263,11 +371,10 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres // first construct the second argument tipb::Expr type_expr; type_expr.set_tp(tipb::ExprType::String); - std::stringstream ss; type_expr.set_val(expected_type->getName()); auto * type_field_type = type_expr.mutable_field_type(); - type_field_type->set_tp(0xfe); - type_field_type->set_flag(1); + type_field_type->set_tp(TiDB::TypeString); + type_field_type->set_flag(TiDB::ColumnFlagNotNull); getActions(type_expr, actions); Names 
cast_argument_names; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 24a4e775426..1486783d467 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -25,9 +25,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable private: using ExpressionActionsPtr = std::shared_ptr; // all columns from table scan - NamesAndTypesList source_columns; + std::vector source_columns; // all columns after aggregation - NamesAndTypesList aggregated_columns; + std::vector aggregated_columns; DAGPreparedSets prepared_sets; Settings settings; const Context & context; @@ -36,27 +36,35 @@ class DAGExpressionAnalyzer : private boost::noncopyable Poco::Logger * log; public: - DAGExpressionAnalyzer(const NamesAndTypesList & source_columns_, const Context & context_); + DAGExpressionAnalyzer(const std::vector && source_columns_, const Context & context_); void appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); void appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); void appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, AggregateDescriptions & aggregate_descriptions); - void appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg); + void appendAggSelect(ExpressionActionsChain & chain, const tipb::Aggregation & agg, const tipb::DAGRequest & rqst); String appendCastIfNeeded(const tipb::Expr & expr, ExpressionActionsPtr & actions, const String & expr_name); - void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const + void initChain(ExpressionActionsChain & chain, const std::vector & columns) const { if (chain.steps.empty()) { chain.settings = settings; - chain.steps.emplace_back(std::make_shared(columns, settings)); + NamesAndTypesList column_list; + for (const auto & col : columns) + { + column_list.emplace_back(col.name, col.type); + } + chain.steps.emplace_back(std::make_shared(column_list, settings)); } } void appendFinalProject(ExpressionActionsChain & chain, const NamesWithAliases & final_project); String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); - const NamesAndTypesList & getCurrentInputColumns(); + const std::vector & getCurrentInputColumns(); void makeExplicitSet(const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name); String applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions); Int32 getImplicitCastCount() { return implicit_cast_count; }; + bool appendTimeZoneCastsAfterTS(ExpressionActionsChain &chain, std::vector is_ts_column, + const tipb::DAGRequest &rqst); + String appendTimeZoneCast(const String & tz_col, const String & ts_col, const String & func_name, ExpressionActionsPtr & actions); }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp index 4a11d21f075..ef5efcc08f7 100644 --- a/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStringConverter.cpp @@ -47,7 +47,11 @@ void DAGStringConverter::buildTSString(const tipb::TableScan & ts, std::stringst // no column selected, must be something wrong throw Exception("No column is selected in table scan executor", ErrorCodes::COP_BAD_DAG_REQUEST); } - columns_from_ts = 
storage->getColumns().getAllPhysical(); + const auto & column_list = storage->getColumns().getAllPhysical(); + for (auto & column : column_list) + { + columns_from_ts.emplace_back(column.name, column.type); + } for (const tipb::ColumnInfo & ci : ts.columns()) { ColumnID cid = ci.column_id(); diff --git a/dbms/src/Flash/Coprocessor/DAGStringConverter.h b/dbms/src/Flash/Coprocessor/DAGStringConverter.h index fa91a72c7b7..f90396be281 100644 --- a/dbms/src/Flash/Coprocessor/DAGStringConverter.h +++ b/dbms/src/Flash/Coprocessor/DAGStringConverter.h @@ -22,7 +22,7 @@ class DAGStringConverter String buildSqlString(); - const NamesAndTypesList & getCurrentColumns() + const std::vector & getCurrentColumns() { if (afterAgg) { @@ -50,8 +50,8 @@ class DAGStringConverter Context & context; const tipb::DAGRequest & dag_request; // used by columnRef, which starts with 1, and refs column index in the original ts/agg output - NamesAndTypesList columns_from_ts; - NamesAndTypesList columns_from_agg; + std::vector columns_from_ts; + std::vector columns_from_agg; // used by output_offset, which starts with 0, and refs the index in the selected output of ts/agg operator Names output_from_ts; Names output_from_agg; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index dd4497530fb..df985755a4a 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -49,7 +49,7 @@ const String & getFunctionName(const tipb::Expr & expr) } } -String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser) +String exprToString(const tipb::Expr & expr, const std::vector & input_col, bool for_parser) { std::stringstream ss; Int64 column_id = 0; @@ -87,9 +87,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col } case tipb::ExprType::MysqlTime: { - if (!expr.has_field_type() - || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime - && expr.field_type().tp() != TiDB::TypeTimestamp)) + if (!expr.has_field_type() || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime)) throw Exception("Invalid MySQL Time literal " + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); auto t = decodeDAGUInt64(expr.val()); // TODO: Use timezone in DAG request. 
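The hunk above resolves a tipb ColumnRef by a zero-based offset into the executor's current input schema, which is why these signatures migrate from NamesAndTypesList (backed by std::list, so getNames() had to materialize every name before indexing) to a std::vector of name/type pairs with O(1) random access. A minimal sketch of the lookup, using a hypothetical stand-in type rather than the real TiFlash headers:

#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for DB::NameAndTypePair (the real struct holds a DataTypePtr).
struct NameAndTypePairSketch
{
    std::string name;
    std::string type;
};

// Resolve a ColumnRef's zero-based offset into the current input schema in O(1);
// the bounds check mirrors the "Column id out of bound" guard in exprToString.
const std::string & resolveColumnRef(const std::vector<NameAndTypePairSketch> & input_col, size_t column_id)
{
    if (column_id >= input_col.size())
        throw std::out_of_range("Column id out of bound");
    return input_col[column_id].name;
}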
@@ -101,7 +99,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col { throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } - return input_col.getNames()[column_id]; + return input_col[column_id].name; case tipb::ExprType::Count: case tipb::ExprType::Sum: case tipb::ExprType::Avg: @@ -151,7 +149,7 @@ String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col const String & getTypeName(const tipb::Expr & expr) { return tipb::ExprType_Name(expr.tp()); } -String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns) +String getName(const tipb::Expr & expr, const std::vector & current_input_columns) { return exprToString(expr, current_input_columns, false); } @@ -235,9 +233,7 @@ Field decodeLiteral(const tipb::Expr & expr) return decodeDAGDecimal(expr.val()); case tipb::ExprType::MysqlTime: { - if (!expr.has_field_type() - || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime - && expr.field_type().tp() != TiDB::TypeTimestamp)) + if (!expr.has_field_type() || (expr.field_type().tp() != TiDB::TypeDate && expr.field_type().tp() != TiDB::TypeDatetime)) throw Exception("Invalid MySQL Time literal " + expr.DebugString(), ErrorCodes::COP_BAD_DAG_REQUEST); auto t = decodeDAGUInt64(expr.val()); // TODO: Use timezone in DAG request. diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 99768ed060e..b2b3a98a62f 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -23,9 +23,9 @@ const String & getFunctionName(const tipb::Expr & expr); const String & getAggFunctionName(const tipb::Expr & expr); bool isColumnExpr(const tipb::Expr & expr); ColumnID getColumnID(const tipb::Expr & expr); -String getName(const tipb::Expr & expr, const NamesAndTypesList & current_input_columns); +String getName(const tipb::Expr & expr, const std::vector & current_input_columns); const String & getTypeName(const tipb::Expr & expr); -String exprToString(const tipb::Expr & expr, const NamesAndTypesList & input_col, bool for_parser = true); +String exprToString(const tipb::Expr & expr, const std::vector & input_col, bool for_parser = true); bool isInOrGlobalInOperator(const String & name); bool exprHasValidFieldType(const tipb::Expr & expr); extern std::unordered_map agg_func_map; diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 05fcc78ee48..d17f2f3e13c 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -123,6 +123,8 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } Names required_columns; + std::vector source_columns; + std::vector is_ts_column; for (const tipb::ColumnInfo & ci : ts.columns()) { ColumnID cid = ci.column_id(); @@ -140,6 +142,7 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) required_columns.push_back(name); auto pair = storage->getColumns().getPhysical(name); source_columns.emplace_back(std::move(pair)); + is_ts_column.push_back(ci.tp() == TiDB::TypeTimestamp); } if (required_columns.empty()) { @@ -150,6 +153,7 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) required_columns.push_back(pk_handle_col->get().name); auto pair = storage->getColumns().getPhysical(pk_handle_col->get().name); source_columns.push_back(pair); + is_ts_column.push_back(false); // For PK 
handle, use original column info of itself. dag.getDAGContext().void_result_ft = columnInfoToFieldType(pk_handle_col->get()); } @@ -158,13 +162,14 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) required_columns.push_back(MutableSupport::tidb_pk_column_name); auto pair = storage->getColumns().getPhysical(MutableSupport::tidb_pk_column_name); source_columns.push_back(pair); + is_ts_column.push_back(false); // For implicit handle, reverse get a column info. auto column_info = reverseGetColumnInfo(pair, -1, Field()); dag.getDAGContext().void_result_ft = columnInfoToFieldType(column_info); } } - analyzer = std::make_unique(source_columns, context); + analyzer = std::make_unique(std::move(source_columns), context); if (!dag.hasAggregation()) { @@ -260,6 +265,22 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) } }); } + + addTimeZoneCastAfterTS(is_ts_column, pipeline); +} + +// add timezone cast for timestamp type, this is used to support session level timezone +void InterpreterDAG::addTimeZoneCastAfterTS(std::vector & is_ts_column, Pipeline & pipeline) +{ + bool hasTSColumn = false; + for (auto b : is_ts_column) + hasTSColumn |= b; + if (!hasTSColumn) + return; + + ExpressionActionsChain chain; + if (analyzer->appendTimeZoneCastsAfterTS(chain, is_ts_column, dag.getDAGRequest())) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, chain.getLastActions()); }); } InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() @@ -284,7 +305,7 @@ InterpreterDAG::AnalysisResult InterpreterDAG::analyzeExpressions() chain.clear(); // add cast if type is not match - analyzer->appendAggSelect(chain, dag.getAggregation()); + analyzer->appendAggSelect(chain, dag.getAggregation(), dag.getDAGRequest()); //todo use output_offset to reconstruct the final project columns for (auto element : analyzer->getCurrentInputColumns()) { diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.h b/dbms/src/Flash/Coprocessor/InterpreterDAG.h index 22ba126df96..8a5b7dfe76a 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.h +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.h @@ -83,6 +83,7 @@ class InterpreterDAG : public IInterpreter SortDescription getSortDescription(Strings & order_column_names); AnalysisResult analyzeExpressions(); void recordProfileStreams(Pipeline & pipeline, Int32 index); + void addTimeZoneCastAfterTS(std::vector & is_ts_column, Pipeline & pipeline); private: Context & context; @@ -90,7 +91,6 @@ class InterpreterDAG : public IInterpreter const DAGQuerySource & dag; NamesWithAliases final_project; - NamesAndTypesList source_columns; /// How many streams we ask for storage to produce, and in how many threads we will do further processing. 
size_t max_streams = 1; diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index ccb354e5b5c..bed9a27624e 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -60,7 +60,7 @@ try } catch (const LockException & e) { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": LockException: " << e.displayText()); + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": LockException: " << e.getStackTrace().toString()); cop_response->Clear(); kvrpcpb::LockInfo * lock_info = cop_response->mutable_locked(); lock_info->set_key(e.lock_infos[0]->key); @@ -72,7 +72,7 @@ catch (const LockException & e) } catch (const RegionException & e) { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": RegionException: " << e.displayText()); + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": RegionException: " << e.getStackTrace().toString()); cop_response->Clear(); errorpb::Error * region_err; switch (e.status) @@ -95,7 +95,7 @@ catch (const RegionException & e) } catch (const Exception & e) { - LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.displayText()); + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": Exception: " << e.getStackTrace().toString()); cop_response->Clear(); cop_response->set_other_error(e.message()); diff --git a/dbms/src/Functions/FunctionsDateTime.cpp b/dbms/src/Functions/FunctionsDateTime.cpp index 58a52188af7..e9e5e065dd1 100644 --- a/dbms/src/Functions/FunctionsDateTime.cpp +++ b/dbms/src/Functions/FunctionsDateTime.cpp @@ -59,6 +59,7 @@ void registerFunctionsDateTime(FunctionFactory & factory) { factory.registerFunction>(); factory.registerFunction>(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index c23b5c1fa7d..e41b762271d 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -1276,6 +1276,71 @@ class FunctionYesterday : public IFunction } }; +class FunctionMyTimeZoneConvertByOffset : public IFunction +{ + using FromFieldType = typename DataTypeMyDateTime::FieldType; + using ToFieldType = typename DataTypeMyDateTime::FieldType; +public: + static FunctionPtr create(const Context &) { return std::make_shared(); }; + static constexpr auto name = "ConvertTimeZoneByOffset"; + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override {return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!checkDataType(arguments[0].type.get())) + throw Exception{ + "Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ". Should be MyDateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + if (!arguments[1].type->isInteger()) + throw Exception{ + "Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + + ". 
Should be Integer type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return arguments[0].type; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { + if (const ColumnVector *col_from + = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get())) { + auto col_to = ColumnVector::create(); + const typename ColumnVector::Container &vec_from = col_from->getData(); + typename ColumnVector::Container &vec_to = col_to->getData(); + size_t size = vec_from.size(); + vec_to.resize(size); + + const auto offset_col = block.getByPosition(arguments.back()).column.get(); + if (!offset_col->isColumnConst()) + throw Exception{ + "Second argument of function " + getName() + " must be an integral constant", + ErrorCodes::ILLEGAL_COLUMN}; + + const auto offset = offset_col->getInt(0); + for (size_t i = 0; i < size; ++i) { + UInt64 result_time = vec_from[i] + offset; + // todo maybe affected by daytime saving, need double check + convertTimeZoneByOffset(vec_from[i], result_time, offset, DateLUT::instance("UTC")); + vec_to[i] = result_time; + } + + block.getByPosition(result).column = std::move(col_to); + } else + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + name, + ErrorCodes::ILLEGAL_COLUMN); + } + +}; template class FunctionMyTimeZoneConverter : public IFunction { diff --git a/dbms/src/Storages/Transaction/Datum.cpp b/dbms/src/Storages/Transaction/Datum.cpp index e5b15ff8c17..287c97c1889 100644 --- a/dbms/src/Storages/Transaction/Datum.cpp +++ b/dbms/src/Storages/Transaction/Datum.cpp @@ -54,17 +54,6 @@ struct DatumOp -struct DatumOp::type> -{ - static void unflatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } - - static void flatten(const Field & orig, std::optional & copy) { copy = static_cast(orig.get()); } - - static bool overflow(const Field &, const ColumnInfo &) { return false; } -}; - /// Specialized for Enum, using unflatten/flatten to transform UInt to Int back and forth. template struct DatumOp::type> diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 7b98ee286e5..c57feed2639 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -43,12 +43,12 @@ using DB::Timestamp; M(Float, 4, Float, Float32, false) \ M(Double, 5, Float, Float64, false) \ M(Null, 6, Nil, Nothing, false) \ - M(Timestamp, 7, Int, MyDateTime, false) \ + M(Timestamp, 7, UInt, MyDateTime, false) \ M(LongLong, 8, Int, Int64, false) \ M(Int24, 9, VarInt, Int32, true) \ - M(Date, 10, Int, MyDate, false) \ + M(Date, 10, UInt, MyDate, false) \ M(Time, 11, Duration, Int64, false) \ - M(Datetime, 12, Int, MyDateTime, false) \ + M(Datetime, 12, UInt, MyDateTime, false) \ M(Year, 13, Int, Int16, false) \ M(NewDate, 14, Int, MyDate, false) \ M(Varchar, 15, CompactBytes, String, false) \ diff --git a/tests/mutable-test/txn_dag/time_zone.test b/tests/mutable-test/txn_dag/time_zone.test new file mode 100644 index 00000000000..9806a8bcae5 --- /dev/null +++ b/tests/mutable-test/txn_dag/time_zone.test @@ -0,0 +1,64 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. 
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 MyDate, col_2 MyDatetime(5), col_3 MyDatetime') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, '2019-06-10', '2019-06-10 09:00:00', '2019-06-10 09:00:00') +=> DBGInvoke __raft_insert_row(default, test, 4, 51, '2019-06-11', '2019-06-11 07:00:00', '2019-06-11 09:00:00') +=> DBGInvoke __raft_insert_row(default, test, 4, 52, '2019-06-11', '2019-06-11 08:00:00', '2019-06-11 09:00:00') +=> DBGInvoke __raft_insert_row(default, test, 4, 53, '2019-06-12', '2019-06-11 08:00:00', '2019-06-11 09:00:00') + +=> DBGInvoke dag('select * from default.test') " --dag_planner="optree +┌──────col_1─┬─────────────────────col_2─┬───────────────col_3─┐ +│ 2019-06-10 │ 2019-06-10 09:00:00.00000 │ 2019-06-10 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 07:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-12 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +└────────────┴───────────────────────────┴─────────────────────┘ + +# use tz_offset, result is the same since cop will convert the timestamp value to utc timestamp when returning to tidb +=> DBGInvoke dag('select * from default.test',4,28800) " --dag_planner="optree +┌──────col_1─┬─────────────────────col_2─┬───────────────col_3─┐ +│ 2019-06-10 │ 2019-06-10 09:00:00.00000 │ 2019-06-10 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 07:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-12 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +└────────────┴───────────────────────────┴─────────────────────┘ + +=> DBGInvoke dag('select * from default.test where col_2 > col_3') " --dag_planner="optree + +=> DBGInvoke dag('select * from default.test where col_2 > col_3',4,28800) " --dag_planner="optree +┌──────col_1─┬─────────────────────col_2─┬───────────────col_3─┐ +│ 2019-06-10 │ 2019-06-10 09:00:00.00000 │ 2019-06-10 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 07:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-11 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +│ 2019-06-12 │ 2019-06-11 08:00:00.00000 │ 2019-06-11 09:00:00 │ +└────────────┴───────────────────────────┴─────────────────────┘ + +# tz_name overrides tz_offset +=> DBGInvoke dag('select * from default.test where col_2 > col_3',4,28800,'UTC') " --dag_planner="optree + +# ts_col in group by clause +=> DBGInvoke dag('select count(1) from default.test where col_2 > \'2019-06-11 15:00:00\' group by col_2',4,28800) " --dag_planner="optree +┌─count(1)─┬─────────────────────col_2─┐ +│ 2 │ 2019-06-11 08:00:00.00000 │ +└──────────┴───────────────────────────┘ + +# ts_col in agg clause +=> DBGInvoke dag('select max(col_2) from default.test group by col_1',4,28800) " --dag_planner="optree +┌──────────max(col_2)─┬──────col_1─┐ +│ 2019-06-11 08:00:00 │ 2019-06-12 │ +│ 2019-06-11 08:00:00 │ 2019-06-11 │ +│ 2019-06-10 09:00:00 │ 2019-06-10 │ +└─────────────────────┴────────────┘ + +# Clean up. 
+=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test From 8d2576e30016b7489eaf52f223a2be396a51b93a Mon Sep 17 00:00:00 2001 From: zanmato1984 Date: Sun, 29 Sep 2019 13:38:59 +0800 Subject: [PATCH 63/79] Address comment --- dbms/src/Debug/DBGInvoker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 40bd2e39890..99e70bfdd44 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -114,7 +114,7 @@ BlockInputStreamPtr DBGInvoker::invoke(Context & context, const std::string & or auto it_schemaful = schemaful_funcs.find(name); if (it_schemaful != schemaful_funcs.end()) res = invokeSchemaful(context, name, it_schemaful->second, args); - if (it_schemaful == schemaful_funcs.end()) + else throw Exception("DBG function not found", ErrorCodes::BAD_ARGUMENTS); } From d33a278184d8f83b64c73fed1f2bbbbc6dc3c9bd Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 30 Sep 2019 13:18:25 +0800 Subject: [PATCH 64/79] FLASH-489 support key condition for coprocessor query (#261) * support key condition for coprocessor query * add tests * remove useless code * check validation when build RPNElement for function in/notIn * address comments * address comments --- dbms/src/Debug/MockTiDB.cpp | 12 +- dbms/src/Debug/MockTiDB.h | 3 +- dbms/src/Debug/dbgFuncCoprocessor.cpp | 19 +- dbms/src/Debug/dbgFuncMockTiDBTable.cpp | 10 +- dbms/src/Debug/dbgTools.cpp | 4 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 37 +- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 6 +- dbms/src/Flash/Coprocessor/DAGQueryInfo.h | 23 + dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 10 + dbms/src/Storages/MergeTree/KeyCondition.cpp | 536 ++--------------- dbms/src/Storages/MergeTree/KeyCondition.h | 145 ++--- dbms/src/Storages/MergeTree/RPNBuilder.cpp | 540 ++++++++++++++++++ dbms/src/Storages/MergeTree/RPNBuilder.h | 67 +++ dbms/src/Storages/SelectQueryInfo.cpp | 6 +- dbms/src/Storages/SelectQueryInfo.h | 5 + tests/mutable-test/txn_dag/key_condition.test | 35 ++ 16 files changed, 854 insertions(+), 604 deletions(-) create mode 100644 dbms/src/Flash/Coprocessor/DAGQueryInfo.h create mode 100644 dbms/src/Storages/MergeTree/RPNBuilder.cpp create mode 100644 dbms/src/Storages/MergeTree/RPNBuilder.h create mode 100644 tests/mutable-test/txn_dag/key_condition.test diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 66e6ed36a22..e6d59fc6c20 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -133,7 +133,8 @@ DatabaseID MockTiDB::newDataBase(const String & database_name) return schema_id; } -TableID MockTiDB::newTable(const String & database_name, const String & table_name, const ColumnsDescription & columns, Timestamp tso) +TableID MockTiDB::newTable(const String & database_name, const String & table_name, + const ColumnsDescription & columns, Timestamp tso, const String & handle_pk_name) { std::lock_guard lock(tables_mutex); @@ -153,14 +154,21 @@ TableID MockTiDB::newTable(const String & database_name, const String & table_na table_info.db_name = database_name; table_info.id = table_id_allocator++; table_info.name = table_name; + table_info.pk_is_handle = false; int i = 1; for (auto & column : columns.getAllPhysical()) { table_info.columns.emplace_back(reverseGetColumnInfo(column, i++, Field())); + if (handle_pk_name == column.name) + { + if (!column.type->isInteger() && !column.type->isUnsignedInteger()) + throw Exception("MockTiDB pk column must be integer or 
unsigned integer type", ErrorCodes::LOGICAL_ERROR); + table_info.columns.back().setPriKeyFlag(); + table_info.pk_is_handle = true; + } } - table_info.pk_is_handle = false; table_info.comment = "Mocked."; table_info.update_timestamp = tso; diff --git a/dbms/src/Debug/MockTiDB.h b/dbms/src/Debug/MockTiDB.h index ed3d30a1929..20afa87144a 100644 --- a/dbms/src/Debug/MockTiDB.h +++ b/dbms/src/Debug/MockTiDB.h @@ -55,7 +55,8 @@ class MockTiDB : public ext::singleton using TablePtr = std::shared_ptr; public: - TableID newTable(const String & database_name, const String & table_name, const ColumnsDescription & columns, Timestamp tso); + TableID newTable(const String & database_name, const String & table_name, + const ColumnsDescription & columns, Timestamp tso, const String & handle_pk_name); DatabaseID newDataBase(const String & database_name); diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index 8b84a373963..3f46106f2cb 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -163,32 +163,35 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un { expr->set_sig(tipb::ScalarFuncSig::EQInt); auto * ft = expr->mutable_field_type(); - // TODO: TiDB will infer Int64. - ft->set_tp(TiDB::TypeTiny); + ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned); } else if (func_name_lowercase == "and") { expr->set_sig(tipb::ScalarFuncSig::LogicalAnd); auto * ft = expr->mutable_field_type(); - // TODO: TiDB will infer Int64. - ft->set_tp(TiDB::TypeTiny); + ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned); } else if (func_name_lowercase == "or") { expr->set_sig(tipb::ScalarFuncSig::LogicalOr); auto * ft = expr->mutable_field_type(); - // TODO: TiDB will infer Int64. - ft->set_tp(TiDB::TypeTiny); + ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned); } else if (func_name_lowercase == "greater") { expr->set_sig(tipb::ScalarFuncSig::GTInt); auto * ft = expr->mutable_field_type(); - // TODO: TiDB will infer Int64. 
- ft->set_tp(TiDB::TypeTiny); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } + else if (func_name_lowercase == "greaterorequals") + { + expr->set_sig(tipb::ScalarFuncSig::GEInt); + auto *ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned); } else diff --git a/dbms/src/Debug/dbgFuncMockTiDBTable.cpp b/dbms/src/Debug/dbgFuncMockTiDBTable.cpp index 879c2c7663e..41badcadb75 100644 --- a/dbms/src/Debug/dbgFuncMockTiDBTable.cpp +++ b/dbms/src/Debug/dbgFuncMockTiDBTable.cpp @@ -25,13 +25,17 @@ extern const int LOGICAL_ERROR; void MockTiDBTable::dbgFuncMockTiDBTable(Context & context, const ASTs & args, DBGInvoker::Printer output) { - if (args.size() != 3) - throw Exception("Args not matched, should be: database-name, table-name, schema-string", ErrorCodes::BAD_ARGUMENTS); + if (args.size() != 3 && args.size() != 4) + throw Exception("Args not matched, should be: database-name, table-name, schema-string [, handle_pk_name]", ErrorCodes::BAD_ARGUMENTS); const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; auto schema_str = safeGet(typeid_cast(*args[2]).value); + String handle_pk_name = ""; + if (args.size() == 4) + handle_pk_name = safeGet(typeid_cast(*args[3]).value); + ASTPtr columns_ast; ParserColumnDeclarationList schema_parser; Tokens tokens(schema_str.data(), schema_str.data() + schema_str.length()); @@ -43,7 +47,7 @@ void MockTiDBTable::dbgFuncMockTiDBTable(Context & context, const ASTs & args, D = InterpreterCreateQuery::getColumnsDescription(typeid_cast(*columns_ast), context); auto tso = context.getTMTContext().getPDClient()->getTS(); - TableID table_id = MockTiDB::instance().newTable(database_name, table_name, columns, tso); + TableID table_id = MockTiDB::instance().newTable(database_name, table_name, columns, tso, handle_pk_name); std::stringstream ss; ss << "mock table #" << table_id; diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 2ac2479b225..77c9e0c50da 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -237,7 +237,7 @@ Field convertField(const ColumnInfo & column_info, const Field & field) void encodeRow(const TiDB::TableInfo & table_info, const std::vector & fields, std::stringstream & ss) { - if (table_info.columns.size() != fields.size()) + if (table_info.columns.size() != fields.size() + table_info.pk_is_handle) throw Exception("Encoding row has different sizes between columns and values", ErrorCodes::LOGICAL_ERROR); for (size_t i = 0; i < fields.size(); i++) { @@ -261,7 +261,7 @@ void insert(const TiDB::TableInfo & table_info, RegionID region_id, HandleID han fields.emplace_back(field); idx++; } - if (fields.size() != table_info.columns.size()) + if (fields.size() + table_info.pk_is_handle != table_info.columns.size()) throw Exception("Number of insert values and columns do not match.", ErrorCodes::LOGICAL_ERROR); TMTContext & tmt = context.getTMTContext(); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index fe0eb24c081..4dd5e48a4a9 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -214,7 +216,7 @@ void constructTZExpr(tipb::Expr & tz_expr, const tipb::DAGRequest & rqst, bool f } } -bool hasMeaningfulTZInfo(const tipb::DAGRequest &rqst) +bool 
hasMeaningfulTZInfo(const tipb::DAGRequest & rqst) { if (rqst.has_time_zone_name() && rqst.time_zone_name().length() > 0) return rqst.time_zone_name() != "UTC"; @@ -249,7 +251,7 @@ String DAGExpressionAnalyzer::appendTimeZoneCast( // column with UTC timezone will never be used in during agg), all the column with ts datatype will // convert back to UTC timezone bool DAGExpressionAnalyzer::appendTimeZoneCastsAfterTS( - ExpressionActionsChain &chain, std::vector is_ts_column, const tipb::DAGRequest &rqst) + ExpressionActionsChain & chain, std::vector is_ts_column, const tipb::DAGRequest & rqst) { if (!hasMeaningfulTZInfo(rqst)) return false; @@ -391,6 +393,35 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres return expr_name; } +void DAGExpressionAnalyzer::makeExplicitSetForIndex(const tipb::Expr & expr, const TMTStoragePtr & storage) +{ + for (auto & child : expr.children()) + { + makeExplicitSetForIndex(child, storage); + } + if (expr.tp() != tipb::ExprType::ScalarFunc) + { + return; + } + const String & func_name = getFunctionName(expr); + // only support col_name in (value_list) + if (isInOrGlobalInOperator(func_name) && expr.children(0).tp() == tipb::ExprType::ColumnRef && !prepared_sets.count(&expr)) + { + NamesAndTypesList column_list; + for (const auto & col : getCurrentInputColumns()) + { + column_list.emplace_back(col.name, col.type); + } + ExpressionActionsPtr temp_actions = std::make_shared(column_list, settings); + String name = getActions(expr.children(0), temp_actions); + ASTPtr name_ast = std::make_shared(name); + if (storage->mayBenefitFromIndexForIn(name_ast)) + { + makeExplicitSet(expr, temp_actions->getSampleBlock(), true, name); + } + } +} + void DAGExpressionAnalyzer::makeExplicitSet( const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name) { @@ -400,7 +431,7 @@ void DAGExpressionAnalyzer::makeExplicitSet( } DataTypes set_element_types; // todo support tuple in, i.e. 
(a,b) in ((1,2), (3,4)), currently TiDB convert tuple in into a series of or/and/eq exprs - // which means tuple in is never be pushed to coprocessor, but it is quite in-efficient + // which means tuple in is never pushed to the coprocessor, but it is quite inefficient set_element_types.push_back(sample_block.getByName(left_arg_name).type); // todo if this is a single value in, then convert it to equal expr diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 1486783d467..1b5b65f0ff0 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -60,11 +61,12 @@ class DAGExpressionAnalyzer : private boost::noncopyable String getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions); const std::vector & getCurrentInputColumns(); void makeExplicitSet(const tipb::Expr & expr, const Block & sample_block, bool create_ordered_set, const String & left_arg_name); + void makeExplicitSetForIndex(const tipb::Expr & expr, const TMTStoragePtr & storage); String applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions); Int32 getImplicitCastCount() { return implicit_cast_count; }; - bool appendTimeZoneCastsAfterTS(ExpressionActionsChain &chain, std::vector is_ts_column, - const tipb::DAGRequest &rqst); + bool appendTimeZoneCastsAfterTS(ExpressionActionsChain & chain, std::vector is_ts_column, const tipb::DAGRequest & rqst); String appendTimeZoneCast(const String & tz_col, const String & ts_col, const String & func_name, ExpressionActionsPtr & actions); + DAGPreparedSets getPreparedSets() { return prepared_sets; } }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryInfo.h b/dbms/src/Flash/Coprocessor/DAGQueryInfo.h new file mode 100644 index 00000000000..20274503782 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/DAGQueryInfo.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +struct DAGQueryInfo +{ + DAGQueryInfo(const DAGQuerySource & dag_, DAGPreparedSets dag_sets_, std::vector & source_columns_) + : dag(dag_), dag_sets(std::move(dag_sets_)) + { + for (auto & c : source_columns_) + source_columns.emplace_back(c.name, c.type); + }; + const DAGQuerySource & dag; + DAGPreparedSets dag_sets; + NamesAndTypesList source_columns; +}; +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index d17f2f3e13c..a0029c04114 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -208,9 +209,18 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) if (!checkKeyRanges(dag.getKeyRanges(), table_id, storage->pkIsUInt64())) throw Exception("Cop request only support full range scan for given region", ErrorCodes::COP_BAD_DAG_REQUEST); + if (dag.hasSelection()) + { + for (auto & condition : dag.getSelection().conditions()) + { + analyzer->makeExplicitSetForIndex(condition, storage); + } + } //todo support index in SelectQueryInfo query_info; + // set query to avoid unexpected NPE + query_info.query = dag.getAST(); + query_info.dag_query = std::make_unique(dag, analyzer->getPreparedSets(), source_columns); query_info.mvcc_query_info = std::make_unique(); query_info.mvcc_query_info->resolve_locks = 
true; query_info.mvcc_query_info->read_tso = settings.read_tso; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index de7797f6063..09e367c449e 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -109,7 +111,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map { { "notEquals", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_NOT_IN_RANGE; out.range = Range(value); @@ -118,7 +120,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "equals", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_IN_RANGE; out.range = Range(value); @@ -127,7 +129,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "less", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_IN_RANGE; out.range = Range::createRightBounded(value, false); @@ -136,7 +138,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "greater", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_IN_RANGE; out.range = Range::createLeftBounded(value, false); @@ -145,7 +147,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "lessOrEquals", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_IN_RANGE; out.range = Range::createRightBounded(value, true); @@ -154,7 +156,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "greaterOrEquals", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { out.function = RPNElement::FUNCTION_IN_RANGE; out.range = Range::createLeftBounded(value, true); @@ -163,25 +165,23 @@ const KeyCondition::AtomMap KeyCondition::atom_map }, { "in", - [] (RPNElement & out, const Field &, const ASTPtr & node) + [] (RPNElement & out, const Field &) { out.function = RPNElement::FUNCTION_IN_SET; - out.in_function = node; return true; } }, { "notIn", - [] (RPNElement & out, const Field &, const ASTPtr & node) + [] (RPNElement & out, const Field &) { out.function = RPNElement::FUNCTION_NOT_IN_SET; - out.in_function = node; return true; } }, { "like", - [] (RPNElement & out, const Field & value, const ASTPtr &) + [] (RPNElement & out, const Field & value) { if (value.getType() != Field::Types::String) return false; @@ -271,7 +271,7 @@ KeyCondition::KeyCondition( const NamesAndTypesList & all_columns, const SortDescription & sort_descr_, const ExpressionActionsPtr & key_expr_) - : sort_descr(sort_descr_), key_expr(key_expr_), prepared_sets(query_info.sets) + : sort_descr(sort_descr_), key_expr(key_expr_) { for (size_t i = 0; i < sort_descr.size(); ++i) { @@ -280,486 +280,60 @@ KeyCondition::KeyCondition( key_columns[name] = i; } - /** Evaluation of expressions that depend only on constants. - * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. 
- */ - Block block_with_constants = getBlockWithConstants(query_info.query, context, all_columns); - - /// Trasform WHERE section to Reverse Polish notation - const ASTSelectQuery & select = typeid_cast(*query_info.query); - if (select.where_expression) - { - traverseAST(select.where_expression, context, block_with_constants); - - if (select.prewhere_expression) - { - traverseAST(select.prewhere_expression, context, block_with_constants); - rpn.emplace_back(RPNElement::FUNCTION_AND); - } - } - else if (select.prewhere_expression) - { - traverseAST(select.prewhere_expression, context, block_with_constants); - } - else - { - rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); - } -} - -bool KeyCondition::addCondition(const String & column, const Range & range) -{ - if (!key_columns.count(column)) - return false; - rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range); - rpn.emplace_back(RPNElement::FUNCTION_AND); - return true; -} - -/** Computes value of constant expression and it data type. - * Returns false, if expression isn't constant. - */ -static bool getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type) -{ - String column_name = expr->getColumnName(); - - if (const ASTLiteral * lit = typeid_cast(expr.get())) - { - /// By default block_with_constants has only one column named "_dummy". - /// If block contains only constants it's may not be preprocessed by - // ExpressionAnalyzer, so try to look up in the default column. - if (!block_with_constants.has(column_name)) - column_name = "_dummy"; - - /// Simple literal - out_value = lit->value; - out_type = block_with_constants.getByName(column_name).type; - return true; - } - else if (block_with_constants.has(column_name) && block_with_constants.getByName(column_name).column->isColumnConst()) - { - /// An expression which is dependent on constants only - const auto & expr_info = block_with_constants.getByName(column_name); - out_value = (*expr_info.column)[0]; - out_type = expr_info.type; - return true; - } - else - return false; -} - - -static void applyFunction( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, const Field & arg_value, - DataTypePtr & res_type, Field & res_value) -{ - res_type = func->getReturnType(); - - Block block - { - { arg_type->createColumnConst(1, arg_value), arg_type, "x" }, - { nullptr, res_type, "y" } - }; - - func->execute(block, {0}, 1); - - block.safeGetByPosition(1).column->get(0, res_value); -} - - -void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) -{ - RPNElement element; - - if (ASTFunction * func = typeid_cast(&*node)) - { - if (operatorFromAST(func, element)) - { - auto & args = typeid_cast(*func->arguments).children; - for (size_t i = 0, size = args.size(); i < size; ++i) - { - traverseAST(args[i], context, block_with_constants); - - /** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity - * - in this case `n - 1` elements are added (where `n` is the number of arguments). 
- */ - if (i != 0 || element.function == RPNElement::FUNCTION_NOT) - rpn.emplace_back(std::move(element)); - } - - return; - } - } - - if (!atomFromAST(node, context, block_with_constants, element)) + if (query_info.fromAST()) { - element.function = RPNElement::FUNCTION_UNKNOWN; - } - - rpn.emplace_back(std::move(element)); -} - - -bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( - const ASTPtr & node, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type) -{ - String expr_name = node->getColumnName(); - const auto & sample_block = key_expr->getSampleBlock(); - if (!sample_block.has(expr_name)) - return false; + RPNBuilder rpn_builder(key_expr_, key_columns, all_columns); + PreparedSets sets(query_info.sets); - bool found_transformation = false; - for (const ExpressionAction & a : key_expr->getActions()) - { - /** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. + /** Evaluation of expressions that depend only on constants. + * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. 
*/ - const auto & action = a.argument_names; - if (a.type == ExpressionAction::Type::APPLY_FUNCTION && action.size() == 1 && a.argument_names[0] == expr_name) - { - if (!a.function->hasInformationAboutMonotonicity()) - return false; - - // Range is irrelevant in this case - IFunction::Monotonicity monotonicity = a.function->getMonotonicityForRange(*out_type, Field(), Field()); - if (!monotonicity.is_always_monotonic) - return false; - - // Apply the next transformation step - DataTypePtr new_type; - applyFunction(a.function, out_type, out_value, new_type, out_value); - if (!new_type) - return false; Block block_with_constants = getBlockWithConstants(query_info.query, context, all_columns); - out_type.swap(new_type); - expr_name = a.result_name; /// Transform WHERE section to Reverse Polish notation const ASTSelectQuery & select = typeid_cast(*query_info.query); if (select.where_expression) { rpn_builder.traverseNodeTree(select.where_expression, context, block_with_constants, sets, rpn); - // Transformation results in a key expression, accept - auto it = key_columns.find(expr_name); - if (key_columns.end() != it) if (select.prewhere_expression) { - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - found_transformation = true; - break; + rpn_builder.traverseNodeTree(select.prewhere_expression, context, block_with_constants, sets, rpn); + rpn.emplace_back(RPNElement::FUNCTION_AND); } } - } - - return found_transformation; -} - -void KeyCondition::getKeyTuplePositionMapping( - const ASTPtr & node, - const Context & context, - std::vector & indexes_mapping, - const size_t tuple_index, - size_t & out_key_column_num) -{ - MergeTreeSetIndex::KeyTuplePositionMapping index_mapping; - index_mapping.tuple_index = tuple_index; - DataTypePtr data_type; - if (isKeyPossiblyWrappedByMonotonicFunctions( - node, context, index_mapping.key_index, - data_type, index_mapping.functions)) - { - indexes_mapping.push_back(index_mapping); - if (out_key_column_num < index_mapping.key_index) - { - out_key_column_num = index_mapping.key_index; - } - } -} - - -/// Try to prepare KeyTuplePositionMapping for tuples from IN expression. -bool KeyCondition::isTupleIndexable( - const ASTPtr & node, - const Context & context, - RPNElement & out, - const SetPtr & prepared_set, - size_t & out_key_column_num) -{ - out_key_column_num = 0; - std::vector indexes_mapping; - - size_t num_key_columns = prepared_set->getDataTypes().size(); - - const ASTFunction * node_tuple = typeid_cast(node.get()); - if (node_tuple && node_tuple->name == "tuple") - { - if (num_key_columns != node_tuple->arguments->children.size()) - { - std::stringstream message; - message << "Number of columns in section IN doesn't match. 
" - << node_tuple->arguments->children.size() << " at left, " << num_key_columns << " at right."; - throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - } - - size_t current_tuple_index = 0; - for (const auto & arg : node_tuple->arguments->children) - { - getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num); - ++current_tuple_index; - } + else if (select.prewhere_expression) + rpn_builder.traverseNodeTree(select.prewhere_expression, context, block_with_constants, sets, rpn); + else + rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); } else { - getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num); - } - - if (indexes_mapping.empty()) - return false; - - out.set_index = std::make_shared( - prepared_set->getSetElements(), std::move(indexes_mapping)); - - return true; -} - - -bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( - const ASTPtr & node, - const Context & context, - size_t & out_key_column_num, - DataTypePtr & out_key_res_column_type, - RPNElement::MonotonicFunctionsChain & out_functions_chain) -{ - std::vector chain_not_tested_for_monotonicity; - DataTypePtr key_column_type; - - if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity)) - return false; - - for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it) - { - auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context); - ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }}; - auto func = func_builder->build(arguments); - - if (!func || !func->hasInformationAboutMonotonicity()) - return false; - - key_column_type = func->getReturnType(); - out_functions_chain.push_back(func); - } - - out_key_res_column_type = key_column_type; - - return true; -} - -bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( - const ASTPtr & node, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - std::vector & out_functions_chain) -{ - /** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`. - * Therefore, use the full name of the expression for search. - */ - const auto & sample_block = key_expr->getSampleBlock(); - String name = node->getColumnName(); - - auto it = key_columns.find(name); - if (key_columns.end() != it) - { - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - return true; - } - - if (const ASTFunction * func = typeid_cast(node.get())) - { - const auto & args = func->arguments->children; - if (args.size() != 1) - return false; - - out_functions_chain.push_back(func); - - if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain)) - return false; - - return true; - } - - return false; -} - - -static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const ASTPtr & node) -{ - if (desired_type->equals(*src_type)) - return; - - try - { - /// NOTE: We don't need accurate info about src_type at this moment - src_value = convertFieldToType(src_value, *desired_type); - } - catch (...) 
- { - throw Exception("Key expression contains comparison between inconvertible types: " + - desired_type->getName() + " and " + src_type->getName() + - " inside " + queryToString(node), - ErrorCodes::BAD_TYPE_OF_FIELD); - } -} - - -bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) -{ - /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key, - * or itself, wrapped in a chain of possibly-monotonic functions, - * or constant expression - number. - */ - Field const_value; - DataTypePtr const_type; - if (const ASTFunction * func = typeid_cast(node.get())) - { - const ASTs & args = typeid_cast(*func->arguments).children; - - if (args.size() != 2) - return false; - - DataTypePtr key_expr_type; /// Type of expression containing key column - size_t key_arg_pos; /// Position of argument with key column (non-const argument) - size_t key_column_num; /// Number of a key column (inside sort_descr array) - RPNElement::MonotonicFunctionsChain chain; - bool is_set_const = false; - bool is_constant_transformed = false; - - if (prepared_sets.count(args[1].get()) - && isTupleIndexable(args[0], context, out, prepared_sets[args[1].get()], key_column_num)) + RPNBuilder rpn_builder(key_expr_, key_columns, query_info.dag_query->source_columns); + DAGPreparedSets sets(query_info.dag_query->dag_sets); + const auto & dag = query_info.dag_query->dag; + if (dag.hasSelection()) { - key_arg_pos = 0; - is_set_const = true; - } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) - { - key_arg_pos = 0; - } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 0; - is_constant_transformed = true; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) - { - key_arg_pos = 1; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 1; - is_constant_transformed = true; - } - else - return false; - - std::string func_name = func->name; - - /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5" - if (is_constant_transformed) - { - if (func_name == "less") - func_name = "lessOrEquals"; - else if (func_name == "greater") - func_name = "greaterOrEquals"; - } - - /// Replace on to <-sign> - if (key_arg_pos == 1) - { - if (func_name == "less") - func_name = "greater"; - else if (func_name == "greater") - func_name = "less"; - else if (func_name == "greaterOrEquals") - func_name = "lessOrEquals"; - else if (func_name == "lessOrEquals") - func_name = "greaterOrEquals"; - else if (func_name == "in" || func_name == "notIn" || func_name == "like") + Block block_with_constants{{DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy"}}; + auto & selection = dag.getSelection(); + for (int i = 0; i < selection.conditions_size(); i++) { - /// "const IN data_column" doesn't make sense (unlike "data_column IN const") - return false; + 
rpn_builder.traverseNodeTree(selection.conditions(i), context, block_with_constants, sets, rpn); + if (i != 0) + rpn.emplace_back(RPNElement::FUNCTION_AND); } } - - out.key_column = key_column_num; - out.monotonic_functions_chain = std::move(chain); - - const auto atom_it = atom_map.find(func_name); - if (atom_it == std::end(atom_map)) - return false; - - bool cast_not_needed = - is_set_const /// Set args are already casted inside Set::createFromAST - || (key_expr_type->isNumber() && const_type->isNumber()); /// Numbers are accurately compared without cast. - - if (!cast_not_needed) - castValueToType(key_expr_type, const_value, const_type, node); - - return atom_it->second(out, const_value, node); - } - else if (getConstant(node, block_with_constants, const_value, const_type)) /// For cases where it says, for example, `WHERE 0 AND something` - { - if (const_value.getType() == Field::Types::UInt64 - || const_value.getType() == Field::Types::Int64 - || const_value.getType() == Field::Types::Float64) - { - /// Zero in all types is represented in memory the same way as in UInt64. - out.function = const_value.get() - ? RPNElement::ALWAYS_TRUE - : RPNElement::ALWAYS_FALSE; - - return true; - } + else + rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); } - - return false; } -bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) +bool KeyCondition::addCondition(const String & column, const Range & range) { - /// Functions AND, OR, NOT. - /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses - * (or, the same thing - calling the function `and` from one argument). - */ - const ASTs & args = typeid_cast(*func->arguments).children; - - if (func->name == "not") - { - if (args.size() != 1) - return false; - - out.function = RPNElement::FUNCTION_NOT; - } - else - { - if (func->name == "and" || func->name == "indexHint") - out.function = RPNElement::FUNCTION_AND; - else if (func->name == "or") - out.function = RPNElement::FUNCTION_OR; - else - return false; - } - + if (!key_columns.count(column)) + return false; + rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range); + rpn.emplace_back(RPNElement::FUNCTION_AND); return true; } @@ -1014,17 +588,9 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, c element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET) { - auto in_func = typeid_cast(element.in_function.get()); - const ASTs & args = typeid_cast(*in_func->arguments).children; - PreparedSets::const_iterator it = prepared_sets.find(args[1].get()); - if (in_func && it != prepared_sets.end()) - { - rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types)); - if (element.function == RPNElement::FUNCTION_NOT_IN_SET) - rpn_stack.back() = !rpn_stack.back(); - } - else - throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); + rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types)); + if (element.function == RPNElement::FUNCTION_NOT_IN_SET) + rpn_stack.back() = !rpn_stack.back(); } else if (element.function == RPNElement::FUNCTION_NOT) { @@ -1076,7 +642,7 @@ bool KeyCondition::mayBeTrueAfter( } -String KeyCondition::RPNElement::toString() const +String RPNElement::toString() const { auto print_wrapped_column = [this](std::ostringstream & ss) { diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index 
--- a/dbms/src/Storages/MergeTree/KeyCondition.h
+++ b/dbms/src/Storages/MergeTree/KeyCondition.h
@@ -216,6 +216,53 @@ class FieldWithInfinity
     FieldWithInfinity(const Type type_);
 };
 
+/// The expression is stored as Reverse Polish Notation.
+struct RPNElement
+{
+    enum Function
+    {
+        /// Atoms of a Boolean expression.
+        FUNCTION_IN_RANGE,
+        FUNCTION_NOT_IN_RANGE,
+        FUNCTION_IN_SET,
+        FUNCTION_NOT_IN_SET,
+        FUNCTION_UNKNOWN, /// Can take any value.
+        /// Operators of the logical expression.
+        FUNCTION_NOT,
+        FUNCTION_AND,
+        FUNCTION_OR,
+        /// Constants
+        ALWAYS_FALSE,
+        ALWAYS_TRUE,
+    };
+
+    RPNElement() {}
+    RPNElement(Function function_) : function(function_) {}
+    RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {}
+    RPNElement(Function function_, size_t key_column_, const Range & range_)
+        : function(function_), range(range_), key_column(key_column_) {}
+
+    String toString() const;
+
+    Function function = FUNCTION_UNKNOWN;
+
+    /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE.
+    Range range;
+    size_t key_column;
+    /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
+    using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
+    MergeTreeSetIndexPtr set_index;
+
+    /** A chain of possibly monotone functions.
+      * If the key column is wrapped in functions that can be monotonous in some value ranges
+      * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
+      */
+    using MonotonicFunctionsChain = std::vector<FunctionBasePtr>;
+    mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
+};
+
+using RPN = std::vector<RPNElement>;
+using ColumnIndices = std::map<String, size_t>;
 
 /** Condition on the index.
   *
   * Consists of the conditions for the key belonging to all possible ranges or sets,
@@ -256,57 +303,10 @@ class KeyCondition
 
     String toString() const;
 
-
-    /// The expression is stored as Reverse Polish Notation.
-    struct RPNElement
-    {
-        enum Function
-        {
-            /// Atoms of a Boolean expression.
-            FUNCTION_IN_RANGE,
-            FUNCTION_NOT_IN_RANGE,
-            FUNCTION_IN_SET,
-            FUNCTION_NOT_IN_SET,
-            FUNCTION_UNKNOWN, /// Can take any value.
-            /// Operators of the logical expression.
-            FUNCTION_NOT,
-            FUNCTION_AND,
-            FUNCTION_OR,
-            /// Constants
-            ALWAYS_FALSE,
-            ALWAYS_TRUE,
-        };
-
-        RPNElement() {}
-        RPNElement(Function function_) : function(function_) {}
-        RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {}
-        RPNElement(Function function_, size_t key_column_, const Range & range_)
-            : function(function_), range(range_), key_column(key_column_) {}
-
-        String toString() const;
-
-        Function function = FUNCTION_UNKNOWN;
-
-        /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE.
-        Range range;
-        size_t key_column;
-        /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
-        ASTPtr in_function;
-        using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
-        MergeTreeSetIndexPtr set_index;
-
-        /** A chain of possibly monotone functions.
-          * If the key column is wrapped in functions that can be monotonous in some value ranges
-          * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
-          */
-        using MonotonicFunctionsChain = std::vector<FunctionBasePtr>;
-        mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
-    };
-
     static Block getBlockWithConstants(
-        const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns);
+        const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns);
 
-    using AtomMap = std::unordered_map<std::string, bool (*)(RPNElement & out, const Field & value, const ASTPtr & node)>;
+    using AtomMap = std::unordered_map<std::string, bool (*)(RPNElement & out, const Field & value)>;
     static const AtomMap atom_map;
 
     static std::optional<Range> applyMonotonicFunctionsChainToRange(
@@ -315,8 +315,6 @@ class KeyCondition
         DataTypePtr current_type);
 
 private:
-    using RPN = std::vector<RPNElement>;
-    using ColumnIndices = std::map<String, size_t>;
 
     bool mayBeTrueInRange(
         size_t used_key_size,
@@ -327,56 +325,11 @@ class KeyCondition
 
     bool mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
 
-    void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
-    bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
-    bool operatorFromAST(const ASTFunction * func, RPNElement & out);
-
-    /** Is node the key column
-      * or expression in which column of key is wrapped by chain of functions,
-      * that can be monotomic on certain ranges?
-      * If these conditions are true, then returns number of column in key, type of resulting expression
-      * and fills chain of possibly-monotonic functions.
-      */
-    bool isKeyPossiblyWrappedByMonotonicFunctions(
-        const ASTPtr & node,
-        const Context & context,
-        size_t & out_key_column_num,
-        DataTypePtr & out_key_res_column_type,
-        RPNElement::MonotonicFunctionsChain & out_functions_chain);
-
-    bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
-        const ASTPtr & node,
-        size_t & out_key_column_num,
-        DataTypePtr & out_key_column_type,
-        std::vector<String> & out_functions_chain);
-
-    bool canConstantBeWrappedByMonotonicFunctions(
-        const ASTPtr & node,
-        size_t & out_key_column_num,
-        DataTypePtr & out_key_column_type,
-        Field & out_value,
-        DataTypePtr & out_type);
-
-    void getKeyTuplePositionMapping(
-        const ASTPtr & node,
-        const Context & context,
-        std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
-        const size_t tuple_index,
-        size_t & out_key_column_num);
-
-    bool isTupleIndexable(
-        const ASTPtr & node,
-        const Context & context,
-        RPNElement & out,
-        const SetPtr & prepared_set,
-        size_t & out_key_column_num);
-
     RPN rpn;
 
     SortDescription sort_descr;
     ColumnIndices key_columns;
     ExpressionActionsPtr key_expr;
-    PreparedSets prepared_sets;
 };
 
 }
diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.cpp b/dbms/src/Storages/MergeTree/RPNBuilder.cpp
new file mode 100644
index 00000000000..9a2830612b9
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/RPNBuilder.cpp
@@ -0,0 +1,540 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+extern const int BAD_TYPE_OF_FIELD;
+extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
+} // namespace ErrorCodes
+
+const tipb::Expr & getChild(const tipb::Expr & node, int index) { return node.children(index); }
+
+const ASTPtr & getChild(const ASTPtr & node, int index)
+{
+    if (const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get()))
+    {
+        return func->arguments->children[index];
+    }
+    else
+    {
+        return node->children[index];
+    }
+}
+
+int getChildCount(const tipb::Expr & node) { return node.children_size(); }
+
+int getChildCount(const ASTPtr & node)
+{
+    if (const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get()))
+    {
+        return func->arguments->children.size();
+    }
+    else
+    {
+        return node->children.size();
+    }
+}
+
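+// The overload pairs in this file (getChild / getChildCount / getFuncName / getColumnName / ...)
+// let the RPNBuilder template walk either an AST node (ASTPtr) or a TiDB DAG expression
+// (tipb::Expr) through one shared implementation.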
+const String getFuncName(const tipb::Expr & node) { return getFunctionName(node); }
+
+const String getFuncName(const ASTPtr & node)
+{
+    if (const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get()))
+    {
+        return func->name;
+    }
+    return "";
+}
+
+const String getColumnName(const tipb::Expr & node, const NamesAndTypesList & source_columns)
+{
+    if (node.tp() == tipb::ExprType::ColumnRef)
+    {
+        auto col_id = getColumnID(node);
+        if (col_id < 0 || col_id >= (Int64)source_columns.size())
+            return "";
+        return source_columns.getNames()[col_id];
+    }
+    return "";
+}
+
+const String getColumnName(const ASTPtr & node, const NamesAndTypesList &) { return node->getColumnName(); }
+
+bool isFuncNode(const ASTPtr & node) { return typeid_cast<const ASTFunction *>(node.get()); }
+
+bool isFuncNode(const tipb::Expr & node) { return node.tp() == tipb::ExprType::ScalarFunc; }
+
+/** Computes the value of a constant expression and its data type.
+  * Returns false if the expression isn't constant.
+  */
+bool getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type)
+{
+    String column_name = expr->getColumnName();
+
+    if (const ASTLiteral * lit = typeid_cast<const ASTLiteral *>(expr.get()))
+    {
+        /// By default block_with_constants has only one column named "_dummy".
+        /// If the block contains only constants it may not be preprocessed by
+        /// ExpressionAnalyzer, so try to look up in the default column.
+        if (!block_with_constants.has(column_name))
+            column_name = "_dummy";
+
+        /// Simple literal
+        out_value = lit->value;
+        out_type = block_with_constants.getByName(column_name).type;
+        return true;
+    }
+    else if (block_with_constants.has(column_name) && block_with_constants.getByName(column_name).column->isColumnConst())
+    {
+        /// An expression which is dependent on constants only
+        const auto & expr_info = block_with_constants.getByName(column_name);
+        out_value = (*expr_info.column)[0];
+        out_type = expr_info.type;
+        return true;
+    }
+    else
+        return false;
+}
+
+/** Computes the value of a constant expression and its data type.
+  * Returns false if the expression isn't constant.
+  */
+bool getConstant(const tipb::Expr & expr, Block &, Field & out_value, DataTypePtr & out_type)
+{
+
+    if (isLiteralExpr(expr))
+    {
+        out_value = decodeLiteral(expr);
+        // TODO: check whether any extra cast is needed
+        out_type = exprHasValidFieldType(expr) ? getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), out_value);
+        return true;
+    }
+
+    return false;
+}
+
+void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node)
+{
+    if (desired_type->equals(*src_type))
+        return;
+
+    try
+    {
+        /// NOTE: We don't need accurate info about src_type at this moment
+        src_value = convertFieldToType(src_value, *desired_type);
+    }
+    catch (...)
+    {
+        throw Exception("Key expression contains comparison between inconvertible types: " + desired_type->getName() + " and "
+                + src_type->getName() + " inside " + node,
+            ErrorCodes::BAD_TYPE_OF_FIELD);
+    }
+}
+
+void applyFunction(
+    const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value, DataTypePtr & res_type, Field & res_value)
+{
+    res_type = func->getReturnType();
+
+    Block block{{arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {nullptr, res_type, "y"}};
+
+    func->execute(block, {0}, 1);
+
+    block.safeGetByPosition(1).column->get(0, res_value);
+}
+
+bool setContains(const tipb::Expr & expr, DAGPreparedSets & sets) { return sets.count(&expr); }
+
+bool setContains(const ASTPtr & expr, PreparedSets & sets) { return sets.count(getChild(expr, 1).get()); }
+
+SetPtr & lookByExpr(const tipb::Expr & expr, DAGPreparedSets & sets) { return sets[&expr]; }
+
+SetPtr & lookByExpr(const ASTPtr & expr, PreparedSets & sets) { return sets[getChild(expr, 1).get()]; }
+
+String nodeToString(const tipb::Expr & node) { return node.DebugString(); }
+
+String nodeToString(const ASTPtr & node) { return queryToString(node); }
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
+    const NodeT & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, std::vector<String> & out_functions_chain)
+{
+    /** By itself, the key column can be a functional expression. For example, `intHash32(UserID)`.
+      * Therefore, use the full name of the expression for search.
+      */
+    const auto & sample_block = key_expr->getSampleBlock();
+    String name = getColumnName(node, source_columns);
+
+    auto it = key_columns.find(name);
+    if (key_columns.end() != it)
+    {
+        out_key_column_num = it->second;
+        out_key_column_type = sample_block.getByName(it->first).type;
+        return true;
+    }
+
+    if (isFuncNode(node))
+    {
+        if (getChildCount(node) != 1)
+            return false;
+
+        out_functions_chain.push_back(getFuncName(node));
+
+        if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(getChild(node, 0), out_key_column_num, out_key_column_type, out_functions_chain))
+            return false;
+
+        return true;
+    }
+
+    return false;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::isKeyPossiblyWrappedByMonotonicFunctions(const NodeT & node,
+    const Context & context,
+    size_t & out_key_column_num,
+    DataTypePtr & out_key_res_column_type,
+    RPNElement::MonotonicFunctionsChain & out_functions_chain)
+{
+    std::vector<String> chain_not_tested_for_monotonicity;
+    DataTypePtr key_column_type;
+
+    if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
+        return false;
+
+    for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
+    {
+        auto func_builder = FunctionFactory::instance().tryGet(*it, context);
+        ColumnsWithTypeAndName arguments{{nullptr, key_column_type, ""}};
+        auto func = func_builder->build(arguments);
+
+        if (!func || !func->hasInformationAboutMonotonicity())
+            return false;
+
+        key_column_type = func->getReturnType();
+        out_functions_chain.push_back(func);
+    }
+
+    out_key_res_column_type = key_column_type;
+
+    return true;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+void RPNBuilder<NodeT, PreparedSetsT>::getKeyTuplePositionMapping(const NodeT & node,
+    const Context & context,
+    std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
+    const size_t tuple_index,
+    size_t & out_key_column_num)
+{
+    MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
+    index_mapping.tuple_index = tuple_index;
+    DataTypePtr data_type;
+    if (isKeyPossiblyWrappedByMonotonicFunctions(node, context, index_mapping.key_index, data_type, index_mapping.functions))
+    {
+        indexes_mapping.push_back(index_mapping);
+        if (out_key_column_num < index_mapping.key_index)
+        {
+            out_key_column_num = index_mapping.key_index;
+        }
+    }
+}
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::isTupleIndexable(
+    const NodeT & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, size_t & out_key_column_num)
+{
+    out_key_column_num = 0;
+    std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
+
+    size_t num_key_columns = prepared_set->getDataTypes().size();
+
+    bool is_func = isFuncNode(node);
+    if (is_func && getFuncName(node) == "tuple")
+    {
+        if (num_key_columns != (size_t)getChildCount(node))
+        {
+            std::stringstream message;
+            message << "Number of columns in section IN doesn't match. " << getChildCount(node) << " at left, " << num_key_columns
+                    << " at right.";
+            throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
+        }
+
+        size_t current_tuple_index = 0;
+        for (int i = 0; i < getChildCount(node); i++)
+        {
+            const auto & arg = getChild(node, i);
+            getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num);
+            ++current_tuple_index;
+        }
+    }
+    else
+    {
+        getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num);
+    }
+
+    if (indexes_mapping.empty())
+        return false;
+
+    out.set_index = std::make_shared<MergeTreeSetIndex>(prepared_set->getSetElements(), std::move(indexes_mapping));
+
+    return true;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::canConstantBeWrappedByMonotonicFunctions(
+    const NodeT & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type)
+{
+    String expr_name = getColumnName(node, source_columns);
+    const auto & sample_block = key_expr->getSampleBlock();
+    if (!sample_block.has(expr_name))
+        return false;
+
+    bool found_transformation = false;
+    for (const ExpressionAction & a : key_expr->getActions())
+    {
+        /** The key functional expression constraint may be inferred from a plain column in the expression.
+          * For example, if the key contains `toStartOfHour(Timestamp)` and the query contains `WHERE Timestamp >= now()`,
+          * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
+          * condition also holds, so the index may be used to select only parts satisfying this condition.
+          *
+          * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the
+          * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`).
+          * Instead, we can qualify only functions that do not transform the range (for example rounding),
+          * which while not strictly monotonic, are monotonic everywhere on the input range.
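+          *
+          * When such a transformation is found, the caller additionally weakens the
+          * comparison (see is_constant_transformed), e.g. "x > 5" becomes "round(x) >= 5".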
+          */
+        const auto & action = a.argument_names;
+        if (a.type == ExpressionAction::Type::APPLY_FUNCTION && action.size() == 1 && a.argument_names[0] == expr_name)
+        {
+            if (!a.function->hasInformationAboutMonotonicity())
+                return false;
+
+            // Range is irrelevant in this case
+            IFunction::Monotonicity monotonicity = a.function->getMonotonicityForRange(*out_type, Field(), Field());
+            if (!monotonicity.is_always_monotonic)
+                return false;
+
+            // Apply the next transformation step
+            DataTypePtr new_type;
+            applyFunction(a.function, out_type, out_value, new_type, out_value);
+            if (!new_type)
+                return false;
+
+            out_type.swap(new_type);
+            expr_name = a.result_name;
+
+            // Transformation results in a key expression, accept
+            auto it = key_columns.find(expr_name);
+            if (key_columns.end() != it)
+            {
+                out_key_column_num = it->second;
+                out_key_column_type = sample_block.getByName(it->first).type;
+                found_transformation = true;
+                break;
+            }
+        }
+    }
+
+    return found_transformation;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::operatorFromNodeTree(const NodeT & node, RPNElement & out)
+{
+    /// Functions AND, OR, NOT.
+    /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
+      * (or, the same thing - calling the function `and` from one argument).
+      */
+    if (!isFuncNode(node))
+        return false;
+    String name = getFuncName(node);
+
+    if (name == "not")
+    {
+        if (getChildCount(node) != 1)
+            return false;
+
+        out.function = RPNElement::FUNCTION_NOT;
+    }
+    else
+    {
+        if (name == "and" || name == "indexHint")
+            out.function = RPNElement::FUNCTION_AND;
+        else if (name == "or")
+            out.function = RPNElement::FUNCTION_OR;
+        else
+            return false;
+    }
+
+    return true;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+bool RPNBuilder<NodeT, PreparedSetsT>::atomFromNodeTree(
+    const NodeT & node, const Context & context, Block & block_with_constants, PreparedSetsT & sets, RPNElement & out)
+{
+    /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
+      * or itself, wrapped in a chain of possibly-monotonic functions,
+      * or constant expression - number.
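+      * For example: `col < 10`, `toDate(col) = '2019-07-30'` (a key column wrapped in a
+      * monotonic chain), or a bare constant as in `WHERE 0 AND something`.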
+      */
+    Field const_value;
+    DataTypePtr const_type;
+    if (isFuncNode(node))
+    {
+        if (getChildCount(node) != 2)
+            return false;
+
+        DataTypePtr key_expr_type;    /// Type of expression containing key column
+        size_t key_arg_pos;           /// Position of argument with key column (non-const argument)
+        size_t key_column_num;        /// Number of a key column (inside sort_descr array)
+        RPNElement::MonotonicFunctionsChain chain;
+        bool is_set_const = false;
+        bool is_constant_transformed = false;
+        const NodeT & child0 = getChild(node, 0);
+        const NodeT & child1 = getChild(node, 1);
+
+        if (setContains(node, sets) && isTupleIndexable(child0, context, out, lookByExpr(node, sets), key_column_num))
+        {
+            key_arg_pos = 0;
+            is_set_const = true;
+        }
+        else if (getConstant(child1, block_with_constants, const_value, const_type)
+            && isKeyPossiblyWrappedByMonotonicFunctions(child0, context, key_column_num, key_expr_type, chain))
+        {
+            key_arg_pos = 0;
+        }
+        else if (getConstant(child1, block_with_constants, const_value, const_type)
+            && canConstantBeWrappedByMonotonicFunctions(child0, key_column_num, key_expr_type, const_value, const_type))
+        {
+            key_arg_pos = 0;
+            is_constant_transformed = true;
+        }
+        else if (getConstant(child0, block_with_constants, const_value, const_type)
+            && isKeyPossiblyWrappedByMonotonicFunctions(child1, context, key_column_num, key_expr_type, chain))
+        {
+            key_arg_pos = 1;
+        }
+        else if (getConstant(child0, block_with_constants, const_value, const_type)
+            && canConstantBeWrappedByMonotonicFunctions(child1, key_column_num, key_expr_type, const_value, const_type))
+        {
+            key_arg_pos = 1;
+            is_constant_transformed = true;
+        }
+        else
+            return false;
+
+        std::string func_name = getFuncName(node);
+
+        // make sure that an RPNElement of FUNCTION_IN_SET/FUNCTION_NOT_IN_SET
+        // has a valid set in PreparedSets
+        if (func_name == "in" || func_name == "notIn")
+            if (!is_set_const)
+                return false;
+
+        /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
+        if (is_constant_transformed)
+        {
+            if (func_name == "less")
+                func_name = "lessOrEquals";
+            else if (func_name == "greater")
+                func_name = "greaterOrEquals";
+        }
+
+        /// Replace <const> <sign> <data> on to <data> <-sign> <const>
+        if (key_arg_pos == 1)
+        {
+            if (func_name == "less")
+                func_name = "greater";
+            else if (func_name == "greater")
+                func_name = "less";
+            else if (func_name == "greaterOrEquals")
+                func_name = "lessOrEquals";
+            else if (func_name == "lessOrEquals")
+                func_name = "greaterOrEquals";
+            else if (func_name == "in" || func_name == "notIn" || func_name == "like")
+            {
+                /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
+                return false;
+            }
+        }
+
+        out.key_column = key_column_num;
+        out.monotonic_functions_chain = std::move(chain);
+
+        const auto atom_it = KeyCondition::atom_map.find(func_name);
+        if (atom_it == std::end(KeyCondition::atom_map))
+            return false;
+
+        bool cast_not_needed = is_set_const /// Set args are already casted inside Set::createFromAST
+            || (key_expr_type->isNumber() && const_type->isNumber()); /// Numbers are accurately compared without cast.
+
+        if (!cast_not_needed)
+            castValueToType(key_expr_type, const_value, const_type, nodeToString(node));
+
+        return atom_it->second(out, const_value);
+    }
+    else if (getConstant(
+                 node, block_with_constants, const_value, const_type)) /// For cases where it says, for example, `WHERE 0 AND something`
+    {
+        if (const_value.getType() == Field::Types::UInt64 || const_value.getType() == Field::Types::Int64
+            || const_value.getType() == Field::Types::Float64)
+        {
+            /// Zero in all types is represented in memory the same way as in UInt64.
+            out.function = const_value.get<UInt64>() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE;
+
+            return true;
+        }
+    }
+
+    return false;
+}
+
+template <typename NodeT, typename PreparedSetsT>
+void RPNBuilder<NodeT, PreparedSetsT>::traverseNodeTree(
+    const NodeT & node, const Context & context, Block & block_with_constants, PreparedSetsT & sets, RPN & rpn)
+{
+    RPNElement element;
+
+    if (isFuncNode(node))
+    {
+        if (operatorFromNodeTree(node, element))
+        {
+            for (size_t i = 0, size = getChildCount(node); i < size; ++i)
+            {
+                traverseNodeTree(getChild(node, i), context, block_with_constants, sets, rpn);
+
+                /** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
+                  * - in this case `n - 1` elements are added (where `n` is the number of arguments).
+                  */
+                if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
+                    rpn.push_back(element);
+            }
+
+            return;
+        }
+    }
+
+    if (!atomFromNodeTree(node, context, block_with_constants, sets, element))
+    {
+        element.function = RPNElement::FUNCTION_UNKNOWN;
+    }
+
+    rpn.emplace_back(std::move(element));
+}
+
+template class RPNBuilder<ASTPtr, PreparedSets>;
+template class RPNBuilder<tipb::Expr, DAGPreparedSets>;
+
+} // namespace DB
diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h
new file mode 100644
index 00000000000..f9eaf263cf5
--- /dev/null
+++ b/dbms/src/Storages/MergeTree/RPNBuilder.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+using RPN = std::vector<RPNElement>;
+using DAGPreparedSets = std::unordered_map<const tipb::Expr *, SetPtr>;
+
+void applyFunction(
+    const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value, DataTypePtr & res_type, Field & res_value);
+
+template <typename NodeT, typename PreparedSetsT>
+class RPNBuilder
+{
+public:
+    RPNBuilder(const ExpressionActionsPtr & key_expr_, ColumnIndices & key_columns_, const NamesAndTypesList & source_columns_)
+        : key_expr(key_expr_), key_columns(key_columns_), source_columns(source_columns_)
+    {}
+
+    bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
+        const NodeT & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, std::vector<String> & out_functions_chain);
+
+    /** Is node the key column
+      * or expression in which column of key is wrapped by chain of functions,
+      * that can be monotonic on certain ranges?
+      * If these conditions are true, then returns number of column in key, type of resulting expression
+      * and fills chain of possibly-monotonic functions.
+      */
+    bool isKeyPossiblyWrappedByMonotonicFunctions(const NodeT & node,
+        const Context & context,
+        size_t & out_key_column_num,
+        DataTypePtr & out_key_res_column_type,
+        RPNElement::MonotonicFunctionsChain & out_functions_chain);
+
+    void getKeyTuplePositionMapping(const NodeT & node,
+        const Context & context,
+        std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
+        const size_t tuple_index,
+        size_t & out_key_column_num);
+    /// Try to prepare KeyTuplePositionMapping for tuples from IN expression.
+    bool isTupleIndexable(
+        const NodeT & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, size_t & out_key_column_num);
+
+    bool canConstantBeWrappedByMonotonicFunctions(
+        const NodeT & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type);
+
+    bool operatorFromNodeTree(const NodeT & node, RPNElement & out);
+
+    bool atomFromNodeTree(
+        const NodeT & node, const Context & context, Block & block_with_constants, PreparedSetsT & sets, RPNElement & out);
+
+    void traverseNodeTree(const NodeT & node, const Context & context, Block & block_with_constants, PreparedSetsT & sets, RPN & rpn);
+
+protected:
+    const ExpressionActionsPtr & key_expr;
+    ColumnIndices & key_columns;
+    const NamesAndTypesList & source_columns;
+};
+} // namespace DB
diff --git a/dbms/src/Storages/SelectQueryInfo.cpp b/dbms/src/Storages/SelectQueryInfo.cpp
index 65178a3b6b0..de3b3eb3c59 100644
--- a/dbms/src/Storages/SelectQueryInfo.cpp
+++ b/dbms/src/Storages/SelectQueryInfo.cpp
@@ -7,11 +7,13 @@ namespace DB
 SelectQueryInfo::SelectQueryInfo(const SelectQueryInfo & query_info_)
     : query(query_info_.query),
       sets(query_info_.sets),
-      mvcc_query_info(query_info_.mvcc_query_info != nullptr ? std::make_unique<MvccQueryInfo>(*query_info_.mvcc_query_info) : nullptr)
+      mvcc_query_info(query_info_.mvcc_query_info != nullptr ? std::make_unique<MvccQueryInfo>(*query_info_.mvcc_query_info) : nullptr),
+      dag_query(query_info_.dag_query != nullptr ? std::make_unique<DAGQueryInfo>(*query_info_.dag_query) : nullptr)
 {}
 
 SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && query_info_)
-    : query(query_info_.query), sets(query_info_.sets), mvcc_query_info(std::move(query_info_.mvcc_query_info))
+    : query(query_info_.query), sets(query_info_.sets), mvcc_query_info(std::move(query_info_.mvcc_query_info)),
+      dag_query(std::move(query_info_.dag_query))
 {}
 
 } // namespace DB
diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h
index 01b73ac704f..67a3dcce2ba 100644
--- a/dbms/src/Storages/SelectQueryInfo.h
+++ b/dbms/src/Storages/SelectQueryInfo.h
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include
 
 namespace DB
 {
@@ -31,11 +32,15 @@ struct SelectQueryInfo
 
     std::unique_ptr<MvccQueryInfo> mvcc_query_info;
 
+    std::unique_ptr<DAGQueryInfo> dag_query;
+
     SelectQueryInfo() = default;
 
     SelectQueryInfo(const SelectQueryInfo & query_info_);
 
     SelectQueryInfo(SelectQueryInfo && query_info_);
+
+    bool fromAST() const { return dag_query == nullptr; };
 };
 
 } // namespace DB
diff --git a/tests/mutable-test/txn_dag/key_condition.test b/tests/mutable-test/txn_dag/key_condition.test
new file mode 100644
index 00000000000..3c30fcb413a
--- /dev/null
+++ b/tests/mutable-test/txn_dag/key_condition.test
@@ -0,0 +1,35 @@
+# Preparation.
+=> DBGInvoke __enable_schema_sync_service('true')
+
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test
+
+=> DBGInvoke __set_flush_threshold(1000000, 1000000)
+
+# Data.
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64','col_2')
+=> DBGInvoke __refresh_schemas()
+=> DBGInvoke __put_region(4, 0, 100, default, test)
+=> DBGInvoke __raft_insert_row(default, test, 4, 66, 'test1')
+=> DBGInvoke __raft_insert_row(default, test, 4, 77, 'test2')
+
+# DAG read by not specifying region id, where col_2 = 66.
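+# Each query below exercises a different comparison atom (=, >, >=) of the RPN-based key condition.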
+=> DBGInvoke dag('select * from default.test where col_2 = 66')
+┌─col_1─┬─col_2─┐
+│ test1 │    66 │
+└───────┴───────┘
+
+=> DBGInvoke dag('select * from default.test where col_2 > 66')
+┌─col_1─┬─col_2─┐
+│ test2 │    77 │
+└───────┴───────┘
+
+=> DBGInvoke dag('select * from default.test where col_2 >= 66')
+┌─col_1─┬─col_2─┐
+│ test1 │    66 │
+│ test2 │    77 │
+└───────┴───────┘
+
+# Clean up.
+=> DBGInvoke __drop_tidb_table(default, test)
+=> drop table if exists default.test

From 4aa2b5815e00c6ea3b52cb1150be79f961333dc1 Mon Sep 17 00:00:00 2001
From: xufei
Date: Mon, 30 Sep 2019 14:09:04 +0800
Subject: [PATCH 65/79] only return execution summaries if requested (#264)

---
 dbms/src/Flash/Coprocessor/DAGDriver.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp
index fdc40506743..a0499693def 100644
--- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp
@@ -71,6 +71,8 @@ try
         streams.in->getHeader());
     copyData(*streams.in, *dag_output_stream);
 
+    if (!dag_request.has_collect_execution_summaries() || !dag_request.collect_execution_summaries())
+        return;
     // add ExecutorExecutionSummary info
     for (auto & p_streams : dag_context.profile_streams_list)
     {

From 80f6f35f3b9b5799a78d18d5647871f47a736904 Mon Sep 17 00:00:00 2001
From: ruoxi
Date: Tue, 8 Oct 2019 17:09:37 +0800
Subject: [PATCH 66/79] Refine service init (#265)

---
 dbms/src/Flash/FlashService.cpp | 27 +----------
 dbms/src/Flash/FlashService.h   | 12 +-----
 dbms/src/Server/Server.cpp      | 75 +++++++++++++++++++++++----------
 3 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp
index a5521b9eca9..e1f1cb76094 100644
--- a/dbms/src/Flash/FlashService.cpp
+++ b/dbms/src/Flash/FlashService.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 
 namespace DB
@@ -14,31 +15,7 @@ namespace ErrorCodes
 extern const int NOT_IMPLEMENTED;
 }
 
-FlashService::FlashService(const std::string & address_, IServer & server_)
-    : address(address_), server(server_), log(&Logger::get("FlashService"))
-{
-    grpc::ServerBuilder builder;
-    builder.AddListeningPort(address, grpc::InsecureServerCredentials());
-    builder.RegisterService(this);
-    builder.RegisterService(&server.context().getRaftService());
-
-    // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error.
-    builder.SetMaxReceiveMessageSize(-1);
-    builder.SetMaxSendMessageSize(-1);
-
-    grpc_server = builder.BuildAndStart();
-
-    LOG_INFO(log, "Flash service listening on [" << address << "]");
-}
-
-FlashService::~FlashService()
-{
-    // wait 5 seconds for pending rpcs to gracefully stop
-    gpr_timespec deadline{5, 0, GPR_TIMESPAN};
-    LOG_DEBUG(log, "Begin to shutting down grpc server");
-    grpc_server->Shutdown(deadline);
-    grpc_server->Wait();
-}
+FlashService::FlashService(IServer & server_) : server(server_), log(&Logger::get("FlashService")) {}
 
 grpc::Status FlashService::Coprocessor(
     grpc::ServerContext * grpc_context, const coprocessor::Request * request, coprocessor::Response * response)
diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h
index 92b895f0137..15f33df8558 100644
--- a/dbms/src/Flash/FlashService.h
+++ b/dbms/src/Flash/FlashService.h
@@ -1,9 +1,7 @@
 #pragma once
 
 #include
-#include
 #include
-#include
 #include
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
@@ -13,16 +11,12 @@
 namespace DB
 {
-using GRPCServerPtr = std::unique_ptr<grpc::Server>;
-class FlashService;
-using FlashServicePtr = std::shared_ptr<FlashService>;
+class IServer;
 
 class FlashService final : public tikvpb::Tikv::Service, public std::enable_shared_from_this<FlashService>, private boost::noncopyable
 {
 public:
-    FlashService(const std::string & address_, IServer & server_);
-
-    ~FlashService() final;
+    explicit FlashService(IServer & server_);
 
     grpc::Status Coprocessor(
         grpc::ServerContext * grpc_context, const coprocessor::Request * request, coprocessor::Response * response) override;
@@ -34,9 +28,7 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shared_from_this<FlashService>, private boost::noncopyable
     std::tuple<Context, grpc::Status> createDBContext(grpc::ServerContext * grpc_contex);
 
 private:
-    std::string address;
     IServer & server;
-    GRPCServerPtr grpc_server;
     Logger * log;
 };
 
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 3b8cfa52ccb..4742ec94a74 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -433,43 +434,71 @@ int Server::main(const std::vector<std::string> & /*args*/)
         LOG_DEBUG(log, "Shutted down storages.");
     });
 
-    if (has_zookeeper && config().has("distributed_ddl"))
-    {
-        /// DDL worker should be started after all tables were loaded
-        String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
-        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
-    }
-
     {
         /// initialize TMTContext
        global_context->getTMTContext().restore();
    }

-    if (need_raft_service)
-        global_context->initializeRaftService();
+    /// Then, startup grpc server to serve raft and/or flash services.
+    String flash_server_addr = config().getString("flash.service_addr", "0.0.0.0:3930");
+    std::unique_ptr<FlashService> flash_service = nullptr;
+    std::unique_ptr<grpc::Server> flash_grpc_server = nullptr;
+    {
+        grpc::ServerBuilder builder;
+        builder.AddListeningPort(flash_server_addr, grpc::InsecureServerCredentials());
 
-    SCOPE_EXIT({
-        LOG_INFO(log, "Shutting down raft service.");
-        global_context->shutdownRaftService();
-        LOG_INFO(log, "Shutted down raft service.");
-    });
+        /// Init and register raft service if necessary.
+        if (need_raft_service)
+        {
+            global_context->initializeRaftService();
+            builder.RegisterService(&(global_context->getRaftService()));
+            LOG_INFO(log, "Raft service registered");
+        }
 
-    FlashServicePtr flash_service = nullptr;
-    if (config().has("flash"))
-    {
-        String flash_service_addr = config().getString("flash.service_addr");
-        flash_service = std::make_shared<FlashService>(flash_service_addr, *this);
+        /// Init and register flash service.
+        flash_service = std::make_unique<FlashService>(*this);
+        builder.RegisterService(flash_service.get());
+        LOG_INFO(log, "Flash service registered");
+
+        /// Kick off grpc server.
+        // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error.
+        builder.SetMaxReceiveMessageSize(-1);
+        builder.SetMaxSendMessageSize(-1);
+        flash_grpc_server = builder.BuildAndStart();
+        LOG_INFO(log, "Flash grpc server listening on [" << flash_server_addr << "]");
     }
 
     SCOPE_EXIT({
-        if (flash_service)
+        /// Shut down grpc server.
+        // wait 5 seconds for pending rpcs to gracefully stop
+        gpr_timespec deadline{5, 0, GPR_TIMESPAN};
+        LOG_INFO(log, "Begin to shut down flash grpc server");
+        flash_grpc_server->Shutdown(deadline);
+        flash_grpc_server->Wait();
+        flash_grpc_server.reset();
+        LOG_INFO(log, "Shut down flash grpc server");
+
+        /// Close flash service.
+        LOG_INFO(log, "Begin to shut down flash service");
+        flash_service.reset();
+        LOG_INFO(log, "Shut down flash service");
+
+        /// Close raft service if necessary.
+        if (need_raft_service)
         {
-            LOG_INFO(log, "Shutting down Flash service.");
-            flash_service.reset();
-            LOG_INFO(log, "Shutted down flash service.");
+            LOG_INFO(log, "Begin to shut down raft service");
+            global_context->shutdownRaftService();
+            LOG_INFO(log, "Shut down raft service");
         }
     });
 
+    if (has_zookeeper && config().has("distributed_ddl"))
+    {
+        /// DDL worker should be started after all tables were loaded
+        String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
+        global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
+    }
+
     {
         Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);

From f2553625ebcd76c460800d0aab43a8b5a11506fe Mon Sep 17 00:00:00 2001
From: xufei
Date: Thu, 10 Oct 2019 20:33:46 +0800
Subject: [PATCH 67/79] FLASH-554 cop check range should be based on region
 range (#270)

* only return execution summaries if requested

* cop check range should be based on region range

* address comments

* add tests

* minor improve
---
 dbms/src/Debug/dbgFuncCoprocessor.cpp         | 25 ++++++--
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 64 +++++++++++++++----
 tests/mutable-test/txn_dag/key_range.test     | 24 +++++++
 3 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100644 tests/mutable-test/txn_dag/key_range.test

diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp
index 3f46106f2cb..e498ff30065 100644
--- a/dbms/src/Debug/dbgFuncCoprocessor.cpp
+++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp
@@ -41,7 +41,8 @@ std::tuple compileQuery(
     Context & context, const String & query, SchemaFetcher schema_fetcher, Timestamp start_ts, Int64 tz_offset, const String & tz_name);
 tipb::SelectResponse executeDAGRequest(
-    Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version);
+    Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version,
+    UInt64 region_conf_version, std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> & key_ranges);
 BlockInputStreamPtr outputDAGResponse(Context & context, const DAGSchema & schema, const tipb::SelectResponse & dag_response);
 
 BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args)
@@ -86,7 +87,14 @@ BlockInputStreamPtr dbgFuncDAG(Context & context, const ASTs & args)
         if (!region)
             throw Exception("No such region", ErrorCodes::BAD_ARGUMENTS);
     }
-    tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region->id(), region->version(), region->confVer());
+
+    auto handle_range = region->getHandleRangeByTable(table_id);
+    std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> key_ranges;
+    DecodedTiKVKey start_key = RecordKVFormat::genRawKey(table_id, handle_range.first.handle_id);
+    DecodedTiKVKey end_key = RecordKVFormat::genRawKey(table_id, handle_range.second.handle_id);
+    key_ranges.emplace_back(std::make_pair(std::move(start_key), std::move(end_key)));
+    tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region->id(), region->version(),
+        region->confVer(), key_ranges);
 
     return outputDAGResponse(context, schema, dag_response);
 }
@@ -119,7 +127,13 @@ BlockInputStreamPtr dbgFuncMockDAG(Context & context, const ASTs & args)
     std::ignore = table_id;
 
     RegionPtr region = context.getTMTContext().getKVStore()->getRegion(region_id);
-    tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region_id, region->version(), region->confVer());
+    auto handle_range = region->getHandleRangeByTable(table_id);
+    std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> key_ranges;
+    DecodedTiKVKey start_key = RecordKVFormat::genRawKey(table_id, handle_range.first.handle_id);
+    DecodedTiKVKey end_key = RecordKVFormat::genRawKey(table_id, handle_range.second.handle_id);
+    key_ranges.emplace_back(std::make_pair(std::move(start_key), std::move(end_key)));
+    tipb::SelectResponse dag_response = executeDAGRequest(context, dag_request, region_id, region->version(),
+        region->confVer(), key_ranges);
 
     return outputDAGResponse(context, schema, dag_response);
 }
@@ -562,13 +576,14 @@ std::tuple compileQuery(
 }
 
 tipb::SelectResponse executeDAGRequest(
-    Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version, UInt64 region_conf_version)
+    Context & context, const tipb::DAGRequest & dag_request, RegionID region_id, UInt64 region_version,
+    UInt64 region_conf_version, std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> & key_ranges)
 {
     static Logger * log = &Logger::get("MockDAG");
     LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handling DAG request: " << dag_request.DebugString());
     context.setSetting("dag_planner", "optree");
     tipb::SelectResponse dag_response;
-    DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, {}, dag_response, true);
+    DAGDriver driver(context, dag_request, region_id, region_version, region_conf_version, std::move(key_ranges), dag_response, true);
     driver.execute();
     LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done");
     return dag_response;
 }
diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
index a0029c04114..eca4627ed21 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
@@ -47,13 +47,14 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_)
 {}
 
 template <typename HandleType>
-bool isAllValueCoveredByRanges(std::vector<HandleRange<HandleType>> & ranges)
+bool isAllValueCoveredByRanges(std::vector<HandleRange<HandleType>> & ranges, const std::vector<HandleRange<HandleType>> & region_ranges)
 {
     if (ranges.empty())
         return false;
     std::sort(ranges.begin(), ranges.end(), [](const HandleRange<HandleType> & a, const HandleRange<HandleType> & b) { return a.first < b.first; });
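+    // The ranges are now sorted by start handle; overlapping ones are coalesced below,
+    // e.g. [1, 5) and [4, 10) merge into [1, 10). A region range counts as covered iff
+    // it fits entirely inside a single merged range.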
+    std::vector<HandleRange<HandleType>> merged_ranges;
     HandleRange<HandleType> merged_range;
     merged_range.first = ranges[0].first;
     merged_range.second = ranges[0].second;
 
@@ -63,41 +64,77 @@ bool isAllValueCoveredByRanges(std::vector<HandleRange<HandleType>> & ranges)
         if (merged_range.second >= ranges[i].first)
             merged_range.second = merged_range.second >= ranges[i].second ? merged_range.second : ranges[i].second;
         else
-            break;
+        {
+            merged_ranges.emplace_back(std::make_pair(merged_range.first, merged_range.second));
+            merged_range.first = ranges[i].first;
+            merged_range.second = ranges[i].second;
+        }
     }
+    merged_ranges.emplace_back(std::make_pair(merged_range.first, merged_range.second));
 
-    return merged_range.first == TiKVHandle::Handle<HandleType>::normal_min && merged_range.second == TiKVHandle::Handle<HandleType>::max;
+    for (const auto & region_range : region_ranges)
+    {
+        bool covered = false;
+        for (const auto & range : merged_ranges)
+        {
+            if (region_range.first >= range.first && region_range.second <= range.second)
+            {
+                covered = true;
+                break;
+            }
+        }
+        if (!covered && region_range.second > region_range.first)
+            return false;
+    }
+    return true;
 }
 
-bool checkKeyRanges(const std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> & key_ranges, TableID table_id, bool pk_is_uint64)
+bool checkKeyRanges(const std::vector<std::pair<DecodedTiKVKey, DecodedTiKVKey>> & key_ranges, TableID table_id, bool pk_is_uint64,
+    const ImutRegionRangePtr & region_key_range)
 {
     if (key_ranges.empty())
         return true;
 
-    std::vector<HandleRange<Int64>> scan_ranges;
+    std::vector<HandleRange<Int64>> handle_ranges;
     for (auto & range : key_ranges)
     {
         TiKVRange::Handle start = TiKVRange::getRangeHandle(range.first, table_id);
         TiKVRange::Handle end = TiKVRange::getRangeHandle(range.second, table_id);
-        scan_ranges.emplace_back(std::make_pair(start, end));
+        handle_ranges.emplace_back(std::make_pair(start, end));
     }
 
+    std::vector<HandleRange<Int64>> region_handle_ranges;
+    auto & raw_keys = region_key_range->rawKeys();
+    TiKVRange::Handle region_start = TiKVRange::getRangeHandle(raw_keys.first, table_id);
+    TiKVRange::Handle region_end = TiKVRange::getRangeHandle(raw_keys.second, table_id);
+    region_handle_ranges.emplace_back(std::make_pair(region_start, region_end));
+
     if (pk_is_uint64)
     {
-        std::vector<HandleRange<UInt64>> update_ranges;
-        for (auto & range : scan_ranges)
+        std::vector<HandleRange<UInt64>> update_handle_ranges;
+        for (auto & range : handle_ranges)
         {
             const auto [n, new_range] = CHTableHandle::splitForUInt64TableHandle(range);
 
             for (int i = 0; i < n; i++)
             {
-                update_ranges.emplace_back(new_range[i]);
+                update_handle_ranges.emplace_back(new_range[i]);
             }
         }
-        return isAllValueCoveredByRanges(update_ranges);
+        std::vector<HandleRange<UInt64>> update_region_handle_ranges;
+        for (auto & range : region_handle_ranges)
+        {
+            const auto [n, new_range] = CHTableHandle::splitForUInt64TableHandle(range);
+
+            for (int i = 0; i < n; i++)
+            {
+                update_region_handle_ranges.emplace_back(new_range[i]);
+            }
+        }
+        return isAllValueCoveredByRanges(update_handle_ranges, update_region_handle_ranges);
     }
     else
-        return isAllValueCoveredByRanges(scan_ranges);
+        return isAllValueCoveredByRanges(handle_ranges, region_handle_ranges);
 }
 
 // the flow is the same as executeFetchcolumns
 void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline)
@@ -206,9 +243,6 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline)
         max_streams *= settings.max_streams_to_max_threads_ratio;
     }
 
-    if (!checkKeyRanges(dag.getKeyRanges(), table_id, storage->pkIsUInt64()))
-        throw Exception("Cop request only support full range scan for given region", ErrorCodes::COP_BAD_DAG_REQUEST);
-
     if (dag.hasSelection())
     {
         for (auto & condition : 
dag.getSelection().conditions()) @@ -235,6 +269,8 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) region_ids.push_back(info.region_id); throw RegionException(std::move(region_ids), RegionTable::RegionReadStatus::NOT_FOUND); } + if (!checkKeyRanges(dag.getKeyRanges(), table_id, storage->pkIsUInt64(), current_region->getRange())) + throw Exception("Cop request only support full range scan for given region", ErrorCodes::COP_BAD_DAG_REQUEST); info.range_in_table = current_region->getHandleRangeByTable(table_id); query_info.mvcc_query_info->regions_query_info.push_back(info); query_info.mvcc_query_info->concurrent = 0.0; diff --git a/tests/mutable-test/txn_dag/key_range.test b/tests/mutable-test/txn_dag/key_range.test new file mode 100644 index 00000000000..5a3860c3118 --- /dev/null +++ b/tests/mutable-test/txn_dag/key_range.test @@ -0,0 +1,24 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test_uint) +=> drop table if exists default.test_uint + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. +=> DBGInvoke __mock_tidb_table(default, test_uint, 'col_1 String, col_2 UInt64','col_2') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test_uint) +=> DBGInvoke __raft_insert_row(default, test_uint, 4, 88, 'test1') +=> DBGInvoke __raft_insert_row(default, test_uint, 4, 99, 'test2') + +=> DBGInvoke dag('select * from default.test_uint where col_2 >= 66') +┌─col_1─┬─col_2─┐ +│ test1 │ 88 │ +│ test2 │ 99 │ +└───────┴───────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test_uint) +=> drop table if exists default.test_uint From 7fc53ad3c9d233aebef02e16fb8e7d2df717fd32 Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 11 Oct 2019 15:18:55 +0800 Subject: [PATCH 68/79] minor improve (#273) --- dbms/src/DataTypes/DataTypeMyDateTime.cpp | 2 +- dbms/src/Interpreters/Settings.h | 2 +- dbms/src/Storages/Transaction/TypeMapping.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeMyDateTime.cpp b/dbms/src/DataTypes/DataTypeMyDateTime.cpp index 752931dc3fa..5eb6aba8cff 100644 --- a/dbms/src/DataTypes/DataTypeMyDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeMyDateTime.cpp @@ -20,7 +20,7 @@ DataTypeMyDateTime::DataTypeMyDateTime(int fraction_) { fraction = fraction_; if (fraction < 0 || fraction > 6) - throw Exception("fraction must >= 0 and < 6", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("fraction must >= 0 and <= 6", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } void DataTypeMyDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 6a2e619c50d..83c55d51089 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -30,7 +30,7 @@ struct Settings M(SettingBool, resolve_locks, false, "tmt resolve locks.") \ M(SettingUInt64, read_tso, DEFAULT_MAX_READ_TSO, "tmt read tso.") \ M(SettingInt64, dag_records_per_chunk, DEFAULT_DAG_RECORDS_PER_CHUNK, "default chunk size of a DAG response.") \ - M(SettingString, dag_planner, "sql", "planner for DAG query, sql builds the SQL string, optree builds the internal operator(stream) tree.") \ + M(SettingString, dag_planner, "optree", "planner for DAG query, sql builds the SQL string, optree builds the internal operator(stream) tree.") \ M(SettingBool, dag_expr_field_type_strict_check, true, "when set to true, every expr in the 
dag request must provide field type, otherwise only the result expr will be checked.") \ M(SettingInt64, schema_version, DEFAULT_UNSPECIFIED_SCHEMA_VERSION, "tmt schema version.") \ M(SettingUInt64, batch_commands_threads, 0, "Number of threads to use for handling batch commands concurrently. 0 means - same as 'max_threads'.") \ diff --git a/dbms/src/Storages/Transaction/TypeMapping.cpp b/dbms/src/Storages/Transaction/TypeMapping.cpp index 343bc30d4c9..d8ab1284b7b 100644 --- a/dbms/src/Storages/Transaction/TypeMapping.cpp +++ b/dbms/src/Storages/Transaction/TypeMapping.cpp @@ -135,7 +135,8 @@ std::enable_if_t, DataTypePtr> getDataTypeByColumnInfoBase(cons template std::enable_if_t, DataTypePtr> getDataTypeByColumnInfoBase(const ColumnInfo & column_info, const T *) { - DataTypePtr t = std::make_shared(column_info.decimal); + // In some cases, TiDB will set the decimal to -1, change -1 to 6 to avoid error + DataTypePtr t = std::make_shared(column_info.decimal == -1 ? 6 : column_info.decimal); if (should_widen) { From a1304ae7997c0b7d57854377ba637ab8d5dc279b Mon Sep 17 00:00:00 2001 From: Soup Date: Fri, 11 Oct 2019 21:48:40 +0800 Subject: [PATCH 69/79] Fix mutex on timezone retrieval (#276) * fix mutex contention * add const ref --- dbms/src/Functions/FunctionsDateTime.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index e41b762271d..1a6fbf3e028 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -1311,6 +1311,7 @@ class FunctionMyTimeZoneConvertByOffset : public IFunction } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { + static const DateLUTImpl & UTC = DateLUT::instance("UTC"); if (const ColumnVector *col_from = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get())) { auto col_to = ColumnVector::create(); @@ -1329,7 +1330,7 @@ class FunctionMyTimeZoneConvertByOffset : public IFunction for (size_t i = 0; i < size; ++i) { UInt64 result_time = vec_from[i] + offset; // todo maybe affected by daytime saving, need double check - convertTimeZoneByOffset(vec_from[i], result_time, offset, DateLUT::instance("UTC")); + convertTimeZoneByOffset(vec_from[i], result_time, offset, UTC); vec_to[i] = result_time; } @@ -1383,7 +1384,7 @@ class FunctionMyTimeZoneConverter : public IFunction size_t size = vec_from.size(); vec_to.resize(size); - const auto & time_zone_utc = DateLUT::instance("UTC"); + static const auto & time_zone_utc = DateLUT::instance("UTC"); const auto & time_zone_other = extractTimeZoneFromFunctionArguments(block, arguments, 1, 0); for (size_t i = 0; i < size; ++i) { From 687dcbe6662f42a53af714b06380c1d28af72510 Mon Sep 17 00:00:00 2001 From: ruoxi Date: Sat, 12 Oct 2019 09:47:08 +0800 Subject: [PATCH 70/79] Fix race condition of batch command handling (#277) --- dbms/src/Flash/BatchCommandsHandler.cpp | 66 +++++++++++++------------ dbms/src/Flash/BatchCommandsHandler.h | 11 +++-- dbms/src/Flash/CoprocessorHandler.cpp | 2 +- dbms/src/Flash/CoprocessorHandler.h | 4 +- dbms/src/Flash/FlashService.cpp | 15 +++--- dbms/src/Flash/FlashService.h | 2 +- 6 files changed, 53 insertions(+), 47 deletions(-) diff --git a/dbms/src/Flash/BatchCommandsHandler.cpp b/dbms/src/Flash/BatchCommandsHandler.cpp index e6768f03c13..c56a8dafad7 100644 --- a/dbms/src/Flash/BatchCommandsHandler.cpp +++ b/dbms/src/Flash/BatchCommandsHandler.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { @@ 
-10,6 +9,33 @@ BatchCommandsHandler::BatchCommandsHandler(BatchCommandsContext & batch_commands : batch_commands_context(batch_commands_context_), request(request_), response(response_), log(&Logger::get("BatchCommandsHandler")) {} +ThreadPool::Job BatchCommandsHandler::handleCommandJob( + const tikvpb::BatchCommandsRequest::Request & req, tikvpb::BatchCommandsResponse::Response & resp, grpc::Status & ret) const +{ + return [&]() { + if (!req.has_coprocessor()) + { + ret = grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); + return; + } + + const auto & cop_req = req.coprocessor(); + auto cop_resp = resp.mutable_coprocessor(); + + auto [context, status] = batch_commands_context.db_context_creation_func(&batch_commands_context.grpc_server_context); + if (!status.ok()) + { + ret = status; + return; + } + + CoprocessorContext cop_context(context, cop_req.context(), batch_commands_context.grpc_server_context); + CoprocessorHandler cop_handler(cop_context, &cop_req, cop_resp); + + ret = cop_handler.execute(); + }; +} + grpc::Status BatchCommandsHandler::execute() { if (request.requests_size() == 0) @@ -17,31 +43,6 @@ grpc::Status BatchCommandsHandler::execute() // TODO: Fill transport_layer_load into BatchCommandsResponse. - auto command_handler_func - = [](BatchCommandsContext::DBContextCreationFunc db_context_creation_func, grpc::ServerContext * grpc_server_context, - const tikvpb::BatchCommandsRequest::Request & req, tikvpb::BatchCommandsResponse::Response & resp, grpc::Status & ret) { - if (!req.has_coprocessor()) - { - ret = grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, ""); - return; - } - - const auto & cop_req = req.coprocessor(); - auto cop_resp = resp.mutable_coprocessor(); - - auto [context, status] = db_context_creation_func(grpc_server_context); - if (!status.ok()) - { - ret = status; - return; - } - - CoprocessorContext cop_context(context, cop_req.context(), *grpc_server_context); - CoprocessorHandler cop_handler(cop_context, &cop_req, cop_resp); - - ret = cop_handler.execute(); - }; - /// Shortcut for only one request by not going to thread pool. 
if (request.requests_size() == 1) { @@ -51,7 +52,7 @@ grpc::Status BatchCommandsHandler::execute() auto resp = response.add_responses(); response.add_request_ids(request.request_ids(0)); auto ret = grpc::Status::OK; - command_handler_func(batch_commands_context.db_context_creation_func, &batch_commands_context.grpc_server_context, req, *resp, ret); + handleCommandJob(req, *resp, ret)(); return ret; } @@ -65,7 +66,7 @@ grpc::Status BatchCommandsHandler::execute() ThreadPool thread_pool(max_threads); - std::vector rets; + std::vector rets(request.requests_size()); size_t i = 0; for (const auto & req : request.requests()) @@ -73,10 +74,8 @@ grpc::Status BatchCommandsHandler::execute() auto resp = response.add_responses(); response.add_request_ids(request.request_ids(i++)); rets.emplace_back(grpc::Status::OK); - thread_pool.schedule([&]() { - command_handler_func( - batch_commands_context.db_context_creation_func, &batch_commands_context.grpc_server_context, req, *resp, rets.back()); - }); + + thread_pool.schedule(handleCommandJob(req, *resp, rets.back())); } thread_pool.wait(); @@ -85,7 +84,10 @@ grpc::Status BatchCommandsHandler::execute() for (const auto & ret : rets) { if (!ret.ok()) + { + response.Clear(); return ret; + } } return grpc::Status::OK; diff --git a/dbms/src/Flash/BatchCommandsHandler.h b/dbms/src/Flash/BatchCommandsHandler.h index 800318be39b..55b07a628fd 100644 --- a/dbms/src/Flash/BatchCommandsHandler.h +++ b/dbms/src/Flash/BatchCommandsHandler.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #pragma GCC diagnostic push @@ -18,10 +19,10 @@ struct BatchCommandsContext /// Context creation function for each individual command - they should be handled isolated, /// given that context is being used to pass arguments regarding queries. 
- using DBContextCreationFunc = std::function(grpc::ServerContext *)>; + using DBContextCreationFunc = std::function(const grpc::ServerContext *)>; DBContextCreationFunc db_context_creation_func; - grpc::ServerContext & grpc_server_context; + const grpc::ServerContext & grpc_server_context; BatchCommandsContext( Context & db_context_, DBContextCreationFunc && db_context_creation_func_, grpc::ServerContext & grpc_server_context_) @@ -40,7 +41,11 @@ class BatchCommandsHandler grpc::Status execute(); protected: - BatchCommandsContext & batch_commands_context; + ThreadPool::Job handleCommandJob( + const tikvpb::BatchCommandsRequest::Request & req, tikvpb::BatchCommandsResponse::Response & resp, grpc::Status & ret) const; + +protected: + const BatchCommandsContext & batch_commands_context; const tikvpb::BatchCommandsRequest & request; tikvpb::BatchCommandsResponse & response; diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index bed9a27624e..faeef0d11af 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -46,8 +46,8 @@ try cop_context.kv_context.region_epoch().version(), cop_context.kv_context.region_epoch().conf_ver(), std::move(key_ranges), dag_response); driver.execute(); - LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); cop_response->set_data(dag_response.SerializeAsString()); + LOG_DEBUG(log, __PRETTY_FUNCTION__ << ": Handle DAG request done"); break; } case COP_REQ_TYPE_ANALYZE: diff --git a/dbms/src/Flash/CoprocessorHandler.h b/dbms/src/Flash/CoprocessorHandler.h index 477daeeb636..900d9d77fbe 100644 --- a/dbms/src/Flash/CoprocessorHandler.h +++ b/dbms/src/Flash/CoprocessorHandler.h @@ -17,9 +17,9 @@ struct CoprocessorContext { Context & db_context; const kvrpcpb::Context & kv_context; - grpc::ServerContext & grpc_server_context; + const grpc::ServerContext & grpc_server_context; - CoprocessorContext(Context & db_context_, const kvrpcpb::Context & kv_context_, grpc::ServerContext & grpc_server_context_) + CoprocessorContext(Context & db_context_, const kvrpcpb::Context & kv_context_, const grpc::ServerContext & grpc_server_context_) : db_context(db_context_), kv_context(kv_context_), grpc_server_context(grpc_server_context_) {} }; diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index e1f1cb76094..5a404b0d5f2 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -53,7 +53,8 @@ grpc::Status FlashService::BatchCommands( tikvpb::BatchCommandsResponse response; BatchCommandsContext batch_commands_context( - context, [this](grpc::ServerContext * grpc_server_context) { return createDBContext(grpc_server_context); }, *grpc_context); + context, [this](const grpc::ServerContext * grpc_server_context) { return createDBContext(grpc_server_context); }, + *grpc_context); BatchCommandsHandler batch_commands_handler(batch_commands_context, request, response); auto ret = batch_commands_handler.execute(); if (!ret.ok()) @@ -75,22 +76,20 @@ grpc::Status FlashService::BatchCommands( return grpc::Status::OK; } -String getClientMetaVarWithDefault(grpc::ServerContext * grpc_context, const String & name, const String & default_val) +String getClientMetaVarWithDefault(const grpc::ServerContext * grpc_context, const String & name, const String & default_val) { - if (grpc_context->client_metadata().count(name) != 1) - return default_val; - else - return String(grpc_context->client_metadata().find(name)->second.data()); + if (auto it = 
grpc_context->client_metadata().find(name); it != grpc_context->client_metadata().end()) + return it->second.data(); + return default_val; } -std::tuple FlashService::createDBContext(grpc::ServerContext * grpc_context) +std::tuple FlashService::createDBContext(const grpc::ServerContext * grpc_context) const { /// Create DB context. Context context = server.context(); context.setGlobalContext(server.context()); /// Set a bunch of client information. - auto client_meta = grpc_context->client_metadata(); String query_id = getClientMetaVarWithDefault(grpc_context, "query_id", ""); context.setCurrentQueryId(query_id); ClientInfo & client_info = context.getClientInfo(); diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index 15f33df8558..09e1640ab23 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -25,7 +25,7 @@ class FlashService final : public tikvpb::Tikv::Service, public std::enable_shar grpc::ServerReaderWriter * stream) override; private: - std::tuple createDBContext(grpc::ServerContext * grpc_contex); + std::tuple createDBContext(const grpc::ServerContext * grpc_contex) const; private: IServer & server; From 939b8cf0b2f941355577495242229796b01f3b15 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 14 Oct 2019 09:17:29 +0800 Subject: [PATCH 71/79] address comment --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 10 +++++----- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 4 ---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index df985755a4a..c50c9d2cb84 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -22,7 +22,7 @@ bool isFunctionExpr(const tipb::Expr & expr) { return expr.tp() == tipb::ExprTyp const String & getAggFunctionName(const tipb::Expr & expr) { - if (!agg_func_map.count(expr.tp())) + if (agg_func_map.find(expr.tp()) == agg_func_map.end()) { throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } @@ -33,7 +33,7 @@ const String & getFunctionName(const tipb::Expr & expr) { if (isAggFunctionExpr(expr)) { - if (!agg_func_map.count(expr.tp())) + if (agg_func_map.find(expr.tp()) == agg_func_map.end()) { throw Exception(tipb::ExprType_Name(expr.tp()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } @@ -41,7 +41,7 @@ const String & getFunctionName(const tipb::Expr & expr) } else { - if (!scalar_func_map.count(expr.sig())) + if (scalar_func_map.find(expr.sig()) == scalar_func_map.end()) { throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " is not supported.", ErrorCodes::UNSUPPORTED_METHOD); } @@ -106,14 +106,14 @@ String exprToString(const tipb::Expr & expr, const std::vector case tipb::ExprType::Min: case tipb::ExprType::Max: case tipb::ExprType::First: - if (!agg_func_map.count(expr.tp())) + if (agg_func_map.find(expr.tp()) == agg_func_map.end()) { throw Exception(tipb::ExprType_Name(expr.tp()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } func_name = agg_func_map.find(expr.tp())->second; break; case tipb::ExprType::ScalarFunc: - if (!scalar_func_map.count(expr.sig())) + if (scalar_func_map.find(expr.sig()) == scalar_func_map.end()) { throw Exception(tipb::ScalarFuncSig_Name(expr.sig()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index eca4627ed21..62643024523 100644 --- 
a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -172,10 +172,6 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) // This means we can return whatever column, we'll choose it later if no other columns are specified either. continue; - if (cid < 1 || cid > (Int64)storage->getTableInfo().columns.size()) - // cid out of bound - throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); - String name = storage->getTableInfo().getColumnName(cid); required_columns.push_back(name); auto pair = storage->getColumns().getPhysical(name); From d25dadc66aea0e57186dff89bff513f823b2c7be Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 14 Oct 2019 09:31:18 +0800 Subject: [PATCH 72/79] address comments --- dbms/src/Flash/CoprocessorHandler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index faeef0d11af..c45890e29ce 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -113,5 +113,12 @@ catch (const std::exception & e) cop_response->set_other_error(e.what()); return grpc::Status(grpc::StatusCode::INTERNAL, e.what()); } +catch (...) +{ + LOG_ERROR(log, __PRETTY_FUNCTION__ << ": catch other exception."); + cop_response->Clear(); + cop_response->set_other_error("other exception"); + return grpc::Status(grpc::StatusCode::INTERNAL, "other exception"); +} } // namespace DB From 4080fba46f9ff1eb94b07dcf3dfc154470c50d68 Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 14 Oct 2019 16:51:40 +0800 Subject: [PATCH 73/79] address comments --- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- dbms/src/Storages/SelectQueryInfo.cpp | 8 +++++++- dbms/src/Storages/SelectQueryInfo.h | 6 ++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 09e367c449e..b23b9340b35 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -290,7 +290,7 @@ KeyCondition::KeyCondition( */ Block block_with_constants = getBlockWithConstants(query_info.query, context, all_columns); - /// Trasform WHERE section to Reverse Polish notation + /// Transform WHERE section to Reverse Polish notation const ASTSelectQuery & select = typeid_cast(*query_info.query); if (select.where_expression) { diff --git a/dbms/src/Storages/SelectQueryInfo.cpp b/dbms/src/Storages/SelectQueryInfo.cpp index de3b3eb3c59..89b54620161 100644 --- a/dbms/src/Storages/SelectQueryInfo.cpp +++ b/dbms/src/Storages/SelectQueryInfo.cpp @@ -1,9 +1,13 @@ -#include #include +#include +#include + namespace DB { +SelectQueryInfo::SelectQueryInfo() = default; + SelectQueryInfo::SelectQueryInfo(const SelectQueryInfo & query_info_) : query(query_info_.query), sets(query_info_.sets), @@ -16,4 +20,6 @@ SelectQueryInfo::SelectQueryInfo(SelectQueryInfo && query_info_) dag_query(std::move(query_info_.dag_query)) {} +SelectQueryInfo::~SelectQueryInfo() = default; + } // namespace DB diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index 67a3dcce2ba..053d0ef7c7d 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { @@ -17,6 +16,7 @@ using SetPtr = std::shared_ptr; using PreparedSets = std::unordered_map; struct MvccQueryInfo; +struct DAGQueryInfo; /** Query along with 
some additional data,
  * that can be used during query processing
@@ -34,12 +34,14 @@ struct SelectQueryInfo

     std::unique_ptr<DAGQueryInfo> dag_query;

-    SelectQueryInfo() = default;
+    SelectQueryInfo();

     SelectQueryInfo(const SelectQueryInfo & query_info_);

     SelectQueryInfo(SelectQueryInfo && query_info_);

+    ~SelectQueryInfo();
+
     bool fromAST() const { return dag_query == nullptr; };
 };

From d2890e3974de4ed7070055490d3e59a680615283 Mon Sep 17 00:00:00 2001
From: ruoxi
Date: Mon, 14 Oct 2019 21:10:46 +0800
Subject: [PATCH 74/79] Fix NULL order for dag (#281)

---
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 15 +++++++--------
 tests/mutable-test/txn_dag/topn.test          |  9 ++++++---
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
index 62643024523..d20016f21ad 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
@@ -524,17 +524,16 @@ SortDescription InterpreterDAG::getSortDescription(Strings & order_column_names)
 {
     // construct SortDescription
     SortDescription order_descr;
-    const tipb::TopN & topN = dag.getTopN();
-    order_descr.reserve(topN.order_by_size());
-    for (int i = 0; i < topN.order_by_size(); i++)
+    const tipb::TopN & topn = dag.getTopN();
+    order_descr.reserve(topn.order_by_size());
+    for (int i = 0; i < topn.order_by_size(); i++)
     {
         String name = order_column_names[i];
-        int direction = topN.order_by(i).desc() ? -1 : 1;
+        int direction = topn.order_by(i).desc() ? -1 : 1;
+        // MySQL/TiDB treats NULL as "minimum".
+        int nulls_direction = -1;
         // todo get this information from DAGRequest
-        // currently use NULLS LAST
-        int nulls_direction = direction;
-        // todo get this information from DAGRequest
-        // currently use the defalut value
+        // currently use the default value
         std::shared_ptr<Collator> collator;

         order_descr.emplace_back(name, direction, nulls_direction, collator);

diff --git a/tests/mutable-test/txn_dag/topn.test b/tests/mutable-test/txn_dag/topn.test
index 1708402ca40..02cdf79c86f 100644
--- a/tests/mutable-test/txn_dag/topn.test
+++ b/tests/mutable-test/txn_dag/topn.test
@@ -7,22 +7,25 @@
 => DBGInvoke __set_flush_threshold(1000000, 1000000)

 # Data.
-=> DBGInvoke __mock_tidb_table(default, test, 'col_1 String, col_2 Int64')
+=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Nullable(String), col_2 Nullable(Int64)')
 => DBGInvoke __refresh_schemas()
 => DBGInvoke __put_region(4, 0, 100, default, test)
 => DBGInvoke __raft_insert_row(default, test, 4, 50, 'test1', 666)
 => DBGInvoke __raft_insert_row(default, test, 4, 51, 'test2', 777)
+=> DBGInvoke __raft_insert_row(default, test, 4, 52, NULL, NULL)

 # DAG read by not specifying region id, order by col_2 limit 2.
-=> DBGInvoke dag('select * from default.test order by col_2 limit 1')
+=> DBGInvoke dag('select * from default.test order by col_2 limit 2')
 ┌─col_1─┬─col_2─┐
+│ \N    │    \N │
 │ test1 │   666 │
 └───────┴───────┘

 # Mock DAG read, where + topn.
-=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' or col_2 = 666 order by col_1 desc limit 1', 4)
+=> DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' or col_2 = 666 order by col_1 desc limit 2', 4)
 ┌─col_2─┬─col_1─┬─col_2─┐
 │   777 │ test2 │   777 │
+│   666 │ test1 │   666 │
 └───────┴───────┴───────┘

 # Clean up.
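A note on the NULL-order fix above: ClickHouse's SortColumnDescription carries a nulls_direction alongside the sort direction, and pinning it to -1 declares NULL smaller than every value (the MySQL/TiDB rule), so NULLs surface first in ascending order and last in descending order. The old nulls_direction = direction always produced NULLS LAST. Below is a self-contained sketch of that comparison model, with std::optional<int> standing in for a nullable column; it is illustrative, not ClickHouse's actual comparator.

    #include <algorithm>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Three-way compare under (direction, nulls_direction); a hint of -1
    // means "NULL orders below every value", mirroring the diff above.
    int compare(const std::optional<int> & a, const std::optional<int> & b,
                int direction, int nulls_direction)
    {
        int res;
        if (!a && !b)
            res = 0;
        else if (!a)
            res = nulls_direction;  // NULL vs value
        else if (!b)
            res = -nulls_direction; // value vs NULL
        else
            res = *a < *b ? -1 : (*a > *b ? 1 : 0);
        return direction * res;
    }

    int main()
    {
        std::vector<std::optional<int>> col_2{666, std::nullopt, 777};
        // ORDER BY col_2 ASC with nulls_direction = -1: NULL sorts first.
        std::sort(col_2.begin(), col_2.end(), [](const auto & a, const auto & b) {
            return compare(a, b, /*direction=*/1, /*nulls_direction=*/-1) < 0;
        });
        for (const auto & v : col_2)
            std::cout << (v ? std::to_string(*v) : "\\N") << '\n'; // \N 666 777
    }

With nulls_direction = direction instead, the same sort prints 666, 777, \N: the NULLS LAST behavior the updated topn.test above now guards against.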
From bc075c5b4d22585060894230d8d0b190c25399bc Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 15 Oct 2019 15:26:39 +0800 Subject: [PATCH 75/79] refine get actions in DAGExpressionAnalyzer, fix bug in dbgFuncCoprocessor (#282) --- dbms/src/Debug/dbgFuncCoprocessor.cpp | 38 ++++++++++++++----- .../Coprocessor/DAGExpressionAnalyzer.cpp | 37 ++++++++---------- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 11 ++---- dbms/src/Flash/Coprocessor/DAGUtils.h | 3 +- tests/mutable-test/txn_dag/filter.test | 6 +++ 5 files changed, 54 insertions(+), 41 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index e498ff30065..962ad335a1c 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -144,11 +144,11 @@ struct ExecutorCtx { tipb::Executor * input; DAGSchema output; - std::unordered_map col_ref_map; + std::unordered_map> col_ref_map; }; void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set & referred_columns, - std::unordered_map & col_ref_map) + std::unordered_map> & col_ref_map) { if (ASTIdentifier * id = typeid_cast(ast.get())) { @@ -159,7 +159,9 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); referred_columns.emplace((*ft).first); - col_ref_map.emplace((*ft).first, expr); + if (col_ref_map.find((*ft).first) == col_ref_map.end()) + col_ref_map[(*ft).first] = {}; + col_ref_map[(*ft).first].push_back(expr); } else if (ASTFunction * func = typeid_cast(ast.get())) { @@ -208,6 +210,20 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un ft->set_tp(TiDB::TypeLongLong); ft->set_flag(TiDB::ColumnFlagUnsigned); } + else if (func_name_lowercase == "less") + { + expr->set_sig(tipb::ScalarFuncSig::LTInt); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } + else if (func_name_lowercase == "lessorequals") + { + expr->set_sig(tipb::ScalarFuncSig::LEInt); + auto * ft = expr->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned); + } else { throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); @@ -259,7 +275,7 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un } void compileFilter(const DAGSchema & input, ASTPtr ast, tipb::Selection * filter, std::unordered_set & referred_columns, - std::unordered_map & col_ref_map) + std::unordered_map> & col_ref_map) { if (auto * func = typeid_cast(ast.get())) { @@ -339,7 +355,7 @@ std::tuple compileQuery( ci.tp = TiDB::TypeTimestamp; ts_output.emplace_back(std::make_pair(column_info.name, std::move(ci))); } - executor_ctx_map.emplace(ts_exec, ExecutorCtx{nullptr, std::move(ts_output), std::unordered_map{}}); + executor_ctx_map.emplace(ts_exec, ExecutorCtx{nullptr, std::move(ts_output), std::unordered_map>{}}); last_executor = ts_exec; } @@ -349,7 +365,7 @@ std::tuple compileQuery( tipb::Executor * filter_exec = dag_request.add_executors(); filter_exec->set_tp(tipb::ExecType::TypeSelection); tipb::Selection * filter = filter_exec->mutable_selection(); - std::unordered_map col_ref_map; + std::unordered_map> col_ref_map; compileFilter(executor_ctx_map[last_executor].output, ast_query.where_expression, filter, referred_columns, col_ref_map); executor_ctx_map.emplace(filter_exec, ExecutorCtx{last_executor, 
executor_ctx_map[last_executor].output, std::move(col_ref_map)}); last_executor = filter_exec; @@ -361,7 +377,7 @@ std::tuple compileQuery( tipb::Executor * topn_exec = dag_request.add_executors(); topn_exec->set_tp(tipb::ExecType::TypeTopN); tipb::TopN * topn = topn_exec->mutable_topn(); - std::unordered_map col_ref_map; + std::unordered_map> col_ref_map; for (const auto & child : ast_query.order_expression_list->children) { ASTOrderByElement * elem = typeid_cast(child.get()); @@ -385,7 +401,7 @@ std::tuple compileQuery( auto limit_length = safeGet(typeid_cast(*ast_query.limit_length).value); limit->set_limit(limit_length); executor_ctx_map.emplace( - limit_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::unordered_map{}}); + limit_exec, ExecutorCtx{last_executor, executor_ctx_map[last_executor].output, std::unordered_map>{}}); last_executor = limit_exec; } @@ -425,7 +441,9 @@ std::tuple compileQuery( throw Exception("Column not found when pruning: " + pair.first, ErrorCodes::LOGICAL_ERROR); std::stringstream ss; encodeDAGInt64(iter - last_output.begin(), ss); - pair.second->set_val(ss.str()); + auto s_val = ss.str(); + for (auto * expr : pair.second) + expr->set_val(s_val); } executor_ctx.output = last_output; }; @@ -452,7 +470,7 @@ std::tuple compileQuery( tipb::Executor * agg_exec = dag_request.add_executors(); agg_exec->set_tp(tipb::ExecType::TypeAggregation); tipb::Aggregation * agg = agg_exec->mutable_aggregation(); - std::unordered_map col_ref_map; + std::unordered_map> col_ref_map; for (const auto & expr : ast_query.select_expression_list->children) { const ASTFunction * func = typeid_cast(expr.get()); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 4dd5e48a4a9..4850400e44d 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -114,8 +114,10 @@ bool isUInt8Type(const DataTypePtr & type) String DAGExpressionAnalyzer::applyFunction(const String & func_name, Names & arg_names, ExpressionActionsPtr & actions) { - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); String result_name = genFuncString(func_name, arg_names); + if (actions->getSampleBlock().has(result_name)) + return result_name; + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); const ExpressionAction & apply_function = ExpressionAction::applyFunction(function_builder, arg_names, result_name); actions->add(apply_function); return result_name; @@ -377,11 +379,11 @@ String DAGExpressionAnalyzer::appendCastIfNeeded(const tipb::Expr & expr, Expres auto * type_field_type = type_expr.mutable_field_type(); type_field_type->set_tp(TiDB::TypeString); type_field_type->set_flag(TiDB::ColumnFlagNotNull); - getActions(type_expr, actions); + auto type_expr_name = getActions(type_expr, actions); Names cast_argument_names; cast_argument_names.push_back(expr_name); - cast_argument_names.push_back(getName(type_expr, getCurrentInputColumns())); + cast_argument_names.push_back(type_expr_name); String cast_expr_name = applyFunction("CAST", cast_argument_names, actions); return cast_expr_name; } @@ -450,23 +452,22 @@ static String getUniqueName(const Block & block, const String & prefix) String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActionsPtr & actions) { - String expr_name = getName(expr, getCurrentInputColumns()); - if 
((isLiteralExpr(expr) || isFunctionExpr(expr)) && actions->getSampleBlock().has(expr_name)) - { - return expr_name; - } if (isLiteralExpr(expr)) { Field value = decodeLiteral(expr); - DataTypePtr type = exprHasValidFieldType(expr) ? getDataTypeByFieldType(expr.field_type()) : applyVisitor(FieldToDataType(), value); + DataTypePtr flash_type = applyVisitor(FieldToDataType(), value); + DataTypePtr target_type = exprHasValidFieldType(expr) ? getDataTypeByFieldType(expr.field_type()) : flash_type; + String name = exprToString(expr, getCurrentInputColumns()) + "_" + target_type->getName(); + if (actions->getSampleBlock().has(name)) + return name; ColumnWithTypeAndName column; - column.column = type->createColumnConst(1, convertFieldToType(value, *type)); - column.name = expr_name; - column.type = type; + column.column = target_type->createColumnConst(1, convertFieldToType(value, *target_type, flash_type.get())); + column.name = name; + column.type = target_type; actions->add(ExpressionAction::addColumn(column)); - return column.name; + return name; } else if (isColumnExpr(expr)) { @@ -476,7 +477,7 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); } //todo check if the column type need to be cast to field type - return expr_name; + return getCurrentInputColumns()[column_id].name; } else if (isFunctionExpr(expr)) { @@ -515,13 +516,7 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } } - // need to re-construct expr_name, because expr_name generated previously is based on expr tree, - // but for function call, it's argument name may be changed as an implicit cast func maybe - // inserted(refer to the logic below), so we need to update the expr_name - // for example, for a expr and(arg1, arg2), the expr_name is and(arg1_name,arg2_name), but - // if the arg1 need to be casted to the type passed by dag request, then the expr_name - // should be updated to and(casted_arg1_name, arg2_name) - expr_name = applyFunction(func_name, argument_names, actions); + String expr_name = applyFunction(func_name, argument_names, actions); // add cast if needed expr_name = appendCastIfNeeded(expr, actions, expr_name); return expr_name; diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index c50c9d2cb84..09fc5d0b87a 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -49,7 +49,7 @@ const String & getFunctionName(const tipb::Expr & expr) } } -String exprToString(const tipb::Expr & expr, const std::vector & input_col, bool for_parser) +String exprToString(const tipb::Expr & expr, const std::vector & input_col) { std::stringstream ss; Int64 column_id = 0; @@ -123,7 +123,7 @@ String exprToString(const tipb::Expr & expr, const std::vector throw Exception(tipb::ExprType_Name(expr.tp()) + " not supported", ErrorCodes::UNSUPPORTED_METHOD); } // build function expr - if (isInOrGlobalInOperator(func_name) && for_parser) + if (isInOrGlobalInOperator(func_name)) { // for in, we could not represent the function expr using func_name(param1, param2, ...) 
throw Exception("Function " + func_name + " not supported", ErrorCodes::UNSUPPORTED_METHOD); @@ -132,7 +132,7 @@ String exprToString(const tipb::Expr & expr, const std::vector bool first = true; for (const tipb::Expr & child : expr.children()) { - String s = exprToString(child, input_col, for_parser); + String s = exprToString(child, input_col); if (first) { first = false; @@ -149,11 +149,6 @@ String exprToString(const tipb::Expr & expr, const std::vector const String & getTypeName(const tipb::Expr & expr) { return tipb::ExprType_Name(expr.tp()); } -String getName(const tipb::Expr & expr, const std::vector & current_input_columns) -{ - return exprToString(expr, current_input_columns, false); -} - bool isAggFunctionExpr(const tipb::Expr & expr) { switch (expr.tp()) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index b2b3a98a62f..709b7602dba 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -23,9 +23,8 @@ const String & getFunctionName(const tipb::Expr & expr); const String & getAggFunctionName(const tipb::Expr & expr); bool isColumnExpr(const tipb::Expr & expr); ColumnID getColumnID(const tipb::Expr & expr); -String getName(const tipb::Expr & expr, const std::vector & current_input_columns); const String & getTypeName(const tipb::Expr & expr); -String exprToString(const tipb::Expr & expr, const std::vector & input_col, bool for_parser = true); +String exprToString(const tipb::Expr & expr, const std::vector & input_col); bool isInOrGlobalInOperator(const String & name); bool exprHasValidFieldType(const tipb::Expr & expr); extern std::unordered_map agg_func_map; diff --git a/tests/mutable-test/txn_dag/filter.test b/tests/mutable-test/txn_dag/filter.test index 529567a4969..dd31a653c28 100644 --- a/tests/mutable-test/txn_dag/filter.test +++ b/tests/mutable-test/txn_dag/filter.test @@ -32,6 +32,12 @@ │ 777 │ test2 │ 777 │ └───────┴───────┴───────┘ +# DAG read, col used multiple times in the query +=> DBGInvoke dag('select * from default.test where col_2 < 777 or col_2 > 888') +┌─col_1─┬─col_2─┐ +│ test1 │ 666 │ +└───────┴───────┘ + # Mock DAG read, where and. => DBGInvoke mock_dag('select col_2, col_1, col_2 from default.test where col_1 = \'test2\' and col_2 = 777', 4) ┌─col_2─┬─col_1─┬─col_2─┐ From fbcbdc08539d21a8f38f0bbe59e397f20a28a80a Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 15 Oct 2019 18:02:55 +0800 Subject: [PATCH 76/79] remove duplicate agg funcs (#283) * 1. remove duplicate agg funcs, 2. 
for column ref expr, change column_id to column_index since the value stored in column ref expr is not column id * bug fix --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 37 ++++++++++--------- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 2 +- dbms/src/Flash/Coprocessor/DAGQueryInfo.h | 10 ++--- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 18 ++++----- dbms/src/Flash/Coprocessor/DAGUtils.h | 2 +- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- dbms/src/Storages/MergeTree/RPNBuilder.cpp | 13 ++----- dbms/src/Storages/MergeTree/RPNBuilder.h | 4 +- tests/mutable-test/txn_dag/aggregation.test | 6 +++ 10 files changed, 46 insertions(+), 50 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 4850400e44d..1a362d688e4 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -47,8 +47,8 @@ static String genFuncString(const String & func_name, const Names & argument_nam return ss.str(); } -DAGExpressionAnalyzer::DAGExpressionAnalyzer(const std::vector && source_columns_, const Context & context_) - : source_columns(source_columns_), +DAGExpressionAnalyzer::DAGExpressionAnalyzer(std::vector && source_columns_, const Context & context_) + : source_columns(std::move(source_columns_)), context(context_), after_agg(false), implicit_cast_count(0), @@ -68,7 +68,6 @@ void DAGExpressionAnalyzer::appendAggregation( initChain(chain, getCurrentInputColumns()); ExpressionActionsChain::Step & step = chain.steps.back(); - Names agg_argument_names; for (const tipb::Expr & expr : agg.agg_func()) { const String & agg_func_name = getAggFunctionName(expr); @@ -78,13 +77,24 @@ void DAGExpressionAnalyzer::appendAggregation( for (Int32 i = 0; i < expr.children_size(); i++) { String arg_name = getActions(expr.children(i), step.actions); - agg_argument_names.push_back(arg_name); types[i] = step.actions->getSampleBlock().getByName(arg_name).type; aggregate.argument_names[i] = arg_name; + step.required_output.push_back(arg_name); } - String func_string = genFuncString(agg_func_name, agg_argument_names); + String func_string = genFuncString(agg_func_name, aggregate.argument_names); + bool duplicate = false; + for (const auto & pre_agg : aggregate_descriptions) + { + if (pre_agg.column_name == func_string) + { + aggregated_columns.emplace_back(func_string, pre_agg.function->getReturnType()); + duplicate = true; + break; + } + } + if (duplicate) + continue; aggregate.column_name = func_string; - //todo de-duplicate aggregation column aggregate.parameters = Array(); aggregate.function = AggregateFunctionFactory::instance().get(agg_func_name, types); aggregate_descriptions.push_back(aggregate); @@ -93,8 +103,6 @@ void DAGExpressionAnalyzer::appendAggregation( aggregated_columns.emplace_back(func_string, result_type); } - std::move(agg_argument_names.begin(), agg_argument_names.end(), std::back_inserter(step.required_output)); - for (const tipb::Expr & expr : agg.group_by()) { String name = getActions(expr, step.actions); @@ -286,7 +294,7 @@ void DAGExpressionAnalyzer::appendAggSelect( { initChain(chain, getCurrentInputColumns()); bool need_update_aggregated_columns = false; - NamesAndTypesList updated_aggregated_columns; + std::vector updated_aggregated_columns; ExpressionActionsChain::Step step = chain.steps.back(); bool need_append_timezone_cast = hasMeaningfulTZInfo(rqst); tipb::Expr tz_expr; @@ -344,12 +352,10 @@ 
void DAGExpressionAnalyzer::appendAggSelect( if (need_update_aggregated_columns) { - auto updated_agg_col_names = updated_aggregated_columns.getNames(); - auto updated_agg_col_types = updated_aggregated_columns.getTypes(); aggregated_columns.clear(); for (size_t i = 0; i < updated_aggregated_columns.size(); i++) { - aggregated_columns.emplace_back(updated_agg_col_names[i], updated_agg_col_types[i]); + aggregated_columns.emplace_back(updated_aggregated_columns[i].name, updated_aggregated_columns[i].type); } } } @@ -471,13 +477,8 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, ExpressionActi } else if (isColumnExpr(expr)) { - ColumnID column_id = getColumnID(expr); - if (column_id < 0 || column_id >= (ColumnID)getCurrentInputColumns().size()) - { - throw Exception("column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); - } //todo check if the column type need to be cast to field type - return getCurrentInputColumns()[column_id].name; + return getColumnNameForColumnExpr(expr, getCurrentInputColumns()); } else if (isFunctionExpr(expr)) { diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 1b5b65f0ff0..d2a6b5751be 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -37,7 +37,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable Poco::Logger * log; public: - DAGExpressionAnalyzer(const std::vector && source_columns_, const Context & context_); + DAGExpressionAnalyzer(std::vector && source_columns_, const Context & context_); void appendWhere(ExpressionActionsChain & chain, const tipb::Selection & sel, String & filter_column_name); void appendOrderBy(ExpressionActionsChain & chain, const tipb::TopN & topN, Strings & order_column_names); void appendAggregation(ExpressionActionsChain & chain, const tipb::Aggregation & agg, Names & aggregate_keys, diff --git a/dbms/src/Flash/Coprocessor/DAGQueryInfo.h b/dbms/src/Flash/Coprocessor/DAGQueryInfo.h index 20274503782..cb01768d473 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryInfo.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryInfo.h @@ -10,14 +10,10 @@ namespace DB struct DAGQueryInfo { - DAGQueryInfo(const DAGQuerySource & dag_, DAGPreparedSets dag_sets_, std::vector & source_columns_) - : dag(dag_), dag_sets(std::move(dag_sets_)) - { - for (auto & c : source_columns_) - source_columns.emplace_back(c.name, c.type); - }; + DAGQueryInfo(const DAGQuerySource & dag_, DAGPreparedSets dag_sets_, const std::vector & source_columns_) + : dag(dag_), dag_sets(std::move(dag_sets_)), source_columns(source_columns_){}; const DAGQuerySource & dag; DAGPreparedSets dag_sets; - NamesAndTypesList source_columns; + const std::vector & source_columns; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 09fc5d0b87a..0f196df8db1 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -52,7 +52,6 @@ const String & getFunctionName(const tipb::Expr & expr) String exprToString(const tipb::Expr & expr, const std::vector & input_col) { std::stringstream ss; - Int64 column_id = 0; String func_name; Field f; switch (expr.tp()) @@ -94,12 +93,7 @@ String exprToString(const tipb::Expr & expr, const std::vector return std::to_string(TiDB::DatumFlat(t, static_cast(expr.field_type().tp())).field().get()); } case tipb::ExprType::ColumnRef: - column_id = decodeDAGInt64(expr.val()); - if (column_id < 0 || 
column_id >= (ColumnID)input_col.size()) - { - throw Exception("Column id out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); - } - return input_col[column_id].name; + return getColumnNameForColumnExpr(expr, input_col); case tipb::ExprType::Count: case tipb::ExprType::Sum: case tipb::ExprType::Avg: @@ -247,10 +241,14 @@ Field decodeLiteral(const tipb::Expr & expr) } } -ColumnID getColumnID(const tipb::Expr & expr) +String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) { - auto column_id = decodeDAGInt64(expr.val()); - return column_id; + auto column_index = decodeDAGInt64(expr.val()); + if (column_index < 0 || column_index >= (Int64)input_col.size()) + { + throw Exception("Column index out of bound", ErrorCodes::COP_BAD_DAG_REQUEST); + } + return input_col[column_index].name; } bool isInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; } diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 709b7602dba..b45c12680c3 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -22,7 +22,7 @@ bool isAggFunctionExpr(const tipb::Expr & expr); const String & getFunctionName(const tipb::Expr & expr); const String & getAggFunctionName(const tipb::Expr & expr); bool isColumnExpr(const tipb::Expr & expr); -ColumnID getColumnID(const tipb::Expr & expr); +String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col); const String & getTypeName(const tipb::Expr & expr); String exprToString(const tipb::Expr & expr, const std::vector & input_col); bool isInOrGlobalInOperator(const String & name); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index d20016f21ad..f0321b3bd7b 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -250,7 +250,7 @@ void InterpreterDAG::executeTS(const tipb::TableScan & ts, Pipeline & pipeline) SelectQueryInfo query_info; // set query to avoid unexpected NPE query_info.query = dag.getAST(); - query_info.dag_query = std::make_unique(dag, analyzer->getPreparedSets(), source_columns); + query_info.dag_query = std::make_unique(dag, analyzer->getPreparedSets(), analyzer->getCurrentInputColumns()); query_info.mvcc_query_info = std::make_unique(); query_info.mvcc_query_info->resolve_locks = true; query_info.mvcc_query_info->read_tso = settings.read_tso; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index b23b9340b35..1e1062df6b9 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -282,7 +282,7 @@ KeyCondition::KeyCondition( if (query_info.fromAST()) { - RPNBuilder rpn_builder(key_expr_, key_columns, all_columns); + RPNBuilder rpn_builder(key_expr_, key_columns, {}); PreparedSets sets(query_info.sets); /** Evaluation of expressions that depend only on constants. 
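A note on the recurring change in this patch: a tipb ColumnRef is resolved through getColumnNameForColumnExpr rather than getColumnID, because the value it carries is an offset into the current executor's input schema, not a table column id. The following standalone sketch mirrors that lookup; the names columnNameForRef and NameAndType are illustrative stand-ins, and decoding the index from the wire format is elided.

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    using NameAndType = std::pair<std::string, std::string>; // column name, type name

    // A ColumnRef carries the 0-based position of a column in the input
    // schema of the current executor; resolve it to a name, range-checked,
    // as getColumnNameForColumnExpr does in the diff above.
    std::string columnNameForRef(std::int64_t column_index, const std::vector<NameAndType> & input_col)
    {
        if (column_index < 0 || column_index >= static_cast<std::int64_t>(input_col.size()))
            throw std::out_of_range("Column index out of bound");
        return input_col[static_cast<std::size_t>(column_index)].first;
    }

Centralizing the bounds check in this helper is what allows the per-call-site checks, such as the one deleted from DAGExpressionAnalyzer::getActions above, to disappear without losing safety.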
diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.cpp b/dbms/src/Storages/MergeTree/RPNBuilder.cpp index 9a2830612b9..52e458dea20 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.cpp +++ b/dbms/src/Storages/MergeTree/RPNBuilder.cpp @@ -58,19 +58,14 @@ const String getFuncName(const ASTPtr & node) return ""; } -const String getColumnName(const tipb::Expr & node, const NamesAndTypesList & source_columns) +const String getColumnName(const tipb::Expr & node, const std::vector & source_columns) { - if (node.tp() == tipb::ExprType::ColumnRef) - { - auto col_id = getColumnID(node); - if (col_id < 0 || col_id >= (Int64)source_columns.size()) - return ""; - return source_columns.getNames()[col_id]; - } + if (isColumnExpr(node)) + return getColumnNameForColumnExpr(node, source_columns); return ""; } -const String getColumnName(const ASTPtr & node, const NamesAndTypesList &) { return node->getColumnName(); } +const String getColumnName(const ASTPtr & node, const std::vector &) { return node->getColumnName(); } bool isFuncNode(const ASTPtr & node) { return typeid_cast(node.get()); } diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h index f9eaf263cf5..5eafac0d704 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.h +++ b/dbms/src/Storages/MergeTree/RPNBuilder.h @@ -21,7 +21,7 @@ template class RPNBuilder { public: - RPNBuilder(const ExpressionActionsPtr & key_expr_, ColumnIndices & key_columns_, const NamesAndTypesList & source_columns_) + RPNBuilder(const ExpressionActionsPtr & key_expr_, ColumnIndices & key_columns_, const std::vector & source_columns_) : key_expr(key_expr_), key_columns(key_columns_), source_columns(source_columns_) {} @@ -62,6 +62,6 @@ class RPNBuilder protected: const ExpressionActionsPtr & key_expr; ColumnIndices & key_columns; - const NamesAndTypesList & source_columns; + const std::vector & source_columns; }; } // namespace DB diff --git a/tests/mutable-test/txn_dag/aggregation.test b/tests/mutable-test/txn_dag/aggregation.test index 0f8ec4c30e3..ef1905a5dc4 100644 --- a/tests/mutable-test/txn_dag/aggregation.test +++ b/tests/mutable-test/txn_dag/aggregation.test @@ -21,6 +21,12 @@ │ 1 │ 777 │ └──────────────┴───────┘ +=> DBGInvoke dag('select count(col_1),count(col_1) from default.test group by col_2') +┌─count(col_1)─┬─count(col_1)─┬─col_2─┐ +│ 2 │ 2 │ 666 │ +│ 1 │ 1 │ 777 │ +└──────────────┴──────────────┴───────┘ + # DAG read by explicitly specifying region id, where + group by. 
=> DBGInvoke dag('select count(col_1) from default.test where col_2 = 666 group by col_2', 4)
┌─count(col_1)─┬─col_2─┐

From d968c098f417ec2c8e5167e383f55817991b5773 Mon Sep 17 00:00:00 2001
From: xufei
Date: Wed, 16 Oct 2019 17:38:41 +0800
Subject: [PATCH 77/79] address comments

---
 dbms/src/Flash/Coprocessor/DAGQuerySource.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
index 2cf291d08dd..c9d7b22871f 100644
--- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp
@@ -61,7 +61,7 @@ DAGQuerySource::DAGQuerySource(Context & context_, DAGContext & dag_context_, Re
                 assignOrThrowException(limit_index, i, LIMIT_NAME);
                 break;
             default:
-                throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString());
+                throw Exception("Unsupported executor in DAG request: " + dag_request.executors(i).DebugString(), ErrorCodes::NOT_IMPLEMENTED);
         }
     }
 }

From 4f58878c2ea9fe285c8a167535403d56b335b3af Mon Sep 17 00:00:00 2001
From: ruoxi
Date: Thu, 17 Oct 2019 14:51:16 +0800
Subject: [PATCH 78/79] Update dbms/src/Flash/BatchCommandsHandler.cpp

Co-Authored-By: JaySon
---
 dbms/src/Flash/BatchCommandsHandler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Flash/BatchCommandsHandler.cpp b/dbms/src/Flash/BatchCommandsHandler.cpp
index c56a8dafad7..f90c92848e2 100644
--- a/dbms/src/Flash/BatchCommandsHandler.cpp
+++ b/dbms/src/Flash/BatchCommandsHandler.cpp
@@ -66,7 +66,8 @@ grpc::Status BatchCommandsHandler::execute()
     ThreadPool thread_pool(max_threads);

-    std::vector<grpc::Status> rets(request.requests_size());
+    std::vector<grpc::Status> rets;
+    rets.reserve(request.requests_size());
     size_t i = 0;

     for (const auto & req : request.requests())

From 92c16c25359e775b60d0eeb5cff173d8c3c30431 Mon Sep 17 00:00:00 2001
From: xufei
Date: Thu, 17 Oct 2019 17:28:18 +0800
Subject: [PATCH 79/79] revert unnecessary changes

---
 .../MergeTreeDataSelectExecutorCommon.hpp | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp
index 3f83bd0b6e8..38a68e59c09 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutorCommon.hpp
@@ -18,25 +18,20 @@ static inline void extendMutableEngineColumnNames(Names & column_names_to_read,
 /// make sure pk, version and delmark are always the first 3 columns; some sample columns may be added later.
 static inline void extendMutableEngineColumnNames(Names & column_names_to_read, const std::string & handle_col_name)
 {
-    std::set<std::string> reserved_names;
-    reserved_names.insert(handle_col_name);
-    reserved_names.insert(MutableSupport::version_column_name);
-    reserved_names.insert(MutableSupport::delmark_column_name);
+    std::set<std::string> names;

-    Names names;
     for (auto & name : column_names_to_read)
-    {
-        if(reserved_names.count(name) == 0)
-        {
-            names.emplace_back(std::move(name));
-        }
-    }
+        names.emplace(std::move(name));

     column_names_to_read.clear();

     column_names_to_read.push_back(handle_col_name);
     column_names_to_read.push_back(MutableSupport::version_column_name);
     column_names_to_read.push_back(MutableSupport::delmark_column_name);

+    names.erase(MutableSupport::version_column_name);
+    names.erase(MutableSupport::delmark_column_name);
+    names.erase(handle_col_name);
+
     for (auto & name : names)
         column_names_to_read.emplace_back(std::move(name));
 }
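To close, a runnable distillation of the helper restored by PATCH 79: the handle, version and delmark columns are forced to the front, and the remaining names, routed through a std::set, come out de-duplicated and sorted. The column constants below are assumed stand-ins, not MutableSupport's real names.

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // Stand-ins; the real constants live in MutableSupport.
    static const std::string version_col = "_INTERNAL_VERSION";
    static const std::string delmark_col = "_INTERNAL_DELMARK";

    // Mirrors extendMutableEngineColumnNames: reserved columns lead,
    // everything else follows in std::set (sorted, de-duplicated) order.
    void extendColumnNames(std::vector<std::string> & cols, const std::string & handle)
    {
        std::set<std::string> names(cols.begin(), cols.end());
        cols.clear();
        cols.push_back(handle);
        cols.push_back(version_col);
        cols.push_back(delmark_col);
        names.erase(handle);
        names.erase(version_col);
        names.erase(delmark_col);
        for (const auto & n : names)
            cols.push_back(n);
    }

    int main()
    {
        std::vector<std::string> cols{"col_2", "col_1", "_tidb_rowid", "col_2"};
        extendColumnNames(cols, "_tidb_rowid");
        for (const auto & c : cols)
            std::cout << c << '\n';
        // _tidb_rowid, _INTERNAL_VERSION, _INTERNAL_DELMARK, col_1, col_2
    }

Relative to the variant being reverted, the reserved triple is handled identically; the only observable difference is that the remaining columns arrive sorted and de-duplicated instead of in request order.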