From 8188f568ab0f2a11850046d1fbfc30f44f8a560e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 14 Aug 2019 02:00:29 +0800 Subject: [PATCH] [flash-361] support default value for different types (#144) --- dbms/src/Storages/Transaction/MyTimeParser.h | 174 +++++++++++++++++++ dbms/src/Storages/Transaction/TiDB.cpp | 52 ++++-- dbms/src/Storages/Transaction/TiDB.h | 8 +- 3 files changed, 222 insertions(+), 12 deletions(-) create mode 100644 dbms/src/Storages/Transaction/MyTimeParser.h diff --git a/dbms/src/Storages/Transaction/MyTimeParser.h b/dbms/src/Storages/Transaction/MyTimeParser.h new file mode 100644 index 00000000000..009bba29390 --- /dev/null +++ b/dbms/src/Storages/Transaction/MyTimeParser.h @@ -0,0 +1,174 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +int adjustYear(int year) +{ + if (year >= 0 && year <= 69) + return 2000 + year; + if (year >= 70 && year <= 99) + return 1900 + year; + return year; +} + +void scanTimeArgs(const std::vector & seps, std::initializer_list && list) +{ + int i = 0; + for (auto * ptr : list) + { + *ptr = std::stoi(seps[i]); + i++; + } +} + +int getFracIndex(const String & format) +{ + int idx = -1; + for (int i = int(format.size()) - 1; i >= 0; i--) + { + if (std::ispunct(format[i])) + { + if (format[i] == '.') + { + idx = i; + } + break; + } + } + return idx; +} + +std::vector parseDateFormat(String format) +{ + format = Poco::trimInPlace(format); + + std::vector seps; + size_t start = 0; + for (size_t i = 0; i < format.size(); i++) + { + if (i == 0 || i + 1 == format.size()) + { + if (!std::isdigit(format[i])) + return {}; + continue; + } + + if (!std::isdigit(format[i])) + { + if (!std::isdigit(format[i - 1])) + return {}; + seps.push_back(format.substr(start, i - start)); + start = i + 1; + } + } + seps.push_back(format.substr(start)); + return seps; +} + +std::vector splitDatetime(String format) +{ + int idx = getFracIndex(format); + if (idx > 0) + { + format = format.substr(0, idx); + } + return parseDateFormat(format); +} + +Field parseMyDatetime(const String & str) +{ + Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + + const auto & seps = splitDatetime(str); + + switch (seps.size()) + { + // No delimiter + case 1: + { + size_t l = seps[0].size(); + switch (l) + { + case 14: + // YYYYMMDDHHMMSS + { + std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + break; + } + case 12: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + year = adjustYear(year); + break; + } + case 11: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); + year = adjustYear(year); + break; + } + case 10: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); + year = adjustYear(year); + break; + } + case 9: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); + year = adjustYear(year); + break; + } + case 8: + { + std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); + break; + } + case 6: + case 5: + { + std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); + year = adjustYear(year); + break; + } + default: + { + throw Exception("Wrong datetime format"); + } + // TODO Process frac! + } + break; + } + case 3: + { + scanTimeArgs(seps, {&year, &month, &day}); + break; + } + case 6: + { + scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); + break; + } + default: + { + throw Exception("Wrong datetime format"); + } + } + + UInt64 ymd = ((year * 13 + month) << 5) | day; + UInt64 hms = (hour << 12) | (minute << 6) | second; + return Field((ymd << 17 | hms) << 24); +} + +} // namespace DB diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index 83715fd5bb4..452d037fbcd 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include namespace TiDB @@ -9,13 +11,11 @@ using DB::WriteBufferFromOwnString; ColumnInfo::ColumnInfo(Poco::JSON::Object::Ptr json) { deserialize(json); } -// TODO:: Refine Decimal Default Value !! -// TODO:: Refine Enum Default Value !! -// TODO:: Refine Date/Datatime/TimeStamp Defalut Value !! Field ColumnInfo::defaultValueToField() const { auto & value = origin_default_value; - if (value.isEmpty()) { + if (value.isEmpty()) + { return Field(); } switch (tp) @@ -26,16 +26,16 @@ Field ColumnInfo::defaultValueToField() const case TypeLong: case TypeLongLong: case TypeInt24: + case TypeBit: return value.convert(); // Floating type. case TypeFloat: case TypeDouble: return value.convert(); - case TypeTimestamp: - // FIXME: may be string - return value.convert(); case TypeDate: case TypeDatetime: + case TypeTimestamp: + return DB::parseMyDatetime(value.convert()); case TypeVarchar: case TypeTinyBlob: case TypeMediumBlob: @@ -45,16 +45,47 @@ Field ColumnInfo::defaultValueToField() const case TypeString: return value.convert(); case TypeEnum: - // FIXME: may be int or string - return value.convert(); + return getEnumIndex(value.convert()); case TypeNull: return Field(); + case TypeDecimal: + case TypeNewDecimal: + return getDecimalDefaultValue(value.convert()); + case TypeTime: + case TypeYear: + case TypeSet: + // TODO support it ! + return Field(); default: throw Exception("Have not proccessed type: " + std::to_string(tp)); } return Field(); } +DB::Decimal ColumnInfo::getDecimalDefaultValue(const String & str) const +{ + DB::ReadBufferFromString buffer(str); + DB::Decimal result; + result.precision = flen; + result.scale = decimal; + DB::readDecimalText(result, buffer); + return result; +} + +// FIXME it still has bug: https://github.com/pingcap/tidb/issues/11435 +Int64 ColumnInfo::getEnumIndex(const String & default_str) const +{ + for (const auto & elem : elems) + { + if (elem.first == default_str) + { + return elem.second; + } + } + int num = std::stoi(default_str); + return num; +} + Poco::JSON::Object::Ptr ColumnInfo::getJSONObject() const try { Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); @@ -334,7 +365,8 @@ void TableInfo::deserialize(const String & json_str) try belonging_table_id = obj->getValue("belonging_table_id"); partition.deserialize(partition_obj); } - if (obj->has("schema_version")) { + if (obj->has("schema_version")) + { schema_version = obj->getValue("schema_version"); } } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index dfd38f294cb..456b176d85c 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -55,7 +55,7 @@ using DB::Timestamp; M(Bit, 16, CompactBytes, UInt64, false) \ M(JSON, 0xf5, Json, String, false) \ M(NewDecimal, 0xf6, Decimal, Decimal, false) \ - M(Enum, 0xf7, CompactBytes, Enum16, false) \ + M(Enum, 0xf7, VarUInt, Enum16, false) \ M(Set, 0xf8, CompactBytes, String, false) \ M(TinyBlob, 0xf9, CompactBytes, String, false) \ M(MediumBlob, 0xfa, CompactBytes, String, false) \ @@ -169,8 +169,12 @@ struct ColumnInfo COLUMN_FLAGS(M) #undef M - CodecFlag getCodecFlag() const; DB::Field defaultValueToField() const; + CodecFlag getCodecFlag() const; + +private: + DB::Decimal getDecimalDefaultValue(const String & str) const; + Int64 getEnumIndex(const String &) const; }; enum PartitionType