From a6f9423db6c4422d56be9236564db1c3e107644e Mon Sep 17 00:00:00 2001 From: abingcbc Date: Fri, 29 Mar 2024 03:14:52 +0000 Subject: [PATCH 1/6] refactor logic of last matched line --- core/reader/LogFileReader.cpp | 73 +++-- core/reader/LogFileReader.h | 5 +- .../reader/LastMatchedLineUnittest.cpp | 271 ++++-------------- 3 files changed, 97 insertions(+), 252 deletions(-) diff --git a/core/reader/LogFileReader.cpp b/core/reader/LogFileReader.cpp index 22acd7270f..b4090735c6 100644 --- a/core/reader/LogFileReader.cpp +++ b/core/reader/LogFileReader.cpp @@ -18,17 +18,16 @@ #include #include #endif +#include #include #include +#include +#include #include #include #include -#include -#include -#include - #include "GloablFileDescriptorManager.h" #include "app_config/AppConfig.h" #include "checkpoint/CheckPointManager.h" @@ -2004,52 +2003,48 @@ int32_t LogFileReader::LastMatchedLine(char* buffer, int32_t size, int32_t& roll return 0; } // Multiline rollback - int begPs = size - 2; std::string exception; - while (begPs >= 0) { - if (buffer[begPs] == '\n' || begPs == 0) { - int lineBegin = begPs == 0 ? 0 : begPs + 1; - if (mMultilineConfig.first->GetContinuePatternReg() - && BoostRegexMatch(buffer + lineBegin, - endPs - lineBegin, - *mMultilineConfig.first->GetContinuePatternReg(), - exception)) { - ++rollbackLineFeedCount; - endPs = begPs; - } else if (mMultilineConfig.first->GetEndPatternReg() - && BoostRegexMatch(buffer + lineBegin, - endPs - lineBegin, - *mMultilineConfig.first->GetEndPatternReg(), - exception)) { + while (endPs >= 0) { + size_t begPs = GetNextLine(buffer, endPs); + if (mMultilineConfig.first->GetEndPatternReg()) { + // start + end, continue + end, end + if (BoostRegexMatch( + buffer + begPs, endPs - begPs, *mMultilineConfig.first->GetEndPatternReg(), exception)) { // Ensure the end line is complete if (buffer[endPs] == '\n') { return endPs + 1; } else { ++rollbackLineFeedCount; - endPs = begPs; - } - } else if (mMultilineConfig.first->GetStartPatternReg() - && BoostRegexMatch(buffer + lineBegin, - endPs - lineBegin, - *mMultilineConfig.first->GetStartPatternReg(), - exception)) { - ++rollbackLineFeedCount; - // Keep all the buffer if rollback all - return lineBegin; - } else if (mMultilineConfig.first->GetContinuePatternReg()) { - // We can confirm the logs before are complete if continue is configured but no regex pattern can match. - if (buffer[endPs] == '\n') { - return endPs + 1; - } else { - // Keep all the buffer if rollback all - return lineBegin; + endPs = begPs - 1; } } else { ++rollbackLineFeedCount; - endPs = begPs; + endPs = begPs - 1; } + } else if (mMultilineConfig.first->GetStartPatternReg() + && BoostRegexMatch( + buffer + begPs, endPs - begPs, *mMultilineConfig.first->GetStartPatternReg(), exception)) { + // start + continue, start + ++rollbackLineFeedCount; + // Keep all the buffer if rollback all + return begPs; + } else { + ++rollbackLineFeedCount; + endPs = begPs - 1; + } + } + return 0; +} + +size_t LogFileReader::GetNextLine(const char* buffer, size_t end) { + if (end <= 0) { + return 0; + } + + for (size_t begin = end; begin > 0; --begin) { + if (buffer[begin - 1] == '\n') { + return begin; } - begPs--; } return 0; } diff --git a/core/reader/LogFileReader.h b/core/reader/LogFileReader.h index e242737522..ed17cda76c 100644 --- a/core/reader/LogFileReader.h +++ b/core/reader/LogFileReader.h @@ -504,6 +504,8 @@ class LogFileReader { // @param fromCpt: if the read size is recoveried from checkpoint, set it to true. size_t getNextReadSize(int64_t fileEnd, bool& fromCpt); + size_t GetNextLine(const char* buffer, size_t begin); + // Update current checkpoint's read offset and length after success read. void setExactlyOnceCheckpointAfterRead(size_t readSize); @@ -589,8 +591,7 @@ class LogFileReader { friend class LogSplitUnittest; friend class LogSplitDiscardUnmatchUnittest; friend class LogSplitNoDiscardUnmatchUnittest; - friend class LastMatchedLineDiscardUnmatchUnittest; - friend class LastMatchedLineNoDiscardUnmatchUnittest; + friend class LastMatchedLineMultilineUnittest; friend class LogFileReaderCheckpointUnittest; protected: diff --git a/core/unittest/reader/LastMatchedLineUnittest.cpp b/core/unittest/reader/LastMatchedLineUnittest.cpp index 11cbc1d6ee..d3f7113bf9 100644 --- a/core/unittest/reader/LastMatchedLineUnittest.cpp +++ b/core/unittest/reader/LastMatchedLineUnittest.cpp @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "unittest/Unittest.h" +#include "common/FileSystemUtil.h" #include "reader/LogFileReader.h" #include "reader/SourceBuffer.h" -#include "common/FileSystemUtil.h" +#include "unittest/Unittest.h" namespace logtail { @@ -171,7 +171,7 @@ void LastMatchedLineUnittest::TestMultiline() { } } -class LastMatchedLineDiscardUnmatchUnittest : public ::testing::Test { +class LastMatchedLineMultilineUnittest : public ::testing::Test { public: void TestLastMatchedLineWithBeginContinue(); void TestLastMatchedLineWithBeginEnd(); @@ -184,13 +184,13 @@ class LastMatchedLineDiscardUnmatchUnittest : public ::testing::Test { PipelineContext ctx; }; -UNIT_TEST_CASE(LastMatchedLineDiscardUnmatchUnittest, TestLastMatchedLineWithBeginContinue); -UNIT_TEST_CASE(LastMatchedLineDiscardUnmatchUnittest, TestLastMatchedLineWithBeginEnd); -UNIT_TEST_CASE(LastMatchedLineDiscardUnmatchUnittest, TestLastMatchedLineWithBegin); -UNIT_TEST_CASE(LastMatchedLineDiscardUnmatchUnittest, TestLastMatchedLineWithContinueEnd); -UNIT_TEST_CASE(LastMatchedLineDiscardUnmatchUnittest, TestLastMatchedLineWithEnd); +UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBeginContinue); +UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBeginEnd); +UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBegin); +UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithContinueEnd); +UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithEnd); -void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBeginContinue() { +void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginContinue() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["ContinuePattern"] = LOG_CONTINUE_REGEX; @@ -220,19 +220,28 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBeginContinue APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } { // case: end with unmatch - std::string expectMatch - = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_UNMATCH + "\n"; - std::string testLog = std::string(expectMatch.data()); + std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n"; + std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + } + { // case: only \n + std::string expectMatch = ""; + std::string testLog = expectMatch + "\n\n"; + int32_t rollbackLineFeedCount = 0; + int32_t matchSize + = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); + APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); + APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); } } -void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBeginEnd() { +void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginEnd() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["EndPattern"] = LOG_END_REGEX; @@ -252,8 +261,8 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBeginEnd() { APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } { // case: end with begin - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_UNMATCH + '\n'; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n"; + std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; + std::string testLog = expectMatch + LOG_BEGIN_STRING + LOG_BEGIN_STRING + "\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); @@ -271,9 +280,19 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBeginEnd() { APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } + { // case: only \n + std::string expectMatch = ""; + std::string testLog = expectMatch + "\n\n"; + int32_t rollbackLineFeedCount = 0; + int32_t matchSize + = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); + APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); + APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + } } -void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBegin() { +void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBegin() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; MultilineOptions multilineOpts; @@ -301,9 +320,19 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithBegin() { APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); } + { // case: only \n + std::string expectMatch = ""; + std::string testLog = expectMatch + "\n\n"; + int32_t rollbackLineFeedCount = 0; + int32_t matchSize + = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); + APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); + APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + } } -void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithContinueEnd() { +void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithContinueEnd() { Json::Value config; config["ContinuePattern"] = LOG_CONTINUE_REGEX; config["EndPattern"] = LOG_END_REGEX; @@ -333,38 +362,7 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithContinueEnd() APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } { // case: end with unmatch - std::string expectMatch - = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + "\n" + LOG_UNMATCH + "\n"; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } -} - -void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithEnd() { - Json::Value config; - config["EndPattern"] = LOG_END_REGEX; - MultilineOptions multilineOpts; - multilineOpts.Init(config, ctx, ""); - LogFileReader logFileReader( - "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - // logFileReader.mDiscardUnmatch = true; - { // case: end with end - std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } - { // case: end with unmatch - std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; + std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + "\n"; std::string testLog = expectMatch + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize @@ -373,38 +371,9 @@ void LastMatchedLineDiscardUnmatchUnittest::TestLastMatchedLineWithEnd() { APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } -} - -class LastMatchedLineNoDiscardUnmatchUnittest : public ::testing::Test { -public: - void TestLastMatchedLineWithBeginContinue(); - void TestLastMatchedLineWithBeginEnd(); - void TestLastMatchedLineWithBegin(); - void TestLastMatchedLineWithContinueEnd(); - void TestLastMatchedLineWithEnd(); - -private: - FileReaderOptions readerOpts; - PipelineContext ctx; -}; - -UNIT_TEST_CASE(LastMatchedLineNoDiscardUnmatchUnittest, TestLastMatchedLineWithBeginContinue); -UNIT_TEST_CASE(LastMatchedLineNoDiscardUnmatchUnittest, TestLastMatchedLineWithBeginEnd); -UNIT_TEST_CASE(LastMatchedLineNoDiscardUnmatchUnittest, TestLastMatchedLineWithBegin); -UNIT_TEST_CASE(LastMatchedLineNoDiscardUnmatchUnittest, TestLastMatchedLineWithContinueEnd); -UNIT_TEST_CASE(LastMatchedLineNoDiscardUnmatchUnittest, TestLastMatchedLineWithEnd); - -void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBeginContinue() { - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["ContinuePattern"] = LOG_CONTINUE_REGEX; - MultilineOptions multilineOpts; - multilineOpts.Init(config, ctx, ""); - LogFileReader logFileReader( - "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - { // case: end with begin continue - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + '\n'; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n"; + { // case: only \n + std::string expectMatch = ""; + std::string testLog = expectMatch + "\n\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); @@ -412,39 +381,18 @@ void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBeginContin APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); } - { // case: end with begin - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + '\n'; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n"; - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); - } - { // case: end with unmatch - std::string expectMatch - = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_UNMATCH + "\n"; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } } -void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBeginEnd() { +void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithEnd() { Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; config["EndPattern"] = LOG_END_REGEX; MultilineOptions multilineOpts; multilineOpts.Init(config, ctx, ""); LogFileReader logFileReader( "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - { // case: end with begin end - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; + // logFileReader.mDiscardUnmatch = true; + { // case: end with end + std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; std::string testLog = std::string(expectMatch.data()); int32_t rollbackLineFeedCount = 0; int32_t matchSize @@ -453,38 +401,9 @@ void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBeginEnd() APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } - { // case: end with begin - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + '\n'; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n"; - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); - } { // case: end with unmatch - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + "\n"; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n"; - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); - } -} - -void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBegin() { - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - MultilineOptions multilineOpts; - multilineOpts.Init(config, ctx, ""); - LogFileReader logFileReader( - "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - { // case: end with begin - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_UNMATCH + '\n'; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n"; + std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; + std::string testLog = expectMatch + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); @@ -492,9 +411,9 @@ void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBegin() { APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } - { // case: end with unmatch - std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n"; - std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n"; + { // case: only \n + std::string expectMatch = ""; + std::string testLog = expectMatch + "\n\n"; int32_t rollbackLineFeedCount = 0; int32_t matchSize = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); @@ -504,76 +423,6 @@ void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithBegin() { } } -void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithContinueEnd() { - Json::Value config; - config["ContinuePattern"] = LOG_CONTINUE_REGEX; - config["EndPattern"] = LOG_END_REGEX; - MultilineOptions multilineOpts; - multilineOpts.Init(config, ctx, ""); - LogFileReader logFileReader( - "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - { // case: end with continue end - std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + '\n'; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } - { // case: end with continue - std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + '\n'; - std::string testLog = expectMatch + LOG_CONTINUE_STRING + "\n"; - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); - } - { // case: end with unmatch - std::string expectMatch - = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + "\n" + LOG_UNMATCH + "\n"; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } -} - -void LastMatchedLineNoDiscardUnmatchUnittest::TestLastMatchedLineWithEnd() { - Json::Value config; - config["EndPattern"] = LOG_END_REGEX; - MultilineOptions multilineOpts; - multilineOpts.Init(config, ctx, ""); - LogFileReader logFileReader( - "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); - { // case: end with end - std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + "\n"; - std::string testLog = std::string(expectMatch.data()); - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); - } - { // case: end with unmatch - std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + "\n"; - std::string testLog = expectMatch + LOG_UNMATCH + "\n"; - int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); - APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); - } -} - } // namespace logtail UNIT_TEST_MAIN From 39d1cabf36824ebf90990cfed3777187e5d43982 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Mon, 1 Apr 2024 02:49:25 +0000 Subject: [PATCH 2/6] fix --- core/reader/JsonLogFileReader.cpp | 6 +- core/reader/JsonLogFileReader.h | 8 +- core/reader/LogFileReader.cpp | 102 ++++----- core/reader/LogFileReader.h | 6 +- core/unittest/reader/CMakeLists.txt | 2 +- .../reader/JsonLogFileReaderUnittest.cpp | 108 +++++----- ...pp => RemoveLastIncompleteLogUnittest.cpp} | 200 +++++++++--------- 7 files changed, 220 insertions(+), 212 deletions(-) rename core/unittest/reader/{LastMatchedLineUnittest.cpp => RemoveLastIncompleteLogUnittest.cpp} (69%) diff --git a/core/reader/JsonLogFileReader.cpp b/core/reader/JsonLogFileReader.cpp index 3d1aeb6cbb..0796b1abca 100644 --- a/core/reader/JsonLogFileReader.cpp +++ b/core/reader/JsonLogFileReader.cpp @@ -19,8 +19,10 @@ using namespace std; namespace logtail { -int32_t -JsonLogFileReader::LastMatchedLine(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback) { +int32_t JsonLogFileReader::RemoveLastIncompleteLog(char* buffer, + int32_t size, + int32_t& rollbackLineFeedCount, + bool allowRollback) { int32_t readBytes = 0; int32_t endIdx = 0; int32_t beginIdx = 0; diff --git a/core/reader/JsonLogFileReader.h b/core/reader/JsonLogFileReader.h index 6df04229cc..c8bbc8a3e9 100644 --- a/core/reader/JsonLogFileReader.h +++ b/core/reader/JsonLogFileReader.h @@ -30,8 +30,10 @@ class JsonLogFileReader : public LogFileReader { : LogFileReader(hostLogPathDir, hostLogPathFile, devInode, readerConfig, multilineConfig) {} protected: - int32_t - LastMatchedLine(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback = true) override; + int32_t RemoveLastIncompleteLog(char* buffer, + int32_t size, + int32_t& rollbackLineFeedCount, + bool allowRollback = true) override; private: bool FindJsonMatch( @@ -40,7 +42,7 @@ class JsonLogFileReader : public LogFileReader { #ifdef APSARA_UNIT_TEST_MAIN friend class JsonLogFileReaderUnittest; friend class JsonParseLogLineUnittest; - friend class LastMatchedLineUnittest; + friend class RemoveLastIncompleteLogUnittest; #endif }; diff --git a/core/reader/LogFileReader.cpp b/core/reader/LogFileReader.cpp index b4090735c6..0cd75750de 100644 --- a/core/reader/LogFileReader.cpp +++ b/core/reader/LogFileReader.cpp @@ -1678,7 +1678,7 @@ void LogFileReader::ReadUTF8(LogBuffer& logBuffer, int64_t end, bool& moreData, } if (allowRollback || mReaderConfig.second->RequiringJsonReader()) { int32_t rollbackLineFeedCount; - nbytes = LastMatchedLine(stringBuffer, alignedBytes, rollbackLineFeedCount, allowRollback); + nbytes = RemoveLastIncompleteLog(stringBuffer, alignedBytes, rollbackLineFeedCount, allowRollback); } if (nbytes == 0) { @@ -1686,7 +1686,7 @@ void LogFileReader::ReadUTF8(LogBuffer& logBuffer, int64_t end, bool& moreData, nbytes = alignedBytes ? alignedBytes : BUFFER_SIZE; if (mReaderConfig.second->RequiringJsonReader()) { int32_t rollbackLineFeedCount; - nbytes = LastMatchedLine(stringBuffer, nbytes, rollbackLineFeedCount, false); + nbytes = RemoveLastIncompleteLog(stringBuffer, nbytes, rollbackLineFeedCount, false); } LOG_WARNING( sLogger, @@ -1812,7 +1812,7 @@ void LogFileReader::ReadGBK(LogBuffer& logBuffer, int64_t end, bool& moreData, b int32_t rollbackLineFeedCount = 0; int32_t bakResultCharCount = resultCharCount; if (allowRollback || mReaderConfig.second->RequiringJsonReader()) { - resultCharCount = LastMatchedLine(stringBuffer, resultCharCount, rollbackLineFeedCount, allowRollback); + resultCharCount = RemoveLastIncompleteLog(stringBuffer, resultCharCount, rollbackLineFeedCount, allowRollback); } if (resultCharCount == 0) { if (moreData) { @@ -1820,7 +1820,7 @@ void LogFileReader::ReadGBK(LogBuffer& logBuffer, int64_t end, bool& moreData, b rollbackLineFeedCount = 0; if (mReaderConfig.second->RequiringJsonReader()) { int32_t rollbackLineFeedCount; - LastMatchedLine(stringBuffer, resultCharCount, rollbackLineFeedCount, false); + RemoveLastIncompleteLog(stringBuffer, resultCharCount, rollbackLineFeedCount, false); } // Cannot get the split position here, so just mark a flag and send alarm later logTooLongSplitFlag = true; @@ -1983,70 +1983,70 @@ LogFileReader::FileCompareResult LogFileReader::CompareToFile(const string& file 1. xxx\nend\n -> xxx\nend 1. xxx\nend\nxxx\n -> xxx\nend */ -int32_t LogFileReader::LastMatchedLine(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback) { +/* + return: the number of bytes left +*/ +int32_t +LogFileReader::RemoveLastIncompleteLog(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback) { if (!allowRollback) { return size; } - int endPs = size - 1; // buffer[size] = 0 , buffer[size-1] = '\n' + int endPs = size - 1; rollbackLineFeedCount = 0; - // Single line rollback - if (!mMultilineConfig.first->IsMultiline()) { - while (endPs >= 0) { - if (buffer[endPs] == '\n') { - if (endPs != size - 1) { // if last line dose not end with '\n', rollback - ++rollbackLineFeedCount; - } - return endPs + 1; - } - endPs--; - } - return 0; - } // Multiline rollback - std::string exception; - while (endPs >= 0) { - size_t begPs = GetNextLine(buffer, endPs); - if (mMultilineConfig.first->GetEndPatternReg()) { - // start + end, continue + end, end - if (BoostRegexMatch( - buffer + begPs, endPs - begPs, *mMultilineConfig.first->GetEndPatternReg(), exception)) { - // Ensure the end line is complete - if (buffer[endPs] == '\n') { - return endPs + 1; - } else { - ++rollbackLineFeedCount; - endPs = begPs - 1; + if (mMultilineConfig.first->IsMultiline()) { + std::string exception; + while (endPs >= 0) { + StringView content = GetNextLine(StringView(buffer, size), endPs); + if (mMultilineConfig.first->GetEndPatternReg()) { + // start + end, continue + end, end + if (BoostRegexMatch( + content.data(), content.size(), *mMultilineConfig.first->GetEndPatternReg(), exception)) { + // Ensure the end line is complete + if (buffer[endPs] == '\n') { + return endPs + 1; + } } - } else { + } else if (mMultilineConfig.first->GetStartPatternReg() + && BoostRegexMatch( + content.data(), content.size(), *mMultilineConfig.first->GetStartPatternReg(), exception)) { + // start + continue, start ++rollbackLineFeedCount; - endPs = begPs - 1; + // Keep all the buffer if rollback all + return content.data() - buffer; } - } else if (mMultilineConfig.first->GetStartPatternReg() - && BoostRegexMatch( - buffer + begPs, endPs - begPs, *mMultilineConfig.first->GetStartPatternReg(), exception)) { - // start + continue, start ++rollbackLineFeedCount; - // Keep all the buffer if rollback all - return begPs; - } else { - ++rollbackLineFeedCount; - endPs = begPs - 1; + endPs = content.data() - buffer - 1; } } - return 0; + // Single line rollback or all unmatch rollback + rollbackLineFeedCount = 0; + StringView content = GetNextLine(StringView(buffer, size), size); + size_t rollbackSize = content.data() - buffer; + if (rollbackSize < size) { + ++rollbackLineFeedCount; + } + return rollbackSize; } -size_t LogFileReader::GetNextLine(const char* buffer, size_t end) { - if (end <= 0) { - return 0; +/* + params: + buffer: all read logs + end: the end position of current line + return: + next line (backward), with \n +*/ +StringView LogFileReader::GetNextLine(StringView buffer, size_t end) { + if (end == 0) { + return buffer; } for (size_t begin = end; begin > 0; --begin) { if (buffer[begin - 1] == '\n') { - return begin; + return StringView(buffer.data() + begin, end - begin); } } - return 0; + return StringView(buffer.data(), end); } size_t LogFileReader::AlignLastCharacter(char* buffer, size_t size) { @@ -2060,8 +2060,8 @@ size_t LogFileReader::AlignLastCharacter(char* buffer, size_t size) { // 1. The number of byte for one character can be 1, 2, 4. // 2. 1 byte character: the top bit is 0. // 3. 2 bytes character: the 1st byte is between 0x81 and 0xFE; the 2nd byte is between 0x40 and 0xFE. - // 4. 4 bytes character: the 1st and 3rd byte is between 0x81 and 0xFE; the 2nd and 4th byte are between 0x30 - // and 0x39. (not supported to align) + // 4. 4 bytes character: the 1st and 3rd byte is between 0x81 and 0xFE; the 2nd and 4th byte are between + // 0x30 and 0x39. (not supported to align) // 1 byte character, 2nd byte of 2 bytes, 2nd or 4th byte of 4 bytes if ((buffer[endPs] & 0x80) == 0 || size == 1) { diff --git a/core/reader/LogFileReader.h b/core/reader/LogFileReader.h index ed17cda76c..fba2fd65f3 100644 --- a/core/reader/LogFileReader.h +++ b/core/reader/LogFileReader.h @@ -130,7 +130,7 @@ class LogFileReader { FileCompareResult CompareToFile(const std::string& filePath); virtual int32_t - LastMatchedLine(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback = true); + RemoveLastIncompleteLog(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback = true); size_t AlignLastCharacter(char* buffer, size_t size); @@ -504,7 +504,7 @@ class LogFileReader { // @param fromCpt: if the read size is recoveried from checkpoint, set it to true. size_t getNextReadSize(int64_t fileEnd, bool& fromCpt); - size_t GetNextLine(const char* buffer, size_t begin); + StringView GetNextLine(StringView buffer, size_t begin); // Update current checkpoint's read offset and length after success read. void setExactlyOnceCheckpointAfterRead(size_t readSize); @@ -591,7 +591,7 @@ class LogFileReader { friend class LogSplitUnittest; friend class LogSplitDiscardUnmatchUnittest; friend class LogSplitNoDiscardUnmatchUnittest; - friend class LastMatchedLineMultilineUnittest; + friend class RemoveLastIncompleteLogMultilineUnittest; friend class LogFileReaderCheckpointUnittest; protected: diff --git a/core/unittest/reader/CMakeLists.txt b/core/unittest/reader/CMakeLists.txt index c57cfa4c5c..22370e9c3a 100644 --- a/core/unittest/reader/CMakeLists.txt +++ b/core/unittest/reader/CMakeLists.txt @@ -24,7 +24,7 @@ target_link_libraries(file_reader_options_unittest unittest_base) add_executable(json_log_file_reader_unittest JsonLogFileReaderUnittest.cpp) target_link_libraries(json_log_file_reader_unittest unittest_base) -add_executable(last_matched_line_unittest LastMatchedLineUnittest.cpp) +add_executable(last_matched_line_unittest RemoveLastIncompleteLogUnittest.cpp) target_link_libraries(last_matched_line_unittest unittest_base) add_executable(log_file_reader_unittest LogFileReaderUnittest.cpp) diff --git a/core/unittest/reader/JsonLogFileReaderUnittest.cpp b/core/unittest/reader/JsonLogFileReaderUnittest.cpp index b673f593a9..2e1e7e0d65 100644 --- a/core/unittest/reader/JsonLogFileReaderUnittest.cpp +++ b/core/unittest/reader/JsonLogFileReaderUnittest.cpp @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "unittest/Unittest.h" #include + #include -#include "reader/JsonLogFileReader.h" -#include "common/RuntimeUtil.h" + #include "common/FileSystemUtil.h" +#include "common/RuntimeUtil.h" #include "file_server/FileServer.h" +#include "reader/JsonLogFileReader.h" +#include "unittest/Unittest.h" DECLARE_FLAG_INT32(force_release_deleted_file_fd_timeout); @@ -227,21 +229,21 @@ void JsonLogFileReaderUnittest::TestReadUTF8() { } } -class LastMatchedLineUnittest : public ::testing::Test { +class RemoveLastIncompleteLogUnittest : public ::testing::Test { public: void SetUp() override { mLogFileReader.reset(new JsonLogFileReader( "dir", "file", DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx))); } - void TestLastMatchedLineSingleLine(); - void TestLastMatchedLineSingleLineIncomplete(); - void TestLastMatchedLineSingleLineIncompleteNoRollback(); - void TestLastMatchedLineMultiline(); - void TestLastMatchedLineMultilineIncomplete(); - void TestLastMatchedLineMultilineIncompleteNoRollback(); - void TestLastMatchedLineNotValidJson(); - void TestLastMatchedLineNotValidJsonNoRollback(); + void TestRemoveLastIncompleteLogSingleLine(); + void TestRemoveLastIncompleteLogSingleLineIncomplete(); + void TestRemoveLastIncompleteLogSingleLineIncompleteNoRollback(); + void TestRemoveLastIncompleteLogMultiline(); + void TestRemoveLastIncompleteLogMultilineIncomplete(); + void TestRemoveLastIncompleteLogMultilineIncompleteNoRollback(); + void TestRemoveLastIncompleteLogNotValidJson(); + void TestRemoveLastIncompleteLogNotValidJsonNoRollback(); std::unique_ptr mLogFileReader; MultilineOptions multilineOpts; @@ -249,16 +251,16 @@ class LastMatchedLineUnittest : public ::testing::Test { PipelineContext ctx; }; -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineSingleLine) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineSingleLineIncomplete) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineSingleLineIncompleteNoRollback) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineMultiline) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineMultilineIncomplete) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineMultilineIncompleteNoRollback) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineNotValidJson) -UNIT_TEST_CASE(LastMatchedLineUnittest, TestLastMatchedLineNotValidJsonNoRollback) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogSingleLine) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogSingleLineIncomplete) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogSingleLineIncompleteNoRollback) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogMultiline) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogMultilineIncomplete) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogMultilineIncompleteNoRollback) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogNotValidJson) +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestRemoveLastIncompleteLogNotValidJsonNoRollback) -void LastMatchedLineUnittest::TestLastMatchedLineSingleLine() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogSingleLine() { { // case single line std::string line1 = R"({"key": "first value"})"; std::string line2 = R"({"key": "second value"})"; @@ -266,15 +268,15 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLine() { std::string expectMatch = line1 + '\0' + line2 + '\0' + line3 + '\0'; std::string testLog = line1 + '\n' + line2 + '\n' + line3 + '\n'; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncomplete() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogSingleLineIncomplete() { { // case single line, buffer size not big enough, json truncated std::string line1 = R"({"key": "first value"})"; std::string line2 = R"({"key": "second value"})"; @@ -282,8 +284,8 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncomplete() { std::string expectMatch = line1 + '\0' + line2 + '\0'; std::string testLog = line1 + '\n' + line2 + '\n' + line3; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -295,15 +297,15 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncomplete() { std::string expectMatch = line1 + '\0' + line2 + '\0'; std::string testLog = line1 + '\n' + line2 + '\n' + line3; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncompleteNoRollback() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogSingleLineIncompleteNoRollback() { { // case single line, buffer size not big enough, json truncated std::string line1 = R"({"key": "first value"})"; std::string line2 = R"({"key": "second value"})"; @@ -311,7 +313,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncompleteNoRollback( std::string expectMatch = line1 + '\0' + line2 + '\0' + line3; std::string testLog = line1 + '\n' + line2 + '\n' + line3; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); @@ -324,7 +326,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncompleteNoRollback( std::string expectMatch = line1 + '\0' + line2 + '\0' + line3; std::string testLog = line1 + '\n' + line2 + '\n' + line3; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); @@ -332,7 +334,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineSingleLineIncompleteNoRollback( } } -void LastMatchedLineUnittest::TestLastMatchedLineMultiline() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogMultiline() { { // case multi line std::vector index; std::string firstLog = R"({ @@ -346,15 +348,15 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultiline() { std::string expectMatch = firstLog + '\0' + secondLog + '\0'; std::string testLog = firstLog + '\n' + secondLog + '\n'; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncomplete() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogMultilineIncomplete() { { // case multi line, buffer size not enough, json truncated std::vector index; std::string firstLog = R"({ @@ -367,8 +369,8 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncomplete() { std::string expectMatch = firstLog + '\0'; std::string testLog = firstLog + '\n' + secondLog + '\n'; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(4, rollbackLineFeedCount); @@ -386,15 +388,15 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncomplete() { std::string expectMatch = firstLog + '\0'; std::string testLog = firstLog + '\n' + secondLog; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(5, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncompleteNoRollback() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogMultilineIncompleteNoRollback() { { // case multi line, buffer size not enough, json truncated std::vector index; std::string firstLog = R"({ @@ -409,7 +411,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncompleteNoRollback() std::string expectMatch = firstLog + '\0' + splittedSecondLog + '\0'; std::string testLog = firstLog + '\n' + secondLog + '\n'; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); @@ -428,7 +430,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncompleteNoRollback() std::string expectMatch = firstLog + '\0' + secondLog; std::string testLog = firstLog + '\n' + secondLog; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); @@ -436,12 +438,12 @@ void LastMatchedLineUnittest::TestLastMatchedLineMultilineIncompleteNoRollback() } } -void LastMatchedLineUnittest::TestLastMatchedLineNotValidJson() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogNotValidJson() { { // case not json, skip all std::string testLog = "not a json at all.\nnot a json at all.\nnot a json at all.\n"; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(testLog.size(), matchSize); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } @@ -451,8 +453,8 @@ void LastMatchedLineUnittest::TestLastMatchedLineNotValidJson() { std::replace(expectMatch.begin(), expectMatch.end(), '\n', '\0'); std::string testLog = "not a json at all.\nnot a json at all.\n{partial json\n"; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -469,19 +471,19 @@ void LastMatchedLineUnittest::TestLastMatchedLineNotValidJson() { std::string expectMatch = firstLog + '\0' + notjson; ; int32_t rollbackLineFeedCount = 0; - size_t matchSize - = mLogFileReader->LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestLastMatchedLineNotValidJsonNoRollback() { +void RemoveLastIncompleteLogUnittest::TestRemoveLastIncompleteLogNotValidJsonNoRollback() { { // case not json std::string testLog = "not a json at all.\nnot a json at all.\nnot a json at all.\n"; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(testLog.size(), matchSize); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); @@ -491,7 +493,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineNotValidJsonNoRollback() { std::string expectMatch = testLog; std::replace(expectMatch.begin(), expectMatch.end(), '\n', '\0'); int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); @@ -509,7 +511,7 @@ void LastMatchedLineUnittest::TestLastMatchedLineNotValidJsonNoRollback() { std::string expectMatch = firstLog + '\0' + notjson; ; int32_t rollbackLineFeedCount = 0; - size_t matchSize = mLogFileReader->LastMatchedLine( + size_t matchSize = mLogFileReader->RemoveLastIncompleteLog( const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount, false); APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); diff --git a/core/unittest/reader/LastMatchedLineUnittest.cpp b/core/unittest/reader/RemoveLastIncompleteLogUnittest.cpp similarity index 69% rename from core/unittest/reader/LastMatchedLineUnittest.cpp rename to core/unittest/reader/RemoveLastIncompleteLogUnittest.cpp index d3f7113bf9..1907372e46 100644 --- a/core/unittest/reader/LastMatchedLineUnittest.cpp +++ b/core/unittest/reader/RemoveLastIncompleteLogUnittest.cpp @@ -27,7 +27,7 @@ const std::string LOG_END_STRING = " ...23 more"; const std::string LOG_END_REGEX = R"(\s*\.\.\.\d+ more)"; const std::string LOG_UNMATCH = "unmatch log"; -class LastMatchedLineUnittest : public ::testing::Test { +class RemoveLastIncompleteLogUnittest : public ::testing::Test { public: static void SetUpTestCase() { logPathDir = GetProcessExecutionDir(); @@ -75,14 +75,14 @@ class LastMatchedLineUnittest : public ::testing::Test { static std::string utf8File; }; -UNIT_TEST_CASE(LastMatchedLineUnittest, TestSingleline); -UNIT_TEST_CASE(LastMatchedLineUnittest, TestMultiline); +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestSingleline); +UNIT_TEST_CASE(RemoveLastIncompleteLogUnittest, TestMultiline); -std::string LastMatchedLineUnittest::logPathDir; -std::string LastMatchedLineUnittest::gbkFile; -std::string LastMatchedLineUnittest::utf8File; +std::string RemoveLastIncompleteLogUnittest::logPathDir; +std::string RemoveLastIncompleteLogUnittest::gbkFile; +std::string RemoveLastIncompleteLogUnittest::utf8File; -void LastMatchedLineUnittest::TestSingleline() { +void RemoveLastIncompleteLogUnittest::TestSingleline() { MultilineOptions multilineOpts; LogFileReader logFileReader( logPathDir, utf8File, DevInode(), std::make_pair(&readerOpts, &ctx), std::make_pair(&multilineOpts, &ctx)); @@ -93,8 +93,8 @@ void LastMatchedLineUnittest::TestSingleline() { std::string expectMatch = line1 + '\n' + line2 + '\n' + line3 + '\n'; std::string testLog = expectMatch; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); @@ -106,24 +106,25 @@ void LastMatchedLineUnittest::TestSingleline() { std::string expectMatch = line1 + '\n' + line2 + '\n'; std::string testLog = expectMatch + line3; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } { // case single line, cannot be split, buffer size not big enough (no new line at the end of line) + // it will be force read in ReadUTF8/ReadGBK std::string testLog = "first."; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(0, matchSize); // return the whole buffer, so no rollback - APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineUnittest::TestMultiline() { +void RemoveLastIncompleteLogUnittest::TestMultiline() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; MultilineOptions multilineOpts; @@ -137,8 +138,8 @@ void LastMatchedLineUnittest::TestMultiline() { std::string secondLog = LOG_BEGIN_STRING + "second.\nmultiline1\nmultiline2"; std::string expectMatch = firstLog + '\n'; std::string testLog = expectMatch + secondLog + '\n'; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(3, rollbackLineFeedCount); @@ -149,48 +150,49 @@ void LastMatchedLineUnittest::TestMultiline() { std::string secondLog = LOG_BEGIN_STRING + "second.\nmultiline1\nmultiline2"; std::string expectMatch = firstLog + '\n'; std::string testLog = expectMatch + secondLog; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(3, rollbackLineFeedCount); } { // case multi line not match std::string testLog2 = "log begin does not match.\nlog begin does not match.\nlog begin does not match.\n"; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog2.data()), testLog2.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(0, matchSize); - APSARA_TEST_EQUAL_FATAL(3, rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog2.data()), testLog2.size(), rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(testLog2.size(), matchSize); + APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); } { // case multi line not match, buffer size not big enough (no new line at the end of line) - std::string testLog2 = "log begin does not match.\nlog begin does not match.\nlog begin does not"; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog2.data()), testLog2.size(), rollbackLineFeedCount); - APSARA_TEST_EQUAL_FATAL(0, matchSize); - APSARA_TEST_EQUAL_FATAL(3, rollbackLineFeedCount); + std::string expectMatch = "log begin does not match.\nlog begin does not match.\n"; + std::string testLog2 = expectMatch + "log begin does not"; + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog2.data()), testLog2.size(), rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(expectMatch.size(), matchSize); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -class LastMatchedLineMultilineUnittest : public ::testing::Test { +class RemoveLastIncompleteLogMultilineUnittest : public ::testing::Test { public: - void TestLastMatchedLineWithBeginContinue(); - void TestLastMatchedLineWithBeginEnd(); - void TestLastMatchedLineWithBegin(); - void TestLastMatchedLineWithContinueEnd(); - void TestLastMatchedLineWithEnd(); + void TestRemoveLastIncompleteLogWithBeginContinue(); + void TestRemoveLastIncompleteLogWithBeginEnd(); + void TestRemoveLastIncompleteLogWithBegin(); + void TestRemoveLastIncompleteLogWithContinueEnd(); + void TestRemoveLastIncompleteLogWithEnd(); private: FileReaderOptions readerOpts; PipelineContext ctx; }; -UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBeginContinue); -UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBeginEnd); -UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithBegin); -UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithContinueEnd); -UNIT_TEST_CASE(LastMatchedLineMultilineUnittest, TestLastMatchedLineWithEnd); +UNIT_TEST_CASE(RemoveLastIncompleteLogMultilineUnittest, TestRemoveLastIncompleteLogWithBeginContinue); +UNIT_TEST_CASE(RemoveLastIncompleteLogMultilineUnittest, TestRemoveLastIncompleteLogWithBeginEnd); +UNIT_TEST_CASE(RemoveLastIncompleteLogMultilineUnittest, TestRemoveLastIncompleteLogWithBegin); +UNIT_TEST_CASE(RemoveLastIncompleteLogMultilineUnittest, TestRemoveLastIncompleteLogWithContinueEnd); +UNIT_TEST_CASE(RemoveLastIncompleteLogMultilineUnittest, TestRemoveLastIncompleteLogWithEnd); -void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginContinue() { +void RemoveLastIncompleteLogMultilineUnittest::TestRemoveLastIncompleteLogWithBeginContinue() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["ContinuePattern"] = LOG_CONTINUE_REGEX; @@ -203,8 +205,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginContinue() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + '\n'; std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); @@ -213,8 +215,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginContinue() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + '\n'; std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -223,25 +225,25 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginContinue() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n"; std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); } - { // case: only \n - std::string expectMatch = ""; - std::string testLog = expectMatch + "\n\n"; + { // case: all unmatch + std::string expectMatch = "\n\n"; + std::string testLog = expectMatch + LOG_UNMATCH; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginEnd() { +void RemoveLastIncompleteLogMultilineUnittest::TestRemoveLastIncompleteLogWithBeginEnd() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["EndPattern"] = LOG_END_REGEX; @@ -254,8 +256,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginEnd() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; std::string testLog = std::string(expectMatch.data()); int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); @@ -264,8 +266,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginEnd() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; std::string testLog = expectMatch + LOG_BEGIN_STRING + LOG_BEGIN_STRING + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -274,25 +276,25 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBeginEnd() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + "\n"; std::string testLog = expectMatch + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } - { // case: only \n - std::string expectMatch = ""; - std::string testLog = expectMatch + "\n\n"; + { // case: all unmatch + std::string expectMatch = "\n\n"; + std::string testLog = expectMatch + LOG_UNMATCH; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBegin() { +void RemoveLastIncompleteLogMultilineUnittest::TestRemoveLastIncompleteLogWithBegin() { Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; MultilineOptions multilineOpts; @@ -304,8 +306,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBegin() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_UNMATCH + '\n'; std::string testLog = expectMatch + LOG_BEGIN_STRING; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -314,25 +316,25 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithBegin() { std::string expectMatch = LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n"; std::string testLog = expectMatch + LOG_BEGIN_STRING + "\n" + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); } - { // case: only \n - std::string expectMatch = ""; - std::string testLog = expectMatch + "\n\n"; + { // case: all unmatch + std::string expectMatch = "\n\n"; + std::string testLog = expectMatch + LOG_UNMATCH; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithContinueEnd() { +void RemoveLastIncompleteLogMultilineUnittest::TestRemoveLastIncompleteLogWithContinueEnd() { Json::Value config; config["ContinuePattern"] = LOG_CONTINUE_REGEX; config["EndPattern"] = LOG_END_REGEX; @@ -345,8 +347,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithContinueEnd() { std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + '\n'; std::string testLog = std::string(expectMatch.data()); int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); @@ -355,8 +357,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithContinueEnd() { std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + '\n'; std::string testLog = expectMatch + LOG_CONTINUE_STRING + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); @@ -365,25 +367,25 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithContinueEnd() { std::string expectMatch = LOG_CONTINUE_STRING + "\n" + LOG_CONTINUE_STRING + "\n" + LOG_END_STRING + "\n"; std::string testLog = expectMatch + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } { // case: only \n - std::string expectMatch = ""; - std::string testLog = expectMatch + "\n\n"; + std::string expectMatch = "\n\n"; + std::string testLog = expectMatch + LOG_UNMATCH; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } -void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithEnd() { +void RemoveLastIncompleteLogMultilineUnittest::TestRemoveLastIncompleteLogWithEnd() { Json::Value config; config["EndPattern"] = LOG_END_REGEX; MultilineOptions multilineOpts; @@ -395,8 +397,8 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithEnd() { std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; std::string testLog = std::string(expectMatch.data()); int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(0, rollbackLineFeedCount); @@ -405,21 +407,21 @@ void LastMatchedLineMultilineUnittest::TestLastMatchedLineWithEnd() { std::string expectMatch = LOG_UNMATCH + "\n" + LOG_UNMATCH + "\n" + LOG_END_STRING + '\n'; std::string testLog = expectMatch + LOG_UNMATCH + "\n"; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } - { // case: only \n - std::string expectMatch = ""; - std::string testLog = expectMatch + "\n\n"; + { // case: all unmatch + std::string expectMatch = "\n\n"; + std::string testLog = expectMatch + LOG_UNMATCH; int32_t rollbackLineFeedCount = 0; - int32_t matchSize - = logFileReader.LastMatchedLine(const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); + int32_t matchSize = logFileReader.RemoveLastIncompleteLog( + const_cast(testLog.data()), testLog.size(), rollbackLineFeedCount); APSARA_TEST_EQUAL_FATAL(static_cast(expectMatch.size()), matchSize); APSARA_TEST_EQUAL_FATAL(std::string(testLog.data(), matchSize), expectMatch); - APSARA_TEST_EQUAL_FATAL(2, rollbackLineFeedCount); + APSARA_TEST_EQUAL_FATAL(1, rollbackLineFeedCount); } } From 28daf55047886dd5f5aabc54494f00b6d2c03c19 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Mon, 1 Apr 2024 03:22:01 +0000 Subject: [PATCH 3/6] fix rename --- core/reader/LogFileReader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/reader/LogFileReader.cpp b/core/reader/LogFileReader.cpp index ba158bfa82..08ec7fc5f3 100644 --- a/core/reader/LogFileReader.cpp +++ b/core/reader/LogFileReader.cpp @@ -1698,7 +1698,7 @@ void LogFileReader::ReadUTF8(LogBuffer& logBuffer, int64_t end, bool& moreData, } if (allowRollback || mReaderConfig.second->RequiringJsonReader()) { int32_t rollbackLineFeedCount; - nbytes = LastMatchedLine(stringBuffer, alignedBytes, rollbackLineFeedCount, allowRollback); + nbytes = RemoveLastIncompleteLog(stringBuffer, alignedBytes, rollbackLineFeedCount, allowRollback); } if (nbytes == 0) { @@ -1706,7 +1706,7 @@ void LogFileReader::ReadUTF8(LogBuffer& logBuffer, int64_t end, bool& moreData, nbytes = alignedBytes ? alignedBytes : BUFFER_SIZE; if (mReaderConfig.second->RequiringJsonReader()) { int32_t rollbackLineFeedCount; - nbytes = LastMatchedLine(stringBuffer, nbytes, rollbackLineFeedCount, false); + nbytes = RemoveLastIncompleteLog(stringBuffer, nbytes, rollbackLineFeedCount, false); } LOG_WARNING(sLogger, ("Log is too long and forced to be split at offset: ", From 549c7ef2b525b7e28222acd414ef9caba3bad89f Mon Sep 17 00:00:00 2001 From: abingcbc Date: Mon, 1 Apr 2024 09:59:38 +0000 Subject: [PATCH 4/6] fix --- core/reader/LogFileReader.cpp | 27 ++++++++++++++++----------- core/reader/LogFileReader.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/core/reader/LogFileReader.cpp b/core/reader/LogFileReader.cpp index 08ec7fc5f3..a10d2e3371 100644 --- a/core/reader/LogFileReader.cpp +++ b/core/reader/LogFileReader.cpp @@ -2028,20 +2028,25 @@ LogFileReader::FileCompareResult LogFileReader::CompareToFile(const string& file 1. xxx\nend\nxxx\n -> xxx\nend */ /* - return: the number of bytes left + return: the number of bytes left, including \n */ int32_t LogFileReader::RemoveLastIncompleteLog(char* buffer, int32_t size, int32_t& rollbackLineFeedCount, bool allowRollback) { if (!allowRollback) { return size; } - int endPs = size - 1; + int32_t endPs; // the position of \n or \0 + if (buffer[size - 1] == '\n') { + endPs = size - 1; + } else { + endPs = size; + } rollbackLineFeedCount = 0; // Multiline rollback if (mMultilineConfig.first->IsMultiline()) { std::string exception; while (endPs >= 0) { - StringView content = GetNextLine(StringView(buffer, size), endPs); + StringView content = GetLastLine(StringView(buffer, size), endPs); if (mMultilineConfig.first->GetEndPatternReg()) { // start + end, continue + end, end if (BoostRegexMatch( @@ -2065,22 +2070,22 @@ LogFileReader::RemoveLastIncompleteLog(char* buffer, int32_t size, int32_t& roll } // Single line rollback or all unmatch rollback rollbackLineFeedCount = 0; - StringView content = GetNextLine(StringView(buffer, size), size); - size_t rollbackSize = content.data() - buffer; - if (rollbackSize < size) { - ++rollbackLineFeedCount; + if (buffer[size - 1] == '\n') { + return size; } - return rollbackSize; + StringView content = GetLastLine(StringView(buffer, size), size - 1); + ++rollbackLineFeedCount; + return content.data() - buffer; } /* params: buffer: all read logs - end: the end position of current line + end: the end position of current line, \n or \0 return: - next line (backward), with \n + last line (backward), without \n or \0 */ -StringView LogFileReader::GetNextLine(StringView buffer, size_t end) { +StringView LogFileReader::GetLastLine(StringView buffer, size_t end) { if (end == 0) { return buffer; } diff --git a/core/reader/LogFileReader.h b/core/reader/LogFileReader.h index 53c84fa4b3..25c71d081f 100644 --- a/core/reader/LogFileReader.h +++ b/core/reader/LogFileReader.h @@ -502,7 +502,7 @@ class LogFileReader { // @param fromCpt: if the read size is recoveried from checkpoint, set it to true. size_t getNextReadSize(int64_t fileEnd, bool& fromCpt); - StringView GetNextLine(StringView buffer, size_t begin); + StringView GetLastLine(StringView buffer, size_t begin); // Update current checkpoint's read offset and length after success read. void setExactlyOnceCheckpointAfterRead(size_t readSize); From 036c052a667ca8406f3be1bbdd6fb128972bf916 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Mon, 1 Apr 2024 12:01:03 +0000 Subject: [PATCH 5/6] fix --- core/reader/LogFileReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/reader/LogFileReader.h b/core/reader/LogFileReader.h index 25c71d081f..11586122cb 100644 --- a/core/reader/LogFileReader.h +++ b/core/reader/LogFileReader.h @@ -502,7 +502,7 @@ class LogFileReader { // @param fromCpt: if the read size is recoveried from checkpoint, set it to true. size_t getNextReadSize(int64_t fileEnd, bool& fromCpt); - StringView GetLastLine(StringView buffer, size_t begin); + StringView GetLastLine(StringView buffer, size_t end); // Update current checkpoint's read offset and length after success read. void setExactlyOnceCheckpointAfterRead(size_t readSize); From 65ee4c908fb1047de526c7a0041acf730491240e Mon Sep 17 00:00:00 2001 From: Abingcbc Date: Tue, 2 Apr 2024 08:06:21 +0000 Subject: [PATCH 6/6] fix --- core/processor/ProcessorSplitMultilineLogStringNative.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index dbd45f30f7..12df049b34 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -348,7 +348,7 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& EventsContainer& newEvents, StringView logPath, int* unmatchLines) { - size_t begin, fisrtLogSize, totalLines = 0; + size_t begin = 0, fisrtLogSize = 0, totalLines = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); ++(*unmatchLines);