Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix issue 4274 #4627

Merged
merged 3 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/kvstore/Part.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "common/utils/Utils.h"
#include "kvstore/LogEncoder.h"
#include "kvstore/RocksEngineConfig.h"
#include "kvstore/stats/KVStats.h"

DEFINE_int32(cluster_id, 0, "A unique id for each cluster");

Expand Down Expand Up @@ -214,7 +215,9 @@ std::tuple<nebula::cpp2::ErrorCode, LogID, TermID> Part::commitLogs(
std::unique_ptr<LogIterator> iter, bool wait, bool needLock) {
// We should apply any membership change which happens before start time. Because when we start
// up, the peers comes from meta, has already contains all previous changes.
SCOPED_TIMER(&execTime_);
SCOPED_TIMER([](uint64_t elapsedTime) {
stats::StatsManager::addValue(kCommitLogLatencyUs, elapsedTime);
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good

auto batch = engine_->startBatchWrite();
LogID lastId = kNoCommitLogId;
TermID lastTerm = kNoCommitLogTerm;
Expand Down Expand Up @@ -364,7 +367,9 @@ std::tuple<nebula::cpp2::ErrorCode, int64_t, int64_t> Part::commitSnapshot(
LogID committedLogId,
TermID committedLogTerm,
bool finished) {
SCOPED_TIMER(&execTime_);
SCOPED_TIMER([](uint64_t elapsedTime) {
stats::StatsManager::addValue(kCommitSnapshotLatencyUs, elapsedTime);
});
auto batch = engine_->startBatchWrite();
int64_t count = 0;
int64_t size = 0;
Expand Down
12 changes: 5 additions & 7 deletions src/kvstore/raftex/RaftPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,8 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) {
committed = committedLogId_;
// Step 1: Write WAL
{
SCOPED_TIMER(&execTime_);
SCOPED_TIMER(
[](uint64_t execTime) { stats::StatsManager::addValue(kAppendWalLatencyUs, execTime); });
if (!wal_->appendLogs(iter)) {
VLOG_EVERY_N(2, 1000) << idStr_ << "Failed to write into WAL";
res = nebula::cpp2::ErrorCode::E_RAFT_WAL_FAIL;
Expand All @@ -885,7 +886,6 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) {
break;
}
}
stats::StatsManager::addValue(kAppendWalLatencyUs, execTime_);
lastId = wal_->lastLogId();
VLOG(4) << idStr_ << "Succeeded writing logs [" << iter.firstLogId() << ", " << lastId
<< "] to WAL";
Expand Down Expand Up @@ -1063,7 +1063,6 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps,
*/
auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), true, true);
if (code == nebula::cpp2::ErrorCode::SUCCEEDED) {
stats::StatsManager::addValue(kCommitLogLatencyUs, execTime_);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could add the kCommitLogLatencyUs in the scope from line 1054 to 1082?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two places that will call commitlogs, so if you only add it to the place you said, then the commit latency will record less data. If you add it to both places, then the meaning of this metric will be unclear

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about add it in commitLogs itself, that could be unified.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this PR, it is modified in this way

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👌 my bad...

std::lock_guard<std::mutex> g(raftLock_);
CHECK_EQ(lastLogId, lastCommitId);
committedLogId_ = lastCommitId;
Expand Down Expand Up @@ -1744,10 +1743,11 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
if (hasLogsToAppend) {
bool result = false;
{
SCOPED_TIMER(&execTime_);
SCOPED_TIMER([](uint64_t execTime) {
stats::StatsManager::addValue(kAppendWalLatencyUs, execTime);
});
result = wal_->appendLogs(logIter);
}
stats::StatsManager::addValue(kAppendWalLatencyUs, execTime_);
if (result) {
CHECK_EQ(lastId, wal_->lastLogId());
lastLogId_ = wal_->lastLogId();
Expand Down Expand Up @@ -1776,7 +1776,6 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
// raftLock_ has been acquired, so the third parameter is false as well.
auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), false, false);
if (code == nebula::cpp2::ErrorCode::SUCCEEDED) {
stats::StatsManager::addValue(kCommitLogLatencyUs, execTime_);
VLOG(4) << idStr_ << "Follower succeeded committing log " << committedLogId_ + 1 << " to "
<< lastLogIdCanCommit;
CHECK_EQ(lastLogIdCanCommit, lastCommitId);
Expand Down Expand Up @@ -1993,7 +1992,6 @@ void RaftPart::processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req,
resp.error_code_ref() = nebula::cpp2::ErrorCode::E_RAFT_PERSIST_SNAPSHOT_FAILED;
return;
}
stats::StatsManager::addValue(kCommitSnapshotLatencyUs, execTime_);
lastTotalCount_ += std::get<1>(ret);
lastTotalSize_ += std::get<2>(ret);
if (lastTotalCount_ != req.get_total_count() || lastTotalSize_ != req.get_total_size()) {
Expand Down
12 changes: 0 additions & 12 deletions src/kvstore/raftex/RaftPart.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,15 +396,6 @@ class RaftPart : public std::enable_shared_from_this<RaftPart> {
*/
void reset();

/**
* @brief Execution time of some operation, for statistics
*
* @return uint64_t Time in us
*/
uint64_t execTime() const {
return execTime_;
}

protected:
/**
* @brief Construct a new RaftPart
Expand Down Expand Up @@ -896,9 +887,6 @@ class RaftPart : public std::enable_shared_from_this<RaftPart> {
int64_t startTimeMs_ = 0;

std::atomic<bool> blocking_{false};

// For stats info
uint64_t execTime_{0};
};

} // namespace raftex
Expand Down
2 changes: 1 addition & 1 deletion src/storage/admin/AdminProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class TransLeaderProcessor : public BaseProcessor<cpp2::AdminExecResp> {
onFinished();
return;
} else if (code == nebula::cpp2::ErrorCode::SUCCEEDED) {
stats::StatsManager::addValue(kTransferLeaderLatencyUs, part->execTime());
// stats::StatsManager::addValue(kTransferLeaderLatencyUs, part->execTime());
// To avoid dead lock, we use another ioThreadPool to check the
// leader information.
folly::via(folly::getGlobalIOExecutor().get(), [this, part, spaceId, partId] {
Expand Down