Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLASH-332] Add backoff to getting gc safe point from pd #106

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <common/CltException.h>

namespace pingcap {
namespace kv {
namespace common {

enum Jitter {
NoJitter = 1,
Expand Down
3 changes: 3 additions & 0 deletions contrib/client-c/include/pd/Client.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <condition_variable>
#include <kvproto/pdpb.grpc.pb.h>
#include <common/Log.h>
#include <common/Backoff.h>
#include "IClient.h"

namespace pingcap{
Expand All @@ -27,6 +28,8 @@ class Client : public IClient {

const std::chrono::seconds update_leader_interval;

const size_t get_gc_safe_point_timeout; // ms

public:

Client(const std::vector<std::string> & addrs);
Expand Down
3 changes: 2 additions & 1 deletion contrib/client-c/include/tikv/Region.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <pd/Client.h>
#include <pd/MockPDClient.h>
#include <kvproto/metapb.pb.h>
#include <tikv/Backoff.h>
#include <common/Backoff.h>
#include <common/Log.h>
#include <kvproto/errorpb.pb.h>

Expand Down Expand Up @@ -123,6 +123,7 @@ struct RPCContext {
};

using RPCContextPtr = std::shared_ptr<RPCContext>;
using Backoffer = common::Backoffer;

class RegionCache {
public:
Expand Down
14 changes: 7 additions & 7 deletions contrib/client-c/include/tikv/RegionClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include<tikv/Rpc.h>
#include<tikv/Region.h>
#include<tikv/Backoff.h>
#include<common/Backoff.h>

namespace pingcap {
namespace kv {
Expand All @@ -19,7 +19,7 @@ struct RegionClient {

int64_t getReadIndex() {
auto request = new kvrpcpb::ReadIndexRequest();
Backoffer bo(readIndexMaxBackoff);
Backoffer bo(common::readIndexMaxBackoff);
auto rpc_call = std::make_shared<RpcCall<kvrpcpb::ReadIndexRequest>>(request);
sendReqToRegion(bo, rpc_call, true);
return rpc_call -> getResp() -> read_index();
Expand Down Expand Up @@ -57,10 +57,10 @@ struct RegionClient {
auto not_leader = err.not_leader();
if (not_leader.has_leader()) {
cache -> updateLeader(bo, rpc_ctx->region, not_leader.leader().store_id());
bo.backoff(boUpdateLeader, Exception("not leader"));
bo.backoff(common::boUpdateLeader, Exception("not leader"));
} else {
cache -> dropRegion(rpc_ctx->region);
bo.backoff(boRegionMiss, Exception("not leader"));
bo.backoff(common::boRegionMiss, Exception("not leader"));
}
return;
}
Expand All @@ -76,7 +76,7 @@ struct RegionClient {
}

if (err.has_server_is_busy()) {
bo.backoff(boServerBusy, Exception("server busy"));
bo.backoff(common::boServerBusy, Exception("server busy"));
return;
}

Expand All @@ -93,12 +93,12 @@ struct RegionClient {

// Logs the exception's message as an error ("retrying") and then hands the
// exception to the backoffer using the boTiKVRPC backoff type.
//   bo - backoffer that tracks the retry budget for the current request
//   e  - the exception that triggered this failure path
// NOTE(review): this text comes from a diff view, so the two bo.backoff lines
// below appear to be the removed (unqualified) and added (common::-qualified)
// versions of a single call, not two successive calls — confirm against the
// real file.
void onGetLearnerFail(Backoffer & bo, const Exception & e) {
log -> error("error found, retrying. The error msg is: "+ e.message());
bo.backoff(boTiKVRPC, e);
bo.backoff(common::boTiKVRPC, e);
}

// First calls cache->dropStoreOnSendReqFail with the RPC context and the
// exception, then hands the exception to the backoffer using the boTiKVRPC
// backoff type.
//   bo      - backoffer that tracks the retry budget for the current request
//   e       - the exception raised by the failed send
//   rpc_ctx - RPC context of the request that failed
// NOTE(review): this text comes from a diff view, so the two bo.backoff lines
// below appear to be the removed (unqualified) and added (common::-qualified)
// versions of a single call, not two successive calls — confirm against the
// real file.
void onSendFail(Backoffer & bo, const Exception & e, RPCContextPtr rpc_ctx) {
cache->dropStoreOnSendReqFail(rpc_ctx, e);
bo.backoff(boTiKVRPC, e);
bo.backoff(common::boTiKVRPC, e);
}
};

Expand Down
2 changes: 1 addition & 1 deletion contrib/client-c/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set(kvClient_sources)
list(APPEND kvClient_sources pd/Client.cc)
list(APPEND kvClient_sources tikv/Region.cc)
list(APPEND kvClient_sources tikv/RegionClient.cc)
list(APPEND kvClient_sources tikv/Backoff.cc)
list(APPEND kvClient_sources common/Backoff.cc)
list(APPEND kvClient_sources tikv/Rpc.cc)

set(kvClient_INCLUDE_DIR ${kvClient_SOURCE_DIR}/include)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#include <tikv/Backoff.h>
#include <common/Backoff.h>
#include <common/CltException.h>

namespace pingcap {
namespace kv {
namespace common {

BackoffPtr newBackoff(BackoffType tp) {
switch(tp) {
Expand Down
11 changes: 6 additions & 5 deletions contrib/client-c/src/pd/Client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Client::Client(const std::vector<std::string> & addrs)
pd_timeout(3),
loop_interval(100),
update_leader_interval(60),
get_gc_safe_point_timeout(20000),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the overall retry logic here? What is the total retry time budget, and what happens if it still fails after that?

urls(addrsToUrls(addrs)),
log(&Logger::get("pingcap.pd"))
{
Expand Down Expand Up @@ -187,11 +188,12 @@ uint64_t Client::getGCSafePoint() {
pdpb::GetGCSafePointRequest request{};
pdpb::GetGCSafePointResponse response{};
request.set_allocated_header(requestHeader());
;

common::Backoffer bo(get_gc_safe_point_timeout);
::grpc::Status status;
std::string err_msg;

for (int i = 0; i < max_init_cluster_retries; i++) {
while (true) {
grpc::ClientContext context;

context.set_deadline(std::chrono::system_clock::now() + pd_timeout);
Expand All @@ -202,10 +204,9 @@ uint64_t Client::getGCSafePoint() {
err_msg = "get safe point failed: " + std::to_string(status.error_code()) + ": " + status.error_message();
log->error(err_msg);
check_leader.store(true);
usleep(100000);
// TODO retry outside.
bo.backoff(common::BackoffType::boPDRPC, Exception(err_msg, status.error_code()));
}
throw Exception(err_msg, status.error_code());

}

std::tuple<metapb::Region, metapb::Peer, std::vector<metapb::Peer>> Client::getRegion(std::string key) {
Expand Down
6 changes: 3 additions & 3 deletions contrib/client-c/src/tikv/Region.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ RegionPtr RegionCache::loadRegionByID(Backoffer & bo, uint64_t region_id) {
}
return region;
} catch (const Exception & e) {
bo.backoff(boPDRPC, e);
bo.backoff(common::boPDRPC, e);
}
}
}
Expand Down Expand Up @@ -103,7 +103,7 @@ RegionPtr RegionCache::loadRegion(Backoffer & bo, std::string key) {
}
return region;
} catch (const Exception & e) {
bo.backoff(boPDRPC, e);
bo.backoff(common::boPDRPC, e);
}
}
}
Expand All @@ -114,7 +114,7 @@ metapb::Store RegionCache::loadStore(Backoffer & bo, uint64_t id) {
const auto & store = pdClient->getStore(id);
return store;
} catch (Exception & e) {
bo.backoff(boPDRPC, e);
bo.backoff(common::boPDRPC, e);
}
}
}
Expand Down