From 65c86e316f3eabd7d02e539abca3324da35c9b09 Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 11:49:37 -0400 Subject: [PATCH 1/8] Fix envoy crash during shutdown Signed-off-by: Elisha Ziskind --- include/envoy/server/instance.h | 5 ++++ source/server/config_validation/server.h | 1 + source/server/server.cc | 34 +++++++++++------------- source/server/server.h | 11 ++++---- test/mocks/server/mocks.h | 1 + test/server/server_test.cc | 12 ++++++--- 6 files changed, 37 insertions(+), 27 deletions(-) diff --git a/include/envoy/server/instance.h b/include/envoy/server/instance.h index 576cf8c4eef4..c455c76681d3 100644 --- a/include/envoy/server/instance.h +++ b/include/envoy/server/instance.h @@ -152,6 +152,11 @@ class Instance { */ virtual void shutdown() PURE; + /** + * @return whether the shutdown method has been called. + */ + virtual bool isShutdown() PURE; + /** * Shutdown the server's admin processing. This includes the admin API, stat flushing, etc. */ diff --git a/source/server/config_validation/server.h b/source/server/config_validation/server.h index 03d0b9538971..dfbab3bb2bd0 100644 --- a/source/server/config_validation/server.h +++ b/source/server/config_validation/server.h @@ -83,6 +83,7 @@ class ValidationInstance : Logger::Loggable, } Runtime::Loader& runtime() override { return *runtime_loader_; } void shutdown() override; + bool isShutdown() override { return false; } void shutdownAdmin() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; } Singleton::Manager& singletonManager() override { return *singleton_manager_; } OverloadManager& overloadManager() override { return *overload_manager_; } diff --git a/source/server/server.cc b/source/server/server.cc index b4e8e0fb3786..391d78962290 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -49,7 +49,7 @@ InstanceImpl::InstanceImpl(Options& options, Event::TimeSystem& time_system, ComponentFactory& component_factory, Runtime::RandomGeneratorPtr&& random_generator, ThreadLocal::Instance& tls) - : options_(options), time_system_(time_system), restarter_(restarter), + : shutdown_(false), options_(options), time_system_(time_system), restarter_(restarter), start_time_(time(nullptr)), original_start_time_(start_time_), stats_store_(store), thread_local_(tls), api_(new Api::Impl(options.fileFlushIntervalMsec())), dispatcher_(api_->allocateDispatcher(time_system)), @@ -387,15 +387,15 @@ void InstanceImpl::loadServerFlags(const absl::optional& flags_path uint64_t InstanceImpl::numConnections() { return listener_manager_->numConnections(); } -RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, - HotRestart& hot_restart, AccessLog::AccessLogManager& access_log_manager, +RunHelper::RunHelper(Instance& instance, Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, + AccessLog::AccessLogManager& access_log_manager, InitManagerImpl& init_manager, OverloadManager& overload_manager, std::function workers_start_cb) { // Setup signals. - sigterm_ = dispatcher.listenForSignal(SIGTERM, [this, &hot_restart, &dispatcher]() { + sigterm_ = dispatcher.listenForSignal(SIGTERM, [&instance]() { ENVOY_LOG(warn, "caught SIGTERM"); - shutdown(dispatcher, hot_restart); + instance.shutdown(); }); sig_usr_1_ = dispatcher.listenForSignal(SIGUSR1, [&access_log_manager]() { @@ -412,8 +412,8 @@ RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm // this can fire immediately if all clusters have already initialized. Also note that we need // to guard against shutdown at two different levels since SIGTERM can come in once the run loop // starts. - cm.setInitializedCb([this, &init_manager, &cm, workers_start_cb]() { - if (shutdown_) { + cm.setInitializedCb([&instance, &init_manager, &cm, workers_start_cb]() { + if (instance.isShutdown()) { return; } @@ -423,8 +423,11 @@ RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm cm.adsMux().pause(Config::TypeUrl::get().RouteConfiguration); ENVOY_LOG(info, "all clusters initialized. initializing init manager"); - init_manager.initialize([this, workers_start_cb]() { - if (shutdown_) { + + // Note: the lamda below should not capture "this" since the RunHelper object may + // have been destructed by the time it gets executed. + init_manager.initialize([&instance, workers_start_cb]() { + if (instance.isShutdown()) { return; } @@ -439,16 +442,10 @@ RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm overload_manager.start(); } -void RunHelper::shutdown(Event::Dispatcher& dispatcher, HotRestart& hot_restart) { - shutdown_ = true; - hot_restart.terminateParent(); - dispatcher.exit(); -} - void InstanceImpl::run() { // We need the RunHelper to be available to call from InstanceImpl::shutdown() below, so // we save it as a member variable. - run_helper_ = std::make_unique(*dispatcher_, clusterManager(), restarter_, + run_helper_ = std::make_unique(*this, *dispatcher_, clusterManager(), access_log_manager_, init_manager_, overloadManager(), [this]() -> void { startWorkers(); }); @@ -504,8 +501,9 @@ void InstanceImpl::terminate() { Runtime::Loader& InstanceImpl::runtime() { return *runtime_loader_; } void InstanceImpl::shutdown() { - ASSERT(run_helper_.get() != nullptr); - run_helper_->shutdown(*dispatcher_, restarter_); + shutdown_ = true; + restarter_.terminateParent(); + dispatcher_->exit(); } void InstanceImpl::shutdownAdmin() { diff --git a/source/server/server.h b/source/server/server.h index ae16ad4337f1..22d03a66ee0f 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -107,25 +107,22 @@ class InstanceUtil : Logger::Loggable { Options& options); }; +class InstanceImpl; + /** * This is a helper used by InstanceImpl::run() on the stack. It's broken out to make testing * easier. */ class RunHelper : Logger::Loggable { public: - RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, HotRestart& hot_restart, + RunHelper(Instance& instance, Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, AccessLog::AccessLogManager& access_log_manager, InitManagerImpl& init_manager, OverloadManager& overload_manager, std::function workers_start_cb); - // Helper function to inititate a shutdown. This can be triggered either by catching SIGTERM - // or be called from ServerImpl::shutdown(). - void shutdown(Event::Dispatcher& dispatcher, HotRestart& hot_restart); - private: Event::SignalEventPtr sigterm_; Event::SignalEventPtr sig_usr_1_; Event::SignalEventPtr sig_hup_; - bool shutdown_{}; }; /** @@ -170,6 +167,7 @@ class InstanceImpl : Logger::Loggable, public Instance { } Runtime::Loader& runtime() override; void shutdown() override; + bool isShutdown() override { return shutdown_; } void shutdownAdmin() override; Singleton::Manager& singletonManager() override { return *singleton_manager_; } bool healthCheckFailed() override; @@ -196,6 +194,7 @@ class InstanceImpl : Logger::Loggable, public Instance { void startWorkers(); void terminate(); + bool shutdown_; Options& options_; Event::TimeSystem& time_system_; HotRestart& restarter_; diff --git a/test/mocks/server/mocks.h b/test/mocks/server/mocks.h index dba2ace1723f..c33578e8d068 100644 --- a/test/mocks/server/mocks.h +++ b/test/mocks/server/mocks.h @@ -329,6 +329,7 @@ class MockInstance : public Instance { MOCK_METHOD0(rateLimitClient_, RateLimit::Client*()); MOCK_METHOD0(runtime, Runtime::Loader&()); MOCK_METHOD0(shutdown, void()); + MOCK_METHOD0(isShutdown, bool()); MOCK_METHOD0(shutdownAdmin, void()); MOCK_METHOD0(singletonManager, Singleton::Manager&()); MOCK_METHOD0(startTimeCurrentEpoch, time_t()); diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 6e5191c59b18..56a3411d1123 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -14,12 +14,14 @@ #include "gtest/gtest.h" using testing::_; +using testing::Assign; using testing::HasSubstr; using testing::InSequence; using testing::Invoke; using testing::InvokeWithoutArgs; using testing::Property; using testing::Ref; +using testing::Return; using testing::SaveArg; using testing::StrictMock; @@ -51,7 +53,7 @@ TEST(ServerInstanceUtil, flushHelper) { class RunHelperTest : public testing::Test { public: - RunHelperTest() { + RunHelperTest() : shutdown_(false) { InSequence s; sigterm_ = new Event::MockSignalEvent(&dispatcher_); @@ -59,14 +61,15 @@ class RunHelperTest : public testing::Test { sighup_ = new Event::MockSignalEvent(&dispatcher_); EXPECT_CALL(cm_, setInitializedCb(_)).WillOnce(SaveArg<0>(&cm_init_callback_)); EXPECT_CALL(overload_manager_, start()); + ON_CALL(server_, shutdown()).WillByDefault(Assign(&shutdown_, true)); - helper_.reset(new RunHelper(dispatcher_, cm_, hot_restart_, access_log_manager_, init_manager_, + helper_.reset(new RunHelper(server_, dispatcher_, cm_, access_log_manager_, init_manager_, overload_manager_, [this] { start_workers_.ready(); })); } + NiceMock server_; NiceMock dispatcher_; NiceMock cm_; - NiceMock hot_restart_; NiceMock access_log_manager_; NiceMock overload_manager_; InitManagerImpl init_manager_; @@ -76,6 +79,7 @@ class RunHelperTest : public testing::Test { Event::MockSignalEvent* sigterm_; Event::MockSignalEvent* sigusr1_; Event::MockSignalEvent* sighup_; + bool shutdown_ = false; }; TEST_F(RunHelperTest, Normal) { @@ -86,6 +90,7 @@ TEST_F(RunHelperTest, Normal) { TEST_F(RunHelperTest, ShutdownBeforeCmInitialize) { EXPECT_CALL(start_workers_, ready()).Times(0); sigterm_->callback_(); + EXPECT_CALL(server_, isShutdown()).WillOnce(Return(shutdown_)); cm_init_callback_(); } @@ -96,6 +101,7 @@ TEST_F(RunHelperTest, ShutdownBeforeInitManagerInit) { EXPECT_CALL(target, initialize(_)); cm_init_callback_(); sigterm_->callback_(); + EXPECT_CALL(server_, isShutdown()).WillOnce(Return(shutdown_)); target.callback_(); } From 1ea2023070848ea398350579c74b7e4fccbe6b03 Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 11:51:49 -0400 Subject: [PATCH 2/8] fix format Signed-off-by: Elisha Ziskind --- api/envoy/api/v2/core/health_check.proto | 10 ++-------- api/envoy/config/filter/http/buffer/v2/buffer.proto | 9 ++------- source/server/server.cc | 4 ++-- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/api/envoy/api/v2/core/health_check.proto b/api/envoy/api/v2/core/health_check.proto index 0810b22fe0ab..480a37a836cf 100644 --- a/api/envoy/api/v2/core/health_check.proto +++ b/api/envoy/api/v2/core/health_check.proto @@ -22,19 +22,13 @@ message HealthCheck { // The time to wait for a health check response. If the timeout is reached the // health check attempt will be considered a failure. google.protobuf.Duration timeout = 1 [ - (validate.rules).duration = { - required: true, - gt: {seconds: 0} - }, + (validate.rules).duration = {required: true, gt: {seconds: 0}}, (gogoproto.stdduration) = true ]; // The interval between health checks. google.protobuf.Duration interval = 2 [ - (validate.rules).duration = { - required: true, - gt: {seconds: 0} - }, + (validate.rules).duration = {required: true, gt: {seconds: 0}}, (gogoproto.stdduration) = true ]; diff --git a/api/envoy/config/filter/http/buffer/v2/buffer.proto b/api/envoy/config/filter/http/buffer/v2/buffer.proto index 63484d2c25e1..4013050ae62c 100644 --- a/api/envoy/config/filter/http/buffer/v2/buffer.proto +++ b/api/envoy/config/filter/http/buffer/v2/buffer.proto @@ -19,13 +19,8 @@ message Buffer { // The maximum number of seconds that the filter will wait for a complete // request before returning a 408 response. - google.protobuf.Duration max_request_time = 2 [ - (validate.rules).duration = { - required: true, - gt: {} - }, - (gogoproto.stdduration) = true - ]; + google.protobuf.Duration max_request_time = 2 + [(validate.rules).duration = {required: true, gt: {}}, (gogoproto.stdduration) = true]; } message BufferPerRoute { diff --git a/source/server/server.cc b/source/server/server.cc index 391d78962290..f8cd9bf47262 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -387,8 +387,8 @@ void InstanceImpl::loadServerFlags(const absl::optional& flags_path uint64_t InstanceImpl::numConnections() { return listener_manager_->numConnections(); } -RunHelper::RunHelper(Instance& instance, Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, - AccessLog::AccessLogManager& access_log_manager, +RunHelper::RunHelper(Instance& instance, Event::Dispatcher& dispatcher, + Upstream::ClusterManager& cm, AccessLog::AccessLogManager& access_log_manager, InitManagerImpl& init_manager, OverloadManager& overload_manager, std::function workers_start_cb) { From 0075086379d7de3fb9021b9c381ba592c86c6c59 Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 14:24:38 -0400 Subject: [PATCH 3/8] fix format Signed-off-by: Elisha Ziskind --- api/envoy/api/v2/core/health_check.proto | 10 ++++++++-- api/envoy/config/filter/http/buffer/v2/buffer.proto | 9 +++++++-- test/server/server_test.cc | 5 +++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/api/envoy/api/v2/core/health_check.proto b/api/envoy/api/v2/core/health_check.proto index 480a37a836cf..0810b22fe0ab 100644 --- a/api/envoy/api/v2/core/health_check.proto +++ b/api/envoy/api/v2/core/health_check.proto @@ -22,13 +22,19 @@ message HealthCheck { // The time to wait for a health check response. If the timeout is reached the // health check attempt will be considered a failure. google.protobuf.Duration timeout = 1 [ - (validate.rules).duration = {required: true, gt: {seconds: 0}}, + (validate.rules).duration = { + required: true, + gt: {seconds: 0} + }, (gogoproto.stdduration) = true ]; // The interval between health checks. google.protobuf.Duration interval = 2 [ - (validate.rules).duration = {required: true, gt: {seconds: 0}}, + (validate.rules).duration = { + required: true, + gt: {seconds: 0} + }, (gogoproto.stdduration) = true ]; diff --git a/api/envoy/config/filter/http/buffer/v2/buffer.proto b/api/envoy/config/filter/http/buffer/v2/buffer.proto index 4013050ae62c..c38ab5dfe11f 100644 --- a/api/envoy/config/filter/http/buffer/v2/buffer.proto +++ b/api/envoy/config/filter/http/buffer/v2/buffer.proto @@ -19,8 +19,13 @@ message Buffer { // The maximum number of seconds that the filter will wait for a complete // request before returning a 408 response. - google.protobuf.Duration max_request_time = 2 - [(validate.rules).duration = {required: true, gt: {}}, (gogoproto.stdduration) = true]; + google.protobuf.Duration max_request_time = 2 [ + (validate.rules).duration = { + required: true, + gt: {} + }, + (gogoproto.stdduration) = true + ]; } message BufferPerRoute { diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 56a3411d1123..0ba5ab858581 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -63,8 +63,9 @@ class RunHelperTest : public testing::Test { EXPECT_CALL(overload_manager_, start()); ON_CALL(server_, shutdown()).WillByDefault(Assign(&shutdown_, true)); - helper_.reset(new RunHelper(server_, dispatcher_, cm_, access_log_manager_, init_manager_, - overload_manager_, [this] { start_workers_.ready(); })); + helper_ = std::make_unique( + server_, dispatcher_, cm_, access_log_manager_, init_manager_, + overload_manager_, [this] { start_workers_.ready(); }); } NiceMock server_; From 604be5f7358e213b116c94b381e1be6cd5282e58 Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 14:39:10 -0400 Subject: [PATCH 4/8] fix format Signed-off-by: Elisha Ziskind --- api/envoy/config/filter/http/buffer/v2/buffer.proto | 2 +- test/server/server_test.cc | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/envoy/config/filter/http/buffer/v2/buffer.proto b/api/envoy/config/filter/http/buffer/v2/buffer.proto index c38ab5dfe11f..63484d2c25e1 100644 --- a/api/envoy/config/filter/http/buffer/v2/buffer.proto +++ b/api/envoy/config/filter/http/buffer/v2/buffer.proto @@ -20,7 +20,7 @@ message Buffer { // The maximum number of seconds that the filter will wait for a complete // request before returning a 408 response. google.protobuf.Duration max_request_time = 2 [ - (validate.rules).duration = { + (validate.rules).duration = { required: true, gt: {} }, diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 0ba5ab858581..442c70927556 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -63,9 +63,9 @@ class RunHelperTest : public testing::Test { EXPECT_CALL(overload_manager_, start()); ON_CALL(server_, shutdown()).WillByDefault(Assign(&shutdown_, true)); - helper_ = std::make_unique( - server_, dispatcher_, cm_, access_log_manager_, init_manager_, - overload_manager_, [this] { start_workers_.ready(); }); + helper_ = + std::make_unique(server_, dispatcher_, cm_, access_log_manager_, init_manager_, + overload_manager_, [this] { start_workers_.ready(); }); } NiceMock server_; From 4c4974c0b229969e3039ecbbc0bc35930a62c9d4 Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 16:57:02 -0400 Subject: [PATCH 5/8] remove forward declaration Signed-off-by: Elisha Ziskind --- source/server/server.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/server/server.h b/source/server/server.h index 22d03a66ee0f..aeb14776fcaa 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -107,8 +107,6 @@ class InstanceUtil : Logger::Loggable { Options& options); }; -class InstanceImpl; - /** * This is a helper used by InstanceImpl::run() on the stack. It's broken out to make testing * easier. From d9cb9f4fdffb7cb7e646babdb7c2716c663bbbdc Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 19:11:54 -0400 Subject: [PATCH 6/8] Make InstanceImpl::isShutdown final Signed-off-by: Elisha Ziskind --- source/server/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/server/server.h b/source/server/server.h index aeb14776fcaa..1fa8ac89cba3 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -165,7 +165,7 @@ class InstanceImpl : Logger::Loggable, public Instance { } Runtime::Loader& runtime() override; void shutdown() override; - bool isShutdown() override { return shutdown_; } + bool isShutdown() override final { return shutdown_; } void shutdownAdmin() override; Singleton::Manager& singletonManager() override { return *singleton_manager_; } bool healthCheckFailed() override; From 57ea5ea231f6e3bc90ec97900eca8292806f9b0b Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 21:07:57 -0400 Subject: [PATCH 7/8] resolve conflict Signed-off-by: Elisha Ziskind --- test/server/server_test.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/server/server_test.cc b/test/server/server_test.cc index a4669debe2b4..89f6804b752c 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -65,15 +65,9 @@ class RunHelperTest : public testing::Test { EXPECT_CALL(overload_manager_, start()); ON_CALL(server_, shutdown()).WillByDefault(Assign(&shutdown_, true)); -<<<<<<< HEAD - helper_ = - std::make_unique(server_, dispatcher_, cm_, access_log_manager_, init_manager_, - overload_manager_, [this] { start_workers_.ready(); }); -======= - helper_ = std::make_unique(dispatcher_, cm_, hot_restart_, access_log_manager_, + helper_ = std::make_unique(server_, dispatcher_, cm_, access_log_manager_, init_manager_, overload_manager_, [this] { start_workers_.ready(); }); ->>>>>>> upstream/master } NiceMock server_; From 95a756f4e2e53cafc554c72dffb47bb10838b1bd Mon Sep 17 00:00:00 2001 From: Elisha Ziskind Date: Thu, 1 Nov 2018 21:09:17 -0400 Subject: [PATCH 8/8] fix format Signed-off-by: Elisha Ziskind --- test/server/server_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 89f6804b752c..90a41300af7f 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -65,9 +65,9 @@ class RunHelperTest : public testing::Test { EXPECT_CALL(overload_manager_, start()); ON_CALL(server_, shutdown()).WillByDefault(Assign(&shutdown_, true)); - helper_ = std::make_unique(server_, dispatcher_, cm_, access_log_manager_, - init_manager_, overload_manager_, - [this] { start_workers_.ready(); }); + helper_ = + std::make_unique(server_, dispatcher_, cm_, access_log_manager_, init_manager_, + overload_manager_, [this] { start_workers_.ready(); }); } NiceMock server_;