From 62741b6e355893c22647fe8d3235ff2f61bd815f Mon Sep 17 00:00:00 2001 From: Dan Stough Date: Wed, 14 Jun 2023 17:52:46 -0400 Subject: [PATCH] Revert Proxy Lifecycle PRs (#152) * Revert "proxy-lifecycle: catch SIGTERM and initiate graceful shutdown (#130)" This reverts commit 40c99dc7739784ff8c94b7d2980b7d70bf479287. * Revert "proxy-lifecycle: add HTTP Server with endpoints for proxy lifecycle shutdown (#115)" This reverts commit 0047e65ac514d5b5e9f34ad5dd1bbe13519d4122. * Revert "cmd: add CLI flags for proxy shutdown lifecycle management (#100)" This reverts commit 3d37b9f43124f44845af7abd6cdb568fc754e53c. --- .changelog/100.txt | 3 - .changelog/115.txt | 3 - .changelog/130.txt | 3 - .../workflows/consul-dataplane-checks.yaml | 2 +- Makefile | 2 +- cmd/consul-dataplane/main.go | 70 ++---- integration-tests/helpers/dataplane.go | 56 +++-- integration-tests/helpers/suite.go | 14 +- integration-tests/main_test.go | 156 ++----------- pkg/consuldp/config.go | 23 -- pkg/consuldp/consul_dataplane.go | 69 +----- pkg/consuldp/lifecycle.go | 213 ----------------- pkg/consuldp/lifecycle_test.go | 215 ------------------ pkg/consuldp/metrics.go | 2 +- pkg/consuldp/metrics_test.go | 6 - pkg/envoy/proxy.go | 166 +------------- pkg/envoy/proxy_test.go | 14 +- 17 files changed, 106 insertions(+), 911 deletions(-) delete mode 100644 .changelog/100.txt delete mode 100644 .changelog/115.txt delete mode 100644 .changelog/130.txt delete mode 100644 pkg/consuldp/lifecycle.go delete mode 100644 pkg/consuldp/lifecycle_test.go diff --git a/.changelog/100.txt b/.changelog/100.txt deleted file mode 100644 index 4342c503..00000000 --- a/.changelog/100.txt +++ /dev/null @@ -1,3 +0,0 @@ -```release-note:feature -Add -shutdown-drain-listeners, -shutdown-grace-period, -graceful-shutdown-path and -graceful-port flags to configure proxy lifecycle management settings for the Envoy container. -``` diff --git a/.changelog/115.txt b/.changelog/115.txt deleted file mode 100644 index cc62a156..00000000 --- a/.changelog/115.txt +++ /dev/null @@ -1,3 +0,0 @@ -```release-note:feature -Add HTTP server with configurable port and endpoint path for initiating graceful shutdown. -``` diff --git a/.changelog/130.txt b/.changelog/130.txt deleted file mode 100644 index f8d12ac5..00000000 --- a/.changelog/130.txt +++ /dev/null @@ -1,3 +0,0 @@ -```release-note:feature -Catch SIGTERM and SIGINT to initate graceful shutdown in accordance with proxy lifecycle management configuration. -``` diff --git a/.github/workflows/consul-dataplane-checks.yaml b/.github/workflows/consul-dataplane-checks.yaml index d0dbcd46..120e90be 100644 --- a/.github/workflows/consul-dataplane-checks.yaml +++ b/.github/workflows/consul-dataplane-checks.yaml @@ -32,7 +32,7 @@ jobs: - uses: actions/setup-go@4d34df0c2316fe8122ab82dc22947d607c0c91f9 # v4.0.0 with: go-version: ${{ needs.get-go-version.outputs.go-version }} - - run: go test ./... -p 1 # disable parallelism to avoid port conflicts from default metrics and lifecycle server configuration + - run: go test ./... integration-tests: name: integration-tests needs: diff --git a/Makefile b/Makefile index f02999d7..e723b1e5 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,7 @@ else $(error Cannot generate changelog without LAST_RELEASE_GIT_TAG) endif -INTEGRATION_TESTS_SERVER_IMAGE ?= hashicorppreview/consul:1.15-dev +INTEGRATION_TESTS_SERVER_IMAGE ?= hashicorppreview/consul:1.14-dev INTEGRATION_TESTS_DATAPLANE_IMAGE ?= $(PRODUCT_NAME)/release-default:$(VERSION) .PHONY: expand-integration-tests-output-dir diff --git a/cmd/consul-dataplane/main.go b/cmd/consul-dataplane/main.go index bfcb5b4f..33607936 100644 --- a/cmd/consul-dataplane/main.go +++ b/cmd/consul-dataplane/main.go @@ -62,26 +62,17 @@ var ( promScrapePath string promMergePort int - adminBindAddr string - adminBindPort int - readyBindAddr string - readyBindPort int - envoyConcurrency int - envoyDrainTimeSeconds int - envoyDrainStrategy string + adminBindAddr string + adminBindPort int + readyBindAddr string + readyBindPort int + envoyConcurrency int xdsBindAddr string xdsBindPort int consulDNSBindAddr string consulDNSPort int - - shutdownDrainListenersEnabled bool - shutdownGracePeriodSeconds int - gracefulShutdownPath string - gracefulPort int - - dumpEnvoyConfigOnExitEnabled bool ) func init() { @@ -135,8 +126,6 @@ func init() { StringVar(&readyBindAddr, "envoy-ready-bind-address", "", "DP_ENVOY_READY_BIND_ADDRESS", "The address on which Envoy's readiness probe is available.") IntVar(&readyBindPort, "envoy-ready-bind-port", 0, "DP_ENVOY_READY_BIND_PORT", "The port on which Envoy's readiness probe is available.") IntVar(&envoyConcurrency, "envoy-concurrency", 2, "DP_ENVOY_CONCURRENCY", "The number of worker threads that Envoy uses.") - IntVar(&envoyDrainTimeSeconds, "envoy-drain-time-seconds", 30, "DP_ENVOY_DRAIN_TIME", "The time in seconds for which Envoy will drain connections.") - StringVar(&envoyDrainStrategy, "envoy-drain-strategy", "immediate", "DP_ENVOY_DRAIN_STRATEGY", "The behaviour of Envoy during the drain sequence. Determines whether all open connections should be encouraged to drain immediately or to increase the percentage gradually as the drain time elapses.") StringVar(&xdsBindAddr, "xds-bind-addr", "127.0.0.1", "DP_XDS_BIND_ADDR", "The address on which the Envoy xDS server is available.") IntVar(&xdsBindPort, "xds-bind-port", 0, "DP_XDS_BIND_PORT", "The port on which the Envoy xDS server is available.") @@ -150,18 +139,6 @@ func init() { StringVar(&consulDNSBindAddr, "consul-dns-bind-addr", "127.0.0.1", "DP_CONSUL_DNS_BIND_ADDR", "The address that will be bound to the consul dns proxy.") IntVar(&consulDNSPort, "consul-dns-bind-port", -1, "DP_CONSUL_DNS_BIND_PORT", "The port the consul dns proxy will listen on. By default -1 disables the dns proxy") - - // Default is false because it will generally be configured appropriately by Helm - // configuration or pod annotation. - BoolVar(&shutdownDrainListenersEnabled, "shutdown-drain-listeners", false, "DP_SHUTDOWN_DRAIN_LISTENERS", "Wait for proxy listeners to drain before terminating the proxy container.") - // Default is 0 because it will generally be configured appropriately by Helm - // configuration or pod annotation. - IntVar(&shutdownGracePeriodSeconds, "shutdown-grace-period-seconds", 0, "DP_SHUTDOWN_GRACE_PERIOD_SECONDS", "Amount of time to wait after receiving a SIGTERM signal before terminating the proxy.") - StringVar(&gracefulShutdownPath, "graceful-shutdown-path", "/graceful_shutdown", "DP_GRACEFUL_SHUTDOWN_PATH", "An HTTP path to serve the graceful shutdown endpoint.") - IntVar(&gracefulPort, "graceful-port", 20300, "DP_GRACEFUL_PORT", "A port to serve HTTP endpoints for graceful shutdown.") - - // Default is false, may be useful for debugging unexpected termination. - BoolVar(&dumpEnvoyConfigOnExitEnabled, "dump-envoy-config-on-exit", false, "DP_DUMP_ENVOY_CONFIG_ON_EXIT", "Call the Envoy /config_dump endpoint during consul-dataplane controlled shutdown.") } // validateFlags performs semantic validation of the flag values @@ -173,13 +150,13 @@ func validateFlags() { } } -func run() error { +func main() { flag.Parse() if printVersion { fmt.Printf("Consul Dataplane v%s\n", version.GetHumanVersion()) fmt.Printf("Revision %s\n", version.GitCommit) - return nil + return } readServiceIDFromFile() @@ -239,19 +216,12 @@ func run() error { }, }, Envoy: &consuldp.EnvoyConfig{ - AdminBindAddress: adminBindAddr, - AdminBindPort: adminBindPort, - ReadyBindAddress: readyBindAddr, - ReadyBindPort: readyBindPort, - EnvoyConcurrency: envoyConcurrency, - EnvoyDrainTimeSeconds: envoyDrainTimeSeconds, - EnvoyDrainStrategy: envoyDrainStrategy, - ShutdownDrainListenersEnabled: shutdownDrainListenersEnabled, - ShutdownGracePeriodSeconds: shutdownGracePeriodSeconds, - GracefulShutdownPath: gracefulShutdownPath, - GracefulPort: gracefulPort, - DumpEnvoyConfigOnExitEnabled: dumpEnvoyConfigOnExitEnabled, - ExtraArgs: flag.Args(), + AdminBindAddress: adminBindAddr, + AdminBindPort: adminBindPort, + ReadyBindAddress: readyBindAddr, + ReadyBindPort: readyBindPort, + EnvoyConcurrency: envoyConcurrency, + ExtraArgs: flag.Args(), }, XDSServer: &consuldp.XDSServer{ BindAddress: xdsBindAddr, @@ -265,28 +235,22 @@ func run() error { consuldpInstance, err := consuldp.NewConsulDP(consuldpCfg) if err != nil { - return err + log.Fatal(err) } ctx, cancel := context.WithCancel(context.Background()) - defer cancel() sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) go func() { - // Block waiting for SIGTERM <-sigCh - - consuldpInstance.GracefulShutdown(cancel) + cancel() }() - return consuldpInstance.Run(ctx) -} - -func main() { - err := run() + err = consuldpInstance.Run(ctx) if err != nil { + cancel() log.Fatal(err) } } diff --git a/integration-tests/helpers/dataplane.go b/integration-tests/helpers/dataplane.go index b1bbc9bf..e5f25d26 100644 --- a/integration-tests/helpers/dataplane.go +++ b/integration-tests/helpers/dataplane.go @@ -5,6 +5,7 @@ package helpers import ( "fmt" + "io" "testing" "github.com/testcontainers/testcontainers-go" @@ -15,16 +16,13 @@ import ( var EnvoyAdminPort = TCP(30000) type DataplaneConfig struct { - Addresses string - ServiceNodeName string - ProxyServiceID string - LoginAuthMethod string - LoginBearerToken string - DNSBindPort string - ServiceMetricsURL string - ShutdownGracePeriodSeconds string - ShutdownDrainListenersEnabled bool - DumpEnvoyConfigOnExitEnabled bool + Addresses string + ServiceNodeName string + ProxyServiceID string + LoginAuthMethod string + LoginBearerToken string + DNSBindPort string + ServiceMetricsURL string } func (cfg DataplaneConfig) ToArgs() []string { @@ -45,19 +43,6 @@ func (cfg DataplaneConfig) ToArgs() []string { "-telemetry-prom-scrape-path", "/metrics", "-telemetry-prom-service-metrics-url", cfg.ServiceMetricsURL, } - - if cfg.ShutdownGracePeriodSeconds != "" { - args = append(args, "-shutdown-grace-period-seconds", cfg.ShutdownGracePeriodSeconds) - } - - if cfg.ShutdownDrainListenersEnabled { - args = append(args, "-shutdown-drain-listeners") - } - - if cfg.DumpEnvoyConfigOnExitEnabled { - args = append(args, "-dump-envoy-config-on-exit") - } - return args } @@ -78,5 +63,30 @@ func RunDataplane(t *testing.T, pod *Pod, suite *Suite, cfg DataplaneConfig) *Co WaitingFor: wait.ForLog("starting main dispatch loop"), // https://github.com/envoyproxy/envoy/blob/ce49966ecb5f2d530117a29ae60b88198746fd74/source/server/server.cc#L906-L907 }) + t.Cleanup(func() { + url := fmt.Sprintf( + "http://%s:%d/config_dump?include_eds", + pod.HostIP, + pod.MappedPorts[EnvoyAdminPort], + ) + + rsp, err := httpClient.Get(url) + if err != nil { + t.Logf("failed to dump Envoy config: %v\n", err) + return + } + defer rsp.Body.Close() + + config, err := io.ReadAll(rsp.Body) + if err != nil { + t.Logf("failed to dump Envoy config: %v\n", err) + return + } + suite.CaptureArtifact( + fmt.Sprintf("%s-envoy-config.json", cfg.ProxyServiceID), + config, + ) + }) + return container } diff --git a/integration-tests/helpers/suite.go b/integration-tests/helpers/suite.go index 6a002e05..3fdf9d22 100644 --- a/integration-tests/helpers/suite.go +++ b/integration-tests/helpers/suite.go @@ -204,17 +204,11 @@ func (s *Suite) Volume(t *testing.T) *Volume { require.NoError(t, err) t.Cleanup(func() { - s.mu.Lock() - defer s.mu.Unlock() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() - if s.volume != nil { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - if err := docker.VolumeRemove(ctx, v.Name, true); err != nil { - t.Logf("failed to remove volume: %v", err) - } - s.volume = nil + if err := docker.VolumeRemove(ctx, v.Name, true); err != nil { + t.Logf("failed to remove volume: %v", err) } }) diff --git a/integration-tests/main_test.go b/integration-tests/main_test.go index 01b827fe..df58cf20 100644 --- a/integration-tests/main_test.go +++ b/integration-tests/main_test.go @@ -4,7 +4,6 @@ package integrationtests import ( - "context" "flag" "fmt" "net" @@ -14,7 +13,6 @@ import ( "testing" "time" - "github.com/docker/docker/client" "github.com/docker/go-connections/nat" "github.com/hashicorp/consul/api" "github.com/stretchr/testify/require" @@ -24,12 +22,12 @@ import ( ) var ( - // upstreamLocalBindPort is the port each sidecar will bind the local - // listener for its upstream to. + // upstreamLocalBindPort is the port the frontend sidecar will bind the local + // listener for its backend upstream to. upstreamLocalBindPort = TCP(10000) // proxyInboundListenerPort is the port the sidecars will bind their public - // listeners to. + // listeners to. Only the backend sidecar's public port is used in these tests. proxyInboundListenerPort = TCP(20000) // dnsUDPPort is UDP the port Consul Dataplane's DNS proxy wil be bound to. @@ -118,17 +116,16 @@ func TestIntegration(t *testing.T) { server.RegisterSyntheticNode(t) - backendPod := RunPod(t, suite, "backend", []nat.Port{ - EnvoyAdminPort, - upstreamLocalBindPort, - metricsPort, - }) - server.RegisterService(t, &api.AgentService{ Service: "backend", Port: 8080, }) + backendPod := RunPod(t, suite, "backend", []nat.Port{ + EnvoyAdminPort, + metricsPort, + }) + server.RegisterService(t, &api.AgentService{ Service: "backend-sidecar", Kind: api.ServiceKindConnectProxy, @@ -137,28 +134,19 @@ func TestIntegration(t *testing.T) { Proxy: &api.AgentServiceConnectProxyConfig{ DestinationServiceName: "backend", LocalServicePort: 8080, - Upstreams: []api.Upstream{ - { - DestinationType: api.UpstreamDestTypeService, - DestinationName: "frontend", - LocalBindPort: upstreamLocalBindPort.Int(), - LocalBindAddress: "0.0.0.0", - }, - }, }, }) RunService(t, suite, backendPod, "backend") backendDataplane := RunDataplane(t, backendPod, suite, DataplaneConfig{ - Addresses: server.Container.ContainerIP, - ServiceNodeName: SyntheticNodeName, - ProxyServiceID: "backend-sidecar", - LoginAuthMethod: authMethod.Name, - LoginBearerToken: authMethod.GenerateToken(t, "backend"), - DNSBindPort: dnsUDPPort.Port(), - ServiceMetricsURL: "http://localhost:8080", - DumpEnvoyConfigOnExitEnabled: true, + Addresses: server.Container.ContainerIP, + ServiceNodeName: SyntheticNodeName, + ProxyServiceID: "backend-sidecar", + LoginAuthMethod: authMethod.Name, + LoginBearerToken: authMethod.GenerateToken(t, "backend"), + DNSBindPort: dnsUDPPort.Port(), + ServiceMetricsURL: "http://localhost:8080", }) frontendPod := RunPod(t, suite, "frontend", []nat.Port{ @@ -180,7 +168,6 @@ func TestIntegration(t *testing.T) { Address: frontendPod.ContainerIP, Proxy: &api.AgentServiceConnectProxyConfig{ DestinationServiceName: "frontend", - LocalServicePort: 8080, Upstreams: []api.Upstream{ { DestinationType: api.UpstreamDestTypeService, @@ -192,32 +179,16 @@ func TestIntegration(t *testing.T) { }, }) - RunService(t, suite, frontendPod, "frontend") - - frontendDataplane := RunDataplane(t, frontendPod, suite, DataplaneConfig{ - Addresses: server.Container.ContainerIP, - ServiceNodeName: SyntheticNodeName, - ProxyServiceID: "frontend-sidecar", - LoginAuthMethod: authMethod.Name, - LoginBearerToken: authMethod.GenerateToken(t, "frontend"), - DNSBindPort: dnsUDPPort.Port(), - ServiceMetricsURL: "http://localhost:8080", - ShutdownGracePeriodSeconds: "10", - ShutdownDrainListenersEnabled: true, - DumpEnvoyConfigOnExitEnabled: true, + RunDataplane(t, frontendPod, suite, DataplaneConfig{ + Addresses: server.Container.ContainerIP, + ServiceNodeName: SyntheticNodeName, + ProxyServiceID: "frontend-sidecar", + LoginAuthMethod: authMethod.Name, + LoginBearerToken: authMethod.GenerateToken(t, "frontend"), + DNSBindPort: dnsUDPPort.Port(), + ServiceMetricsURL: "http://localhost:8080", }) - // Intentions are configured as default deny in helpers/server.go - ExpectNoHTTPAccess(t, - frontendPod.HostIP, - frontendPod.MappedPorts[upstreamLocalBindPort], - ) - - ExpectNoHTTPAccess(t, - backendPod.HostIP, - backendPod.MappedPorts[upstreamLocalBindPort], - ) - server.SetConfigEntry(t, &api.ServiceIntentionsConfigEntry{ Kind: api.ServiceIntentions, Name: "backend", @@ -290,85 +261,4 @@ func TestIntegration(t *testing.T) { return strings.Contains(output, "{\"custom_field_path\":\"/clusters\"}") }, 30*time.Second, 3*time.Second, "could not find admin access logs in output") } - - // Overwrite deny intention and allow two-way connections to prepare for - // testing graceful shutdown - server.SetConfigEntry(t, &api.ServiceIntentionsConfigEntry{ - Kind: api.ServiceIntentions, - Name: "backend", - Sources: []*api.SourceIntention{ - { - Name: "frontend", - Type: api.IntentionSourceConsul, - Permissions: []*api.IntentionPermission{ - { - Action: api.IntentionActionAllow, - HTTP: &api.IntentionHTTPPermission{ - PathPrefix: "/", - Methods: []string{http.MethodGet}, - }, - }, - }, - }, - }, - }) - server.SetConfigEntry(t, &api.ServiceIntentionsConfigEntry{ - Kind: api.ServiceIntentions, - Name: "frontend", - Sources: []*api.SourceIntention{ - { - Name: "backend", - Type: api.IntentionSourceConsul, - Permissions: []*api.IntentionPermission{ - { - Action: api.IntentionActionAllow, - HTTP: &api.IntentionHTTPPermission{ - PathPrefix: "/", - Methods: []string{http.MethodGet}, - }, - }, - }, - }, - }, - }) - - // Ensure frontend upstream on backend service is working - ExpectHTTPAccess(t, - backendPod.HostIP, - backendPod.MappedPorts[upstreamLocalBindPort], - ) - - // Send SIGTERM to dataplane to start graceful shutdown - containerID := frontendDataplane.Container.GetContainerID() - cli, err := client.NewClientWithOpts( - client.WithAPIVersionNegotiation(), - ) - if err != nil { - fmt.Fprintf(os.Stderr, "error initializing docker client: %s\n", err) - os.Exit(1) - } - err = cli.ContainerKill(context.Background(), containerID, "SIGTERM") - if err != nil { - fmt.Fprintf(os.Stderr, "error killing docker container %s: %s\n", containerID, err) - os.Exit(1) - } - // TODO: It may be preferrable to use ContainerStop to set a longer - // StopTimeout to avoid issues with cleanup, but importing the - // docker/docker/container package for StopOptions has dependency issues. - // https://pkg.go.dev/github.com/docker/docker/client#Client.ContainerStop - // err = cli.ContainerStop(context.Background(), containerID, container.StopOptions{}) - - // Expect outgoing connections through sidecar are allowed until shutdown - // grace period has elapsed. - ExpectHTTPAccess(t, - frontendPod.HostIP, - frontendPod.MappedPorts[upstreamLocalBindPort], - ) - - // Expect inbound connections to the frontend service are rejected while it - // is shutting down if listener draining is configured. - ExpectNoHTTPAccess(t, - backendPod.HostIP, - backendPod.MappedPorts[upstreamLocalBindPort], - ) } diff --git a/pkg/consuldp/config.go b/pkg/consuldp/config.go index 44d1d966..7d41d4fd 100644 --- a/pkg/consuldp/config.go +++ b/pkg/consuldp/config.go @@ -274,29 +274,6 @@ type EnvoyConfig struct { ReadyBindPort int // EnvoyConcurrency is the envoy concurrency https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency EnvoyConcurrency int - // EnvoyDrainTime is the time in seconds for which Envoy will drain connections - // during a hot restart, when listeners are modified or removed via LDS, or when - // initiated manually via a request to the Envoy admin API. - // The Envoy HTTP connection manager filter will add “Connection: close” to HTTP1 - // requests, send HTTP2 GOAWAY, and terminate connections on request completion - // (after the delayed close period). - // https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-drain-time-s - EnvoyDrainTimeSeconds int - // EnvoyDrainStrategy is the behaviour of Envoy during the drain sequence. - // Determines whether all open connections should be encouraged to drain - // immediately or to increase the percentage gradually as the drain time elapses. - // https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-drain-strategy - EnvoyDrainStrategy string - // ShutdownDrainListenersEnabled configures whether to start draining proxy listeners before terminating the proxy container. Drain time defaults to the value of ShutdownGracePeriodSeconds, but may be set explicitly with EnvoyDrainTimeSeconds. - ShutdownDrainListenersEnabled bool - // ShutdownGracePeriodSeconds is the amount of time to wait after receiving a SIGTERM before terminating the proxy container. - ShutdownGracePeriodSeconds int - // GracefulShutdownPath is the path on which the HTTP endpoint to initiate a graceful shutdown of Envoy is served - GracefulShutdownPath string - // GracefulPort is the port on which the HTTP server for graceful shutdown endpoints will be available. - GracefulPort int - // DumpEnvoyConfigOnExitEnabled configures whether to call Envoy's /config_dump endpoint during consul-dataplane controlled shutdown. - DumpEnvoyConfigOnExitEnabled bool // ExtraArgs are the extra arguments passed to envoy at startup of the proxy ExtraArgs []string } diff --git a/pkg/consuldp/consul_dataplane.go b/pkg/consuldp/consul_dataplane.go index 6ba324c0..d1e88516 100644 --- a/pkg/consuldp/consul_dataplane.go +++ b/pkg/consuldp/consul_dataplane.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "io" "net" "net/http" "strings" @@ -32,9 +31,8 @@ type xdsServer struct { exitedCh chan struct{} } -type httpClient interface { +type httpGetter interface { Get(string) (*http.Response, error) - Post(string, string, io.Reader) (*http.Response, error) } // ConsulDataplane represents the consul-dataplane process @@ -46,7 +44,6 @@ type ConsulDataplane struct { xdsServer *xdsServer aclToken string metricsConfig *metricsConfig - lifecycleConfig *lifecycleConfig } // NewConsulDP creates a new instance of ConsulDataplane @@ -212,12 +209,6 @@ func (cdp *ConsulDataplane) Run(ctx context.Context) error { return err } - cdp.lifecycleConfig = NewLifecycleConfig(cdp.cfg, proxy) - err = cdp.lifecycleConfig.startLifecycleManager(ctx) - if err != nil { - return err - } - doneCh := make(chan error) go func() { select { @@ -226,25 +217,12 @@ func (cdp *ConsulDataplane) Run(ctx context.Context) error { case <-proxy.Exited(): doneCh <- errors.New("envoy proxy exited unexpectedly") case <-cdp.xdsServerExited(): - // Initiate graceful shutdown of Envoy, kill if error - if err := proxy.Quit(); err != nil { - cdp.logger.Error("failed to stop proxy, will attempt to kill", "error", err) - if err := proxy.Kill(); err != nil { - cdp.logger.Error("failed to kill proxy", "error", err) - } + if err := proxy.Stop(); err != nil { + cdp.logger.Error("failed to stop proxy", "error", err) } doneCh <- errors.New("xDS server exited unexpectedly") case <-cdp.metricsConfig.metricsServerExited(): doneCh <- errors.New("metrics server exited unexpectedly") - case <-cdp.lifecycleConfig.lifecycleServerExited(): - // Initiate graceful shutdown of Envoy, kill if error - if err := proxy.Quit(); err != nil { - cdp.logger.Error("failed to stop proxy", "error", err) - if err := proxy.Kill(); err != nil { - cdp.logger.Error("failed to kill proxy", "error", err) - } - } - doneCh <- errors.New("proxy lifecycle management server exited unexpectedly") } }() return <-doneCh @@ -272,46 +250,23 @@ func (cdp *ConsulDataplane) startDNSProxy(ctx context.Context) error { } func (cdp *ConsulDataplane) envoyProxyConfig(cfg []byte) envoy.ProxyConfig { + setConcurrency := true extraArgs := cdp.cfg.Envoy.ExtraArgs - - envoyArgs := map[string]interface{}{ - "--concurrency": cdp.cfg.Envoy.EnvoyConcurrency, - "--drain-time-s": cdp.cfg.Envoy.EnvoyDrainTimeSeconds, - "--drain-strategy": cdp.cfg.Envoy.EnvoyDrainStrategy, - } - - // Users could set the Envoy concurrency, drain time, or drain strategy as - // extra args. Prioritize values set in that way over passthrough or defaults - // from consul-dataplane. - for envoyArg, cdpEnvoyValue := range envoyArgs { - for _, v := range extraArgs { - // If found in extraArgs, skip setting value from consul-dataplane Envoy - // config - if v == envoyArg { - break - } + // Users could set the concurrency as an extra args. Take that as priority for best ux + // experience. + for _, v := range extraArgs { + if v == "--concurrency" { + setConcurrency = false } - - // If not found, append value from consul-dataplane Envoy config to extraArgs - extraArgs = append(extraArgs, fmt.Sprintf("%s %v", envoyArg, cdpEnvoyValue)) + } + if setConcurrency { + extraArgs = append(extraArgs, fmt.Sprintf("--concurrency %v", cdp.cfg.Envoy.EnvoyConcurrency)) } return envoy.ProxyConfig{ - AdminAddr: cdp.cfg.Envoy.AdminBindAddress, - AdminBindPort: cdp.cfg.Envoy.AdminBindPort, Logger: cdp.logger, LogJSON: cdp.cfg.Logging.LogJSON, BootstrapConfig: cfg, ExtraArgs: extraArgs, } } - -func (cdp *ConsulDataplane) GracefulShutdown(cancel context.CancelFunc) { - // If proxy lifecycle manager has not been initialized, cancel parent context and - // proceed to exit rather than attempting graceful shutdown - if cdp.lifecycleConfig != nil { - cdp.lifecycleConfig.gracefulShutdown() - } else { - cancel() - } -} diff --git a/pkg/consuldp/lifecycle.go b/pkg/consuldp/lifecycle.go deleted file mode 100644 index 68a6d600..00000000 --- a/pkg/consuldp/lifecycle.go +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: MPL-2.0 - -package consuldp - -import ( - "context" - "fmt" - "net/http" - "strconv" - "sync" - "time" - - "github.com/hashicorp/go-hclog" - - "github.com/hashicorp/consul-dataplane/pkg/envoy" -) - -const ( - // defaultLifecycleBindPort is the port which will serve the proxy lifecycle HTTP - // endpoints on the loopback interface. - defaultLifecycleBindPort = "20300" - cdpLifecycleBindAddr = "127.0.0.1" - cdpLifecycleUrl = "http://" + cdpLifecycleBindAddr - - defaultLifecycleShutdownPath = "/graceful_shutdown" -) - -// lifecycleConfig handles all configuration related to managing the Envoy proxy -// lifecycle, including exposing management controls via an HTTP server. -type lifecycleConfig struct { - logger hclog.Logger - - // consuldp proxy lifecycle management config - shutdownDrainListenersEnabled bool - shutdownGracePeriodSeconds int - gracefulPort int - gracefulShutdownPath string - - dumpEnvoyConfigOnExitEnabled bool - - // manager for controlling the Envoy proxy process - proxy envoy.ProxyManager - - // consuldp proxy lifecycle management server - lifecycleServer *http.Server - - // consuldp proxy lifecycle server control - errorExitCh chan struct{} - running bool - mu sync.Mutex -} - -func NewLifecycleConfig(cfg *Config, proxy envoy.ProxyManager) *lifecycleConfig { - return &lifecycleConfig{ - shutdownDrainListenersEnabled: cfg.Envoy.ShutdownDrainListenersEnabled, - shutdownGracePeriodSeconds: cfg.Envoy.ShutdownGracePeriodSeconds, - gracefulPort: cfg.Envoy.GracefulPort, - gracefulShutdownPath: cfg.Envoy.GracefulShutdownPath, - dumpEnvoyConfigOnExitEnabled: cfg.Envoy.DumpEnvoyConfigOnExitEnabled, - - proxy: proxy, - - errorExitCh: make(chan struct{}, 1), - mu: sync.Mutex{}, - } -} - -func (m *lifecycleConfig) startLifecycleManager(ctx context.Context) error { - m.mu.Lock() - defer m.mu.Unlock() - if m.running { - return nil - } - - m.logger = hclog.FromContext(ctx).Named("lifecycle") - m.running = true - go func() { - <-ctx.Done() - m.stopLifecycleServer() - }() - - // Start the server which will expose HTTP endpoints for proxy lifecycle - // management control - mux := http.NewServeMux() - - // Determine what HTTP endpoint paths to configure for the proxy lifecycle - // management server. These can be set as flags. - cdpLifecycleShutdownPath := defaultLifecycleShutdownPath - if m.gracefulShutdownPath != "" { - cdpLifecycleShutdownPath = m.gracefulShutdownPath - } - - // Set config to allow introspection of default path for testing - m.gracefulShutdownPath = cdpLifecycleShutdownPath - - m.logger.Info(fmt.Sprintf("setting graceful shutdown path: %s\n", cdpLifecycleShutdownPath)) - mux.HandleFunc(cdpLifecycleShutdownPath, m.gracefulShutdownHandler) - - // Determine what the proxy lifecycle management server bind port is. It can be - // set as a flag. - cdpLifecycleBindPort := defaultLifecycleBindPort - if m.gracefulPort != 0 { - cdpLifecycleBindPort = strconv.Itoa(m.gracefulPort) - } - m.lifecycleServer = &http.Server{ - Addr: fmt.Sprintf("%s:%s", cdpLifecycleBindAddr, cdpLifecycleBindPort), - Handler: mux, - } - - // Start the proxy lifecycle management server - go m.startLifecycleServer() - - return nil -} - -// startLifecycleServer starts the main proxy lifecycle management server that -// exposes HTTP endpoints for proxy lifecycle control. -func (m *lifecycleConfig) startLifecycleServer() { - m.logger.Info("starting proxy lifecycle management server", "address", m.lifecycleServer.Addr) - err := m.lifecycleServer.ListenAndServe() - if err != nil && err != http.ErrServerClosed { - m.logger.Error("failed to serve proxy lifecycle management requests", "error", err) - close(m.errorExitCh) - } -} - -// stopLifecycleServer stops the consul dataplane proxy lifecycle server -func (m *lifecycleConfig) stopLifecycleServer() { - m.mu.Lock() - defer m.mu.Unlock() - m.running = false - - if m.lifecycleServer != nil { - m.logger.Info("stopping the lifecycle management server") - err := m.lifecycleServer.Close() - if err != nil { - m.logger.Warn("error while closing lifecycle server", "error", err) - close(m.errorExitCh) - } - } -} - -// lifecycleServerExited is used to signal that the lifecycle server -// recieved a signal to initiate shutdown. -func (m *lifecycleConfig) lifecycleServerExited() <-chan struct{} { - return m.errorExitCh -} - -func (m *lifecycleConfig) gracefulShutdownHandler(rw http.ResponseWriter, _ *http.Request) { - // Kick off graceful shutdown in a separate goroutine to avoid blocking - // sending an HTTP response - go m.gracefulShutdown() - - // Return HTTP 200 Success - rw.WriteHeader(http.StatusOK) -} - -// gracefulShutdown blocks until shutdownGracePeriod seconds have elapsed, and, if -// configured, will drain inbound connections to Envoy listeners during that time. -func (m *lifecycleConfig) gracefulShutdown() { - m.logger.Info("initiating shutdown") - - // Create a context that will signal a cancel at the specified duration. - // TODO: should this use lifecycleManager ctx instead of context.Background? - timeout := time.Duration(m.shutdownGracePeriodSeconds) * time.Second - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - if m.dumpEnvoyConfigOnExitEnabled { - m.logger.Info("dumping Envoy config to disk") - err := m.proxy.DumpConfig() - if err != nil { - m.logger.Warn("error while attempting to dump Envoy config to disk", "error", err) - close(m.errorExitCh) - } - } - - m.logger.Info(fmt.Sprintf("waiting %d seconds before terminating dataplane proxy", m.shutdownGracePeriodSeconds)) - - var wg sync.WaitGroup - wg.Add(1) - - go func() { - defer wg.Done() - - // If shutdownDrainListenersEnabled, initiatie graceful shutdown of Envoy. - // We want to start draining connections from inbound listeners if - // configured, but still allow outbound traffic until gracefulShutdownPeriod - // has elapsed to facilitate a graceful application shutdown. - if m.shutdownDrainListenersEnabled { - err := m.proxy.Drain() - if err != nil { - m.logger.Warn("error while draining Envoy listeners", "error", err) - close(m.errorExitCh) - } - } - - // Block until context timeout has elapsed - <-ctx.Done() - - // Finish graceful shutdown, quit Envoy proxy - m.logger.Info("shutdown grace period timeout reached") - err := m.proxy.Quit() - if err != nil { - m.logger.Warn("error while shutting down Envoy", "error", err) - close(m.errorExitCh) - } - }() - - // Wait for context timeout to elapse - wg.Wait() -} diff --git a/pkg/consuldp/lifecycle_test.go b/pkg/consuldp/lifecycle_test.go deleted file mode 100644 index 6ead73da..00000000 --- a/pkg/consuldp/lifecycle_test.go +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright (c) HashiCorp, envoyAdminPort. -// SPDX-License-Identifier: MPL-2.0 - -package consuldp - -import ( - "context" - "fmt" - "io" - "log" - "net" - "net/http" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -var ( - envoyAdminPort = 19000 - envoyAdminAddr = "127.0.0.1" -) - -func TestLifecycleServerClosed(t *testing.T) { - cfg := Config{ - Envoy: &EnvoyConfig{ - AdminBindAddress: envoyAdminAddr, - AdminBindPort: envoyAdminPort, - }, - } - m := NewLifecycleConfig(&cfg, &mockProxy{}) - - ctx, cancel := context.WithCancel(context.Background()) - - _ = m.startLifecycleManager(ctx) - require.Equal(t, m.running, true) - cancel() - require.Eventually(t, func() bool { - return !m.running - }, time.Second*2, time.Second) - -} - -func TestLifecycleServerEnabled(t *testing.T) { - cases := map[string]struct { - shutdownDrainListenersEnabled bool - shutdownGracePeriodSeconds int - gracefulShutdownPath string - gracefulPort int - }{ - // TODO: testing the actual Envoy behavior here such as how open or new - // connections are handled should happpen in integration or acceptance tests - "connection draining disabled without grace period": { - // All inbound and outbound connections are terminated immediately. - }, - "connection draining enabled without grace period": { - // This should immediately send "Connection: close" to inbound HTTP1 - // connections, GOAWAY to inbound HTTP2, and terminate connections on - // request completion. Outbound connections should start being rejected - // immediately. - shutdownDrainListenersEnabled: true, - }, - "connection draining disabled with grace period": { - // This should immediately terminate any open inbound connections. - // Outbound connections should be allowed until the grace period has - // elapsed. - shutdownGracePeriodSeconds: 5, - }, - "connection draining enabled with grace period": { - // This should immediately send "Connection: close" to inbound HTTP1 - // connections, GOAWAY to inbound HTTP2, and terminate connections on - // request completion. - // Outbound connections should be allowed until the grace period has - // elapsed, then any remaining open connections should be closed and new - // outbound connections should start being rejected until pod termination. - shutdownDrainListenersEnabled: true, - shutdownGracePeriodSeconds: 5, - }, - "custom graceful shutdown path and port": { - shutdownDrainListenersEnabled: true, - shutdownGracePeriodSeconds: 5, - gracefulShutdownPath: "/quit-nicely", - // TODO: should this be random or use freeport? logic disallows passing - // zero value explicitly - gracefulPort: 23108, - }, - } - for name, c := range cases { - c := c - log.Printf("config = %v", c) - - t.Run(name, func(t *testing.T) { - // Add a small margin of error for assertions checking expected - // behavior within the shutdown grace period window. - shutdownTimeout := time.Duration((c.shutdownGracePeriodSeconds + 5)) * time.Second - - cfg := Config{ - Envoy: &EnvoyConfig{ - AdminBindAddress: envoyAdminAddr, - AdminBindPort: envoyAdminPort, - ShutdownDrainListenersEnabled: c.shutdownDrainListenersEnabled, - ShutdownGracePeriodSeconds: c.shutdownGracePeriodSeconds, - GracefulShutdownPath: c.gracefulShutdownPath, - GracefulPort: c.gracefulPort, - }, - } - m := NewLifecycleConfig(&cfg, &mockProxy{}) - - require.NotNil(t, m) - require.NotNil(t, m.proxy) - require.NotNil(t, m.errorExitCh) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - err := m.startLifecycleManager(ctx) - require.NoError(t, err) - - // Have consul-dataplane's lifecycle server start on an open port - // and figure out what port was used so we can make requests to it. - // Conveniently, this seems to wait until the server is ready for requests. - portCh := make(chan int, 1) - if c.gracefulPort == 0 { - m.lifecycleServer.Addr = "127.0.0.1:0" - } - m.lifecycleServer.BaseContext = func(l net.Listener) context.Context { - portCh <- l.Addr().(*net.TCPAddr).Port - return context.Background() - } - - var port int - select { - case port = <-portCh: - case <-time.After(5 * time.Second): - } - - // Check lifecycle server graceful port configuration - if c.gracefulPort != 0 { - require.Equal(t, c.gracefulPort, port, "failed to set lifecycle server port") - } else { - require.NotEqual(t, 0, port, "failed to figure out lifecycle server port") - } - log.Printf("port = %v\n", port) - - // Check lifecycle server graceful shutdown path configuration - if c.gracefulShutdownPath != "" { - require.Equal(t, m.gracefulShutdownPath, c.gracefulShutdownPath, "failed to set lifecycle server graceful shutdown HTTP endpoint path") - } - - // Check lifecycle server graceful shutdown path configuration - url := fmt.Sprintf("http://127.0.0.1:%d%s", port, m.gracefulShutdownPath) - log.Printf("sending request to %s\n", url) - - resp, err := http.Get(url) - - // HTTP handler is not blocking, so need to wait and check mock - // client for expected method calls to proxy manager within - // expected shutdown grace period plus a small margin of error. - if c.shutdownDrainListenersEnabled { - require.Eventually(t, func() bool { - return m.proxy.(*mockProxy).drainCalled == 1 - }, shutdownTimeout, time.Second, "Proxy.Drain() not called as expected") - } else { - require.Never(t, func() bool { - return m.proxy.(*mockProxy).drainCalled == 1 - }, shutdownTimeout, time.Second, "Proxy.Drain() called unexpectedly") - } - - require.Eventually(t, func() bool { - return m.proxy.(*mockProxy).quitCalled == 1 - }, shutdownTimeout, time.Second, "Proxy.Quit() not called as expected") - - // Expect that proxy is not forcefully killed as part of graceful shutdown. - require.Never(t, func() bool { - return m.proxy.(*mockProxy).killCalled == 1 - }, shutdownTimeout, time.Second, "Proxy.Kill() called unexpectedly") - - require.NoError(t, err) - require.NotNil(t, resp) - - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.NotNil(t, body) - }) - } -} - -type mockProxy struct { - runCalled int - drainCalled int - quitCalled int - killCalled int -} - -func (p *mockProxy) Run(ctx context.Context) error { - p.runCalled++ - return nil -} - -func (p *mockProxy) Drain() error { - p.drainCalled++ - return nil -} - -func (p *mockProxy) Quit() error { - p.quitCalled++ - return nil -} -func (p *mockProxy) Kill() error { - p.killCalled++ - return nil -} - -func (p *mockProxy) DumpConfig() error { - return nil -} diff --git a/pkg/consuldp/metrics.go b/pkg/consuldp/metrics.go index 313a182b..d11568e6 100644 --- a/pkg/consuldp/metrics.go +++ b/pkg/consuldp/metrics.go @@ -79,7 +79,7 @@ type metricsConfig struct { // merged metrics config promScrapeServer *http.Server // the server that will serve all the merged metrics - client httpClient // the client that will scrape the urls + client httpGetter // the client that will scrape the urls urls []string // the urls that will be scraped // consuldp metrics server diff --git a/pkg/consuldp/metrics_test.go b/pkg/consuldp/metrics_test.go index 3f443b69..a2f31d37 100644 --- a/pkg/consuldp/metrics_test.go +++ b/pkg/consuldp/metrics_test.go @@ -223,12 +223,6 @@ func (c *mockClient) Get(url string) (*http.Response, error) { }, nil } -func (c *mockClient) Post(url string, contentType string, body io.Reader) (*http.Response, error) { - return &http.Response{ - StatusCode: http.StatusOK, - }, nil -} - func makeFakeMetric(url string) string { return fmt.Sprintf(`fake_metric{url="%s"} 1\n`, url) } diff --git a/pkg/envoy/proxy.go b/pkg/envoy/proxy.go index 005dd883..465adfa0 100644 --- a/pkg/envoy/proxy.go +++ b/pkg/envoy/proxy.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "io" - "net/http" "os" "os/exec" "path/filepath" @@ -25,9 +24,7 @@ type state uint32 const ( stateInitial state = iota stateRunning - stateDraining stateStopped - stateExited ) const ( @@ -35,15 +32,6 @@ const ( logFormatJSON = `{"@timestamp":"%Y-%m-%dT%T.%fZ%z","@module":"envoy.%n","@level":"%l","@message":"%j","thread":%t}` ) -// ProxyManager is an interface for managing an Envoy proxy process. -type ProxyManager interface { - Run(ctx context.Context) error - Drain() error - Quit() error - Kill() error - DumpConfig() error -} - // Proxy manages an Envoy proxy process. // // TODO(NET-118): properly handle the Envoy process lifecycle, including @@ -51,9 +39,6 @@ type ProxyManager interface { type Proxy struct { cfg ProxyConfig - // client that will dial the managed Envoy proxy - client *http.Client - state state cmd *exec.Cmd exitedCh chan struct{} @@ -66,16 +51,6 @@ type ProxyConfig struct { // Defaults to whichever executable called envoy is found on $PATH. ExecutablePath string - // AdminAddr is the hostname or IP address of the Envoy admin interface. - // - // Defaults to 127.0.0.1 - AdminAddr string - - // AdminBindPort is the port of the Envoy admin interface. - // - // Defaults to 19000 - AdminBindPort int - // ExtraArgs are additional arguments that will be passed to Envoy. ExtraArgs []string @@ -125,12 +100,7 @@ func NewProxy(cfg ProxyConfig) (*Proxy, error) { cfg.EnvoyErrorStream = os.Stderr } return &Proxy{ - cfg: cfg, - - client: &http.Client{ - Timeout: 10 * time.Second, - }, - + cfg: cfg, exitedCh: make(chan struct{}), }, nil } @@ -154,14 +124,6 @@ func (p *Proxy) Run(ctx context.Context) error { // Run the Envoy process. p.cmd = p.buildCommand(ctx, configPath) - - // Start Envoy in its own process group to avoid directly receiving - // SIGTERM intended for consul-dataplane, let proxy manager handle - // graceful shutdown if configured. - p.cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - p.cfg.Logger.Debug("running envoy proxy", "command", strings.Join(p.cmd.Args, " ")) if err := p.cmd.Start(); err != nil { // Clean up the pipe if we weren't able to run Envoy. @@ -177,7 +139,7 @@ func (p *Proxy) Run(ctx context.Context) error { go func() { err := p.cmd.Wait() p.cfg.Logger.Info("envoy process exited", "error", err) - p.transitionState(stateRunning, stateExited) + p.transitionState(stateRunning, stateStopped) if err := cleanup(); err != nil { p.cfg.Logger.Error("failed to cleanup boostrap config", "error", err) } @@ -187,136 +149,22 @@ func (p *Proxy) Run(ctx context.Context) error { return nil } -// Start draining inbound connections to the Envoy proxy process. -// -// Note: the caller is responsible for ensuring Drain is not called concurrently -// with Run, as this is thread-unsafe. -func (p *Proxy) Drain() error { - envoyDrainListenersUrl := fmt.Sprintf("http://%s:%v/drain_listeners?inboundonly&graceful", p.cfg.AdminAddr, p.cfg.AdminBindPort) - switch p.getState() { - case stateExited: - // Nothing to do! - return nil - case stateStopped: - // Nothing to do! - return nil - case stateDraining: - // Nothing to do! - return nil - case stateRunning: - // Start draining inbound connections. - p.cfg.Logger.Debug("draining inbound connections to proxy") - p.transitionState(stateRunning, stateDraining) - _, err := p.client.Post(envoyDrainListenersUrl, "text/plain", nil) - if err != nil { - p.cfg.Logger.Error("envoy: failed to initiate listener drain", "error", err) - } - return err - default: - return errors.New("proxy must be running to drain connections") - } -} - -// Gracefully stop the Envoy proxy process. -// -// Note: the caller is responsible for ensuring Quit is not called concurrently -// with Run, as this is thread-unsafe. -func (p *Proxy) Quit() error { - envoyShutdownUrl := fmt.Sprintf("http://%s:%v/quitquitquit", p.cfg.AdminAddr, p.cfg.AdminBindPort) - - switch p.getState() { - case stateExited: - // Nothing to do! - return nil - case stateStopped: - // Nothing to do! - return nil - case stateDraining: - // Gracefully stop the process after draining connections. - p.cfg.Logger.Debug("stopping proxy connection draining, starting graceful shutdown of Envoy proxy") - p.transitionState(stateDraining, stateStopped) - _, err := p.client.Post(envoyShutdownUrl, "text/plain", nil) - if err != nil { - p.cfg.Logger.Error("envoy: failed to quit", "error", err) - } - return err - case stateRunning: - // Gracefully stop the process. - p.cfg.Logger.Debug("starting graceful shutdown of Envoy proxy") - p.transitionState(stateRunning, stateStopped) - _, err := p.client.Post(envoyShutdownUrl, "text/plain", nil) - if err != nil { - p.cfg.Logger.Error("envoy: failed to quit", "error", err) - } - return err - default: - return errors.New("proxy must be running to be stopped") - } -} - -// Forcefully kill the Envoy proxy process. +// Stop the Envoy proxy process. // // Note: the caller is responsible for ensuring Stop is not called concurrently // with Run, as this is thread-unsafe. -func (p *Proxy) Kill() error { +func (p *Proxy) Stop() error { switch p.getState() { - case stateExited: + case stateStopped: // Nothing to do! return nil - case stateStopped: - // Kill the process, may have failed to gracefully stop. - p.cfg.Logger.Debug("killing Envoy proxy process") - return p.cmd.Process.Kill() - case stateDraining: - // Kill the process, may have failed to gracefully stop. - p.cfg.Logger.Debug("killing Envoy proxy process") - return p.cmd.Process.Kill() case stateRunning: // Kill the process. - p.cfg.Logger.Debug("killing Envoy proxy process") + p.cfg.Logger.Debug("stopping envoy") return p.cmd.Process.Kill() default: - return errors.New("proxy must be running to be killed") - } -} - -// Dump Envoy config to disk. -func (p *Proxy) DumpConfig() error { - switch p.getState() { - case stateExited: - return errors.New("proxy must be running to dump config") - case stateStopped: - return errors.New("proxy must be running to dump config") - case stateDraining: - return p.dumpConfig() - case stateRunning: - return p.dumpConfig() - default: - return errors.New("proxy must be running to dump config") - } -} - -func (p *Proxy) dumpConfig() error { - envoyConfigDumpUrl := fmt.Sprintf("http://%s:%v/config_dump?include_eds", p.cfg.AdminAddr, p.cfg.AdminBindPort) - - rsp, err := p.client.Get(envoyConfigDumpUrl) - if err != nil { - p.cfg.Logger.Error("envoy: failed to dump config", "error", err) - return err - } - defer rsp.Body.Close() - - config, err := io.ReadAll(rsp.Body) - if err != nil { - p.cfg.Logger.Error("envoy: failed to dump config", "error", err) - return err - } - - if _, err := p.cfg.EnvoyOutputStream.Write(config); err != nil { - p.cfg.Logger.Error("envoy: failed to write config to output stream", "error", err) + return errors.New("proxy must be running to be stopped") } - - return err } // Exited returns a channel that is closed when the Envoy process exits. It can diff --git a/pkg/envoy/proxy_test.go b/pkg/envoy/proxy_test.go index 4ab46d5c..b680fcdd 100644 --- a/pkg/envoy/proxy_test.go +++ b/pkg/envoy/proxy_test.go @@ -38,7 +38,7 @@ func TestProxy(t *testing.T) { }) require.NoError(t, err) require.NoError(t, p.Run(context.Background())) - t.Cleanup(func() { _ = p.Kill() }) + t.Cleanup(func() { _ = p.Stop() }) // Read the output written by fake-envoy. It might take a while, so poll the // file for a couple of seconds. @@ -71,8 +71,8 @@ func TestProxy(t *testing.T) { // Check the process is still running. require.NoError(t, p.cmd.Process.Signal(syscall.Signal(0))) - // Ensure Kill kills and reaps the process. - require.NoError(t, p.Kill()) + // Ensure Stop kills and reaps the process. + require.NoError(t, p.Stop()) require.Eventually(t, func() bool { return p.cmd.Process.Signal(syscall.Signal(0)) == os.ErrProcessDone @@ -92,7 +92,7 @@ func TestProxy_Crash(t *testing.T) { }) require.NoError(t, err) require.NoError(t, p.Run(context.Background())) - t.Cleanup(func() { _ = p.Kill() }) + t.Cleanup(func() { _ = p.Stop() }) // Check the process is running. require.NoError(t, p.cmd.Process.Signal(syscall.Signal(0))) @@ -106,7 +106,7 @@ func TestProxy_Crash(t *testing.T) { t.Fatal("timeout waiting for Exited channel to be closed") } - require.Equal(t, stateExited, p.getState()) + require.Equal(t, stateStopped, p.getState()) } func TestProxy_ContextDone(t *testing.T) { @@ -123,7 +123,7 @@ func TestProxy_ContextDone(t *testing.T) { require.NoError(t, err) ctx, cancel := context.WithCancel(context.Background()) require.NoError(t, p.Run(ctx)) - t.Cleanup(func() { _ = p.Kill() }) + t.Cleanup(func() { _ = p.Stop() }) // Check the process is running. require.NoError(t, p.cmd.Process.Signal(syscall.Signal(0))) @@ -137,7 +137,7 @@ func TestProxy_ContextDone(t *testing.T) { t.Fatal("timeout waiting for Exited channel to be closed") } - require.Equal(t, stateExited, p.getState()) + require.Equal(t, stateStopped, p.getState()) } func testOutputPath() string {