forked from monzo/envoy-preflight
-
Notifications
You must be signed in to change notification settings - Fork 25
/
main.go
243 lines (205 loc) · 7.2 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
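// Binary scuttle wraps a command: it can wait for the Envoy sidecar to report ready before starting the command, and it attempts to stop the Istio sidecar once the command exits.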
package main
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"os/signal"
"strings"
"time"
"syscall"
"github.com/cenk/backoff"
"github.com/monzo/typhon"
)
// ServerInfo holds the part of the JSON response from Envoy's server info
// endpoint that scuttle reads.
type ServerInfo struct {
// State is decoded from the "state" key; pollEnvoy treats "LIVE" as ready.
State string `json:"state"`
}
// Version is the version of the binary, printed in the startup log line.
// It defaults to "vlocal" and is set to a value like v1.0.0 in CI using ldflags.
var Version = "vlocal"
// config is the process-wide scuttle configuration. main assigns it from
// getConfig() before anything else reads it.
var config ScuttleConfig
// main is scuttle's entry point. It:
//
//  1. loads the configuration and, when an Envoy admin API is configured,
//     blocks until waitForEnvoy's context is done;
//  2. starts the command given in os.Args[1:] as a child process, forwarding
//     received signals to it;
//  3. once the child exits, calls kill to stop the sidecar and exits with the
//     child's exit code.
//
// It panics if the executable cannot be found or started, or if waiting on
// the child fails.
func main() {
	config = getConfig()

	log(fmt.Sprintf("Scuttle %s starting up, pid %d", Version, os.Getpid()))

	if len(os.Args) < 2 {
		log("No arguments received, exiting")
		return
	}

	// Check if logging is enabled
	if config.LoggingEnabled {
		log("Logging is now enabled")
	}

	// If an Envoy API was set, block until Envoy is up (or the wait gives up).
	// waitForEnvoy returns nil when scuttle is configured not to wait.
	if config.EnvoyAdminAPI != "" {
		if blockingCtx := waitForEnvoy(); blockingCtx != nil {
			<-blockingCtx.Done()
			err := blockingCtx.Err()
			switch {
			case err == nil || errors.Is(err, context.Canceled):
				// pollEnvoy cancels the context to signal that Envoy is live.
				log("Blocking finished, Envoy has started")
			case errors.Is(err, context.DeadlineExceeded) && config.QuitWithoutEnvoyTimeout > time.Duration(0):
				log("Blocking timeout reached and Envoy has not started, exiting scuttle")
				os.Exit(1)
			case errors.Is(err, context.DeadlineExceeded):
				log("Blocking timeout reached and Envoy has not started, continuing with passed in executable")
			default:
				panic(err.Error())
			}
		}
	}

	// Find the executable the user wants to run
	binary, err := exec.LookPath(os.Args[1])
	if err != nil {
		panic(err)
	}

	// started hands the child process over to the signal goroutine. A channel
	// is used instead of a variable shared between goroutines so that the
	// hand-off is free of data races.
	started := make(chan *os.Process, 1)

	stop := make(chan os.Signal, 2)
	signal.Notify(stop, syscall.SIGINT) // Only listen to SIGINT until after child proc starts

	// Pass signals to the child process
	// This takes an OS signal and passes it to the child process scuttle starts
	go func() {
		var child *os.Process
		for sig := range stop {
			if child == nil {
				// Non-blocking check: pick up the child if it has been started.
				select {
				case child = <-started:
				default:
				}
			}

			switch {
			case sig == syscall.SIGURG:
				// SIGURG is used by Golang for its own purposes, ignore it as these signals
				// are most likely "junk" from Golang not from K8s/Docker
				log(fmt.Sprintf("Received signal '%v', ignoring", sig))
			case child == nil:
				// Signal received before the process even started. Let's just exit.
				log(fmt.Sprintf("Received signal '%v', exiting", sig))
				kill(1) // Attempt to stop sidecars if configured
				// kill does not terminate the process in every branch, so exit
				// explicitly; otherwise the child would still be started.
				os.Exit(1)
			default:
				// The child process is running and should also receive this signal
				log(fmt.Sprintf("Received signal '%v', passing to child", sig))
				if err := child.Signal(sig); err != nil {
					log(fmt.Sprintf("Could not pass signal '%v' to child, error: %s", sig, err))
				}
			}
		}
	}()

	// Start process passed in by user
	proc, err := os.StartProcess(binary, os.Args[1:], &os.ProcAttr{
		Files: []*os.File{os.Stdin, os.Stdout, os.Stderr},
	})
	if err != nil {
		panic(err)
	}
	started <- proc

	// Once child process starts, listen for any signal and pass to the child proc
	signal.Notify(stop)

	state, err := proc.Wait()
	if err != nil {
		panic(err)
	}

	exitCode := state.ExitCode()

	kill(exitCode)

	os.Exit(exitCode)
}
func kill(exitCode int) {
var logLineUnformatted = "Kill received: (Action: %s, Reason: %s, Exit Code: %d)"
switch {
case config.EnvoyAdminAPI == "":
log(fmt.Sprintf(logLineUnformatted, "Skipping Istio kill", "ENVOY_ADMIN_API not set", exitCode))
case !strings.Contains(config.EnvoyAdminAPI, "127.0.0.1") && !strings.Contains(config.EnvoyAdminAPI, "localhost"):
log(fmt.Sprintf(logLineUnformatted, "Skipping Istio kill", "ENVOY_ADMIN_API is not a localhost or 127.0.0.1", exitCode))
case config.NeverKillIstio:
log(fmt.Sprintf(logLineUnformatted, "Skipping Istio kill", "NEVER_KILL_ISTIO is true", exitCode))
case config.NeverKillIstioOnFailure && exitCode != 0:
log(fmt.Sprintf(logLineUnformatted, "Skipping Istio kill", "NEVER_KILL_ISTIO_ON_FAILURE is true", exitCode))
os.Exit(exitCode)
case config.IstioQuitAPI == "":
// No istio API sent, fallback to Pkill method
log(fmt.Sprintf(logLineUnformatted, "Stopping Istio with pkill", "ISTIO_QUIT_API is not set", exitCode))
killGenericEndpoints()
killIstioWithPkill()
default:
// Stop istio using api
log(fmt.Sprintf(logLineUnformatted, "Stopping Istio with API", "ISTIO_QUIT_API is set", exitCode))
killGenericEndpoints()
killIstioWithAPI()
}
}
// killGenericEndpoints sends a body-less POST request to each entry of
// config.GenericQuitEndpoints (with surrounding spaces removed) and logs
// either the response's error or its status code. A failing endpoint does not
// stop the remaining endpoints from being called.
func killGenericEndpoints() {
	// Ranging over an empty slice is a no-op, so no explicit length check is needed.
	for _, rawEndpoint := range config.GenericQuitEndpoints {
		endpoint := strings.Trim(rawEndpoint, " ")
		rsp := typhon.NewRequest(context.Background(), "POST", endpoint, nil).Send().Response()
		if rsp.Error == nil {
			log(fmt.Sprintf("Sent POST to '%s', status code: %d", endpoint, rsp.StatusCode))
		} else {
			log(fmt.Sprintf("Sent POST to '%s', error: %s", endpoint, rsp.Error))
		}
	}
}
// killIstioWithAPI asks Istio to shut down by sending a POST request to
// "<config.IstioQuitAPI>/quitquitquit". The attempt counts as successful only
// when the request returns without error and with status code 200. If it is
// not successful and config.IstioFallbackPkill is set, it falls back to
// killIstioWithPkill.
func killIstioWithAPI() {
	log(fmt.Sprintf("Stopping Istio using Istio API '%s' (intended for Istio >v1.2)", config.IstioQuitAPI))

	url := fmt.Sprintf("%s/quitquitquit", config.IstioQuitAPI)
	resp := typhon.NewRequest(context.Background(), "POST", url, nil).Send().Response()
	responseSuccess := false

	if resp.Error != nil {
		// resp.Error is an error value, so it must be formatted with %s;
		// %d would print a "%!d(...)" placeholder instead of the message.
		log(fmt.Sprintf("Sent quitquitquit to Istio, error: %s", resp.Error))
	} else {
		log(fmt.Sprintf("Sent quitquitquit to Istio, status code: %d", resp.StatusCode))
		responseSuccess = resp.StatusCode == 200
	}

	if !responseSuccess && config.IstioFallbackPkill {
		log("quitquitquit failed, will attempt pkill method")
		killIstioWithPkill()
	}
}
// killIstioWithPkill runs "pkill -SIGINT pilot-agent" through sh and logs
// whether the command succeeded. A failure is only logged, never returned.
func killIstioWithPkill() {
	log("Stopping Istio using pkill command (intended for Istio <v1.3)")

	if _, err := exec.Command("sh", "-c", "pkill -SIGINT pilot-agent").Output(); err != nil {
		log("pilot-agent could not be stopped, err: " + err.Error())
		return
	}

	log("Process pilot-agent successfully stopped")
}
// waitForEnvoy starts polling Envoy in a background goroutine and returns a
// context the caller can wait on. It returns nil when config.StartWithoutEnvoy
// is set, in which case no polling is started.
//
// The context carries a timeout when one is configured:
// config.QuitWithoutEnvoyTimeout takes precedence over
// config.WaitForEnvoyTimeout. With neither set, the context only ends when
// pollEnvoy cancels it.
func waitForEnvoy() context.Context {
	if config.StartWithoutEnvoy {
		return nil
	}

	// Pick the effective timeout; a positive quit timeout wins over the wait timeout.
	timeout := config.WaitForEnvoyTimeout
	if config.QuitWithoutEnvoyTimeout > time.Duration(0) {
		timeout = config.QuitWithoutEnvoyTimeout
	}

	var (
		ctx  context.Context
		done context.CancelFunc
	)
	if timeout > time.Duration(0) {
		ctx, done = context.WithTimeout(context.Background(), timeout)
	} else {
		ctx, done = context.WithCancel(context.Background())
	}

	log("Blocking until Envoy starts")
	go pollEnvoy(ctx, done)

	return ctx
}
// pollEnvoy repeatedly requests "<config.EnvoyAdminAPI>/server_info" with
// exponential backoff until the decoded state is "LIVE", then calls cancel so
// that whoever waits on ctx observes context.Canceled (which main treats as
// "Envoy has started").
//
// If polling gives up without Envoy becoming live and ctx has a deadline,
// cancel is deliberately NOT called: the deadline is left to expire so the
// waiter sees context.DeadlineExceeded instead of a false success.
func pollEnvoy(ctx context.Context, cancel context.CancelFunc) {
	url := fmt.Sprintf("%s/server_info", config.EnvoyAdminAPI)
	pollCount := 0
	b := backoff.NewExponentialBackOff()
	// Bound the retries by the configured timeout; the quit timeout takes
	// precedence. With no timeout configured this is zero, which the backoff
	// library documents as "never stop" - in practice k8s will kill the pod
	// if we take too long.
	b.MaxElapsedTime = config.WaitForEnvoyTimeout
	if config.QuitWithoutEnvoyTimeout > time.Duration(0) {
		b.MaxElapsedTime = config.QuitWithoutEnvoyTimeout
	}

	err := backoff.Retry(func() error {
		pollCount++
		rsp := typhon.NewRequest(ctx, "GET", url, nil).Send().Response()

		info := &ServerInfo{}

		err := rsp.Decode(info)
		if err != nil {
			log(fmt.Sprintf("Polling Envoy (%d), error: %s", pollCount, err))
			return err
		}

		if info.State != "LIVE" {
			log(fmt.Sprintf("Polling Envoy (%d), status: Not ready yet", pollCount))
			return errors.New("not live yet")
		}

		return nil
	}, b)

	if err != nil {
		log(fmt.Sprintf("Polling Envoy stopped after %d attempts, error: %s", pollCount, err))
		if _, hasDeadline := ctx.Deadline(); hasDeadline {
			// Let the deadline fire; cancelling here would be read as success.
			return
		}
	}

	// Notify the context that it's done, if it has not already been cancelled
	cancel()
}