Skip to content

Commit

Permalink
Add readiness probe (#1047)
Browse files Browse the repository at this point in the history
Problem: The NKG Pod would report Ready as soon as it started, which could lead to traffic being sent too soon before nginx was actually configured.

Solution: Add a readiness probe that will report Ready once the controller has written its first config to nginx (or has nothing to do on startup).

Also changed the metrics "disable" helm flag to "enable" to be consistent and easier to read.
  • Loading branch information
sjberman authored Sep 15, 2023
1 parent 7871727 commit 8b2900e
Show file tree
Hide file tree
Showing 19 changed files with 419 additions and 127 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ build-nkg-debug-image: debug-build build-nkg-image ## Build NKG image with debug
generate-manifests: ## Generate manifests using Helm.
cp $(CHART_DIR)/crds/* $(MANIFEST_DIR)/crds/
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) $(HELM_TEMPLATE_EXTRA_ARGS_FOR_ALL_MANIFESTS_FILE) -n nginx-gateway | cat $(strip $(MANIFEST_DIR))/namespace.yaml - > $(strip $(MANIFEST_DIR))/nginx-gateway.yaml
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.disable=true -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.enable=false -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer.yaml
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.annotations.'service\.beta\.kubernetes\.io\/aws-load-balancer-type'="nlb" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer-aws-nlb.yaml
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.type=NodePort --set service.externalTrafficPolicy="" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/nodeport.yaml
Expand Down
30 changes: 29 additions & 1 deletion cmd/gateway/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"go.uber.org/zap"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
ctlrZap "sigs.k8s.io/controller-runtime/pkg/log/zap"

"github.com/nginxinc/nginx-kubernetes-gateway/internal/mode/provisioner"
Expand Down Expand Up @@ -42,11 +43,16 @@ var (
updateGCStatus bool
disableMetrics bool
metricsSecure bool
disableHealth bool

metricsListenPort = intValidatingValue{
validator: validatePort,
value: 9113,
}
healthListenPort = intValidatingValue{
validator: validatePort,
value: 8081,
}
gateway = namespacedNameValue{}
configName = stringValidatingValue{
validator: validateResourceName,
Expand Down Expand Up @@ -92,6 +98,11 @@ func createStaticModeCommand() *cobra.Command {
"commit", commit,
"date", date,
)
log.SetLogger(logger)

if err := ensureNoPortCollisions(metricsListenPort.value, healthListenPort.value); err != nil {
return fmt.Errorf("error validating ports: %w", err)
}

podIP := os.Getenv("POD_IP")
if err := validateIP(podIP); err != nil {
Expand Down Expand Up @@ -126,6 +137,10 @@ func createStaticModeCommand() *cobra.Command {
GatewayNsName: gwNsName,
UpdateGatewayClassStatus: updateGCStatus,
MetricsConfig: metricsConfig,
HealthConfig: config.HealthConfig{
Enabled: !disableHealth,
Port: healthListenPort.value,
},
}

if err := static.StartManager(conf); err != nil {
Expand Down Expand Up @@ -171,7 +186,7 @@ func createStaticModeCommand() *cobra.Command {
cmd.Flags().Var(
&metricsListenPort,
"metrics-port",
"Set the port where the metrics are exposed. Format: [1023 - 65535]",
"Set the port where the metrics are exposed. Format: [1024 - 65535]",
)

cmd.Flags().BoolVar(
Expand All @@ -182,6 +197,19 @@ func createStaticModeCommand() *cobra.Command {
" Please note that this endpoint will be secured with a self-signed certificate.",
)

cmd.Flags().BoolVar(
&disableHealth,
"health-disable",
false,
"Disable running the health probe server.",
)

cmd.Flags().Var(
&healthListenPort,
"health-port",
"Set the port where the health probe server is exposed. Format: [1024 - 65535]",
)

return cmd
}

Expand Down
29 changes: 29 additions & 0 deletions cmd/gateway/commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
"--metrics-port=9114",
"--metrics-disable",
"--metrics-secure-serving",
"--health-port=8081",
"--health-disable",
},
wantErr: false,
},
Expand Down Expand Up @@ -214,6 +216,33 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
expectedErrPrefix: `invalid argument "999" for "--metrics-secure-serving" flag: strconv.ParseBool:` +
` parsing "999": invalid syntax`,
},
{
name: "health-port is invalid type",
args: []string{
"--health-port=invalid", // not an int
},
wantErr: true,
expectedErrPrefix: `invalid argument "invalid" for "--health-port" flag: failed to parse int value:` +
` strconv.ParseInt: parsing "invalid": invalid syntax`,
},
{
name: "health-port is outside of range",
args: []string{
"--health-port=999", // outside of range
},
wantErr: true,
expectedErrPrefix: `invalid argument "999" for "--health-port" flag:` +
` port outside of valid port range [1024 - 65535]: 999`,
},
{
name: "health-disable is not a bool",
args: []string{
"--health-disable=999", // not a bool
},
wantErr: true,
expectedErrPrefix: `invalid argument "999" for "--health-disable" flag: strconv.ParseBool:` +
` parsing "999": invalid syntax`,
},
}

for _, test := range tests {
Expand Down
14 changes: 14 additions & 0 deletions cmd/gateway/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,17 @@ func validatePort(port int) error {
}
return nil
}

// ensureNoPortCollisions checks if the same port has been defined multiple times
func ensureNoPortCollisions(ports ...int) error {
seen := make(map[int]struct{})

for _, port := range ports {
if _, ok := seen[port]; ok {
return fmt.Errorf("port %d has been defined multiple times", port)
}
seen[port] = struct{}{}
}

return nil
}
7 changes: 7 additions & 0 deletions cmd/gateway/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,10 @@ func TestValidatePort(t *testing.T) {
})
}
}

func TestEnsureNoPortCollisions(t *testing.T) {
g := NewWithT(t)

g.Expect(ensureNoPortCollisions(9113, 8081)).To(Succeed())
g.Expect(ensureNoPortCollisions(9113, 9113)).ToNot(Succeed())
}
10 changes: 10 additions & 0 deletions conformance/provisioner/static-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ spec:
- --gatewayclass=nginx
- --config=nginx-gateway-config
- --metrics-disable
- --health-port=8081
env:
- name: POD_IP
valueFrom:
Expand All @@ -41,6 +42,15 @@ spec:
image: ghcr.io/nginxinc/nginx-kubernetes-gateway:edge
imagePullPolicy: Always
name: nginx-gateway
ports:
- name: health
containerPort: 8081
readinessProbe:
httpGet:
path: /readyz
port: health
initialDelaySeconds: 3
periodSeconds: 1
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
47 changes: 25 additions & 22 deletions deploy/helm-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,25 +132,28 @@ kubectl delete -f https://github.com/kubernetes-sigs/gateway-api/releases/downlo

The following tables lists the configurable parameters of the NGINX Kubernetes Gateway chart and their default values.

| Parameter | Description | Default Value |
|--------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway |
| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge |
| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always |
| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx |
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller |
| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment |
|`nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource | [See nginxGateway.config section](values.yaml) |
| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
| `nginx.image.tag` | The tag for the NGINX image. | edge |
| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always |
| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} |
| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated |
| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true |
| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer |
| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local |
| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} |
| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
| `metrics.disable` | Disable exposing metrics in the Prometheus format. |false |
| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] |9113 |
| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. |false |
| Parameter | Description | Default Value |
|-----------|-------------|---------------|
| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway |
| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge |
| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always |
| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx |
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller |
| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment |
| `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource. | [See nginxGateway.config section](values.yaml) |
| `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true |
| `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 |
| `nginxGateway.readinessProbe.initialDelaySeconds` | The number of seconds after the Pod has started before the readiness probes are initiated. | 3 |
| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
| `nginx.image.tag` | The tag for the NGINX image. | edge |
| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always |
| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} |
| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated |
| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true |
| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer |
| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local |
| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} |
| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
| `metrics.enable` | Enable exposing metrics in the Prometheus format. | true |
| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] | 9113 |
| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. | false |
21 changes: 18 additions & 3 deletions deploy/helm-chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
metadata:
labels:
{{- include "nginx-gateway.selectorLabels" . | nindent 8 }}
{{- if not .Values.metrics.disable }}
{{- if .Values.metrics.enable }}
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "{{ .Values.metrics.port }}"
Expand All @@ -31,14 +31,19 @@ spec:
- --gateway-ctlr-name={{ .Values.nginxGateway.gatewayControllerName }}
- --gatewayclass={{ .Values.nginxGateway.gatewayClassName }}
- --config={{ include "nginx-gateway.config-name" . }}
{{- if not .Values.metrics.disable }}
{{- if .Values.metrics.enable }}
- --metrics-port={{ .Values.metrics.port }}
{{- if .Values.metrics.secure }}
- --metrics-secure-serving
{{- end }}
{{- else }}
- --metrics-disable
{{- end }}
{{- if .Values.nginxGateway.readinessProbe.enable }}
- --health-port={{ .Values.nginxGateway.readinessProbe.port }}
{{- else }}
- --health-disable
{{- end }}
env:
- name: POD_IP
valueFrom:
Expand All @@ -51,11 +56,21 @@ spec:
image: {{ .Values.nginxGateway.image.repository }}:{{ .Values.nginxGateway.image.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.nginxGateway.image.pullPolicy }}
name: nginx-gateway
{{- if not .Values.metrics.disable }}
ports:
{{- if .Values.metrics.enable }}
- name: metrics
containerPort: {{ .Values.metrics.port }}
{{- end }}
{{- if .Values.nginxGateway.readinessProbe.enable }}
- name: health
containerPort: {{ .Values.nginxGateway.readinessProbe.port }}
readinessProbe:
httpGet:
path: /readyz
port: health
initialDelaySeconds: {{ .Values.nginxGateway.readinessProbe.initialDelaySeconds }}
periodSeconds: 1
{{- end }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
Loading

0 comments on commit 8b2900e

Please sign in to comment.