Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: stable deployments for spartan #9147

Merged
merged 6 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion scripts/ci/get_e2e_jobs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ cd "$(dirname "$0")"/../..
BRANCH=$1
LABELS=$2

# Define the allow_list
# Define the jobs that will run on every PR
allow_list=(
"e2e-2-pxes"
"e2e-authwit"
Expand All @@ -26,6 +26,7 @@ allow_list=(
"e2e-cheat-codes"
"e2e-prover-fake-proofs"
"e2e-lending-contract"
"kind-network-smoke"
)

# Add labels from input to the allow_list
Expand Down
13 changes: 10 additions & 3 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,20 @@ http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }}
{{- end -}}

{{- define "aztec-network.otelCollectorMetricsEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/metrics
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/metrics
{{- end -}}
{{- end -}}
{{- end -}}

{{- define "aztec-network.otelCollectorTracesEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/traces
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/traces
{{- end -}}
{{- end -}}
{{- end -}}



{{- define "helpers.flag" -}}
Expand Down
5 changes: 4 additions & 1 deletion spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ spec:
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
- name: deploy-contracts
image: {{ .Values.images.aztec.image }}
command:
Expand All @@ -56,10 +58,11 @@ spec:
- name: boot-node
image: {{ .Values.images.aztec.image }}
command:
# sleep to allow dns name to be resolvable
[
"/bin/bash",
"-c",
"source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
"sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
]
livenessProbe:
exec:
Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/l2-contracts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ metadata:
data:
deploy-contracts.sh: |
#!/bin/sh
set -e
set -ex

# Run the deploy-l1-contracts command and capture the output
output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --validators {{ join "," .Values.validator.validatorAddresses | quote }})
Expand Down
25 changes: 25 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ spec:
app: prover-node
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are likely to change node in the future, the grep "anvil" here should probably also be configurable

echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
- name: configure-prover-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand All @@ -33,6 +57,7 @@ spec:
env:
- name: ETHEREUM_HOST
value: {{ include "aztec-network.ethereumHost" . | quote }}

containers:
- name: prover-node
image: "{{ .Values.images.aztec.image }}"
Expand Down
11 changes: 11 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ spec:
{{- include "aztec-network.selectorLabels" . | nindent 8 }}
app: pxe
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
containers:
- name: pxe
image: "{{ .Values.images.aztec.image }}"
Expand Down
27 changes: 26 additions & 1 deletion spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ spec:
# We expect the validators to have already been added to the smart contract by this point - but this container still needs
# to be run in order to get the values
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done

- name: configure-validator-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand Down Expand Up @@ -50,7 +75,7 @@ spec:
command:
- "/bin/bash"
- "-c"
- "source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
- "sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
Copy link
Collaborator

@ludamad ludamad Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally all sleeps would be wait loops otherwise we invite mysterious flakes (and has been the culprit behind many so far), but if it gets us unblocked can be iterated on

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is to allow the k8s DNS name to get set up. It runs before the node even starts, so shouldn't invite too much flake.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still ideal to query k8s dns if possible, but yeah nbd

volumeMounts:
- name: shared-volume
mountPath: /shared
Expand Down
26 changes: 8 additions & 18 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ network:
public: false
enableBots: true

telemetry:
enabled: false
otelCollectorEndpoint:

images:
aztec:
image: aztecprotocol/aztec
Expand Down Expand Up @@ -31,13 +35,14 @@ bootNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "4"
storage: "8Gi"

validator:
replicas: 1
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
validatorAddresses:
- 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266
service:
p2pPort: 40400
nodePort: 8080
Expand All @@ -54,9 +59,6 @@ validator:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "28Gi"
cpu: "7"
storage: "8Gi"

proverNode:
Expand All @@ -71,9 +73,6 @@ proverNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "120Gi"
cpu: "15"
storage: "8Gi"

pxe:
Expand All @@ -93,9 +92,6 @@ pxe:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

bot:
logLevel: "debug"
Expand Down Expand Up @@ -124,9 +120,6 @@ bot:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

ethereum:
replicas: 1
Expand All @@ -152,7 +145,4 @@ ethereum:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"
storage: "8Gi"
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/16-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics helm chart before using this values file.
# head to spartan/metrics and run `./install.sh`
# (then `./forward.sh` if you want to see it)
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

bootNode:
sequencer:
minTxsPerBlock: 4
Expand Down
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/48-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics helm chart before using this values file.
# head to spartan/metrics and run `./install.sh`
# (then `./forward.sh` if you want to see it)
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

validator:
debug: "aztec:*,-aztec:avm_simulator:*,-aztec:libp2p_service"
replicas: 48
Expand Down
4 changes: 2 additions & 2 deletions yarn-project/end-to-end/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ e2e-cli-wallet:
LOCALLY
RUN COMPOSE_FILE=scripts/docker-compose-wallet.yml ./scripts/e2e_compose_test.sh e2e_cli_wallet

network-smoke:
kind-network-smoke:
ARG values_file
LOCALLY
RUN NAMESPACE=smoke FRESH_INSTALL=true VALUES_FILE=${values_file:-default.yaml} ./scripts/network_test.sh ./src/spartan/smoke.test.ts

network-transfer:
kind-network-transfer:
ARG values_file
LOCALLY
RUN NAMESPACE=transfer FRESH_INSTALL=true VALUES_FILE=${values_file:-default.yaml} ./scripts/network_test.sh ./src/spartan/transfer.test.ts
22 changes: 19 additions & 3 deletions yarn-project/end-to-end/scripts/network_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Main positional parameter
TEST="$1"

REPO=$(git rev-parse --show-toplevel)
if [ "$(uname)" = "Linux" ] && [ "$(uname -m)" = "x86_64" ]; then
"$REPO"/spartan/scripts/setup_local_k8s.sh
else
echo "Not on x64 Linux, not installing k8s and helm."
fi

# Default values for environment variables
VALUES_FILE="${VALUES_FILE:-default.yaml}"
CHAOS_VALUES="${CHAOS_VALUES:-}"
Expand Down Expand Up @@ -55,12 +62,13 @@ function show_status_until_pxe_ready() {
}

show_status_until_pxe_ready &
SHOW_STATUS_PID=$!

# Install the Helm chart
helm upgrade --install spartan "$(git rev-parse --show-toplevel)/spartan/aztec-network/" \
helm upgrade --install spartan "$REPO/spartan/aztec-network/" \
--namespace "$NAMESPACE" \
--create-namespace \
--values "$(git rev-parse --show-toplevel)/spartan/aztec-network/values/$VALUES_FILE" \
--values "$REPO/spartan/aztec-network/values/$VALUES_FILE" \
--set images.aztec.image="aztecprotocol/aztec:$AZTEC_DOCKER_TAG" \
--set ingress.enabled=true \
--wait \
Expand All @@ -71,8 +79,16 @@ kubectl wait pod -l app==pxe --for=condition=Ready -n "$NAMESPACE" --timeout=10m

# tunnel in to get access directly to our PXE service in k8s
(kubectl port-forward --namespace $NAMESPACE svc/spartan-aztec-network-pxe 9082:8080 2>/dev/null >/dev/null || true) &
PORT_FORWARD_PID=$!

cleanup() {
echo "Cleaning up..."
kill $PORT_FORWARD_PID || true
kill $SHOW_STATUS_PID || true
}

trap cleanup EXIT SIGINT SIGTERM

# run our test in the host network namespace (so we can access the above with localhost)
docker run --rm --network=host \
-e PXE_URL=http://localhost:9082 \
-e DEBUG="aztec:*" \
Expand Down
Loading