Skip to content

Commit

Permalink
chore: add equinix e2e-tests
Browse files Browse the repository at this point in the history
Add equinix e2e-tests.

Signed-off-by: Noel Georgi <[email protected]>
  • Loading branch information
frezbo committed Apr 2, 2024
1 parent 117e605 commit f515741
Show file tree
Hide file tree
Showing 14 changed files with 293 additions and 14 deletions.
71 changes: 71 additions & 0 deletions .drone.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ local creds_env_vars = {
// TODO(andrewrynhard): Rename this to the GCP convention.
GCE_SVC_ACCT: { from_secret: 'gce_svc_acct' },
PACKET_AUTH_TOKEN: { from_secret: 'packet_auth_token' },
EM_API_TOKEN: { from_secret: 'em_api_token' },
EM_PROJECT_ID: { from_secret: 'em_project_id' },
GITHUB_TOKEN: { from_secret: 'ghcr_token' }, // Use GitHub API token to avoid rate limiting on CAPI -> GitHub calls.
};

Expand Down Expand Up @@ -917,10 +919,77 @@ local E2EAzure() =

targets;

// Equinix Metal e2e pipeline steps: prepare/upload boot artifacts, trigger the
// downstream Terraform apply, pull back the generated cluster configs, run the
// e2e suite, and always destroy the infrastructure afterwards.
local E2EEquinixMetal() =
local depends_on = [load_artifacts];

// Uploads the generated Equinix Metal artifacts from _out to an Azure blob
// container keyed by commit SHA plus sanitized tag (dots replaced by dashes),
// where the downstream Terraform job picks them up.
local e2e_equinixmetal_prepare = Step(
'e2e-equinix-metal-prepare',
depends_on=depends_on,
environment=creds_env_vars {
IMAGE_REGISTRY: local_registry,
},
extra_commands=[
// $$ escapes Drone's own variable substitution; these expand at runtime.
'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
'az storage blob upload-batch --overwrite -s _out --pattern "e2e-equinix-metal-generated/*" -d "${CI_COMMIT_SHA}${DRONE_TAG//./-}"',
]
);

// Triggers the shared Terraform apply pipeline in siderolabs/contrib,
// pointing it at the blob container populated by the prepare step.
local tf_apply = TriggerDownstream(
'tf-apply',
'e2e-talos-tf-apply',
['siderolabs/contrib@main'],
params=[
'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'TYPE=equinix-metal',
],
depends_on=[e2e_equinixmetal_prepare],
);

// Downloads the talosconfig/kubeconfig that the Terraform apply produced, so
// the e2e step below can talk to the freshly provisioned cluster.
local e2e_equinixmetal_tf_apply_post = Step(
'e2e-equinix-metal-download-artifacts',
with_make=false,
environment=creds_env_vars,
extra_commands=[
'az login --service-principal -u "$${AZURE_CLIENT_ID}" -p "$${AZURE_CLIENT_SECRET}" --tenant "$${AZURE_TENANT_ID}"',
'az storage blob download -f _out/e2e-equinix-metal-talosconfig -n e2e-equinix-metal-talosconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'az storage blob download -f _out/e2e-equinix-metal-kubeconfig -n e2e-equinix-metal-kubeconfig -c ${CI_COMMIT_SHA}${DRONE_TAG//./-}',
],
depends_on=[tf_apply],
);

// Runs the actual e2e test suite (hack/test/e2e-equinix-metal.sh via make).
local e2e_equinixmetal = Step(
'e2e-equinix-metal',
depends_on=[e2e_equinixmetal_tf_apply_post],
environment=creds_env_vars {}
);

// Tears the cluster down via the shared destroy pipeline; runs on both
// success and failure so test machines are never leaked.
local tf_destroy = TriggerDownstream(
'tf-destroy',
'e2e-talos-tf-destroy',
['siderolabs/contrib@main'],
params=[
'BUCKET_PATH=${CI_COMMIT_SHA}${DRONE_TAG//./-}',
'TYPE=equinix-metal',
'REFRESH_ON_DESTROY=false', // it's safe to skip refresh on destroy for EM, since we don't read any data from Equinix.
],
depends_on=[e2e_equinixmetal],
when={
status: [
'failure',
'success',
],
},
);

local targets = [e2e_equinixmetal_prepare, tf_apply, e2e_equinixmetal_tf_apply_post, e2e_equinixmetal, tf_destroy];

targets;


local e2e_aws = [step for step in E2EAWS('default')];
local e2e_aws_nvidia_oss = [step for step in E2EAWS('nvidia-oss')];
local e2e_azure = [step for step in E2EAzure()];
local e2e_equinixmetal = [step for step in E2EEquinixMetal()];
local e2e_gcp = Step('e2e-gcp', depends_on=[e2e_capi], environment=creds_env_vars);

local e2e_trigger(names) = {
Expand All @@ -936,11 +1005,13 @@ local e2e_pipelines = [
Pipeline('e2e-aws', default_pipeline_steps + e2e_aws) + e2e_trigger(['e2e-aws']),
Pipeline('e2e-aws-nvidia-oss', default_pipeline_steps + e2e_aws_nvidia_oss) + e2e_trigger(['e2e-aws-nvidia-oss']),
Pipeline('e2e-azure', default_pipeline_steps + e2e_azure) + e2e_trigger(['e2e-azure']),
Pipeline('e2e-equinix-metal', default_pipeline_steps + e2e_equinixmetal) + e2e_trigger(['e2e-equinix-metal']),
Pipeline('e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp]) + e2e_trigger(['e2e-gcp']),

// cron pipelines, triggered on schedule events
Pipeline('cron-e2e-aws', default_pipeline_steps + e2e_aws, [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-e2e-azure', default_pipeline_steps + e2e_azure, [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-e2e-equinix-metal', default_pipeline_steps + e2e_equinixmetal, [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
];

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ARTIFACTS := _out
TOOLS ?= ghcr.io/siderolabs/tools:v1.7.0-alpha.0-12-gdfee984

PKGS_PREFIX ?= ghcr.io/siderolabs
PKGS ?= v1.7.0-alpha.0-42-gb65c085
PKGS ?= v1.7.0-alpha.0-46-gaefe000
EXTRAS ?= v1.7.0-alpha.0-3-g47bb718

PKG_FHS ?= $(PKGS_PREFIX)/fhs:$(PKGS)
Expand Down
2 changes: 1 addition & 1 deletion cmd/talosctl/pkg/talos/action/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func (a *nodeTracker) runPostCheckWithRetry(preActionBootID string) error {

// handle retryable errors
statusCode := client.StatusCode(err)
if errors.Is(err, io.EOF) || statusCode == codes.Unavailable {
if errors.Is(err, io.EOF) || statusCode == codes.Unavailable || statusCode == codes.Canceled {
a.update(reporter.Update{
Message: "unavailable, retrying...",
Status: reporter.StatusError,
Expand Down
9 changes: 8 additions & 1 deletion hack/test/e2e-aws-prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,14 @@ jq --null-input \
--arg CLUSTER_NAME "${NAME_PREFIX}" \
--arg TALOS_VERSION_CONTRACT "${TALOS_VERSION}" \
--arg KUBERNETES_VERSION "${KUBERNETES_VERSION}" \
'{worker_group: $WORKER_GROUP, ami_id: $AMI_ID, nvidia_ami_id: $NVIDIA_AMI_ID, cluster_name: $CLUSTER_NAME, talos_version_contract: $TALOS_VERSION_CONTRACT, kubernetes_version: $KUBERNETES_VERSION}' \
'{
worker_group: $WORKER_GROUP,
ami_id: $AMI_ID,
nvidia_ami_id: $NVIDIA_AMI_ID,
cluster_name: $CLUSTER_NAME,
talos_version_contract: $TALOS_VERSION_CONTRACT,
kubernetes_version: $KUBERNETES_VERSION
}' \
| jq -f hack/test/tfvars/aws.jq > "${ARTIFACTS}/e2e-aws-generated/vars.json"

cp hack/test/tfvars/*.yaml "${ARTIFACTS}/e2e-aws-generated"
7 changes: 6 additions & 1 deletion hack/test/e2e-azure-prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ jq --null-input \
--arg CLUSTER_NAME "${NAME_PREFIX}" \
--arg TALOS_VERSION_CONTRACT "${TALOS_VERSION}" \
--arg KUBERNETES_VERSION "${KUBERNETES_VERSION}" \
'{vm_os_id: $VM_OS_ID, cluster_name: $CLUSTER_NAME, talos_version_contract: $TALOS_VERSION_CONTRACT, kubernetes_version: $KUBERNETES_VERSION}' \
'{
vm_os_id: $VM_OS_ID,
cluster_name: $CLUSTER_NAME,
talos_version_contract: $TALOS_VERSION_CONTRACT,
kubernetes_version: $KUBERNETES_VERSION
}' \
| jq -f hack/test/tfvars/azure.jq > "${ARTIFACTS}/e2e-azure-generated/vars.json"

cp hack/test/tfvars/*.yaml "${ARTIFACTS}/e2e-azure-generated"
143 changes: 143 additions & 0 deletions hack/test/e2e-equinix-metal-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash

# Prepares an Equinix Metal e2e run: builds iPXE boot scripts, uploads the
# kernel/initramfs artifacts and a registry-shaped OCI layout of the installer
# image to S3, and emits the Terraform variables for the downstream job.

set -eou pipefail

# Shared e2e helpers; presumably defines ARTIFACTS, INSTALLER_IMAGE, SHA,
# TALOS_VERSION, KUBERNETES_VERSION etc. — confirm in hack/test/e2e.sh.
source ./hack/test/e2e.sh

export AWS_DEFAULT_REGION="us-east-1"
export BUCKET_NAME="talos-ci-e2e"

# Equinix Metal metro (region) to deploy into.
export EQUINIX_METRO="dc"

# Scratch space for the pulled OCI image; removed by the cleanup trap below.
TEMP_DIR=$(mktemp -d)

# Split the "name:tag" installer image reference into its two parts.
INSTALLER_IMAGE_NAME=$(cut -d ":" -f1 <<< "${INSTALLER_IMAGE}")
INSTALLER_IMAGE_TAG=$(cut -d ":" -f2 <<< "${INSTALLER_IMAGE}")

# Drop any OCI layout left over from a previous run.
rm -rf "${ARTIFACTS}/v2"

# Remove the scratch directory; registered for both interrupt and normal exit.
cleanup() {
  rm -rf "${TEMP_DIR}"
}

trap cleanup SIGINT EXIT

# Generate an iPXE boot script for the given architecture ($1: amd64|arm64),
# chain-loading the kernel/initramfs uploaded to the public S3 bucket.
function generate_ipxe_script() {
  # Serial console differs per platform: ttyS1 on x86 servers, ttyAMA0 on arm64.
  CONSOLE="console=ttyS1,115200n8"

  [[ "${1}" == "arm64" ]] && CONSOLE="console=ttyAMA0,115200"

  # NOTE: heredoc content is the exact script served to the machines; the
  # kernel cmdline mirrors the hardened Talos defaults for Equinix Metal.
  cat > "${ARTIFACTS}/ipxe-${1}" << EOF
#!ipxe
kernel https://${BUCKET_NAME}.s3.amazonaws.com/vmlinuz-${1} talos.platform=equinixMetal console=tty0 ${CONSOLE} init_on_alloc=1 slab_nomerge pti=on consoleblank=0 nvme_core.io_timeout=4294967295 printk.devkmsg=on ima_template=ima-ng ima_appraise=fix ima_hash=sha512
initrd https://${BUCKET_NAME}.s3.amazonaws.com/initramfs-${1}.xz
boot
EOF
}

# Upload a single file from ${ARTIFACTS} to the public S3 bucket under the
# same name ($1: file name relative to ${ARTIFACTS}).
upload_artifact() {
  local artifact="${1}"

  aws s3 cp --acl public-read "${ARTIFACTS}/${artifact}" "s3://${BUCKET_NAME}/${artifact}"
}

# Move a file into the OCI layout under its sha256 content address.
# $1: source file, $2: target subdirectory ("blobs" or "manifests").
shamove() {
  # 'local' keeps SHA from clobbering the script-level SHA (used later for
  # NAME_PREFIX). Today the caller runs inside a pipeline subshell, so the
  # leak is masked — but don't rely on that.
  local SHA
  SHA=$(sha256sum "${1}" | cut -d " " -f1)

  mv "${1}" "${ARTIFACTS}/v2/${INSTALLER_IMAGE_NAME}/${2}/sha256:${SHA}"
}

# adapted from https://github.com/jpetazzo/registrish/
# Pull the installer image as an OCI layout and rearrange it into the static
# directory structure of a registry (v2/<name>/{manifests,blobs}) so it can be
# served directly from S3.
function generate_oci() {
  crane pull --format=oci "${INSTALLER_IMAGE}" "${TEMP_DIR}/${INSTALLER_IMAGE_NAME}"

  mkdir -p "${ARTIFACTS}/v2/${INSTALLER_IMAGE_NAME}/manifests" "${ARTIFACTS}/v2/${INSTALLER_IMAGE_NAME}/blobs"

  # Classify every file in the pulled layout. NOTE: the pipe runs the loop in
  # a subshell, so variables set inside do not leak into the rest of the script.
  find "${TEMP_DIR}/${INSTALLER_IMAGE_NAME}/blobs" -type f | while read -r FILE; do
    # gzip files are blobs
    if gzip -t "${FILE}"; then
      shamove "${FILE}" blobs
    else
      # json files with architecture are blobs
      if [[ $(jq 'select(.architecture != null)' "${FILE}") != "" ]]; then
        shamove "${FILE}" blobs

        continue
      fi

      # copying over the index file as tag
      [[ $(jq '.mediaType=="application/vnd.oci.image.index.v1+json"' "${FILE}") == "true" ]] && cp "${FILE}" "${ARTIFACTS}/v2/${INSTALLER_IMAGE_NAME}/manifests/${INSTALLER_IMAGE_TAG}"

      # anything else is other manifests referenced by the index
      shamove "${FILE}" manifests
    fi
  done
}

# adapted from https://github.com/jpetazzo/registrish/
# Sync the generated OCI layout to S3. Manifests are uploaded one-by-one so
# each gets its correct Content-Type header (registry clients require it).
function upload_oci() {
  # remove any existing container image data
  aws s3 rm "s3://${BUCKET_NAME}/v2/" --recursive

  # bulk-upload the blobs; manifests are excluded and handled below
  aws s3 sync "${ARTIFACTS}/v2/" \
    "s3://${BUCKET_NAME}/v2/" \
    --acl public-read \
    --exclude '*/manifests/*'

  # -print0 / read -d '' handles arbitrary file names safely
  find "${ARTIFACTS}/v2/" -path '*/manifests/*' -print0 | while IFS= read -r -d '' MANIFEST; do
    CONTENT_TYPE=$(jq -r .mediaType < "${MANIFEST}")

    # fall back to the schema-1 media type when the manifest declares none
    if [ "$CONTENT_TYPE" = "null" ]; then
      CONTENT_TYPE="application/vnd.docker.distribution.manifest.v1+prettyjws"
    fi

    # strip the local "${ARTIFACTS}/" prefix to form the bucket key
    aws s3 cp "${MANIFEST}" \
      "s3://${BUCKET_NAME}/${MANIFEST/${ARTIFACTS}\//}" \
      --acl public-read \
      --content-type "${CONTENT_TYPE}" \
      --metadata-directive REPLACE
  done
}

# generate ipxe script for both amd64 and arm64
generate_ipxe_script "amd64"
generate_ipxe_script "arm64"

upload_artifact "ipxe-amd64"
upload_artifact "ipxe-arm64"

# kernel and initramfs referenced by the iPXE scripts above
upload_artifact vmlinuz-amd64
upload_artifact initramfs-amd64.xz
upload_artifact vmlinuz-arm64
upload_artifact initramfs-arm64.xz

generate_oci
upload_oci

mkdir -p "${ARTIFACTS}/e2e-equinix-metal-generated"

# SHA is presumably the short commit SHA exported by e2e.sh — confirm there.
NAME_PREFIX="talos-e2e-${SHA}-equinix-metal"

# Assemble the raw variable object, then map it through the tfvars filter into
# the vars.json consumed by the downstream Terraform job.
jq --null-input \
  --arg CLUSTER_NAME "${NAME_PREFIX}" \
  --arg EM_API_TOKEN "${EM_API_TOKEN}" \
  --arg EM_PROJECT_ID "${EM_PROJECT_ID}" \
  --arg TALOS_VERSION_CONTRACT "${TALOS_VERSION}" \
  --arg KUBERNETES_VERSION "${KUBERNETES_VERSION}" \
  --arg EM_REGION "${EQUINIX_METRO}" \
  --arg INSTALL_IMAGE "${BUCKET_NAME}.s3.amazonaws.com/${INSTALLER_IMAGE_NAME}:${INSTALLER_IMAGE_TAG}" \
  --arg IPXE_SCRIPT_URL_AMD64 "https://${BUCKET_NAME}.s3.amazonaws.com/ipxe-amd64" \
  --arg IPXE_SCRIPT_URL_ARM64 "https://${BUCKET_NAME}.s3.amazonaws.com/ipxe-arm64" \
  '{
    cluster_name: $CLUSTER_NAME,
    em_api_token: $EM_API_TOKEN,
    em_project_id: $EM_PROJECT_ID,
    talos_version_contract: $TALOS_VERSION_CONTRACT,
    kubernetes_version: $KUBERNETES_VERSION,
    em_region: $EM_REGION,
    ipxe_script_url_amd64: $IPXE_SCRIPT_URL_AMD64,
    ipxe_script_url_arm64: $IPXE_SCRIPT_URL_ARM64,
    install_image: $INSTALL_IMAGE
  }' \
  | jq -f hack/test/tfvars/equinix-metal.jq > "${ARTIFACTS}/e2e-equinix-metal-generated/vars.json"

cp hack/test/tfvars/*.yaml "${ARTIFACTS}/e2e-equinix-metal-generated"
15 changes: 15 additions & 0 deletions hack/test/e2e-equinix-metal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash

# Runs the Talos and Kubernetes e2e integration test suites against an
# already-provisioned Equinix Metal cluster.

set -eou pipefail

# Presumably provides ARTIFACTS, TALOSCONFIG, KUBECONFIG, TALOSCTL and the
# run_*_integration_test helpers — confirm in hack/test/e2e.sh.
source ./hack/test/e2e.sh

# Install the configs downloaded from the Terraform apply step.
cp "${ARTIFACTS}/e2e-equinix-metal-talosconfig" "${TALOSCONFIG}"
cp "${ARTIFACTS}/e2e-equinix-metal-kubeconfig" "${KUBECONFIG}"

# set the talosconfig to use the first controlplane ip
CONTROLPLANE0_NODE=$(${TALOSCTL} config info -o json | jq -r '.endpoints[0]')
${TALOSCTL} config node "${CONTROLPLANE0_NODE}"

run_talos_integration_test
run_kubernetes_integration_test
3 changes: 1 addition & 2 deletions hack/test/tfvars/aws.jq
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,5 @@
"Cluster Name": .cluster_name,
"Project": "talos-e2e-ci",
"Environment": "ci"
},

}
}
2 changes: 1 addition & 1 deletion hack/test/tfvars/azure.jq
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@
"Cluster Name": .cluster_name,
"Project": "talos-e2e-ci",
"Environment": "ci"
},
}
}
3 changes: 3 additions & 0 deletions hack/test/tfvars/equinix-arm64.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Machine config patch applied to the arm64 worker group (see
# equinix-metal.jq): on these machines Talos installs to an NVMe disk.
machine:
  install:
    disk: /dev/nvme0n1
38 changes: 38 additions & 0 deletions hack/test/tfvars/equinix-metal.jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Maps the raw variable object built by e2e-equinix-metal-prepare.sh into the
# tfvars JSON consumed by the equinix-metal Terraform configuration.
{
  "cluster_name": .cluster_name,
  "em_api_token": .em_api_token,
  "talos_version_contract": .talos_version_contract,
  "kubernetes_version": .kubernetes_version,
  "em_region": .em_region,
  "em_project_id": .em_project_id,
  "control_plane": {
    "num_instances": 1,
    "plan": "c3.small.x86",
    "ipxe_script_url": .ipxe_script_url_amd64,
    "install_image": .install_image
  },
  "worker_groups": [
    {
      "name": "amd64",
      "num_instances": 1,
      "plan": "c3.small.x86",
      "ipxe_script_url": .ipxe_script_url_amd64,
      "install_image": .install_image
    },
    # arm64 workers boot a different iPXE script and carry a config patch
    # (equinix-arm64.yaml) selecting the NVMe install disk.
    {
      "name": "arm64",
      "plan": "c3.large.arm64",
      "num_instances": 1,
      "ipxe_script_url": .ipxe_script_url_arm64,
      "install_image": .install_image,
      "config_patch_files": [
        "equinix-arm64.yaml"
      ]
    }
  ],
  "extra_tags": [
    "Cluster Name=" + .cluster_name,
    "Project=talos-e2e-ci",
    "Environment=ci"
  ]
}
4 changes: 2 additions & 2 deletions internal/integration/api/reboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (suite *RebootSuite) TestRebootMultiple() {
suite.Client.Reboot(nodeCtx),
))

suite.AssertBootIDChanged(nodeCtx, bootID, node, time.Minute*5)
suite.AssertBootIDChanged(nodeCtx, bootID, node, time.Minute*7)

bootID = suite.ReadBootIDWithRetry(nodeCtx, time.Minute*5)

Expand All @@ -111,7 +111,7 @@ func (suite *RebootSuite) TestRebootMultiple() {
return nil
}))

suite.AssertBootIDChanged(nodeCtx, bootID, node, time.Minute*5)
suite.AssertBootIDChanged(nodeCtx, bootID, node, time.Minute*7)
suite.WaitForBootDone(suite.ctx)
}

Expand Down
Loading

0 comments on commit f515741

Please sign in to comment.