diff --git a/examples/tfvars/single-node.tfvars b/examples/tfvars/single-node.tfvars
index 29319668..ecc88ce0 100644
--- a/examples/tfvars/single-node.tfvars
+++ b/examples/tfvars/single-node.tfvars
@@ -23,9 +23,8 @@ single_node = {
instance_type = "m6i.2xlarge"
name = "dev-v2"
ami = {
- name_prefix = "dev-v2_"
- owner = "977170443939"
-
+ name_prefix = "amazon-eks-node-al2023-x86_64-standard-"
+ owner = "602401143452"
}
labels = {
"dominodatalab.com/node-pool" = "default",
@@ -41,5 +40,5 @@ storage = {
}
eks = {
- k8s_version = "1.27"
+ k8s_version = "1.30"
}
diff --git a/modules/eks/submodules/k8s/templates/k8s-functions.sh.tftpl b/modules/eks/submodules/k8s/templates/k8s-functions.sh.tftpl
index 36c035d5..0b187499 100644
--- a/modules/eks/submodules/k8s/templates/k8s-functions.sh.tftpl
+++ b/modules/eks/submodules/k8s/templates/k8s-functions.sh.tftpl
@@ -2,6 +2,7 @@
RED="\e[31m"
GREEN="\e[32m"
+YELLOW="\e[33m"
EC="\e[0m"
KUBECONFIG="${kubeconfig_path}"
@@ -111,7 +112,9 @@ install_calico() {
local sleep_duration=10
for i in $(seq 1 $max_retries); do
- helm_cmd upgrade "calico-tigera-operator" \
+ echo "Attempt $i of $max_retries..."
+
+ if helm_cmd upgrade "calico-tigera-operator" \
tigera-operator \
--repo "https://projectcalico.docs.tigera.io/charts" \
--version "${calico_version}" \
@@ -125,20 +128,23 @@ install_calico() {
--wait \
--timeout 10m \
--create-namespace \
- --install
+ --install; then
- if [ $? -eq 0 ]; then
+ printf "$GREEN Calico installation succeeded. $EC \n"
break
- fi
- if [ $i -lt $max_retries ]; then
- echo "Attempt $i failed. Retrying in $${sleep_duration}s..."
- sleep $sleep_duration
else
- printf "$RED Maximum attempts reached. Exiting. $EC \n"
- exit 1
- fi
+ printf "$YELLOW Helm install attempt $i failed. $EC \n"
+ if [ $i -lt $max_retries ]; then
+ printf "Retrying in $${sleep_duration}s...\n"
+ sleep $sleep_duration
+ else
+ printf "$RED Maximum attempts reached. Exiting. $EC \n"
+ exit 1
+ fi
+
+ fi
done
}
diff --git a/modules/external-deployments/operator_role_policies.tf b/modules/external-deployments/operator_role_policies.tf
index afaa06fa..90ccbf7f 100644
--- a/modules/external-deployments/operator_role_policies.tf
+++ b/modules/external-deployments/operator_role_policies.tf
@@ -126,6 +126,9 @@ data "aws_iam_policy_document" "in_account_policies" {
"sagemaker:DescribeModel",
"sagemaker:InvokeEndpoint",
"sagemaker:InvokeEndpointWithResponseStream",
+ "sagemaker:ListEndpointConfigs",
+ "sagemaker:ListEndpoints",
+ "sagemaker:ListModels",
"sagemaker:UpdateEndpoint",
"sagemaker:UpdateEndpointWeightsAndCapacities"
]
diff --git a/modules/flyte/README.md b/modules/flyte/README.md
index f411c348..ed61a2c5 100644
--- a/modules/flyte/README.md
+++ b/modules/flyte/README.md
@@ -53,7 +53,7 @@ No modules.
| [kms\_info](#input\_kms\_info) | key\_id = KMS key id.
key\_arn = KMS key arn.
enabled = KMS key is enabled |
 | [kms\_info](#input\_kms\_info) | key\_id = KMS key id.<br>key\_arn = KMS key arn.<br>enabled = KMS key is enabled | <pre>object({<br>    key_id  = string<br>    key_arn = string<br>    enabled = bool<br>  })</pre> | n/a | yes |
 | [platform\_namespace](#input\_platform\_namespace) | Name of Domino platform namespace for this deploy | `string` | n/a | yes |
 | [region](#input\_region) | AWS region for the deployment | `string` | n/a | yes |
-| [serviceaccount\_names](#input\_serviceaccount\_names) | Service account names for Flyte | <pre>object({<br>    datacatalog    = optional(string, "datacatalog")<br>    flyteadmin     = optional(string, "flyteadmin")<br>    flytepropeller = optional(string, "flytepropeller")<br>  })</pre> | `{}` | no |
+| [serviceaccount\_names](#input\_serviceaccount\_names) | Service account names for Flyte | <pre>object({<br>    datacatalog    = optional(string, "datacatalog")<br>    flyteadmin     = optional(string, "flyteadmin")<br>    flytepropeller = optional(string, "flytepropeller")<br>    importer       = optional(string, "domino-data-importer")<br>  })</pre> | `{}` | no |
 
 ## Outputs
diff --git a/modules/flyte/iam.tf b/modules/flyte/iam.tf
index 8f11f26c..ba77c66b 100644
--- a/modules/flyte/iam.tf
+++ b/modules/flyte/iam.tf
@@ -15,6 +15,7 @@ resource "aws_iam_role" "flyte_controlplane" {
           "${trimprefix(local.oidc_provider_url, "https://")}:sub" : [
             "system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.datacatalog}",
             "system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.flytepropeller}",
+            "system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.importer}",
           ]
         }
       }
diff --git a/modules/flyte/variables.tf b/modules/flyte/variables.tf
index 9536a831..645156c7 100644
--- a/modules/flyte/variables.tf
+++ b/modules/flyte/variables.tf
@@ -54,6 +54,7 @@ variable "serviceaccount_names" {
     datacatalog    = optional(string, "datacatalog")
     flyteadmin     = optional(string, "flyteadmin")
     flytepropeller = optional(string, "flytepropeller")
+    importer       = optional(string, "domino-data-importer")
   })
 
   default = {}
diff --git a/modules/infra/submodules/storage/netapp.tf b/modules/infra/submodules/storage/netapp.tf
index 1aff2860..adabad66 100644
--- a/modules/infra/submodules/storage/netapp.tf
+++ b/modules/infra/submodules/storage/netapp.tf
@@ -162,6 +162,8 @@ resource "aws_fsx_ontap_volume" "eks" {
   ontap_volume_type    = "RW"
   copy_tags_to_backups = true
   volume_style         = "FLEXVOL"
+  tags                 = local.backup_tagging
+
   lifecycle {
     ignore_changes = [name, size_in_megabytes] # This volume is meant to be managed by the trident operator after initial creation.
   }
diff --git a/modules/single-node/single-node.tf b/modules/single-node/single-node.tf
index 4ac1cc9b..13bae7df 100644
--- a/modules/single-node/single-node.tf
+++ b/modules/single-node/single-node.tf
@@ -11,7 +11,7 @@ locals {
     "cluster" = var.eks_info.cluster.specs.name
   }, data.aws_default_tags.this.tags, local.node_labels)
 
-  kubelet_extra_args   = "--kubelet-extra-args '--node-labels=${join(",", [for k, v in local.node_labels : format("%s=%s", k, v)])}'"
+  kubelet_extra_args   = "--node-labels=${join(",", [for k, v in local.node_labels : format("%s=%s", k, v)])}"
   bootstrap_extra_args = join(" ", [local.kubelet_extra_args, var.single_node.bootstrap_extra_args])
 }
diff --git a/modules/single-node/templates/linux_user_data.tpl b/modules/single-node/templates/linux_user_data.tpl
index 065a60d5..3b2a32ab 100644
--- a/modules/single-node/templates/linux_user_data.tpl
+++ b/modules/single-node/templates/linux_user_data.tpl
@@ -1,10 +1,11 @@
-#!/bin/bash
-set -e
-${pre_bootstrap_user_data ~}
-%{ if length(cluster_service_ipv4_cidr) > 0 ~}
-export SERVICE_IPV4_CIDR=${cluster_service_ipv4_cidr}
-%{ endif ~}
-B64_CLUSTER_CA=${cluster_auth_base64}
-API_SERVER_URL=${cluster_endpoint}
-/etc/eks/bootstrap.sh ${cluster_name} ${bootstrap_extra_args} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
-${post_bootstrap_user_data ~}
+---
+apiVersion: node.eks.aws/v1alpha1
+kind: NodeConfig
+spec:
+  cluster:
+    name: ${cluster_name}
+    apiServerEndpoint: ${cluster_endpoint}
+    certificateAuthority: ${cluster_auth_base64}
+    cidr: ${cluster_service_ipv4_cidr}
+  kubelet:
+    flags: ["${bootstrap_extra_args}"]
\ No newline at end of file
diff --git a/tests/deploy/single-node/single-node.tfvars b/tests/deploy/single-node/single-node.tfvars
index 395781ce..8a93d04c 100644
--- a/tests/deploy/single-node/single-node.tfvars
+++ b/tests/deploy/single-node/single-node.tfvars
@@ -2,12 +2,15 @@ single_node = {
   instance_type = "m6i.2xlarge"
   name          = "dev-v2"
   ami = {
-    name_prefix = "dev-v2_"
-    owner       = "977170443939"
-
+    name_prefix = "amazon-eks-node-al2023-x86_64-standard-"
+    owner       = "602401143452"
   }
   labels = {
     "dominodatalab.com/node-pool"   = "default",
     "dominodatalab.com/domino-node" = "true"
   },
 }
+
+eks = {
+  k8s_version = "1.30"
+}