Skip to content

Commit

Permalink
Merge branch 'main' into dom-62207
Browse files Browse the repository at this point in the history
  • Loading branch information
msingermann-domino authored Nov 15, 2024
2 parents 72fbf86 + 07917e8 commit 6ce55fd
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 29 deletions.
7 changes: 3 additions & 4 deletions examples/tfvars/single-node.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ single_node = {
instance_type = "m6i.2xlarge"
name = "dev-v2"
ami = {
name_prefix = "dev-v2_"
owner = "977170443939"

name_prefix = "amazon-eks-node-al2023-x86_64-standard-"
owner = "602401143452"
}
labels = {
"dominodatalab.com/node-pool" = "default",
Expand All @@ -41,5 +40,5 @@ storage = {
}

eks = {
k8s_version = "1.27"
k8s_version = "1.30"
}
26 changes: 16 additions & 10 deletions modules/eks/submodules/k8s/templates/k8s-functions.sh.tftpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

RED="\e[31m"
GREEN="\e[32m"
YELLOW="\e[33m"
EC="\e[0m"

KUBECONFIG="${kubeconfig_path}"
Expand Down Expand Up @@ -111,7 +112,9 @@ install_calico() {
local sleep_duration=10

for i in $(seq 1 $max_retries); do
helm_cmd upgrade "calico-tigera-operator" \
echo "Attempt $i of $max_retries..."

if helm_cmd upgrade "calico-tigera-operator" \
tigera-operator \
--repo "https://projectcalico.docs.tigera.io/charts" \
--version "${calico_version}" \
Expand All @@ -125,20 +128,23 @@ install_calico() {
--wait \
--timeout 10m \
--create-namespace \
--install
--install; then

if [ $? -eq 0 ]; then
printf "$GREEN Calico installation succeeded. $EC \n"
break
fi

if [ $i -lt $max_retries ]; then
echo "Attempt $i failed. Retrying in $${sleep_duration}s..."
sleep $sleep_duration
else
printf "$RED Maximum attempts reached. Exiting. $EC \n"
exit 1
fi
printf "$YELLOW Helm install attempt $i failed. $EC \n"

# Decide what to do after a failed Helm attempt: wait and let the enclosing
# loop retry while attempts remain, otherwise abort the script.
# The retry message goes through a printf format with a trailing newline so
# the next "Attempt N of M..." line starts on its own line. Passing the
# duration as an argument also keeps brace-style expansions out of this
# Terraform template, so no dollar-escaping is needed.
if [ "$i" -lt "$max_retries" ]; then
  printf 'Retrying in %ss...\n' "$sleep_duration"
  sleep "$sleep_duration"
else
  # Out of attempts: report in red and stop with a non-zero status.
  printf "$RED Maximum attempts reached. Exiting. $EC \n"
  exit 1
fi

fi
done
}

Expand Down
3 changes: 3 additions & 0 deletions modules/external-deployments/operator_role_policies.tf
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ data "aws_iam_policy_document" "in_account_policies" {
"sagemaker:DescribeModel",
"sagemaker:InvokeEndpoint",
"sagemaker:InvokeEndpointWithResponseStream",
"sagemaker:ListEndpointConfigs",
"sagemaker:ListEndpoints",
"sagemaker:ListModels",
"sagemaker:UpdateEndpoint",
"sagemaker:UpdateEndpointWeightsAndCapacities"
]
Expand Down
2 changes: 1 addition & 1 deletion modules/flyte/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ No modules.
| <a name="input_kms_info"></a> [kms\_info](#input\_kms\_info) | key\_id = KMS key id.<br> key\_arn = KMS key arn.<br> enabled = KMS key is enabled | <pre>object({<br> key_id = string<br> key_arn = string<br> enabled = bool<br> })</pre> | n/a | yes |
| <a name="input_platform_namespace"></a> [platform\_namespace](#input\_platform\_namespace) | Name of Domino platform namespace for this deploy | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | AWS region for the deployment | `string` | n/a | yes |
| <a name="input_serviceaccount_names"></a> [serviceaccount\_names](#input\_serviceaccount\_names) | Service account names for Flyte | <pre>object({<br> datacatalog = optional(string, "datacatalog")<br> flyteadmin = optional(string, "flyteadmin")<br> flytepropeller = optional(string, "flytepropeller")<br> })</pre> | `{}` | no |
| <a name="input_serviceaccount_names"></a> [serviceaccount\_names](#input\_serviceaccount\_names) | Service account names for Flyte | <pre>object({<br> datacatalog = optional(string, "datacatalog")<br> flyteadmin = optional(string, "flyteadmin")<br> flytepropeller = optional(string, "flytepropeller")<br> importer = optional(string, "domino-data-importer")<br> })</pre> | `{}` | no |

## Outputs

Expand Down
1 change: 1 addition & 0 deletions modules/flyte/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ resource "aws_iam_role" "flyte_controlplane" {
"${trimprefix(local.oidc_provider_url, "https://")}:sub" : [
"system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.datacatalog}",
"system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.flytepropeller}",
"system:serviceaccount:${var.platform_namespace}:${var.serviceaccount_names.importer}",
]
}
}
Expand Down
1 change: 1 addition & 0 deletions modules/flyte/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ variable "serviceaccount_names" {
datacatalog = optional(string, "datacatalog")
flyteadmin = optional(string, "flyteadmin")
flytepropeller = optional(string, "flytepropeller")
importer = optional(string, "domino-data-importer")
})

default = {}
Expand Down
2 changes: 2 additions & 0 deletions modules/infra/submodules/storage/netapp.tf
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ resource "aws_fsx_ontap_volume" "eks" {
ontap_volume_type = "RW"
copy_tags_to_backups = true
volume_style = "FLEXVOL"
tags = local.backup_tagging

lifecycle {
ignore_changes = [name, size_in_megabytes] # This volume is meant to be managed by the trident operator after initial creation.
}
Expand Down
2 changes: 1 addition & 1 deletion modules/single-node/single-node.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ locals {
"cluster" = var.eks_info.cluster.specs.name
}, data.aws_default_tags.this.tags, local.node_labels)

kubelet_extra_args = "--kubelet-extra-args '--node-labels=${join(",", [for k, v in local.node_labels : format("%s=%s", k, v)])}'"
kubelet_extra_args = "--node-labels=${join(",", [for k, v in local.node_labels : format("%s=%s", k, v)])}"

bootstrap_extra_args = join(" ", [local.kubelet_extra_args, var.single_node.bootstrap_extra_args])
}
Expand Down
21 changes: 11 additions & 10 deletions modules/single-node/templates/linux_user_data.tpl
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#!/bin/bash
set -e
${pre_bootstrap_user_data ~}
%{ if length(cluster_service_ipv4_cidr) > 0 ~}
export SERVICE_IPV4_CIDR=${cluster_service_ipv4_cidr}
%{ endif ~}
B64_CLUSTER_CA=${cluster_auth_base64}
API_SERVER_URL=${cluster_endpoint}
/etc/eks/bootstrap.sh ${cluster_name} ${bootstrap_extra_args} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
${post_bootstrap_user_data ~}
---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
cluster:
name: ${cluster_name}
apiServerEndpoint: ${cluster_endpoint}
certificateAuthority: ${cluster_auth_base64}
cidr: ${cluster_service_ipv4_cidr}
kubelet:
flags: ["${bootstrap_extra_args}"]
9 changes: 6 additions & 3 deletions tests/deploy/single-node/single-node.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ single_node = {
instance_type = "m6i.2xlarge"
name = "dev-v2"
ami = {
name_prefix = "dev-v2_"
owner = "977170443939"

name_prefix = "amazon-eks-node-al2023-x86_64-standard-"
owner = "602401143452"
}
labels = {
"dominodatalab.com/node-pool" = "default",
"dominodatalab.com/domino-node" = "true"
},
}

eks = {
k8s_version = "1.30"
}

0 comments on commit 6ce55fd

Please sign in to comment.