feat: allow for configuring auto_scaler_profile #278

Merged Jan 17, 2023 (14 commits)
214 changes: 116 additions & 98 deletions README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions locals.tf
@@ -1,4 +1,6 @@
locals {
# If auto_scaler_profile_scale_down_delay_after_delete is not set (null), fall back to scan_interval.
auto_scaler_profile_scale_down_delay_after_delete = var.auto_scaler_profile_scale_down_delay_after_delete == null ? var.auto_scaler_profile_scan_interval : var.auto_scaler_profile_scale_down_delay_after_delete
# automatic upgrades are either:
# - null
# - patch, but then the kubernetes_version must not specify a patch number and orchestrator_version must be null
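A minimal standalone sketch of that fallback (hypothetical local names, evaluated outside the module for illustration):

locals {
  scan_interval = "10s"
  delete_delay  = null # not set by the caller

  # null falls back to the scan interval, so this resolves to "10s";
  # an explicit value such as "15s" would win instead.
  effective_delete_delay = local.delete_delay == null ? local.scan_interval : local.delete_delay
}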
23 changes: 23 additions & 0 deletions main.tf
@@ -97,6 +97,29 @@ resource "azurerm_kubernetes_cluster" "main" {
subnet_name = var.aci_connector_linux_subnet_name
}
}
dynamic "auto_scaler_profile" {
for_each = var.auto_scaler_profile_enabled ? ["default_auto_scaler_profile"] : []

content {
balance_similar_node_groups = var.auto_scaler_profile_balance_similar_node_groups
empty_bulk_delete_max = var.auto_scaler_profile_empty_bulk_delete_max
expander = var.auto_scaler_profile_expander
max_graceful_termination_sec = var.auto_scaler_profile_max_graceful_termination_sec
max_node_provisioning_time = var.auto_scaler_profile_max_node_provisioning_time
max_unready_nodes = var.auto_scaler_profile_max_unready_nodes
max_unready_percentage = var.auto_scaler_profile_max_unready_percentage
new_pod_scale_up_delay = var.auto_scaler_profile_new_pod_scale_up_delay
scale_down_delay_after_add = var.auto_scaler_profile_scale_down_delay_after_add
scale_down_delay_after_delete = local.auto_scaler_profile_scale_down_delay_after_delete
scale_down_delay_after_failure = var.auto_scaler_profile_scale_down_delay_after_failure
scale_down_unneeded = var.auto_scaler_profile_scale_down_unneeded
scale_down_unready = var.auto_scaler_profile_scale_down_unready
scale_down_utilization_threshold = var.auto_scaler_profile_scale_down_utilization_threshold
scan_interval = var.auto_scaler_profile_scan_interval
skip_nodes_with_local_storage = var.auto_scaler_profile_skip_nodes_with_local_storage
skip_nodes_with_system_pods = var.auto_scaler_profile_skip_nodes_with_system_pods
}
}
dynamic "azure_active_directory_role_based_access_control" {
for_each = var.role_based_access_control_enabled && var.rbac_aad && var.rbac_aad_managed ? ["rbac"] : []

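The profile is strictly opt-in: the dynamic block renders once when auto_scaler_profile_enabled is true and not at all otherwise, so existing callers see no change by default. A hedged usage sketch from a calling root module (the module source and values are illustrative, not taken from this PR):

module "aks" {
  source = "Azure/aks/azurerm" # assumed registry source for this module
  prefix = "example"
  # ... other required inputs omitted for brevity

  auto_scaler_profile_enabled       = true
  auto_scaler_profile_expander      = "least-waste"
  auto_scaler_profile_scan_interval = "30s"
  # auto_scaler_profile_scale_down_delay_after_delete is left unset, so it
  # inherits "30s" via the locals.tf fallback shown above
}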
26 changes: 26 additions & 0 deletions test/unit/unit_test.go
@@ -199,6 +199,32 @@ func TestInvalidVarsForAutomaticUpgrades(t *testing.T) {
}
}

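// Verifies the locals.tf fallback: with auto_scaler_profile_scale_down_delay_after_delete
// left unset (null), the module reuses the scan_interval value.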
func TestScaleDownDelayAfterDeleteNotSetShouldUseScanInterval(t *testing.T) {
test_helper.RunE2ETest(t, "../../", "unit-test-fixture", terraform.Options{
Upgrade: false,
Vars: dummyRequiredVariables(),
}, func(t *testing.T, output test_helper.TerraformOutput) {
scaleDownDelayAfterDelete, ok := output["auto_scaler_profile_scale_down_delay_after_delete"].(string)
assert.True(t, ok)
scanInterval, ok := output["auto_scaler_profile_scan_interval"].(string)
assert.True(t, ok)
assert.Equal(t, scanInterval, scaleDownDelayAfterDelete)
})
}

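// Verifies that an explicitly set auto_scaler_profile_scale_down_delay_after_delete
// ("15s") takes precedence over the scan_interval fallback.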
func TestScaleDownDelayAfterDeleteSetShouldUseVar(t *testing.T) {
vars := dummyRequiredVariables()
vars["auto_scaler_profile_scale_down_delay_after_delete"] = "15s"
test_helper.RunE2ETest(t, "../../", "unit-test-fixture", terraform.Options{
Upgrade: false,
Vars: vars,
}, func(t *testing.T, output test_helper.TerraformOutput) {
scaleDownDelayAfterDelete, ok := output["auto_scaler_profile_scale_down_delay_after_delete"].(string)
assert.True(t, ok)
assert.Equal(t, "15s", scaleDownDelayAfterDelete)
})
}

func dummyRequiredVariables() map[string]interface{} {
return map[string]interface{}{
"prefix": "foo",
8 changes: 8 additions & 0 deletions unit-test-fixture/outputs.tf
@@ -13,3 +13,11 @@ output "log_analytics_workspace" {
output "automatic_channel_upgrade_check" {
value = local.automatic_channel_upgrade_check
}

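# The two outputs below exist only in this test fixture so the unit tests
# can observe the scale_down_delay_after_delete fallback computed in locals.tf.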
output "auto_scaler_profile_scale_down_delay_after_delete" {
value = local.auto_scaler_profile_scale_down_delay_after_delete
}

output "auto_scaler_profile_scan_interval" {
value = var.auto_scaler_profile_scan_interval
}
113 changes: 113 additions & 0 deletions variables.tf
@@ -93,6 +93,119 @@ variable "api_server_authorized_ip_ranges" {
default = null
}

variable "auto_scaler_profile_balance_similar_node_groups" {
description = "Detect similar node groups and balance the number of nodes between them. Defaults to `false`."
type = bool
default = false
}

variable "auto_scaler_profile_empty_bulk_delete_max" {
description = "Maximum number of empty nodes that can be deleted at the same time. Defaults to `10`."
type = number
default = 10
}

variable "auto_scaler_profile_enabled" {
type = bool
description = "Enable configuring the auto scaler profile"
default = false
nullable = false
}

variable "auto_scaler_profile_expander" {
description = "Expander to use. Possible values are `least-waste`, `priority`, `most-pods` and `random`. Defaults to `random`."
type = string
default = "random"
validation {
condition = contains(["least-waste", "most-pods", "priority", "random"], var.auto_scaler_profile_expander)
error_message = "Must be either `least-waste`, `most-pods`, `priority` or `random`."
}
}

variable "auto_scaler_profile_max_graceful_termination_sec" {
description = "Maximum number of seconds the cluster autoscaler waits for pod termination when trying to scale down a node. Defaults to `600`."
type = string
default = "600"
}

variable "auto_scaler_profile_max_node_provisioning_time" {
description = "Maximum time the autoscaler waits for a node to be provisioned. Defaults to `15m`."
type = string
default = "15m"
}

variable "auto_scaler_profile_max_unready_nodes" {
description = "Maximum Number of allowed unready nodes. Defaults to `3`."
type = number
default = 3
}

variable "auto_scaler_profile_max_unready_percentage" {
description = "Maximum percentage of unready nodes the cluster autoscaler will stop if the percentage is exceeded. Defaults to `45`."
type = number
default = 45
}

variable "auto_scaler_profile_new_pod_scale_up_delay" {
description = "For scenarios like burst/batch scale where you don't want CA to act before the kubernetes scheduler could schedule all the pods, you can tell CA to ignore unscheduled pods before they're a certain age. Defaults to `10s`."
type = string
default = "10s"
}

variable "auto_scaler_profile_scale_down_delay_after_add" {
description = "How long after the scale up of AKS nodes the scale down evaluation resumes. Defaults to `10m`."
type = string
default = "10m"
}

variable "auto_scaler_profile_scale_down_delay_after_delete" {
description = "How long after node deletion that scale down evaluation resumes. Defaults to the value used for `scan_interval`."
type = string
default = null
}

variable "auto_scaler_profile_scale_down_delay_after_failure" {
description = "How long after scale down failure that scale down evaluation resumes. Defaults to `3m`."
type = string
default = "3m"
}

variable "auto_scaler_profile_scale_down_unneeded" {
description = "How long a node should be unneeded before it is eligible for scale down. Defaults to `10m`."
type = string
default = "10m"
}

variable "auto_scaler_profile_scale_down_unready" {
description = "How long an unready node should be unneeded before it is eligible for scale down. Defaults to `20m`."
type = string
default = "20m"
}

variable "auto_scaler_profile_scale_down_utilization_threshold" {
description = "Node utilization level, defined as sum of requested resources divided by capacity, below which a node can be considered for scale down. Defaults to `0.5`."
type = string
default = "0.5"
}

variable "auto_scaler_profile_scan_interval" {
description = "How often the AKS Cluster should be re-evaluated for scale up/down. Defaults to `10s`."
type = string
default = "10s"
}

variable "auto_scaler_profile_skip_nodes_with_local_storage" {
description = "If `true` cluster autoscaler will never delete nodes with pods with local storage, for example, EmptyDir or HostPath. Defaults to `true`."
type = bool
default = true
}

variable "auto_scaler_profile_skip_nodes_with_system_pods" {
description = "If `true` cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods). Defaults to `true`."
type = bool
default = true
}

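Taken together, the new inputs can be tuned from a tfvars file. A hedged example with illustrative values (every variable has a default; the block only renders when auto_scaler_profile_enabled is set to true):

# terraform.tfvars
auto_scaler_profile_enabled                          = true
auto_scaler_profile_balance_similar_node_groups      = true
auto_scaler_profile_expander                         = "priority"
auto_scaler_profile_scale_down_delay_after_delete    = "20s"
auto_scaler_profile_scale_down_utilization_threshold = "0.3"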
variable "automatic_channel_upgrade" {
type = string
default = null