diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c2b4f44120..12678ce3e54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,6 +102,7 @@ * [ENHANCEMENT] Added `$._config.usageStatsConfig` to track the installation mode via the anonymous usage statistics. #3294 * [ENHANCEMENT] The query-tee node port (`$._config.query_tee_node_port`) is now optional. #3272 * [ENHANCEMENT] Add support for autoscaling distributors. #3378 +* [ENHANCEMENT] Make auto-scaling logic ensure integer KEDA thresholds. #3512 * [BUGFIX] Fixed query-scheduler ring configuration for dedicated ruler's queries and query-frontends. #3237 #3239 ### Mimirtool diff --git a/operations/mimir-tests/test-autoscaling-generated.yaml b/operations/mimir-tests/test-autoscaling-generated.yaml index 32c3fa63d57..03c351c7ff4 100644 --- a/operations/mimir-tests/test-autoscaling-generated.yaml +++ b/operations/mimir-tests/test-autoscaling-generated.yaml @@ -510,10 +510,10 @@ spec: timeoutSeconds: 1 resources: limits: - memory: 4Gi + memory: 6Gi requests: - cpu: "2" - memory: 2Gi + cpu: 2 + memory: 3.2Gi volumeMounts: - mountPath: /etc/mimir name: overrides @@ -1760,7 +1760,7 @@ spec: metricName: cortex_distributor_memory_hpa_default query: max_over_time(sum(container_memory_working_set_bytes{container="distributor",namespace="default"})[15m:]) serverAddress: http://prometheus.default:9090/prometheus - threshold: "2147483648" + threshold: "3435973836" type: prometheus --- apiVersion: keda.sh/v1alpha1 diff --git a/operations/mimir-tests/test-autoscaling.jsonnet b/operations/mimir-tests/test-autoscaling.jsonnet index f89e40bad4b..fd5f3283a67 100644 --- a/operations/mimir-tests/test-autoscaling.jsonnet +++ b/operations/mimir-tests/test-autoscaling.jsonnet @@ -27,4 +27,11 @@ mimir { autoscaling_distributor_min_replicas: 3, autoscaling_distributor_max_replicas: 30, }, + + local k = import 'ksonnet-util/kausal.libsonnet', + distributor_container+:: + // Test a non-integer memory request, to verify that this gets converted into an integer for + // the KEDA threshold + k.util.resourcesRequests(2, '3.2Gi') + + k.util.resourcesLimits(null, '6Gi'), } diff --git a/operations/mimir/autoscaling.libsonnet b/operations/mimir/autoscaling.libsonnet index 7620ec35316..c69efafc312 100644 --- a/operations/mimir/autoscaling.libsonnet +++ b/operations/mimir/autoscaling.libsonnet @@ -80,7 +80,9 @@ // // Read more: // https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details - threshold: trigger.threshold, + // + // We also have to ensure that the threshold is an integer (represented as a string) + threshold: std.toString(std.parseInt(trigger.threshold)), }, } for trigger in config.triggers @@ -141,17 +143,23 @@ // Simple method to convert binary multiples. // Only works for limited set of SI prefixes (as below). - if std.endsWith(str, 'Ki') then ( - std.parseJson(std.rstripChars(str, 'Ki')) * std.pow(2, 10) - ) else if std.endsWith(str, 'Mi') then ( - std.parseJson(std.rstripChars(str, 'Mi')) * std.pow(2, 20) - ) else if std.endsWith(str, 'Gi') then ( - std.parseJson(std.rstripChars(str, 'Gi')) * std.pow(2, 30) - ) else if std.endsWith(str, 'Ti') then ( - std.parseJson(std.rstripChars(str, 'Ti')) * std.pow(2, 40) - ) else ( - str - ) + // Utility converting the input to a (potentially decimal) number of bytes + local siToBytesDecimal(str) = ( + if std.endsWith(str, 'Ki') then ( + std.parseJson(std.rstripChars(str, 'Ki')) * std.pow(2, 10) + ) else if std.endsWith(str, 'Mi') then ( + std.parseJson(std.rstripChars(str, 'Mi')) * std.pow(2, 20) + ) else if std.endsWith(str, 'Gi') then ( + std.parseJson(std.rstripChars(str, 'Gi')) * std.pow(2, 30) + ) else if std.endsWith(str, 'Ti') then ( + std.parseJson(std.rstripChars(str, 'Ti')) * std.pow(2, 40) + ) else ( + std.parseJson(str) + ) + ); + + // Round down to nearest integer + std.floor(siToBytesDecimal(str)) ), local cpuToMilliCPUInt(str) = ( @@ -256,8 +264,8 @@ // To scale out relatively quickly, but scale in slower, we look at the max memory utilization across all distributors over 15m. query: 'max_over_time(sum(container_memory_working_set_bytes{container="%s",namespace="%s"})[15m:])' % [name, $._config.namespace], - // threshold is expected to be a string, so use '' to cast any ints returned by siToBytes. - threshold: siToBytes(distributor_memory_requests) + '', + // threshold is expected to be a string + threshold: std.toString(siToBytes(distributor_memory_requests)), }, ], }),