From 64da264a3292a16a99e739fc1dc12b8d373d5135 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:10:20 +0100 Subject: [PATCH 1/9] terraform, gcp: add variable for cluster and node pools k8s version --- terraform/gcp/cluster.tf | 41 ++++++++++++++++++++++++++++++++++---- terraform/gcp/variables.tf | 31 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index 2e1e4d8db..398a01d26 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -1,9 +1,34 @@ +# This data resource and output provide information on the latest available k8s +# versions in GCP's regular release channel. This can be used when specifying +# versions to upgrade to via the k8s_versions variable. +# +# To get the output of relevance, run: +# +# terraform plan -var-file=projects/$CLUSTER_NAME.tfvars +# terraform output regular_channel_latest_k8s_versions +# +# data ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_engine_versions +data "google_container_engine_versions" "k8s_version_prefixes" { + project = var.project_id + location = var.zone + + for_each = var.k8s_version_prefixes + version_prefix = each.value +} +output "regular_channel_latest_k8s_versions" { + value = { + for k, v in data.google_container_engine_versions.k8s_version_prefixes : k => v.release_channel_latest_version["REGULAR"] + } +} + +# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_service_account resource "google_service_account" "cluster_sa" { account_id = "${var.prefix}-cluster-sa" display_name = "Service account used by nodes of cluster ${var.prefix}" project = var.project_id } +# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam#google_project_iam_member resource "google_project_iam_member" "cluster_sa_roles" { # 
https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster # has information on why the cluster SA needs these rights @@ -20,14 +45,16 @@ resource "google_project_iam_member" "cluster_sa_roles" { member = "serviceAccount:${google_service_account.cluster_sa.email}" } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_container_cluster resource "google_container_cluster" "cluster" { # Setting cluster autoscaling profile is in google-beta provider = google-beta - name = "${var.prefix}-cluster" - location = var.regional_cluster ? var.region : var.zone - node_locations = var.regional_cluster ? [var.zone] : null - project = var.project_id + name = "${var.prefix}-cluster" + location = var.regional_cluster ? var.region : var.zone + node_locations = var.regional_cluster ? [var.zone] : null + project = var.project_id + min_master_version = var.k8s_versions.min_master_version initial_node_count = 1 remove_default_node_pool = true @@ -150,11 +177,13 @@ resource "google_container_cluster" "cluster" { resource_labels = {} } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "core" { name = "core-pool" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.core_nodes_version initial_node_count = 1 @@ -205,11 +234,13 @@ resource "google_container_node_pool" "core" { } } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "notebook" { name = "nb-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.notebook_nodes_version for_each = 
var.notebook_nodes @@ -299,11 +330,13 @@ resource "google_container_node_pool" "notebook" { } } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "dask_worker" { name = "dask-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.dask_nodes_version # Default to same config as notebook nodepools config for_each = var.dask_nodes diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 1ad3ba451..2b4456928 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -23,6 +23,37 @@ variable "project_id" { EOT } +variable "k8s_version_prefixes" { + type = set(string) + default = [ + "1.22.", + "1.23.", + "1.24.", + "1.25.", + "1.", + ] + description = <<-EOT + A list of k8s version prefixes that can be evaluated to their latest version by + the output defined in cluster.tf called regular_channel_latest_k8s_versions. + EOT +} + +variable "k8s_versions" { + type = object({ + min_master_version: optional(string, null), + core_nodes_version: optional(string, null), + notebook_nodes_version: optional(string, null), + dask_nodes_version: optional(string, null), + }) + default = {} + description = <<-EOT + Configuration of the k8s cluster's version and node pools' versions. 
To specify these: + + - min_master_version is passthrough configuration of google_container_cluster's min_master_version, documented in https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_cluster#min_master_version + - [core|notebook|dask]_nodes_version is passthrough configuration of container_node_pool's version, documented in https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool#version + EOT +} + variable "notebook_nodes" { type = map(object({ min : number, max : number, machine_type : string, labels : map(string), gpu : object({ enabled : bool, type : string, count : number }) })) description = "Notebook node pools to create" From 79254fb65f693ed863842edc7ca0688123e70a5d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:12:03 +0100 Subject: [PATCH 2/9] leap: update to k8s 1.25 and a shared highmem node setup --- terraform/gcp/projects/leap.tfvars | 86 +++++------------------------- 1 file changed, 14 insertions(+), 72 deletions(-) diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index f1e295358..f36547f3f 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -1,9 +1,15 @@ prefix = "leap" project_id = "leap-pangeo" -core_node_machine_type = "n1-highmem-4" +# core_node_machine_type is set to n2-highmem-4 instead of n2-highmem-2 because +# prometheus requires more memory than a n2-highmem-2 can provide. 
+core_node_machine_type = "n2-highmem-4" -# No need for this to be a private cluster, public ones are cheaper -enable_private_cluster = false +k8s_versions = { + min_master_version: "1.25.6-gke.1000", + core_nodes_version: "1.25.6-gke.1000", + notebook_nodes_version: "1.25.6-gke.1000", + dask_nodes_version: "1.25.6-gke.1000", +} # GPUs not available in us-central1-b zone = "us-central1-c" @@ -48,43 +54,12 @@ hub_cloud_permissions = { # Setup notebook node pools notebook_nodes = { - "small" : { - min : 0, - max : 100, - machine_type : "n1-standard-2", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, "medium" : { - min : 0, + # A minimum of one is configured for LEAP to ensure quick startups at all + # time. Cost is not a greater concern than optimizing startup times. + min : 1, max : 100, - machine_type : "n1-standard-4", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "large" : { - min : 0, - max : 100, - machine_type : "n1-standard-8", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "huge" : { - min : 0, - max : 100, - machine_type : "n1-standard-16", + machine_type : "n2-highmem-16", labels : {}, gpu : { enabled : false, @@ -106,43 +81,10 @@ notebook_nodes = { } dask_nodes = { - "small" : { - min : 0, - max : 200, - machine_type : "n1-highmem-2", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, "medium" : { min : 0, max : 200, - machine_type : "n1-highmem-4", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "large" : { - min : 0, - max : 200, - machine_type : "n1-highmem-8", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "huge" : { - min : 0, - max : 200, - machine_type : "n1-highmem-16", + machine_type : "n2-highmem-16", labels : {}, gpu : { enabled : false, From a93926a037c84c3613b1c70ce036213418941874 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:24:03 
+0100 Subject: [PATCH 3/9] leap: stop pulling the tensorflow image (slows down other startups) This was an optimization for a workshop where I assume nodes were pre-started and made sense for that, but would slow down startup on new nodes if not this specific image was requested on a non-pre-warmed node after the workshop. Since the workshop has ended, we can safely remove this now. Related tickets: - Workshop: https://2i2c.freshdesk.com/a/tickets/349 - Slow startup discussed: https://2i2c.freshdesk.com/a/tickets/435 --- config/clusters/leap/common.values.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 8b1d3c76b..27a445219 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -13,11 +13,6 @@ basehub: prePuller: continuous: enabled: true - # Extra images to be pulled on all nodes - extraImages: - tensorflow-image: - name: pangeo/ml-notebook - tag: "ebeb9dd" custom: # Extra mount point for admins to access to all users' home dirs # Ref https://github.com/2i2c-org/infrastructure/issues/2105 From fcb1d454d07fa812cea2804e7aa0993bd7dd7795 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:25:45 +0100 Subject: [PATCH 4/9] leap: disable pre pulling as a image choice list is used --- config/clusters/leap/common.values.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 27a445219..1cf9c7066 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -10,9 +10,6 @@ basehub: # Name of Google Filestore share baseShareName: /homes/ jupyterhub: - prePuller: - continuous: - enabled: true custom: # Extra mount point for admins to access to all users' home dirs # Ref https://github.com/2i2c-org/infrastructure/issues/2105 From eda338dbf9b74e4d8151da914082603e3e0bc8e2 Mon Sep 17 00:00:00 
2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:43:43 +0100 Subject: [PATCH 5/9] leap: remove maintenance announcement --- config/clusters/leap/common.values.yaml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 1cf9c7066..cb459bc29 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -52,21 +52,18 @@ basehub: # Announcement is a JupyterHub feature to present messages to users in # pages under the /hub path, but not via the /user. # - # This specific maintenance announcement was requested via - # https://2i2c.freshdesk.com/a/tickets/525. - # # ref: https://github.com/2i2c-org/infrastructure/issues/1501 # ref: https://jupyterhub.readthedocs.io/en/stable/reference/templates.html#announcement-configuration-variables # - template_vars: - announcement: >- - - Service maintenance is scheduled Sunday March 12, to Monday 8AM - EST. - -
- Running servers may be forcefully stopped and service disruption - is expected. + # template_vars: + # announcement: >- + # + # Service maintenance is scheduled Sunday March 12, to Monday 8AM + # EST. + # + #
+ # Running servers may be forcefully stopped and service disruption + # is expected. GitHubOAuthenticator: populate_teams_in_auth_state: true allowed_organizations: From fe878fbd1932ef9b5fbe4a28ea0b1967589ff184 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:44:35 +0100 Subject: [PATCH 6/9] leap: update authorized github teams to base- and full-access --- config/clusters/leap/common.values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index cb459bc29..703d5dd4f 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -67,7 +67,8 @@ basehub: GitHubOAuthenticator: populate_teams_in_auth_state: true allowed_organizations: - - leap-stc:leap-pangeo-users + - leap-stc:leap-pangeo-base-access + - leap-stc:leap-pangeo-full-access - 2i2c-org:hub-access-for-2i2c-staff scope: - read:org From 3f7a9d1d161c1ef69060726ca8bc77cf1f106dc8 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:45:36 +0100 Subject: [PATCH 7/9] leap: update to use node sharing --- config/clusters/leap/common.values.yaml | 170 +++++++++++++++++------- 1 file changed, 121 insertions(+), 49 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 703d5dd4f..1818f5728 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -87,23 +87,96 @@ basehub: # Can be removed once https://github.com/2i2c-org/infrastructure/pull/2160 is merged JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp" profileList: - # The mem-guarantees are here so k8s doesn't schedule other pods - # on these nodes. They need to be just under total allocatable - # RAM on a node, not total node capacity. 
Values calculated using - # https://learnk8s.io/kubernetes-instance-calculator - - display_name: "Small" - description: 5GB RAM, 2 CPUs + # NOTE: About node sharing + # + # CPU/Memory requests/limits are actively considered still. This + # profile list is setup to involve node sharing as considered in + # https://github.com/2i2c-org/infrastructure/issues/2121. + # + # - Memory requests are lower than the description, with a factor + # of (node_max_mem - 4GB) / node_max_mem. + # - CPU requests are lower than the description, with a factor of + # 10%. + # + # NOTE: This is not a standard node sharing setup, don't copy it! + # + # LEAP has explicitly requested the following adjustments from the + # standardized "medium" instance. + # + # - The standardized "medium" instance is declared twice, once for + # a github team with base access, and once for a github team with + # full access. + # - Limits for CPU and Memory are set to match the node share + # description. Expected consequences of this is: significant CPU + # under-utilization, OOMKilled processes when limit is reached, no + # pod evictions due to node memory pressure. + # + # FIXME: Erik advocates putting a CPU limit is reconsidered, as its seen + # as almost as a pure loss: + # + # - The only win I think of is an possible edge case related to + # inefficient spread CPU workload of workload across CPU cores + # that can only be partially used. + # - The losses I think of are: less performance for users overall, + # fewer users per node without being constrained by the node's + # actual capacity, more frequent need to startup new nodes which + # causes more frequent need to wait on startups, increased cost, + # wasted energy. 
+ # + - display_name: "Medium: up to 16 CPU / 128 GB RAM" + description: &profile_list_description "Start a container limited to a chosen share of capacity on a node of this type" + slug: medium-full default: true allowed_teams: - - leap-stc:leap-pangeo-users - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 7G - mem_guarantee: 4.5G - node_selector: - node.kubernetes.io/instance-type: n1-standard-2 - profile_options: &profile_options - image: + - leap-stc:leap-pangeo-full-access + profile_options: + requests: + # NOTE: Node share choices are in active development, see comment + # next to profileList: above. + # + # This specific setup is not a standard node sharing setup, + # don't copy it! + # + display_name: Node share + choices: + mem_8: &medium_mem_8 + display_name: ~8 GB, ~1.0 CPU + default: true + kubespawner_override: + mem_guarantee: 7.75G + cpu_guarantee: 0.1 + mem_limit: 8G + cpu_limit: 1 + mem_16: &medium_mem_16 + display_name: ~16 GB, ~2.0 CPU + kubespawner_override: + mem_guarantee: 15.5G + cpu_guarantee: 0.2 + mem_limit: 16G + cpu_limit: 2 + mem_32: &medium_mem_32 + display_name: ~32 GB, ~4.0 CPU + kubespawner_override: + mem_guarantee: 31.0G + cpu_guarantee: 0.4 + mem_limit: 32G + cpu_limit: 4 + mem_64: + display_name: ~64 GB, ~8.0 CPU + kubespawner_override: + mem_guarantee: 62.0G + cpu_guarantee: 0.8 + mem_limit: 64G + cpu_limit: 8 + mem_128: + display_name: ~128 GB, ~16.0 CPU + kubespawner_override: + mem_guarantee: 124.0G + cpu_guarantee: 1.6 + mem_limit: 128G + cpu_limit: 16 + image: &profile_list_profile_options_image display_name: Image choices: pangeo: @@ -127,46 +200,44 @@ basehub: slug: "leap_edu" kubespawner_override: image: "quay.io/jbusecke/leap-edu-image:fa442ab4851c" - - display_name: Medium - description: 11GB RAM, 4 CPUs - allowed_teams: - - leap-stc:leap-pangeo-users - - 2i2c-org:hub-access-for-2i2c-staff - profile_options: *profile_options - kubespawner_override: - mem_limit: 15G - mem_guarantee: 11G + 
kubespawner_override: &medium_kubespawner_override + cpu_limit: null + mem_limit: null node_selector: - node.kubernetes.io/instance-type: n1-standard-4 - - display_name: Large - description: 24GB RAM, 8 CPUs - allowed_teams: - - leap-stc:leap-pangeo-education - - leap-stc:leap-pangeo-research - - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 30G - mem_guarantee: 24G - node_selector: - node.kubernetes.io/instance-type: n1-standard-8 - profile_options: *profile_options - - display_name: Huge - description: 52GB RAM, 16 CPUs + node.kubernetes.io/instance-type: n2-highmem-16 + + # NOTE: This is the second medium profile list entry, with less node + # share options for a different subset of users via the basehub + # specific allowed_teams configuration. + # + - display_name: "Medium: up to 16 CPU / 128 GB RAM" + description: *profile_list_description + slug: medium-base + default: true allowed_teams: - - leap-stc:leap-pangeo-research - - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 60G - mem_guarantee: 52G - node_selector: - node.kubernetes.io/instance-type: n1-standard-16 - profile_options: *profile_options - - display_name: Large + GPU + - leap-stc:leap-pangeo-base-access + profile_options: + requests: + # NOTE: Node share choices are in active development, see comment + # next to profileList: above. + # + # This specific setup is not a standard node sharing setup, + # don't copy it! 
+ # + display_name: Node share + choices: + mem_8: *medium_mem_8 + mem_16: *medium_mem_16 + mem_32: *medium_mem_32 + image: *profile_list_profile_options_image + kubespawner_override: *medium_kubespawner_override + + - display_name: GPU slug: gpu - description: 24GB RAM, 8 CPUs + description: NVIDIA Tesla T4, 24GB RAM, 8 CPUs allowed_teams: - - leap-stc:leap-pangeo-research - 2i2c-org:hub-access-for-2i2c-staff + - leap-stc:leap-pangeo-full-access profile_options: image: display_name: Image @@ -189,6 +260,7 @@ basehub: mem_guarantee: 24G extra_resource_limits: nvidia.com/gpu: "1" + dask-gateway: gateway: backend: From d13adbf12521dffcfd8c5db430519ade24af600d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 19:21:34 +0100 Subject: [PATCH 8/9] leap: remove redeundant workaround to forge ServerApp --- config/clusters/leap/common.values.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 1818f5728..a0ed5272a 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -82,10 +82,6 @@ basehub: # Temporarily set for *all* pods, including pods without any GPUs, # to work around https://github.com/2i2c-org/infrastructure/issues/1530 NVIDIA_DRIVER_CAPABILITIES: compute,utility - # Latest version of JupyterLab no longer works with NotebookApp, requires ServerApp. - # Latest version of the pangeo image requires this now. 
- # Can be removed once https://github.com/2i2c-org/infrastructure/pull/2160 is merged - JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp" profileList: # NOTE: About node sharing # From 1e88becaa23277fb0cd0dd161b7c014fadd75773 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 20:57:45 +0100 Subject: [PATCH 9/9] leap: adjust profile list mem requests based on allocatable memory --- config/clusters/leap/common.values.yaml | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index a0ed5272a..9858939a8 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -89,8 +89,10 @@ basehub: # profile list is setup to involve node sharing as considered in # https://github.com/2i2c-org/infrastructure/issues/2121. # - # - Memory requests are lower than the description, with a factor - # of (node_max_mem - 4GB) / node_max_mem. + # - Memory requests are different from the description, based on: + # what's found to remain allocatable in k8s, subtracting 1GiB + # overhead for misc system pods, and transitioning from GB in + # description to GiB in mem_guarantee. # - CPU requests are lower than the description, with a factor of # 10%. 
# @@ -136,39 +138,54 @@ basehub: # display_name: Node share choices: + # mem_1: + # display_name: ~1 GB, ~0.125 CPU + # kubespawner_override: + # mem_guarantee: 0.903G + # cpu_guarantee: 0.013 + # mem_2: + # display_name: ~2 GB, ~0.25 CPU + # kubespawner_override: + # mem_guarantee: 1.805G + # cpu_guarantee: 0.025 + # mem_4: + # display_name: ~4 GB, ~0.5 CPU + # kubespawner_override: + # mem_guarantee: 3.611G + # cpu_guarantee: 0.05 mem_8: &medium_mem_8 display_name: ~8 GB, ~1.0 CPU default: true kubespawner_override: - mem_guarantee: 7.75G + mem_guarantee: 7.222G cpu_guarantee: 0.1 mem_limit: 8G cpu_limit: 1 mem_16: &medium_mem_16 display_name: ~16 GB, ~2.0 CPU kubespawner_override: - mem_guarantee: 15.5G + mem_guarantee: 14.444G cpu_guarantee: 0.2 mem_limit: 16G cpu_limit: 2 mem_32: &medium_mem_32 display_name: ~32 GB, ~4.0 CPU kubespawner_override: - mem_guarantee: 31.0G + mem_guarantee: 28.887G cpu_guarantee: 0.4 mem_limit: 32G cpu_limit: 4 mem_64: display_name: ~64 GB, ~8.0 CPU kubespawner_override: - mem_guarantee: 62.0G + mem_guarantee: 57.775G cpu_guarantee: 0.8 mem_limit: 64G cpu_limit: 8 mem_128: display_name: ~128 GB, ~16.0 CPU kubespawner_override: - mem_guarantee: 124.0G + mem_guarantee: 115.549G cpu_guarantee: 1.6 mem_limit: 128G cpu_limit: 16