From 64da264a3292a16a99e739fc1dc12b8d373d5135 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:10:20 +0100 Subject: [PATCH 1/9] terraform, gcp: add variable for cluster and node pools k8s version --- terraform/gcp/cluster.tf | 41 ++++++++++++++++++++++++++++++++++---- terraform/gcp/variables.tf | 31 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index 2e1e4d8db..398a01d26 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -1,9 +1,34 @@ +# This data resource and output provide information on the latest available k8s +# versions in GCP's regular release channel. This can be used when specifying +# versions to upgrade to via the k8s_versions variable. +# +# To get the output of relevance, run: +# +# terraform plan -var-file=projects/$CLUSTER_NAME.tfvars +# terraform output regular_channel_latest_k8s_versions +# +# data ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_engine_versions +data "google_container_engine_versions" "k8s_version_prefixes" { + project = var.project_id + location = var.zone + + for_each = var.k8s_version_prefixes + version_prefix = each.value +} +output "regular_channel_latest_k8s_versions" { + value = { + for k, v in data.google_container_engine_versions.k8s_version_prefixes : k => v.release_channel_latest_version["REGULAR"] + } +} + +# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_service_account resource "google_service_account" "cluster_sa" { account_id = "${var.prefix}-cluster-sa" display_name = "Service account used by nodes of cluster ${var.prefix}" project = var.project_id } +# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam#google_project_iam_member resource "google_project_iam_member" "cluster_sa_roles" { # 
https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster # has information on why the cluster SA needs these rights @@ -20,14 +45,16 @@ resource "google_project_iam_member" "cluster_sa_roles" { member = "serviceAccount:${google_service_account.cluster_sa.email}" } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_container_cluster resource "google_container_cluster" "cluster" { # Setting cluster autoscaling profile is in google-beta provider = google-beta - name = "${var.prefix}-cluster" - location = var.regional_cluster ? var.region : var.zone - node_locations = var.regional_cluster ? [var.zone] : null - project = var.project_id + name = "${var.prefix}-cluster" + location = var.regional_cluster ? var.region : var.zone + node_locations = var.regional_cluster ? [var.zone] : null + project = var.project_id + min_master_version = var.k8s_versions.min_master_version initial_node_count = 1 remove_default_node_pool = true @@ -150,11 +177,13 @@ resource "google_container_cluster" "cluster" { resource_labels = {} } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "core" { name = "core-pool" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.core_nodes_version initial_node_count = 1 @@ -205,11 +234,13 @@ resource "google_container_node_pool" "core" { } } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "notebook" { name = "nb-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.notebook_nodes_version for_each = 
var.notebook_nodes @@ -299,11 +330,13 @@ resource "google_container_node_pool" "notebook" { } } +# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "dask_worker" { name = "dask-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project location = google_container_cluster.cluster.location + version = var.k8s_versions.dask_nodes_version # Default to same config as notebook nodepools config for_each = var.dask_nodes diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 1ad3ba451..2b4456928 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -23,6 +23,37 @@ variable "project_id" { EOT } +variable "k8s_version_prefixes" { + type = set(string) + default = [ + "1.22.", + "1.23.", + "1.24.", + "1.25.", + "1.", + ] + description = <<-EOT + A list of k8s version prefixes that can be evaluated to their latest version by + the output defined in cluster.tf called regular_channel_latest_k8s_versions. + EOT +} + +variable "k8s_versions" { + type = object({ + min_master_version: optional(string, null), + core_nodes_version: optional(string, null), + notebook_nodes_version: optional(string, null), + dask_nodes_version: optional(string, null), + }) + default = {} + description = <<-EOT + Configuration of the k8s cluster's version and node pools' versions. 
To specify these: + + - min_master_version is passthrough configuration of google_container_cluster's min_master_version, documented in https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_cluster#min_master_version + - [core|notebook|dask]_nodes_version is passthrough configuration of container_node_pool's version, documented in https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool#version + EOT +} + variable "notebook_nodes" { type = map(object({ min : number, max : number, machine_type : string, labels : map(string), gpu : object({ enabled : bool, type : string, count : number }) })) description = "Notebook node pools to create" From 79254fb65f693ed863842edc7ca0688123e70a5d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:12:03 +0100 Subject: [PATCH 2/9] leap: update to k8s 1.25 and a shared highmem node setup --- terraform/gcp/projects/leap.tfvars | 86 +++++------------------------- 1 file changed, 14 insertions(+), 72 deletions(-) diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index f1e295358..f36547f3f 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -1,9 +1,15 @@ prefix = "leap" project_id = "leap-pangeo" -core_node_machine_type = "n1-highmem-4" +# core_node_machine_type is set to n2-highmem-4 instead of n2-highmem-2 because +# prometheus requires more memory than a n2-highmem-2 can provide. 
+core_node_machine_type = "n2-highmem-4" -# No need for this to be a private cluster, public ones are cheaper -enable_private_cluster = false +k8s_versions = { + min_master_version: "1.25.6-gke.1000", + core_nodes_version: "1.25.6-gke.1000", + notebook_nodes_version: "1.25.6-gke.1000", + dask_nodes_version: "1.25.6-gke.1000", +} # GPUs not available in us-central1-b zone = "us-central1-c" @@ -48,43 +54,12 @@ hub_cloud_permissions = { # Setup notebook node pools notebook_nodes = { - "small" : { - min : 0, - max : 100, - machine_type : "n1-standard-2", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, "medium" : { - min : 0, + # A minimum of one is configured for LEAP to ensure quick startups at all + # time. Cost is not a greater concern than optimizing startup times. + min : 1, max : 100, - machine_type : "n1-standard-4", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "large" : { - min : 0, - max : 100, - machine_type : "n1-standard-8", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "huge" : { - min : 0, - max : 100, - machine_type : "n1-standard-16", + machine_type : "n2-highmem-16", labels : {}, gpu : { enabled : false, @@ -106,43 +81,10 @@ notebook_nodes = { } dask_nodes = { - "small" : { - min : 0, - max : 200, - machine_type : "n1-highmem-2", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, "medium" : { min : 0, max : 200, - machine_type : "n1-highmem-4", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "large" : { - min : 0, - max : 200, - machine_type : "n1-highmem-8", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "huge" : { - min : 0, - max : 200, - machine_type : "n1-highmem-16", + machine_type : "n2-highmem-16", labels : {}, gpu : { enabled : false, From a93926a037c84c3613b1c70ce036213418941874 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:24:03 
+0100 Subject: [PATCH 3/9] leap: stop pulling the tensorflow image (slows down other startups) This was an optimization for a workshop where I assume nodes were pre-started and made sense for that, but would slow down startup on new nodes if not this specific image was requested on a non-pre-warmed node after the workshop. Since the workshop has ended, we can safely remove this now. Related tickets: - Workshop: https://2i2c.freshdesk.com/a/tickets/349 - Slow startup discussed: https://2i2c.freshdesk.com/a/tickets/435 --- config/clusters/leap/common.values.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 8b1d3c76b..27a445219 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -13,11 +13,6 @@ basehub: prePuller: continuous: enabled: true - # Extra images to be pulled on all nodes - extraImages: - tensorflow-image: - name: pangeo/ml-notebook - tag: "ebeb9dd" custom: # Extra mount point for admins to access to all users' home dirs # Ref https://github.com/2i2c-org/infrastructure/issues/2105 From fcb1d454d07fa812cea2804e7aa0993bd7dd7795 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:25:45 +0100 Subject: [PATCH 4/9] leap: disable pre pulling as a image choice list is used --- config/clusters/leap/common.values.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 27a445219..1cf9c7066 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -10,9 +10,6 @@ basehub: # Name of Google Filestore share baseShareName: /homes/ jupyterhub: - prePuller: - continuous: - enabled: true custom: # Extra mount point for admins to access to all users' home dirs # Ref https://github.com/2i2c-org/infrastructure/issues/2105 From eda338dbf9b74e4d8151da914082603e3e0bc8e2 Mon Sep 17 00:00:00 
2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:43:43 +0100 Subject: [PATCH 5/9] leap: remove maintenance announcement --- config/clusters/leap/common.values.yaml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 1cf9c7066..cb459bc29 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -52,21 +52,18 @@ basehub: # Announcement is a JupyterHub feature to present messages to users in # pages under the /hub path, but not via the /user. # - # This specific maintenance announcement was requested via - # https://2i2c.freshdesk.com/a/tickets/525. - # # ref: https://github.com/2i2c-org/infrastructure/issues/1501 # ref: https://jupyterhub.readthedocs.io/en/stable/reference/templates.html#announcement-configuration-variables # - template_vars: - announcement: >- - - Service maintenance is scheduled Sunday March 12, to Monday 8AM - EST. - -
- Running servers may be forcefully stopped and service disruption - is expected. + # template_vars: + # announcement: >- + # + # Service maintenance is scheduled Sunday March 12, to Monday 8AM + # EST. + # + #
+ # Running servers may be forcefully stopped and service disruption + # is expected. GitHubOAuthenticator: populate_teams_in_auth_state: true allowed_organizations: From fe878fbd1932ef9b5fbe4a28ea0b1967589ff184 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:44:35 +0100 Subject: [PATCH 6/9] leap: update authorized github teams to base- and full-access --- config/clusters/leap/common.values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index cb459bc29..703d5dd4f 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -67,7 +67,8 @@ basehub: GitHubOAuthenticator: populate_teams_in_auth_state: true allowed_organizations: - - leap-stc:leap-pangeo-users + - leap-stc:leap-pangeo-base-access + - leap-stc:leap-pangeo-full-access - 2i2c-org:hub-access-for-2i2c-staff scope: - read:org From 3f7a9d1d161c1ef69060726ca8bc77cf1f106dc8 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 16:45:36 +0100 Subject: [PATCH 7/9] leap: update to use node sharing --- config/clusters/leap/common.values.yaml | 170 +++++++++++++++++------- 1 file changed, 121 insertions(+), 49 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 703d5dd4f..1818f5728 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -87,23 +87,96 @@ basehub: # Can be removed once https://github.com/2i2c-org/infrastructure/pull/2160 is merged JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp" profileList: - # The mem-guarantees are here so k8s doesn't schedule other pods - # on these nodes. They need to be just under total allocatable - # RAM on a node, not total node capacity. 
Values calculated using - # https://learnk8s.io/kubernetes-instance-calculator - - display_name: "Small" - description: 5GB RAM, 2 CPUs + # NOTE: About node sharing + # + # CPU/Memory requests/limits are actively considered still. This + # profile list is setup to involve node sharing as considered in + # https://github.com/2i2c-org/infrastructure/issues/2121. + # + # - Memory requests are lower than the description, with a factor + # of (node_max_mem - 4GB) / node_max_mem. + # - CPU requests are lower than the description, with a factor of + # 10%. + # + # NOTE: This is not a standard node sharing setup, don't copy it! + # + # LEAP has explicitly requested the following adjustments from the + # standardized "medium" instance. + # + # - The standardized "medium" instance is declared twice, once for + # a github team with base access, and once for a github team with + # full access. + # - Limits for CPU and Memory are set to match the node share + # description. Expected consequences of this is: significant CPU + # under-utilization, OOMKilled processes when limit is reached, no + # pod evictions due to node memory pressure. + # + # FIXME: Erik advocates putting a CPU limit is reconsidered, as its seen + # as almost as a pure loss: + # + # - The only win I think of is an possible edge case related to + # inefficient spread CPU workload of workload across CPU cores + # that can only be partially used. + # - The losses I think of are: less performance for users overall, + # fewer users per node without being constrained by the node's + # actual capacity, more frequent need to startup new nodes which + # causes more frequent need to wait on startups, increased cost, + # wasted energy. 
+ # + - display_name: "Medium: up to 16 CPU / 128 GB RAM" + description: &profile_list_description "Start a container limited to a chosen share of capacity on a node of this type" + slug: medium-full default: true allowed_teams: - - leap-stc:leap-pangeo-users - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 7G - mem_guarantee: 4.5G - node_selector: - node.kubernetes.io/instance-type: n1-standard-2 - profile_options: &profile_options - image: + - leap-stc:leap-pangeo-full-access + profile_options: + requests: + # NOTE: Node share choices are in active development, see comment + # next to profileList: above. + # + # This specific setup is not a standard node sharing setup, + # don't copy it! + # + display_name: Node share + choices: + mem_8: &medium_mem_8 + display_name: ~8 GB, ~1.0 CPU + default: true + kubespawner_override: + mem_guarantee: 7.75G + cpu_guarantee: 0.1 + mem_limit: 8G + cpu_limit: 1 + mem_16: &medium_mem_16 + display_name: ~16 GB, ~2.0 CPU + kubespawner_override: + mem_guarantee: 15.5G + cpu_guarantee: 0.2 + mem_limit: 16G + cpu_limit: 2 + mem_32: &medium_mem_32 + display_name: ~32 GB, ~4.0 CPU + kubespawner_override: + mem_guarantee: 31.0G + cpu_guarantee: 0.4 + mem_limit: 32G + cpu_limit: 4 + mem_64: + display_name: ~64 GB, ~8.0 CPU + kubespawner_override: + mem_guarantee: 62.0G + cpu_guarantee: 0.8 + mem_limit: 64G + cpu_limit: 8 + mem_128: + display_name: ~128 GB, ~16.0 CPU + kubespawner_override: + mem_guarantee: 124.0G + cpu_guarantee: 1.6 + mem_limit: 128G + cpu_limit: 16 + image: &profile_list_profile_options_image display_name: Image choices: pangeo: @@ -127,46 +200,44 @@ basehub: slug: "leap_edu" kubespawner_override: image: "quay.io/jbusecke/leap-edu-image:fa442ab4851c" - - display_name: Medium - description: 11GB RAM, 4 CPUs - allowed_teams: - - leap-stc:leap-pangeo-users - - 2i2c-org:hub-access-for-2i2c-staff - profile_options: *profile_options - kubespawner_override: - mem_limit: 15G - mem_guarantee: 11G + 
kubespawner_override: &medium_kubespawner_override + cpu_limit: null + mem_limit: null node_selector: - node.kubernetes.io/instance-type: n1-standard-4 - - display_name: Large - description: 24GB RAM, 8 CPUs - allowed_teams: - - leap-stc:leap-pangeo-education - - leap-stc:leap-pangeo-research - - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 30G - mem_guarantee: 24G - node_selector: - node.kubernetes.io/instance-type: n1-standard-8 - profile_options: *profile_options - - display_name: Huge - description: 52GB RAM, 16 CPUs + node.kubernetes.io/instance-type: n2-highmem-16 + + # NOTE: This is the second medium profile list entry, with less node + # share options for a different subset of users via the basehub + # specific allowed_teams configuration. + # + - display_name: "Medium: up to 16 CPU / 128 GB RAM" + description: *profile_list_description + slug: medium-base + default: true allowed_teams: - - leap-stc:leap-pangeo-research - - 2i2c-org:hub-access-for-2i2c-staff - kubespawner_override: - mem_limit: 60G - mem_guarantee: 52G - node_selector: - node.kubernetes.io/instance-type: n1-standard-16 - profile_options: *profile_options - - display_name: Large + GPU + - leap-stc:leap-pangeo-base-access + profile_options: + requests: + # NOTE: Node share choices are in active development, see comment + # next to profileList: above. + # + # This specific setup is not a standard node sharing setup, + # don't copy it! 
+ # + display_name: Node share + choices: + mem_8: *medium_mem_8 + mem_16: *medium_mem_16 + mem_32: *medium_mem_32 + image: *profile_list_profile_options_image + kubespawner_override: *medium_kubespawner_override + + - display_name: GPU slug: gpu - description: 24GB RAM, 8 CPUs + description: NVIDIA Tesla T4, 24GB RAM, 8 CPUs allowed_teams: - - leap-stc:leap-pangeo-research - 2i2c-org:hub-access-for-2i2c-staff + - leap-stc:leap-pangeo-full-access profile_options: image: display_name: Image @@ -189,6 +260,7 @@ basehub: mem_guarantee: 24G extra_resource_limits: nvidia.com/gpu: "1" + dask-gateway: gateway: backend: From d13adbf12521dffcfd8c5db430519ade24af600d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 19:21:34 +0100 Subject: [PATCH 8/9] leap: remove redeundant workaround to forge ServerApp --- config/clusters/leap/common.values.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index 1818f5728..a0ed5272a 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -82,10 +82,6 @@ basehub: # Temporarily set for *all* pods, including pods without any GPUs, # to work around https://github.com/2i2c-org/infrastructure/issues/1530 NVIDIA_DRIVER_CAPABILITIES: compute,utility - # Latest version of JupyterLab no longer works with NotebookApp, requires ServerApp. - # Latest version of the pangeo image requires this now. 
- # Can be removed once https://github.com/2i2c-org/infrastructure/pull/2160 is merged - JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp" profileList: # NOTE: About node sharing # From 1e88becaa23277fb0cd0dd161b7c014fadd75773 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Sun, 12 Mar 2023 20:57:45 +0100 Subject: [PATCH 9/9] leap: adjust profile list mem requests based on allocatable memory --- config/clusters/leap/common.values.yaml | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/config/clusters/leap/common.values.yaml b/config/clusters/leap/common.values.yaml index a0ed5272a..9858939a8 100644 --- a/config/clusters/leap/common.values.yaml +++ b/config/clusters/leap/common.values.yaml @@ -89,8 +89,10 @@ basehub: # profile list is setup to involve node sharing as considered in # https://github.com/2i2c-org/infrastructure/issues/2121. # - # - Memory requests are lower than the description, with a factor - # of (node_max_mem - 4GB) / node_max_mem. + # - Memory requests are different from the description, based on: + # what's found to remain allocatable in k8s, subtracting 1GiB + # overhead for misc system pods, and transitioning from GB in + # description to GiB in mem_guarantee. # - CPU requests are lower than the description, with a factor of # 10%. 
# @@ -136,39 +138,54 @@ basehub: # display_name: Node share choices: + # mem_1: + # display_name: ~1 GB, ~0.125 CPU + # kubespawner_override: + # mem_guarantee: 0.903G + # cpu_guarantee: 0.013 + # mem_2: + # display_name: ~2 GB, ~0.25 CPU + # kubespawner_override: + # mem_guarantee: 1.805G + # cpu_guarantee: 0.025 + # mem_4: + # display_name: ~4 GB, ~0.5 CPU + # kubespawner_override: + # mem_guarantee: 3.611G + # cpu_guarantee: 0.05 mem_8: &medium_mem_8 display_name: ~8 GB, ~1.0 CPU default: true kubespawner_override: - mem_guarantee: 7.75G + mem_guarantee: 7.222G cpu_guarantee: 0.1 mem_limit: 8G cpu_limit: 1 mem_16: &medium_mem_16 display_name: ~16 GB, ~2.0 CPU kubespawner_override: - mem_guarantee: 15.5G + mem_guarantee: 14.444G cpu_guarantee: 0.2 mem_limit: 16G cpu_limit: 2 mem_32: &medium_mem_32 display_name: ~32 GB, ~4.0 CPU kubespawner_override: - mem_guarantee: 31.0G + mem_guarantee: 28.887G cpu_guarantee: 0.4 mem_limit: 32G cpu_limit: 4 mem_64: display_name: ~64 GB, ~8.0 CPU kubespawner_override: - mem_guarantee: 62.0G + mem_guarantee: 57.775G cpu_guarantee: 0.8 mem_limit: 64G cpu_limit: 8 mem_128: display_name: ~128 GB, ~16.0 CPU kubespawner_override: - mem_guarantee: 124.0G + mem_guarantee: 115.549G cpu_guarantee: 1.6 mem_limit: 128G cpu_limit: 16