leap: k8s 1.25, node sharing setup, no more pre-pulling, no maintenance notice #2337

Merged
223 changes: 149 additions & 74 deletions config/clusters/leap/common.values.yaml
@@ -10,14 +10,6 @@ basehub:
# Name of Google Filestore share
baseShareName: /homes/
jupyterhub:
prePuller:
continuous:
enabled: true
# Extra images to be pulled on all nodes
extraImages:
tensorflow-image:
name: pangeo/ml-notebook
tag: "ebeb9dd"
custom:
# Extra mount point for admins to access to all users' home dirs
# Ref https://github.com/2i2c-org/infrastructure/issues/2105
@@ -60,25 +52,23 @@ basehub:
# Announcement is a JupyterHub feature to present messages to users in
# pages under the /hub path, but not via the /user.
#
# This specific maintenance announcement was requested via
# https://2i2c.freshdesk.com/a/tickets/525.
#
# ref: https://github.com/2i2c-org/infrastructure/issues/1501
# ref: https://jupyterhub.readthedocs.io/en/stable/reference/templates.html#announcement-configuration-variables
#
template_vars:
announcement: >-
<strong>
Service maintenance is scheduled Sunday March 12, to Monday 8AM
EST.
</strong>
<br/>
Running servers may be forcefully stopped and service disruption
is expected.
# template_vars:
# announcement: >-
# <strong>
# Service maintenance is scheduled Sunday March 12, to Monday 8AM
# EST.
# </strong>
# <br/>
# Running servers may be forcefully stopped and service disruption
# is expected.
GitHubOAuthenticator:
populate_teams_in_auth_state: true
allowed_organizations:
- leap-stc:leap-pangeo-users
- leap-stc:leap-pangeo-base-access
- leap-stc:leap-pangeo-full-access
- 2i2c-org:hub-access-for-2i2c-staff
scope:
- read:org
@@ -92,28 +82,114 @@ basehub:
# Temporarily set for *all* pods, including pods without any GPUs,
# to work around https://github.com/2i2c-org/infrastructure/issues/1530
NVIDIA_DRIVER_CAPABILITIES: compute,utility
# Latest version of JupyterLab no longer works with NotebookApp, requires ServerApp.
# Latest version of the pangeo image requires this now.
# Can be removed once https://github.com/2i2c-org/infrastructure/pull/2160 is merged
JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
profileList:
# The mem-guarantees are here so k8s doesn't schedule other pods
# on these nodes. They need to be just under total allocatable
# RAM on a node, not total node capacity. Values calculated using
# https://learnk8s.io/kubernetes-instance-calculator
- display_name: "Small"
description: 5GB RAM, 2 CPUs
# NOTE: About node sharing
#
# CPU/Memory requests/limits are still being actively considered. This
# profile list is set up to involve node sharing as considered in
# https://github.com/2i2c-org/infrastructure/issues/2121.
#
# - Memory requests differ from the description: they are based on what
# is found to remain allocatable in k8s, minus a 1GiB overhead for
# misc system pods, and on transitioning from GB in the description
# to GiB in mem_guarantee.
# - CPU requests are lower than the description, set to 10% of the
# described CPUs.
#
# NOTE: This is not a standard node sharing setup, don't copy it!
#
# LEAP has explicitly requested the following adjustments from the
# standardized "medium" instance.
#
# - The standardized "medium" instance is declared twice, once for
# a github team with base access, and once for a github team with
# full access.
# - Limits for CPU and Memory are set to match the node share
# description. Expected consequences of this is: significant CPU
# under-utilization, OOMKilled processes when limit is reached, no
# pod evictions due to node memory pressure.
#
# FIXME: Erik advocates that setting a CPU limit be reconsidered, as
# it's seen as almost a pure loss:
#
# - The only win I can think of is a possible edge case related to
# inefficiently spreading CPU workload across CPU cores that can only
# be partially used.
# - The losses I can think of are: less performance for users overall,
# fewer users per node without being constrained by the node's actual
# capacity, a more frequent need to start up new nodes, which causes
# more frequent waits on startup, increased cost, and wasted energy.
#
- display_name: "Medium: up to 16 CPU / 128 GB RAM"
description: &profile_list_description "Start a container limited to a chosen share of capacity on a node of this type"
slug: medium-full
default: true
allowed_teams:
- leap-stc:leap-pangeo-users
- 2i2c-org:hub-access-for-2i2c-staff
kubespawner_override:
mem_limit: 7G
mem_guarantee: 4.5G
node_selector:
node.kubernetes.io/instance-type: n1-standard-2
profile_options: &profile_options
image:
- leap-stc:leap-pangeo-full-access
profile_options:
requests:
# NOTE: Node share choices are in active development, see comment
# next to profileList: above.
#
# This specific setup is not a standard node sharing setup,
# don't copy it!
#
display_name: Node share
choices:
# mem_1:
# display_name: ~1 GB, ~0.125 CPU
# kubespawner_override:
# mem_guarantee: 0.903G
# cpu_guarantee: 0.013
# mem_2:
# display_name: ~2 GB, ~0.25 CPU
# kubespawner_override:
# mem_guarantee: 1.805G
# cpu_guarantee: 0.025
# mem_4:
# display_name: ~4 GB, ~0.5 CPU
# kubespawner_override:
# mem_guarantee: 3.611G
# cpu_guarantee: 0.05
mem_8: &medium_mem_8
display_name: ~8 GB, ~1.0 CPU
default: true
kubespawner_override:
mem_guarantee: 7.222G
cpu_guarantee: 0.1
mem_limit: 8G
cpu_limit: 1
mem_16: &medium_mem_16
display_name: ~16 GB, ~2.0 CPU
kubespawner_override:
mem_guarantee: 14.444G
cpu_guarantee: 0.2
mem_limit: 16G
cpu_limit: 2
mem_32: &medium_mem_32
display_name: ~32 GB, ~4.0 CPU
kubespawner_override:
mem_guarantee: 28.887G
cpu_guarantee: 0.4
mem_limit: 32G
cpu_limit: 4
mem_64:
display_name: ~64 GB, ~8.0 CPU
kubespawner_override:
mem_guarantee: 57.775G
cpu_guarantee: 0.8
mem_limit: 64G
cpu_limit: 8
mem_128:
display_name: ~128 GB, ~16.0 CPU
kubespawner_override:
mem_guarantee: 115.549G
cpu_guarantee: 1.6
mem_limit: 128G
cpu_limit: 16
image: &profile_list_profile_options_image
display_name: Image
choices:
pangeo:
@@ -137,46 +213,44 @@ basehub:
slug: "leap_edu"
kubespawner_override:
image: "quay.io/jbusecke/leap-edu-image:fa442ab4851c"
- display_name: Medium
description: 11GB RAM, 4 CPUs
allowed_teams:
- leap-stc:leap-pangeo-users
- 2i2c-org:hub-access-for-2i2c-staff
profile_options: *profile_options
kubespawner_override:
mem_limit: 15G
mem_guarantee: 11G
node_selector:
node.kubernetes.io/instance-type: n1-standard-4
- display_name: Large
description: 24GB RAM, 8 CPUs
allowed_teams:
- leap-stc:leap-pangeo-education
- leap-stc:leap-pangeo-research
- 2i2c-org:hub-access-for-2i2c-staff
kubespawner_override:
mem_limit: 30G
mem_guarantee: 24G
kubespawner_override: &medium_kubespawner_override
cpu_limit: null
mem_limit: null
node_selector:
node.kubernetes.io/instance-type: n1-standard-8
profile_options: *profile_options
- display_name: Huge
description: 52GB RAM, 16 CPUs
node.kubernetes.io/instance-type: n2-highmem-16

# NOTE: This is the second medium profile list entry, with fewer node
# share options, for a different subset of users via the
# basehub-specific allowed_teams configuration.
#
- display_name: "Medium: up to 16 CPU / 128 GB RAM"
description: *profile_list_description
slug: medium-base
default: true
allowed_teams:
- leap-stc:leap-pangeo-research
- 2i2c-org:hub-access-for-2i2c-staff
kubespawner_override:
mem_limit: 60G
mem_guarantee: 52G
node_selector:
node.kubernetes.io/instance-type: n1-standard-16
profile_options: *profile_options
- display_name: Large + GPU
- leap-stc:leap-pangeo-base-access
profile_options:
requests:
# NOTE: Node share choices are in active development, see comment
# next to profileList: above.
#
# This specific setup is not a standard node sharing setup,
# don't copy it!
#
display_name: Node share
choices:
mem_8: *medium_mem_8
mem_16: *medium_mem_16
mem_32: *medium_mem_32
image: *profile_list_profile_options_image
kubespawner_override: *medium_kubespawner_override

- display_name: GPU
slug: gpu
description: 24GB RAM, 8 CPUs
description: NVIDIA Tesla T4, 24GB RAM, 8 CPUs
allowed_teams:
- leap-stc:leap-pangeo-research
- 2i2c-org:hub-access-for-2i2c-staff
- leap-stc:leap-pangeo-full-access
profile_options:
image:
display_name: Image
@@ -199,6 +273,7 @@ basehub:
mem_guarantee: 24G
extra_resource_limits:
nvidia.com/gpu: "1"

dask-gateway:
gateway:
backend:
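The node-share NOTE in the profile list above explains where the choice values come from: memory guarantees are derived from what remains allocatable on an n2-highmem-16 node after subtracting roughly 1 GiB for misc system pods and splitting the remainder into power-of-two shares, while CPU guarantees are set to 10% of the advertised CPUs. The following is a minimal sketch of that arithmetic; the allocatable-memory constant and the helper function are illustrative assumptions, not values taken from the deployed cluster.

```python
# Sketch: reproduce the "Node share" kubespawner_override values described
# in the profile list comment above. All constants are assumptions made
# for illustration (an n2-highmem-16 node: 16 CPUs, 128 GB memory, with
# ~116.5 GiB reported as allocatable by k8s).

ALLOCATABLE_GIB = 116.549  # assumed k8s-allocatable memory on the node
OVERHEAD_GIB = 1.0         # reserved for misc system pods
NODE_CPUS = 16
NODE_MEM_GB = 128

def node_share(denominator: int) -> dict:
    """Overrides for a 1/denominator share of the node."""
    shareable_gib = ALLOCATABLE_GIB - OVERHEAD_GIB
    return {
        "mem_guarantee": f"{shareable_gib / denominator:.3f}G",
        # CPU guarantee is 10% of the share's advertised CPUs
        "cpu_guarantee": round(0.1 * NODE_CPUS / denominator, 3),
        # Limits match the advertised share of the node
        "mem_limit": f"{NODE_MEM_GB // denominator}G",
        "cpu_limit": NODE_CPUS // denominator,
    }

for name, denominator in [("mem_8", 16), ("mem_16", 8), ("mem_32", 4),
                          ("mem_64", 2), ("mem_128", 1)]:
    print(name, node_share(denominator))
```

For the mem_8 share this yields a guarantee of roughly 7.222G and 0.1 CPU, matching the profile list; small rounding differences for the larger shares are expected given the assumed allocatable figure.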
41 changes: 37 additions & 4 deletions terraform/gcp/cluster.tf
@@ -1,9 +1,34 @@
# This data resource and output provide information on the latest available k8s
# versions in GCP's regular release channel. This can be used when specifying
# versions to upgrade to via the k8s_versions variable.
#
# To get the output of relevance, run:
#
# terraform plan -var-file=projects/$CLUSTER_NAME.tfvars
# terraform output regular_channel_latest_k8s_versions
#
# data ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_engine_versions
data "google_container_engine_versions" "k8s_version_prefixes" {
project = var.project_id
location = var.zone

for_each = var.k8s_version_prefixes
version_prefix = each.value
}
output "regular_channel_latest_k8s_versions" {
value = {
for k, v in data.google_container_engine_versions.k8s_version_prefixes : k => v.release_channel_latest_version["REGULAR"]
}
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_service_account
resource "google_service_account" "cluster_sa" {
account_id = "${var.prefix}-cluster-sa"
display_name = "Service account used by nodes of cluster ${var.prefix}"
project = var.project_id
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam#google_project_iam_member
resource "google_project_iam_member" "cluster_sa_roles" {
# https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster
# has information on why the cluster SA needs these rights
@@ -20,14 +45,16 @@ resource "google_project_iam_member" "cluster_sa_roles" {
member = "serviceAccount:${google_service_account.cluster_sa.email}"
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_container_cluster
resource "google_container_cluster" "cluster" {
# Setting cluster autoscaling profile is in google-beta
provider = google-beta

name = "${var.prefix}-cluster"
location = var.regional_cluster ? var.region : var.zone
node_locations = var.regional_cluster ? [var.zone] : null
project = var.project_id
name = "${var.prefix}-cluster"
location = var.regional_cluster ? var.region : var.zone
node_locations = var.regional_cluster ? [var.zone] : null
project = var.project_id
min_master_version = var.k8s_versions.min_master_version

initial_node_count = 1
remove_default_node_pool = true
@@ -150,11 +177,13 @@ resource "google_container_cluster" "cluster" {
resource_labels = {}
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool
resource "google_container_node_pool" "core" {
name = "core-pool"
cluster = google_container_cluster.cluster.name
project = google_container_cluster.cluster.project
location = google_container_cluster.cluster.location
version = var.k8s_versions.core_nodes_version


initial_node_count = 1
@@ -205,11 +234,13 @@ resource "google_container_node_pool" "core" {
}
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool
resource "google_container_node_pool" "notebook" {
name = "nb-${each.key}"
cluster = google_container_cluster.cluster.name
project = google_container_cluster.cluster.project
location = google_container_cluster.cluster.location
version = var.k8s_versions.notebook_nodes_version

for_each = var.notebook_nodes

@@ -299,11 +330,13 @@ resource "google_container_node_pool" "notebook" {
}
}

# resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool
resource "google_container_node_pool" "dask_worker" {
name = "dask-${each.key}"
cluster = google_container_cluster.cluster.name
project = google_container_cluster.cluster.project
location = google_container_cluster.cluster.location
version = var.k8s_versions.dask_nodes_version

# Default to same config as notebook nodepools config
for_each = var.dask_nodes
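The cluster.tf changes above add a regular_channel_latest_k8s_versions output and thread a k8s_versions variable through the control plane and every node pool, so each can be pinned to an explicit GKE version (here, for the k8s 1.25 upgrade). A hypothetical tfvars fragment is sketched below; the variable shape is inferred from the attribute names referenced in cluster.tf, and the version strings and prefixes are placeholders rather than the values actually deployed for LEAP.

```hcl
# Hypothetical projects/leap.tfvars fragment (illustrative values only).
# k8s_version_prefixes feeds the google_container_engine_versions data
# source; k8s_versions pins the control plane and each node pool.
k8s_version_prefixes = ["1.24.", "1.25."]

k8s_versions = {
  min_master_version     = "1.25.6-gke.1000"
  core_nodes_version     = "1.25.6-gke.1000"
  notebook_nodes_version = "1.25.6-gke.1000"
  dask_nodes_version     = "1.25.6-gke.1000"
}
```

Concrete version strings can be taken from the regular_channel_latest_k8s_versions output described in the comment at the top of cluster.tf.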