Skip to content
This repository has been archived by the owner on Nov 22, 2023. It is now read-only.

DRAFT: docs(non-employee-rbac) #2

Merged
merged 4 commits into from
Sep 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/features/rbac/non-employee-rbac-ns.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
71 changes: 71 additions & 0 deletions docs/features/rbac/non-employee-rbac-ns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

import os
from diagrams import Cluster, Diagram, Edge

from diagrams.k8s.rbac import RoleBinding

from diagrams.onprem.network import Istio

from diagrams.k8s.podconfig import CM

from diagrams.k8s.network import Netpol

from diagrams.k8s.group import NS
from diagrams.k8s.compute import Pod

from diagrams.onprem.workflow import Kubeflow


def myself() -> str:
    """Return this script's file name with its final extension removed.

    The value is passed to ``Diagram(...)`` below as the diagram name.

    Returns:
        The base name of ``__file__`` without the last suffix, e.g.
        ``"non-employee-rbac-ns"`` for ``non-employee-rbac-ns.py``.
    """
    script_name = os.path.basename(__file__)
    # splitext strips only the last suffix and keeps extension-less names
    # intact; splitting on "." by hand returned "" for a name with no dot.
    stem, _suffix = os.path.splitext(script_name)
    return stem


# Namespace-capability diagram: shows how the profile state controller labels
# user namespaces/profiles and how the network controllers react to them.
# The diagram is named after this script and is not opened after rendering.
with Diagram(myself(), show=False):
    # --- Nodes, grouped by the cluster/namespace they are drawn in ---------
    with Cluster("aks-cluster"):
        with Cluster("profile-state-controller"):
            profile_state_controller = Pod("profile-state-controller")
            non_employee_exceptions = CM("non-employee-exceptions")

        with Cluster("allowed-user-ns"):
            allowed_k8s_user_ns = NS("allowed-user-namespace")
            allowed_k8s_user_profile = Kubeflow("allowed-user-profile")
            allowed_ns_rolebinding = RoleBinding("allowed-ns-contributors")
            egress_netpol = Netpol("allow-egress-to-cloud-main-system")
            cloud_main_vs = Istio("cloud-main-virtual-service")

        with Cluster("non-allowed-user-ns"):
            non_allowed_k8s_user_ns = NS("non-allowed-user-namespace")
            non_allowed_k8s_user_profile = Kubeflow("non-allowed-user-profile")
            non_allowed_ns_rolebinding = RoleBinding("non-allowed-ns-contributors")

        with Cluster("kubeflow-profiles-namespace"):
            cloud_main_controller = Pod("cloud-main-controller")
            network_policy_controller = Pod("network-controller")

        with Cluster("cloud-main-system"):
            ingress_netpol = Netpol("allow-ingress-from-user-ns")

    # --- Edge styles shared by several arrows -------------------------------
    # A fresh Edge is built for every arrow from these keyword sets.
    _creates = dict(label="creates", style="solid", color="green")
    _watches = dict(label="watches", style="dashed", color="black")

    # The cloud-main-system network policy selects namespaces labelled
    # exists-non-cloud-main-user: false and allows ingress from them.
    (
        ingress_netpol
        >> Edge(label="label selector", style="dashed", color="green")
        >> allowed_k8s_user_ns
    )

    # The per-namespace controllers create the egress network policy and the
    # virtual service in allowed namespaces.
    for _controller, _resource in (
        (network_policy_controller, egress_netpol),
        (cloud_main_controller, cloud_main_vs),
    ):
        _controller >> Edge(**_creates) >> _resource

    # The exception-list configmap is mounted into the profile state
    # controller pod and lists the exception cases for each capability.
    (
        non_employee_exceptions
        >> Edge(label="mounts", style="dashed", color="black")
        >> profile_state_controller
    )

    # The profile state controller watches Kubeflow profiles and the subjects
    # of role bindings in both kinds of namespace.
    for _watched in (
        allowed_ns_rolebinding,
        allowed_k8s_user_profile,
        non_allowed_ns_rolebinding,
        non_allowed_k8s_user_profile,
    ):
        profile_state_controller >> Edge(**_watches) >> _watched

    # The profile state controller applies capability labels to the profile
    # and namespace: green arrows for the allowed side, red for non-allowed.
    for _labelled, _colour in (
        (allowed_k8s_user_profile, "green"),
        (allowed_k8s_user_ns, "green"),
        (non_allowed_k8s_user_profile, "red"),
        (non_allowed_k8s_user_ns, "red"),
    ):
        (
            profile_state_controller
            >> Edge(label="applies label", style="solid", color=_colour)
            >> _labelled
        )
Binary file added docs/features/rbac/non-employee-rbac-pod.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
74 changes: 74 additions & 0 deletions docs/features/rbac/non-employee-rbac-pod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

import os
from diagrams import Cluster, Diagram, Edge

from diagrams.custom import Custom

from diagrams.k8s.rbac import User
from diagrams.k8s.rbac import RoleBinding

from diagrams.k8s.controlplane import API
from diagrams.k8s.infra import ETCD

from diagrams.k8s.podconfig import CM


from diagrams.k8s.group import NS
from diagrams.k8s.compute import Pod

from diagrams.onprem.workflow import Kubeflow


def myself() -> str:
    """Return the name of this script without its trailing extension.

    The value is passed to ``Diagram(...)`` below as the diagram name.

    Returns:
        The base name of ``__file__`` minus its last suffix, e.g.
        ``"non-employee-rbac-pod"`` for ``non-employee-rbac-pod.py``.
    """
    # os.path.splitext removes only the final ".ext"; a name without any
    # extension is returned unchanged rather than collapsing to "".
    base_name = os.path.basename(__file__)
    return os.path.splitext(base_name)[0]


# Pod/notebook-capability diagram: shows admission control through OPA
# Gatekeeper alongside the labels applied by the profile state controller.
# The diagram is named after this script and is not opened after rendering.
with Diagram(myself(), show=False):
    with Cluster("browser"):
        # NOTE(review): icon paths are relative, so they presumably resolve
        # against the directory the script is run from - confirm.
        browser = Custom("browser", icon_path="img/browser.png")

    with Cluster("aks-cluster"):
        with Cluster("profile-state-controller"):
            profile_state_controller = Pod("profile-state-controller")
            non_employee_exceptions = CM("non-employee-exceptions")

        with Cluster("user-namespace"):
            k8s_user_ns = NS("user-namespace")
            k8s_user_profile = Kubeflow("user-profile")
            ns_rolebinding = RoleBinding("ns-contributors")

        with Cluster("kubeflow-system-namespace"):
            kf_notebook_controller = Pod("notebook-controller")
            # Label fixed: the original read "spawner_ui_config.yamll".
            spawner_ui_config = CM("spawner_ui_config.yaml")

        with Cluster("system-namespace"):
            etcd = ETCD("k8s-etcd")
            k8s_api_server = API("k8s-api-server")
            gatekeeper_server = Pod("opa-gatekeeper")
            gatekeeper_policy = Custom("opa-gatekeeper-policies", icon_path="img/opa.png")

    # Configmaps are mounted to the profile state controller pod and list
    # exception cases for each capability.
    non_employee_exceptions >> Edge(label="mounts", style="dashed", color="black") >> profile_state_controller

    # A resource is created by posting its manifest to the k8s API server
    # (drawn here from the notebook controller).
    kf_notebook_controller >> Edge(label="posts resource manifest", style="dashed", color="orange") >> k8s_api_server

    # OPA Gatekeeper policies specify deny rules at validating admission control.
    gatekeeper_policy >> Edge(label="configures", style="dashed", color="black") >> gatekeeper_server
    k8s_api_server >> Edge(label="validating admission webhook", style="dashed", color="orange") >> gatekeeper_server
    gatekeeper_server >> Edge(label="allow/deny decision", style="dashed", color="orange") >> k8s_api_server

    # If OPA Gatekeeper doesn't deny the request, the k8s API server posts
    # the manifest to etcd.
    k8s_api_server >> Edge(label="posts resource manifest", style="dashed", color="green") >> etcd

    # The profile state controller watches Kubeflow profiles and checks the
    # subjects of role bindings.
    profile_state_controller >> Edge(label="watches", style="dashed", color="black") >> ns_rolebinding
    profile_state_controller >> Edge(label="watches", style="dashed", color="black") >> k8s_user_profile

    # The profile state controller applies capability labels to the user
    # profile and namespace.
    profile_state_controller >> Edge(label="applies label", style="solid", color="green") >> k8s_user_profile
    profile_state_controller >> Edge(label="applies label", style="solid", color="green") >> k8s_user_ns

    # The spawner UI config disables the SAS image in the UI if the namespace
    # contains a user without the SAS notebook capability.
    spawner_ui_config >> Edge(label="conditionally disables sas icon", style="dashed", color="black") >> browser
81 changes: 81 additions & 0 deletions docs/features/rbac/non-employee-rbac.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Overview

Some non-employee users of the AAW platform are authorized to perform some (but not all) of the capabilities that Statcan employees can perform. The platform components detailed on this page allow administrators to extend certain capabilities to certain non-employee users who are authorized to perform those capabilities.

The two main relevant capabilities are (1) the ability to create pods with certain base images (e.g. SAS), and (2) the ability to access certain cloud main resources (e.g. gitlab.k8s). The documentation below focuses on these two types of capabilities; additional controls may be required for capabilities that don't fall under the above-mentioned types.

## Relevant Issues

- [Refactor Non-Employee RBAC Model](https://github.com/StatCan/daaas/issues/1335)

## Relevant Repositories

- [aaw-profile-state-controller](https://github.com/StatCan/aaw-profile-state-controller)
- [aaw-kubeflow-profiles-controller](https://github.com/StatCan/aaw-kubeflow-profiles-controller)
- [gatekeeper-policies](https://github.com/StatCan/gatekeeper-policies)
- [aaw-gatekeeper-constraints](https://github.com/StatCan/aaw-gatekeeper-constraints)
- [jupyter-apis](https://github.com/StatCan/jupyter-apis)
- [aaw-kubeflow-profiles](https://github.com/StatCan/aaw-kubeflow-profiles)
- [aaw-network-policies](https://github.com/StatCan/aaw-network-policies)

## How to Add/Remove Users from Capabilities

Go to the [exception-list configmap](https://github.com/StatCan/aaw-kubeflow-profiles/blob/%40cbrown/1335-non-employee-rbac/non-employee-exceptions-config.jsonnet) and add/remove users from the desired exception list category. In general, the format of the exception list is as follows:

```yaml
# ...
exceptionKind:
- [email protected]
- [email protected]
# ...
```

## How to Add a New Capability

1. Create a new exception kind in the [exception-list configmap](https://github.com/StatCan/aaw-kubeflow-profiles/blob/%40cbrown/1335-non-employee-rbac/non-employee-exceptions-config.jsonnet).
2. Make the appropriate changes to the [aaw-profile-state-controller](https://github.com/StatCan/aaw-profile-state-controller) repository.
3. Add any unit tests to the changes in [aaw-profile-state-controller](https://github.com/StatCan/aaw-profile-state-controller) to ensure correct behaviour.

Depending on whether the exception kind is a pod/notebook capability or a namespace capability (described later on this page), the next steps differ.

If the feature is a namespace capability, whatever controllers are involved with rolling out components for the capability should respond accordingly to changes in labels applied by [aaw-profile-state-controller](https://github.com/StatCan/aaw-profile-state-controller). For example, the [cloud-main controller](https://github.com/StatCan/aaw-kubeflow-profiles-controller/blob/main/cmd/cloud-main.go) that permits authorized namespaces to connect to certain cloud-main services operates by creating certain Istio and Kubernetes resources in those namespaces to ensure that traffic from those namespaces is routed through the cloud main egress gateway. If a namespace adds an unauthorized user, the [aaw-profile-state-controller](https://github.com/StatCan/aaw-profile-state-controller) applies the label `state.aaw.statcan.gc.ca/exists-non-cloud-main-user: true`, and the cloud-main controller and [per-namespace network policy controller](https://github.com/StatCan/aaw-kubeflow-profiles-controller/blob/main/cmd/network.go) remove the necessary Istio and Kubernetes components from that namespace. As long as the unauthorized user is in the namespace, traffic from that namespace does not get routed through the cloud-main egress gateway, and is subsequently blocked at the AAW Hub firewall level if an attempt is made to communicate with cloud main services.

If the feature is a pod/notebook capability, there are often two cases that need to be handled explicitly by a Gatekeeper policy:

1. A namespace **already contains a pod with feature X** and an unauthorized user is added to the namespace.
2. A namespace **already contains an unauthorized user** and a pod/notebook with feature X is created in the namespace.

A Gatekeeper policy/constraint must be created for both cases so that the above scenarios are blocked at validating admission control (i.e. before the pod/notebook/rolebinding is posted to etcd).

In addition, it may also be necessary to make changes to the [jupyter-apis](https://github.com/StatCan/jupyter-apis) front-end to indicate on the UI that users are not allowed to perform certain actions. For example, for the SAS Notebook feature of the platform, the option to select a SAS notebook through the jupyter-apis UI is disabled if there exists a user without the SAS Notebook capability in the namespace.

# Feature Deployment

> TODO

# Feature Implementation

This implementation proposes two kinds of feature capabilities: (1) Pod/Notebook features and (2) Namespace features.

For example, the SAS notebook feature is a **Pod/notebook feature** because it requires deploying a specific kind of notebook into a namespace, whereas cloud-main-connectivity is a **namespace feature** because routing pod traffic through the egress gateway is determined by rules that are applied at the level of the namespace.

The semantics for labels behind each feature work as follows.

## Pod/Notebook Feature

![non-employee-rbac-pods](non-employee-rbac-pod.png)

The profile state controller applies a label of the form `has-X-feature` if **any** pod/notebook in the namespace has that feature (e.g. a pod with the sas image). Additionally, the profile state controller applies a label of the form `exists-non-X-user` if **any** subject in **any** rolebinding in the namespace is neither an employee nor in the list of exceptions for that capability. The profile state controller will apply the following labels to the profile and namespace:

- `has-X-feature: true` if **any** pod in the namespace has that feature
- `exists-non-X-user: true` if **any** subject in **any** role binding is neither an employee nor in the exception list for that capability.

If a non-employee without an exception is added to a rolebinding in a namespace where the label `has-X-feature: true` is present, a gatekeeper policy blocks this request.

If a pod with `X-feature` is added to a namespace where the label `exists-non-X-user: true` is present, a gatekeeper policy blocks this request.

## Namespace Feature

![non-employee-rbac-namespaces](non-employee-rbac-ns.png)

A namespace feature only requires the logic surrounding the `exists-non-X-user` label described above. In the case of cloud-main connectivity, the `cloud-main` and `network` controllers in `aaw-kubeflow-profiles-controller` should automatically reconcile the network policies / virtual services involved if a namespace does not have the cloud main connectivity capability. No gatekeeper policy should be required.
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ nav:
- Networking:
- Cloud Main Connectivity: features/networking/cloud-main-connectivity.md
- Data Virtualization: features/data-virtualization/trino.md
- RBAC:
- Non-Employee RBAC: features/rbac/non-employee-rbac.md
- Resources:
- Overview: resources/index.md
- Developer Tools: developer-tools.md