From d8f63ea5d3479d86fd85abea7da58ec0a6fd6a2c Mon Sep 17 00:00:00 2001
From: Noha Ihab <49988746+NohaIhab@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:41:13 +0300
Subject: [PATCH 1/5] feat: configure the UATs to run behind a proxy using Notebooks (#98)

* add poddefault and configure kfp tests
* kfp: use dict.get to avoid KeyError
* configure katib uats
* configure training uats
* add testing with proxy instructions to README
---
 README.md | 49 +
 tests/notebooks/katib/katib-integration.ipynb | 399 +---
 .../notebooks/kfp_v2/kfp-v2-integration.ipynb | 201 +-
 .../training/training-integration.ipynb | 1813 +++++++++--------
 tests/proxy-poddefault.yaml | 22 +
 5 files changed, 1137 insertions(+), 1347 deletions(-)
 create mode 100644 tests/proxy-poddefault.yaml

diff --git a/README.md b/README.md
index 97af1e9..837f93e 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,55 @@ tox -e kubeflow-remote
 tox -e kubeflow-local
 ```

+### Run behind proxy
+#### Prerequisites
+**To run the tests behind a proxy using a Notebook or using the driver, the following step is necessary:**

Edit the PodDefault `tests/proxy-poddefault.yaml` to replace the placeholders for:
 * `<proxy_address>:<proxy_port>`: the address and port of your proxy server
 * `<cluster_cidr>`: you can get this value by running:
 ```
 cat /var/snap/microk8s/current/args/kube-proxy | grep cluster-cidr
 ```
 * `<service_cluster_ip_range>`: you can get this value by running:
 ```
 cat /var/snap/microk8s/current/args/kube-apiserver | grep service-cluster-ip-range
 ```
 * `<nodes_internal_ips>`: the Internal IPs of the nodes where your cluster is running; you can get these values by running:
 ```
 microk8s kubectl get nodes -o wide
 ```
 It is the `INTERNAL-IP` value
 * `<hostname>`: the name of the host on which the cluster is deployed; you can use the `hostname` command to get it

#### Running using Notebook
To run the tests behind a proxy using a Notebook:
1. Log in to the Dashboard and create a Profile
2. Apply the PodDefault to your Profile's namespace. Make sure you have already followed the Prerequisites section to modify the PodDefault, then apply it with:
 ```
 kubectl apply -f ./tests/proxy-poddefault.yaml -n <your_namespace>
 ```
3. Create a Notebook and, under `Advanced Options > Configurations`, select `Add proxy settings`, then click `Launch` to start the Notebook. Wait for the Notebook to be Ready, then connect to it.
4. From inside the Notebook, start a new terminal session and clone this repo:

 ```bash
 git clone https://github.com/canonical/charmed-kubeflow-uats.git
 ```
 Then go to the `charmed-kubeflow-uats/tests` directory and, for each `.ipynb` test file there, open and run it.
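For reference, the PodDefault added by this patch (`tests/proxy-poddefault.yaml`) is what injects these settings: it selects Pods labelled `notebook-proxy: "true"` (the same label the katib, kfp_v2, and training notebooks attach to the workloads they create) and sets the proxy environment variables on them. The sketch below shows the general shape of such a PodDefault; the placeholder names, the `desc` string, and the exact `NO_PROXY` entries are assumptions inferred from the Prerequisites above, so treat the file in the repo as the authoritative version.

```yaml
# Hypothetical sketch of tests/proxy-poddefault.yaml; see the repo for the real file.
apiVersion: kubeflow.org/v1alpha1
kind: PodDefault
metadata:
  name: notebook-proxy
spec:
  # Rendered as the "Add proxy settings" option in the Notebook creation UI.
  desc: Add proxy settings
  selector:
    matchLabels:
      # Label that the UAT notebooks set on the Pods that should use the proxy.
      notebook-proxy: "true"
  env:
    - name: HTTP_PROXY
      value: http://<proxy_address>:<proxy_port>
    - name: HTTPS_PROXY
      value: http://<proxy_address>:<proxy_port>
    - name: NO_PROXY
      value: <cluster_cidr>,<service_cluster_ip_range>,<nodes_internal_ips>,<hostname>,127.0.0.1,localhost,.svc,.local
```

Since the Notebook server receives the same variables from this PodDefault, the tests detect proxy mode by checking that `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` are all set in the environment, which is exactly what the `proxy_envs_set()` helpers in the notebooks below do.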
+ + Currently, the following tests are supported to run behind a proxy: + * katib + * kserve + * kfp_v2 + * training (except TFJob due to https://github.com/canonical/training-operator/issues/182) + #### Developer Notes Any environment that can be used to access and configure the Charmed Kubeflow deployment is diff --git a/tests/notebooks/katib/katib-integration.ipynb b/tests/notebooks/katib/katib-integration.ipynb index f803073..beb6467 100644 --- a/tests/notebooks/katib/katib-integration.ipynb +++ b/tests/notebooks/katib/katib-integration.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "tags": [ "pytest-skip" ] }, @@ -46,12 +46,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ + "import os\n", + "\n", "from kubeflow.katib import (\n", " KatibClient,\n", " V1beta1AlgorithmSpec,\n", @@ -79,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "tags": [] }, @@ -99,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "tags": [] }, @@ -110,7 +112,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def proxy_envs_set():\n", + " if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "tags": [] }, @@ -184,6 +198,9 @@ " }\n", "}\n", "\n", + "if proxy_envs_set():\n", + " trial_spec['spec']['template']['metadata']['labels']={\"notebook-proxy\": \"true\"}\n", + "\n", "trial_template=V1beta1TrialTemplate(\n", " primary_container_name=\"training-container\",\n", " trial_parameters=[\n", @@ -226,28 +243,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Name: cmaes-example\n", - "Algorithm: cmaes\n", - "Objective: loss\n", - "Trial Parameters:\n", - "- learningRate: Learning rate for the training model\n", - "- momentum: Momentum for the training model\n", - "Max Trial Count: 3\n", - "Max Failed Trial Count: 1\n", - "Parallel Trial Count: 2\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Name:\", experiment.metadata.name)\n", "print(\"Algorithm:\", experiment.spec.algorithm.algorithm_name)\n", @@ -271,22 +272,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "[exp.metadata.name for exp in client.list_experiments()]" ] @@ -302,31 +292,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Experiment user/cmaes-example has been created\n" - ] - }, - { - "data": { - "text/html": [ - "Katib Experiment cmaes-example link here" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "client.create_experiment(experiment)" ] @@ -343,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null,
"metadata": { "tags": [] }, @@ -361,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "tags": [ "raises-exception" @@ -379,132 +349,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Experiment: cmaes-example\n", - "\n", - "Experiment Spec:\n", - "{'algorithm': {'algorithm_name': 'cmaes', 'algorithm_settings': None},\n", - " 'early_stopping': None,\n", - " 'max_failed_trial_count': 1,\n", - " 'max_trial_count': 3,\n", - " 'metrics_collector_spec': {'collector': {'custom_collector': None,\n", - " 'kind': 'StdOut'},\n", - " 'source': None},\n", - " 'nas_config': None,\n", - " 'objective': {'additional_metric_names': ['Train-accuracy'],\n", - " 'goal': 0.001,\n", - " 'metric_strategies': [{'name': 'loss', 'value': 'min'},\n", - " {'name': 'Train-accuracy',\n", - " 'value': 'min'}],\n", - " 'objective_metric_name': 'loss',\n", - " 'type': 'minimize'},\n", - " 'parallel_trial_count': 2,\n", - " 'parameters': [{'feasible_space': {'list': None,\n", - " 'max': '0.06',\n", - " 'min': '0.01',\n", - " 'step': None},\n", - " 'name': 'lr',\n", - " 'parameter_type': 'double'},\n", - " {'feasible_space': {'list': None,\n", - " 'max': '0.9',\n", - " 'min': '0.5',\n", - " 'step': None},\n", - " 'name': 'momentum',\n", - " 'parameter_type': 'double'}],\n", - " 'resume_policy': 'Never',\n", - " 'trial_template': {'config_map': None,\n", - " 'failure_condition': 'status.conditions.#(type==\"Failed\")#|#(status==\"True\")#',\n", - " 'primary_container_name': 'training-container',\n", - " 'primary_pod_labels': None,\n", - " 'retain': None,\n", - " 'success_condition': 'status.conditions.#(type==\"Complete\")#|#(status==\"True\")#',\n", - " 'trial_parameters': [{'description': 'Learning rate for '\n", - " 'the training model',\n", - " 'name': 'learningRate',\n", - " 'reference': 'lr'},\n", - " {'description': 'Momentum for the '\n", - " 'training model',\n", - " 'name': 'momentum',\n", - " 'reference': 'momentum'}],\n", - " 'trial_spec': {'apiVersion': 'batch/v1',\n", - " 'kind': 'Job',\n", - " 'spec': {'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'}},\n", - " 'spec': {'containers': [{'command': ['python3',\n", - " '/opt/pytorch-mnist/mnist.py',\n", - " '--epochs=1',\n", - " '--batch-size=64',\n", - " '--lr=${trialParameters.learningRate}',\n", - " '--momentum=${trialParameters.momentum}'],\n", - " 'image': 'docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0',\n", - " 'name': 'training-container'}],\n", - " 'restartPolicy': 'Never'}}}}}}\n", - "\n", - "Experiment Status:\n", - "{'completion_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'conditions': [{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'message': 'Experiment is created',\n", - " 'reason': 'ExperimentCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'},\n", - " {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Experiment is running',\n", - " 'reason': 'ExperimentRunning',\n", - " 'status': 'False',\n", - " 'type': 'Running'},\n", - " {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 
55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Experiment has succeeded because max trial count '\n", - " 'has reached',\n", - " 'reason': 'ExperimentMaxTrialsReached',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}],\n", - " 'current_optimal_trial': {'best_trial_name': 'cmaes-example-dphxbch7',\n", - " 'observation': {'metrics': [{'latest': '0.3130',\n", - " 'max': '2.2980',\n", - " 'min': '0.2691',\n", - " 'name': 'loss'},\n", - " {'latest': 'unavailable',\n", - " 'max': 'unavailable',\n", - " 'min': 'unavailable',\n", - " 'name': 'Train-accuracy'}]},\n", - " 'parameter_assignments': [{'name': 'lr',\n", - " 'value': '0.04511033252270099'},\n", - " {'name': 'momentum',\n", - " 'value': '0.6980954001565728'}]},\n", - " 'early_stopped_trial_list': None,\n", - " 'failed_trial_list': None,\n", - " 'killed_trial_list': None,\n", - " 'last_reconcile_time': None,\n", - " 'metrics_unavailable_trial_list': None,\n", - " 'pending_trial_list': None,\n", - " 'running_trial_list': None,\n", - " 'start_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'succeeded_trial_list': ['cmaes-example-9pjzlnzc',\n", - " 'cmaes-example-dphxbch7',\n", - " 'cmaes-example-7zhq4s49'],\n", - " 'trial_metrics_unavailable': None,\n", - " 'trials': 3,\n", - " 'trials_early_stopped': None,\n", - " 'trials_failed': None,\n", - " 'trials_killed': None,\n", - " 'trials_pending': None,\n", - " 'trials_running': None,\n", - " 'trials_succeeded': 3}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "exp = client.get_experiment(name=EXPERIMENT_NAME)\n", "print(\"Experiment:\", exp.metadata.name, end=\"\\n\\n\")\n", @@ -523,34 +373,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'message': 'Experiment is created',\n", - " 'reason': 'ExperimentCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'}, {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Experiment is running',\n", - " 'reason': 'ExperimentRunning',\n", - " 'status': 'False',\n", - " 'type': 'Running'}, {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Experiment has succeeded because max trial count has reached',\n", - " 'reason': 'ExperimentMaxTrialsReached',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}]\n" - ] - } - ], + "outputs": [], "source": [ "conditions = client.get_experiment_conditions(name=EXPERIMENT_NAME)\n", "print(conditions)" @@ -558,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "tags": [ "raises-exception" @@ -581,32 +408,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'best_trial_name': 'cmaes-example-dphxbch7',\n", - " 'observation': {'metrics': [{'latest': '0.3130',\n", - " 'max': '2.2980',\n", - " 'min': '0.2691',\n", - " 
'name': 'loss'},\n", - " {'latest': 'unavailable',\n", - " 'max': 'unavailable',\n", - " 'min': 'unavailable',\n", - " 'name': 'Train-accuracy'}]},\n", - " 'parameter_assignments': [{'name': 'lr', 'value': '0.04511033252270099'},\n", - " {'name': 'momentum', 'value': '0.6980954001565728'}]}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client.get_optimal_hyperparameters(name=EXPERIMENT_NAME)" ] @@ -622,46 +428,12 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Trial: cmaes-example-dphxbch7\n", - "Trial Status:\n", - "{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 25, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 25, tzinfo=tzlocal()),\n", - " 'message': 'Trial has succeeded',\n", - " 'reason': 'TrialSucceeded',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}\n", - "\n", - "Trial: cmaes-example-9pjzlnzc\n", - "Trial Status:\n", - "{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 27, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 27, tzinfo=tzlocal()),\n", - " 'message': 'Trial has succeeded',\n", - " 'reason': 'TrialSucceeded',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}\n", - "\n", - "Trial: cmaes-example-7zhq4s49\n", - "Trial Status:\n", - "{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Trial has succeeded',\n", - " 'reason': 'TrialSucceeded',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "trial_list = client.list_trials(experiment_name=EXPERIMENT_NAME)\n", "for trial in trial_list:\n", @@ -671,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "tags": [ "raises-exception" @@ -698,78 +470,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Suggestion: cmaes-example\n", - "\n", - "Suggestion Spec:\n", - "{'algorithm': {'algorithm_name': 'cmaes', 'algorithm_settings': None},\n", - " 'early_stopping': None,\n", - " 'requests': 3,\n", - " 'resume_policy': 'Never'}\n", - "\n", - "Suggestion Status:\n", - "{'algorithm_settings': None,\n", - " 'completion_time': None,\n", - " 'conditions': [{'last_transition_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'message': 'Suggestion is created',\n", - " 'reason': 'SuggestionCreated',\n", - " 'status': 'True',\n", - " 'type': 'Created'},\n", - " {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 'Suggestion is not running',\n", - " 'reason': 'Suggestion is succeeded',\n", - " 'status': 'False',\n", - " 'type': 'Running'},\n", - " {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': 
'Deployment is not ready',\n", - " 'reason': 'Suggestion is succeeded',\n", - " 'status': 'False',\n", - " 'type': 'DeploymentReady'},\n", - " {'last_transition_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'last_update_time': datetime.datetime(2024, 3, 25, 14, 55, 58, tzinfo=tzlocal()),\n", - " 'message': \"Suggestion is succeeded, can't be restarted\",\n", - " 'reason': 'Experiment is succeeded',\n", - " 'status': 'True',\n", - " 'type': 'Succeeded'}],\n", - " 'last_reconcile_time': None,\n", - " 'start_time': datetime.datetime(2024, 3, 25, 14, 53, 57, tzinfo=tzlocal()),\n", - " 'suggestion_count': 3,\n", - " 'suggestions': [{'early_stopping_rules': None,\n", - " 'labels': None,\n", - " 'name': 'cmaes-example-9pjzlnzc',\n", - " 'parameter_assignments': [{'name': 'lr',\n", - " 'value': '0.04188612100654'},\n", - " {'name': 'momentum',\n", - " 'value': '0.7043612817216396'}]},\n", - " {'early_stopping_rules': None,\n", - " 'labels': None,\n", - " 'name': 'cmaes-example-dphxbch7',\n", - " 'parameter_assignments': [{'name': 'lr',\n", - " 'value': '0.04511033252270099'},\n", - " {'name': 'momentum',\n", - " 'value': '0.6980954001565728'}]},\n", - " {'early_stopping_rules': None,\n", - " 'labels': None,\n", - " 'name': 'cmaes-example-7zhq4s49',\n", - " 'parameter_assignments': [{'name': 'lr',\n", - " 'value': '0.02556132716757138'},\n", - " {'name': 'momentum',\n", - " 'value': '0.701003503816815'}]}]}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "suggestion = client.get_suggestion(name=EXPERIMENT_NAME)\n", "print(\"Suggestion:\", suggestion.metadata.name, end=\"\\n\\n\")\n", @@ -779,7 +484,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "tags": [ "raises-exception" @@ -801,26 +506,18 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Experiment user/cmaes-example has been deleted\n" - ] - } - ], + "outputs": [], "source": [ "client.delete_experiment(name=EXPERIMENT_NAME)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "tags": [] }, @@ -851,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "tags": [ "raises-exception" @@ -880,7 +577,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/tests/notebooks/kfp_v2/kfp-v2-integration.ipynb b/tests/notebooks/kfp_v2/kfp-v2-integration.ipynb index 0f50966..1d770d3 100644 --- a/tests/notebooks/kfp_v2/kfp-v2-integration.ipynb +++ b/tests/notebooks/kfp_v2/kfp-v2-integration.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "28f75e55-7bad-44e7-a65f-aedc81734a48", "metadata": { "tags": [ @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "4cdd7548-bae9-4430-b548-f420d72a8aec", "metadata": { "tags": [] @@ -47,28 +47,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "fd576641-1ff4-4fbb-9b3a-122abbd281ed", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.11/site-packages/kfp/client/client.py:159: FutureWarning: This client only works with Kubeflow Pipeline v2.0.0-beta.2 and later versions.\n", - " 
warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "client = kfp.Client()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "af70bb9d-3fea-40d7-acb9-649007b0bde6", "metadata": { "tags": [] @@ -80,21 +71,60 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, + "id": "0e0bea73-b980-48b0-8c2f-0709af35798b", + "metadata": {}, + "outputs": [], + "source": [ + "HTTP_PROXY = HTTPS_PROXY = NO_PROXY = None\n", + "\n", + "if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n", + " HTTP_PROXY = os.environ['HTTP_PROXY']\n", + " HTTPS_PROXY = os.environ['HTTPS_PROXY']\n", + " # add `.kubeflow` to NO_PROXY needed for pipelines\n", + " NO_PROXY = os.environ['NO_PROXY']+\",.kubeflow\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb28ac8a-4155-46ab-88a4-dc3f58c24003", + "metadata": {}, + "outputs": [], + "source": [ + "def add_proxy(obj, http_proxy=HTTP_PROXY, https_proxy=HTTPS_PROXY, no_proxy=NO_PROXY):\n", + " \"\"\"Adds the proxy env vars to the PipelineTask object.\"\"\"\n", + " return (\n", + " obj.set_env_variable(name='http_proxy', value=http_proxy)\n", + " .set_env_variable(name='https_proxy', value=https_proxy)\n", + " .set_env_variable(name='HTTP_PROXY', value=http_proxy)\n", + " .set_env_variable(name='HTTPS_PROXY', value=https_proxy)\n", + " .set_env_variable(name='no_proxy', value=no_proxy)\n", + " .set_env_variable(name='NO_PROXY', value=no_proxy)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd80a4ab-a444-42e0-94ae-ac2d5bd9d315", + "metadata": {}, + "outputs": [], + "source": [ + "def proxy_envs_set():\n", + " if HTTP_PROXY and HTTPS_PROXY and NO_PROXY:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "40a3a9e1-0645-474e-8451-92ccba88a122", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.11/site-packages/kfp/dsl/component_decorator.py:119: FutureWarning: Python 3.7 has reached end-of-life. The default base_image used by the @dsl.component decorator will switch from 'python:3.7' to 'python:3.8' on April 23, 2024. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.8.\n", - " return component_factory.create_component_from_func(\n" - ] - } - ], + "outputs": [], "source": [ "@dsl.component()\n", "def flip_coin(force_flip_result: str = '') -> str:\n", @@ -108,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "1d134c8b-54a7-4d10-ae2f-321ff305600a", "metadata": { "tags": [] @@ -123,21 +153,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "c8132d87-877c-4bfb-9127-e1f964fe3acb", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_356/2573238994.py:6: DeprecationWarning: dsl.Condition is deprecated. 
Please use dsl.If instead.\n", - " with dsl.Condition(flip1.output == 'heads'):\n" - ] - } - ], + "outputs": [], "source": [ "@dsl.pipeline(name='condition-v2')\n", "def condition_pipeline(text: str = 'condition test', force_flip_result: str = ''):\n", @@ -152,91 +173,63 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, + "id": "c04029ac-f284-4a13-a39c-6af783ec2b10", + "metadata": {}, + "outputs": [], + "source": [ + "@dsl.pipeline(name='condition-v2')\n", + "def condition_pipeline_proxy(text: str = 'condition test', force_flip_result: str = ''):\n", + " flip1 = add_proxy(flip_coin(force_flip_result=force_flip_result))\n", + " add_proxy(print_msg(msg=flip1.output))\n", + "\n", + " with dsl.Condition(flip1.output == 'heads'):\n", + " flip2 = add_proxy(flip_coin())\n", + " add_proxy(print_msg(msg=flip2.output))\n", + " add_proxy(print_msg(msg=text))" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "b85cc961-b6cc-4434-a59d-31e4c8a6e175", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "Experiment details." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run details." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "run = client.create_run_from_pipeline_func(\n", - " condition_pipeline,\n", - " experiment_name=EXPERIMENT_NAME,\n", - ")" + "if proxy_envs_set():\n", + " run = client.create_run_from_pipeline_func(\n", + " condition_pipeline_proxy,\n", + " experiment_name=EXPERIMENT_NAME,\n", + " )\n", + "else:\n", + " run = client.create_run_from_pipeline_func(\n", + " condition_pipeline,\n", + " experiment_name=EXPERIMENT_NAME,\n", + " )" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "37ebdc86-a16d-40a0-bc7e-33a2b90914f8", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'created_at': datetime.datetime(2023, 11, 21, 10, 35, tzinfo=tzlocal()),\n", - " 'description': None,\n", - " 'display_name': 'Flip a coin and output tails/heads pipeline',\n", - " 'experiment_id': '721a46c5-c6c9-4d28-af04-00a8503673ac',\n", - " 'namespace': 'daniela',\n", - " 'storage_state': 'AVAILABLE'}]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client.list_experiments().experiments" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "3226c13b-9d08-47e7-812f-47529c02d9dc", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/plain": [ - "'SUCCEEDED'" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "client.get_run(run.run_id).state" ] @@ -277,6 +270,14 @@ "\n", "assert_run_succeeded(client, run.run_id)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6eefaf73-53ab-4136-94c9-6b8e5006864a", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -295,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/tests/notebooks/training/training-integration.ipynb b/tests/notebooks/training/training-integration.ipynb index 32e0927..c2e65e6 100644 --- a/tests/notebooks/training/training-integration.ipynb +++ b/tests/notebooks/training/training-integration.ipynb @@ -1,897 +1,918 @@
{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Training Operator Integration\n", - "\n", - "This example notebook is loosely based on the following upstream examples:\n", - "* [TFJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/tensorflow/image-classification/create-tfjob.ipynb)\n", - "* [PyTorchJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/pytorch/image-classification/create-pytorchjob.ipynb)\n", - "* [PaddleJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/paddlepaddle/simple-cpu.yaml)\n", - "\n", - "Note that the above can get out of sync with the actual testing upstream does, so make sure to also check out [upstream E2E tests](https://github.com/kubeflow/training-operator/tree/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/sdk/python/test/e2e) for updating the notebook.\n", - "\n", - "The workflow for each job (TFJob, PyTorchJob, and PaddleJob) is:\n", - "- create training job\n", - "- monitor its execution\n", - "- get training logs\n", - "- delete job" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "pytest-skip" - ] - }, - "outputs": [], - "source": [ - "# Please check the requirements.in file for more details\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import required packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from kubeflow.training import (\n", - " KubeflowOrgV1PaddleJob,\n", - " KubeflowOrgV1PaddleJobSpec,\n", - " KubeflowOrgV1PyTorchJob,\n", - " KubeflowOrgV1PyTorchJobSpec,\n", - " KubeflowOrgV1TFJob,\n", - " KubeflowOrgV1TFJobSpec,\n", - " TrainingClient,\n", - " V1ReplicaSpec,\n", - " V1RunPolicy,\n", - ")\n", - "from kubernetes.client import (\n", - " V1Container,\n", - " V1ContainerPort,\n", - " V1ObjectMeta,\n", - " V1PodSpec,\n", - " V1PodTemplateSpec,\n", - ")\n", - "from tenacity import retry, stop_after_attempt, wait_exponential" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialise Training Client\n", - "\n", - "We will be using the Training SDK for any actions executed as part of this example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client = TrainingClient()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Helper to print training logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def print_training_logs(client, job_name: str, container: str, is_master: bool = True):\n", - " logs = client.get_job_logs(name=job_name, container=container, is_master=is_master)\n", - " print(logs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Helper to check that Job succeeded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=30),\n", - " stop=stop_after_attempt(50),\n", - " reraise=True,\n", - ")\n", - "def assert_job_succeeded(client, job_name, job_kind):\n", - " \"\"\"Wait for the Job to complete successfully.\"\"\"\n", - " assert client.is_job_succeeded(\n", - " name=job_name, job_kind=job_kind\n", - " ), f\"Job {job_name} was not successful.\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test TFJob" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a TFJob\n", - "\n", - "Define a TFJob object before deploying it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "TFJOB_NAME = \"mnist\"\n", - "TFJOB_CONTAINER = \"tensorflow\"\n", - "TFJOB_IMAGE = \"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "container = V1Container(\n", - " name=TFJOB_CONTAINER,\n", - " image=TFJOB_IMAGE,\n", - " command=[\n", - " \"python\",\n", - " \"/var/tf_mnist/mnist_with_summaries.py\",\n", - " \"--log_dir=/train/logs\",\n", - " \"--learning_rate=0.01\",\n", - " \"--batch_size=150\",\n", - " ],\n", - ")\n", - "\n", - "worker = V1ReplicaSpec(\n", - " replicas=2,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "chief = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "ps = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "tfjob = KubeflowOrgV1TFJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"TFJob\",\n", - " metadata=V1ObjectMeta(name=TFJOB_NAME),\n", - " spec=KubeflowOrgV1TFJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " tf_replica_specs={\"Worker\": worker, \"Chief\": chief, \"PS\": ps},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Name:\", tfjob.metadata.name)\n", - "print(\"Spec:\", tfjob.spec.tf_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing TFJobs\n", - "\n", - "List TFJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_tfjobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create TFJob\n", - "\n", - "Create a TFJob using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_tfjob(tfjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get TFJob\n", - "Get the created TFJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "tfjob = client.get_tfjob(name=TFJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, TFJOB_NAME, job_kind=\"TFJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", tfjob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", tfjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", tfjob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get TFJob Training logs\n", - "Get and print the training logs of the TFJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print_training_logs(client, TFJOB_NAME, container=TFJOB_CONTAINER)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete TFJob\n", - "\n", - "Delete the created TFJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_tfjob(name=TFJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_tfjob_removed(client, job_name):\n", - " \"\"\"Wait for TFJob to be removed.\"\"\"\n", - " # fetch the existing TFJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_tfjobs()}\n", - " assert job_name not in jobs, f\"Failed to delete TFJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for TFJob resources to be removed successfully\n", - "assert_tfjob_removed(client, TFJOB_NAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test PyTorchJob" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a PyTorchJob\n", - "Define a PyTorchJob object before deploying it. This PyTorchJob is similar to [this](https://github.com/kubeflow/training-operator/blob/11b7a115e6538caeab405344af98f0d5b42a4c96/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb) example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "PYTORCHJOB_NAME = \"pytorch-mnist-gloo\"\n", - "PYTORCHJOB_CONTAINER = \"pytorch\"\n", - "PYTORCHJOB_IMAGE = \"kubeflowkatib/pytorch-mnist-cpu:v0.16.0\"\n", - "# The image above should be updated with each release with the corresponding Katib version used in CKF release.\n", - "# Note that instead of using the [image from training-operator repository](https://github.com/kubeflow/training-operator/blob/master/examples/pytorch/mnist/Dockerfile),\n", - "# the one [from Katib](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu) is being used\n", - "# due to the large size of the first one." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "container = V1Container(\n", - " name=PYTORCHJOB_CONTAINER,\n", - " image=PYTORCHJOB_IMAGE,\n", - " args=[\"--backend\", \"gloo\", \"--epochs\", \"2\"],\n", - " # Passing `epochs`argument since kubeflowkatib image defaults to 10.\n", - ")\n", - "\n", - "replica_spec = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"OnFailure\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "pytorchjob = KubeflowOrgV1PyTorchJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"PyTorchJob\",\n", - " metadata=V1ObjectMeta(name=PYTORCHJOB_NAME),\n", - " spec=KubeflowOrgV1PyTorchJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " pytorch_replica_specs={\"Master\": replica_spec, \"Worker\": replica_spec},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Name:\", pytorchjob.metadata.name)\n", - "print(\"Spec:\", pytorchjob.spec.pytorch_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing PyTorchJobs\n", - "\n", - "List PyTorchJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_pytorchjobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create PyTorchJob\n", - "\n", - "Create a PyTorchJob using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_pytorchjob(pytorchjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get PyTorchJob\n", - "Get the created PyTorchJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "pytorchjob = client.get_pytorchjob(name=PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, PYTORCHJOB_NAME, job_kind=\"PyTorchJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", pytorchjob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", pytorchjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", pytorchjob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get PyTorchJob Training logs\n", - "Get and print the training logs of the PyTorchJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print_training_logs(client, PYTORCHJOB_NAME, container=PYTORCHJOB_CONTAINER)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete PyTorchJob\n", - "\n", - "Delete the created PyTorchJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_pytorchjob(name=PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_pytorchjob_removed(client, job_name):\n", - " \"\"\"Wait for PyTorchJob to be removed.\"\"\"\n", - " # fetch the existing PyTorchJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_pytorchjobs()}\n", - " assert job_name not in jobs, f\"Failed to delete PyTorchJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for PyTorch job to be removed successfully\n", - "assert_pytorchjob_removed(client, PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test PaddlePaddle" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a PaddleJob\n", - "\n", - "Define a PaddleJob object before deploying it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "PADDLEJOB_NAME = \"paddle-simple-cpu\"\n", - "PADDLEJOB_CONTAINER = \"paddle\"\n", - "PADDLEJOB_IMAGE = \"docker.io/paddlepaddle/paddle:2.4.0rc0-cpu\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "port = V1ContainerPort(container_port=37777, name=\"master\")\n", - "\n", - "container = V1Container(\n", - " name=PADDLEJOB_CONTAINER,\n", - " image=PADDLEJOB_IMAGE,\n", - " command=[\"python\"],\n", - " args=[\"-m\", \"paddle.distributed.launch\", \"run_check\"],\n", - " ports=[port],\n", - ")\n", - "\n", - "replica_spec = V1ReplicaSpec(\n", - " replicas=2,\n", - " restart_policy=\"OnFailure\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "paddlejob = KubeflowOrgV1PaddleJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"PaddleJob\",\n", - " metadata=V1ObjectMeta(name=PADDLEJOB_NAME),\n", - " spec=KubeflowOrgV1PaddleJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " paddle_replica_specs={\"Worker\": replica_spec},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Name:\", paddlejob.metadata.name)\n", - "print(\"Spec:\", paddlejob.spec.paddle_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing PaddleJobs\n", - "\n", - "List PaddleJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_paddlejobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create PaddleJob\n", - "\n", - "Create a PaddleJob using the SDK." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_paddlejob(paddlejob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get PaddleJob\n", - "Get the created PaddleJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "paddlejob = client.get_paddlejob(name=PADDLEJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, PADDLEJOB_NAME, job_kind=\"PaddleJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", paddlejob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", paddlejob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", paddlejob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get PaddleJob Training logs\n", - "Get and print the training logs of the PaddleJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# set is_master to False because this example does not include a master replica type\n", - "print_training_logs(client, PADDLEJOB_NAME, container=PADDLEJOB_CONTAINER, is_master=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete PaddleJob\n", - "\n", - "Delete the created PaddleJob." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_paddlejob(name=PADDLEJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_paddlejob_removed(client, job_name):\n", - " \"\"\"Wait for PaddleJob to be removed.\"\"\"\n", - " # fetch the existing PaddleJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_paddlejobs()}\n", - " assert job_name not in jobs, f\"Failed to delete PaddleJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for PaddleJob to be removed successfully\n", - "assert_paddlejob_removed(client, PADDLEJOB_NAME)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Training Operator Integration\n", + "\n", + "This example notebook is loosely based on the following upstream examples:\n", + "* [TFJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/tensorflow/image-classification/create-tfjob.ipynb)\n", + "* [PyTorchJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/pytorch/image-classification/create-pytorchjob.ipynb)\n", + "* [PaddleJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/paddlepaddle/simple-cpu.yaml)\n", + "\n", + "Note that the above can get out of sync with the actual testing upstream does, so make sure to also check out [upstream E2E tests](https://github.com/kubeflow/training-operator/tree/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/sdk/python/test/e2e) for updating the notebook.\n", + "\n", + "The workflow for each job (TFJob, PyTorchJob, and PaddleJob) is:\n", + "- create training job\n", + "- monitor its execution\n", + "- get training logs\n", + "- delete job" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "pytest-skip" + ] + }, + "outputs": [], + "source": [ + "# Please check the requirements.in file for more details\n", + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from kubeflow.training import (\n", + " KubeflowOrgV1PaddleJob,\n", + " KubeflowOrgV1PaddleJobSpec,\n", + " KubeflowOrgV1PyTorchJob,\n", + " KubeflowOrgV1PyTorchJobSpec,\n", + " KubeflowOrgV1TFJob,\n", + " KubeflowOrgV1TFJobSpec,\n", + " TrainingClient,\n", + " V1ReplicaSpec,\n", + " V1RunPolicy,\n", + ")\n", + "from kubernetes.client import 
(\n", + " V1Container,\n", + " V1ContainerPort,\n", + " V1ObjectMeta,\n", + " V1PodSpec,\n", + " V1PodTemplateSpec,\n", + ")\n", + "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialise Training Client\n", + "\n", + "We will be using the Training SDK for any actions executed as part of this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = TrainingClient()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to print training logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def print_training_logs(client, job_name: str, container: str, is_master: bool = True):\n", + " logs = client.get_job_logs(name=job_name, container=container, is_master=is_master)\n", + " print(logs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to check that Job succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=30),\n", + " stop=stop_after_attempt(50),\n", + " reraise=True,\n", + ")\n", + "def assert_job_succeeded(client, job_name, job_kind):\n", + " \"\"\"Wait for the Job to complete successfully.\"\"\"\n", + " assert client.is_job_succeeded(\n", + " name=job_name, job_kind=job_kind\n", + " ), f\"Job {job_name} was not successful.\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to get the spec labels\n", + "This will add the label for the proxy PodDefault if the proxy envs are set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "training_labels = {}\n", + "if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n", + " training_labels = {\"notebook-proxy\": \"true\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test TFJob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a TFJob\n", + "\n", + "Define a TFJob object before deploying it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TFJOB_NAME = \"mnist\"\n", + "TFJOB_CONTAINER = \"tensorflow\"\n", + "TFJOB_IMAGE = \"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "container = V1Container(\n", + " name=TFJOB_CONTAINER,\n", + " image=TFJOB_IMAGE,\n", + " command=[\n", + " \"python\",\n", + " \"/var/tf_mnist/mnist_with_summaries.py\",\n", + " \"--log_dir=/train/logs\",\n", + " \"--learning_rate=0.01\",\n", + " \"--batch_size=150\",\n", + " ],\n", + ")\n", + "\n", + "worker = V1ReplicaSpec(\n", + " replicas=2,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "chief = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "ps = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "tfjob = KubeflowOrgV1TFJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"TFJob\",\n", + " metadata=V1ObjectMeta(name=TFJOB_NAME),\n", + " spec=KubeflowOrgV1TFJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " tf_replica_specs={\"Worker\": worker, \"Chief\": chief, \"PS\": ps},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Name:\", tfjob.metadata.name)\n", + "print(\"Spec:\", tfjob.spec.tf_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing TFJobs\n", + "\n", + "List TFJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_tfjobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create TFJob\n", + "\n", + "Create a TFJob using the SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_tfjob(tfjob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get TFJob\n", + "Get the created TFJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "tfjob = client.get_tfjob(name=TFJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, TFJOB_NAME, job_kind=\"TFJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", tfjob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", tfjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", tfjob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get TFJob Training logs\n", + "Get and print the training logs of the TFJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_training_logs(client, TFJOB_NAME, container=TFJOB_CONTAINER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete TFJob\n", + "\n", + "Delete the created TFJob." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_tfjob(name=TFJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_tfjob_removed(client, job_name):\n", + " \"\"\"Wait for TFJob to be removed.\"\"\"\n", + " # fetch the existing TFJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_tfjobs()}\n", + " assert job_name not in jobs, f\"Failed to delete TFJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for TFJob resources to be removed successfully\n", + "assert_tfjob_removed(client, TFJOB_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test PyTorchJob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a PyTorchJob\n", + "Define a PyTorchJob object before deploying it. This PyTorchJob is similar to [this](https://github.com/kubeflow/training-operator/blob/11b7a115e6538caeab405344af98f0d5b42a4c96/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb) example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PYTORCHJOB_NAME = \"pytorch-mnist-gloo\"\n", + "PYTORCHJOB_CONTAINER = \"pytorch\"\n", + "PYTORCHJOB_IMAGE = \"kubeflowkatib/pytorch-mnist-cpu:v0.16.0\"\n", + "# The image above should be updated with each release with the corresponding Katib version used in CKF release.\n", + "# Note that instead of using the [image from training-operator repository](https://github.com/kubeflow/training-operator/blob/master/examples/pytorch/mnist/Dockerfile),\n", + "# the one [from Katib](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu) is being used\n", + "# due to the large size of the first one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "container = V1Container(\n", + " name=PYTORCHJOB_CONTAINER,\n", + " image=PYTORCHJOB_IMAGE,\n", + " args=[\"--backend\", \"gloo\", \"--epochs\", \"2\"],\n", + " # Passing `epochs`argument since kubeflowkatib image defaults to 10.\n", + ")\n", + "\n", + "replica_spec = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "pytorchjob = KubeflowOrgV1PyTorchJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"PyTorchJob\",\n", + " metadata=V1ObjectMeta(name=PYTORCHJOB_NAME),\n", + " spec=KubeflowOrgV1PyTorchJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " pytorch_replica_specs={\"Master\": replica_spec, \"Worker\": replica_spec},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Name:\", pytorchjob.metadata.name)\n", + "print(\"Spec:\", pytorchjob.spec.pytorch_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing PyTorchJobs\n", + "\n", + "List PyTorchJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_pytorchjobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create PyTorchJob\n", + "\n", + "Create a PyTorchJob using the SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_pytorchjob(pytorchjob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get PyTorchJob\n", + "Get the created PyTorchJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "pytorchjob = client.get_pytorchjob(name=PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, PYTORCHJOB_NAME, job_kind=\"PyTorchJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", pytorchjob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", pytorchjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", pytorchjob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get PyTorchJob Training logs\n", + "Get and print the training logs of the PyTorchJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_training_logs(client, PYTORCHJOB_NAME, container=PYTORCHJOB_CONTAINER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete PyTorchJob\n", + "\n", + "Delete the created PyTorchJob." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_pytorchjob(name=PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_pytorchjob_removed(client, job_name):\n", + " \"\"\"Wait for PyTorchJob to be removed.\"\"\"\n", + " # fetch the existing PyTorchJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_pytorchjobs()}\n", + " assert job_name not in jobs, f\"Failed to delete PyTorchJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for PyTorch job to be removed successfully\n", + "assert_pytorchjob_removed(client, PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test PaddlePaddle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a PaddleJob\n", + "\n", + "Define a PaddleJob object before deploying it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PADDLEJOB_NAME = \"paddle-simple-cpu\"\n", + "PADDLEJOB_CONTAINER = \"paddle\"\n", + "PADDLEJOB_IMAGE = \"docker.io/paddlepaddle/paddle:2.4.0rc0-cpu\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "port = V1ContainerPort(container_port=37777, name=\"master\")\n", + "\n", + "container = V1Container(\n", + " name=PADDLEJOB_CONTAINER,\n", + " image=PADDLEJOB_IMAGE,\n", + " command=[\"python\"],\n", + " args=[\"-m\", \"paddle.distributed.launch\", \"run_check\"],\n", + " ports=[port],\n", + ")\n", + "\n", + "replica_spec = V1ReplicaSpec(\n", + " replicas=2,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "paddlejob = KubeflowOrgV1PaddleJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"PaddleJob\",\n", + " metadata=V1ObjectMeta(name=PADDLEJOB_NAME, labels=training_labels),\n", + " spec=KubeflowOrgV1PaddleJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " paddle_replica_specs={\"Worker\": replica_spec},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Name:\", paddlejob.metadata.name)\n", + "print(\"Spec:\", paddlejob.spec.paddle_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing PaddleJobs\n", + "\n", + "List PaddleJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_paddlejobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create PaddleJob\n", + "\n", + "Create a PaddleJob using the SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_paddlejob(paddlejob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get PaddleJob\n", + "Get the created PaddleJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "paddlejob = client.get_paddlejob(name=PADDLEJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, PADDLEJOB_NAME, job_kind=\"PaddleJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", paddlejob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", paddlejob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", paddlejob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get PaddleJob Training logs\n", + "Get and print the training logs of the PaddleJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set is_master to False because this example does not include a master replica type\n", + "print_training_logs(client, PADDLEJOB_NAME, container=PADDLEJOB_CONTAINER, is_master=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete PaddleJob\n", + "\n", + "Delete the created PaddleJob." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_paddlejob(name=PADDLEJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_paddlejob_removed(client, job_name):\n", + " \"\"\"Wait for PaddleJob to be removed.\"\"\"\n", + " # fetch the existing PaddleJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_paddlejobs()}\n", + " assert job_name not in jobs, f\"Failed to delete PaddleJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for PaddleJob to be removed successfully\n", + "assert_paddlejob_removed(client, PADDLEJOB_NAME)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 + } diff --git a/tests/proxy-poddefault.yaml b/tests/proxy-poddefault.yaml new file mode 100644 index 0000000..a1f7300 --- /dev/null +++ b/tests/proxy-poddefault.yaml @@ -0,0 +1,22 @@ +apiVersion: kubeflow.org/v1alpha1 +kind: PodDefault +metadata: + name: notebook-proxy +spec: + desc: Add proxy settings + env: + - name: HTTP_PROXY + value: : + - name: http_proxy + value: : + - name: HTTPS_PROXY + value: : + - name: https_proxy + value: : + - name: NO_PROXY + value: ,,127.0.0.1,/24,,.svc,.local + - name: no_proxy + 
value: ,,127.0.0.1,/24,,.svc,.local + selector: + matchLabels: + notebook-proxy: "true" From 9861a407d213a5d70ec1c20ed78662b77a6ba3f1 Mon Sep 17 00:00:00 2001 From: Michal Hucko Date: Fri, 23 Aug 2024 12:11:35 +0200 Subject: [PATCH 2/5] Prefetch mnist dataset in tfjob to solve proxy problems (#105) * Prefetch mnist dataset in tfjob to solve proxy problems --- .../training/training-integration.ipynb | 1839 +++++++++-------- 1 file changed, 922 insertions(+), 917 deletions(-) diff --git a/tests/notebooks/training/training-integration.ipynb b/tests/notebooks/training/training-integration.ipynb index c2e65e6..9cc0b56 100644 --- a/tests/notebooks/training/training-integration.ipynb +++ b/tests/notebooks/training/training-integration.ipynb @@ -1,918 +1,923 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Training Operator Integration\n", - "\n", - "This example notebook is loosely based on the following upstream examples:\n", - "* [TFJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/tensorflow/image-classification/create-tfjob.ipynb)\n", - "* [PyTorchJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/pytorch/image-classification/create-pytorchjob.ipynb)\n", - "* [PaddleJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/paddlepaddle/simple-cpu.yaml)\n", - "\n", - "Note that the above can get out of sync with the actual testing upstream does, so make sure to also check out [upstream E2E tests](https://github.com/kubeflow/training-operator/tree/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/sdk/python/test/e2e) for updating the notebook.\n", - "\n", - "The workflow for each job (TFJob, PyTorchJob, and PaddleJob) is:\n", - "- create training job\n", - "- monitor its execution\n", - "- get training logs\n", - "- delete job" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "pytest-skip" - ] - }, - "outputs": [], - "source": [ - "# Please check the requirements.in file for more details\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import required packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from kubeflow.training import (\n", - " KubeflowOrgV1PaddleJob,\n", - " KubeflowOrgV1PaddleJobSpec,\n", - " KubeflowOrgV1PyTorchJob,\n", - " KubeflowOrgV1PyTorchJobSpec,\n", - " KubeflowOrgV1TFJob,\n", - " KubeflowOrgV1TFJobSpec,\n", - " TrainingClient,\n", - " V1ReplicaSpec,\n", - " V1RunPolicy,\n", - ")\n", - "from kubernetes.client import (\n", - " V1Container,\n", - " V1ContainerPort,\n", - " V1ObjectMeta,\n", - " V1PodSpec,\n", - " V1PodTemplateSpec,\n", - ")\n", - "from tenacity import retry, stop_after_attempt, wait_exponential\n", - "\n", - "import os" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialise Training Client\n", - "\n", - "We will be using the Training SDK for any actions executed as part of this example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client = TrainingClient()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Helper to print training logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def print_training_logs(client, job_name: str, container: str, is_master: bool = True):\n", - " logs = client.get_job_logs(name=job_name, container=container, is_master=is_master)\n", - " print(logs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Helper to check that Job succeeded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=30),\n", - " stop=stop_after_attempt(50),\n", - " reraise=True,\n", - ")\n", - "def assert_job_succeeded(client, job_name, job_kind):\n", - " \"\"\"Wait for the Job to complete successfully.\"\"\"\n", - " assert client.is_job_succeeded(\n", - " name=job_name, job_kind=job_kind\n", - " ), f\"Job {job_name} was not successful.\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Helper to get the spec labels\n", - "This will add the label for the proxy PodDefault if the proxy envs are set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_labels = {}\n", - "if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n", - " training_labels = {\"notebook-proxy\": \"true\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test TFJob" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a TFJob\n", - "\n", - "Define a TFJob object before deploying it." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "TFJOB_NAME = \"mnist\"\n", - "TFJOB_CONTAINER = \"tensorflow\"\n", - "TFJOB_IMAGE = \"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "container = V1Container(\n", - " name=TFJOB_CONTAINER,\n", - " image=TFJOB_IMAGE,\n", - " command=[\n", - " \"python\",\n", - " \"/var/tf_mnist/mnist_with_summaries.py\",\n", - " \"--log_dir=/train/logs\",\n", - " \"--learning_rate=0.01\",\n", - " \"--batch_size=150\",\n", - " ],\n", - ")\n", - "\n", - "worker = V1ReplicaSpec(\n", - " replicas=2,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "chief = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "ps = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"Never\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "tfjob = KubeflowOrgV1TFJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"TFJob\",\n", - " metadata=V1ObjectMeta(name=TFJOB_NAME),\n", - " spec=KubeflowOrgV1TFJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " tf_replica_specs={\"Worker\": worker, \"Chief\": chief, \"PS\": ps},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Name:\", tfjob.metadata.name)\n", - "print(\"Spec:\", tfjob.spec.tf_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing TFJobs\n", - "\n", - "List TFJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_tfjobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create TFJob\n", - "\n", - "Create a TFJob using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_tfjob(tfjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get TFJob\n", - "Get the created TFJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "tfjob = client.get_tfjob(name=TFJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, TFJOB_NAME, job_kind=\"TFJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", tfjob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", tfjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", tfjob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get TFJob Training logs\n", - "Get and print the training logs of the TFJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print_training_logs(client, TFJOB_NAME, container=TFJOB_CONTAINER)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete TFJob\n", - "\n", - "Delete the created TFJob." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_tfjob(name=TFJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_tfjob_removed(client, job_name):\n", - " \"\"\"Wait for TFJob to be removed.\"\"\"\n", - " # fetch the existing TFJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_tfjobs()}\n", - " assert job_name not in jobs, f\"Failed to delete TFJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for TFJob resources to be removed successfully\n", - "assert_tfjob_removed(client, TFJOB_NAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test PyTorchJob" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a PyTorchJob\n", - "Define a PyTorchJob object before deploying it. This PyTorchJob is similar to [this](https://github.com/kubeflow/training-operator/blob/11b7a115e6538caeab405344af98f0d5b42a4c96/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb) example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "PYTORCHJOB_NAME = \"pytorch-mnist-gloo\"\n", - "PYTORCHJOB_CONTAINER = \"pytorch\"\n", - "PYTORCHJOB_IMAGE = \"kubeflowkatib/pytorch-mnist-cpu:v0.16.0\"\n", - "# The image above should be updated with each release with the corresponding Katib version used in CKF release.\n", - "# Note that instead of using the [image from training-operator repository](https://github.com/kubeflow/training-operator/blob/master/examples/pytorch/mnist/Dockerfile),\n", - "# the one [from Katib](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu) is being used\n", - "# due to the large size of the first one." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "container = V1Container(\n", - " name=PYTORCHJOB_CONTAINER,\n", - " image=PYTORCHJOB_IMAGE,\n", - " args=[\"--backend\", \"gloo\", \"--epochs\", \"2\"],\n", - " # Passing `epochs`argument since kubeflowkatib image defaults to 10.\n", - ")\n", - "\n", - "replica_spec = V1ReplicaSpec(\n", - " replicas=1,\n", - " restart_policy=\"OnFailure\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "pytorchjob = KubeflowOrgV1PyTorchJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"PyTorchJob\",\n", - " metadata=V1ObjectMeta(name=PYTORCHJOB_NAME),\n", - " spec=KubeflowOrgV1PyTorchJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " pytorch_replica_specs={\"Master\": replica_spec, \"Worker\": replica_spec},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Name:\", pytorchjob.metadata.name)\n", - "print(\"Spec:\", pytorchjob.spec.pytorch_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing PyTorchJobs\n", - "\n", - "List PyTorchJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_pytorchjobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create PyTorchJob\n", - "\n", - "Create a PyTorchJob using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_pytorchjob(pytorchjob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get PyTorchJob\n", - "Get the created PyTorchJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "pytorchjob = client.get_pytorchjob(name=PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, PYTORCHJOB_NAME, job_kind=\"PyTorchJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", pytorchjob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", pytorchjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", pytorchjob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get PyTorchJob Training logs\n", - "Get and print the training logs of the PyTorchJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print_training_logs(client, PYTORCHJOB_NAME, container=PYTORCHJOB_CONTAINER)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete PyTorchJob\n", - "\n", - "Delete the created PyTorchJob." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_pytorchjob(name=PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_pytorchjob_removed(client, job_name):\n", - " \"\"\"Wait for PyTorchJob to be removed.\"\"\"\n", - " # fetch the existing PyTorchJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_pytorchjobs()}\n", - " assert job_name not in jobs, f\"Failed to delete PyTorchJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for PyTorch job to be removed successfully\n", - "assert_pytorchjob_removed(client, PYTORCHJOB_NAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test PaddlePaddle" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define a PaddleJob\n", - "\n", - "Define a PaddleJob object before deploying it." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "PADDLEJOB_NAME = \"paddle-simple-cpu\"\n", - "PADDLEJOB_CONTAINER = \"paddle\"\n", - "PADDLEJOB_IMAGE = \"docker.io/paddlepaddle/paddle:2.4.0rc0-cpu\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "port = V1ContainerPort(container_port=37777, name=\"master\")\n", - "\n", - "container = V1Container(\n", - " name=PADDLEJOB_CONTAINER,\n", - " image=PADDLEJOB_IMAGE,\n", - " command=[\"python\"],\n", - " args=[\"-m\", \"paddle.distributed.launch\", \"run_check\"],\n", - " ports=[port],\n", - ")\n", - "\n", - "replica_spec = V1ReplicaSpec(\n", - " replicas=2,\n", - " restart_policy=\"OnFailure\",\n", - " template=V1PodTemplateSpec(\n", - " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", - " spec=V1PodSpec(containers=[container]),\n", - " ),\n", - ")\n", - "\n", - "paddlejob = KubeflowOrgV1PaddleJob(\n", - " api_version=\"kubeflow.org/v1\",\n", - " kind=\"PaddleJob\",\n", - " metadata=V1ObjectMeta(name=PADDLEJOB_NAME, labels=training_labels),\n", - " spec=KubeflowOrgV1PaddleJobSpec(\n", - " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", - " paddle_replica_specs={\"Worker\": replica_spec},\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the Job's info to verify it before submission." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Name:\", paddlejob.metadata.name)\n", - "print(\"Spec:\", paddlejob.spec.paddle_replica_specs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List existing PaddleJobs\n", - "\n", - "List PaddleJobs in the current namespace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[job.metadata.name for job in client.list_paddlejobs()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create PaddleJob\n", - "\n", - "Create a PaddleJob using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.create_paddlejob(paddlejob)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Get PaddleJob\n", - "Get the created PaddleJob by name and check its data. \n", - "Make sure that it completes successfully before proceeding. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# verify that the Job was created successfully\n", - "# raises an error if it doesn't exist\n", - "paddlejob = client.get_paddlejob(name=PADDLEJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for the Job to complete successfully\n", - "assert_job_succeeded(client, PADDLEJOB_NAME, job_kind=\"PaddleJob\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(\"Job:\", paddlejob.metadata.name, end=\"\\n\\n\")\n", - "print(\"Job Spec:\", paddlejob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", - "print(\"Job Status:\", paddlejob.status, sep=\"\\n\", end=\"\\n\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get PaddleJob Training logs\n", - "Get and print the training logs of the PaddleJob with the training steps " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# set is_master to False because this example does not include a master replica type\n", - "print_training_logs(client, PADDLEJOB_NAME, container=PADDLEJOB_CONTAINER, is_master=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete PaddleJob\n", - "\n", - "Delete the created PaddleJob." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client.delete_paddlejob(name=PADDLEJOB_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@retry(\n", - " wait=wait_exponential(multiplier=2, min=1, max=10),\n", - " stop=stop_after_attempt(30),\n", - " reraise=True,\n", - ")\n", - "def assert_paddlejob_removed(client, job_name):\n", - " \"\"\"Wait for PaddleJob to be removed.\"\"\"\n", - " # fetch the existing PaddleJob names\n", - " # verify that the Job was deleted successfully\n", - " jobs = {job.metadata.name for job in client.list_paddlejobs()}\n", - " assert job_name not in jobs, f\"Failed to delete PaddleJob {job_name}!\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "raises-exception" - ] - }, - "outputs": [], - "source": [ - "# wait for PaddleJob to be removed successfully\n", - "assert_paddlejob_removed(client, PADDLEJOB_NAME)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Training Operator Integration\n", + "\n", + "This example notebook is loosely based on the following upstream examples:\n", + "* [TFJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/tensorflow/image-classification/create-tfjob.ipynb)\n", + "* 
[PyTorchJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/pytorch/image-classification/create-pytorchjob.ipynb)\n", + "* [PaddleJob](https://github.com/kubeflow/training-operator/blob/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/examples/paddlepaddle/simple-cpu.yaml)\n", + "\n", + "Note that the above can get out of sync with the actual testing upstream does, so make sure to also check out [upstream E2E tests](https://github.com/kubeflow/training-operator/tree/964a6e836eedff11edfe79cc9f4e5b7c623cbe88/sdk/python/test/e2e) for updating the notebook.\n", + "\n", + "The workflow for each job (TFJob, PyTorchJob, and PaddleJob) is:\n", + "- create training job\n", + "- monitor its execution\n", + "- get training logs\n", + "- delete job" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "pytest-skip" + ] + }, + "outputs": [], + "source": [ + "# Please check the requirements.in file for more details\n", + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from kubeflow.training import (\n", + " KubeflowOrgV1PaddleJob,\n", + " KubeflowOrgV1PaddleJobSpec,\n", + " KubeflowOrgV1PyTorchJob,\n", + " KubeflowOrgV1PyTorchJobSpec,\n", + " KubeflowOrgV1TFJob,\n", + " KubeflowOrgV1TFJobSpec,\n", + " TrainingClient,\n", + " V1ReplicaSpec,\n", + " V1RunPolicy,\n", + ")\n", + "from kubernetes.client import (\n", + " V1Container,\n", + " V1ContainerPort,\n", + " V1ObjectMeta,\n", + " V1PodSpec,\n", + " V1PodTemplateSpec,\n", + ")\n", + "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialise Training Client\n", + "\n", + "We will be using the Training SDK for any actions executed as part of this example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = TrainingClient()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to print training logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def print_training_logs(client, job_name: str, container: str, is_master: bool = True):\n", + " logs = client.get_job_logs(name=job_name, container=container, is_master=is_master)\n", + " print(logs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to check that Job succeeded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=30),\n", + " stop=stop_after_attempt(50),\n", + " reraise=True,\n", + ")\n", + "def assert_job_succeeded(client, job_name, job_kind):\n", + " \"\"\"Wait for the Job to complete successfully.\"\"\"\n", + " assert client.is_job_succeeded(\n", + " name=job_name, job_kind=job_kind\n", + " ), f\"Job {job_name} was not successful.\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Helper to get the spec labels\n", + "This will add the label for the proxy PodDefault if the proxy envs are set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "training_labels = {}\n", + "if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n", + " training_labels = {\"notebook-proxy\": \"true\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test TFJob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a TFJob\n", + "\n", + "Define a TFJob object before deploying it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TFJOB_NAME = \"mnist\"\n", + "TFJOB_CONTAINER = \"tensorflow\"\n", + "TFJOB_IMAGE = \"kubeflow/tf-mnist-with-summaries:latest\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "container = V1Container(\n", + " name=TFJOB_CONTAINER,\n", + " image=TFJOB_IMAGE,\n", + " command=[\n", + " \"sh\", \"-c\",\n", + " # Download MNIST dataset using curl to overcome proxy problems https://github.com/canonical/training-operator/issues/182\n", + " # Mnist data loads from /tmp/tensorflow/mnist/input_data [see reference in mnist_with_summaries.py](https://github.com/kubeflow/training-operator/blob/master/examples/tensorflow/mnist_with_summaries/mnist_with_summaries.py#L213)\n", + " \"mkdir -p /tmp/tensorflow/mnist/input_data/ && \" +\n", + " \"curl -L -o /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-images-idx3-ubyte.gz && \" +\n", + " \"curl -L -o /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/train-labels-idx1-ubyte.gz && \" +\n", + " \"curl -L -o /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-images-idx3-ubyte.gz && \" +\n", + " \"curl -L -o /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz https://github.com/golbin/TensorFlow-MNIST/raw/master/mnist/data/t10k-labels-idx1-ubyte.gz && \" +\n", + " # Run the TensorFlow script after downloading the dataset\n", + " \"python /var/tf_mnist/mnist_with_summaries.py --log_dir=/train/logs --learning_rate=0.01 --batch_size=150\"\n", + " ],\n", + ")\n", + "\n", + "worker = V1ReplicaSpec(\n", + " replicas=2,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "chief = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "ps = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"Never\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "tfjob = KubeflowOrgV1TFJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"TFJob\",\n", + " metadata=V1ObjectMeta(name=TFJOB_NAME),\n", + " spec=KubeflowOrgV1TFJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " tf_replica_specs={\"Worker\": worker, \"Chief\": chief, \"PS\": ps},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Name:\", tfjob.metadata.name)\n", + "print(\"Spec:\", tfjob.spec.tf_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing TFJobs\n", + "\n", + "List TFJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_tfjobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create TFJob\n", + "\n", + "Create a TFJob using the SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_tfjob(tfjob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get TFJob\n", + "Get the created TFJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "tfjob = client.get_tfjob(name=TFJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, TFJOB_NAME, job_kind=\"TFJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", tfjob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", tfjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", tfjob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get TFJob Training logs\n", + "Get and print the training logs of the TFJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_training_logs(client, TFJOB_NAME, container=TFJOB_CONTAINER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete TFJob\n", + "\n", + "Delete the created TFJob." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_tfjob(name=TFJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_tfjob_removed(client, job_name):\n", + " \"\"\"Wait for TFJob to be removed.\"\"\"\n", + " # fetch the existing TFJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_tfjobs()}\n", + " assert job_name not in jobs, f\"Failed to delete TFJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for TFJob resources to be removed successfully\n", + "assert_tfjob_removed(client, TFJOB_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test PyTorchJob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a PyTorchJob\n", + "Define a PyTorchJob object before deploying it. This PyTorchJob is similar to [this](https://github.com/kubeflow/training-operator/blob/11b7a115e6538caeab405344af98f0d5b42a4c96/sdk/python/examples/kubeflow-pytorchjob-sdk.ipynb) example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PYTORCHJOB_NAME = \"pytorch-mnist-gloo\"\n", + "PYTORCHJOB_CONTAINER = \"pytorch\"\n", + "PYTORCHJOB_IMAGE = \"kubeflowkatib/pytorch-mnist-cpu:v0.16.0\"\n", + "# The image above should be updated with each release with the corresponding Katib version used in CKF release.\n", + "# Note that instead of using the [image from training-operator repository](https://github.com/kubeflow/training-operator/blob/master/examples/pytorch/mnist/Dockerfile),\n", + "# the one [from Katib](https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu) is being used\n", + "# due to the large size of the first one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "container = V1Container(\n", + " name=PYTORCHJOB_CONTAINER,\n", + " image=PYTORCHJOB_IMAGE,\n", + " args=[\"--backend\", \"gloo\", \"--epochs\", \"2\"],\n", + " # Passing `epochs`argument since kubeflowkatib image defaults to 10.\n", + ")\n", + "\n", + "replica_spec = V1ReplicaSpec(\n", + " replicas=1,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}, labels=training_labels),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "pytorchjob = KubeflowOrgV1PyTorchJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"PyTorchJob\",\n", + " metadata=V1ObjectMeta(name=PYTORCHJOB_NAME),\n", + " spec=KubeflowOrgV1PyTorchJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " pytorch_replica_specs={\"Master\": replica_spec, \"Worker\": replica_spec},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Name:\", pytorchjob.metadata.name)\n", + "print(\"Spec:\", pytorchjob.spec.pytorch_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing PyTorchJobs\n", + "\n", + "List PyTorchJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_pytorchjobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create PyTorchJob\n", + "\n", + "Create a PyTorchJob using the SDK." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_pytorchjob(pytorchjob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get PyTorchJob\n", + "Get the created PyTorchJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "pytorchjob = client.get_pytorchjob(name=PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, PYTORCHJOB_NAME, job_kind=\"PyTorchJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", pytorchjob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", pytorchjob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", pytorchjob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get PyTorchJob Training logs\n", + "Get and print the training logs of the PyTorchJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_training_logs(client, PYTORCHJOB_NAME, container=PYTORCHJOB_CONTAINER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete PyTorchJob\n", + "\n", + "Delete the created PyTorchJob." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_pytorchjob(name=PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_pytorchjob_removed(client, job_name):\n", + " \"\"\"Wait for PyTorchJob to be removed.\"\"\"\n", + " # fetch the existing PyTorchJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_pytorchjobs()}\n", + " assert job_name not in jobs, f\"Failed to delete PyTorchJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for PyTorch job to be removed successfully\n", + "assert_pytorchjob_removed(client, PYTORCHJOB_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test PaddlePaddle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a PaddleJob\n", + "\n", + "Define a PaddleJob object before deploying it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PADDLEJOB_NAME = \"paddle-simple-cpu\"\n", + "PADDLEJOB_CONTAINER = \"paddle\"\n", + "PADDLEJOB_IMAGE = \"docker.io/paddlepaddle/paddle:2.4.0rc0-cpu\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "port = V1ContainerPort(container_port=37777, name=\"master\")\n", + "\n", + "container = V1Container(\n", + " name=PADDLEJOB_CONTAINER,\n", + " image=PADDLEJOB_IMAGE,\n", + " command=[\"python\"],\n", + " args=[\"-m\", \"paddle.distributed.launch\", \"run_check\"],\n", + " ports=[port],\n", + ")\n", + "\n", + "replica_spec = V1ReplicaSpec(\n", + " replicas=2,\n", + " restart_policy=\"OnFailure\",\n", + " template=V1PodTemplateSpec(\n", + " metadata=V1ObjectMeta(annotations={\"sidecar.istio.io/inject\": \"false\"}),\n", + " spec=V1PodSpec(containers=[container]),\n", + " ),\n", + ")\n", + "\n", + "paddlejob = KubeflowOrgV1PaddleJob(\n", + " api_version=\"kubeflow.org/v1\",\n", + " kind=\"PaddleJob\",\n", + " metadata=V1ObjectMeta(name=PADDLEJOB_NAME, labels=training_labels),\n", + " spec=KubeflowOrgV1PaddleJobSpec(\n", + " run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", + " paddle_replica_specs={\"Worker\": replica_spec},\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the Job's info to verify it before submission." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Name:\", paddlejob.metadata.name)\n", + "print(\"Spec:\", paddlejob.spec.paddle_replica_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List existing PaddleJobs\n", + "\n", + "List PaddleJobs in the current namespace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[job.metadata.name for job in client.list_paddlejobs()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create PaddleJob\n", + "\n", + "Create a PaddleJob using the SDK." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.create_paddlejob(paddlejob)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Get PaddleJob\n", + "Get the created PaddleJob by name and check its data. \n", + "Make sure that it completes successfully before proceeding. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# verify that the Job was created successfully\n", + "# raises an error if it doesn't exist\n", + "paddlejob = client.get_paddlejob(name=PADDLEJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for the Job to complete successfully\n", + "assert_job_succeeded(client, PADDLEJOB_NAME, job_kind=\"PaddleJob\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(\"Job:\", paddlejob.metadata.name, end=\"\\n\\n\")\n", + "print(\"Job Spec:\", paddlejob.spec, sep=\"\\n\", end=\"\\n\\n\")\n", + "print(\"Job Status:\", paddlejob.status, sep=\"\\n\", end=\"\\n\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get PaddleJob Training logs\n", + "Get and print the training logs of the PaddleJob with the training steps " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set is_master to False because this example does not include a master replica type\n", + "print_training_logs(client, PADDLEJOB_NAME, container=PADDLEJOB_CONTAINER, is_master=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete PaddleJob\n", + "\n", + "Delete the created PaddleJob." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.delete_paddlejob(name=PADDLEJOB_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@retry(\n", + " wait=wait_exponential(multiplier=2, min=1, max=10),\n", + " stop=stop_after_attempt(30),\n", + " reraise=True,\n", + ")\n", + "def assert_paddlejob_removed(client, job_name):\n", + " \"\"\"Wait for PaddleJob to be removed.\"\"\"\n", + " # fetch the existing PaddleJob names\n", + " # verify that the Job was deleted successfully\n", + " jobs = {job.metadata.name for job in client.list_paddlejobs()}\n", + " assert job_name not in jobs, f\"Failed to delete PaddleJob {job_name}!\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "# wait for PaddleJob to be removed successfully\n", + "assert_paddlejob_removed(client, PADDLEJOB_NAME)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 66fe640dccbc57377cc256c07bbd0262a8649fb9 Mon Sep 17 00:00:00 2001 From: Daniela Plascencia Date: Fri, 23 Aug 2024 17:06:56 +0200 Subject: [PATCH 3/5] tests: add proxy option to test file for driver (#103) * tests: add proxy option to test file for driver Adding a proxy option to the driver/test_kubeflow_workloads.py script, will allow users to tell the driver it will run in a proxied environment. With this, a PodDefault will be used for setting environment variables (NO_PROXY, HTTP_PROXY, HTTPS_PROXY) in the Pod(s) that execute the Job for UATs. Fixes #96 --- README.md | 62 +++++++++++++++++++++---------- assets/test-job.yaml.j2 | 3 ++ driver/conftest.py | 11 ++++++ driver/test_kubeflow_workloads.py | 57 +++++++++++++++++++++++++++- tests/proxy-poddefault.yaml | 22 ----------- tests/proxy-poddefault.yaml.j2 | 45 ++++++++++++++++++++++ 6 files changed, 156 insertions(+), 44 deletions(-) delete mode 100644 tests/proxy-poddefault.yaml create mode 100644 tests/proxy-poddefault.yaml.j2 diff --git a/README.md b/README.md index 837f93e..c5af1ad 100644 --- a/README.md +++ b/README.md @@ -140,30 +140,44 @@ tox -e kubeflow-local ``` ### Run behind proxy -#### Prerequistes -**To run the tests behind proxy using Notebook or using the driver, the following step is necessary:** + +#### Running using Notebook + +##### Prerequistes Edit the PodDefault `tests/proxy-poddefault.yaml` to replace the placeholders for: - * `:`: The address and port of your proxy server - * ``: you can get this value by running: - ``` - cat /var/snap/microk8s/current/args/kube-proxy | grep cluster-cidr - ``` - * ``: you can get this value by running: - ``` - cat /var/snap/microk8s/current/args/kube-apiserver | grep service-cluster-ip-range - ``` + +* `http_proxy` and `https_proxy` - The address and port of your proxy server, format should be `:` +* `no_proxy` - A comma separated list of items that should not be proxied. 
+
+  `<cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local`
+
+  where:
+
+  * `<cluster cidr>`: you can get this value by running:
+
+    ```
+    cat /var/snap/microk8s/current/args/kube-proxy | grep cluster-cidr
+    ```
+
+  * `<service cluster ip range>`: you can get this value by running:
+
+    ```
+    cat /var/snap/microk8s/current/args/kube-apiserver | grep service-cluster-ip-range
+    ```
 
- * `<nodes internal ip>`: the Internal IP of the nodes where your cluster is running, you can
-   get this value by running:
-   ```
-   microk8s kubectl get nodes -o wide
-   ```
-   It is the `INTERNAL-IP` value
- * `<hostname>`: the name of your host on which the cluster is deployed, you can use the
-   `hostname` command to get it
+  * `<nodes internal ip>`: the Internal IP of the nodes where your cluster is running, you can get this value by running:
+
+    ```
+    microk8s kubectl get nodes -o wide
+    ```
+    It is the `INTERNAL-IP` value
+
+  * `<hostname>`: the name of the host on which the cluster is deployed; you can get it with the `hostname` command
+
+  * `localhost` and `127.0.0.1`: recommended, so that requests to the local host are not proxied
+
-#### Running using Notebook
 To run the tests behind proxy using Notebook:
 1. Login to the Dashboard and Create a Profile
 2. Apply the PodDefault to your Profile's namespace, make sure you already followed the Prerequisites
@@ -188,6 +202,14 @@ To run the tests behind proxy using Notebook:
   * kfp_v2
   * training (except TFJob due to https://github.com/canonical/training-operator/issues/182)
 
+#### Running using `driver`
+
+You can pass the `--proxy` flag with the proxy values to the `tox` command; this automatically applies the required changes to run behind a proxy.
+
+```bash
+tox -e kubeflow-<local|remote> -- --proxy http_proxy="http_proxy:port" https_proxy="https_proxy:port" no_proxy="<cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local"
+```
+
 #### Developer Notes
 
 Any environment that can be used to access and configure the Charmed Kubeflow deployment is
diff --git a/assets/test-job.yaml.j2 b/assets/test-job.yaml.j2
index 89d254f..2dd22ca 100644
--- a/assets/test-job.yaml.j2
+++ b/assets/test-job.yaml.j2
@@ -7,6 +7,9 @@ spec:
   template:
     metadata:
       labels:
+        {% if proxy %}
+        notebook-proxy: "true"
+        {% endif %}
         access-minio: "true"
         access-ml-pipeline: "true"
         mlflow-server-minio: "true"
diff --git a/driver/conftest.py b/driver/conftest.py
index aabb9fc..cf66f58 100644
--- a/driver/conftest.py
+++ b/driver/conftest.py
@@ -10,6 +10,17 @@ def pytest_addoption(parser: Parser):
     * Add a `--filter` option to (de)select test cases based on their name
       (see also https://docs.pytest.org/en/7.4.x/reference/reference.html#command-line-flags)
     """
+    parser.addoption(
+        "--proxy",
+        nargs=3,
+        metavar=("http_proxy", "https_proxy", "no_proxy"),
+        help="Set a number of key-value pairs for the proxy environment variables."
+        " Example: "
+        "--proxy http_proxy='proxy:port' https_proxy='proxy:port' no_proxy='<comma-separated list>'."
+        " If used, a PodDefault will be rendered and applied to the Kubernetes deployment."
+        " It is not used by default.",
+        action="store",
+    )
     parser.addoption(
         "--filter",
         help="Provide a filter to (de)select tests cases based on their name. The filter follows"
diff --git a/driver/test_kubeflow_workloads.py b/driver/test_kubeflow_workloads.py
index 203a995..5d9dd22 100644
--- a/driver/test_kubeflow_workloads.py
+++ b/driver/test_kubeflow_workloads.py
@@ -5,10 +5,15 @@
 import os
 import subprocess
 from pathlib import Path
+from typing import Dict
 
 import pytest
 from lightkube import ApiError, Client, codecs
-from lightkube.generic_resource import create_global_resource, load_in_cluster_generic_resources
+from lightkube.generic_resource import (
+    create_global_resource,
+    create_namespaced_resource,
+    load_in_cluster_generic_resources,
+)
 from utils import assert_namespace_active, delete_job, fetch_job_logs, wait_for_job
 
 log = logging.getLogger(__name__)
@@ -34,6 +39,14 @@
 PYTEST_CMD_BASE = "pytest"
 
+PODDEFAULT_RESOURCE = create_namespaced_resource(
+    group="kubeflow.org",
+    version="v1alpha1",
+    kind="poddefault",
+    plural="poddefaults",
+)
+PODDEFAULT_WITH_PROXY_PATH = Path("tests") / "proxy-poddefault.yaml.j2"
+
 
 @pytest.fixture(scope="session")
 def pytest_filter(request):
@@ -83,6 +96,33 @@ def create_profile(lightkube_client):
     lightkube_client.delete(PROFILE_RESOURCE, name=NAMESPACE)
 
 
+@pytest.fixture(scope="function")
+def create_poddefaults_on_proxy(request, lightkube_client):
+    """Create PodDefault with proxy env variables for the Notebook inside the Job."""
+    # Simply yield if the proxy flag is not set
+    if not request.config.getoption("proxy"):
+        yield
+    else:
+        log.info("Adding PodDefault with proxy settings.")
+        poddefault_resource = codecs.load_all_yaml(
+            PODDEFAULT_WITH_PROXY_PATH.read_text(),
+            context=proxy_context(request),
+        )
+        # Using the first item of the list of poddefault_resource. It is a one-item list.
+        lightkube_client.create(poddefault_resource[0], namespace=NAMESPACE)
+
+        yield
+
+        # delete the PodDefault at the end of the test
+        log.info("Deleting PodDefault...")
+        poddefault_resource = codecs.load_all_yaml(
+            PODDEFAULT_WITH_PROXY_PATH.read_text(),
+            context=proxy_context(request),
+        )
+        poddefault_name = poddefault_resource[0].metadata.name
+        lightkube_client.delete(PODDEFAULT_RESOURCE, name=poddefault_name, namespace=NAMESPACE)
+
+
 @pytest.mark.abort_on_fail
 async def test_create_profile(lightkube_client, create_profile):
     """Test Profile creation.
@@ -105,7 +145,9 @@ async def test_create_profile(lightkube_client, create_profile):
     assert_namespace_active(lightkube_client, NAMESPACE)
 
 
-def test_kubeflow_workloads(lightkube_client, pytest_cmd, tests_checked_out_commit):
+def test_kubeflow_workloads(
+    lightkube_client, pytest_cmd, tests_checked_out_commit, request, create_poddefaults_on_proxy
+):
     """Run a K8s Job to execute the notebook tests."""
     log.info(f"Starting Kubernetes Job {NAMESPACE}/{JOB_NAME} to run notebook tests...")
     resources = list(
@@ -118,9 +160,11 @@ def test_kubeflow_workloads(lightkube_client, pytest_cmd, tests_checked_out_comm
             "tests_image": TESTS_IMAGE,
             "tests_remote_commit": tests_checked_out_commit,
             "pytest_cmd": pytest_cmd,
+            "proxy": True if request.config.getoption("proxy") else False,
         },
     )
 )
+    assert len(resources) == 1, f"Expected 1 Job, got {len(resources)}!"
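+    # The template renders exactly one Job; when --proxy is set, the matching
+    # PodDefault has already been applied by the create_poddefaults_on_proxy fixture.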
 
     lightkube_client.create(resources[0], namespace=NAMESPACE)
 
@@ -140,3 +184,12 @@ def teardown_module():
     """Cleanup resources."""
     log.info(f"Deleting Job {NAMESPACE}/{JOB_NAME}...")
     delete_job(JOB_NAME, NAMESPACE)
+
+
+def proxy_context(request) -> Dict[str, str]:
+    """Return a dictionary with proxy environment variables from user input."""
+    proxy_context = {}
+    for proxy in request.config.getoption("proxy"):
+        # split only on the first "=" so proxy values that themselves contain "=" stay intact
+        key, value = proxy.split("=", 1)
+        proxy_context[key] = value
+    return proxy_context
diff --git a/tests/proxy-poddefault.yaml b/tests/proxy-poddefault.yaml
deleted file mode 100644
index a1f7300..0000000
--- a/tests/proxy-poddefault.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: kubeflow.org/v1alpha1
-kind: PodDefault
-metadata:
-  name: notebook-proxy
-spec:
-  desc: Add proxy settings
-  env:
-    - name: HTTP_PROXY
-      value: <proxy address>:<proxy port>
-    - name: http_proxy
-      value: <proxy address>:<proxy port>
-    - name: HTTPS_PROXY
-      value: <proxy address>:<proxy port>
-    - name: https_proxy
-      value: <proxy address>:<proxy port>
-    - name: NO_PROXY
-      value: <cluster cidr>,<service cluster ip range>,127.0.0.1,<nodes internal ip>/24,<hostname>,.svc,.local
-    - name: no_proxy
-      value: <cluster cidr>,<service cluster ip range>,127.0.0.1,<nodes internal ip>/24,<hostname>,.svc,.local
-  selector:
-    matchLabels:
-      notebook-proxy: "true"
diff --git a/tests/proxy-poddefault.yaml.j2 b/tests/proxy-poddefault.yaml.j2
new file mode 100644
index 0000000..c5a5868
--- /dev/null
+++ b/tests/proxy-poddefault.yaml.j2
@@ -0,0 +1,45 @@
+apiVersion: kubeflow.org/v1alpha1
+kind: PodDefault
+metadata:
+  name: notebook-proxy
+spec:
+  desc: Add proxy settings
+  env:
+    - name: HTTP_PROXY
+      value: {{ http_proxy }}
+    - name: http_proxy
+      value: {{ http_proxy }}
+    - name: HTTPS_PROXY
+      value: {{ https_proxy }}
+    - name: https_proxy
+      value: {{ https_proxy }}
+    - name: NO_PROXY
+      value: {{ no_proxy }}
+    - name: no_proxy
+      value: {{ no_proxy }}
+  _example_env:
+    ################################
+    #                              #
+    #    EXAMPLE CONFIGURATION     #
+    #                              #
+    ################################
+
+    # This is not actually functional; it just serves as an example of how to configure
+    # the proxy values and which ones have to be included to make things work properly.
+    # If you are running the UATs directly in a Notebook, please modify the env block
+    # above with the values that fit your specific configuration.
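+
+    # The angle-bracket values below are placeholders; the README's "Run behind
+    # proxy" section describes how to find the right value for each one.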
+    - name: HTTP_PROXY
+      value: <proxy address>:<proxy port>
+    - name: http_proxy
+      value: <proxy address>:<proxy port>
+    - name: HTTPS_PROXY
+      value: <proxy address>:<proxy port>
+    - name: https_proxy
+      value: <proxy address>:<proxy port>
+    - name: NO_PROXY
+      value: <cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local
+    - name: no_proxy
+      value: <cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local
+  selector:
+    matchLabels:
+      notebook-proxy: "true"

From fe0e41ff27a5ccb25e3b24a155c7dad07433fd1a Mon Sep 17 00:00:00 2001
From: Daniela Plascencia
Date: Tue, 27 Aug 2024 15:05:08 +0200
Subject: [PATCH 4/5] docs: remove tfjob note as
 canonical/training-operator#182 is fixed (#111)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c5af1ad..38fafe4 100644
--- a/README.md
+++ b/README.md
@@ -200,7 +200,7 @@ To run the tests behind proxy using Notebook:
   * katib
   * kserve
   * kfp_v2
-  * training (except TFJob due to https://github.com/canonical/training-operator/issues/182)
+  * training
 
 #### Running using `driver`
 

From 135396083d594ab77418029c31f6a3a8ee246a2e Mon Sep 17 00:00:00 2001
From: Noha Ihab <49988746+NohaIhab@users.noreply.github.com>
Date: Tue, 27 Aug 2024 16:29:04 +0300
Subject: [PATCH 5/5] readme: Add to proxy instructions the KServe and Knative
 proxy configs (#112)

---
 README.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/README.md b/README.md
index 38fafe4..ca87727 100644
--- a/README.md
+++ b/README.md
@@ -141,6 +141,31 @@ tox -e kubeflow-local
 
 ### Run behind proxy
 
+#### Prerequisites for KServe UATs
+
+To run the KServe UATs behind a proxy, you first need to configure the `kserve-controller`
+and `knative-serving` charms to function behind a proxy.
+
+> [!NOTE]
+> For information on how to fill out the proxy config values, see the `Running using Notebook > Prerequisites` section below.
+
+1. Set the `http-proxy`, `https-proxy`, and `no-proxy` configs in the `kserve-controller` charm:
+```
+juju config kserve-controller http-proxy=<proxy address>:<proxy port> https-proxy=<proxy address>:<proxy port> no-proxy=<cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local
+```
+
+2. Set the `http-proxy`, `https-proxy`, and `no-proxy` configs in the `knative-serving` charm:
+```
+juju config knative-serving http-proxy=<proxy address>:<proxy port> https-proxy=<proxy address>:<proxy port> no-proxy=<cluster cidr>,<service cluster ip range>,127.0.0.1,localhost,<nodes internal ip>/24,<hostname>,.svc,.local
+```
+
+For example:
+```
+juju config knative-serving http-proxy=http://10.0.13.50:3128/ https-proxy=http://10.0.13.50:3128/ no-proxy=10.1.0.0/16,10.152.183.0/24,127.0.0.1,localhost,10.0.2.0/24,ip-10-0-2-157,.svc,.local
+
+juju config kserve-controller http-proxy=http://10.0.13.50:3128/ https-proxy=http://10.0.13.50:3128/ no-proxy=10.1.0.0/16,10.152.183.0/24,127.0.0.1,localhost,10.0.2.0/24,ip-10-0-2-157,.svc,.local
+```
+
 #### Running using Notebook
 
 ##### Prerequisites
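For reference, the driver-side flow introduced in PATCH 3/5 reduces to two lightkube calls: render the PodDefault template with the user-supplied proxy values, then create the resulting object in the test namespace. The sketch below reproduces that flow outside of pytest. It is a minimal illustration, not part of the patch series; the proxy values mirror the juju example above, and the `test-kubeflow` namespace is a hypothetical placeholder.

```python
# Minimal sketch: render tests/proxy-poddefault.yaml.j2 the same way the
# driver's fixture does, then apply it with lightkube.
# Assumptions: lightkube and jinja2 are installed, the kubeconfig points at
# the cluster, and "test-kubeflow" is the (hypothetical) profile namespace.
from pathlib import Path

from lightkube import Client, codecs
from lightkube.generic_resource import create_namespaced_resource

# Register the PodDefault CRD so codecs.load_all_yaml can build objects for it.
create_namespaced_resource(
    group="kubeflow.org", version="v1alpha1", kind="PodDefault", plural="poddefaults"
)

# Hypothetical proxy values -- replace with the values for your environment.
context = {
    "http_proxy": "http://10.0.13.50:3128/",
    "https_proxy": "http://10.0.13.50:3128/",
    "no_proxy": "10.1.0.0/16,10.152.183.0/24,127.0.0.1,localhost,.svc,.local",
}

client = Client()
# Passing a context makes load_all_yaml treat the input as a jinja2 template.
for resource in codecs.load_all_yaml(
    Path("tests/proxy-poddefault.yaml.j2").read_text(), context=context
):
    client.create(resource, namespace="test-kubeflow")
```

Once the PodDefault exists, any Pod created in that namespace with the `notebook-proxy: "true"` label (which the Job template adds when `proxy` is true) gets the proxy environment variables injected.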