diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index f9e2807..15bf6dd 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. #------------------------------------------------------------------------------------------------------------- -FROM golang:1.12.5-stretch +FROM golang:1.12-buster ARG KubectlVersion=v1.16.2 ARG HelmVersion=v3.0.3 @@ -16,6 +16,10 @@ RUN apt-get update \ && apt-get -y install --no-install-recommends apt-utils 2>&1 \ # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed && apt-get -y install git procps lsb-release \ + # Install python + && apt-get -y install --no-install-recommends git openssl build-essential ca-certificates nano curl python python3-dev python3-pip python3-venv python3-setuptools python3-wheel\ + # Install pylint + && pip3 --disable-pip-version-check --no-cache-dir install pylint \ # Install Editor && apt-get install vim -y \ # Clean up @@ -105,3 +109,8 @@ RUN git clone https://github.com/magicmonty/bash-git-prompt.git ~/.bash-git-prom && echo "if [ -f \"$HOME/.bash-git-prompt/gitprompt.sh\" ]; then GIT_PROMPT_ONLY_IN_REPO=1 && source $HOME/.bash-git-prompt/gitprompt.sh; fi" >> "/root/.bashrc" ENV PATH="/root/.kubectx:${PATH}" + +COPY ./locust/requirements.* ./ +COPY ./.devcontainer/scripts/python_venv.sh ./ + +RUN bash -f ./python_venv.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9685635..4588b57 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -5,7 +5,7 @@ "workspaceFolder": "/workspace", "context": "..", "workspaceMount": "src=${localWorkspaceFolder},dst=/workspace,type=bind", - "runArgs": [ + "runArgs": [ // Mount the env file "--env-file", "${localWorkspaceFolder}/.devcontainer/.env", // Mount go mod cache @@ -25,11 +25,13 @@ // Optionally mount k8s auth to existing cluster // "-v", "${env:HOME}${env:USERPROFILE}/.kube:/home/vscode/.kube", ], - "forwardPorts": [8085], + "forwardPorts": [8085, 8089, 9090], "extensions": [ "ms-azuretools.vscode-docker", + "ms-vscode.go", + "ms-python.python", "humao.rest-client", - "ms-vscode.go" + "yzhang.markdown-all-in-one" ], "settings": { "go.gopath": "/go", @@ -58,9 +60,19 @@ "go.lintFlags": [ "--fast" ], + "python.pythonPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.testing.pytestEnabled": true, + "python.linting.flake8Enabled": true, + "python.linting.lintOnSave": true, + "python.testing.promptToConfigure": false, + "python.testing.unittestEnabled": false, + "python.testing.nosetestsEnabled": false, + "python.formatting.provider": "black", + "python.linting.pylintPath": "/usr/local/bin/pylint", "remote.extensionKind": { "ms-azuretools.vscode-docker": "workspace" } } - -} +} \ No newline at end of file diff --git a/.devcontainer/scripts/python_venv.sh b/.devcontainer/scripts/python_venv.sh new file mode 100644 index 0000000..2503da2 --- /dev/null +++ b/.devcontainer/scripts/python_venv.sh @@ -0,0 +1,10 @@ +#! 
/bin/bash
+set -e
+set -x
+
+mkdir /python_venv
+python3 -m venv /python_venv/venv
+source /python_venv/venv/bin/activate # You can also tell VSCode to use the interpreter in this location
+pip3 install -r requirements.dev.txt
+pip3 install -r requirements.txt
+echo "source /python_venv/venv/bin/activate" >> "$HOME/.bashrc"
diff --git a/Makefile b/Makefile
index 4dde955..95f4cc8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+# Timestamp for image tags
 timestamp := $(shell /bin/date "+%Y%m%d-%H%M%S")
 
 # Image URL to use all building/pushing image targets
@@ -6,6 +7,12 @@ IMG ?= controller:latest
 # MockAPI image URL to use all building/pushing image targets
 MOCKAPI_IMG ?= mockapi:${timestamp}
 
+# Locust image URL to use for all building/pushing image targets
+LOCUST_IMG ?= locust:${timestamp}
+
+# Default locust scenario file to run
+LOCUST_FILE ?= "behaviours/scenario1_run_submit_delete.py"
+
 # Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
 CRD_OPTIONS ?= "crd:trivialVersions=true"
 
@@ -18,6 +25,7 @@ KIND_CLUSTER_NAME ?= "azure-databricks-operator"
 # Default namespace for the installation
 OPERATOR_NAMESPACE ?= "azure-databricks-operator-system"
 
+
 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
 ifeq (,$(shell go env GOBIN))
 GOBIN=$(shell go env GOPATH)/bin
@@ -244,4 +252,14 @@ kind-load-image-mock-api: create-kindcluster docker-build-mock-api install-prom
 kind-deploy-mock-api: kind-load-image-mock-api apply-manifests-mock-api
 
-aks-deploy-mock-api: docker-push-mock-api install-prometheus apply-manifests-mock-api
+kind-deploy-locust: create-kindcluster install-prometheus
+	docker build -t ${LOCUST_IMG} -f locust/Dockerfile .
+	kind load docker-image ${LOCUST_IMG} --name ${KIND_CLUSTER_NAME} -v 1
+	cat ./locust/manifests/deployment.yaml | sed "s|locust:latest|${LOCUST_IMG}|" | sed "s|behaviours/scenario1_run_submit_delete.py|${LOCUST_FILE}|" | kubectl apply -f -
+
+format-locust:
+	black .
+
+test-locust:
+	pip install -e ./locust -q
+	pytest
diff --git a/docs/locust.md b/docs/locust.md
new file mode 100644
index 0000000..f423e3c
--- /dev/null
+++ b/docs/locust.md
@@ -0,0 +1,193 @@
+# Load testing with locust
+
+The load testing project for the [azure-databricks-operator](https://github.com/microsoft/azure-databricks-operator/) can be found under `/locust`. Tests are built and run using the Python [Locust load testing framework](https://docs.locust.io/en/stable/index.html).
+
+## Table of contents
+
+- [Load testing with locust](#load-testing-with-locust)
+  - [Deploying dependencies](#deploying-dependencies)
+  - [Build and Test](#build-and-test)
+    - [Deploy to kind](#deploy-to-kind)
+    - [Run tests](#run-tests)
+    - [Adding tests](#adding-tests)
+  - [Contribute](#contribute)
+    - [Extending the supported Databricks functionality](#extending-the-supported-databricks-functionality)
+  - [Prometheus Endpoint](#prometheus-endpoint)
+  - [Running test under docker](#running-test-under-docker)
+    - [Test locally against cluster](#test-locally-against-cluster)
+    - [Deploy into the cluster and run](#deploy-into-the-cluster-and-run)
+      - [How do I update a dashboard](#how-do-i-update-a-dashboard)
+      - [How do I set error conditions](#how-do-i-set-error-conditions)
+  - [Known issues](#known-issues)
+
+## Deploying dependencies
+
+For documentation on deploying the `azure-databricks-operator` and `databricks-mock-api` for testing, see [deploy/README.md](deploy/README.md).
+
+## Build and Test
+
+Everything needed to build and test the project is set up in the dev container.
+
+To run the project without the dev container you need:
+
+- Python 3
+- Pip
+- Set up your python environment
+
+  ```bash
+  python -m venv venv
+  source venv/bin/activate # You can also tell VSCode to use the interpreter in this location
+  pip install -r requirements.dev.txt
+  pip install -r requirements.txt
+  ```
+
+### Deploy to kind
+
+> Before proceeding make sure your container or environment is up and running
+
+1. Deploy locust to the local KIND instance. Set `LOCUST_FILE` to the locust scenario you'd like to run from `locust/behaviours`.
+
+   ```bash
+   make kind-deploy-locust LOCUST_FILE="behaviours/scenario1_run_submit_delete.py"
+   ```
+
+2. Start the test server:
+
+   ```bash
+   locust -f behaviours/<your_behaviour_file>.py
+   ```
+
+### Run tests
+
+Tests are written using `pytest`. More information [is available here](https://docs.pytest.org/en/latest/).
+
+> Before proceeding make sure your container or environment is up and running
+
+1. Run the tests from the root of the project:
+
+   ```bash
+   make test-locust
+   ```
+
+2. All being well you should see the following output:
+
+   ```bash
+   ============================= test session starts ==============================
+   platform linux -- Python 3.7.4, pytest-5.3.2, py-1.8.0, pluggy-0.13.1
+   rootdir: /workspace
+   plugins: mock-1.13.0
+   collected 8 items
+
+
+   test/unit/db_run_client_test.py ........
+   ```
+
+### Adding tests
+
+The project is set up to automatically discover any tests under the `locust/test` folder, provided the following criteria are met:
+
+- your test `.py` file follows the naming convention `*_test.py`
+- within your test file your methods follow the naming convention `def test_*()`
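+
+For example, a minimal test module that satisfies these conventions could look like the following (the file path and names here are illustrative placeholders, not files that exist in the repo):
+
+```python
+# locust/test/unit/example_test.py  (hypothetical)
+from db_locust.constant import K8_GROUP
+
+
+def test_example_constant():
+    # trivial check purely to demonstrate the discovery and naming conventions
+    assert K8_GROUP == "databricks.microsoft.com"
+```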
+
+## Contribute
+
+- Test files are added to the `/behaviours` directory
+- These files take the recommended format described by the Locust documentation, representing the behaviour of a single user (or set of users)
+
+### Extending the supported Databricks functionality
+
+- `/locust_files/db_locust` contains all files related to how Locust can interact with Databricks using K8s via the [azure-databricks-operator](https://github.com/microsoft/azure-databricks-operator/)
+  - `db_locust.py`: The brain of the behaviour-driven tests. Inherits from the default `Locust`, read more [here](https://docs.locust.io/en/stable/testing-other-systems.html)
+  - `db_client.py`: Core client used by the `db_locust`. It is a wrapper of "sub" clients that interface to specific databricks operator Kinds
+    - `db_run_client.py`: all actions relating to `run` api interfaces
+    - More clients to be added - ***this is where the majority of contributions will be made***
+  - `db_decorator.py`: A simple decorator for Databricks operations that gives basic metric logging and error handling
+
+## Prometheus Endpoint
+
+This suite of Locust tests exposes stats to Prometheus via a web endpoint.
+
+The endpoint is exposed at `/export/prometheus`. When running the tests with the web endpoints enabled, you can visit this endpoint to see the exported stats.
+
+## Running test under docker
+
+This guide assumes you have used `./deploy/README.md` to deploy an AKS Engine cluster and have the `KUBECONFIG` set correctly, and that you have also used `./deploy/prometheus-grafana` to set up the `prometheus` operator.
+
+### Test locally against cluster
+
+To build and test the locust image locally against the cluster you can run:
+
+```bash
+make docker-run-local
+```
+
+This will build a docker image **which contains the kubeconfig** file.
+
+> Why put the file in the docker image? As we're using a devcontainer, the normal approach of mounting a file doesn't work: the path to the file on the host (which is what the daemon uses) isn't the same as the path in the devcontainer, so the file is never mounted.
+
+### Deploy into the cluster and run
+
+1. To deploy into the cluster and set up port forwarding, run:
+
+   ```bash
+   CONTAINER_REGISTRY=$ACR_LOGIN_SERVER make deploy-loadtest
+   k port-forward service/locust-loadtest 8089:8089 9090:9090 -n locust
+   ```
+
+2. Visit `http://localhost:8089` to start the loadtest from the locust web UI.
+
+3. View stats on the test
+
+   ```bash
+   kubectl port-forward service/prom-azure-databricks-operator-grafana 8080:80
+   ```
+
+   ```text
+   Username: admin
+   Password: prom-operator
+   http://localhost:8080
+   ```
+
+   Then navigate to the locust dashboard to view the results.
+
+If you want to set up port forwards for everything, do the following:
+
+```bash
+k port-forward service/prom-azure-databricks-oper-prometheus 9091:9090 &
+k port-forward service/locust-loadtest 8089:8089 9090:9090 &
+k port-forward service/prom-azure-databricks-operator-grafana 8080:80 &
+
+Browse to locust webui -> http://localhost:8089/
+Browse to locust metrics -> http://localhost:9090/
+Browse to Prometheus -> http://localhost:9091/targets
+Browse to Grafana -> http://localhost:8080/
+```
+
+> Note: If one of these port forwards stops working, use `ps aux | grep kubectl` and look for the process id of the one that's broken, then use `kill 21283` (with your process id) to stop it. Then rerun the port forward command.
+
+#### How do I update a dashboard
+
+The best way I've found is to import the dashboard JSON into the Grafana instance, edit it using the UI, then export it back to JSON and update the file in the repo.
+
+#### How do I set error conditions
+
+For some of the load test scenarios we want to trigger error behaviour in the mock-api during a test run.
+
+The first step is to port-forward the mock-api service:
+
+```bash
+# port-forward to localhost:8085
+kubectl port-forward -n databricks-mock-api svc/databricks-mock-api 8085:8080
+```
+
+Next we can issue a `PATCH` request to update the error rate, e.g.
to set 20% probability for status code 500 responses + +```bash +curl --request PATCH \ + --url http://localhost:8085/config \ + --header 'content-type: application/json' \ + --data '{"DATABRICKS_MOCK_API_ERROR_500_PROBABILITY":20}' +``` +## Known issues + +- When the port you're forwarding your Locust server to is not exposed from the container, you cannot hit it from your localhost machine. Use the [VSCode temporary port forwarding](https://code.visualstudio.com/docs/remote/containers#_temporarily-forwarding-a-port) to resolve this. \ No newline at end of file diff --git a/docs/mockapi.md b/docs/mockapi.md index f5bcf5a..2b62a6e 100644 --- a/docs/mockapi.md +++ b/docs/mockapi.md @@ -1,42 +1,57 @@ -# Mock databricks API +# Mock Databricks API -The databricks mock API can be found under `/databricks-mock-api` +The API found under `/mockapi` is a Databricks mock API for following success scenarios: -This is a mock API for following sucess scenarios of the Databricks API: - [Jobs/](https://docs.databricks.com/dev-tools/api/latest/jobs.html): - - Create + - Create + - Get + - List + - Delete + - Runs/ + - Submit - Get + - GetOutput - List - - Delete - - Runs/ - - Submit - - Get - - GetOutput - - List In addition, each submitted run will cycle through it's life states (PENDING -> RUNNING -> TERMINATING -> TERMINATED). The length of each state is set in run_repository.go : timePerRunLifeState. +## Table of contents + +- [Mock Databricks API](#mock-databricks-api) + - [Features](#features) + - [Configurable API latency](#configurable-api-latency) + - [Configurable Rate Limiting](#configurable-rate-limiting) + - [Configurable Errors](#configurable-errors) + - [Dynamic Configuration](#dynamic-configuration) + - [Running locally](#running-locally) + - [Running in Kind](#running-in-kind) + - [Running in a separate cluster](#running-in-a-separate-cluster) + - [Prerequisites](#prerequisites) + - [Deploy to the cluster](#deploy-to-the-cluster) + ## Features ### Configurable API latency -To simulate Databricks API more accurately, we've added an option to configure a range of latency for each request. +To simulate Databricks API more accurately, we've added an option to configure a range of latency for each request. The latency range can be configured by adding a min and max value for desired latency in milliseconds for a fast and slow requests using the environment variables: + ```text DATABRICKS_MOCK_API_LATENCY_MILLISECONDS_SLOW_REQUEST_MIN DATABRICKS_MOCK_API_LATENCY_MILLISECONDS_SLOW_REQUEST_MAX DATABRICKS_MOCK_API_LATENCY_MILLISECONDS_FAST_REQUEST_MIN DATABRICKS_MOCK_API_LATENCY_MILLISECONDS_FAST_REQUEST_MAX ``` + When set, for each request will sleep for a time chosen at random between the min and max values. If either of the variables is not set, the API will default to running with no latency. ### Configurable Rate Limiting -To allow rate-limiting requests to match Databricks API behaviour, a rate limit can be specified by setting `DATABRICKS_MOCK_API_RATE_LIMIT` environment variable to the number of requests per second that should be allowed against the API. +To allow rate-limiting requests to match Databricks API behaviour, a rate limit can be specified by setting `DATABRICKS_MOCK_API_RATE_LIMIT` environment variable to the number of requests per second that should be allowed against the API. ### Configurable Errors @@ -48,7 +63,7 @@ To configure a percentage of calls that should sink-hole, i.e. 
return no response
 
 To configure a percentage of calls that should respond xml response with status code 200 response in the mock-api you can set`DATABRICKS_MOCK_API_ERROR_XML_RESPONSE_PROBABILITY`.Probabilities are as for `DATABRICKS_MOCK_API_ERROR_500_PROBABILITY`.
 
-The combined probabilities much be <=100
+> NB: The combined probabilities must be <=100
 
 ### Dynamic Configuration
 
@@ -72,7 +87,7 @@ Once the devcontainer has built and started, use `make run-mock-api` to run the
 
 To run the mock api in Kind run `make kind-deploy-mock-api`. This will ensure a Kind cluster is created, deploy promethous with helm, build and load a docker image for the mock api into the Kind cluster and then create a Deployment and Service.
 
-To test, run `kubectl port-forward svc/databricks-mock-api 8085:8080 -n databricks-mock-api` and make a request to http://localhost:8085 to verify that the API is running
+To test, run `kubectl port-forward svc/databricks-mock-api 8085:8080 -n databricks-mock-api` and make a request to <http://localhost:8085> to verify that the API is running
 
 ## Running in a separate cluster
 
@@ -89,10 +104,11 @@ Ensure your KUBECONFIG is set to point to the cluster you want to deploy to.
 ### Deploy to the cluster
 
 Deploy to AKS with
+
 ```bash
 IMG=your-container-registry.azurecr.io/databricks-mock-api:vsomething make aks-deploy-mock-api
 ```
 
-To test, run `kubectl port-forward svc/databricks-mock-api 8085:8080` and make a request to http://localhost:8085 to verify that the API is running
+To test, run `kubectl port-forward svc/databricks-mock-api 8085:8080` and make a request to <http://localhost:8085> to verify that the API is running
 
 > NB: Error: 'unauthorized: authentication required' make sure that the image name matches the ACR login server casing or try pushing the docker file outside of the container
diff --git a/locust/.flake8 b/locust/.flake8
new file mode 100644
index 0000000..438e405
--- /dev/null
+++ b/locust/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 160
+exclude = venv
\ No newline at end of file
diff --git a/locust/.gitignore b/locust/.gitignore
new file mode 100644
index 0000000..0bec9a6
--- /dev/null
+++ b/locust/.gitignore
@@ -0,0 +1,109 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +**/.DS_Store + +.dockertag +.dockerimg \ No newline at end of file diff --git a/locust/Dockerfile b/locust/Dockerfile new file mode 100644 index 0000000..4411e42 --- /dev/null +++ b/locust/Dockerfile @@ -0,0 +1,14 @@ +FROM locustio/locust:0.13.2 + +# locust image is user which can't install pip things so we go back to root +USER root + + +COPY ./locust/behaviours ./behaviours +COPY ./locust/locust_files ./locust_files + +COPY locust/requirements.txt . + +RUN pip install -r requirements.txt + +ENTRYPOINT [ "locust" ] diff --git a/locust/behaviours/noop_locust.py b/locust/behaviours/noop_locust.py new file mode 100644 index 0000000..9d2e406 --- /dev/null +++ b/locust/behaviours/noop_locust.py @@ -0,0 +1,33 @@ +from locust import TaskSequence, seq_task, between +from locust_files.db_locust import DbLocust +from random import randint + + +class NoopUserBehaviour(TaskSequence): + """ + This class defines a set of tasks that are performed in a + predefined order (a sequence). + """ + + @seq_task(1) + def wait_short(self): + request_time = randint(1, 5) + self.client.noop.noop_with_delay(request_time) + + @seq_task(2) + def wait_medium(self): + self.client.noop.noop() + + @seq_task(3) + def fail_fast(self): + self.client.noop.noop_with_fail() + + +class NoopUser(DbLocust): + """ + This class represents a user of the Databricks Operator + with a particular behaviour + """ + + task_set = NoopUserBehaviour + wait_time = between(1, 5) diff --git a/locust/behaviours/scenario1_run_submit_delete.py b/locust/behaviours/scenario1_run_submit_delete.py new file mode 100644 index 0000000..ed078c0 --- /dev/null +++ b/locust/behaviours/scenario1_run_submit_delete.py @@ -0,0 +1,31 @@ +from locust import TaskSequence, seq_task, between +from locust_files.db_locust import DbLocust + + +class DatabricksRunSubmitUserBehaviour(TaskSequence): + """ + This class defines a set of tasks that are performed in a + predefined order (a sequence). 
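+
+    Scenario 1: submit a Databricks Run, poll until it reaches a terminal
+    state, then delete it.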
+ """ + + @seq_task(1) + def create_databricks_run(self): + self.run_name = self.client.runs.create_run() + + @seq_task(2) + def await_databricks_run_complete(self): + self.client.runs.poll_run_await_completion(self.run_name, 40, 10) + + @seq_task(3) + def delete_databricks_run(self): + self.client.runs.delete_run(self.run_name) + + +class DatabricksRunSubmitUser(DbLocust): + """ + This class represents a user of the Databricks Operator + with a particular behaviour + """ + + task_set = DatabricksRunSubmitUserBehaviour + wait_time = between(1, 5) diff --git a/locust/behaviours/scenario2_run_submit.py b/locust/behaviours/scenario2_run_submit.py new file mode 100644 index 0000000..ad3b11e --- /dev/null +++ b/locust/behaviours/scenario2_run_submit.py @@ -0,0 +1,27 @@ +from locust import TaskSequence, seq_task, between +from locust_files.db_locust import DbLocust + + +class DatabricksRunSubmitUserBehaviour(TaskSequence): + """ + This class defines a set of tasks that are performed in a + predefined order (a sequence). + """ + + @seq_task(1) + def create_databricks_run(self): + self.run_name = self.client.runs.create_run() + + @seq_task(2) + def await_databricks_run_complete(self): + self.client.runs.poll_run_await_completion(self.run_name, 40, 10) + + +class DatabricksRunSubmitUser(DbLocust): + """ + This class represents a user of the Databricks Operator + with a particular behaviour + """ + + task_set = DatabricksRunSubmitUserBehaviour + wait_time = between(1, 5) diff --git a/locust/behaviours/scenario5_run_submit_delete_high_wait_time.py b/locust/behaviours/scenario5_run_submit_delete_high_wait_time.py new file mode 100644 index 0000000..d5ec2fa --- /dev/null +++ b/locust/behaviours/scenario5_run_submit_delete_high_wait_time.py @@ -0,0 +1,31 @@ +from locust import TaskSequence, seq_task, between +from locust_files.db_locust import DbLocust + + +class DatabricksRunSubmitUserBehaviour(TaskSequence): + """ + This class defines a set of tasks that are performed in a + predefined order (a sequence). 
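+
+    Scenario 5: the same submit/poll/delete flow as scenario 1, but with a
+    larger polling budget (up to 100 attempts) to tolerate higher wait times.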
+ """ + + @seq_task(1) + def create_databricks_run(self): + self.run_name = self.client.runs.create_run() + + @seq_task(2) + def await_databricks_run_complete(self): + self.client.runs.poll_run_await_completion(self.run_name, 100, 10) + + @seq_task(3) + def delete_databricks_run(self): + self.client.runs.delete_run(self.run_name) + + +class DatabricksRunSubmitUser(DbLocust): + """ + This class represents a user of the Databricks Operator + with a particular behaviour + """ + + task_set = DatabricksRunSubmitUserBehaviour + wait_time = between(1, 5) diff --git a/locust/locust_files/__init__.py b/locust/locust_files/__init__.py new file mode 100644 index 0000000..822f31d --- /dev/null +++ b/locust/locust_files/__init__.py @@ -0,0 +1,5 @@ +from locust_files.db_locust.db_collector import LocustCollector + +collector = LocustCollector() +collector.register_collector() +collector.start_http_server() diff --git a/locust/locust_files/db_locust/__init__.py b/locust/locust_files/db_locust/__init__.py new file mode 100644 index 0000000..fc42d90 --- /dev/null +++ b/locust/locust_files/db_locust/__init__.py @@ -0,0 +1,3 @@ +from .db_locust import DbLocust + +__all__ = ["DbLocust"] diff --git a/locust/locust_files/db_locust/constant.py b/locust/locust_files/db_locust/constant.py new file mode 100644 index 0000000..b3c16e1 --- /dev/null +++ b/locust/locust_files/db_locust/constant.py @@ -0,0 +1,26 @@ +K8_GROUP = "databricks.microsoft.com" + +VERSION = "v1alpha1" + +RUN_RESOURCE = { + "apiVersion": "databricks.microsoft.com/v1alpha1", + "kind": "Run", + "metadata": {"name": "run-sample"}, + "spec": { + "name": "LocustRun", + "new_cluster": { + "spark_version": "5.3.x-scala2.11", + "node_type_id": "Standard_D3_v2", + "num_workers": 1, + }, + "spark_submit_task": { + "parameters": [ + "--class", + "org.apache.spark.examples.SparkPi", + # For tests against the real databricks API, change this to a valid file location + "dbfs:/FileStore/tables/SparkPi_assembly_0_1-04ede.jar", + "1", + ] + }, + }, +} diff --git a/locust/locust_files/db_locust/db_client.py b/locust/locust_files/db_locust/db_client.py new file mode 100644 index 0000000..8288981 --- /dev/null +++ b/locust/locust_files/db_locust/db_client.py @@ -0,0 +1,20 @@ +from .db_run_client import DbRunClient +from .db_noop_client import DbNoopClient +from kubernetes import client, config + + +class DbClient: + """ + DbClient exposes the Databricks actions that a user will perform + """ + + def __init__(self): + # Try and load the kubeconfig or incluster config + try: + config.load_kube_config() + except: + config.load_incluster_config() + + self.api = client.CustomObjectsApi() + self.runs = DbRunClient(self.api) + self.noop = DbNoopClient() diff --git a/locust/locust_files/db_locust/db_collector.py b/locust/locust_files/db_locust/db_collector.py new file mode 100644 index 0000000..83897d9 --- /dev/null +++ b/locust/locust_files/db_locust/db_collector.py @@ -0,0 +1,111 @@ +from flask import request, Response +from locust import runners, stats as locust_stats +from prometheus_client import Metric, REGISTRY, exposition, start_http_server + + +class LocustCollector(object): + def register_collector(self): + REGISTRY.register(self) + + def start_http_server(self): + start_http_server(9090) + + def collect(self): + # locust_runner is not None, it indicates that test started. 
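+        # i.e. only export request stats once a load test has actually started.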
+ if runners.locust_runner: + + stats = [] + + for s in locust_stats.sort_stats(runners.locust_runner.request_stats): + stats.append( + { + "method": s.method, + "name": s.name, + "num_requests": s.num_requests, + "num_failures": s.num_failures, + "avg_response_time": s.avg_response_time, + "min_response_time": s.min_response_time or 0, + "max_response_time": s.max_response_time, + "current_rps": s.current_rps, + "median_response_time": s.median_response_time, + "avg_content_length": s.avg_content_length, + } + ) + + metric = Metric("locust_user_count", "Swarmed users", "gauge") + metric.add_sample( + "locust_user_count", value=runners.locust_runner.user_count, labels={} + ) + + yield metric + + errors = [e.to_dict() for e in runners.locust_runner.errors.values()] + + metric = Metric("locust_errors", "Locust requests errors", "gauge") + for err in errors: + metric.add_sample( + "locust_errors", + value=err["occurrences"], + labels={"path": err["name"], "method": err["method"]}, + ) + yield metric + + is_distributed = isinstance( + runners.locust_runner, runners.MasterLocustRunner + ) + if is_distributed: + metric = Metric( + "locust_slave_count", "Locust number of slaves", "gauge" + ) + metric.add_sample( + "locust_slave_count", + value=len(runners.locust_runner.clients.values()), + labels={}, + ) + yield metric + + metric = Metric("locust_fail_ratio", "Locust failure ratio", "gauge") + metric.add_sample( + "locust_fail_ratio", + value=runners.locust_runner.stats.total.fail_ratio, + labels={}, + ) + yield metric + + metric = Metric("locust_state", "State of the locust swarm", "gauge") + metric.add_sample( + "locust_state", value=1, labels={"state": runners.locust_runner.state} + ) + yield metric + + stats_metrics = [ + "avg_content_length", + "avg_response_time", + "current_rps", + "max_response_time", + "median_response_time", + "min_response_time", + "num_failures", + "num_requests", + ] + + for mtr in stats_metrics: + mtype = "gauge" + if mtr in ["num_requests", "num_failures"]: + mtype = "counter" + metric = Metric( + "locust_requests_" + mtr, "Locust requests " + mtr, mtype + ) + for stat in stats: + if "Total" not in stat["name"]: + metric.add_sample( + "locust_requests_" + mtr, + value=stat[mtr], + labels={"path": stat["name"], "method": stat["method"]}, + ) + yield metric + + def exported_stats(self): + encoder, content_type = exposition.choose_encoder(request.headers.get("Accept")) + body = encoder(REGISTRY) + return Response(body, content_type=content_type) diff --git a/locust/locust_files/db_locust/db_decorator.py b/locust/locust_files/db_locust/db_decorator.py new file mode 100644 index 0000000..8fd46c7 --- /dev/null +++ b/locust/locust_files/db_locust/db_decorator.py @@ -0,0 +1,56 @@ +from locust import events +from functools import wraps +import time +from locust.exception import InterruptTaskSet + + +def locust_client_function(func): + """ + This decorator captures timings and fires the appropriate locust events + """ + + @wraps(func) + def wrapper(*args, **kwargs): + + start_time = time.time() + name = func.__name__ + + try: + response = func(*args, **kwargs) + except InterruptTaskSet as e: + _record_error_event(name, start_time, e) + raise # ensure we let Locust known exceptions bubble up + except Exception as e: + _record_error_event(name, start_time, e) + else: + total_time = _calc_time_taken(start_time) + events.request_success.fire( + request_type="db_client", + name=name, + response_time=total_time, + response_length=0, + ) + return response + + return wrapper + + 
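+# Illustrative usage (the client class and method below are placeholders, not
+# part of this repo): decorating a client method reports its timing and any
+# failure to Locust automatically.
+#
+#   class MyClient:
+#       @locust_client_function
+#       def do_work(self):
+#           ...  # fires request_success / request_failure with elapsed milliseconds
+
+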
+def _record_error_event(name, start_time, e): + """ + Record the exception as a failure into the Locust event chain + """ + total_time = _calc_time_taken(start_time) + events.request_failure.fire( + request_type="db_client", + name=name, + response_time=total_time, + response_length=0, + exception=e, + ) + + +def _calc_time_taken(start_time): + """ + Calculate the amount of seconds elapsed between current time and start_time + """ + return int((time.time() - start_time) * 1000) diff --git a/locust/locust_files/db_locust/db_locust.py b/locust/locust_files/db_locust/db_locust.py new file mode 100644 index 0000000..54252a7 --- /dev/null +++ b/locust/locust_files/db_locust/db_locust.py @@ -0,0 +1,12 @@ +from locust import Locust +from .db_client import DbClient + + +class DbLocust(Locust): + """ + Custom Locust for Databricks Operator + """ + + def __init__(self, *args, **kwargs): + super(DbLocust, self).__init__(*args, **kwargs) + self.client = DbClient() diff --git a/locust/locust_files/db_locust/db_noop_client.py b/locust/locust_files/db_locust/db_noop_client.py new file mode 100644 index 0000000..8d0245c --- /dev/null +++ b/locust/locust_files/db_locust/db_noop_client.py @@ -0,0 +1,29 @@ +from .db_decorator import locust_client_function +import logging +import time + + +class DbNoopClient: + """ + DbNoopClient exposes the no-op actions purely for diagnostics + and testing purposes + """ + + @locust_client_function + def noop_with_delay(self, delay_in_seconds): + logging.info(f"NoOp With Delay: STARTED") + + time.sleep(delay_in_seconds) + + logging.info("NoOp With Delay: COMPLETE") + + @locust_client_function + def noop(self): + logging.info(f"NoOp With Delay: STARTED") + time.sleep(0.5) + logging.info("NoOp With Delay: COMPLETE") + + @locust_client_function + def noop_with_fail(self): + logging.info(f"NoOp With Delay: STARTED") + raise Exception("Stuff happened") diff --git a/locust/locust_files/db_locust/db_run_client.py b/locust/locust_files/db_locust/db_run_client.py new file mode 100644 index 0000000..20dc4f3 --- /dev/null +++ b/locust/locust_files/db_locust/db_run_client.py @@ -0,0 +1,116 @@ +from .db_decorator import locust_client_function +import logging +import uuid +from .constant import RUN_RESOURCE, K8_GROUP, VERSION +import copy +from kubernetes import client +from kubernetes.client.rest import ApiException +from locust import InterruptTaskSet +import time + + +class DbRunClient: + """ + DbRunClient exposes the Databricks actions that a user will perform relating to Runs + """ + + def __init__(self, k8s_api): + self.api = k8s_api + + @locust_client_function + def create_run(self): + logging.info("Run Creation: STARTED") + + run_name = "run-{}".format(str(uuid.uuid4())) + run = copy.deepcopy(RUN_RESOURCE) + run["metadata"]["name"] = run_name + + try: + self.api.create_namespaced_custom_object( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + body=run, + ) + except ApiException as e: + logging.error("API Exception: %s\n" % e) + raise InterruptTaskSet(reschedule=False) + + logging.info("Run Creation: COMPLETE - {}".format(run_name)) + return run_name + + @locust_client_function + def get_run(self, run_name): + logging.info("Run Get: STARTED - %s", run_name) + + try: + resource = self.api.get_namespaced_custom_object( + group=K8_GROUP, + version=VERSION, + name=run_name, + namespace="default", + plural="runs", + ) + except ApiException as e: + logging.error("API Exception: %s\n" % e) + raise InterruptTaskSet(reschedule=False) + + return resource + + 
@locust_client_function + def delete_run(self, run_name): + logging.info("Run Deletion: STARTED - %s", run_name) + + try: + self.api.delete_namespaced_custom_object( + group=K8_GROUP, + version=VERSION, + name=run_name, + namespace="default", + plural="runs", + body=client.V1DeleteOptions(), + ) + except ApiException as e: + logging.error("API Exception: %s\n" % e) + raise InterruptTaskSet(reschedule=False) + + logging.info("Run Deletion: COMPLETE - %s", run_name) + + @locust_client_function + def poll_run_await_completion(self, run_name, max_attempts, polling_secs): + logging.info("Waiting for run to complete: STARTED - %s", run_name) + + completed = False + + for x in range(max_attempts): + run = self.get_run(run_name) + + if run is None: + raise Exception("Unable to get job") + + if "status" in run: + life_cycle_state = run["status"]["metadata"]["state"][ + "life_cycle_state" + ] + + if life_cycle_state == "TERMINATED": + logging.info("Run COMPLETED - %s" % run_name) + completed = True + break + elif ( + life_cycle_state == "SKIPPED" + or life_cycle_state == "INTERNAL_ERROR" + ): + raise Exception( + "Run COMPLETED with error - life_cycle_state: %s" + % life_cycle_state + ) + + logging.info("Run NOT yet complete: WAITING - %s" % run_name) + time.sleep(polling_secs) + + if not completed: + raise Exception("Run did not complete") + + logging.info("Waiting for run to complete: COMPLETE - %s" % run_name) diff --git a/locust/manifests/deployment.yaml b/locust/manifests/deployment.yaml new file mode 100644 index 0000000..7d77e1e --- /dev/null +++ b/locust/manifests/deployment.yaml @@ -0,0 +1,103 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: locust +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: locust-loadtest + namespace: locust + labels: + app: locust-loadtest +spec: + backoffLimit: 0 + parallelism: 1 + completions: 1 + template: + metadata: + labels: + app: locust-loadtest + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: control-plane + operator: In + values: + - controller-manager + topologyKey: kubernetes.io/hostname + namespaces: + - azure-databricks-operator-system + - default + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - databricks-mock-api + topologyKey: kubernetes.io/hostname + namespaces: + - azure-databricks-operator-system + - default + containers: + - name: loadtestrunner + image: locust:latest + command: ['locust', '-f', 'behaviours/scenario1_run_submit_delete.py'] + ports: + - containerPort: 8089 + name: webui + - containerPort: 9090 + name: metrics + restartPolicy: Never +--- +apiVersion: v1 +kind: Service +metadata: + name: locust-loadtest + namespace: locust + labels: + app: locust-loadtest +spec: + ports: + - port: 8089 + protocol: TCP + name: webui + - port: 9090 + protocol: TCP + name: metrics + selector: + app: locust-loadtest +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: locust-loadtest-servicemonitor + namespace: locust + labels: + app: locust-loadtest +spec: + selector: + matchLabels: + app: locust-loadtest + namespaceSelector: + matchNames: + - locust + endpoints: + - port: metrics + path: / +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: locust-rbac +subjects: + - kind: ServiceAccount + name: default + namespace: locust +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git 
a/locust/requirements.dev.txt b/locust/requirements.dev.txt new file mode 100644 index 0000000..eb09afb --- /dev/null +++ b/locust/requirements.dev.txt @@ -0,0 +1,5 @@ +wheel +pytest +black +flake8 +pytest-mock \ No newline at end of file diff --git a/locust/requirements.txt b/locust/requirements.txt new file mode 100644 index 0000000..747207f --- /dev/null +++ b/locust/requirements.txt @@ -0,0 +1,3 @@ +kubernetes==10.0.1 +locustio==0.13.2 +prometheus_client==0.7.1 \ No newline at end of file diff --git a/locust/setup.py b/locust/setup.py new file mode 100644 index 0000000..225b020 --- /dev/null +++ b/locust/setup.py @@ -0,0 +1,7 @@ +from setuptools import setup, find_packages + +setup( + name="DbLocust", + packages=find_packages(where="locust_files"), + package_dir={"": "locust_files"}, +) diff --git a/locust/test/unit/__init__.py b/locust/test/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/locust/test/unit/db_run_client_test.py b/locust/test/unit/db_run_client_test.py new file mode 100644 index 0000000..aaa9479 --- /dev/null +++ b/locust/test/unit/db_run_client_test.py @@ -0,0 +1,204 @@ +import uuid +import time +import pytest +from locust import events +from db_locust.constant import RUN_RESOURCE, K8_GROUP, VERSION +from db_locust.db_run_client import DbRunClient +from kubernetes import client +from kubernetes.client.rest import ApiException +from locust.exception import InterruptTaskSet + + +def test_create_run(mocker): + expectedRunName = "run-wibble" + + mocker.patch.dict(RUN_RESOURCE, {"metadata": {"name": "xyz"}}, clear=True) + + mocker.patch("uuid.uuid4") + uuid.uuid4.return_value = "wibble" + + stub = mocker.stub(name="api") + stub.create_namespaced_custom_object = mocker.stub(name="create_run_stub") + + db_run_client = DbRunClient(stub) + result = db_run_client.create_run() + + stub.create_namespaced_custom_object.assert_called_once_with( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + body={"metadata": {"name": expectedRunName}}, + ) + assert result == expectedRunName + + +def test_create_run_throws_when_create_object_fails(mocker): + mock = mocker.Mock() + mock.create_namespaced_custom_object = mocker.Mock() + mock.create_namespaced_custom_object.side_effect = ApiException("failed to create") + + db_run_client = DbRunClient(mock) + + with pytest.raises(InterruptTaskSet): + db_run_client.create_run() + + +def test_delete_run(mocker): + run_name = "test_run_wibble" + + stub = mocker.stub(name="api") + stub.delete_namespaced_custom_object = mocker.stub(name="delete_run_stub") + + db_run_client = DbRunClient(stub) + db_run_client.delete_run(run_name) + + stub.delete_namespaced_custom_object.assert_called_once_with( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + name=run_name, + body=client.V1DeleteOptions(), + ) + + +def test_delete_run_throws_when_delete_object_fails(mocker): + run_name = "test_run_wibble" + + mock = mocker.Mock() + mock.delete_namespaced_custom_object = mocker.Mock() + mock.delete_namespaced_custom_object.side_effect = ApiException("doesn't exist") + + db_run_client = DbRunClient(mock) + + with pytest.raises(InterruptTaskSet): + db_run_client.delete_run(run_name) + + +def test_get_run(mocker): + run_name = "test_run_wibble" + + mock = mocker.Mock() + mock.get_namespaced_custom_object = mocker.Mock() + mock.get_namespaced_custom_object.return_value = {"name": run_name} + + db_run_client = DbRunClient(mock) + result = db_run_client.get_run(run_name) + + 
mock.get_namespaced_custom_object.assert_called_once_with( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + name=run_name, + ) + + assert result == {"name": run_name} + + +def test_get_run_throws_when_get_object_fails(mocker): + run_name = "test_run_wibble" + + mock = mocker.Mock() + mock.get_namespaced_custom_object = mocker.Mock() + mock.get_namespaced_custom_object.side_effect = ApiException("doesn't exist") + + db_run_client = DbRunClient(mock) + + with pytest.raises(InterruptTaskSet): + db_run_client.get_run(run_name) + + +def test_poll_run_await_completion_polls_until_complete(mocker): + run_name = "test_run_poll_stops_when_run_complete" + + mocker.patch("time.sleep") + + mock = mocker.Mock() + mock.get_namespaced_custom_object = mocker.Mock() + mock.get_namespaced_custom_object.side_effect = [ + {}, + {"wibble": "wobble"}, + {"status": {"metadata": {"state": {"life_cycle_state": "wibble"}}}}, + {"status": {"metadata": {"state": {"life_cycle_state": "TERMINATED"}}}}, + ] + + db_run_client = DbRunClient(mock) + db_run_client.poll_run_await_completion(run_name, 5, 0.1) + + mock.get_namespaced_custom_object.assert_called_with( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + name=run_name, + ) + assert time.sleep.call_count == 3 + assert mock.get_namespaced_custom_object.call_count == 4 + + +def test_poll_run_await_completion_polls_throws_exception_on_invalid(mocker): + run_name = "test_run_poll_stops_when_run_complete" + + completed_life_states = ["INTERNAL_ERROR", "SKIPPED"] + for life_state in completed_life_states: + mock = mocker.Mock() + mock.get_namespaced_custom_object = mocker.Mock() + mock.get_namespaced_custom_object.side_effect = [ + {}, + {"wibble": "wobble"}, + {"status": {"metadata": {"state": {"life_cycle_state": "wibble"}}}}, + {"status": {"metadata": {"state": {"life_cycle_state": life_state}}}}, + ] + + stub = mocker.stub(name="locust_event_fired") + events.request_failure = stub + events.request_failure.fire = mocker.stub(name="fire_stub") + + db_run_client = DbRunClient(mock) + db_run_client.poll_run_await_completion(run_name, 5, 0.1) + + mock.get_namespaced_custom_object.assert_called_with( + group=K8_GROUP, + version=VERSION, + namespace="default", + plural="runs", + name=run_name, + ) + + assert mock.get_namespaced_custom_object.call_count == 4 + + events.request_failure.fire.assert_called_once_with( + request_type="db_client", + name="poll_run_await_completion", + response_length=0, + response_time=mocker.ANY, + exception=mocker.ANY, + ) + + +def test_poll_run_await_completion_fails_when_run_fails_to_complete_in_time(mocker): + run_name = "test_run_wibble" + + stub = mocker.stub(name="locust_event_fired") + mocker.patch("locust.events") + events.request_failure = stub + events.request_failure.fire = mocker.stub(name="fire_stub") + + mock = mocker.Mock() + mock.get_namespaced_custom_object = mocker.Mock() + mock.get_namespaced_custom_object.return_value = { + "status": {"metadata": {"state": {"life_cycle_state": "wibble"}}} + } + + db_run_client = DbRunClient(mock) + db_run_client.poll_run_await_completion(run_name, 2, 0.1) + + events.request_failure.fire.assert_called_once_with( + request_type="db_client", + name="poll_run_await_completion", + response_length=0, + response_time=mocker.ANY, + exception=mocker.ANY, + )