From f3e7bb0752f8ac60d878fb7da7b9e029969ac7c5 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Tue, 27 Jul 2021 18:27:01 -0400 Subject: [PATCH 1/8] Initial scripts and pipeline for stress test discovery/build/push --- eng/pipelines/stress-test-release.yml | 72 ++++++++++ tools/stress-cluster/chaos/README.md | 11 +- .../Chart.lock | 0 .../Chart.yaml | 8 +- .../network-stress-example/Dockerfile | 8 ++ .../install.sh | 0 .../poll.sh | 0 .../templates/network_loss.yaml | 0 .../templates/testjob.yaml | 2 +- .../network_stress_example/Dockerfile | 6 - .../.gitignore | 0 .../Chart.lock | 0 .../Chart.yaml | 8 +- .../install.sh | 0 .../parameters.json | 0 .../templates/deploy-job.yaml | 0 .../test-resources.bicep | 0 .../cluster/azure/parameters/prod.json | 6 + .../scripts/deploy_stress_tests.ps1 | 134 ++++++++++++++++++ .../scripts/find_all_stress_packages.ps1 | 47 ++++++ 20 files changed, 288 insertions(+), 14 deletions(-) create mode 100644 eng/pipelines/stress-test-release.yml rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/Chart.lock (100%) rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/Chart.yaml (65%) create mode 100644 tools/stress-cluster/chaos/examples/network-stress-example/Dockerfile rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/install.sh (100%) rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/poll.sh (100%) rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/templates/network_loss.yaml (100%) rename tools/stress-cluster/chaos/examples/{network_stress_example => network-stress-example}/templates/testjob.yaml (85%) delete mode 100644 tools/stress-cluster/chaos/examples/network_stress_example/Dockerfile rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/.gitignore (100%) rename 
tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/Chart.lock (100%) rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/Chart.yaml (67%) rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/install.sh (100%) rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/parameters.json (100%) rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/templates/deploy-job.yaml (100%) rename tools/stress-cluster/chaos/examples/{stress_deployment_example => stress-deployment-example}/test-resources.bicep (100%) create mode 100644 tools/stress-cluster/scripts/deploy_stress_tests.ps1 create mode 100644 tools/stress-cluster/scripts/find_all_stress_packages.ps1 diff --git a/eng/pipelines/stress-test-release.yml b/eng/pipelines/stress-test-release.yml new file mode 100644 index 00000000000..337d70d2bc4 --- /dev/null +++ b/eng/pipelines/stress-test-release.yml @@ -0,0 +1,72 @@ +pr: none + +trigger: none + +parameters: + - name: Subscription + type: string + default: 2cd617ea-1866-46b1-90e3-fffb087ebf9b + - name: Environment + type: string + default: prod + - name: ClusterName + type: string + default: stress-prod + - name: ClusterGroup + type: string + default: rg-stress-test-cluster-prod + - name: RegistryName + type: string + default: stressprodregistry + +jobs: +- job: ReleaseStressTests + strategy: + matrix: + examples: + Repository: Azure/azure-sdk-tools + Filters: '@{ "example" = "true" }' + javascript: + Repository: Azure/azure-sdk-for-js + Filters: '@{}' + java: + Repository: Azure/azure-sdk-for-java + Filters: '@{}' + net: + Repository: Azure/azure-sdk-for-net + Filters: '@{}' + python: + Repository: Azure/azure-sdk-for-python + Filters: '@{}' + pool: + name: 'azsdk-pool-mms-ubuntu-2004-general' + vmImage: 'MMSUbuntu20.04' + steps: + - template: 
/eng/common/pipelines/templates/steps/sparse-checkout.yml + parameters: + Repositories: + - Name: Azure/azure-sdk-tools + WorkingDirectory: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools + - Name: $(Repository) + WorkingDirectory: $(System.DefaultWorkingDirectory)/$(Repository) + Paths: + - '!sdk/**/test-recordings' + - '!sdk/**/session-records' + - '!sdk/**/SessionRecords' + + - task: AzureCLI@2 + displayName: Azure CLI + inputs: + azureSubscription: ${{ parameters.Subscription }} + scriptType: pscore + scriptLocation: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy_stress_tests.ps1 + arguments: + -searchDirectory $(System.DefaultWorkingDirectory)/$(Repository) ` + -filters $(Filters) ` + -environment ${{ parameters.Environment }} ` + -uploadToRegistry ${{ parameters.RegistryName }} ` + -repository $(Agent.JobName) ` + -deployId $(Build.BuildNumber) ` + -subscription ${{ parameters.Subscription }} ` + -clusterName ${{ parameters.ClusterName }} ` + -clusterGroup ${{ parameters.ClusterGroup }} diff --git a/tools/stress-cluster/chaos/README.md b/tools/stress-cluster/chaos/README.md index b5a516ff661..026a005164d 100644 --- a/tools/stress-cluster/chaos/README.md +++ b/tools/stress-cluster/chaos/README.md @@ -219,7 +219,7 @@ See the [Job Manifest section](#job-manifest) for an example spec containing con ### Helm Chart Dependencies -The `/chart/Chart.yaml` file should look something like below. It must include the `stress-test-addons` dependency: +The `/chart/Chart.yaml` file should look something like below. It must include the `stress-test-addons` dependency and the included annotations: ``` apiVersion: v2 @@ -227,6 +227,9 @@ name: description: version: 0.1.0 appVersion: v0.1 +annotations: + stressTest: 'true' + namespace: dependencies: - name: stress-test-addons @@ -341,14 +344,16 @@ Then install the stress test into the cluster: ``` kubectl create namespace kubectl label namespace owners= -helm install . 
+helm install -n . ``` To install into a different cluster (test, prod, or dev): ``` az aks get-credentials --subscription '' -g rg-stress-test-cluster- -n stress-test -helm install . --set stress-test-addons.env= +kubectl create namespace +kubectl label namespace owners= +helm install -n . --set stress-test-addons.env= ``` You can check the progress/status of your installation via: diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/Chart.lock b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock similarity index 100% rename from tools/stress-cluster/chaos/examples/network_stress_example/Chart.lock rename to tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/Chart.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml similarity index 65% rename from tools/stress-cluster/chaos/examples/network_stress_example/Chart.yaml rename to tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml index 0659cbd04d5..f0212b92c15 100644 --- a/tools/stress-cluster/chaos/examples/network_stress_example/Chart.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml @@ -1,8 +1,12 @@ apiVersion: v2 -name: network_example +name: network-example description: An example stress test chart with network chaos -version: 0.1.0 +version: 0.1.1 appVersion: v0.1 +annotations: + stressTest: 'true' + namespace: 'examples' + example: 'true' dependencies: - name: stress-test-addons diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Dockerfile b/tools/stress-cluster/chaos/examples/network-stress-example/Dockerfile new file mode 100644 index 00000000000..5cefb5cc4b2 --- /dev/null +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.14 +RUN apk add --no-cache wget +RUN apk add --no-cache bash + +ADD ./poll.sh /poll.sh +RUN chmod +x /poll.sh + 
+CMD bash /poll.sh diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/install.sh b/tools/stress-cluster/chaos/examples/network-stress-example/install.sh similarity index 100% rename from tools/stress-cluster/chaos/examples/network_stress_example/install.sh rename to tools/stress-cluster/chaos/examples/network-stress-example/install.sh diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/poll.sh b/tools/stress-cluster/chaos/examples/network-stress-example/poll.sh similarity index 100% rename from tools/stress-cluster/chaos/examples/network_stress_example/poll.sh rename to tools/stress-cluster/chaos/examples/network-stress-example/poll.sh diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/templates/network_loss.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/templates/network_loss.yaml similarity index 100% rename from tools/stress-cluster/chaos/examples/network_stress_example/templates/network_loss.yaml rename to tools/stress-cluster/chaos/examples/network-stress-example/templates/network_loss.yaml diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/templates/testjob.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml similarity index 85% rename from tools/stress-cluster/chaos/examples/network_stress_example/templates/testjob.yaml rename to tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml index d37c6e57d70..502cbf465fa 100644 --- a/tools/stress-cluster/chaos/examples/network_stress_example/templates/testjob.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: network-example - image: stresstestregistry.azurecr.io/example/networkexample:v1 + image: stresstestregistry.azurecr.io/examples/network-stress-example:v1 command: ["bash", "poll.sh"] {{- include "stress-test-addons.container-env" . 
| nindent 6 }} {{- end -}} diff --git a/tools/stress-cluster/chaos/examples/network_stress_example/Dockerfile b/tools/stress-cluster/chaos/examples/network_stress_example/Dockerfile deleted file mode 100644 index 5215ab24e0c..00000000000 --- a/tools/stress-cluster/chaos/examples/network_stress_example/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM ubuntu -RUN apt-get update && apt-get install -y wget - -ADD ./poll.sh /poll.sh - -CMD bash /poll.sh diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/.gitignore b/tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/.gitignore rename to tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/Chart.lock b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/Chart.lock rename to tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/Chart.yaml b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.yaml similarity index 67% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/Chart.yaml rename to tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.yaml index 7977e6317a8..bd7c7e462c6 100644 --- a/tools/stress-cluster/chaos/examples/stress_deployment_example/Chart.yaml +++ b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.yaml @@ -1,8 +1,12 @@ apiVersion: v2 -name: deployment_example +name: deployment-example description: An example stress test chart for performing azure resource deployments -version: 0.1.0 +version: 0.1.1 appVersion: v0.1 +annotations: + stressTest: 'true' + namespace: 'examples' + example: 'true' 
dependencies: - name: stress-test-addons diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/install.sh b/tools/stress-cluster/chaos/examples/stress-deployment-example/install.sh similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/install.sh rename to tools/stress-cluster/chaos/examples/stress-deployment-example/install.sh diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/parameters.json b/tools/stress-cluster/chaos/examples/stress-deployment-example/parameters.json similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/parameters.json rename to tools/stress-cluster/chaos/examples/stress-deployment-example/parameters.json diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/templates/deploy-job.yaml b/tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/templates/deploy-job.yaml rename to tools/stress-cluster/chaos/examples/stress-deployment-example/templates/deploy-job.yaml diff --git a/tools/stress-cluster/chaos/examples/stress_deployment_example/test-resources.bicep b/tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.bicep similarity index 100% rename from tools/stress-cluster/chaos/examples/stress_deployment_example/test-resources.bicep rename to tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.bicep diff --git a/tools/stress-cluster/cluster/azure/parameters/prod.json b/tools/stress-cluster/cluster/azure/parameters/prod.json index 94a7d2524f9..711a854de79 100644 --- a/tools/stress-cluster/cluster/azure/parameters/prod.json +++ b/tools/stress-cluster/cluster/azure/parameters/prod.json @@ -14,6 +14,12 @@ "monitoringLocation": { "value": "centralus" }, + "staticTestSecretsKeyvaultName": { + "value": "StressTestSecrets" + }, + 
"staticTestSecretsKeyvaultGroup": { + "value": "rg-StressTestSecrets" + }, "enableMonitoring": { "value": true }, diff --git a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 new file mode 100644 index 00000000000..29807cec094 --- /dev/null +++ b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 @@ -0,0 +1,134 @@ +[CmdletBinding(DefaultParameterSetName = 'Default')] +param( + [string]$searchDirectory, + [hashtable]$filters, + [string]$environment, + [string]$uploadToRegistry, + [string]$repository, + + [Parameter(Mandatory=$true)] + [string]$deployId, + + [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] + [string]$subscription, + + [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] + [string]$clusterName, + + [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] + [string]$clusterGroup +) + +. $PSScriptRoot/find_all_stress_packages.ps1 + +# Powershell does not (at time of writing) treat exit codes from external binaries +# as cause for stopping execution, so do this via a wrapper function. +# See https://github.com/PowerShell/PowerShell-RFC/pull/277 +function run() { + Write-Output "" "==> $args" "" + $command, $arguments = $args + & $command $arguments + if (!$?) { + $code = $LASTEXITCODE + Write-Output "Command '$args' failed with code: $code" + exit $code + } +} + +$runFunctionInit=[scriptblock]::create(@" +function run() { + $function:run +} +"@) + +function login([string]$subscription, [string]$clusterName, [string]$clusterGroup, [string]$uploadToRegistry) { + Write-Output "Logging in to subscription, cluster and container registry" + az account show + if (!$?) 
{ + run az login --allow-no-subscriptions + } + + run az aks get-credentials ` + -n "$clusterName" ` + -g "$clusterGroup" ` + --subscription "$subscription" ` + --overwrite-existing + + if ($uploadToRegistry) { + run az acr login -n $uploadToRegistry + } +} + +function deployStressTests( + [string]$searchDirectory = '.', + [hashtable]$filters = @{}, + [string]$environment = 'test', + [string]$uploadToRegistry, + [string]$repository = 'images', + [string]$deployId, + [string]$subscription, + [string]$clusterName, + [string]$clusterGroup +) { + if ($PSCmdlet.ParameterSetName -eq 'DoLogin') { + login $subscription $clusterName $clusterGroup $uploadToRegistry + } + + run helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ + run helm repo update + + findStressPackages $searchDirectory $filters | % { + $args = $_, $deployId, $environment, $uploadToRegistry, $repository + Write-Output "Deploying stress test at '$($_.Directory)'" + deployStressPackage @args + } + + Write-Output "Releases deployed by $deployId" + run helm list --all-namespaces -l deployId=$deployId +} + +function deployStressPackage( + [object]$pkg, + [string]$deployId, + [string]$environment, + [string]$uploadToRegistry, + [string]$repository +) { + if ($uploadToRegistry) { + run helm dependency update $pkg.Directory + + Get-ChildItem "$($pkg.Directory)/Dockerfile*" | % { + # Infer docker image name from parent directory name, if file is named `Dockerfile` + # or from suffix, is file is named like `Dockerfile.myimage` (for multiple dockerfiles). 
+ $prefix, $imageName = $_.Name.Split(".") + if (!$imageName) { + $imageName = $_.Directory.Name + } + $imageTag = "$uploadToRegistry.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" + Write-Output "Building and pushing stress test docker image '$imageTag'" + run docker build -t $imageTag -f $_.FullName $_.DirectoryName + run docker push $imageTag + } + } + + Write-Output "Creating namespace $($pkg.Namespace) if it does not exist..." + kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f - + + Write-Output "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)" + run helm upgrade $pkg.ReleaseName $pkg.Directory ` + -n $pkg.Namespace ` + --install ` + --set stress-test-addons.env=$environment + + # Helm 3 stores release information in kubernetes secrets. The only way to add extra labels around + # specific releases (thereby enabling filtering on `helm list`) is to label the underlying secret resources. + # There is not currently support for setting these labels via the helm cli. 
+ $helmReleaseConfig = kubectl get secrets ` + -n $pkg.Namespace ` + -l status!=superseded,name=$($pkg.ReleaseName) ` + -o jsonpath='{.items[0].metadata.name}' + + run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId +} + +deployStressTests @PSBoundParameters diff --git a/tools/stress-cluster/scripts/find_all_stress_packages.ps1 b/tools/stress-cluster/scripts/find_all_stress_packages.ps1 new file mode 100644 index 00000000000..97413dd17a1 --- /dev/null +++ b/tools/stress-cluster/scripts/find_all_stress_packages.ps1 @@ -0,0 +1,47 @@ +param( + [string]$searchDirectory = '.', + [hashtable]$filters = @{} +) + +class StressTestPackageInfo { + [string]$Namespace + [string]$Directory + [string]$ReleaseName +} + +function findStressPackages([string]$directory, [hashtable]$filters = @{}) { + # Bare minimum filter for stress tests + $filters['stressTest'] = 'true' + + Get-ChildItem -Recurse -Filter 'Chart.yaml' -PipelineVariable chartFile $directory + | % { parseChart $chartFile } + | ? 
{ matchesAnnotations $_ $filters } + | % { NewStressTestPackageInfo $_ $chartFile } +} + +function parseChart([string]$chartFile) { + ConvertFrom-Yaml (Get-Content -Raw $chartFile) +} + +function matchesAnnotations([hashtable]$chart, [hashtable]$filters) { + foreach ($filter in $filters.GetEnumerator()) { + if (!$chart.annotations -or $chart.annotations[$filter.Key] -ne $filter.Value) { + return $false + } + } + + return $true +} + +function NewStressTestPackageInfo([hashtable]$chart, [System.IO.FileInfo]$chartFile) { + [StressTestPackageInfo]@{ + Namespace = $chart.annotations.namespace + Directory = $chartFile.DirectoryName + ReleaseName = $chart.name + } +} + +# Don't call functions when the script is being dot sourced +if ($MyInvocation.InvocationName -ne ".") { + findStressPackages $searchDirectory $filters +} From b1569a46caed078ddcc55c4ac2bf5b3f83d36ad1 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 28 Jul 2021 14:49:20 -0400 Subject: [PATCH 2/8] Improve stress test deploy script command line flags and execution model --- .../network-stress-example/Chart.lock | 8 +- .../network-stress-example/Chart.yaml | 6 +- .../templates/testjob.yaml | 2 +- tools/stress-cluster/cluster/README.md | 1 + .../kubernetes/stress-test-addons/Chart.yaml | 2 +- .../scripts/deploy_stress_tests.ps1 | 138 +++++++++++------- .../scripts/find_all_stress_packages.ps1 | 26 ++-- 7 files changed, 109 insertions(+), 74 deletions(-) diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock index d244bc7804f..95192aeab9a 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons - repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.1.2 -digest: 
sha256:b38f530a7f691eb3f11d48809ba7f86ea9d7b226c3ecb311d1ae47fbb0585466 -generated: "2021-07-06T18:41:20.4293087-04:00" + repository: file:///home/ben/sdk/azure-sdk-tools/tools/stress-cluster/cluster/kubernetes/stress-test-addons + version: 0.2.0 +digest: sha256:a3259c4f5c639d77f6dd63a1aaa91634a6ac4810fcdbad2d5a8129889720a4d8 +generated: "2021-07-27T19:02:16.3688319-04:00" diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml index f0212b92c15..38f2aeb7a1f 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml @@ -7,8 +7,10 @@ annotations: stressTest: 'true' namespace: 'examples' example: 'true' + test: 'true' dependencies: - name: stress-test-addons - version: 0.1.2 - repository: https://stresstestcharts.blob.core.windows.net/helm/ + version: 0.2.0 + #repository: https://stresstestcharts.blob.core.windows.net/helm/ + repository: file:///home/ben/sdk/azure-sdk-tools/tools/stress-cluster/cluster/kubernetes/stress-test-addons diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml index 502cbf465fa..f63cf72ac50 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: network-example - image: stresstestregistry.azurecr.io/examples/network-stress-example:v1 command: ["bash", "poll.sh"] + image: {{ default "stresstestregistry" .Values.registry }}.azurecr.io/{{ default "images" .Values.repository }}/network-stress-example:v1 {{- include "stress-test-addons.container-env" . 
| nindent 6 }} {{- end -}} diff --git a/tools/stress-cluster/cluster/README.md b/tools/stress-cluster/cluster/README.md index 753461bbc14..e84c8d5dce0 100644 --- a/tools/stress-cluster/cluster/README.md +++ b/tools/stress-cluster/cluster/README.md @@ -98,6 +98,7 @@ To remove Azure resources: ``` az group delete +az keyvault purge -n ``` # Building out the Main/Prod Testing Cluster diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml index d3bad08f873..cd2a65fdc5a 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: stress-test-addons description: Baseline resources and templates for stress testing clusters -version: 0.1.2 +version: 0.2.0 appVersion: v0.1 diff --git a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 index 29807cec094..573fba61dc5 100644 --- a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 +++ b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 @@ -1,113 +1,136 @@ [CmdletBinding(DefaultParameterSetName = 'Default')] param( - [string]$searchDirectory, - [hashtable]$filters, - [string]$environment, - [string]$uploadToRegistry, - [string]$repository, - - [Parameter(Mandatory=$true)] - [string]$deployId, + [string]$SearchDirectory, + [hashtable]$Filters, + [string]$Environment, + [string]$Repository, + [switch]$PushImages, + [string]$ClusterGroup, + [string]$DeployId, [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] - [string]$subscription, + [switch]$Login, - [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] - [string]$clusterName, - - [Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)] - [string]$clusterGroup + [Parameter(ParameterSetName = 'DoLogin')] + [string]$Subscription ) +$ErrorActionPreference = 'Stop' + 
+$FailedCommands = New-Object Collections.Generic.List[hashtable] + . $PSScriptRoot/find_all_stress_packages.ps1 # Powershell does not (at time of writing) treat exit codes from external binaries # as cause for stopping execution, so do this via a wrapper function. # See https://github.com/PowerShell/PowerShell-RFC/pull/277 -function run() { +function Run() { Write-Output "" "==> $args" "" $command, $arguments = $args & $command $arguments - if (!$?) { - $code = $LASTEXITCODE - Write-Output "Command '$args' failed with code: $code" - exit $code + if ($LASTEXITCODE) { + Write-Error "Command '$args' failed with code: $LASTEXITCODE" -ErrorAction 'Continue' + $FailedCommands.Add(@{ command = "$args"; code = $LASTEXITCODE }) } } -$runFunctionInit=[scriptblock]::create(@" -function run() { - $function:run +function RunOrExit() { + run @args + if ($LASTEXITCODE) { + exit $LASTEXITCODE + } } -"@) -function login([string]$subscription, [string]$clusterName, [string]$clusterGroup, [string]$uploadToRegistry) { +function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImages) { Write-Output "Logging in to subscription, cluster and container registry" az account show - if (!$?) 
{ - run az login --allow-no-subscriptions + if ($LASTEXITCODE) { + RunOrExit az login --allow-no-subscriptions } - run az aks get-credentials ` + $clusterName = (az aks list -g $clusterGroup -o json| ConvertFrom-Json).name + + RunOrExit az aks get-credentials ` -n "$clusterName" ` -g "$clusterGroup" ` --subscription "$subscription" ` --overwrite-existing - if ($uploadToRegistry) { - run az acr login -n $uploadToRegistry + if ($pushImages) { + $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name + RunOrExit az acr login -n $registry } } -function deployStressTests( +function DeployStressTests( [string]$searchDirectory = '.', [hashtable]$filters = @{}, [string]$environment = 'test', - [string]$uploadToRegistry, [string]$repository = 'images', - [string]$deployId, - [string]$subscription, - [string]$clusterName, - [string]$clusterGroup + [boolean]$pushImages = $false, + [string]$clusterGroup = 'rg-stress-test-cluster-', + [string]$deployId = 'local', + [string]$subscription = 'Azure SDK Test Resources' ) { if ($PSCmdlet.ParameterSetName -eq 'DoLogin') { - login $subscription $clusterName $clusterGroup $uploadToRegistry + Login $subscription $clusterGroup $pushImages } - run helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ - run helm repo update + RunOrExit helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ + Run helm repo update + if ($LASTEXITCODE) { return $LASTEXITCODE } - findStressPackages $searchDirectory $filters | % { - $args = $_, $deployId, $environment, $uploadToRegistry, $repository - Write-Output "Deploying stress test at '$($_.Directory)'" - deployStressPackage @args + $pkgs = FindStressPackages $searchDirectory $filters + Write-Output "" "Found $($pkgs.Length) stress test packages:" + Write-Output $pkgs.Directory "" + foreach ($pkg in $pkgs) { + Write-Output "Deploying stress test at '$($pkg.Directory)'" + DeployStressPackage $pkg $deployId $environment 
$repository $pushImages } Write-Output "Releases deployed by $deployId" - run helm list --all-namespaces -l deployId=$deployId + Run helm list --all-namespaces -l deployId=$deployId + + if ($FailedCommands) { + Write-Warning "" "The following commands failed:" "" + foreach ($cmd in $FailedCommands) { + Write-Error "'$($cmd.command)' failed with code $($cmd.code)" -ErrorAction 'Continue' + } + exit 1 + } } -function deployStressPackage( +function DeployStressPackage( [object]$pkg, [string]$deployId, [string]$environment, - [string]$uploadToRegistry, - [string]$repository + [string]$repository, + [boolean]$pushImages ) { - if ($uploadToRegistry) { - run helm dependency update $pkg.Directory + $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name + if (!$registry) { + Write-Output "Could not find container registry in resource group $clusterGroup" + exit 1 + } + + if ($pushImages) { + Run helm dependency update $pkg.Directory + if ($LASTEXITCODE) { return $LASTEXITCODE } - Get-ChildItem "$($pkg.Directory)/Dockerfile*" | % { + $dockerFiles = Get-ChildItem "$($pkg.Directory)/Dockerfile*" + foreach ($dockerFile in $dockerFiles) { # Infer docker image name from parent directory name, if file is named `Dockerfile` # or from suffix, is file is named like `Dockerfile.myimage` (for multiple dockerfiles). 
- $prefix, $imageName = $_.Name.Split(".") + $prefix, $imageName = $dockerFile.Name.Split(".") if (!$imageName) { - $imageName = $_.Directory.Name + $imageName = $dockerFile.Directory.Name } - $imageTag = "$uploadToRegistry.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" + $imageTag = "$registry.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" Write-Output "Building and pushing stress test docker image '$imageTag'" - run docker build -t $imageTag -f $_.FullName $_.DirectoryName - run docker push $imageTag + Run docker build -t $imageTag -f $dockerFile.FullName $dockerFile.DirectoryName + if ($LASTEXITCODE) { return $LASTEXITCODE } + Run docker push $imageTag + if ($LASTEXITCODE) { return $LASTEXITCODE } } } @@ -115,10 +138,13 @@ function deployStressPackage( kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f - Write-Output "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)" - run helm upgrade $pkg.ReleaseName $pkg.Directory ` + Run helm upgrade $pkg.ReleaseName $pkg.Directory ` -n $pkg.Namespace ` --install ` + --set registry=$registry ` + --set repository=$repository ` --set stress-test-addons.env=$environment + if ($LASTEXITCODE) { return $LASTEXITCODE } # Helm 3 stores release information in kubernetes secrets. The only way to add extra labels around # specific releases (thereby enabling filtering on `helm list`) is to label the underlying secret resources. 
@@ -128,7 +154,7 @@ function deployStressPackage( -l status!=superseded,name=$($pkg.ReleaseName) ` -o jsonpath='{.items[0].metadata.name}' - run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId + Run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId } deployStressTests @PSBoundParameters diff --git a/tools/stress-cluster/scripts/find_all_stress_packages.ps1 b/tools/stress-cluster/scripts/find_all_stress_packages.ps1 index 97413dd17a1..4d567926f9f 100644 --- a/tools/stress-cluster/scripts/find_all_stress_packages.ps1 +++ b/tools/stress-cluster/scripts/find_all_stress_packages.ps1 @@ -9,21 +9,27 @@ class StressTestPackageInfo { [string]$ReleaseName } -function findStressPackages([string]$directory, [hashtable]$filters = @{}) { +function FindStressPackages([string]$directory, [hashtable]$filters = @{}) { # Bare minimum filter for stress tests $filters['stressTest'] = 'true' - Get-ChildItem -Recurse -Filter 'Chart.yaml' -PipelineVariable chartFile $directory - | % { parseChart $chartFile } - | ? 
{ matchesAnnotations $_ $filters } - | % { NewStressTestPackageInfo $_ $chartFile } + $packages = @() + $chartFiles = Get-ChildItem -Recurse -Filter 'Chart.yaml' $directory + foreach ($chartFile in $chartFiles) { + $chart = ParseChart $chartFile + if (matchesAnnotations $chart $filters) { + $packages += NewStressTestPackageInfo $chart $chartFile + } + } + + return $packages } -function parseChart([string]$chartFile) { - ConvertFrom-Yaml (Get-Content -Raw $chartFile) +function ParseChart([string]$chartFile) { + return ConvertFrom-Yaml (Get-Content -Raw $chartFile) } -function matchesAnnotations([hashtable]$chart, [hashtable]$filters) { +function MatchesAnnotations([hashtable]$chart, [hashtable]$filters) { foreach ($filter in $filters.GetEnumerator()) { if (!$chart.annotations -or $chart.annotations[$filter.Key] -ne $filter.Value) { return $false @@ -34,7 +40,7 @@ function matchesAnnotations([hashtable]$chart, [hashtable]$filters) { } function NewStressTestPackageInfo([hashtable]$chart, [System.IO.FileInfo]$chartFile) { - [StressTestPackageInfo]@{ + return [StressTestPackageInfo]@{ Namespace = $chart.annotations.namespace Directory = $chartFile.DirectoryName ReleaseName = $chart.name @@ -43,5 +49,5 @@ function NewStressTestPackageInfo([hashtable]$chart, [System.IO.FileInfo]$chartF # Don't call functions when the script is being dot sourced if ($MyInvocation.InvocationName -ne ".") { - findStressPackages $searchDirectory $filters + FindStressPackages $searchDirectory $filters } From 78266beadf70614c967b7922bc7d1e5b480a09b7 Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 28 Jul 2021 16:10:06 -0400 Subject: [PATCH 3/8] Update stress test release pipeline arguments --- eng/pipelines/stress-test-release.yml | 28 +++++++++++---------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/eng/pipelines/stress-test-release.yml b/eng/pipelines/stress-test-release.yml index 337d70d2bc4..3b968bdf70c 100644 --- 
a/eng/pipelines/stress-test-release.yml +++ b/eng/pipelines/stress-test-release.yml @@ -5,19 +5,13 @@ trigger: none parameters: - name: Subscription type: string - default: 2cd617ea-1866-46b1-90e3-fffb087ebf9b + default: 'Azure SDK Test Resources' - name: Environment type: string default: prod - - name: ClusterName - type: string - default: stress-prod - name: ClusterGroup type: string default: rg-stress-test-cluster-prod - - name: RegistryName - type: string - default: stressprodregistry jobs: - job: ReleaseStressTests @@ -59,14 +53,14 @@ jobs: inputs: azureSubscription: ${{ parameters.Subscription }} scriptType: pscore - scriptLocation: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy_stress_tests.ps1 + scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy_stress_tests.ps1 arguments: - -searchDirectory $(System.DefaultWorkingDirectory)/$(Repository) ` - -filters $(Filters) ` - -environment ${{ parameters.Environment }} ` - -uploadToRegistry ${{ parameters.RegistryName }} ` - -repository $(Agent.JobName) ` - -deployId $(Build.BuildNumber) ` - -subscription ${{ parameters.Subscription }} ` - -clusterName ${{ parameters.ClusterName }} ` - -clusterGroup ${{ parameters.ClusterGroup }} + -SearchDirectory $(System.DefaultWorkingDirectory)/$(Repository) ` + -Filters $(Filters) ` + -Environment ${{ parameters.Environment }} ` + -Repository $(Agent.JobName) ` + -PushImages ` + -ClusterGroup ${{ parameters.ClusterGroup }} ` + -Login ` + -Subscription ${{ parameters.Subscription }} ` + -DeployId $(Build.BuildNumber) ` From c5d494a5b0de20a70050994da19fd23cefe059bf Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 28 Jul 2021 17:22:33 -0400 Subject: [PATCH 4/8] Fixes to get everything working --- eng/pipelines/stress-test-release.yml | 81 ++++++++++++------- .../network-stress-example/Chart.lock | 8 +- .../network-stress-example/Chart.yaml | 5 +- 
.../templates/testjob.yaml | 2 +- tools/stress-cluster/cluster/README.md | 2 +- .../kubernetes/stress-test-addons/.gitignore | 1 - .../kubernetes/stress-test-addons/Chart.yaml | 2 +- .../kubernetes/stress-test-addons/deploy.sh | 9 ++- .../kubernetes/stress-test-addons/index.yaml | 13 +++ .../kubernetes/stress-test-addons/values.yaml | 6 +- .../scripts/deploy_stress_tests.ps1 | 52 +++++++----- 11 files changed, 118 insertions(+), 63 deletions(-) create mode 100644 tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml diff --git a/eng/pipelines/stress-test-release.yml b/eng/pipelines/stress-test-release.yml index 3b968bdf70c..2558f5a29ef 100644 --- a/eng/pipelines/stress-test-release.yml +++ b/eng/pipelines/stress-test-release.yml @@ -12,55 +12,80 @@ parameters: - name: ClusterGroup type: string default: rg-stress-test-cluster-prod + - name: TestRepository + displayName: Stress Test Repository + type: string + default: all + values: + - all + - examples + - javascript + - java + - net + - python + - go jobs: -- job: ReleaseStressTests +- job: strategy: matrix: - examples: - Repository: Azure/azure-sdk-tools - Filters: '@{ "example" = "true" }' - javascript: - Repository: Azure/azure-sdk-for-js - Filters: '@{}' - java: - Repository: Azure/azure-sdk-for-java - Filters: '@{}' - net: - Repository: Azure/azure-sdk-for-net - Filters: '@{}' - python: - Repository: Azure/azure-sdk-for-python - Filters: '@{}' + ${{ if or(eq(parameters.TestRepository, 'examples'), eq(parameters.TestRepository, 'all')) }}: + examples: + Repository: Azure/azure-sdk-tools + Filters: '@{ "example" = "true" }' + ${{ if or(eq(parameters.TestRepository, 'javascript'), eq(parameters.TestRepository, 'all')) }}: + javascript: + Repository: Azure/azure-sdk-for-js + Filters: '@{}' + ${{ if or(eq(parameters.TestRepository, 'java'), eq(parameters.TestRepository, 'all')) }}: + java: + Repository: Azure/azure-sdk-for-java + Filters: '@{}' + ${{ if or(eq(parameters.TestRepository, 'net'), 
eq(parameters.TestRepository, 'all')) }}: + net: + Repository: Azure/azure-sdk-for-net + Filters: '@{}' + ${{ if or(eq(parameters.TestRepository, 'python'), eq(parameters.TestRepository, 'all')) }}: + python: + Repository: Azure/azure-sdk-for-python + Filters: '@{}' + ${{ if or(eq(parameters.TestRepository, 'go'), eq(parameters.TestRepository, 'all')) }}: + go: + Repository: Azure/azure-sdk-for-go + Filters: '@{}' pool: - name: 'azsdk-pool-mms-ubuntu-2004-general' - vmImage: 'MMSUbuntu20.04' + vmImage: 'ubuntu-20.04' + #name: 'azsdk-pool-mms-ubuntu-2004-general' + #vmImage: 'MMSUbuntu20.04' steps: - template: /eng/common/pipelines/templates/steps/sparse-checkout.yml parameters: Repositories: - Name: Azure/azure-sdk-tools + Commitish: $(Build.SourceVersion) WorkingDirectory: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools - Name: $(Repository) + Commitish: $(Build.SourceVersion) WorkingDirectory: $(System.DefaultWorkingDirectory)/$(Repository) Paths: + - '/tools' - '!sdk/**/test-recordings' - '!sdk/**/session-records' - '!sdk/**/SessionRecords' - task: AzureCLI@2 - displayName: Azure CLI + displayName: Build and Deploy Stress Tests inputs: azureSubscription: ${{ parameters.Subscription }} scriptType: pscore scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy_stress_tests.ps1 arguments: - -SearchDirectory $(System.DefaultWorkingDirectory)/$(Repository) ` - -Filters $(Filters) ` - -Environment ${{ parameters.Environment }} ` - -Repository $(Agent.JobName) ` - -PushImages ` - -ClusterGroup ${{ parameters.ClusterGroup }} ` - -Login ` - -Subscription ${{ parameters.Subscription }} ` - -DeployId $(Build.BuildNumber) ` + -SearchDirectory '$(System.DefaultWorkingDirectory)/$(Repository)' + -Filters $(Filters) + -Environment '${{ parameters.Environment }}' + -Repository '$(Agent.JobName)' + -PushImages + -ClusterGroup '${{ parameters.ClusterGroup }}' + -Login + -Subscription '${{ parameters.Subscription }}' + 
-DeployId '$(Build.BuildNumber)' diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock index 95192aeab9a..7b4d61db46f 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons - repository: file:///home/ben/sdk/azure-sdk-tools/tools/stress-cluster/cluster/kubernetes/stress-test-addons - version: 0.2.0 -digest: sha256:a3259c4f5c639d77f6dd63a1aaa91634a6ac4810fcdbad2d5a8129889720a4d8 -generated: "2021-07-27T19:02:16.3688319-04:00" + repository: https://stresstestcharts.blob.core.windows.net/helm/ + version: 0.1.2 +digest: sha256:b38f530a7f691eb3f11d48809ba7f86ea9d7b226c3ecb311d1ae47fbb0585466 +generated: "2021-07-28T22:23:31.0555163-04:00" diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml index 38f2aeb7a1f..ecfea179cab 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/Chart.yaml @@ -11,6 +11,5 @@ annotations: dependencies: - name: stress-test-addons - version: 0.2.0 - #repository: https://stresstestcharts.blob.core.windows.net/helm/ - repository: file:///home/ben/sdk/azure-sdk-tools/tools/stress-cluster/cluster/kubernetes/stress-test-addons + version: 0.1.2 + repository: https://stresstestcharts.blob.core.windows.net/helm/ diff --git a/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml index f63cf72ac50..c4448ef19a0 100644 --- a/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml +++ b/tools/stress-cluster/chaos/examples/network-stress-example/templates/testjob.yaml @@ 
-9,6 +9,6 @@ spec: containers: - name: network-example command: ["bash", "poll.sh"] - image: {{ default "stresstestregistry" .Values.registry }}.azurecr.io/{{ default "images" .Values.repository }}/network-stress-example:v1 + image: {{ default "" .Values.repository }}/network-stress-example:{{ default "v1" .Values.tag }} {{- include "stress-test-addons.container-env" . | nindent 6 }} {{- end -}} diff --git a/tools/stress-cluster/cluster/README.md b/tools/stress-cluster/cluster/README.md index e84c8d5dce0..c957217ddbd 100644 --- a/tools/stress-cluster/cluster/README.md +++ b/tools/stress-cluster/cluster/README.md @@ -153,7 +153,7 @@ helm dependency update ./kubernetes/stress-infrastructure helm install stress-infra -n stress-infra --create-namespace ./kubernetes/stress-infrastructure ``` -Copy the deployment outputs to `./kubernetes/environments/` and check in the changes. +Update the values in `./kubernetes/stress-test-addons/values.yaml` to match the deployment outputs and check in the changes. 
``` az deployment sub show -o json -n --query properties.outputs diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/.gitignore b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/.gitignore index 07062695067..aa1ec1ea061 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/.gitignore +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/.gitignore @@ -1,2 +1 @@ -index.yaml *.tgz diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml index cd2a65fdc5a..d3bad08f873 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: stress-test-addons description: Baseline resources and templates for stress testing clusters -version: 0.2.0 +version: 0.1.2 appVersion: v0.1 diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/deploy.sh b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/deploy.sh index 3c8413d0d33..076f38f282c 100755 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/deploy.sh +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/deploy.sh @@ -1,11 +1,16 @@ export AZURE_STORAGE_ACCOUNT=stresstestcharts -# AZURE_STORAGE_KEY must be exported too +# AZURE_STORAGE_KEY must be exported too, run the below command to get the key: +# az storage account keys list --account-name stresstestcharts -o json --query '[0].value' rm *.tgz -rm index.yaml helm package . helm repo index --url https://stresstestcharts.blob.core.windows.net/helm/ . 
az storage blob upload --container-name helm --file index.yaml --name index.yaml az storage blob upload --container-name helm --file *.tgz --name *.tgz + +# index.yaml must be kept up to date, otherwise when helm generates the file, it will not +# merge it with previous entries, and those packages will become inaccessible as they are no +# longer indexed. +echo "COMMIT CHANGES MADE TO `index.yaml`" diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml new file mode 100644 index 00000000000..b08d3dad1d3 --- /dev/null +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/index.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +entries: + stress-test-addons: + - apiVersion: v2 + appVersion: v0.1 + created: "2021-07-28T22:24:58.3999792-04:00" + description: Baseline resources and templates for stress testing clusters + digest: cce228906811f1b39db7bcd031c94192751caa0a94f7c6035e21e3de8fc5858d + name: stress-test-addons + urls: + - https://stresstestcharts.blob.core.windows.net/helm/stress-test-addons-0.1.2.tgz + version: 0.1.2 +generated: "2021-07-28T22:24:58.3988616-04:00" diff --git a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml index 2370f9f0025..27069a7ce4e 100644 --- a/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml +++ b/tools/stress-cluster/cluster/kubernetes/stress-test-addons/values.yaml @@ -2,7 +2,7 @@ env: test appInsightsKeySecretName: test: appInsightsInstrumentationKey-uj7jqs4ukw2gi - prod: 'not-specified' + prod: appInsightsInstrumentationKey-dqojlttkovp2c dev: 'not-specified' staticTestSecretsKeyvaultName: test: StressTestSecrets @@ -10,11 +10,11 @@ staticTestSecretsKeyvaultName: dev: 'not-specified' clusterTestSecretsKeyvaultName: test: stress-kv-uj7jqs4ukw2gi - prod: 'not-specified' + prod: stress-kv-dqojlttkovp2c dev: 'not-specified' 
secretProviderIdentity: test: bc7712b9-1622-4b7f-9943-604c73cda131 - prod: 'not-specified' + prod: ea706f92-1d9a-4611-9cde-8305aa3d9e98 dev: 'not-specified' subscription: test: public diff --git a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 index 573fba61dc5..edf07205535 100644 --- a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 +++ b/tools/stress-cluster/scripts/deploy_stress_tests.ps1 @@ -17,15 +17,18 @@ param( $ErrorActionPreference = 'Stop' +. $PSScriptRoot/find_all_stress_packages.ps1 $FailedCommands = New-Object Collections.Generic.List[hashtable] -. $PSScriptRoot/find_all_stress_packages.ps1 +if (!(Get-Module powershell-yaml)) { + Install-Module -Name powershell-yaml -RequiredVersion 0.4.1 -Force -Scope CurrentUser +} # Powershell does not (at time of writing) treat exit codes from external binaries # as cause for stopping execution, so do this via a wrapper function. # See https://github.com/PowerShell/PowerShell-RFC/pull/277 function Run() { - Write-Output "" "==> $args" "" + Write-Host "`n==> $args`n" -ForegroundColor Green $command, $arguments = $args & $command $arguments if ($LASTEXITCODE) { @@ -42,7 +45,7 @@ function RunOrExit() { } function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImages) { - Write-Output "Logging in to subscription, cluster and container registry" + Write-Host "Logging in to subscription, cluster and container registry" az account show if ($LASTEXITCODE) { RunOrExit az login --allow-no-subscriptions @@ -81,18 +84,18 @@ function DeployStressTests( if ($LASTEXITCODE) { return $LASTEXITCODE } $pkgs = FindStressPackages $searchDirectory $filters - Write-Output "" "Found $($pkgs.Length) stress test packages:" - Write-Output $pkgs.Directory "" + Write-Host "" "Found $($pkgs.Length) stress test packages:" + Write-Host $pkgs.Directory "" foreach ($pkg in $pkgs) { - Write-Output "Deploying stress test at '$($pkg.Directory)'" + Write-Host 
"Deploying stress test at '$($pkg.Directory)'" DeployStressPackage $pkg $deployId $environment $repository $pushImages } - Write-Output "Releases deployed by $deployId" + Write-Host "Releases deployed by $deployId" Run helm list --all-namespaces -l deployId=$deployId if ($FailedCommands) { - Write-Warning "" "The following commands failed:" "" + Write-Warning "The following commands failed:" foreach ($cmd in $FailedCommands) { Write-Error "'$($cmd.command)' failed with code $($cmd.code)" -ErrorAction 'Continue' } @@ -109,7 +112,7 @@ function DeployStressPackage( ) { $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name if (!$registry) { - Write-Output "Could not find container registry in resource group $clusterGroup" + Write-Host "Could not find container registry in resource group $clusterGroup" exit 1 } @@ -125,36 +128,47 @@ function DeployStressPackage( if (!$imageName) { $imageName = $dockerFile.Directory.Name } - $imageTag = "$registry.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" - Write-Output "Building and pushing stress test docker image '$imageTag'" + $imageTag = "${registry}.azurecr.io/$($repository.ToLower())/$($imageName):$deployId" + Write-Host "Building and pushing stress test docker image '$imageTag'" Run docker build -t $imageTag -f $dockerFile.FullName $dockerFile.DirectoryName if ($LASTEXITCODE) { return $LASTEXITCODE } Run docker push $imageTag - if ($LASTEXITCODE) { return $LASTEXITCODE } + if ($LASTEXITCODE) { + if ($PSCmdlet.ParameterSetName -ne 'DoLogin') { + Write-Warning "If docker push is failing due to authentication issues, try calling this script with '-Login'" + } + return $LASTEXITCODE + } } } - Write-Output "Creating namespace $($pkg.Namespace) if it does not exist..." + Write-Host "Creating namespace $($pkg.Namespace) if it does not exist..." 
kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f - - Write-Output "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)" + Write-Host "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)" Run helm upgrade $pkg.ReleaseName $pkg.Directory ` -n $pkg.Namespace ` --install ` - --set registry=$registry ` - --set repository=$repository ` + --set repository=$registry.azurecr.io/$repository ` + --set tag=$deployId ` --set stress-test-addons.env=$environment - if ($LASTEXITCODE) { return $LASTEXITCODE } + if ($LASTEXITCODE) { + # Issues like 'UPGRADE FAILED: another operation (install/upgrade/rollback) is in progress' + # can be the result of cancelled `upgrade` operations (e.g. ctrl-c). + # See https://github.com/helm/helm/issues/4558 + Write-Warning "The issue may be fixable by first running 'helm rollback -n $($pkg.Namespace) $($pkg.ReleaseName)'" + return $LASTEXITCODE + } # Helm 3 stores release information in kubernetes secrets. The only way to add extra labels around # specific releases (thereby enabling filtering on `helm list`) is to label the underlying secret resources. # There is not currently support for setting these labels via the helm cli. 
$helmReleaseConfig = kubectl get secrets ` -n $pkg.Namespace ` - -l status!=superseded,name=$($pkg.ReleaseName) ` + -l status=deployed,name=$($pkg.ReleaseName) ` -o jsonpath='{.items[0].metadata.name}' Run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId } -deployStressTests @PSBoundParameters +DeployStressTests @PSBoundParameters From 368bf5c1b57a31e4b1f7ba9000151ab9f5747b8b Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Wed, 28 Jul 2021 22:56:35 -0400 Subject: [PATCH 5/8] Check in stress deployment example compiled test-resources.json --- .../stress-deployment-example/.gitignore | 1 - .../test-resources.json | 41 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) delete mode 100644 tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore create mode 100644 tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.json diff --git a/tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore b/tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore deleted file mode 100644 index 3f3338d82dc..00000000000 --- a/tools/stress-cluster/chaos/examples/stress-deployment-example/.gitignore +++ /dev/null @@ -1 +0,0 @@ -test-resources.json diff --git a/tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.json b/tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.json new file mode 100644 index 00000000000..5a9462a27b9 --- /dev/null +++ b/tools/stress-cluster/chaos/examples/stress-deployment-example/test-resources.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.4.63.48766", + "templateHash": "658186316551815960" + } + }, + "parameters": { + "groupName": { + "type": "string" + }, + "location": { + "type": "string" 
+ }, + "now": { + "type": "string", + "defaultValue": "[utcNow('u')]" + } + }, + "functions": [], + "resources": [ + { + "type": "Microsoft.Resources/resourceGroups", + "apiVersion": "2020-10-01", + "name": "[format('rg-{0}-{1}', parameters('groupName'), uniqueString(parameters('now')))]", + "location": "[parameters('location')]", + "tags": { + "DeleteAfter": "[dateTimeAdd(parameters('now'), 'PT8H')]" + } + } + ], + "outputs": { + "RESOURCE_GROUP": { + "type": "string", + "value": "[format('rg-{0}-{1}', parameters('groupName'), uniqueString(parameters('now')))]" + } + } +} \ No newline at end of file From 5ae04d852fbc027b8877d4a64c260084c2ef64bf Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Thu, 29 Jul 2021 01:54:41 -0400 Subject: [PATCH 6/8] Change stress test script filename convention --- eng/pipelines/stress-test-release.yml | 2 +- .../{deploy_stress_tests.ps1 => deploy-stress-tests.ps1} | 2 +- ...ind_all_stress_packages.ps1 => find-all-stress-packages.ps1} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tools/stress-cluster/scripts/{deploy_stress_tests.ps1 => deploy-stress-tests.ps1} (99%) rename tools/stress-cluster/scripts/{find_all_stress_packages.ps1 => find-all-stress-packages.ps1} (100%) diff --git a/eng/pipelines/stress-test-release.yml b/eng/pipelines/stress-test-release.yml index 2558f5a29ef..f53b0afa14a 100644 --- a/eng/pipelines/stress-test-release.yml +++ b/eng/pipelines/stress-test-release.yml @@ -78,7 +78,7 @@ jobs: inputs: azureSubscription: ${{ parameters.Subscription }} scriptType: pscore - scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy_stress_tests.ps1 + scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy-stress-tests.ps1 arguments: -SearchDirectory '$(System.DefaultWorkingDirectory)/$(Repository)' -Filters $(Filters) diff --git a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 
b/tools/stress-cluster/scripts/deploy-stress-tests.ps1 similarity index 99% rename from tools/stress-cluster/scripts/deploy_stress_tests.ps1 rename to tools/stress-cluster/scripts/deploy-stress-tests.ps1 index edf07205535..b30a5f5c22e 100644 --- a/tools/stress-cluster/scripts/deploy_stress_tests.ps1 +++ b/tools/stress-cluster/scripts/deploy-stress-tests.ps1 @@ -17,7 +17,7 @@ param( $ErrorActionPreference = 'Stop' -. $PSScriptRoot/find_all_stress_packages.ps1 +. $PSScriptRoot/find-all-stress-packages.ps1 $FailedCommands = New-Object Collections.Generic.List[hashtable] if (!(Get-Module powershell-yaml)) { diff --git a/tools/stress-cluster/scripts/find_all_stress_packages.ps1 b/tools/stress-cluster/scripts/find-all-stress-packages.ps1 similarity index 100% rename from tools/stress-cluster/scripts/find_all_stress_packages.ps1 rename to tools/stress-cluster/scripts/find-all-stress-packages.ps1 From 7a239ffe6c53722874ab8a054be42e2bad961b1f Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Thu, 29 Jul 2021 02:17:21 -0400 Subject: [PATCH 7/8] Function naming and output improvements --- tools/stress-cluster/scripts/deploy-stress-tests.ps1 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/stress-cluster/scripts/deploy-stress-tests.ps1 b/tools/stress-cluster/scripts/deploy-stress-tests.ps1 index b30a5f5c22e..1d8d958a2cd 100644 --- a/tools/stress-cluster/scripts/deploy-stress-tests.ps1 +++ b/tools/stress-cluster/scripts/deploy-stress-tests.ps1 @@ -37,7 +37,7 @@ function Run() { } } -function RunOrExit() { +function RunOrExitOnFailure() { run @args if ($LASTEXITCODE) { exit $LASTEXITCODE @@ -46,14 +46,14 @@ function RunOrExit() { function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImages) { Write-Host "Logging in to subscription, cluster and container registry" - az account show + az account show *> $null if ($LASTEXITCODE) { - RunOrExit az login --allow-no-subscriptions + RunOrExitOnFailure az login 
--allow-no-subscriptions } $clusterName = (az aks list -g $clusterGroup -o json| ConvertFrom-Json).name - RunOrExit az aks get-credentials ` + RunOrExitOnFailure az aks get-credentials ` -n "$clusterName" ` -g "$clusterGroup" ` --subscription "$subscription" ` @@ -61,7 +61,7 @@ function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImage if ($pushImages) { $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name - RunOrExit az acr login -n $registry + RunOrExitOnFailure az acr login -n $registry } } @@ -79,7 +79,7 @@ function DeployStressTests( Login $subscription $clusterGroup $pushImages } - RunOrExit helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ + RunOrExitOnFailure helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/ Run helm repo update if ($LASTEXITCODE) { return $LASTEXITCODE } From c2dd8b27e073a37951e1f44f78ac9be76814c0bd Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Thu, 29 Jul 2021 02:24:28 -0400 Subject: [PATCH 8/8] Move stress testing scripts to eng/common --- .../common/scripts/stress-testing}/deploy-stress-tests.ps1 | 0 .../common/scripts/stress-testing}/find-all-stress-packages.ps1 | 0 eng/pipelines/stress-test-release.yml | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename {tools/stress-cluster/scripts => eng/common/scripts/stress-testing}/deploy-stress-tests.ps1 (100%) rename {tools/stress-cluster/scripts => eng/common/scripts/stress-testing}/find-all-stress-packages.ps1 (100%) diff --git a/tools/stress-cluster/scripts/deploy-stress-tests.ps1 b/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 similarity index 100% rename from tools/stress-cluster/scripts/deploy-stress-tests.ps1 rename to eng/common/scripts/stress-testing/deploy-stress-tests.ps1 diff --git a/tools/stress-cluster/scripts/find-all-stress-packages.ps1 b/eng/common/scripts/stress-testing/find-all-stress-packages.ps1 similarity index 100% rename 
from tools/stress-cluster/scripts/find-all-stress-packages.ps1 rename to eng/common/scripts/stress-testing/find-all-stress-packages.ps1 diff --git a/eng/pipelines/stress-test-release.yml b/eng/pipelines/stress-test-release.yml index f53b0afa14a..60507f6c337 100644 --- a/eng/pipelines/stress-test-release.yml +++ b/eng/pipelines/stress-test-release.yml @@ -78,7 +78,7 @@ jobs: inputs: azureSubscription: ${{ parameters.Subscription }} scriptType: pscore - scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/tools/stress-cluster/scripts/deploy-stress-tests.ps1 + scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/eng/common/scripts/stress-testing/deploy-stress-tests.ps1 arguments: -SearchDirectory '$(System.DefaultWorkingDirectory)/$(Repository)' -Filters $(Filters)