Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial scripts and pipeline for stress test discovery/build/push #1851

Merged
8 commits merged into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions eng/common/scripts/stress-testing/deploy-stress-tests.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
<#
.SYNOPSIS
Discovers stress test helm charts under a directory, optionally builds and pushes their
docker images, and installs or upgrades each chart into the stress test cluster.

.DESCRIPTION
All parameters are forwarded to DeployStressTests via $PSBoundParameters, so values that
are not supplied here fall back to the defaults declared on that function.

.PARAMETER SearchDirectory
Root directory searched recursively for Chart.yaml files.

.PARAMETER Filters
Chart annotation key/value pairs a chart must match to be deployed. The 'stressTest'
annotation filter is always applied in addition to these.

.PARAMETER Environment
Value passed to helm as 'stress-test-addons.env'.

.PARAMETER Repository
Repository path segment used when composing image names in the container registry.

.PARAMETER PushImages
Build and push a docker image for every Dockerfile* found in each package directory
before deploying.

.PARAMETER ClusterGroup
Azure resource group used to look up the AKS cluster and the container registry.

.PARAMETER DeployId
Used as the image tag and as the 'deployId' label on the deployed helm releases.

.PARAMETER Login
Log in with the Azure CLI (if needed), fetch AKS credentials and, with -PushImages,
log in to the container registry. Selects the 'DoLogin' parameter set.

.PARAMETER Subscription
Azure subscription passed to 'az aks get-credentials'. Only valid together with -Login.
#>
[CmdletBinding(DefaultParameterSetName = 'Default')]
param(
[string]$SearchDirectory,
[hashtable]$Filters,
[string]$Environment,
[string]$Repository,
[switch]$PushImages,
[string]$ClusterGroup,
[string]$DeployId,

[Parameter(ParameterSetName = 'DoLogin', Mandatory = $true)]
[switch]$Login,

[Parameter(ParameterSetName = 'DoLogin')]
[string]$Subscription
)

# Treat every PowerShell error as terminating so the script stops at the first failure.
$ErrorActionPreference = 'Stop'

# Dot-source the chart discovery helpers (FindStressPackages etc.) into this scope.
. $PSScriptRoot/find-all-stress-packages.ps1
# Commands that failed when invoked through Run; reported at the end of DeployStressTests.
$FailedCommands = New-Object Collections.Generic.List[hashtable]

# ConvertFrom-Yaml comes from the powershell-yaml module. `Get-Module` on its own only
# lists modules already imported into the session, which made the install run on every
# invocation; also check what is installed on disk via -ListAvailable.
$requiredYamlVersion = '0.4.1'
$yamlLoaded = Get-Module powershell-yaml
$yamlInstalled = Get-Module -ListAvailable -Name powershell-yaml |
    Where-Object { $_.Version -eq [version]$requiredYamlVersion }
if (!$yamlLoaded -and !$yamlInstalled) {
    Install-Module -Name powershell-yaml -RequiredVersion $requiredYamlVersion -Force -Scope CurrentUser
}

# Powershell does not (at time of writing) treat exit codes from external binaries
# as cause for stopping execution, so do this via a wrapper function.
# See https://github.com/PowerShell/PowerShell-RFC/pull/277
# Runs the command given as the first argument with the remaining arguments, after echoing
# the full command line. A non-zero $LASTEXITCODE is reported as a non-terminating error and
# recorded in $FailedCommands; $LASTEXITCODE is left untouched for the caller to inspect.
function Run() {
    $commandLine = "$args"
    Write-Host "`n==> $commandLine`n" -ForegroundColor Green

    $executable, $remaining = $args
    & $executable $remaining

    if (!$LASTEXITCODE) {
        return
    }

    Write-Error "Command '$commandLine' failed with code: $LASTEXITCODE" -ErrorAction 'Continue'
    $FailedCommands.Add(@{ command = $commandLine; code = $LASTEXITCODE })
}

# Same as Run, but terminates the whole script with the command's exit code on failure.
function RunOrExitOnFailure() {
    Run @args

    if (!$LASTEXITCODE) {
        return
    }
    exit $LASTEXITCODE
}

# Ensures the Azure CLI is logged in, fetches kubectl credentials for the AKS cluster found
# in $clusterGroup and, when $pushImages is set, logs docker in to the container registry
# found in the same resource group.
#
# $subscription - Azure subscription (name or id) that owns $clusterGroup.
# $clusterGroup - resource group containing the AKS cluster and container registry.
# $pushImages   - when true, also run `az acr login` against the group's registry.
#
# Exits the script when a required az command fails or a lookup returns nothing.
function Login([string]$subscription, [string]$clusterGroup, [boolean]$pushImages) {
    Write-Host "Logging in to subscription, cluster and container registry"
    # `az account show` fails when there is no active login; all of its output is discarded.
    az account show *> $null
    if ($LASTEXITCODE) {
        RunOrExitOnFailure az login --allow-no-subscriptions
    }

    # Scope the lookups to the requested subscription. Without this they run against the
    # CLI's current default subscription, which may differ from the one that is passed to
    # `az aks get-credentials` below.
    $subscriptionArgs = @()
    if ($subscription) {
        $subscriptionArgs = @('--subscription', $subscription)
    }

    $clusterName = (az aks list -g $clusterGroup @subscriptionArgs -o json | ConvertFrom-Json).name
    if (!$clusterName) {
        Write-Host "Could not find AKS cluster in resource group $clusterGroup"
        exit 1
    }

    RunOrExitOnFailure az aks get-credentials `
        -n "$clusterName" `
        -g "$clusterGroup" `
        --subscription "$subscription" `
        --overwrite-existing

    if ($pushImages) {
        $registry = (az acr list -g $clusterGroup @subscriptionArgs -o json | ConvertFrom-Json).name
        if (!$registry) {
            Write-Host "Could not find container registry in resource group $clusterGroup"
            exit 1
        }
        RunOrExitOnFailure az acr login -n $registry @subscriptionArgs
    }
}

# Entry point: optionally logs in, refreshes the helm chart repository, discovers stress
# test packages under $searchDirectory and deploys each of them.
#
# $searchDirectory - root directory searched for Chart.yaml files.
# $filters         - chart annotation key/value pairs a package must match.
# $environment     - value passed to helm as 'stress-test-addons.env'.
# $repository      - repository path segment used in image names.
# $pushImages      - build and push docker images before deploying.
# $clusterGroup    - resource group of the AKS cluster and container registry. Defaults to
#                    the group matching $environment; the previous default ended in a bare
#                    '-' and so did not name a complete resource group.
# $deployId        - image tag and 'deployId' label applied to deployed releases.
# $subscription    - Azure subscription used when logging in.
#
# Exits with code 1 when any command invoked through Run failed.
function DeployStressTests(
    [string]$searchDirectory = '.',
    [hashtable]$filters = @{},
    [string]$environment = 'test',
    [string]$repository = 'images',
    [boolean]$pushImages = $false,
    [string]$clusterGroup = "rg-stress-test-cluster-$environment",
    [string]$deployId = 'local',
    [string]$subscription = 'Azure SDK Test Resources'
) {
    # $PSCmdlet is not defined by this function; it is expected to resolve to the calling
    # script's $PSCmdlet, whose parameter set is 'DoLogin' when -Login was passed.
    if ($PSCmdlet.ParameterSetName -eq 'DoLogin') {
        Login $subscription $clusterGroup $pushImages
    }

    RunOrExitOnFailure helm repo add stress-test-charts https://stresstestcharts.blob.core.windows.net/helm/
    # Nothing can be deployed without an up to date chart index. Exit with the failing code
    # instead of returning it: a plain `return` skipped the failure summary below and let
    # the script finish without a failing exit code.
    RunOrExitOnFailure helm repo update

    $pkgs = @(FindStressPackages $searchDirectory $filters)
    Write-Host "" "Found $($pkgs.Length) stress test packages:"
    Write-Host $pkgs.Directory ""
    foreach ($pkg in $pkgs) {
        Write-Host "Deploying stress test at '$($pkg.Directory)'"
        DeployStressPackage $pkg $deployId $environment $repository $pushImages
    }

    Write-Host "Releases deployed by $deployId"
    Run helm list --all-namespaces -l deployId=$deployId

    # Run records failures instead of stopping, so that every package gets a deployment
    # attempt; summarize them here and fail the script if there were any.
    if ($FailedCommands) {
        Write-Warning "The following commands failed:"
        foreach ($cmd in $FailedCommands) {
            Write-Error "'$($cmd.command)' failed with code $($cmd.code)" -ErrorAction 'Continue'
        }
        exit 1
    }
}

# Deploys a single stress test package: optionally builds and pushes its docker images,
# makes sure its namespace exists, installs or upgrades its helm release and labels the
# release with the deploy id.
#
# $pkg         - package to deploy; its Directory, Namespace and ReleaseName are used.
# $deployId    - image tag and value of the 'deployId' label put on the release secret.
# $environment - value passed to helm as 'stress-test-addons.env'.
# $repository  - repository path segment inside the container registry (lower-cased).
# $pushImages  - when true, build and push an image for every Dockerfile* in the package.
#
# Returns the failing exit code when a step fails (the failure is also recorded in
# $FailedCommands); exits the script when no container registry can be found.
function DeployStressPackage(
    [object]$pkg,
    [string]$deployId,
    [string]$environment,
    [string]$repository,
    [boolean]$pushImages
) {
    # NOTE: $clusterGroup is not a parameter of this function; it is resolved from the
    # caller's scope (DeployStressTests) through PowerShell's dynamic scoping.
    $registry = (az acr list -g $clusterGroup -o json | ConvertFrom-Json).name
    if (!$registry) {
        Write-Host "Could not find container registry in resource group $clusterGroup"
        exit 1
    }

    # Use one lower-cased repository path for both the pushed image tags and the helm
    # value, so the chart always references exactly the images that were pushed.
    $imageRepository = "${registry}.azurecr.io/$($repository.ToLower())"

    if ($pushImages) {
        Run helm dependency update $pkg.Directory
        if ($LASTEXITCODE) { return $LASTEXITCODE }

        $dockerFiles = Get-ChildItem "$($pkg.Directory)/Dockerfile*"
        foreach ($dockerFile in $dockerFiles) {
            # Infer docker image name from parent directory name, if file is named `Dockerfile`
            # or from suffix, if file is named like `Dockerfile.myimage` (for multiple dockerfiles).
            # Re-join the suffix so a name such as `Dockerfile.my.image` yields `my.image`
            # rather than an array of parts.
            $null, $suffixParts = $dockerFile.Name.Split(".")
            $imageName = $suffixParts -join '.'
            if (!$imageName) {
                $imageName = $dockerFile.Directory.Name
            }
            $imageTag = "$imageRepository/$($imageName):$deployId"
            Write-Host "Building and pushing stress test docker image '$imageTag'"
            Run docker build -t $imageTag -f $dockerFile.FullName $dockerFile.DirectoryName
            if ($LASTEXITCODE) { return $LASTEXITCODE }
            Run docker push $imageTag
            if ($LASTEXITCODE) {
                if ($PSCmdlet.ParameterSetName -ne 'DoLogin') {
                    Write-Warning "If docker push is failing due to authentication issues, try calling this script with '-Login'"
                }
                return $LASTEXITCODE
            }
        }
    }

    Write-Host "Creating namespace $($pkg.Namespace) if it does not exist..."
    # Rendering the namespace with --dry-run and piping it to `kubectl apply` makes the
    # creation idempotent.
    kubectl create namespace $pkg.Namespace --dry-run=client -o yaml | kubectl apply -f -
    if ($LASTEXITCODE) {
        # Keep going: the helm upgrade below reports the real failure if the namespace is missing.
        Write-Warning "Creating namespace $($pkg.Namespace) failed with code: $LASTEXITCODE"
    }

    Write-Host "Installing or upgrading stress test $($pkg.ReleaseName) from $($pkg.Directory)"
    Run helm upgrade $pkg.ReleaseName $pkg.Directory `
        -n $pkg.Namespace `
        --install `
        --set repository=$imageRepository `
        --set tag=$deployId `
        --set stress-test-addons.env=$environment
    if ($LASTEXITCODE) {
        # Issues like 'UPGRADE FAILED: another operation (install/upgrade/rollback) is in progress'
        # can be the result of cancelled `upgrade` operations (e.g. ctrl-c).
        # See https://github.com/helm/helm/issues/4558
        Write-Warning "The issue may be fixable by first running 'helm rollback -n $($pkg.Namespace) $($pkg.ReleaseName)'"
        return $LASTEXITCODE
    }

    # Helm 3 stores release information in kubernetes secrets. The only way to add extra labels around
    # specific releases (thereby enabling filtering on `helm list`) is to label the underlying secret resources.
    # There is not currently support for setting these labels via the helm cli.
    # The selector is quoted because an unquoted comma is PowerShell's array operator and
    # would split the selector into two separate arguments.
    $helmReleaseConfig = kubectl get secrets `
        -n $pkg.Namespace `
        -l "status=deployed,name=$($pkg.ReleaseName)" `
        -o jsonpath='{.items[0].metadata.name}'
    if ($LASTEXITCODE -or !$helmReleaseConfig) {
        $lookupCode = if ($LASTEXITCODE) { $LASTEXITCODE } else { 1 }
        $lookupCommand = "kubectl get secrets -n $($pkg.Namespace) -l status=deployed,name=$($pkg.ReleaseName)"
        Write-Error "Could not find the helm release secret for '$($pkg.ReleaseName)'" -ErrorAction 'Continue'
        $FailedCommands.Add(@{ command = $lookupCommand; code = $lookupCode })
        return $lookupCode
    }

    Run kubectl label secret -n $pkg.Namespace --overwrite $helmReleaseConfig deployId=$deployId
}

# Forward only the parameters the caller explicitly supplied, so that the defaults declared
# on DeployStressTests apply to everything else.
DeployStressTests @PSBoundParameters
53 changes: 53 additions & 0 deletions eng/common/scripts/stress-testing/find-all-stress-packages.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Parameters used when this script is executed directly rather than dot-sourced; they are
# handed to FindStressPackages at the bottom of the file.
param(
# Directory searched for Chart.yaml files.
[string]$searchDirectory = '.',
# Chart annotation key/value pairs a chart must match to be returned.
[hashtable]$filters = @{}
)

# Describes one discovered stress test helm chart.
class StressTestPackageInfo {
# Value of the chart's `namespace` annotation.
[string]$Namespace
# Directory that contains the chart's Chart.yaml.
[string]$Directory
# The chart's `name`.
[string]$ReleaseName
}

# Recursively searches $directory for Chart.yaml files and returns a StressTestPackageInfo
# for every chart whose annotations match $filters. The 'stressTest' = 'true' annotation
# filter is always applied in addition to the caller's filters.
#
# $directory - root directory to search.
# $filters   - annotation key/value pairs a chart must match; not modified by this function.
function FindStressPackages([string]$directory, [hashtable]$filters = @{}) {
    # Work on a copy: writing the mandatory filter into $filters itself would leak it into
    # the caller's hashtable.
    $effectiveFilters = @{}
    if ($filters) {
        foreach ($key in $filters.Keys) {
            $effectiveFilters[$key] = $filters[$key]
        }
    }
    # Bare minimum filter for stress tests
    $effectiveFilters['stressTest'] = 'true'

    $chartFiles = Get-ChildItem -Recurse -Filter 'Chart.yaml' $directory
    $packages = foreach ($chartFile in $chartFiles) {
        # Pass the full path explicitly instead of relying on how a FileInfo converts to a string.
        $chart = ParseChart $chartFile.FullName
        if (MatchesAnnotations $chart $effectiveFilters) {
            NewStressTestPackageInfo $chart $chartFile
        }
    }

    return $packages
}

# Reads the whole file at $chartFile and returns it parsed with ConvertFrom-Yaml.
function ParseChart([string]$chartFile) {
    $yamlText = Get-Content -Raw $chartFile
    return ConvertFrom-Yaml $yamlText
}

# Returns $true when every key/value pair in $filters is matched by the chart's annotations.
# A chart without annotations only matches an empty filter set.
function MatchesAnnotations([hashtable]$chart, [hashtable]$filters) {
    $annotations = $chart.annotations

    foreach ($entry in $filters.GetEnumerator()) {
        if (!$annotations) {
            return $false
        }
        if ($annotations[$entry.Key] -ne $entry.Value) {
            return $false
        }
    }

    return $true
}

# Builds the StressTestPackageInfo for a parsed chart and the Chart.yaml it was read from.
function NewStressTestPackageInfo([hashtable]$chart, [System.IO.FileInfo]$chartFile) {
    $info = [StressTestPackageInfo]::new()
    $info.Namespace = $chart.annotations.namespace
    $info.Directory = $chartFile.DirectoryName
    $info.ReleaseName = $chart.name
    return $info
}

# Only run the search when this script is executed directly; when it is dot sourced the
# caller just wants the definitions above.
if (-not ($MyInvocation.InvocationName -eq ".")) {
    FindStressPackages $searchDirectory $filters
}
91 changes: 91 additions & 0 deletions eng/pipelines/stress-test-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Neither pull requests nor pushes trigger this pipeline.
pr: none

trigger: none

# Runtime parameters for a run of this pipeline.
parameters:
# Used as the AzureCLI task's service connection and passed to the script as -Subscription.
- name: Subscription
type: string
default: 'Azure SDK Test Resources'
# Passed to the deploy script as -Environment.
- name: Environment
type: string
default: prod
# Passed to the deploy script as -ClusterGroup.
- name: ClusterGroup
type: string
default: rg-stress-test-cluster-prod
# Selects which entries of the job matrix are generated; 'all' includes every entry.
- name: TestRepository
displayName: Stress Test Repository
type: string
default: all
values:
- all
- examples
- javascript
- java
- net
- python
- go

jobs:
- job:
strategy:
# One matrix entry per repository. Each entry is only emitted when TestRepository is that
# entry's name or 'all', and sets the Repository and Filters variables used by the steps.
matrix:
${{ if or(eq(parameters.TestRepository, 'examples'), eq(parameters.TestRepository, 'all')) }}:
examples:
Repository: Azure/azure-sdk-tools
# Filters holds a PowerShell hashtable literal that is passed to the script's -Filters.
Filters: '@{ "example" = "true" }'
${{ if or(eq(parameters.TestRepository, 'javascript'), eq(parameters.TestRepository, 'all')) }}:
javascript:
Repository: Azure/azure-sdk-for-js
Filters: '@{}'
${{ if or(eq(parameters.TestRepository, 'java'), eq(parameters.TestRepository, 'all')) }}:
java:
Repository: Azure/azure-sdk-for-java
Filters: '@{}'
${{ if or(eq(parameters.TestRepository, 'net'), eq(parameters.TestRepository, 'all')) }}:
net:
Repository: Azure/azure-sdk-for-net
Filters: '@{}'
${{ if or(eq(parameters.TestRepository, 'python'), eq(parameters.TestRepository, 'all')) }}:
python:
Repository: Azure/azure-sdk-for-python
Filters: '@{}'
${{ if or(eq(parameters.TestRepository, 'go'), eq(parameters.TestRepository, 'all')) }}:
go:
Repository: Azure/azure-sdk-for-go
Filters: '@{}'
pool:
vmImage: 'ubuntu-20.04'
#name: 'azsdk-pool-mms-ubuntu-2004-general'
#vmImage: 'MMSUbuntu20.04'
steps:
# Check out azure-sdk-tools (which holds the deploy script) and the repository selected by
# the matrix, each into its own directory under the default working directory.
- template: /eng/common/pipelines/templates/steps/sparse-checkout.yml
parameters:
Repositories:
- Name: Azure/azure-sdk-tools
Commitish: $(Build.SourceVersion)
WorkingDirectory: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools
# NOTE(review): the same $(Build.SourceVersion) is used as the Commitish for the matrix
# repository - confirm that this commit can be resolved in repositories other than the one
# that triggered the build.
- Name: $(Repository)
Commitish: $(Build.SourceVersion)
WorkingDirectory: $(System.DefaultWorkingDirectory)/$(Repository)
Paths:
- '/tools'
- '!sdk/**/test-recordings'
- '!sdk/**/session-records'
- '!sdk/**/SessionRecords'

- task: AzureCLI@2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What value are we gaining by using the AzureCLI task here? Is the service connection information enough for us to do all the deployments, or do we still need to pass in most of the secrets? Also, what about other clouds: do they need other configurations, or is that not a scenario at this point?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't gotten to that point yet but my thinking was to add an access policy to the buildout bicep configs to allowlist one of the devops service connections (that way there are no manual steps on buildout). I may still create a new identity just for this.

I think it's unlikely that we'll target other clouds, as our aim here is to stress the client code, rather than to test its operability across clouds. Though if we do have a need for that, I don't think we'll ever need to host the stress cluster itself in another cloud, so any relevant credentials are not configured in this context (they are auto-synced into the cluster via keyvault).

# Inputs of the AzureCLI task that runs deploy-stress-tests.ps1 from the azure-sdk-tools
# checkout against the repository selected by the matrix.
displayName: Build and Deploy Stress Tests
inputs:
azureSubscription: ${{ parameters.Subscription }}
scriptType: pscore
scriptPath: $(System.DefaultWorkingDirectory)/Azure/azure-sdk-tools/eng/common/scripts/stress-testing/deploy-stress-tests.ps1
# $(Filters) is deliberately left unquoted: the matrix value is a PowerShell hashtable
# literal. The job name is passed as -Repository, which the script uses as the repository
# path segment of image names, and the build number is passed as the deploy id.
arguments:
-SearchDirectory '$(System.DefaultWorkingDirectory)/$(Repository)'
-Filters $(Filters)
-Environment '${{ parameters.Environment }}'
-Repository '$(Agent.JobName)'
-PushImages
-ClusterGroup '${{ parameters.ClusterGroup }}'
-Login
-Subscription '${{ parameters.Subscription }}'
-DeployId '$(Build.BuildNumber)'
11 changes: 8 additions & 3 deletions tools/stress-cluster/chaos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,17 @@ See the [Job Manifest section](#job-manifest) for an example spec containing con

### Helm Chart Dependencies

The `<chart root>/chart/Chart.yaml` file should look something like below. It must include the `stress-test-addons` dependency:
The `<chart root>/chart/Chart.yaml` file should look something like below. It must include the `stress-test-addons` dependency and the included annotations:

```
apiVersion: v2
name: <stress test name>
description: <description>
version: 0.1.0
appVersion: v0.1
annotations:
stressTest: 'true'
namespace: <your stress test namespace>

dependencies:
- name: stress-test-addons
Expand Down Expand Up @@ -341,14 +344,16 @@ Then install the stress test into the cluster:
```
kubectl create namespace <your stress test namespace>
kubectl label namespace <namespace> owners=<owner alias>
helm install <stress test name> .
helm install -n <your stress test namespace> <stress test name> .
```

To install into a different cluster (test, prod, or dev):

```
az aks get-credentials --subscription '<cluster subscription>' -g rg-stress-test-cluster-<cluster suffix> -n stress-test
helm install <stress test name> . --set stress-test-addons.env=<cluster suffix>
kubectl create namespace <your stress test namespace>
kubectl label namespace <namespace> owners=<owner alias>
helm install -n <your stress test namespace> <stress test name> . --set stress-test-addons.env=<cluster suffix>
```

You can check the progress/status of your installation via:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ dependencies:
repository: https://stresstestcharts.blob.core.windows.net/helm/
version: 0.1.2
digest: sha256:b38f530a7f691eb3f11d48809ba7f86ea9d7b226c3ecb311d1ae47fbb0585466
generated: "2021-07-06T18:41:20.4293087-04:00"
generated: "2021-07-28T22:23:31.0555163-04:00"
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
apiVersion: v2
name: network_example
name: network-example
description: An example stress test chart with network chaos
version: 0.1.0
version: 0.1.1
appVersion: v0.1
annotations:
stressTest: 'true'
namespace: 'examples'
example: 'true'
test: 'true'

dependencies:
- name: stress-test-addons
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image for the network stress example: runs poll.sh with bash on Alpine.
FROM alpine:3.14
# Install both packages in a single layer.
RUN apk add --no-cache bash wget

# COPY is the preferred instruction for plain local files; ADD's archive extraction and
# URL handling are not needed here.
COPY ./poll.sh /poll.sh
RUN chmod +x /poll.sh

# Exec form starts bash directly rather than through `/bin/sh -c`, so signals sent to the
# container reach the script's shell.
CMD ["bash", "/poll.sh"]
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
spec:
containers:
- name: network-example
image: stresstestregistry.azurecr.io/example/networkexample:v1
command: ["bash", "poll.sh"]
image: {{ default "" .Values.repository }}/network-stress-example:{{ default "v1" .Values.tag }}
{{- include "stress-test-addons.container-env" . | nindent 6 }}
{{- end -}}
Loading