Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create sp for each e2e run #3685

Merged
merged 17 commits into from
Jul 22, 2024
Merged
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,3 @@ megalinter-reports/
/jq
/portalauth
.kiota.log
/clusterapp.env
1 change: 0 additions & 1 deletion .pipelines/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ jobs:

export CI=true
. ./hack/e2e/run-rp-and-e2e.sh
get_cluster_sp
deploy_e2e_db
displayName: Setup (Azure)

Expand Down
4 changes: 0 additions & 4 deletions docs/deploy-development-rp.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,6 @@
1. use the create utility:

```bash
# Create the application to run the cluster as and load it
CLUSTER=<cluster-name> go run ./hack/cluster createapp
tiguelu marked this conversation as resolved.
Show resolved Hide resolved
source clusterapp.env
# Create the cluster
CLUSTER=<cluster-name> go run ./hack/cluster create
```
Expand All @@ -130,7 +127,6 @@

```bash
CLUSTER=<cluster-name> go run ./hack/cluster delete
CLUSTER=<cluster-name> go run ./hack/cluster deleteapp
```

By default, a public cluster will be created. In order to create a private cluster, set the `PRIVATE_CLUSTER` environment variable to `true` prior to creation. Internet access from the cluster can also be restricted by setting the `NO_INTERNET` environment variable to `true`.
Expand Down
6 changes: 1 addition & 5 deletions hack/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const (

func run(ctx context.Context, log *logrus.Entry) error {
if len(os.Args) != 2 {
return fmt.Errorf("usage: CLUSTER=x %s {create,createApp,deleteApp,delete}", os.Args[0])
return fmt.Errorf("usage: CLUSTER=x %s {create,delete}", os.Args[0])
}

if err := env.ValidateVars(Cluster); err != nil {
Expand Down Expand Up @@ -59,10 +59,6 @@ func run(ctx context.Context, log *logrus.Entry) error {
switch strings.ToLower(os.Args[1]) {
case "create":
return c.Create(ctx, vnetResourceGroup, clusterName, osClusterVersion)
case "createapp":
return c.CreateApp(ctx, clusterName)
case "deleteapp":
return c.DeleteApp(ctx)
case "delete":
return c.Delete(ctx, vnetResourceGroup, clusterName)
default:
Expand Down
21 changes: 0 additions & 21 deletions hack/e2e/run-rp-and-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -211,30 +211,9 @@ delete_e2e_cluster() {
./cluster delete
else
go run ./hack/cluster delete
go run ./hack/cluster deleteApp
fi
}

get_cluster_sp() {
echo "########## Downloading SP secrets ##########"
az keyvault secret download --vault-name=$CSP_VAULT_NAME \
--name=aro-v4-e2e-devops-spn-1-app-id \
--file=secrets/app-id
az keyvault secret download --vault-name=$CSP_VAULT_NAME \
--name=aro-v4-e2e-devops-spn-1-sp-id \
--file=secrets/sp-id
az keyvault secret download --vault-name=$CSP_VAULT_NAME \
--name=aro-v4-e2e-devops-spn-1-secret-value \
--file=secrets/secret-value

echo -e -n "\nexport AZURE_CLUSTER_SERVICE_PRINCIPAL_ID=" >>secrets/env
cat secrets/sp-id >>secrets/env
echo -e -n "\nexport AZURE_CLUSTER_APP_ID=" >>secrets/env
cat secrets/app-id >>secrets/env
echo -e -n "\nexport AZURE_CLUSTER_APP_SECRET=" >>secrets/env
cat secrets/secret-value >>secrets/env
}

# TODO: CLUSTER and is also recalculated in multiple places
# in the billing pipelines :-(

Expand Down
99 changes: 58 additions & 41 deletions pkg/util/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/features"
"github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/network"
redhatopenshift20231122 "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/redhatopenshift/2023-11-22/redhatopenshift"
"github.com/Azure/ARO-RP/pkg/util/azureerrors"
utilgraph "github.com/Azure/ARO-RP/pkg/util/graph"
"github.com/Azure/ARO-RP/pkg/util/rbac"
"github.com/Azure/ARO-RP/pkg/util/rolesets"
Expand Down Expand Up @@ -133,31 +134,26 @@ func New(log *logrus.Entry, environment env.Core, ci bool) (*Cluster, error) {
return c, nil
}

func (c *Cluster) CreateApp(ctx context.Context, clusterName string) error {
c.log.Infof("creating AAD application")
type AppDetails struct {
applicationId string
applicationSecret string
SPId string
}
mociarain marked this conversation as resolved.
Show resolved Hide resolved

func (c *Cluster) createApp(ctx context.Context, clusterName string) (applicationDetails AppDetails, err error) {
c.log.Infof("Creating AAD application")
appID, appSecret, err := c.createApplication(ctx, "aro-"+clusterName)
if err != nil {
return err
return AppDetails{}, err
}

c.log.Infof("creating service principal")
c.log.Infof("Creating service principal")
spID, err := c.createServicePrincipal(ctx, appID)
if err != nil {
return err
return AppDetails{}, err
}

return os.WriteFile("clusterapp.env", []byte(fmt.Sprintf("export AZURE_CLUSTER_SERVICE_PRINCIPAL_ID=%s\nexport AZURE_CLUSTER_APP_ID=%s\nexport AZURE_CLUSTER_APP_SECRET=%s", spID, appID, appSecret)), 0o600)
}

func (c *Cluster) DeleteApp(ctx context.Context) error {
err := env.ValidateVars(
"AZURE_CLUSTER_APP_ID",
)
if err != nil {
return err
}

return c.deleteApplication(ctx, os.Getenv("AZURE_CLUSTER_APP_ID"))
return AppDetails{appID, appSecret, spID}, nil
}

func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName string, osClusterVersion string) error {
Expand All @@ -170,21 +166,16 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str
return nil
}

err = env.ValidateVars(
"AZURE_FP_SERVICE_PRINCIPAL_ID",
"AZURE_CLUSTER_SERVICE_PRINCIPAL_ID",
"AZURE_CLUSTER_APP_ID",
"AZURE_CLUSTER_APP_SECRET",
)
fpSPId := os.Getenv("AZURE_FP_SERVICE_PRINCIPAL_ID")
if fpSPId == "" {
return fmt.Errorf("fp service principal id is not found")
}

appDetails, err := c.createApp(ctx, clusterName)
if err != nil {
return err
}

fpSPID := os.Getenv("AZURE_FP_SERVICE_PRINCIPAL_ID")
spID := os.Getenv("AZURE_CLUSTER_SERVICE_PRINCIPAL_ID")
appID := os.Getenv("AZURE_CLUSTER_APP_ID")
appSecret := os.Getenv("AZURE_CLUSTER_APP_SECRET")

visibility := api.VisibilityPublic

if os.Getenv("PRIVATE_CLUSTER") != "" || os.Getenv("NO_INTERNET") != "" {
Expand Down Expand Up @@ -240,8 +231,8 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str
parameters := map[string]*arm.ParametersParameter{
"clusterName": {Value: clusterName},
"ci": {Value: c.ci},
"clusterServicePrincipalId": {Value: spID},
"fpServicePrincipalId": {Value: fpSPID},
"clusterServicePrincipalId": {Value: appDetails.SPId},
"fpServicePrincipalId": {Value: fpSPId},
"vnetAddressPrefix": {Value: addressPrefix},
"masterAddressPrefix": {Value: masterSubnet},
"workerAddressPrefix": {Value: workerSubnet},
Expand Down Expand Up @@ -292,7 +283,7 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str
{"/subscriptions/" + c.env.SubscriptionID() + "/resourceGroups/" + vnetResourceGroup + "/providers/Microsoft.Network/routeTables/" + clusterName + "-rt", rbac.RoleNetworkContributor},
{diskEncryptionSetID, rbac.RoleReader},
} {
for _, principalID := range []string{spID, fpSPID} {
for _, principalID := range []string{appDetails.SPId, fpSPId} {
for i := 0; i < 5; i++ {
_, err = c.roleassignments.Create(
ctx,
Expand Down Expand Up @@ -331,7 +322,7 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str
}

c.log.Info("creating cluster")
err = c.createCluster(ctx, vnetResourceGroup, clusterName, appID, appSecret, diskEncryptionSetID, visibility, osClusterVersion)
err = c.createCluster(ctx, vnetResourceGroup, clusterName, appDetails.applicationId, appDetails.applicationSecret, diskEncryptionSetID, visibility, osClusterVersion)

if err != nil {
return err
Expand Down Expand Up @@ -376,18 +367,29 @@ func (c *Cluster) generateSubnets() (vnetPrefix string, masterSubnet string, wor
}

func (c *Cluster) Delete(ctx context.Context, vnetResourceGroup, clusterName string) error {
c.log.Infof("Deleting cluster %s in resource group %s", clusterName, vnetResourceGroup)
var errs []error

switch {
case c.ci && env.IsLocalDevelopmentMode(): // PR E2E
if c.ci {
oc, err := c.openshiftclusters.Get(ctx, vnetResourceGroup, clusterName)
clusterResourceGroup := fmt.Sprintf("aro-%s", clusterName)
if err != nil {
c.log.Errorf("CI E2E cluster %s not found in resource group %s", clusterName, vnetResourceGroup)
errs = append(errs, err)
}
errs = append(errs,
c.deleteApplication(ctx, *oc.OpenShiftClusterProperties.ServicePrincipalProfile.ClientID),
c.deleteCluster(ctx, vnetResourceGroup, clusterName),
c.deleteClusterResourceGroup(ctx, vnetResourceGroup),
c.deleteVnetPeerings(ctx, vnetResourceGroup),
c.ensureResourceGroupDeleted(ctx, clusterResourceGroup),
c.deleteResourceGroup(ctx, vnetResourceGroup),
)
case c.ci: // Prod E2E
errs = append(errs, c.deleteClusterResourceGroup(ctx, vnetResourceGroup))
default:

if env.IsLocalDevelopmentMode() { //PR E2E
errs = append(errs,
c.deleteVnetPeerings(ctx, vnetResourceGroup),
)
}
} else {
errs = append(errs,
c.deleteRoleAssignments(ctx, vnetResourceGroup, clusterName),
c.deleteCluster(ctx, vnetResourceGroup, clusterName),
Expand Down Expand Up @@ -714,13 +716,28 @@ func (c *Cluster) deleteCluster(ctx context.Context, resourceGroup, clusterName
return nil
}

func (c *Cluster) deleteClusterResourceGroup(ctx context.Context, resourceGroup string) error {
func (c *Cluster) ensureResourceGroupDeleted(ctx context.Context, resourceGroupName string) error {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was added due to some racey deletions and some stuff not getting cleaned up correctly in the new tenant. AFAIK in the past we were letting the VnetRG get cleaned up via the purge:true flag (https://github.com/Azure/ARO-RP/pull/3513/files @tsatam can you confirm this). This just makes all the deletion a bit more explicit but we still have the purge script as a fall back

c.log.Printf("deleting resource group %s", resourceGroupName)
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
defer cancel()

return wait.PollImmediateUntil(5*time.Second, func() (bool, error) {
_, err := c.groups.Get(ctx, resourceGroupName)
if azureerrors.ResourceGroupNotFound(err) {
c.log.Infof("finished deleting resource group %s", resourceGroupName)
return true, nil
}
mociarain marked this conversation as resolved.
Show resolved Hide resolved
return false, fmt.Errorf("failed to delete resource group %s", resourceGroupName)
}, timeoutCtx.Done())
}

func (c *Cluster) deleteResourceGroup(ctx context.Context, resourceGroup string) error {
c.log.Printf("deleting resource group %s", resourceGroup)
if _, err := c.groups.Get(ctx, resourceGroup); err != nil {
c.log.Printf("error getting resource group %s, skipping deletion: %v", resourceGroup, err)
return nil
}

c.log.Printf("deleting resource group %s", resourceGroup)
if err := c.groups.DeleteAndWait(ctx, resourceGroup); err != nil {
return fmt.Errorf("error deleting resource group: %w", err)
}
Expand Down
Loading