diff --git a/.gitignore b/.gitignore index d51d176e912..0ff89866fd2 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,3 @@ megalinter-reports/ /jq /portalauth .kiota.log -/clusterapp.env diff --git a/.pipelines/e2e.yml b/.pipelines/e2e.yml index 6a21ab71fb5..94328e1dd51 100644 --- a/.pipelines/e2e.yml +++ b/.pipelines/e2e.yml @@ -52,7 +52,6 @@ jobs: export CI=true . ./hack/e2e/run-rp-and-e2e.sh - get_cluster_sp deploy_e2e_db displayName: Setup (Azure) diff --git a/docs/deploy-development-rp.md b/docs/deploy-development-rp.md index e8be02f66e3..edad0dcddc2 100644 --- a/docs/deploy-development-rp.md +++ b/docs/deploy-development-rp.md @@ -119,9 +119,6 @@ 1. use the create utility: ```bash - # Create the application to run the cluster as and load it - CLUSTER= go run ./hack/cluster createapp - source clusterapp.env # Create the cluster CLUSTER= go run ./hack/cluster create ``` @@ -130,7 +127,6 @@ ```bash CLUSTER= go run ./hack/cluster delete - CLUSTER= go run ./hack/cluster deleteapp ``` By default, a public cluster will be created. In order to create a private cluster, set the `PRIVATE_CLUSTER` environment variable to `true` prior to creation. Internet access from the cluster can also be restricted by setting the `NO_INTERNET` environment variable to `true`. diff --git a/hack/cluster/cluster.go b/hack/cluster/cluster.go index fd822d32dd2..8fab0aa9a06 100644 --- a/hack/cluster/cluster.go +++ b/hack/cluster/cluster.go @@ -25,7 +25,7 @@ const ( func run(ctx context.Context, log *logrus.Entry) error { if len(os.Args) != 2 { - return fmt.Errorf("usage: CLUSTER=x %s {create,createApp,deleteApp,delete}", os.Args[0]) + return fmt.Errorf("usage: CLUSTER=x %s {create,delete}", os.Args[0]) } if err := env.ValidateVars(Cluster); err != nil { @@ -59,10 +59,6 @@ func run(ctx context.Context, log *logrus.Entry) error { switch strings.ToLower(os.Args[1]) { case "create": return c.Create(ctx, vnetResourceGroup, clusterName, osClusterVersion) - case "createapp": - return c.CreateApp(ctx, clusterName) - case "deleteapp": - return c.DeleteApp(ctx) case "delete": return c.Delete(ctx, vnetResourceGroup, clusterName) default: diff --git a/hack/e2e/run-rp-and-e2e.sh b/hack/e2e/run-rp-and-e2e.sh index aee67f90b59..5dc1878642f 100755 --- a/hack/e2e/run-rp-and-e2e.sh +++ b/hack/e2e/run-rp-and-e2e.sh @@ -211,30 +211,9 @@ delete_e2e_cluster() { ./cluster delete else go run ./hack/cluster delete - go run ./hack/cluster deleteApp fi } -get_cluster_sp() { - echo "########## Downloading SP secrets ##########" - az keyvault secret download --vault-name=$CSP_VAULT_NAME \ - --name=aro-v4-e2e-devops-spn-1-app-id \ - --file=secrets/app-id - az keyvault secret download --vault-name=$CSP_VAULT_NAME \ - --name=aro-v4-e2e-devops-spn-1-sp-id \ - --file=secrets/sp-id - az keyvault secret download --vault-name=$CSP_VAULT_NAME \ - --name=aro-v4-e2e-devops-spn-1-secret-value \ - --file=secrets/secret-value - - echo -e -n "\nexport AZURE_CLUSTER_SERVICE_PRINCIPAL_ID=" >>secrets/env - cat secrets/sp-id >>secrets/env - echo -e -n "\nexport AZURE_CLUSTER_APP_ID=" >>secrets/env - cat secrets/app-id >>secrets/env - echo -e -n "\nexport AZURE_CLUSTER_APP_SECRET=" >>secrets/env - cat secrets/secret-value >>secrets/env -} - # TODO: CLUSTER and is also recalculated in multiple places # in the billing pipelines :-( diff --git a/pkg/util/cluster/cluster.go b/pkg/util/cluster/cluster.go index 293cfa12298..9860c9d5706 100644 --- a/pkg/util/cluster/cluster.go +++ b/pkg/util/cluster/cluster.go @@ -42,6 +42,7 @@ import ( "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/features" "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/network" redhatopenshift20231122 "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/redhatopenshift/2023-11-22/redhatopenshift" + "github.com/Azure/ARO-RP/pkg/util/azureerrors" utilgraph "github.com/Azure/ARO-RP/pkg/util/graph" "github.com/Azure/ARO-RP/pkg/util/rbac" "github.com/Azure/ARO-RP/pkg/util/rolesets" @@ -133,31 +134,26 @@ func New(log *logrus.Entry, environment env.Core, ci bool) (*Cluster, error) { return c, nil } -func (c *Cluster) CreateApp(ctx context.Context, clusterName string) error { - c.log.Infof("creating AAD application") +type appDetails struct { + applicationId string + applicationSecret string + SPId string +} + +func (c *Cluster) createApp(ctx context.Context, clusterName string) (applicationDetails appDetails, err error) { + c.log.Infof("Creating AAD application") appID, appSecret, err := c.createApplication(ctx, "aro-"+clusterName) if err != nil { - return err + return appDetails{}, err } - c.log.Infof("creating service principal") + c.log.Infof("Creating service principal") spID, err := c.createServicePrincipal(ctx, appID) if err != nil { - return err + return appDetails{}, err } - return os.WriteFile("clusterapp.env", []byte(fmt.Sprintf("export AZURE_CLUSTER_SERVICE_PRINCIPAL_ID=%s\nexport AZURE_CLUSTER_APP_ID=%s\nexport AZURE_CLUSTER_APP_SECRET=%s", spID, appID, appSecret)), 0o600) -} - -func (c *Cluster) DeleteApp(ctx context.Context) error { - err := env.ValidateVars( - "AZURE_CLUSTER_APP_ID", - ) - if err != nil { - return err - } - - return c.deleteApplication(ctx, os.Getenv("AZURE_CLUSTER_APP_ID")) + return appDetails{appID, appSecret, spID}, nil } func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName string, osClusterVersion string) error { @@ -170,21 +166,16 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str return nil } - err = env.ValidateVars( - "AZURE_FP_SERVICE_PRINCIPAL_ID", - "AZURE_CLUSTER_SERVICE_PRINCIPAL_ID", - "AZURE_CLUSTER_APP_ID", - "AZURE_CLUSTER_APP_SECRET", - ) + fpSPId := os.Getenv("AZURE_FP_SERVICE_PRINCIPAL_ID") + if fpSPId == "" { + return fmt.Errorf("fp service principal id is not found") + } + + appDetails, err := c.createApp(ctx, clusterName) if err != nil { return err } - fpSPID := os.Getenv("AZURE_FP_SERVICE_PRINCIPAL_ID") - spID := os.Getenv("AZURE_CLUSTER_SERVICE_PRINCIPAL_ID") - appID := os.Getenv("AZURE_CLUSTER_APP_ID") - appSecret := os.Getenv("AZURE_CLUSTER_APP_SECRET") - visibility := api.VisibilityPublic if os.Getenv("PRIVATE_CLUSTER") != "" || os.Getenv("NO_INTERNET") != "" { @@ -240,8 +231,8 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str parameters := map[string]*arm.ParametersParameter{ "clusterName": {Value: clusterName}, "ci": {Value: c.ci}, - "clusterServicePrincipalId": {Value: spID}, - "fpServicePrincipalId": {Value: fpSPID}, + "clusterServicePrincipalId": {Value: appDetails.SPId}, + "fpServicePrincipalId": {Value: fpSPId}, "vnetAddressPrefix": {Value: addressPrefix}, "masterAddressPrefix": {Value: masterSubnet}, "workerAddressPrefix": {Value: workerSubnet}, @@ -292,7 +283,7 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str {"/subscriptions/" + c.env.SubscriptionID() + "/resourceGroups/" + vnetResourceGroup + "/providers/Microsoft.Network/routeTables/" + clusterName + "-rt", rbac.RoleNetworkContributor}, {diskEncryptionSetID, rbac.RoleReader}, } { - for _, principalID := range []string{spID, fpSPID} { + for _, principalID := range []string{appDetails.SPId, fpSPId} { for i := 0; i < 5; i++ { _, err = c.roleassignments.Create( ctx, @@ -331,7 +322,7 @@ func (c *Cluster) Create(ctx context.Context, vnetResourceGroup, clusterName str } c.log.Info("creating cluster") - err = c.createCluster(ctx, vnetResourceGroup, clusterName, appID, appSecret, diskEncryptionSetID, visibility, osClusterVersion) + err = c.createCluster(ctx, vnetResourceGroup, clusterName, appDetails.applicationId, appDetails.applicationSecret, diskEncryptionSetID, visibility, osClusterVersion) if err != nil { return err @@ -376,18 +367,29 @@ func (c *Cluster) generateSubnets() (vnetPrefix string, masterSubnet string, wor } func (c *Cluster) Delete(ctx context.Context, vnetResourceGroup, clusterName string) error { + c.log.Infof("Deleting cluster %s in resource group %s", clusterName, vnetResourceGroup) var errs []error - switch { - case c.ci && env.IsLocalDevelopmentMode(): // PR E2E + if c.ci { + oc, err := c.openshiftclusters.Get(ctx, vnetResourceGroup, clusterName) + clusterResourceGroup := fmt.Sprintf("aro-%s", clusterName) + if err != nil { + c.log.Errorf("CI E2E cluster %s not found in resource group %s", clusterName, vnetResourceGroup) + errs = append(errs, err) + } errs = append(errs, + c.deleteApplication(ctx, *oc.OpenShiftClusterProperties.ServicePrincipalProfile.ClientID), c.deleteCluster(ctx, vnetResourceGroup, clusterName), - c.deleteClusterResourceGroup(ctx, vnetResourceGroup), - c.deleteVnetPeerings(ctx, vnetResourceGroup), + c.ensureResourceGroupDeleted(ctx, clusterResourceGroup), + c.deleteResourceGroup(ctx, vnetResourceGroup), ) - case c.ci: // Prod E2E - errs = append(errs, c.deleteClusterResourceGroup(ctx, vnetResourceGroup)) - default: + + if env.IsLocalDevelopmentMode() { //PR E2E + errs = append(errs, + c.deleteVnetPeerings(ctx, vnetResourceGroup), + ) + } + } else { errs = append(errs, c.deleteRoleAssignments(ctx, vnetResourceGroup, clusterName), c.deleteCluster(ctx, vnetResourceGroup, clusterName), @@ -714,13 +716,28 @@ func (c *Cluster) deleteCluster(ctx context.Context, resourceGroup, clusterName return nil } -func (c *Cluster) deleteClusterResourceGroup(ctx context.Context, resourceGroup string) error { +func (c *Cluster) ensureResourceGroupDeleted(ctx context.Context, resourceGroupName string) error { + c.log.Printf("deleting resource group %s", resourceGroupName) + timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + + return wait.PollImmediateUntil(5*time.Second, func() (bool, error) { + _, err := c.groups.Get(ctx, resourceGroupName) + if azureerrors.ResourceGroupNotFound(err) { + c.log.Infof("finished deleting resource group %s", resourceGroupName) + return true, nil + } + return false, fmt.Errorf("failed to delete resource group %s with %s", resourceGroupName, err) + }, timeoutCtx.Done()) +} + +func (c *Cluster) deleteResourceGroup(ctx context.Context, resourceGroup string) error { + c.log.Printf("deleting resource group %s", resourceGroup) if _, err := c.groups.Get(ctx, resourceGroup); err != nil { c.log.Printf("error getting resource group %s, skipping deletion: %v", resourceGroup, err) return nil } - c.log.Printf("deleting resource group %s", resourceGroup) if err := c.groups.DeleteAndWait(ctx, resourceGroup); err != nil { return fmt.Errorf("error deleting resource group: %w", err) }