Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate RP from Azure AD Graph to Microsoft Graph #1970

Merged
merged 12 commits into from
Jun 14, 2023
86 changes: 42 additions & 44 deletions pkg/util/cluster/aad.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,44 @@ package cluster
import (
"context"
"fmt"
"net/http"
"time"

azgraphrbac "github.com/Azure/azure-sdk-for-go/services/graphrbac/1.6/graphrbac"
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/date"
msgraph_apps "github.com/microsoftgraph/msgraph-sdk-go/applications"
msgraph_models "github.com/microsoftgraph/msgraph-sdk-go/models"
msgraph_errors "github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors"
"k8s.io/apimachinery/pkg/util/wait"

"github.com/Azure/ARO-RP/pkg/util/uuid"
)

func (c *Cluster) getServicePrincipal(ctx context.Context, appID string) (string, error) {
SudoBrendan marked this conversation as resolved.
Show resolved Hide resolved
// TODO: we are listing here rather than calling
// i.applications.GetServicePrincipalsIDByAppID() due to some missing
// permission with our dev/e2e applications
sps, err := c.serviceprincipals.List(ctx, fmt.Sprintf("appId eq '%s'", appID))
func (c *Cluster) createApplication(ctx context.Context, displayName string) (string, string, error) {
SudoBrendan marked this conversation as resolved.
Show resolved Hide resolved
bennerv marked this conversation as resolved.
Show resolved Hide resolved
appBody := msgraph_models.NewApplication()
appBody.SetDisplayName(&displayName)
appResult, err := c.spGraphClient.Applications().Post(ctx, appBody, nil)
if err != nil {
return "", err
return "", "", err
}

switch len(sps) {
case 0:
return "", nil
case 1:
return *sps[0].ObjectID, nil
default:
return "", fmt.Errorf("%d service principals found for appId %s", len(sps), appID)
}
}
id := *appResult.GetId()
endDateTime := time.Now().AddDate(1, 0, 0)

func (c *Cluster) createApplication(ctx context.Context, displayName string) (string, string, error) {
password := uuid.DefaultGenerator.Generate()

app, err := c.applications.Create(ctx, azgraphrbac.ApplicationCreateParameters{
DisplayName: &displayName,
PasswordCredentials: &[]azgraphrbac.PasswordCredential{
{
EndDate: &date.Time{Time: time.Now().AddDate(1, 0, 0)},
Value: &password,
},
},
})
pwCredential := msgraph_models.NewPasswordCredential()
pwCredential.SetDisplayName(&displayName)
pwCredential.SetEndDateTime(&endDateTime)

pwCredentialRequestBody := msgraph_apps.NewItemAddPasswordPostRequestBody()
pwCredentialRequestBody.SetPasswordCredential(pwCredential)
// ByApplicationId is confusingly named, but it refers to
// the application's Object ID, not to the Application ID.
// https://learn.microsoft.com/en-us/graph/api/application-addpassword?view=graph-rest-1.0&tabs=http#http-request
pwResult, err := c.spGraphClient.Applications().ByApplicationId(id).AddPassword().Post(ctx, pwCredentialRequestBody, nil)
if err != nil {
return "", "", err
}

return *app.AppID, password, nil
return *appResult.GetAppId(), *pwResult.GetSecretText(), nil
}

func (c *Cluster) createServicePrincipal(ctx context.Context, appID string) (string, error) {
var sp azgraphrbac.ServicePrincipal
var result msgraph_models.ServicePrincipalable
bennerv marked this conversation as resolved.
Show resolved Hide resolved
var err error

timeoutCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
Expand All @@ -66,11 +53,12 @@ func (c *Cluster) createServicePrincipal(ctx context.Context, appID string) (str
// wait.PollImmediateUntil. Doing this will not propagate the latest error
// to the user in case when wait exceeds the timeout
_ = wait.PollImmediateUntil(10*time.Second, func() (bool, error) {
sp, err = c.serviceprincipals.Create(ctx, azgraphrbac.ServicePrincipalCreateParameters{
AppID: &appID,
})
if detailedErr, ok := err.(autorest.DetailedError); ok &&
detailedErr.StatusCode == http.StatusForbidden {
requestBody := msgraph_models.NewServicePrincipal()
requestBody.SetAppId(&appID)
result, err = c.spGraphClient.ServicePrincipals().Post(ctx, requestBody, nil)

if oDataError, ok := err.(msgraph_errors.ODataErrorable); ok &&
*oDataError.GetError().GetCode() == "accessDenied" {
s-amann marked this conversation as resolved.
Show resolved Hide resolved
bennerv marked this conversation as resolved.
Show resolved Hide resolved
// goal is to retry the following error:
// graphrbac.ServicePrincipalsClient#Create: Failure responding to
// request: StatusCode=403 -- Original Error: autorest/azure:
Expand All @@ -89,22 +77,32 @@ func (c *Cluster) createServicePrincipal(ctx context.Context, appID string) (str
return "", err
}

return *sp.ObjectID, nil
return *result.GetId(), nil
}

func (c *Cluster) deleteApplication(ctx context.Context, appID string) error {
mbarnes marked this conversation as resolved.
Show resolved Hide resolved
apps, err := c.applications.List(ctx, fmt.Sprintf("appId eq '%s'", appID))
filter := fmt.Sprintf("appId eq '%s'", appID)
requestConfiguration := &msgraph_apps.ApplicationsRequestBuilderGetRequestConfiguration{
QueryParameters: &msgraph_apps.ApplicationsRequestBuilderGetQueryParameters{
Filter: &filter,
Select: []string{"id"},
},
}
result, err := c.spGraphClient.Applications().Get(ctx, requestConfiguration)
if err != nil {
return err
}

apps := result.GetValue()
switch len(apps) {
case 0:
return nil
case 1:
c.log.Print("deleting AAD application")
_, err = c.applications.Delete(ctx, *apps[0].ObjectID)
return err
// ByApplicationId is confusingly named, but it refers to
// the application's Object ID, not to the Application ID.
// https://learn.microsoft.com/en-us/graph/api/application-delete?view=graph-rest-1.0&tabs=http#http-request
return c.spGraphClient.Applications().ByApplicationId(*apps[0].GetId()).Delete(ctx, nil)
default:
return fmt.Errorf("%d applications found for appId %s", len(apps), appID)
}
Expand Down
23 changes: 12 additions & 11 deletions pkg/util/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
"github.com/jongio/azidext/go/azidext"
msgraph "github.com/microsoftgraph/msgraph-sdk-go"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/wait"

Expand All @@ -34,7 +35,6 @@ import (
"github.com/Azure/ARO-RP/pkg/deploy/generator"
"github.com/Azure/ARO-RP/pkg/env"
"github.com/Azure/ARO-RP/pkg/util/arm"
"github.com/Azure/ARO-RP/pkg/util/azureclient/graphrbac"
"github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/authorization"
"github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/features"
keyvaultclient "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/keyvault"
Expand All @@ -43,6 +43,7 @@ import (
redhatopenshift20210901preview "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/redhatopenshift/2021-09-01-preview/redhatopenshift"
redhatopenshift20220401 "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/redhatopenshift/2022-04-01/redhatopenshift"
redhatopenshift20220904 "github.com/Azure/ARO-RP/pkg/util/azureclient/mgmt/redhatopenshift/2022-09-04/redhatopenshift"
utilgraph "github.com/Azure/ARO-RP/pkg/util/graph"
"github.com/Azure/ARO-RP/pkg/util/rbac"
"github.com/Azure/ARO-RP/pkg/util/uuid"
)
Expand All @@ -53,10 +54,9 @@ type Cluster struct {
ci bool
ciParentVnet string

spGraphClient *msgraph.GraphServiceClient
deployments features.DeploymentsClient
groups features.ResourceGroupsClient
applications graphrbac.ApplicationsClient
serviceprincipals graphrbac.ServicePrincipalClient
openshiftclustersv20200430 redhatopenshift20200430.OpenShiftClustersClient
openshiftclustersv20210901preview redhatopenshift20210901preview.OpenShiftClustersClient
openshiftclustersv20220401 redhatopenshift20220401.OpenShiftClustersClient
Expand Down Expand Up @@ -96,25 +96,26 @@ func New(log *logrus.Entry, environment env.Core, ci bool) (*Cluster, error) {
return nil, err
}

scopes := []string{environment.Environment().GraphEndpoint + "/.default"}
graphAuthorizer := azidext.NewTokenCredentialAdapter(tokenCredential, scopes)
spGraphClient, err := environment.Environment().NewGraphServiceClient(tokenCredential)
mbarnes marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}

scopes = []string{environment.Environment().ResourceManagerScope}
scopes := []string{environment.Environment().ResourceManagerScope}
authorizer := azidext.NewTokenCredentialAdapter(tokenCredential, scopes)

c := &Cluster{
log: log,
env: environment,
ci: ci,

spGraphClient: spGraphClient,
deployments: features.NewDeploymentsClient(environment.Environment(), environment.SubscriptionID(), authorizer),
groups: features.NewResourceGroupsClient(environment.Environment(), environment.SubscriptionID(), authorizer),
openshiftclustersv20200430: redhatopenshift20200430.NewOpenShiftClustersClient(environment.Environment(), environment.SubscriptionID(), authorizer),
openshiftclustersv20210901preview: redhatopenshift20210901preview.NewOpenShiftClustersClient(environment.Environment(), environment.SubscriptionID(), authorizer),
openshiftclustersv20220401: redhatopenshift20220401.NewOpenShiftClustersClient(environment.Environment(), environment.SubscriptionID(), authorizer),
openshiftclustersv20220904: redhatopenshift20220904.NewOpenShiftClustersClient(environment.Environment(), environment.SubscriptionID(), authorizer),
applications: graphrbac.NewApplicationsClient(environment.Environment(), environment.TenantID(), graphAuthorizer),
serviceprincipals: graphrbac.NewServicePrincipalClient(environment.Environment(), environment.TenantID(), graphAuthorizer),
securitygroups: network.NewSecurityGroupsClient(environment.Environment(), environment.SubscriptionID(), authorizer),
subnets: network.NewSubnetsClient(environment.Environment(), environment.SubscriptionID(), authorizer),
routetables: network.NewRouteTablesClient(environment.Environment(), environment.SubscriptionID(), authorizer),
Expand Down Expand Up @@ -586,15 +587,15 @@ func (c *Cluster) fixupNSGs(ctx context.Context, vnetResourceGroup, clusterName
}

func (c *Cluster) deleteRoleAssignments(ctx context.Context, vnetResourceGroup, appID string) error {
spObjID, err := c.getServicePrincipal(ctx, appID)
spObjID, err := utilgraph.GetServicePrincipalIDByAppID(ctx, c.spGraphClient, appID)
if err != nil {
return err
}
if spObjID == "" {
if spObjID == nil {
return nil
}

roleAssignments, err := c.roleassignments.ListForResourceGroup(ctx, vnetResourceGroup, fmt.Sprintf("principalId eq '%s'", spObjID))
roleAssignments, err := c.roleassignments.ListForResourceGroup(ctx, vnetResourceGroup, fmt.Sprintf("principalId eq '%s'", *spObjID))
Comment on lines +594 to +598
Copy link
Collaborator

@SudoBrendan SudoBrendan Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idea: Tagging this for 404 error refactors.

Copy link
Contributor Author

@mbarnes mbarnes Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the use case that led me to have GetServicePrincipalIDByAppID return nil for not found instead of an error.

deleteRoleAssignments is idempotent, so if it can't find the app ID then it's actually a success. Checking for nil here just seemed simpler than parsing an error (especially an MS Graph error), but if you still feel it should eat an error here then I can revise.

Copy link
Collaborator

@SudoBrendan SudoBrendan Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll admit it is a stylistic choice, so I'll let others comment too - I'd hold off on the change and see what others think. IMO, if we have 1 case where a "not found" is expected, and all others without, we could do a specific error handling check on that 1 case to achieve the same result (if the error is a 404 error, then...). This would save us from refactoring all cases.

Copy link
Contributor Author

@mbarnes mbarnes Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note, however, there are only 3 cases total. 🙂 But I'm willing to go either way based on consensus.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am in favor of not using nil as a 'happy path' value. When we can't find the service principal for a given appId, this should be an error (handled), but not an expected one.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but not expected

I wouldn't return an error, but log a message stating no role assignments found.

If the cluster installation fails or is requeued for any reason, this role assignment may not exist. And that's totally fine, we're just trying to clean up after ourselves on cluster deletion.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not consider this an error either. There simply are no roleAssignments to delete. +1 on logging

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this discussion thread shifted slightly.

Originally we were debating whether GetServicePrincipalIDByAppID should return an error if no service principals are found. It was a stylistic choice on my part to just return a nil string pointer.

What I'm understanding from the last couple comments is a "not found" error from c.roleassignments.ListForResourceGroup should simply be logged? (If it even returns an error in that case, I'm not sure if it does.) I'll investigate and open a followup PR if necessary, if that's alright.

if err != nil {
return err
}
Expand Down