Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPLAT-1742: vSphere - enable host group based zonal #8873

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ require (
github.com/ulikunitz/xz v0.5.12
github.com/vincent-petithory/dataurl v1.0.0
github.com/vmware/govmomi v0.43.0
go.uber.org/mock v0.4.0
golang.org/x/crypto v0.27.0
golang.org/x/oauth2 v0.21.0
golang.org/x/sync v0.8.0
Expand All @@ -112,7 +113,7 @@ require (
gopkg.in/ini.v1 v1.67.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.31.1
k8s.io/apiextensions-apiserver v0.31.0
k8s.io/apiextensions-apiserver v0.31.1
k8s.io/apimachinery v0.31.1
k8s.io/client-go v0.31.1
k8s.io/cloud-provider-vsphere v1.31.0
Expand Down Expand Up @@ -322,3 +323,9 @@ replace github.com/openshift/assisted-service/models => github.com/openshift/ass
replace github.com/containerd/containerd => github.com/containerd/containerd v1.6.26

replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505

replace github.com/openshift/client-go => github.com/jcpowermac/client-go v0.0.0-20241002131714-3be5d60ab621

replace github.com/openshift/api => github.com/jcpowermac/api v0.0.0-20241002131240-8cb746e646cb

replace github.com/openshift/library-go => github.com/jcpowermac/library-go v0.0.0-20241002131748-31bed7d1b859
18 changes: 10 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,12 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/jcpowermac/api v0.0.0-20241002131240-8cb746e646cb h1:sZT464Rz89bKA6vhpumHnRMh915T8YREkluj/GnF3d8=
github.com/jcpowermac/api v0.0.0-20241002131240-8cb746e646cb/go.mod h1:Shkl4HanLwDiiBzakv+con/aMGnVE2MAGvoKp5oyYUo=
github.com/jcpowermac/client-go v0.0.0-20241002131714-3be5d60ab621 h1:G/V95WypY97wkUHG5gjf1RlAFJKCTR5ZQESWCgNeN28=
github.com/jcpowermac/client-go v0.0.0-20241002131714-3be5d60ab621/go.mod h1:SmOKloVJ2R4yDlrLPeacBJS/nIko52VM+Wp5cY7JeKY=
github.com/jcpowermac/library-go v0.0.0-20241002131748-31bed7d1b859 h1:JWoRvAgY5Dm2Uom3OonIf0SQBRci34XMCfVzaj9dKF4=
github.com/jcpowermac/library-go v0.0.0-20241002131748-31bed7d1b859/go.mod h1:OI7tJ8G8VTh5h93w/OmWSUMIvbd2dp0O2VPLopKajZg=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.4 h1:tHnRBy1i5F2Dh8BAFxqFzxKqqvezXrL2OW1TnX+Mlas=
Expand Down Expand Up @@ -764,8 +770,6 @@ github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQ
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/openshift/api v0.0.0-20241001152557-e415140e5d5f h1:ya1OmyZm3LIIxI3U9VE9Nyx3ehCHgBwxyFUPflYPWls=
github.com/openshift/api v0.0.0-20241001152557-e415140e5d5f/go.mod h1:Shkl4HanLwDiiBzakv+con/aMGnVE2MAGvoKp5oyYUo=
github.com/openshift/assisted-image-service v0.0.0-20240607085136-02df2e56dde6 h1:U6ve+dnHlHhAELoxX+rdFOHVhoaYl0l9qtxwYtsO6C0=
github.com/openshift/assisted-image-service v0.0.0-20240607085136-02df2e56dde6/go.mod h1:o2H5VwQhUD8P6XsK6dRmKpCCJqVvv12KJQZBXmcCXCU=
github.com/openshift/assisted-service/api v0.0.0-20230831114549-1922eda29cf8 h1:+fZLKbycDo4JeLwPGVSAgf2XPaJGLM341l9ZfrrlxG0=
Expand All @@ -778,8 +782,6 @@ github.com/openshift/baremetal-operator/apis v0.0.0-20231128154154-6736c9b9c6c8
github.com/openshift/baremetal-operator/apis v0.0.0-20231128154154-6736c9b9c6c8/go.mod h1:CvKrrnAcvvtrZIc9y9WaqWmJhK0AJ9sWnh+VP4d7jcM=
github.com/openshift/baremetal-operator/pkg/hardwareutils v0.0.0-20231128154154-6736c9b9c6c8 h1:38vY9w7dXqB7tI9g1GCUnpahNDyBbp9Yylq+BQ154YE=
github.com/openshift/baremetal-operator/pkg/hardwareutils v0.0.0-20231128154154-6736c9b9c6c8/go.mod h1:399nvdaqoU9rTI25UdFw2EWcVjmJPpeZPIhfDAIx/XU=
github.com/openshift/client-go v0.0.0-20241001162912-da6d55e4611f h1:FRc0bVNWprihWS0GqQWzb3dY4dkCwpOP3mDw5NwSoR4=
github.com/openshift/client-go v0.0.0-20241001162912-da6d55e4611f/go.mod h1:KiZi2mJRH1TOJ3FtBDYS6YvUL30s/iIXaGSUrSa36mo=
github.com/openshift/cloud-credential-operator v0.0.0-20240404165937-5e8812d64187 h1:v2D/+SWsOPsl4Syz1SVjo7m3L0ethuRGR++ubsb89oA=
github.com/openshift/cloud-credential-operator v0.0.0-20240404165937-5e8812d64187/go.mod h1:eyA6FG71366St6Q1TW+jXdQbald0rUwtEPhAREMlyhA=
github.com/openshift/cloud-provider-vsphere v1.19.1-0.20240626105621-6464d0bb4928 h1:gX0HAKR0f40xmMWlUSn8DBMCjip8Iuzg5XToWAv6Uzw=
Expand All @@ -798,8 +800,6 @@ github.com/openshift/custom-resource-status v1.1.2 h1:C3DL44LEbvlbItfd8mT5jWrqPf
github.com/openshift/custom-resource-status v1.1.2/go.mod h1:DB/Mf2oTeiAmVVX1gN+NEqweonAPY0TKUwADizj8+ZA=
github.com/openshift/hive/apis v0.0.0-20220222213051-def9088fdb5a h1:E+XPJs/aVvYsrlJzo2ED38ZTR2RTNUlFMmOaFAAdMZg=
github.com/openshift/hive/apis v0.0.0-20220222213051-def9088fdb5a/go.mod h1:E1bgquRiwfugdArdecPbpYIrAdve5kTzMaJb0+8jMXI=
github.com/openshift/library-go v0.0.0-20240919205913-c96b82b3762b h1:y2DduJug7UZqTu0QTkRPAu73nskuUbFA66fmgxVf/fI=
github.com/openshift/library-go v0.0.0-20240919205913-c96b82b3762b/go.mod h1:f8QcnrooSwGa96xI4UaKbKGJZskhTCGeimXKyc4t/ZU=
github.com/openshift/machine-api-operator v0.2.1-0.20240930121047-57b7917e6140 h1:VKn644y4Ra94L8YZ3JT8ZuOZMEspZt8yoIjHeaRM3pA=
github.com/openshift/machine-api-operator v0.2.1-0.20240930121047-57b7917e6140/go.mod h1:JEUa45hXYbC6tDpLjJCRuLXIKs7pdcawDkodqfZuQEE=
github.com/openshift/machine-api-provider-gcp v0.0.1-0.20231014045125-6096cc86f3ba h1:q9VMvYHgKq1v+3E57HIdbR9hJPNSmHDfZpOHsXC27Nk=
Expand Down Expand Up @@ -986,6 +986,8 @@ go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpK
go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU=
go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
Expand Down Expand Up @@ -1494,8 +1496,8 @@ k8s.io/api v0.23.3/go.mod h1:w258XdGyvCmnBj/vGzQMj6kzdufJZVUwEM1U2fRJwSQ=
k8s.io/api v0.24.2/go.mod h1:AHqbSkTm6YrQ0ObxjO3Pmp/ubFF/KuM7jU+3khoBsOg=
k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU=
k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI=
k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk=
k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk=
k8s.io/apiextensions-apiserver v0.31.1 h1:L+hwULvXx+nvTYX/MKM3kKMZyei+UiSXQWciX/N6E40=
k8s.io/apiextensions-apiserver v0.31.1/go.mod h1:tWMPR3sgW+jsl2xm9v7lAyRF1rYEK71i9G5dRtkknoQ=
k8s.io/apimachinery v0.23.3/go.mod h1:BEuFMMBaIbcOqVIJqNZJXGFTP4W6AycEpb5+m/97hrM=
k8s.io/apimachinery v0.24.2/go.mod h1:82Bi4sCzVBdpYjyI4jY6aHX+YCUchUIrZrXKedjd2UM=
k8s.io/apimachinery v0.24.3/go.mod h1:82Bi4sCzVBdpYjyI4jY6aHX+YCUchUIrZrXKedjd2UM=
Expand Down
113 changes: 113 additions & 0 deletions pkg/asset/installconfig/vsphere/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vmware/govmomi/find"
"github.com/vmware/govmomi/object"
vapitags "github.com/vmware/govmomi/vapi/tags"
"github.com/vmware/govmomi/vim25"
"github.com/vmware/govmomi/vim25/mo"
Expand Down Expand Up @@ -162,8 +163,14 @@ func validateFailureDomain(validationCtx *validationContext, failureDomain *vsph
if err != nil {
allErrs = append(allErrs, field.InternalError(vsphereField, err))
}

validationCtx.regionTagCategoryID = regionTagCategoryID
validationCtx.zoneTagCategoryID = zoneTagCategoryID
if failureDomain.ZoneType == vsphere.HostGroupFailureDomain {
if err = validateHostTagAttachments(validationCtx, failureDomain.Topology.ComputeCluster, failureDomain.Zone); err != nil {
allErrs = append(allErrs, field.InternalError(vsphereField, err))
}
}
}

allErrs = append(allErrs, resourcePoolExists(validationCtx, resourcePool, topologyField.Child("resourcePool"))...)
Expand All @@ -173,6 +180,10 @@ func validateFailureDomain(validationCtx *validationContext, failureDomain *vsph
checkDatacenterPrivileges = false
}

if failureDomain.ZoneType == vsphere.HostGroupFailureDomain {
allErrs = append(allErrs, validateHostGroups(validationCtx, failureDomain.Topology.ComputeCluster, failureDomain.Topology.HostGroup, topologyField.Child("hostGroup"))...)
}

allErrs = append(allErrs, validateESXiVersion(validationCtx, failureDomain.Topology.ComputeCluster, vsphereField, topologyField.Child("computeCluster"))...)
allErrs = append(allErrs, validateVcenterPrivileges(validationCtx, topologyField.Child("server"))...)
allErrs = append(allErrs, computeClusterExists(validationCtx, failureDomain.Topology.ComputeCluster, topologyField.Child("computeCluster"), checkComputeClusterPrivileges, checkTags)...)
Expand All @@ -190,6 +201,32 @@ func validateFailureDomain(validationCtx *validationContext, failureDomain *vsph
return allErrs
}

// validateHostGroups verifies that a host group named hostGroup is defined on
// the compute cluster identified by cluster.
//
// It returns nil when the cluster configuration contains a ClusterHostGroup
// with that name. A failure to look up the cluster or to read its
// configuration is reported as an internal error on fldPath; a host group that
// is not defined on the cluster is reported as a not-found error on fldPath.
func validateHostGroups(validationCtx *validationContext, cluster, hostGroup string, fldPath *field.Path) field.ErrorList {
	ctx, cancel := context.WithTimeout(context.TODO(), 60*time.Second)
	defer cancel()

	ccr, err := validationCtx.Finder.ClusterComputeResource(ctx, cluster)
	if err != nil {
		// Return immediately: ccr is not usable after a failed lookup and
		// calling methods on it would dereference a nil pointer.
		return field.ErrorList{field.InternalError(fldPath, err)}
	}

	configInfoEx, err := ccr.Configuration(ctx)
	if err != nil {
		// Same reasoning as above: configInfoEx must not be read on error.
		return field.ErrorList{field.InternalError(fldPath, err)}
	}

	// The cluster's groups hold both VM groups and host groups; only host
	// groups are candidates.
	for _, g := range configInfoEx.Group {
		if hg, ok := g.(*vim25types.ClusterHostGroup); ok && hg.Name == hostGroup {
			return nil
		}
	}

	// No host group with the requested name exists on the cluster. This must
	// be surfaced as a validation error instead of an empty (passing) list.
	return field.ErrorList{field.NotFound(fldPath, hostGroup)}
}

func validateVCenterVersion(validationCtx *validationContext, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}

Expand Down Expand Up @@ -355,10 +392,13 @@ func computeClusterExists(validationCtx *validationContext, computeCluster strin
}

if checkTagAttachment {
/* TODO: jcallen: fix me...
err = validateTagAttachment(validationCtx, computeClusterMo.Reference())
if err != nil {
return field.ErrorList{field.InternalError(fldPath, err)}
}

*/
}

return field.ErrorList{}
Expand Down Expand Up @@ -567,7 +607,80 @@ func validateTagCategories(validationCtx *validationContext) (string, string, er
return regionTagCategoryID, zoneTagCategoryID, nil
}

// validateHostTagAttachments checks that every host of the compute cluster
// identified by cluster has a tag named zoneName attached whose category is
// the zone tag category stored in validationCtx.zoneTagCategoryID.
//
// It returns nil when validationCtx.TagManager is nil (no tag validation is
// possible), an error from the cluster, host, or tag lookups, or an error
// naming the first host that is missing the zone tag.
//
// NOTE(review): this inspects all hosts of the cluster, not only the members
// of the failure domain's host group — confirm that this is the intended
// scope once several host-group zones share one cluster.
func validateHostTagAttachments(validationCtx *validationContext, cluster, zoneName string) error {
	if validationCtx.TagManager == nil {
		// todo: jcallen: should this really return nil?
		return nil
	}
	ctx, cancel := context.WithTimeout(context.TODO(), 60*time.Second)
	defer cancel()

	ccr, err := validationCtx.Finder.ClusterComputeResource(ctx, cluster)
	if err != nil {
		return err
	}

	hosts, err := ccr.Hosts(ctx)
	if err != nil {
		return err
	}

	references := make([]mo.Reference, 0, len(hosts))
	for _, h := range hosts {
		references = append(references, h.Reference())
	}

	attachedTags, err := validationCtx.TagManager.GetAttachedTagsOnObjects(ctx, references)
	if err != nil {
		return err
	}

	// todo: jcallen: this isn't enough
	// todo: the zonal tag attached to the host _must_ be
	// todo: the one defined in the capv failure domain

	// Record which hosts carry the expected zone tag. Keying by the resolved
	// managed object reference (instead of the mo.Reference interface value)
	// keeps the lookup independent of the concrete type the tag manager uses
	// for ObjectID.
	zoned := make(map[vim25types.ManagedObjectReference]struct{}, len(attachedTags))
	for _, attached := range attachedTags {
		if attached.ObjectID == nil {
			continue
		}
		ref := attached.ObjectID.Reference()
		if !findHostReference(hosts, ref) {
			// Not one of the hosts that were queried; ignore the entry.
			continue
		}
		for _, tag := range attached.Tags {
			if tag.CategoryID == validationCtx.zoneTagCategoryID && tag.Name == zoneName {
				zoned[ref] = struct{}{}
				break
			}
		}
	}

	// Walk the hosts rather than the tag results so that a host with no entry
	// in the results (for example, one without any tags) is reported instead
	// of being silently accepted.
	for _, h := range hosts {
		if _, ok := zoned[h.Reference()]; !ok {
			return errors.Errorf("host %s does not have a zone tag attachment", h.Reference())
		}
	}

	return nil
}

// findHostReference reports whether hostRef refers to one of hosts.
//
// Both sides are resolved to their managed object reference value before they
// are compared. Comparing a host's reference directly with the mo.Reference
// interface value only matches when the interface's dynamic type is exactly
// the managed object reference type, so any other mo.Reference implementation
// would never match even when it designates the same object.
// NOTE(review): the tag manager appears to return object IDs wrapped in its
// own reference type — confirm against the govmomi version in use.
//
// A nil hostRef never matches.
func findHostReference(hosts []*object.HostSystem, hostRef mo.Reference) bool {
	if hostRef == nil {
		return false
	}

	want := hostRef.Reference()
	for _, h := range hosts {
		if h.Reference() == want {
			return true
		}
	}
	return false
}

func validateTagAttachment(validationCtx *validationContext, reference vim25types.ManagedObjectReference) error {
// todo: jcallen: this is currently unused, but it will be needed to make sure that the ESXi hosts have the correct tags attached
// todo: jcallen: while performance doesn't really matter for the installer, make sure the
// todo: jcallen: recommended (performant) tag query is used.

if validationCtx.TagManager == nil {
return nil
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/asset/machines/vsphere/capimachines.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func GenerateMachines(ctx context.Context, clusterID string, config *types.Insta
"cluster.x-k8s.io/control-plane": "",
},
},

Spec: capv.VSphereMachineSpec{
VirtualMachineCloneSpec: capv.VirtualMachineCloneSpec{
CloneMode: capv.FullClone,
Expand All @@ -148,6 +149,14 @@ func GenerateMachines(ctx context.Context, clusterID string, config *types.Insta
},
},
}

if failureDomainName, ok := data.MachineFailureDomain[machine.Name]; ok {
logrus.Debugf(">>>>>>>>>> set failure domain %s, %s", failureDomainName, machine.Name)
vsphereMachine.Spec.FailureDomain = &failureDomainName
} else {
logrus.Warnf("unable to find failure domain for machine %s", machine.Name)
}

vsphereMachine.SetGroupVersionKind(capv.GroupVersion.WithKind("VSphereMachine"))
capvMachines = append(capvMachines, vsphereMachine)

Expand Down
23 changes: 20 additions & 3 deletions pkg/asset/machines/vsphere/machines.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ type MachineData struct {
ControlPlaneMachineSet *machinev1.ControlPlaneMachineSet
IPClaims []ipamv1.IPAddressClaim
IPAddresses []ipamv1.IPAddress

MachineFailureDomain map[string]string
}

// Machines returns a list of machines for a machinepool.
Expand Down Expand Up @@ -67,12 +69,14 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
}
}

failureDomains := []machinev1.VSphereFailureDomain{}
var failureDomains []machinev1.VSphereFailureDomain

vsphereMachineProvider := &machineapi.VSphereMachineProviderSpec{}
data.MachineFailureDomain = make(map[string]string)

for idx := int32(0); idx < replicas; idx++ {
logrus.Debugf("Creating %v machine %v", role, idx)

var host *vsphere.Host
desiredZone := mpool.Zones[int(idx)%numOfZones]
if hosts != nil && int(idx) < len(hosts) {
Expand Down Expand Up @@ -110,6 +114,7 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
if err != nil {
return data, errors.Wrap(err, "unable to find vCenter in failure domains")
}

provider, err := provider(clusterID, vcenter, failureDomain, mpool, osImageForZone, userDataSecret)
if err != nil {
return data, errors.Wrap(err, "failed to create provider")
Expand All @@ -133,6 +138,10 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
},
}

data.MachineFailureDomain[machine.Name] = failureDomain.Name

logrus.Debugf(">>>>> MachineFailureDomain %s %s", machine.Name, failureDomain.Name)

// Apply static IP if configured
claim, address, err := applyNetworkConfig(host, provider, machine)
if err != nil {
Expand Down Expand Up @@ -312,6 +321,7 @@ func generateCapiNetwork(machineName, ipAddress, gateway string, prefix, deviceI
return ipclaim, ipaddr
}

// todo: jcallen: temporary until we have cpms figured out for master workspace vmgroup
func provider(clusterID string, vcenter *vsphere.VCenter, failureDomain vsphere.FailureDomain, mpool *vsphere.MachinePool, osImage string, userDataSecret string) (*machineapi.VSphereMachineProviderSpec, error) {
networkDeviceSpec := make([]machineapi.NetworkDeviceSpec, len(failureDomain.Topology.Networks))

Expand All @@ -335,7 +345,7 @@ func provider(clusterID string, vcenter *vsphere.VCenter, failureDomain vsphere.
networkDeviceSpec[i] = machineapi.NetworkDeviceSpec{NetworkName: network}
}

return &machineapi.VSphereMachineProviderSpec{
vSphereMachineProviderSpec := &machineapi.VSphereMachineProviderSpec{
TypeMeta: metav1.TypeMeta{
APIVersion: machineapi.SchemeGroupVersion.String(),
Kind: "VSphereMachineProviderSpec",
Expand All @@ -358,7 +368,14 @@ func provider(clusterID string, vcenter *vsphere.VCenter, failureDomain vsphere.
NumCoresPerSocket: mpool.NumCoresPerSocket,
MemoryMiB: mpool.MemoryMiB,
DiskGiB: mpool.OSDisk.DiskSizeGB,
}, nil
}

if failureDomain.ZoneType == vsphere.HostGroupFailureDomain {
vSphereMachineProviderSpec.Workspace.VMGroup = fmt.Sprintf("%s-%s", clusterID, failureDomain.Name)
}

return vSphereMachineProviderSpec, nil

}

// ConfigMasters sets the PublicIP flag and assigns a set of load balancers to the given machines
Expand Down
Loading