Skip to content

Commit

Permalink
feat: remove the machine from the discovery service on reset
Browse files Browse the repository at this point in the history
Fixes #6137

Signed-off-by: Andrey Smirnov <[email protected]>
(cherry picked from commit 8c203ce)
  • Loading branch information
smira committed Aug 29, 2022
1 parent f543f97 commit 911058d
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 22 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ require (
github.com/ryanuber/go-glob v1.0.0
github.com/safchain/ethtool v0.2.0
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.9
github.com/siderolabs/discovery-client v0.1.1
github.com/siderolabs/go-pcidb v0.1.0
github.com/siderolabs/go-pointer v1.0.0
github.com/spf13/cobra v1.5.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.0
github.com/talos-systems/crypto v0.3.6
github.com/talos-systems/discovery-api v0.1.0
github.com/talos-systems/discovery-client v0.1.0
github.com/talos-systems/go-blockdevice v0.3.4
github.com/talos-systems/go-cmd v0.1.0
github.com/talos-systems/go-debug v0.2.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sethgrid/pester v0.0.0-20190127155807-68a33a018ad0 h1:X9XMOYjxEfAYSy3xK1DzO5dMkkWhs9E9UCcS1IERx2k=
github.com/sethgrid/pester v0.0.0-20190127155807-68a33a018ad0/go.mod h1:Ad7IjTpvzZO8Fl0vh9AzQ+j/jYZfyp2diGwI8m5q+ns=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/siderolabs/discovery-client v0.1.1 h1:1lhXTepW4V1IJPf91ApvF77cBl086d1OXuC4WdVp8hM=
github.com/siderolabs/discovery-client v0.1.1/go.mod h1:gBE5qxdB4BcY2nXOBJ14Qh8u7cBadCofjbxBLohcnbQ=
github.com/siderolabs/go-pcidb v0.1.0 h1:6cJPBBmHlIF4GouYR/1g3JXS/niAON+6lIOfKl/t794=
github.com/siderolabs/go-pcidb v0.1.0/go.mod h1:wT/tUxNZFlKSuGBniVwXL53vlGQq2/CVu16y6sMGIao=
github.com/siderolabs/go-pointer v1.0.0 h1:6TshPKep2doDQJAAtHUuHWXbca8ZfyRySjSBT/4GsMU=
Expand Down Expand Up @@ -1128,8 +1130,6 @@ github.com/talos-systems/crypto v0.3.6 h1:aq1uADaF71P2/K35igWx8u/ivHOYp3vHQ/kAzq
github.com/talos-systems/crypto v0.3.6/go.mod h1:jbt9CspHnhdwyKmXQQEIiFSHz1cfsCJjsd0iNat1wEA=
github.com/talos-systems/discovery-api v0.1.0 h1:aKod6uqakH6VfeQ6HaxPF7obqFAL1QTJe4HHTb2mVKk=
github.com/talos-systems/discovery-api v0.1.0/go.mod h1:ZsbzzOC5bzToaF3+YvUXDf9paeWV5bedpDu5RPXrglM=
github.com/talos-systems/discovery-client v0.1.0 h1:m+f96TKGFckMWrhDI+o9+QhcGn8f1A61Jp6YYVwiulI=
github.com/talos-systems/discovery-client v0.1.0/go.mod h1:LxqCv16VBB68MgaMnV8jXujYd3Q097DAn22U5gaHmkU=
github.com/talos-systems/go-blockdevice v0.3.4 h1:LP2hgIcd5qbympxYFlWbHYTPyD/f/sZjBMma01soniQ=
github.com/talos-systems/go-blockdevice v0.3.4/go.mod h1:qnn/zDc09I1DA2BUDDCOSA2D0P8pIDjN8pGiRoRaQig=
github.com/talos-systems/go-cmd v0.0.0-20210216164758-68eb0067e0f0/go.mod h1:kf+rZzTEmlDiYQ6ulslvRONnKLQH8x83TowltGMhO+k=
Expand Down
2 changes: 2 additions & 0 deletions go.work.sum
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ github.com/sclevine/spec v1.2.0 h1:1Jwdf9jSfDl9NVmt8ndHqbTZ7XCCPbh1jI3hkDBHVYA=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/siderolabs/discovery-client v0.1.1 h1:1lhXTepW4V1IJPf91ApvF77cBl086d1OXuC4WdVp8hM=
github.com/siderolabs/discovery-client v0.1.1/go.mod h1:gBE5qxdB4BcY2nXOBJ14Qh8u7cBadCofjbxBLohcnbQ=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
Expand Down
57 changes: 39 additions & 18 deletions internal/app/machined/pkg/controllers/cluster/discovery_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ import (

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
discoveryclient "github.com/siderolabs/discovery-client/pkg/client"
"github.com/siderolabs/go-pointer"
"github.com/talos-systems/discovery-api/api/v1alpha1/client/pb"
discoveryclient "github.com/talos-systems/discovery-client/pkg/client"
"go.uber.org/zap"
"inet.af/netaddr"

Expand All @@ -27,6 +28,7 @@ import (
"github.com/talos-systems/talos/pkg/machinery/resources/cluster"
"github.com/talos-systems/talos/pkg/machinery/resources/config"
"github.com/talos-systems/talos/pkg/machinery/resources/kubespan"
"github.com/talos-systems/talos/pkg/machinery/resources/runtime"
"github.com/talos-systems/talos/pkg/version"
)

Expand Down Expand Up @@ -62,6 +64,12 @@ func (ctrl *DiscoveryServiceController) Inputs() []controller.Input {
Type: kubespan.EndpointType,
Kind: controller.InputWeak,
},
{
Namespace: runtime.NamespaceName,
Type: runtime.MachineStatusType,
ID: pointer.To(runtime.MachineStatusID),
Kind: controller.InputWeak,
},
}
}

Expand Down Expand Up @@ -206,6 +214,11 @@ func (ctrl *DiscoveryServiceController) Run(ctx context.Context, r controller.Ru
return fmt.Errorf("error listing endpoints: %w", err)
}

machineStatus, err := safe.ReaderGet[*runtime.MachineStatus](ctx, r, resource.NewMetadata(runtime.NamespaceName, runtime.MachineStatusType, runtime.MachineStatusID, resource.VersionUndefined))
if err != nil && !state.IsNotFoundError(err) {
return fmt.Errorf("error getting machine status: %w", err)
}

if client == nil {
var cipher cipher.Block

Expand Down Expand Up @@ -236,23 +249,31 @@ func (ctrl *DiscoveryServiceController) Run(ctx context.Context, r controller.Ru
}()
}

localData := pbAffiliate(affiliateSpec)
localEndpoints := pbEndpoints(affiliateSpec)
otherEndpoints := pbOtherEndpoints(otherEndpointsList)

// don't send updates on localData if it hasn't changed: sending an unchanged update would introduce a positive feedback loop,
// as the watch loop will notify on self update
if !proto.Equal(localData, prevLocalData) || !equalEndpoints(localEndpoints, prevLocalEndpoints) || !equalOtherEndpoints(otherEndpoints, prevOtherEndpoints) {
if err = client.SetLocalData(&discoveryclient.Affiliate{
Affiliate: localData,
Endpoints: localEndpoints,
}, otherEndpoints); err != nil {
return fmt.Errorf("error setting local affiliate data: %w", err) //nolint:govet
// delete/update local affiliate
//
// if the node enters the resetting stage, clean up the local affiliate;
// otherwise, update the local affiliate data
if machineStatus != nil && machineStatus.TypedSpec().Stage == runtime.MachineStageResetting {
client.DeleteLocalAffiliate()
} else {
localData := pbAffiliate(affiliateSpec)
localEndpoints := pbEndpoints(affiliateSpec)
otherEndpoints := pbOtherEndpoints(otherEndpointsList)

// don't send updates on localData if it hasn't changed: sending an unchanged update would introduce a positive feedback loop,
// as the watch loop will notify on self update
if !proto.Equal(localData, prevLocalData) || !equalEndpoints(localEndpoints, prevLocalEndpoints) || !equalOtherEndpoints(otherEndpoints, prevOtherEndpoints) {
if err = client.SetLocalData(&discoveryclient.Affiliate{
Affiliate: localData,
Endpoints: localEndpoints,
}, otherEndpoints); err != nil {
return fmt.Errorf("error setting local affiliate data: %w", err) //nolint:govet
}

prevLocalData = localData
prevLocalEndpoints = localEndpoints
prevOtherEndpoints = otherEndpoints
}

prevLocalData = localData
prevLocalEndpoints = localEndpoints
prevOtherEndpoints = otherEndpoints
}

touchedIDs := make(map[resource.ID]struct{})
Expand All @@ -267,7 +288,7 @@ func (ctrl *DiscoveryServiceController) Run(ctx context.Context, r controller.Ru

return nil
}); err != nil {
return err
return err //nolint:govet
}

touchedIDs[id] = struct{}{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ import (
"time"

"github.com/cosi-project/runtime/pkg/resource"
"github.com/siderolabs/discovery-client/pkg/client"
"github.com/stretchr/testify/suite"
"github.com/talos-systems/discovery-api/api/v1alpha1/client/pb"
"github.com/talos-systems/discovery-client/pkg/client"
"github.com/talos-systems/go-retry/retry"
"inet.af/netaddr"

Expand All @@ -31,6 +31,7 @@ import (
"github.com/talos-systems/talos/pkg/machinery/resources/cluster"
"github.com/talos-systems/talos/pkg/machinery/resources/config"
"github.com/talos-systems/talos/pkg/machinery/resources/kubespan"
"github.com/talos-systems/talos/pkg/machinery/resources/runtime"
)

type DiscoveryServiceSuite struct {
Expand Down Expand Up @@ -228,6 +229,25 @@ func (suite *DiscoveryServiceSuite) TestReconcile() {
}),
))

// pretend that machine is being reset
machineStatus := runtime.NewMachineStatus()
machineStatus.TypedSpec().Stage = runtime.MachineStageResetting
suite.Require().NoError(suite.state.Create(suite.ctx, machineStatus))

// client should see the affiliate being deleted
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
// controller should delete its local affiliate
affiliates := cli.GetAffiliates()

if len(affiliates) != 0 {
return retry.ExpectedErrorf("affiliates len %d != 0", len(affiliates))
}

return nil
},
))

cliCtxCancel()
suite.Assert().NoError(<-errCh)
}
Expand Down

0 comments on commit 911058d

Please sign in to comment.