diff --git a/pkg/monitor/cluster/cluster.go b/pkg/monitor/cluster/cluster.go index fce45a0c041..feadd1e14da 100644 --- a/pkg/monitor/cluster/cluster.go +++ b/pkg/monitor/cluster/cluster.go @@ -12,6 +12,7 @@ import ( "github.com/Azure/go-autorest/autorest/azure" configv1 "github.com/openshift/api/config/v1" configclient "github.com/openshift/client-go/config/clientset/versioned" + maoclient "github.com/openshift/machine-api-operator/pkg/generated/clientset/versioned" mcoclient "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned" "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" @@ -34,6 +35,7 @@ type Monitor struct { restconfig *rest.Config cli kubernetes.Interface configcli configclient.Interface + maocli maoclient.Interface mcocli mcoclient.Interface m metrics.Interface arocli aroclient.Interface @@ -70,6 +72,11 @@ func NewMonitor(ctx context.Context, log *logrus.Entry, restConfig *rest.Config, return nil, err } + maocli, err := maoclient.NewForConfig(restConfig) + if err != nil { + return nil, err + } + mcocli, err := mcoclient.NewForConfig(restConfig) if err != nil { return nil, err @@ -90,6 +97,7 @@ func NewMonitor(ctx context.Context, log *logrus.Entry, restConfig *rest.Config, restconfig: restConfig, cli: cli, configcli: configcli, + maocli: maocli, mcocli: mcocli, arocli: arocli, m: m, diff --git a/pkg/monitor/cluster/nodeconditions.go b/pkg/monitor/cluster/nodeconditions.go index 1a11c7c522c..89a63946046 100644 --- a/pkg/monitor/cluster/nodeconditions.go +++ b/pkg/monitor/cluster/nodeconditions.go @@ -5,9 +5,13 @@ package cluster import ( "context" + "encoding/json" + "strconv" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + azureproviderv1beta1 "sigs.k8s.io/cluster-api-provider-azure/pkg/apis/azureprovider/v1beta1" ) var nodeConditionsExpected = map[corev1.NodeConditionType]corev1.ConditionStatus{ @@ -23,27 +27,34 @@ func (mon *Monitor) emitNodeConditions(ctx context.Context) error { return err } + spotInstances := mon.getSpotInstances(ctx) + mon.emitGauge("node.count", int64(len(ns.Items)), nil) for _, n := range ns.Items { + for _, c := range n.Status.Conditions { if c.Status == nodeConditionsExpected[c.Type] { continue } + _, isSpotInstance := spotInstances[n.Name] + mon.emitGauge("node.conditions", 1, map[string]string{ - "nodeName": n.Name, - "status": string(c.Status), - "type": string(c.Type), + "nodeName": n.Name, + "status": string(c.Status), + "type": string(c.Type), + "spotInstance": strconv.FormatBool(isSpotInstance), }) if mon.hourlyRun { mon.log.WithFields(logrus.Fields{ - "metric": "node.conditions", - "name": n.Name, - "status": c.Status, - "type": c.Type, - "message": c.Message, + "metric": "node.conditions", + "name": n.Name, + "status": c.Status, + "type": c.Type, + "message": c.Message, + "spotInstance": isSpotInstance, }).Print() } } @@ -57,3 +68,30 @@ func (mon *Monitor) emitNodeConditions(ctx context.Context) error { return nil } + +// getSpotInstances returns a map where the keys are the machine name and only exist if the machine is a spot instance +func (mon *Monitor) getSpotInstances(ctx context.Context) map[string]struct{} { + spotInstances := make(map[string]struct{}) + machines, err := mon.maocli.MachineV1beta1().Machines("openshift-machine-api").List(ctx, metav1.ListOptions{}) + + if err != nil { + // when this call fails we may report spot vms as non spot until the next successful call + mon.log.Error(err) + return spotInstances + } + + for _, machine := range machines.Items { + var spec azureproviderv1beta1.AzureMachineProviderSpec + err = json.Unmarshal(machine.Spec.ProviderSpec.Value.Raw, &spec) + if err != nil { + mon.log.Error(err) + continue + } + + if spec.SpotVMOptions != nil { + spotInstances[machine.Name] = struct{}{} + } + } + + return spotInstances +} diff --git a/pkg/monitor/cluster/nodeconditions_test.go b/pkg/monitor/cluster/nodeconditions_test.go index eb8c6c4d07d..6abb2057def 100644 --- a/pkg/monitor/cluster/nodeconditions_test.go +++ b/pkg/monitor/cluster/nodeconditions_test.go @@ -5,12 +5,19 @@ package cluster import ( "context" + "encoding/json" "testing" "github.com/golang/mock/gomock" + machinev1beta1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" + maoclient "github.com/openshift/machine-api-operator/pkg/generated/clientset/versioned" + maofake "github.com/openshift/machine-api-operator/pkg/generated/clientset/versioned/fake" + "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" + azureproviderv1beta1 "sigs.k8s.io/cluster-api-provider-azure/pkg/apis/azureprovider/v1beta1" mock_metrics "github.com/Azure/ARO-RP/pkg/util/mocks/metrics" ) @@ -18,9 +25,19 @@ import ( func TestEmitNodeConditions(t *testing.T) { ctx := context.Background() + provSpec, err := json.Marshal(azureproviderv1beta1.AzureMachineProviderSpec{}) + if err != nil { + t.Fatal(err) + } + + kubeletVersion := "v1.17.1+9d33dd3" + cli := fake.NewSimpleClientset(&corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "aro-master-0", + Annotations: map[string]string{ + "machine.openshift.io/machine": "openshift-machine-api/master-0", + }, }, Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ @@ -30,12 +47,15 @@ func TestEmitNodeConditions(t *testing.T) { }, }, NodeInfo: corev1.NodeSystemInfo{ - KubeletVersion: "v1.17.1+9d33dd3", + KubeletVersion: kubeletVersion, }, }, }, &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "aro-master-1", + Annotations: map[string]string{ + "machine.openshift.io/machine": "openshift-machine-api/master-1", + }, }, Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ @@ -45,10 +65,38 @@ func TestEmitNodeConditions(t *testing.T) { }, }, NodeInfo: corev1.NodeSystemInfo{ - KubeletVersion: "v1.17.1+9d33dd3", + KubeletVersion: kubeletVersion, }, }, }) + maoclient := maofake.NewSimpleClientset( + &machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: provSpec, + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-machine-api/master-0", + Namespace: "openshift-machine-api", + }, + }, + &machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: provSpec, + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-machine-api/master-1", + Namespace: "openshift-machine-api", + }, + }, + ) controller := gomock.NewController(t) defer controller.Finish() @@ -56,33 +104,171 @@ func TestEmitNodeConditions(t *testing.T) { m := mock_metrics.NewMockInterface(controller) mon := &Monitor{ - cli: cli, - m: m, + cli: cli, + maocli: maoclient, + m: m, } m.EXPECT().EmitGauge("node.count", int64(2), map[string]string{}) m.EXPECT().EmitGauge("node.conditions", int64(1), map[string]string{ - "nodeName": "aro-master-0", - "status": "True", - "type": "MemoryPressure", + "nodeName": "aro-master-0", + "status": "True", + "type": "MemoryPressure", + "spotInstance": "false", }) m.EXPECT().EmitGauge("node.conditions", int64(1), map[string]string{ - "nodeName": "aro-master-1", - "status": "False", - "type": "Ready", + "nodeName": "aro-master-1", + "status": "False", + "type": "Ready", + "spotInstance": "false", }) m.EXPECT().EmitGauge("node.kubelet.version", int64(1), map[string]string{ "nodeName": "aro-master-0", - "kubeletVersion": "v1.17.1+9d33dd3", + "kubeletVersion": kubeletVersion, }) m.EXPECT().EmitGauge("node.kubelet.version", int64(1), map[string]string{ "nodeName": "aro-master-1", - "kubeletVersion": "v1.17.1+9d33dd3", + "kubeletVersion": kubeletVersion, }) - err := mon.emitNodeConditions(ctx) + err = mon.emitNodeConditions(ctx) if err != nil { t.Fatal(err) } } + +func TestGetSpotInstances(t *testing.T) { + ctx := context.Background() + + spotProvSpec, err := json.Marshal(azureproviderv1beta1.AzureMachineProviderSpec{ + SpotVMOptions: &azureproviderv1beta1.SpotVMOptions{}, + }) + if err != nil { + t.Fatal(err) + } + + provSpec, err := json.Marshal(azureproviderv1beta1.AzureMachineProviderSpec{}) + if err != nil { + t.Fatal(err) + } + + for _, tt := range []struct { + name string + maocli maoclient.Interface + node corev1.Node + expectedSpotInstance bool + }{ + { + name: "node is a spot instance", + maocli: maofake.NewSimpleClientset(&machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: spotProvSpec, + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-spot-0", + Namespace: "openshift-machine-api", + }, + }), + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-spot-0", + Annotations: map[string]string{ + "machine.openshift.io/machine": "openshift-machine-api/spot-0", + }, + }, + }, + expectedSpotInstance: true, + }, + { + name: "node is not a spot instance", + maocli: maofake.NewSimpleClientset(&machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: provSpec, + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "master-0", + Namespace: "openshift-machine-api", + }, + }), + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-master-0", + Annotations: map[string]string{ + "machine.openshift.io/machine": "openshift-machine-api/master-0", + }, + }, + }, + expectedSpotInstance: false, + }, + { + name: "node is missing annotation", + maocli: maofake.NewSimpleClientset(&machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: provSpec, + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "master-0", + Namespace: "openshift-machine-api", + }, + }), + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-master-0", + Annotations: map[string]string{}, + }, + }, + expectedSpotInstance: false, + }, + { + name: "malformed json in providerSpec", + maocli: maofake.NewSimpleClientset(&machinev1beta1.Machine{ + Spec: machinev1beta1.MachineSpec{ + ProviderSpec: machinev1beta1.ProviderSpec{ + Value: &kruntime.RawExtension{ + Raw: []byte(";df9j"), + }, + }, + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-spot-1", + Namespace: "openshift-machine-api", + }, + }), + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "aro-spot-1", + Annotations: map[string]string{ + "machine.openshift.io/machine": "openshift-machine-api/spot-0", + }, + }, + }, + expectedSpotInstance: false, + }, + } { + controller := gomock.NewController(t) + defer controller.Finish() + + mon := &Monitor{ + maocli: tt.maocli, + log: logrus.NewEntry(logrus.StandardLogger()), + } + + _, isSpotInstance := mon.getSpotInstances(ctx)[tt.node.Name] + if isSpotInstance != tt.expectedSpotInstance { + t.Fatalf("test %s: isSpotInstance should be %t but got %t", tt.name, tt.expectedSpotInstance, isSpotInstance) + } + } +}