Skip to content

Commit

Permalink
Aro 2998 monitoring errors (Azure#3068)
Browse files Browse the repository at this point in the history
Co-authored-by: b-jhoreman <[email protected]>
  • Loading branch information
jhoreman and b-jhoreman authored Aug 4, 2023
1 parent 8191585 commit d61bdd6
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 43 deletions.
53 changes: 28 additions & 25 deletions pkg/operator/controllers/monitoring/monitoring_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/Azure/ARO-RP/pkg/api"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/operator/controllers/base"
)

const (
Expand Down Expand Up @@ -54,52 +55,54 @@ type Config struct {
} `json:"alertmanagerMain,omitempty"`
}

type Reconciler struct {
log *logrus.Entry

client client.Client
type MonitoringReconciler struct {
base.AROController

jsonHandle *codec.JsonHandle
}

func NewReconciler(log *logrus.Entry, client client.Client) *Reconciler {
return &Reconciler{
log: log,
client: client,
func NewReconciler(log *logrus.Entry, client client.Client) *MonitoringReconciler {
return &MonitoringReconciler{
AROController: base.AROController{
Log: log,
Client: client,
Name: ControllerName,
},
jsonHandle: new(codec.JsonHandle),
}
}

func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
instance := &arov1alpha1.Cluster{}
err := r.client.Get(ctx, types.NamespacedName{Name: arov1alpha1.SingletonClusterName}, instance)
func (r *MonitoringReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
instance, err := r.GetCluster(ctx)
if err != nil {
return reconcile.Result{}, err
}

if !instance.Spec.OperatorFlags.GetSimpleBoolean(controllerEnabled) {
r.log.Debug("controller is disabled")
r.Log.Debug("controller is disabled")
return reconcile.Result{}, nil
}

r.log.Debug("running")
r.Log.Debug("running")
for _, f := range []func(context.Context) (ctrl.Result, error){
r.reconcileConfiguration,
r.reconcilePVC, // TODO(mj): This should be removed once we don't have PVC anymore
} {
result, err := f(ctx)
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)
return result, err
}
}

return reconcile.Result{}, nil
}

func (r *Reconciler) reconcilePVC(ctx context.Context) (ctrl.Result, error) {
func (r *MonitoringReconciler) reconcilePVC(ctx context.Context) (ctrl.Result, error) {
pvcList := &corev1.PersistentVolumeClaimList{}
selector, _ := labels.Parse(prometheusLabels)
err := r.client.List(ctx, pvcList, &client.ListOptions{
err := r.Client.List(ctx, pvcList, &client.ListOptions{
Namespace: monitoringName.Namespace,
LabelSelector: selector,
})
Expand All @@ -108,15 +111,15 @@ func (r *Reconciler) reconcilePVC(ctx context.Context) (ctrl.Result, error) {
}

for _, pvc := range pvcList.Items {
err = r.client.Delete(ctx, &pvc)
err = r.Client.Delete(ctx, &pvc)
if err != nil {
return reconcile.Result{}, err
}
}
return reconcile.Result{}, nil
}

func (r *Reconciler) reconcileConfiguration(ctx context.Context) (ctrl.Result, error) {
func (r *MonitoringReconciler) reconcileConfiguration(ctx context.Context) (ctrl.Result, error) {
cm, isCreate, err := r.monitoringConfigMap(ctx)
if err != nil {
return reconcile.Result{}, err
Expand Down Expand Up @@ -172,18 +175,18 @@ func (r *Reconciler) reconcileConfiguration(ctx context.Context) (ctrl.Result, e
cm.Data["config.yaml"] = string(cmYaml)

if isCreate {
r.log.Infof("re-creating monitoring configmap. %s", monitoringName.Name)
err = r.client.Create(ctx, cm)
r.Log.Infof("re-creating monitoring configmap. %s", monitoringName.Name)
err = r.Client.Create(ctx, cm)
} else {
r.log.Infof("updating monitoring configmap. %s", monitoringName.Name)
err = r.client.Update(ctx, cm)
r.Log.Infof("updating monitoring configmap. %s", monitoringName.Name)
err = r.Client.Update(ctx, cm)
}
return reconcile.Result{}, err
}

func (r *Reconciler) monitoringConfigMap(ctx context.Context) (*corev1.ConfigMap, bool, error) {
func (r *MonitoringReconciler) monitoringConfigMap(ctx context.Context) (*corev1.ConfigMap, bool, error) {
cm := &corev1.ConfigMap{}
err := r.client.Get(ctx, monitoringName, cm)
err := r.Client.Get(ctx, monitoringName, cm)
if kerrors.IsNotFound(err) {
return &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -200,8 +203,8 @@ func (r *Reconciler) monitoringConfigMap(ctx context.Context) (*corev1.ConfigMap
}

// SetupWithManager setup the manager
func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
r.log.Info("starting cluster monitoring controller")
func (r *MonitoringReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.Log.Info("starting cluster monitoring controller")

aroClusterPredicate := predicate.NewPredicateFuncs(func(o client.Object) bool {
return o.GetName() == arov1alpha1.SingletonClusterName
Expand Down
60 changes: 42 additions & 18 deletions pkg/operator/controllers/monitoring/monitoring_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"
"testing"

operatorv1 "github.com/openshift/api/operator/v1"
"github.com/sirupsen/logrus"
"github.com/ugorji/go/codec"
corev1 "k8s.io/api/core/v1"
Expand All @@ -19,26 +20,34 @@ import (
ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake"

arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/operator/controllers/base"
"github.com/Azure/ARO-RP/pkg/util/cmp"
_ "github.com/Azure/ARO-RP/pkg/util/scheme"
utilconditions "github.com/Azure/ARO-RP/test/util/conditions"
)

var (
cmMetadata = metav1.ObjectMeta{Name: "cluster-monitoring-config", Namespace: "openshift-monitoring"}
)

func TestReconcileMonitoringConfig(t *testing.T) {
defaultAvailable := utilconditions.ControllerDefaultAvailable(ControllerName)
defaultProgressing := utilconditions.ControllerDefaultProgressing(ControllerName)
defaultDegraded := utilconditions.ControllerDefaultDegraded(ControllerName)
defaultConditions := []operatorv1.OperatorCondition{defaultAvailable, defaultProgressing, defaultDegraded}
log := logrus.NewEntry(logrus.StandardLogger())
type test struct {
name string
configMap *corev1.ConfigMap
wantConfig string
name string
configMap *corev1.ConfigMap
wantConfig string
wantConditions []operatorv1.OperatorCondition
}

for _, tt := range []*test{
{
name: "ConfigMap does not exist - enable",
wantConfig: `{}`,
name: "ConfigMap does not exist - enable",
wantConfig: `{}`,
wantConditions: defaultConditions,
},
{
name: "empty config.yaml",
Expand All @@ -48,7 +57,8 @@ func TestReconcileMonitoringConfig(t *testing.T) {
"config.yaml": ``,
},
},
wantConfig: ``,
wantConfig: ``,
wantConditions: defaultConditions,
},
{
name: "settings restored to default and extra fields are preserved",
Expand Down Expand Up @@ -88,6 +98,7 @@ alertmanagerMain:
prometheusK8s:
extraField: prometheus
`,
wantConditions: defaultConditions,
},
{
name: "empty volumeClaimTemplate struct is cleared out",
Expand All @@ -110,6 +121,7 @@ alertmanagerMain:
prometheusK8s:
bugs: not-here
`,
wantConditions: defaultConditions,
},
{
name: "other monitoring components are configured",
Expand All @@ -132,6 +144,7 @@ alertmanagerMain:
somethingElse:
configured: true
`,
wantConditions: defaultConditions,
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -153,9 +166,11 @@ somethingElse:
clientBuilder.WithObjects(tt.configMap)
}

r := &Reconciler{
log: log,
client: clientBuilder.Build(),
r := &MonitoringReconciler{
AROController: base.AROController{
Log: log,
Client: clientBuilder.Build(),
},
jsonHandle: new(codec.JsonHandle),
}
request := ctrl.Request{}
Expand All @@ -168,7 +183,7 @@ somethingElse:
}

cm := &corev1.ConfigMap{}
err = r.client.Get(ctx, types.NamespacedName{Namespace: "openshift-monitoring", Name: "cluster-monitoring-config"}, cm)
err = r.Client.Get(ctx, types.NamespacedName{Namespace: "openshift-monitoring", Name: "cluster-monitoring-config"}, cm)
if err != nil {
t.Fatal(err)
}
Expand All @@ -181,11 +196,16 @@ somethingElse:
}

func TestReconcilePVC(t *testing.T) {
defaultAvailable := utilconditions.ControllerDefaultAvailable(ControllerName)
defaultProgressing := utilconditions.ControllerDefaultProgressing(ControllerName)
defaultDegraded := utilconditions.ControllerDefaultDegraded(ControllerName)
defaultConditions := []operatorv1.OperatorCondition{defaultAvailable, defaultProgressing, defaultDegraded}
volumeMode := corev1.PersistentVolumeFilesystem
tests := []struct {
name string
pvcs []client.Object
want []corev1.PersistentVolumeClaim
name string
pvcs []client.Object
want []corev1.PersistentVolumeClaim
wantConditions []operatorv1.OperatorCondition
}{
{
name: "Should delete the prometheus PVCs",
Expand All @@ -211,7 +231,8 @@ func TestReconcilePVC(t *testing.T) {
},
},
},
want: []corev1.PersistentVolumeClaim{},
want: []corev1.PersistentVolumeClaim{},
wantConditions: defaultConditions,
},
{
name: "Should preserve 1 pvc",
Expand Down Expand Up @@ -255,6 +276,7 @@ func TestReconcilePVC(t *testing.T) {
},
},
},
wantConditions: defaultConditions,
},
}

Expand All @@ -275,9 +297,11 @@ func TestReconcilePVC(t *testing.T) {

clientFake := ctrlfake.NewClientBuilder().WithObjects(instance).WithObjects(tt.pvcs...).Build()

r := &Reconciler{
log: logrus.NewEntry(logrus.StandardLogger()),
client: clientFake,
r := &MonitoringReconciler{
AROController: base.AROController{
Log: logrus.NewEntry(logrus.StandardLogger()),
Client: clientFake,
},
jsonHandle: new(codec.JsonHandle),
}
request := ctrl.Request{}
Expand All @@ -290,7 +314,7 @@ func TestReconcilePVC(t *testing.T) {
}

pvcList := &corev1.PersistentVolumeClaimList{}
err = r.client.List(ctx, pvcList, &client.ListOptions{
err = r.Client.List(ctx, pvcList, &client.ListOptions{
Namespace: monitoringName.Namespace,
})
if err != nil {
Expand Down

0 comments on commit d61bdd6

Please sign in to comment.