This repository has been archived by the owner on Jun 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 80
Enhance HealthScope #160
Merged
wonderflow
merged 4 commits into
crossplane:master
from
captainroy-hy:enhance-healthscope
Aug 27, 2020
Merged
Enhance HealthScope #160
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
ef666fe
modify e2e-test for healthscope & fix log odd params panic
captainroy-hy fe48c6d
enhance healthscope & add unit tests
captainroy-hy c9b38c7
remove general health check from HealthScope
captainroy-hy 6068bc2
fix conflicts in e2e-test of healthscope
captainroy-hy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,133 +19,198 @@ package healthscope | |
import ( | ||
"context" | ||
"fmt" | ||
"sync" | ||
"reflect" | ||
"time" | ||
|
||
apps "k8s.io/api/apps/v1" | ||
core "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||
"k8s.io/apimachinery/pkg/types" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
|
||
"github.com/crossplane/oam-kubernetes-runtime/apis/core/v1alpha2" | ||
corev1alpha2 "github.com/crossplane/oam-kubernetes-runtime/apis/core/v1alpha2" | ||
|
||
runtimev1alpha1 "github.com/crossplane/crossplane-runtime/apis/core/v1alpha1" | ||
"github.com/crossplane/crossplane-runtime/pkg/fieldpath" | ||
"github.com/crossplane/crossplane-runtime/pkg/logging" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
const ( | ||
errNoWorkload = "could not retrieve workload %q" | ||
errNoWorkloadResources = "could not retrieve resources for workload %q" | ||
errResourceNotFound = "could not retrieve resource %q %q %q" | ||
errDeploymentUnavailable = "no ready instance found in %q %q %q" | ||
errFmtUnsupportWorkload = "APIVersion %v Kind %v workload is not supportted by HealthScope" | ||
errHealthCheck = "error occurs in health check" | ||
errUnhealthyChildResource = "unhealthy child resource exists" | ||
errFmtResourceNotReady = "resource not ready, resource status: %+v" | ||
|
||
defaultTimeout = 10 * time.Second | ||
) | ||
|
||
// UpdateHealthStatus updates the status of the healthscope based on workload resources. | ||
func UpdateHealthStatus(ctx context.Context, log logging.Logger, client client.Client, healthScope *v1alpha2.HealthScope) error { | ||
timeout := defaultTimeout | ||
if healthScope.Spec.ProbeTimeout != nil { | ||
timeout = time.Duration(*healthScope.Spec.ProbeTimeout) * time.Second | ||
} | ||
ctxWithTimeout, cancel := context.WithTimeout(ctx, timeout) | ||
defer cancel() | ||
|
||
resourceRefs := []runtimev1alpha1.TypedReference{} | ||
for _, workloadRef := range healthScope.Spec.WorkloadReferences { | ||
// Get workload object. | ||
workloadObject := unstructured.Unstructured{} | ||
workloadObject.SetAPIVersion(workloadRef.APIVersion) | ||
workloadObject.SetKind(workloadRef.Kind) | ||
workloadObjectRef := types.NamespacedName{Namespace: healthScope.GetNamespace(), Name: workloadRef.Name} | ||
if err := client.Get(ctxWithTimeout, workloadObjectRef, &workloadObject); err != nil { | ||
return errors.Wrapf(err, errNoWorkload, workloadRef.Name) | ||
} | ||
var ( | ||
kindContainerizedWorkload = corev1alpha2.ContainerizedWorkloadKind | ||
kindDeployment = reflect.TypeOf(apps.Deployment{}).Name() | ||
kindService = reflect.TypeOf(core.Service{}).Name() | ||
kindStatefulSet = reflect.TypeOf(apps.StatefulSet{}).Name() | ||
kindDaemonSet = reflect.TypeOf(apps.DaemonSet{}).Name() | ||
) | ||
|
||
// TODO(artursouza): not every workload has child resources, need to handle those scenarios too. | ||
// TODO(artursouza): change this to use an utility method instead. | ||
if value, err := fieldpath.Pave(workloadObject.UnstructuredContent()).GetValue("status.resources"); err == nil { | ||
refs := value.([]interface{}) | ||
for _, item := range refs { | ||
ref := item.(map[string]interface{}) | ||
resourceRef := runtimev1alpha1.TypedReference{ | ||
APIVersion: fmt.Sprintf("%v", ref["apiVersion"]), | ||
Kind: fmt.Sprintf("%v", ref["kind"]), | ||
Name: fmt.Sprintf("%v", ref["name"]), | ||
} | ||
|
||
resourceRefs = append(resourceRefs, resourceRef) | ||
} | ||
} else { | ||
return errors.Wrapf(err, errNoWorkloadResources, workloadRef.Name) | ||
} | ||
} | ||
// HealthCondition holds health status of any resource | ||
type HealthCondition struct { | ||
// Target represents resource being diagnosed | ||
Target runtimev1alpha1.TypedReference `json:"target"` | ||
|
||
statusc := resourcesHealthStatus(ctxWithTimeout, log, client, healthScope.Namespace, resourceRefs) | ||
status := true | ||
for r := range statusc { | ||
status = status && r | ||
} | ||
IsHealthy bool `json:"isHealthy"` | ||
|
||
health := "unhealthy" | ||
if status { | ||
health = "healthy" | ||
} | ||
// Diagnosis contains diagnosis info as well as error info | ||
Diagnosis string `json:"diagnosis,omitempty"` | ||
|
||
healthScope.Status.Health = health | ||
return nil | ||
// SubConditions represents health status of its child resources, if exist | ||
SubConditions []*HealthCondition `json:"subConditions,omitempty"` | ||
} | ||
|
||
func resourcesHealthStatus(ctx context.Context, log logging.Logger, client client.Client, namespace string, refs []runtimev1alpha1.TypedReference) <-chan bool { | ||
status := make(chan bool, len(refs)) | ||
var wg sync.WaitGroup | ||
wg.Add(len(refs)) | ||
for _, ref := range refs { | ||
go func(resourceRef runtimev1alpha1.TypedReference) { | ||
defer wg.Done() | ||
err := resourceHealthStatus(ctx, client, namespace, resourceRef) | ||
status <- (err == nil) | ||
if err != nil { | ||
log.Debug("Unhealthy resource", "resource", resourceRef.Name, "error", err) | ||
} | ||
}(ref) | ||
} | ||
go func() { | ||
wg.Wait() | ||
close(status) | ||
}() | ||
// A WorloadHealthChecker checks the health status of a specified resource | |
// and saves the status into a HealthCondition object. | |
type WorloadHealthChecker interface { | |
// Check takes a context, a client, a typed reference to the target resource | |
// and a string (the checkers in this file use it as the namespace), and | |
// returns a *HealthCondition. | |
Check(context.Context, client.Client, runtimev1alpha1.TypedReference, string) *HealthCondition | |
} | |
|
||
// WorkloadHealthCheckFn is a function type that checks the health status of a | |
// specified resource and saves the status into a HealthCondition object. | |
// Its Check method calls the function itself, so a function with this | |
// signature can be used as a WorloadHealthChecker. | |
type WorkloadHealthCheckFn func(context.Context, client.Client, runtimev1alpha1.TypedReference, string) *HealthCondition | |
|
||
return status | ||
// Check checks the health status of the specified resource by calling fn | |
// with the given context, client, resource reference and namespace, and | |
// returns fn's result unchanged. | |
func (fn WorkloadHealthCheckFn) Check(ctx context.Context, c client.Client, tr runtimev1alpha1.TypedReference, ns string) *HealthCondition { | |
return fn(ctx, c, tr, ns) | |
} | |
|
||
func resourceHealthStatus(ctx context.Context, client client.Client, namespace string, ref runtimev1alpha1.TypedReference) error { | ||
if ref.GroupVersionKind() == apps.SchemeGroupVersion.WithKind("Deployment") { | ||
return deploymentHealthStatus(ctx, client, namespace, ref) | ||
// CheckContainerziedWorkloadHealth checks health status of ContainerizedWorkload | |
func CheckContainerziedWorkloadHealth(ctx context.Context, c client.Client, ref runtimev1alpha1.TypedReference, namespace string) *HealthCondition { | ||
if ref.GroupVersionKind() != corev1alpha2.SchemeGroupVersion.WithKind(kindContainerizedWorkload) { | ||
return nil | ||
} | ||
r := &HealthCondition{ | ||
IsHealthy: false, | ||
Target: ref, | ||
} | ||
cwObj := corev1alpha2.ContainerizedWorkload{} | ||
cwObj.SetGroupVersionKind(corev1alpha2.SchemeGroupVersion.WithKind(kindContainerizedWorkload)) | ||
if err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: ref.Name}, &cwObj); err != nil { | ||
r.Diagnosis = errors.Wrap(err, errHealthCheck).Error() | ||
return r | ||
} | ||
r.Target.UID = cwObj.GetUID() | ||
|
||
r.SubConditions = []*HealthCondition{} | ||
childRefs := cwObj.Status.Resources | ||
|
||
for _, childRef := range childRefs { | ||
switch childRef.Kind { | ||
case kindDeployment: | ||
// reuse Deployment health checker | ||
childCondition := CheckDeploymentHealth(ctx, c, childRef, namespace) | ||
r.SubConditions = append(r.SubConditions, childCondition) | ||
default: | ||
childCondition := &HealthCondition{ | ||
Target: childRef, | ||
IsHealthy: true, | ||
} | ||
o := unstructured.Unstructured{} | ||
o.SetAPIVersion(childRef.APIVersion) | ||
o.SetKind(childRef.Kind) | ||
if err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: childRef.Name}, &o); err != nil { | ||
// for unspecified resource | ||
// if cannot get it, then check fails | ||
childCondition.IsHealthy = false | ||
childCondition.Diagnosis = errors.Wrap(err, errHealthCheck).Error() | ||
} | ||
r.SubConditions = append(r.SubConditions, childCondition) | ||
} | ||
} | ||
|
||
// TODO(artursouza): add other health checks. | ||
// Generic health check by validating if the resource exists. | ||
object := unstructured.Unstructured{} | ||
object.SetAPIVersion(ref.APIVersion) | ||
object.SetKind(ref.Kind) | ||
objectRef := types.NamespacedName{Namespace: namespace, Name: ref.Name} | ||
err := client.Get(ctx, objectRef, &object) | ||
return err | ||
r.IsHealthy = true | ||
for _, sc := range r.SubConditions { | ||
if !sc.IsHealthy { | ||
r.IsHealthy = false | ||
r.Diagnosis = errUnhealthyChildResource | ||
break | ||
} | ||
} | ||
return r | ||
} | ||
|
||
func deploymentHealthStatus(ctx context.Context, client client.Client, namespace string, ref runtimev1alpha1.TypedReference) error { | ||
// CheckDeploymentHealth checks health status of Deployment | ||
func CheckDeploymentHealth(ctx context.Context, client client.Client, ref runtimev1alpha1.TypedReference, namespace string) *HealthCondition { | ||
if ref.GroupVersionKind() != apps.SchemeGroupVersion.WithKind(kindDeployment) { | ||
return nil | ||
} | ||
r := &HealthCondition{ | ||
IsHealthy: false, | ||
Target: ref, | ||
} | ||
deployment := apps.Deployment{} | ||
deployment.APIVersion = ref.APIVersion | ||
deployment.Kind = ref.Kind | ||
deployment.SetGroupVersionKind(apps.SchemeGroupVersion.WithKind(kindDeployment)) | ||
deploymentRef := types.NamespacedName{Namespace: namespace, Name: ref.Name} | ||
if err := client.Get(ctx, deploymentRef, &deployment); err != nil { | ||
return errors.Wrapf(err, errResourceNotFound, ref.APIVersion, ref.Kind, ref.Name) | ||
r.Diagnosis = errors.Wrap(err, errHealthCheck).Error() | ||
return r | ||
} | ||
r.Target.UID = deployment.GetUID() | ||
|
||
if deployment.Status.ReadyReplicas == 0 { | ||
return fmt.Errorf(errDeploymentUnavailable, ref.APIVersion, ref.Kind, ref.Name) | ||
r.Diagnosis = fmt.Sprintf(errFmtResourceNotReady, deployment.Status) | ||
return r | ||
} | ||
r.IsHealthy = true | ||
return r | ||
} | ||
|
||
// CheckStatefulsetHealth checks health status of StatefulSet | ||
func CheckStatefulsetHealth(ctx context.Context, client client.Client, ref runtimev1alpha1.TypedReference, namespace string) *HealthCondition { | ||
if ref.GroupVersionKind() != apps.SchemeGroupVersion.WithKind(kindStatefulSet) { | ||
return nil | ||
} | ||
r := &HealthCondition{ | ||
IsHealthy: false, | ||
Target: ref, | ||
} | ||
statefulset := apps.StatefulSet{} | ||
statefulset.APIVersion = ref.APIVersion | ||
statefulset.Kind = ref.Kind | ||
nk := types.NamespacedName{Namespace: namespace, Name: ref.Name} | ||
if err := client.Get(ctx, nk, &statefulset); err != nil { | ||
r.Diagnosis = errors.Wrap(err, errHealthCheck).Error() | ||
return r | ||
} | ||
r.Target.UID = statefulset.GetUID() | ||
|
||
if statefulset.Status.ReadyReplicas == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here: why does "ReadyReplicas != 0" mean healthy? |
||
r.Diagnosis = fmt.Sprintf(errFmtResourceNotReady, statefulset.Status) | ||
return r | ||
} | ||
r.IsHealthy = true | ||
return r | ||
} | ||
|
||
// CheckDaemonsetHealth checks health status of DaemonSet | ||
func CheckDaemonsetHealth(ctx context.Context, client client.Client, ref runtimev1alpha1.TypedReference, namespace string) *HealthCondition { | ||
if ref.GroupVersionKind() != apps.SchemeGroupVersion.WithKind(kindDaemonSet) { | ||
return nil | ||
} | ||
r := &HealthCondition{ | ||
IsHealthy: false, | ||
Target: ref, | ||
} | ||
daemonset := apps.DaemonSet{} | ||
daemonset.APIVersion = ref.APIVersion | ||
daemonset.Kind = ref.Kind | ||
nk := types.NamespacedName{Namespace: namespace, Name: ref.Name} | ||
if err := client.Get(ctx, nk, &daemonset); err != nil { | ||
r.Diagnosis = errors.Wrap(err, errHealthCheck).Error() | ||
return r | ||
} | ||
r.Target.UID = daemonset.GetUID() | ||
|
||
if daemonset.Status.NumberUnavailable != 0 { | ||
r.Diagnosis = fmt.Sprintf(errFmtResourceNotReady, daemonset.Status) | ||
return r | ||
} | ||
return nil | ||
r.IsHealthy = true | ||
return r | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be missing something here: why does "ReadyReplicas != 0" mean healthy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I just stayed in line with the current health-checking logic. A more rigorous check would be
x.Status.ReadyReplicas == x.Spec.Replicas
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could fix it in follow-up PRs.