Skip to content

Commit

Permalink
Track the count of failed invocations since last successful policy snapshot (#88398)
Browse files Browse the repository at this point in the history

Add tracking for the number of invocations that have failed since the last successful SLM snapshot.
This count is helpful for reporting on SLM policy health.
  • Loading branch information
jbaiera authored Jul 12, 2022
1 parent ba46bd4 commit b790256
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 6 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/88398.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 88398
summary: Track the count of failed invocations since last successful policy snapshot
area: ILM+SLM
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.core.slm;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.SimpleDiffable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand Down Expand Up @@ -40,6 +41,7 @@ public class SnapshotLifecyclePolicyMetadata implements SimpleDiffable<SnapshotL
static final ParseField MODIFIED_DATE = new ParseField("modified_date");
static final ParseField LAST_SUCCESS = new ParseField("last_success");
static final ParseField LAST_FAILURE = new ParseField("last_failure");
static final ParseField INVOCATIONS_SINCE_LAST_SUCCESS = new ParseField("invocations_since_last_success");
static final ParseField NEXT_EXECUTION_MILLIS = new ParseField("next_execution_millis");
static final ParseField NEXT_EXECUTION = new ParseField("next_execution");

Expand All @@ -51,6 +53,7 @@ public class SnapshotLifecyclePolicyMetadata implements SimpleDiffable<SnapshotL
private final SnapshotInvocationRecord lastSuccess;
@Nullable
private final SnapshotInvocationRecord lastFailure;
private final long invocationsSinceLastSuccess;

@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<SnapshotLifecyclePolicyMetadata, String> PARSER = new ConstructingObjectParser<>(
Expand All @@ -66,6 +69,7 @@ public class SnapshotLifecyclePolicyMetadata implements SimpleDiffable<SnapshotL
.setModifiedDate((long) a[3])
.setLastSuccess(lastSuccess)
.setLastFailure(lastFailure)
.setInvocationsSinceLastSuccess(a[6] == null ? 0L : ((long) a[6]))
.build();
}
);
Expand All @@ -77,6 +81,7 @@ public class SnapshotLifecyclePolicyMetadata implements SimpleDiffable<SnapshotL
PARSER.declareLong(ConstructingObjectParser.constructorArg(), MODIFIED_DATE_MILLIS);
PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), SnapshotInvocationRecord::parse, LAST_SUCCESS);
PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), SnapshotInvocationRecord::parse, LAST_FAILURE);
PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), INVOCATIONS_SINCE_LAST_SUCCESS);
}

public static SnapshotLifecyclePolicyMetadata parse(XContentParser parser, String name) {
Expand All @@ -89,7 +94,8 @@ public static SnapshotLifecyclePolicyMetadata parse(XContentParser parser, Strin
long version,
long modifiedDate,
SnapshotInvocationRecord lastSuccess,
SnapshotInvocationRecord lastFailure
SnapshotInvocationRecord lastFailure,
long invocationsSinceLastSuccess
) {
this.policy = policy;
this.headers = headers;
Expand All @@ -98,6 +104,7 @@ public static SnapshotLifecyclePolicyMetadata parse(XContentParser parser, Strin
this.modifiedDate = modifiedDate;
this.lastSuccess = lastSuccess;
this.lastFailure = lastFailure;
this.invocationsSinceLastSuccess = invocationsSinceLastSuccess;
}

@SuppressWarnings("unchecked")
Expand All @@ -109,6 +116,7 @@ public static SnapshotLifecyclePolicyMetadata parse(XContentParser parser, Strin
this.modifiedDate = in.readVLong();
this.lastSuccess = in.readOptionalWriteable(SnapshotInvocationRecord::new);
this.lastFailure = in.readOptionalWriteable(SnapshotInvocationRecord::new);
this.invocationsSinceLastSuccess = in.getVersion().onOrAfter(Version.V_8_4_0) ? in.readVLong() : 0L;
}

@Override
Expand All @@ -119,6 +127,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(this.modifiedDate);
out.writeOptionalWriteable(this.lastSuccess);
out.writeOptionalWriteable(this.lastFailure);
if (out.getVersion().onOrAfter(Version.V_8_4_0)) {
out.writeVLong(this.invocationsSinceLastSuccess);
}
}

public static Builder builder() {
Expand All @@ -134,7 +145,8 @@ public static Builder builder(SnapshotLifecyclePolicyMetadata metadata) {
.setVersion(metadata.getVersion())
.setModifiedDate(metadata.getModifiedDate())
.setLastSuccess(metadata.getLastSuccess())
.setLastFailure(metadata.getLastFailure());
.setLastFailure(metadata.getLastFailure())
.setInvocationsSinceLastSuccess(metadata.getInvocationsSinceLastSuccess());
}

public Map<String, String> getHeaders() {
Expand Down Expand Up @@ -165,6 +177,10 @@ public SnapshotInvocationRecord getLastFailure() {
return lastFailure;
}

/**
 * Returns the stored count of invocations since this policy's last successful snapshot.
 * <p>
 * The value is whatever was supplied when this metadata was constructed. The parser and
 * the stream constructor in this class fall back to {@code 0} when the field is absent
 * from the parsed content or when reading from a stream older than 8.4.0.
 *
 * @return the invocations-since-last-success counter
 */
public long getInvocationsSinceLastSuccess() {
    return this.invocationsSinceLastSuccess;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand All @@ -178,13 +194,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (Objects.nonNull(lastFailure)) {
builder.field(LAST_FAILURE.getPreferredName(), lastFailure);
}
builder.field(INVOCATIONS_SINCE_LAST_SUCCESS.getPreferredName(), invocationsSinceLastSuccess);
builder.endObject();
return builder;
}

@Override
public int hashCode() {
return Objects.hash(policy, headers, version, modifiedDate, lastSuccess, lastFailure);
return Objects.hash(policy, headers, version, modifiedDate, lastSuccess, lastFailure, invocationsSinceLastSuccess);
}

@Override
Expand All @@ -201,7 +218,8 @@ public boolean equals(Object obj) {
&& Objects.equals(version, other.version)
&& Objects.equals(modifiedDate, other.modifiedDate)
&& Objects.equals(lastSuccess, other.lastSuccess)
&& Objects.equals(lastFailure, other.lastFailure);
&& Objects.equals(lastFailure, other.lastFailure)
&& Objects.equals(invocationsSinceLastSuccess, other.invocationsSinceLastSuccess);
}

@Override
Expand All @@ -222,6 +240,7 @@ private Builder() {}
private Long modifiedDate;
private SnapshotInvocationRecord lastSuccessDate;
private SnapshotInvocationRecord lastFailureDate;
private long invocationsSinceLastSuccess = 0L;

public Builder setPolicy(SnapshotLifecyclePolicy policy) {
this.policy = policy;
Expand Down Expand Up @@ -253,14 +272,20 @@ public Builder setLastFailure(SnapshotInvocationRecord lastFailure) {
return this;
}

/**
 * Sets the count of invocations since the policy's last successful snapshot.
 * If this setter is never called, the builder keeps its initial value of {@code 0}.
 *
 * @param invocationsSinceLastSuccess the counter value to store on the built metadata
 * @return this builder, to allow call chaining
 */
public Builder setInvocationsSinceLastSuccess(long invocationsSinceLastSuccess) {
this.invocationsSinceLastSuccess = invocationsSinceLastSuccess;
return this;
}

public SnapshotLifecyclePolicyMetadata build() {
return new SnapshotLifecyclePolicyMetadata(
Objects.requireNonNull(policy),
Optional.ofNullable(headers).orElse(new HashMap<>()),
version,
Objects.requireNonNull(modifiedDate, "modifiedDate must be set"),
lastSuccessDate,
lastFailureDate
lastFailureDate,
invocationsSinceLastSuccess
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ public static SnapshotLifecyclePolicyMetadata createRandomPolicyMetadata(String
if (randomBoolean()) {
builder.setHeaders(randomHeaders());
}
if (randomBoolean()) {
boolean hasSuccess = randomBoolean();
if (hasSuccess) {
builder.setLastSuccess(randomSnapshotInvocationRecord());
builder.setInvocationsSinceLastSuccess(0L);
}
if (randomBoolean()) {
builder.setLastFailure(randomSnapshotInvocationRecord());
if (hasSuccess) {
builder.setInvocationsSinceLastSuccess(randomLongBetween(1, Long.MAX_VALUE));
}
}
return builder.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,11 @@ public ClusterState execute(ClusterState currentState) throws Exception {
exception.map(SnapshotLifecycleTask::exceptionToString).orElse(null)
)
);
newPolicyMetadata.setInvocationsSinceLastSuccess(policyMetadata.getInvocationsSinceLastSuccess() + 1L);
} else {
stats.snapshotTaken(policyName);
newPolicyMetadata.setLastSuccess(new SnapshotInvocationRecord(snapshotName, snapshotStartTime, snapshotFinishTime, null));
newPolicyMetadata.setInvocationsSinceLastSuccess(0L);
}

snapLifecycles.put(policyName, newPolicyMetadata.build());
Expand Down

0 comments on commit b790256

Please sign in to comment.