From 35e705877be8e18946bb110a6f3e38a58905a793 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 20:37:45 +0200 Subject: [PATCH] Limit retries of failed allocations per index (#18467) Today if a shard fails during initialization phase due to misconfiguration, broken disks, missing analyzers, not installed plugins etc. elasticsaerch keeps on trying to initialize or rather allocate that shard. Yet, in the worst case scenario this ends in an endless allocation loop. To prevent this loop and all it's sideeffects like spamming log files over and over again this commit adds an allocation decider that stops allocating a shard that failed more than N times in a row to allocate. The number or retries can be configured via `index.allocation.max_retry` and it's default is set to `5`. Once the setting is updated shards with less failures than the number set per index will be allowed to allocate again. Internally we maintain a counter on the UnassignedInfo that is reset to `0` once the shards has been started. Relates to #18417 --- ...ansportClusterAllocationExplainAction.java | 2 +- .../reroute/ClusterRerouteRequest.java | 33 ++- .../reroute/ClusterRerouteRequestBuilder.java | 9 + .../TransportClusterRerouteAction.java | 74 +++--- .../elasticsearch/cluster/ClusterModule.java | 2 + .../cluster/routing/UnassignedInfo.java | 32 ++- .../routing/allocation/AllocationService.java | 22 +- .../allocation/FailedRerouteAllocation.java | 2 +- .../routing/allocation/RoutingAllocation.java | 9 +- .../allocation/StartedRerouteAllocation.java | 2 +- ...AllocateEmptyPrimaryAllocationCommand.java | 2 +- .../decider/MaxRetryAllocationDecider.java | 83 +++++++ .../common/settings/IndexScopedSettings.java | 3 +- .../gateway/ReplicaShardAllocator.java | 2 +- .../reroute/RestClusterRerouteAction.java | 1 + .../cluster/reroute/ClusterRerouteTests.java | 181 +++++++++++++++ .../cluster/routing/UnassignedInfoTests.java | 16 +- .../allocation/AllocationCommandsTests.java | 46 ++-- .../allocation/AwarenessAllocationTests.java | 2 +- .../allocation/DeadNodesAllocationTests.java | 8 +- .../ExpectedShardSizeAllocationTests.java | 2 +- .../allocation/FailedShardsRoutingTests.java | 8 +- .../MaxRetryAllocationDeciderTests.java | 210 ++++++++++++++++++ .../NodeVersionAllocationDeciderTests.java | 4 +- .../allocation/ThrottlingAllocationTests.java | 6 +- .../decider/DiskThresholdDeciderTests.java | 14 +- .../DiskThresholdDeciderUnitTests.java | 4 +- .../gateway/PrimaryShardAllocatorTests.java | 18 +- .../gateway/ReplicaShardAllocatorTests.java | 4 +- .../indices/state/RareClusterStateIT.java | 2 +- .../SharedClusterSnapshotRestoreIT.java | 2 +- docs/reference/cluster/reroute.asciidoc | 13 ++ .../rest-api-spec/api/cluster.reroute.json | 4 + 33 files changed, 705 insertions(+), 117 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java create mode 100644 core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java create mode 100644 core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index 28b62083d42da..f986a5679a943 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -250,7 +250,7 @@ protected void masterOperation(final ClusterAllocationExplainRequest request, fi final ActionListener listener) { final RoutingNodes routingNodes = state.getRoutingNodes(); final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state, - clusterInfoService.getClusterInfo(), System.nanoTime()); + clusterInfoService.getClusterInfo(), System.nanoTime(), false); ShardRouting foundShard = null; if (request.useAnyUnassignedShard()) { diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index a241f01ea28cc..dcc45ab21b961 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -38,9 +38,10 @@ * Request to submit cluster reroute allocation commands */ public class ClusterRerouteRequest extends AcknowledgedRequest { - AllocationCommands commands = new AllocationCommands(); - boolean dryRun; - boolean explain; + private AllocationCommands commands = new AllocationCommands(); + private boolean dryRun; + private boolean explain; + private boolean retryFailed; public ClusterRerouteRequest() { } @@ -81,6 +82,15 @@ public ClusterRerouteRequest explain(boolean explain) { return this; } + /** + * Sets the retry failed flag (defaults to false). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequest setRetryFailed(boolean retryFailed) { + this.retryFailed = retryFailed; + return this; + } + /** * Returns the current explain flag */ @@ -88,6 +98,14 @@ public boolean explain() { return this.explain; } + /** + * Returns the current retry failed flag + */ + public boolean isRetryFailed() { + return this.retryFailed; + } + + /** * Set the allocation commands to execute. */ @@ -96,6 +114,13 @@ public ClusterRerouteRequest commands(AllocationCommand... commands) { return this; } + /** + * Returns the allocation commands to execute + */ + public AllocationCommands getCommands() { + return commands; + } + /** * Sets the source for the request. */ @@ -136,6 +161,7 @@ public void readFrom(StreamInput in) throws IOException { commands = AllocationCommands.readFrom(in); dryRun = in.readBoolean(); explain = in.readBoolean(); + retryFailed = in.readBoolean(); readTimeout(in); } @@ -145,6 +171,7 @@ public void writeTo(StreamOutput out) throws IOException { AllocationCommands.writeTo(commands, out); out.writeBoolean(dryRun); out.writeBoolean(explain); + out.writeBoolean(retryFailed); writeTimeout(out); } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java index 7f0ad81a00dbb..e5d130cf8125c 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java @@ -60,6 +60,15 @@ public ClusterRerouteRequestBuilder setExplain(boolean explain) { return this; } + /** + * Sets the retry failed flag (defaults to false). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequestBuilder setRetryFailed(boolean retryFailed) { + request.setRetryFailed(retryFailed); + return this; + } + /** * Sets the commands for the request to execute. */ diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index e6116dbfbc41e..b0b676f6e2e50 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -33,6 +33,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -68,38 +69,55 @@ protected ClusterRerouteResponse newResponse() { @Override protected void masterOperation(final ClusterRerouteRequest request, final ClusterState state, final ActionListener listener) { - clusterService.submitStateUpdateTask("cluster_reroute (api)", new AckedClusterStateUpdateTask(Priority.IMMEDIATE, request, listener) { + clusterService.submitStateUpdateTask("cluster_reroute (api)", new ClusterRerouteResponseAckedClusterStateUpdateTask(logger, + allocationService, request, listener)); + } - private volatile ClusterState clusterStateToSend; - private volatile RoutingExplanations explanations; + static class ClusterRerouteResponseAckedClusterStateUpdateTask extends AckedClusterStateUpdateTask { - @Override - protected ClusterRerouteResponse newResponse(boolean acknowledged) { - return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); - } + private final ClusterRerouteRequest request; + private final ActionListener listener; + private final ESLogger logger; + private final AllocationService allocationService; + private volatile ClusterState clusterStateToSend; + private volatile RoutingExplanations explanations; - @Override - public void onAckTimeout() { - listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); - } + ClusterRerouteResponseAckedClusterStateUpdateTask(ESLogger logger, AllocationService allocationService, ClusterRerouteRequest request, + ActionListener listener) { + super(Priority.IMMEDIATE, request, listener); + this.request = request; + this.listener = listener; + this.logger = logger; + this.allocationService = allocationService; + } - @Override - public void onFailure(String source, Throwable t) { - logger.debug("failed to perform [{}]", t, source); - super.onFailure(source, t); - } + @Override + protected ClusterRerouteResponse newResponse(boolean acknowledged) { + return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); + } + + @Override + public void onAckTimeout() { + listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); + } + + @Override + public void onFailure(String source, Throwable t) { + logger.debug("failed to perform [{}]", t, source); + super.onFailure(source, t); + } - @Override - public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain()); - ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); - clusterStateToSend = newState; - explanations = routingResult.explanations(); - if (request.dryRun) { - return currentState; - } - return newState; + @Override + public ClusterState execute(ClusterState currentState) { + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.getCommands(), request.explain(), + request.isRetryFailed()); + ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); + clusterStateToSend = newState; + explanations = routingResult.explanations(); + if (request.dryRun()) { + return currentState; } - }); + return newState; + } } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 47dd2ce9ae6c3..a02e399ac0c4c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -49,6 +49,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; @@ -79,6 +80,7 @@ public class ClusterModule extends AbstractModule { new Setting<>("cluster.routing.allocation.type", BALANCED_ALLOCATOR, Function.identity(), Property.NodeScope); public static final List> DEFAULT_ALLOCATION_DECIDERS = Collections.unmodifiableList(Arrays.asList( + MaxRetryAllocationDecider.class, SameShardAllocationDecider.class, FilterAllocationDecider.class, ReplicaAfterPrimaryActiveAllocationDecider.class, diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 2670363364d53..bc44cd1701c93 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -48,7 +48,6 @@ public final class UnassignedInfo implements ToXContent, Writeable { public static final Setting INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING = Setting.timeSetting("index.unassigned.node_left.delayed_timeout", DEFAULT_DELAYED_NODE_LEFT_TIMEOUT, Property.Dynamic, Property.IndexScope); - /** * Reason why the shard is in unassigned state. *

@@ -103,7 +102,11 @@ public enum Reason { /** * A better replica location is identified and causes the existing replica allocation to be cancelled. */ - REALLOCATED_REPLICA; + REALLOCATED_REPLICA, + /** + * Unassigned as a result of a failed primary while the replica was initializing. + */ + PRIMARY_FAILED; } private final Reason reason; @@ -112,6 +115,7 @@ public enum Reason { private final long lastComputedLeftDelayNanos; // how long to delay shard allocation, not serialized (always positive, 0 means no delay) private final String message; private final Throwable failure; + private final int failedAllocations; /** * creates an UnassingedInfo object based **current** time @@ -120,7 +124,7 @@ public enum Reason { * @param message more information about cause. **/ public UnassignedInfo(Reason reason, String message) { - this(reason, message, null, System.nanoTime(), System.currentTimeMillis()); + this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis()); } /** @@ -130,13 +134,16 @@ public UnassignedInfo(Reason reason, String message) { * @param unassignedTimeNanos the time to use as the base for any delayed re-assignment calculation * @param unassignedTimeMillis the time of unassignment used to display to in our reporting. */ - public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, long unassignedTimeNanos, long unassignedTimeMillis) { + public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, int failedAllocations, long unassignedTimeNanos, long unassignedTimeMillis) { this.reason = reason; this.unassignedTimeMillis = unassignedTimeMillis; this.unassignedTimeNanos = unassignedTimeNanos; this.lastComputedLeftDelayNanos = 0L; this.message = message; this.failure = failure; + this.failedAllocations = failedAllocations; + assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED): + "failedAllocations: " + failedAllocations + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -147,17 +154,19 @@ public UnassignedInfo(UnassignedInfo unassignedInfo, long newComputedLeftDelayNa this.lastComputedLeftDelayNanos = newComputedLeftDelayNanos; this.message = unassignedInfo.message; this.failure = unassignedInfo.failure; + this.failedAllocations = unassignedInfo.failedAllocations; } public UnassignedInfo(StreamInput in) throws IOException { this.reason = Reason.values()[(int) in.readByte()]; this.unassignedTimeMillis = in.readLong(); // As System.nanoTime() cannot be compared across different JVMs, reset it to now. - // This means that in master failover situations, elapsed delay time is forgotten. + // This means that in master fail-over situations, elapsed delay time is forgotten. this.unassignedTimeNanos = System.nanoTime(); this.lastComputedLeftDelayNanos = 0L; this.message = in.readOptionalString(); this.failure = in.readThrowable(); + this.failedAllocations = in.readVInt(); } public void writeTo(StreamOutput out) throws IOException { @@ -166,12 +175,18 @@ public void writeTo(StreamOutput out) throws IOException { // Do not serialize unassignedTimeNanos as System.nanoTime() cannot be compared across different JVMs out.writeOptionalString(message); out.writeThrowable(failure); + out.writeVInt(failedAllocations); } public UnassignedInfo readFrom(StreamInput in) throws IOException { return new UnassignedInfo(in); } + /** + * Returns the number of previously failed allocations of this shard. + */ + public int getNumFailedAllocations() { return failedAllocations; } + /** * The reason why the shard is unassigned. */ @@ -325,7 +340,11 @@ public String shortSummary() { StringBuilder sb = new StringBuilder(); sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)).append("]"); + if (failedAllocations > 0) { + sb.append(", failed_attempts[").append(failedAllocations).append("]"); + } String details = getDetails(); + if (details != null) { sb.append(", details[").append(details).append("]"); } @@ -342,6 +361,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject("unassigned_info"); builder.field("reason", reason); builder.field("at", DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)); + if (failedAllocations > 0) { + builder.field("failed_attempts", failedAllocations); + } String details = getDetails(); if (details != null) { builder.field("details", details); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index e1bbbb7f4ab96..d59113675d8ff 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -222,8 +222,10 @@ public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, Lis List orderedFailedShards = new ArrayList<>(failedShards); orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.shard.primary())); for (FailedRerouteAllocation.FailedShard failedShard : orderedFailedShards) { + UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo(); + final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0; changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure, - System.nanoTime(), System.currentTimeMillis())); + failedAllocations + 1, System.nanoTime(), System.currentTimeMillis())); } if (!changed) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -257,16 +259,13 @@ private String firstListElementsToCommaDelimitedString(List elements, Fun .collect(Collectors.joining(", ")); } - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands) { - return reroute(clusterState, commands, false); - } - - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain) { + public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain, boolean retryFailed) { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // we don't shuffle the unassigned shards here, to try and get as close as possible to // a consistent result of the effect the commands have on the routing // this allows systems to dry run the commands, see the resulting cluster state, and act on it - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), retryFailed); // don't short circuit deciders, we want a full explanation allocation.debugDecision(true); // we ignore disable allocation, because commands are explicit @@ -305,7 +304,8 @@ protected RoutingAllocation.Result reroute(ClusterState clusterState, String rea RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // shuffle the unassigned nodes, just so we won't have things like poison failed shards routingNodes.unassigned().shuffle(); - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), false); allocation.debugDecision(debug); if (!reroute(allocation)) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -437,7 +437,7 @@ private boolean deassociateDeadNodes(RoutingAllocation allocation) { // now, go over all the shards routing on the node, and fail them for (ShardRouting shardRouting : node.copyShards()) { UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]", null, - allocation.getCurrentNanoTime(), System.currentTimeMillis()); + 0, allocation.getCurrentNanoTime(), System.currentTimeMillis()); applyFailedShard(allocation, shardRouting, false, unassignedInfo); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard @@ -457,8 +457,8 @@ private boolean failReplicasForUnassignedPrimary(RoutingAllocation allocation, S boolean changed = false; for (ShardRouting routing : replicas) { changed |= applyFailedShard(allocation, routing, false, - new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "primary failed while replica initializing", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); } return changed; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java index a13862fed260a..ef2e42eed764c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java @@ -58,7 +58,7 @@ public String toString() { private final List failedShards; public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List failedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.failedShards = failedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 60ca3a8d5fd45..0df8074e14c3a 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -134,6 +134,8 @@ public RoutingExplanations explanations() { private boolean ignoreDisable = false; + private final boolean retryFailed; + private boolean debugDecision = false; private boolean hasPendingAsyncFetch = false; @@ -148,7 +150,7 @@ public RoutingExplanations explanations() { * @param clusterState cluster state before rerouting * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) */ - public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime) { + public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime, boolean retryFailed) { this.deciders = deciders; this.routingNodes = routingNodes; this.metaData = clusterState.metaData(); @@ -156,6 +158,7 @@ public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, this.customs = clusterState.customs(); this.clusterInfo = clusterInfo; this.currentNanoTime = currentNanoTime; + this.retryFailed = retryFailed; } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -297,4 +300,8 @@ public boolean hasPendingAsyncFetch() { public void setHasPendingAsyncFetch() { this.hasPendingAsyncFetch = true; } + + public boolean isRetryFailed() { + return retryFailed; + } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java index e9570edd9c3bd..0f55ab4fda1ae 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java @@ -36,7 +36,7 @@ public class StartedRerouteAllocation extends RoutingAllocation { private final List startedShards; public StartedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List startedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.startedShards = startedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java index d4191292cfcbe..c80afde308605 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java @@ -125,7 +125,7 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) // we need to move the unassigned info back to treat it as if it was index creation unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(), - shardRouting.unassignedInfo().getFailure(), System.nanoTime(), System.currentTimeMillis()); + shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis()); } initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, unassignedInfoToUpdate); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java new file mode 100644 index 0000000000000..6a8a0ccc5fab3 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -0,0 +1,83 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; + +/** + * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without + * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until + * the setting for index.allocation.max_retry is raised. The default value is 5. + * Note: This allocation decider also allows allocation of repeatedly failing shards when the /_cluster/reroute?retry_failed=true + * API is manually invoked. This allows single retries without raising the limits. + * + * @see RoutingAllocation#isRetryFailed() + */ +public class MaxRetryAllocationDecider extends AllocationDecider { + + public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retries", 5, 0, + Setting.Property.Dynamic, Setting.Property.IndexScope); + + public static final String NAME = "max_retry"; + + /** + * Initializes a new {@link MaxRetryAllocationDecider} + * + * @param settings {@link Settings} used by this {@link AllocationDecider} + */ + @Inject + public MaxRetryAllocationDecider(Settings settings) { + super(settings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { + UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { + final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); + final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); + if (allocation.isRetryFailed()) { // manual allocation - retry + // if we are called via the _reroute API we ignore the failure counter and try to allocate + // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is + // enough to manually retry. + return allocation.decision(Decision.YES, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - retrying once on manual allocation"); + } else if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { + return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - manually call [/_cluster/reroute?retry_failed=true] to retry"); + } + } + return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, allocation); + } +} diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9ebf68531667b..203d1db76b3fd 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.gateway.PrimaryShardAllocator; @@ -40,7 +41,6 @@ import org.elasticsearch.index.store.FsDirectoryService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.index.store.Store; -import org.elasticsearch.index.IndexWarmer; import org.elasticsearch.indices.IndicesRequestCache; import java.util.Arrays; @@ -59,6 +59,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { public static final Predicate INDEX_SETTINGS_KEY_PREDICATE = (s) -> s.startsWith(IndexMetaData.INDEX_SETTING_PREFIX); public static final Set> BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY, IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING, IndexStore.INDEX_STORE_THROTTLE_TYPE_SETTING, IndexStore.INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC_SETTING, diff --git a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java index d2e3d7f42cff7..8b6e425c26a31 100644 --- a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java +++ b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java @@ -108,7 +108,7 @@ public boolean processExistingRecoveries(RoutingAllocation allocation) { currentNode, nodeWithHighestMatch); it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); changed = true; } } diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java index 8a4afd89ac485..b34c6726c0986 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java @@ -64,6 +64,7 @@ public RestClusterRerouteAction(Settings settings, RestController controller, Cl public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws Exception { final ClusterRerouteRequest clusterRerouteRequest = Requests.clusterRerouteRequest(); clusterRerouteRequest.dryRun(request.paramAsBoolean("dry_run", clusterRerouteRequest.dryRun())); + clusterRerouteRequest.setRetryFailed(request.paramAsBoolean("retry_failed", clusterRerouteRequest.isRetryFailed())); clusterRerouteRequest.explain(request.paramAsBoolean("explain", clusterRerouteRequest.explain())); clusterRerouteRequest.timeout(request.paramAsTime("timeout", clusterRerouteRequest.timeout())); clusterRerouteRequest.masterNodeTimeout(request.paramAsTime("master_timeout", clusterRerouteRequest.masterNodeTimeout())); diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java new file mode 100644 index 0000000000000..927f572487ce5 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java @@ -0,0 +1,181 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.action.admin.cluster.reroute; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.NetworkModule; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class ClusterRerouteTests extends ESAllocationTestCase { + + public void testSerializeRequest() throws IOException { + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.setRetryFailed(randomBoolean()); + req.dryRun(randomBoolean()); + req.explain(randomBoolean()); + req.commands(new AllocateEmptyPrimaryAllocationCommand("foo", 1, "bar", randomBoolean())); + req.timeout(TimeValue.timeValueMillis(randomIntBetween(0, 100))); + BytesStreamOutput out = new BytesStreamOutput(); + req.writeTo(out); + BytesReference bytes = out.bytes(); + NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(); + new NetworkModule(null, Settings.EMPTY, true, namedWriteableRegistry); + StreamInput wrap = new NamedWriteableAwareStreamInput(StreamInput.wrap(bytes.toBytes()), + namedWriteableRegistry); + ClusterRerouteRequest deserializedReq = new ClusterRerouteRequest(); + deserializedReq.readFrom(wrap); + + assertEquals(req.isRetryFailed(), deserializedReq.isRetryFailed()); + assertEquals(req.dryRun(), deserializedReq.dryRun()); + assertEquals(req.explain(), deserializedReq.explain()); + assertEquals(req.timeout(), deserializedReq.timeout()); + assertEquals(1, deserializedReq.getCommands().commands().size()); // allocation commands have their own tests + assertEquals(req.getCommands().commands().size(), deserializedReq.getCommands().commands().size()); + } + + public void testClusterStateUpdateTask() { + AllocationService allocationService = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + ClusterState clusterState = createInitialClusterState(allocationService); + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.dryRun(true); + AtomicReference responseRef = new AtomicReference<>(); + ActionListener responseActionListener = new ActionListener() { + @Override + public void onResponse(ClusterRerouteResponse clusterRerouteResponse) { + responseRef.set(clusterRerouteResponse); + } + + @Override + public void onFailure(Throwable e) { + + } + }; + TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task = + new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, + responseActionListener ); + ClusterState execute = task.execute(clusterState); + assertSame(execute, clusterState); // dry-run + task.onAllNodesAcked(null); + assertNotSame(responseRef.get().getState(), execute); + + req.dryRun(false);// now we allocate + + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries; i++) { + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i); + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = allocationService.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build(); + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + if (i == retries-1) { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + } else { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + } + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + } + + + // without retry_failed we won't allocate that shard + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + task.onAllNodesAcked(null); + assertSame(responseRef.get().getState(), newState); + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + + req.setRetryFailed(true); // now we manually retry and get the shard back into initializing + newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + } + + private ClusterState createInitialClusterState(AllocationService service) { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java index 708f8ca50790a..f46224570b0c9 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -64,7 +64,8 @@ public void testReasonOrdinalOrder() { UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.REROUTE_CANCELLED, UnassignedInfo.Reason.REINITIALIZED, - UnassignedInfo.Reason.REALLOCATED_REPLICA}; + UnassignedInfo.Reason.REALLOCATED_REPLICA, + UnassignedInfo.Reason.PRIMARY_FAILED}; for (int i = 0; i < order.length; i++) { assertThat(order[i].ordinal(), equalTo(i)); } @@ -72,7 +73,10 @@ public void testReasonOrdinalOrder() { } public void testSerialization() throws Exception { - UnassignedInfo meta = new UnassignedInfo(RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()), randomBoolean() ? randomAsciiOfLength(4) : null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()); + UnassignedInfo meta = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null); BytesStreamOutput out = new BytesStreamOutput(); meta.writeTo(out); out.close(); @@ -82,6 +86,7 @@ public void testSerialization() throws Exception { assertThat(read.getUnassignedTimeInMillis(), equalTo(meta.getUnassignedTimeInMillis())); assertThat(read.getMessage(), equalTo(meta.getMessage())); assertThat(read.getDetails(), equalTo(meta.getDetails())); + assertThat(read.getNumFailedAllocations(), equalTo(meta.getNumFailedAllocations())); } public void testIndexCreated() { @@ -273,7 +278,10 @@ public void testUnassignedDelayedOnlyOnNodeLeft() throws Exception { public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception { EnumSet reasons = EnumSet.allOf(UnassignedInfo.Reason.class); reasons.remove(UnassignedInfo.Reason.NODE_LEFT); - UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(random(), reasons), null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), reasons); + UnassignedInfo unassignedInfo = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, null, null, 1, System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, null); unassignedInfo = unassignedInfo.updateDelay(unassignedInfo.getUnassignedTimeInNanos() + 1, // add 1 tick delay Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "10h").build(), Settings.EMPTY); long delay = unassignedInfo.getLastComputedLeftDelayNanos(); @@ -287,7 +295,7 @@ public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception */ public void testLeftDelayCalculation() throws Exception { final long baseTime = System.nanoTime(); - UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, baseTime, System.currentTimeMillis()); + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime, System.currentTimeMillis()); final long totalDelayNanos = TimeValue.timeValueMillis(10).nanos(); final Settings settings = Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueNanos(totalDelayNanos)).build(); unassignedInfo = unassignedInfo.updateDelay(baseTime, settings, Settings.EMPTY); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index 7aa8576ece340..b63692e0d2a51 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -94,7 +94,7 @@ public void testMoveShardCommand() { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING)); @@ -148,7 +148,7 @@ public void testAllocateCommand() { logger.info("--> allocating to non-existent node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42")), false, false); fail("expected IllegalArgumentException when allocating to non-existing node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("failed to resolve [node42], no matching nodes")); @@ -156,7 +156,7 @@ public void testAllocateCommand() { logger.info("--> allocating to non-data node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4")), false, false); fail("expected IllegalArgumentException when allocating to non-data node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocation can only be done on data nodes")); @@ -164,7 +164,7 @@ public void testAllocateCommand() { logger.info("--> allocating non-existing shard, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing shard"); } catch (ShardNotFoundException e) { assertThat(e.getMessage(), containsString("no such shard")); @@ -172,7 +172,7 @@ public void testAllocateCommand() { logger.info("--> allocating non-existing index, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing index"); } catch (IndexNotFoundException e) { assertThat(e.getMessage(), containsString("no such index")); @@ -180,7 +180,7 @@ public void testAllocateCommand() { logger.info("--> allocating empty primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false)), false, false); fail("expected IllegalArgumentException when allocating empty primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); @@ -188,14 +188,14 @@ public void testAllocateCommand() { logger.info("--> allocating stale primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false)), false, false); fail("expected IllegalArgumentException when allocating stale primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); } logger.info("--> allocating empty primary with acceptDataLoss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -211,13 +211,13 @@ public void testAllocateCommand() { logger.info("--> allocate the replica shard on the primary shard node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1"))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1")), false, false); fail("expected IllegalArgumentException when allocating replica shard on the primary shard node"); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -236,7 +236,7 @@ public void testAllocateCommand() { logger.info("--> verify that we fail when there are no unassigned shards"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3")), false, false); fail("expected IllegalArgumentException when allocating shard while no unassigned shard available"); } catch (IllegalArgumentException e) { } @@ -268,7 +268,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); logger.info("--> allocating empty primary shard with accept_data_loss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -277,7 +277,7 @@ public void testCancelCommand() { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -291,13 +291,13 @@ public void testCancelCommand() { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -306,7 +306,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the relocation allocation"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -315,7 +315,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -325,7 +325,7 @@ public void testCancelCommand() { logger.info("--> cancel the primary being replicated, make sure it fails"); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -339,7 +339,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> cancel allocation of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -348,7 +348,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -364,7 +364,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> move the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -374,7 +374,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -383,7 +383,7 @@ public void testCancelCommand() { logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); logger.error(clusterState.prettyPrint()); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java index ee993bf3ebd3a..805ab0321ba92 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java @@ -868,7 +868,7 @@ public void testUnassignedShardsWithUnbalancedZones() { } commands.add(new MoveAllocationCommand("test", 0, primaryNode, "A-4")); - routingTable = strategy.reroute(clusterState, commands).routingTable(); + routingTable = strategy.reroute(clusterState, commands, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java index 11b78d2ae6aa5..b14aeca890e9d 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java @@ -149,8 +149,8 @@ public void testDeadNodeWhileRelocatingOnToNode() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -223,8 +223,8 @@ public void testDeadNodeWhileRelocatingOnFromNode() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java index bfa27a36d8b1c..29644f079446e 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java @@ -149,7 +149,7 @@ public void addListener(Listener listener) { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), ShardRoutingState.RELOCATING); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 58e2397b04333..e859c5811c39f 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -109,8 +109,8 @@ public void testFailedShardPrimaryRelocatingToAndFrom() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -125,8 +125,8 @@ public void testFailedShardPrimaryRelocatingToAndFrom() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java new file mode 100644 index 0000000000000..f76851cfef93c --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -0,0 +1,210 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocateReplicaAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { + + private AllocationService strategy; + + @Override + public void setUp() throws Exception { + super.setUp(); + strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + } + + private ClusterState createInitialClusterState() { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState, "reroute", false).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } + + public void testSingleRetryOnIgnore() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + result = strategy.reroute(clusterState, new AllocationCommands(), false, true); // manual reroute should retry once + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we go and check that we are actually stick to unassigned on the next failure ie. no retry + failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + } + + public void testFailedAllocation() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + } + + // change the settings and ensure we can do another round of allocation for that index. + clusterState = ClusterState.builder(clusterState).routingTable(routingTable) + .metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", + retries+1).build() + ).build(), true).build()).build(); + RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // good we are initializing and we are maintaining failure information + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we start the shard + routingTable = strategy.applyStartedShards(clusterState, Collections.singletonList(routingTable.index("idx") + .shard(0).shards().get(0))).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + // all counters have been reset to 0 ie. no unassigned info + assertEquals(routingTable.index("idx").shards().size(), 1); + assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); + + // now fail again and see if it has a new counter + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "ZOOOMG"); + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java index 97a3003ab2fd6..d0fc64b4b6b42 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java @@ -337,7 +337,7 @@ public void testRebalanceDoesNotAllocatePrimaryAndReplicasOnDifferentVersionNode AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // the two indices must stay as is, the replicas cannot move to oldNode2 because versions don't match state = ClusterState.builder(state).routingResult(result).build(); assertThat(result.routingTable().index(shard2.getIndex()).shardsWithState(ShardRoutingState.RELOCATING).size(), equalTo(0)); @@ -369,7 +369,7 @@ public void testRestoreDoesNotAllocateSnapshotOnOlderNodes() { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // Make sure that primary shards are only allocated on the new node for (int i = 0; i < numberOfShards; i++) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java index 28d916e20c15f..61a72bc352a87 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java @@ -283,7 +283,7 @@ public void testOutgoingThrottlesAllocaiton() { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), 0); assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); - RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2"))); + RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2")), false, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.YES); routingTable = reroute.routingTable(); @@ -296,7 +296,7 @@ public void testOutgoingThrottlesAllocaiton() { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); // outgoing throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0); @@ -311,7 +311,7 @@ public void testOutgoingThrottlesAllocaiton() { assertThat(routingTable.shardsWithState(UNASSIGNED).size(), equalTo(0)); // incoming throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index 2c4e86ad4b1d5..579e87150a7de 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -796,7 +796,7 @@ public void addListener(Listener listener) { AllocationCommand relocate1 = new MoveAllocationCommand("test", 0, "node2", "node3"); AllocationCommands cmds = new AllocationCommands(relocate1); - routingTable = strategy.reroute(clusterState, cmds).routingTable(); + routingTable = strategy.reroute(clusterState, cmds, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); @@ -808,7 +808,7 @@ public void addListener(Listener listener) { // node3, which will put it over the low watermark when it // completes, with shard relocations taken into account this should // throw an exception about not being able to complete - strategy.reroute(clusterState, cmds).routingTable(); + strategy.reroute(clusterState, cmds, false, false).routingTable(); fail("should not have been able to reroute the shard"); } catch (IllegalArgumentException e) { assertThat("can't allocated because there isn't enough room: " + e.getMessage(), @@ -876,7 +876,7 @@ public void testCanRemainWithShardRelocatingAway() { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); @@ -896,7 +896,8 @@ public void testCanRemainWithShardRelocatingAway() { ) ); clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); @@ -992,7 +993,7 @@ public void testForSingleDataNode() { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); // Two shards should start happily @@ -1051,7 +1052,8 @@ public void addListener(Listener listener) { ); clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index d9e9fb954458b..008884cbb8d69 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -136,7 +136,7 @@ public void testCanAllocateUsesMaxAvailableSpace() { ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(mostAvailableUsage.toString(), Decision.YES, decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(mostAvailableUsage.toString(), Decision.NO, decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation)); } @@ -204,7 +204,7 @@ public void testCanRemainUsesLeastAvailableSpace() { shardSizes.put("[test][2][p]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(Decision.YES, decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(Decision.NO, decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation)); try { diff --git a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java index 01e717e6fa95e..ee1cf7280e787 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java @@ -346,7 +346,7 @@ private RoutingAllocation getRestoreRoutingAllocation(AllocationDeciders allocat .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -425,7 +425,7 @@ private RoutingAllocation getRecoverOnAnyNodeRoutingAllocation(AllocationDecider .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -444,7 +444,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -452,7 +452,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -460,7 +460,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -485,7 +485,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -493,7 +493,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -501,7 +501,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 2, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -525,7 +525,7 @@ private RoutingAllocation routingAllocationWithOnePrimaryNoReplicas(AllocationDe .metaData(metaData) .routingTable(routingTableBuilder.build()) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } class TestAllocator extends PrimaryShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java index 672c9de3d3e71..20eb6286813d9 100644 --- a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java @@ -302,7 +302,7 @@ private RoutingAllocation onePrimaryOnNode1And1Replica(AllocationDeciders decide .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) { @@ -324,7 +324,7 @@ private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDecid .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } class TestAllocator extends ReplicaShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java index dc53373788685..81c50cc4f9cf4 100644 --- a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java +++ b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java @@ -103,7 +103,7 @@ public void testUnassignedShardAndEmptyNodesInRoutingTable() throws Exception { .nodes(DiscoveryNodes.EMPTY_NODES) .build(), false ); - RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime()); + RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime(), false); allocator.allocateUnassigned(routingAllocation); } diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 5ed63b519a43c..7ca30132a4a6b 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public void testDataFileFailureDuringRestore() throws Exception { assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - createIndex("test-idx"); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retries", Integer.MAX_VALUE)).get(); ensureGreen(); logger.info("--> indexing some data"); diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc index 99e754df52931..bb48a00fbe53d 100644 --- a/docs/reference/cluster/reroute.asciidoc +++ b/docs/reference/cluster/reroute.asciidoc @@ -103,3 +103,16 @@ are available: To ensure that these implications are well-understood, this command requires the special field `accept_data_loss` to be explicitly set to `true` for it to work. + +[float] +=== Retry failed shards + +The cluster will attempt to allocate a shard a maximum of +`index.allocation.max_retries` times in a row (defaults to `5`), before giving +up and leaving the shard unallocated. This scenario can be caused by +structural problems such as having an analyzer which refers to a stopwords +file which doesn't exist on all nodes. + +Once the problem has been corrected, allocation can be manually retried by +calling the <> API with `?retry_failed`, which +will attempt a single retry round for these shards. \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json index 2ae42c089d318..8bb85ca087ae8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json @@ -16,6 +16,10 @@ "type" : "boolean", "description" : "Return an explanation of why the commands can or cannot be executed" }, + "retry_failed": { + "type" : "boolean", + "description" : "Retries allocation of shards that are blocked due to too many subsequent allocation failures" + }, "metric": { "type": "list", "options": ["_all", "blocks", "metadata", "nodes", "routing_table", "master_node", "version"],