From ebadf42ff990146e481bcd7db7070558863140b9 Mon Sep 17 00:00:00 2001 From: Swetha Guptha Date: Mon, 2 Sep 2024 22:57:45 +0530 Subject: [PATCH] Optimise memory for rest cluster health calls with inline shard aggregations for levels cluster and indices. Signed-off-by: Swetha Guptha --- .../opensearch/cluster/ClusterHealthIT.java | 129 ++++++++++++++++++ .../cluster/health/ClusterHealthRequest.java | 25 ++++ .../health/ClusterHealthRequestBuilder.java | 5 + .../cluster/health/ClusterHealthResponse.java | 45 ++++++ .../health/TransportClusterHealthAction.java | 7 +- .../stats/TransportClusterStatsAction.java | 3 +- .../cluster/health/ClusterIndexHealth.java | 112 ++++++++++++++- .../cluster/health/ClusterShardHealth.java | 39 ++++-- .../cluster/health/ClusterStateHealth.java | 84 ++++++++++++ .../routing/allocation/AllocationService.java | 7 +- .../cluster/RestClusterHealthAction.java | 1 + 11 files changed, 433 insertions(+), 24 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java index 0304e00a49070..4174f7951f45e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java @@ -37,6 +37,8 @@ import org.opensearch.action.support.IndicesOptions; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.health.ClusterIndexHealth; +import org.opensearch.cluster.health.ClusterShardHealth; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.service.ClusterService; @@ -49,6 +51,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -439,4 +442,130 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS completionFuture.actionGet(TimeValue.timeValueSeconds(30)); } } + + public void testHealthAtLevelParamEqualsClusterIsHonoured() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setHonourClusterHealthLevel(true) + .execute() + .actionGet(); + assertEquals(healthResponse.getStatus(), ClusterHealthStatus.GREEN); + assertTrue(healthResponse.getIndices().isEmpty()); + assertEquals(1, healthResponse.getActiveShards()); + assertEquals(1, healthResponse.getActivePrimaryShards()); + assertEquals(0, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + } + + public void testHealthAtLevelParamEqualIndicesIsHonoured() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setLevel("indices") + .setHonourClusterHealthLevel(true) + .execute() + .actionGet(); + assertEquals(healthResponse.getStatus(), ClusterHealthStatus.GREEN); + + assertEquals(1, healthResponse.getActiveShards()); + assertEquals(1, healthResponse.getActivePrimaryShards()); + assertEquals(0, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + + Map indices = healthResponse.getIndices(); + assertFalse(indices.isEmpty()); + assertEquals(1, indices.size()); + for (Map.Entry indicesHealth : indices.entrySet()) { + String indexName = indicesHealth.getKey(); + assertEquals("test1", indexName); + ClusterIndexHealth indicesHealthValue = indicesHealth.getValue(); + assertEquals(1, indicesHealthValue.getActiveShards()); + assertEquals(1, indicesHealthValue.getActivePrimaryShards()); + assertEquals(0, indicesHealthValue.getInitializingShards()); + assertEquals(0, indicesHealthValue.getUnassignedShards()); + assertEquals(0, indicesHealthValue.getDelayedUnassignedShards()); + assertEquals(0, indicesHealthValue.getRelocatingShards()); + assertEquals(ClusterHealthStatus.GREEN, indicesHealthValue.getStatus()); + assertTrue(indicesHealthValue.getShards().isEmpty()); + } + } + + public void testHealthAtLevelParamEqualShardsIsHonoured() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + ensureGreen(TimeValue.timeValueSeconds(120), "test1"); + createIndex( + "test2", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + ensureGreen(TimeValue.timeValueSeconds(120)); + client().admin() + .indices() + .prepareUpdateSettings() + .setIndices("test2") + .setSettings(Settings.builder().put("index.number_of_replicas", 2).build()) + .execute() + .actionGet(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setLevel("shards") + .setHonourClusterHealthLevel(true) + .execute() + .actionGet(); + assertEquals(healthResponse.getStatus(), ClusterHealthStatus.YELLOW); + + assertEquals(4, healthResponse.getActiveShards()); + assertEquals(2, healthResponse.getActivePrimaryShards()); + assertEquals(1, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + + Map indices = healthResponse.getIndices(); + assertFalse(indices.isEmpty()); + assertEquals(2, indices.size()); + for (Map.Entry indicesHealth : indices.entrySet()) { + String indexName = indicesHealth.getKey(); + boolean indexHasMoreReplicas = indexName.equals("test2"); + ClusterIndexHealth indicesHealthValue = indicesHealth.getValue(); + assertEquals(2, indicesHealthValue.getActiveShards()); + assertEquals(1, indicesHealthValue.getActivePrimaryShards()); + assertEquals(0, indicesHealthValue.getInitializingShards()); + assertEquals(indexHasMoreReplicas ? 1 : 0, indicesHealthValue.getUnassignedShards()); + assertEquals(0, indicesHealthValue.getDelayedUnassignedShards()); + assertEquals(0, indicesHealthValue.getRelocatingShards()); + assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, indicesHealthValue.getStatus()); + Map shards = indicesHealthValue.getShards(); + assertFalse(shards.isEmpty()); + assertEquals(1, shards.size()); + for (Map.Entry shardHealth : shards.entrySet()) { + ClusterShardHealth clusterShardHealth = shardHealth.getValue(); + assertEquals(2, clusterShardHealth.getActiveShards()); + assertEquals(indexHasMoreReplicas ? 1 : 0, clusterShardHealth.getUnassignedShards()); + assertEquals(0, clusterShardHealth.getDelayedUnassignedShards()); + assertEquals(0, clusterShardHealth.getRelocatingShards()); + assertEquals(0, clusterShardHealth.getInitializingShards()); + assertTrue(clusterShardHealth.isPrimaryActive()); + assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, clusterShardHealth.getStatus()); + } + } + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java index ec8b01d853da6..2547a97449855 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java @@ -76,6 +76,17 @@ public class ClusterHealthRequest extends ClusterManagerNodeReadRequest 0) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java index a9a3756755265..2e149bc577c29 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java @@ -171,4 +171,9 @@ public final ClusterHealthRequestBuilder setEnsureNodeWeighedIn(boolean ensureNo request.ensureNodeWeighedIn(ensureNodeCommissioned); return this; } + + public ClusterHealthRequestBuilder setHonourClusterHealthLevel(boolean honourClusterHealthLevel) { + request.setHonourClusterHealthLevel(honourClusterHealthLevel); + return this; + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java index 1a27f161343e8..3fd20d456b1c6 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java @@ -237,6 +237,28 @@ public ClusterHealthResponse( this.clusterHealthStatus = clusterStateHealth.getStatus(); } + public ClusterHealthResponse( + String clusterName, + String[] concreteIndices, + ClusterHealthRequest clusterHealthRequest, + ClusterState clusterState, + int numberOfPendingTasks, + int numberOfInFlightFetch, + TimeValue taskMaxWaitingTime + ) { + this.clusterName = clusterName; + this.numberOfPendingTasks = numberOfPendingTasks; + this.numberOfInFlightFetch = numberOfInFlightFetch; + this.taskMaxWaitingTime = taskMaxWaitingTime; + if (clusterHealthRequest.isHonourClusterHealthLevel()) { + this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices, clusterHealthRequest.level()); + } else { + this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices); + } + this.clusterHealthStatus = clusterStateHealth.getStatus(); + this.delayedUnassignedShards = clusterStateHealth.getDelayedUnassignedShards(); + } + // Awareness Attribute health public ClusterHealthResponse( String clusterName, @@ -261,6 +283,29 @@ public ClusterHealthResponse( this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName); } + public ClusterHealthResponse( + String clusterName, + ClusterHealthRequest clusterHealthRequest, + ClusterState clusterState, + ClusterSettings clusterSettings, + String[] concreteIndices, + String awarenessAttributeName, + int numberOfPendingTasks, + int numberOfInFlightFetch, + TimeValue taskMaxWaitingTime + ) { + this( + clusterName, + concreteIndices, + clusterHealthRequest, + clusterState, + numberOfPendingTasks, + numberOfInFlightFetch, + taskMaxWaitingTime + ); + this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName); + } + /** * For XContent Parser and serialization tests */ diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java index f69f462372888..b4c6f840edc72 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java @@ -52,7 +52,6 @@ import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.NodeWeighedAwayException; -import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.WeightedRoutingUtils; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; @@ -496,13 +495,13 @@ private ClusterHealthResponse clusterHealth( concreteIndices = clusterState.getMetadata().getConcreteAllIndices(); return new ClusterHealthResponse( clusterState.getClusterName().value(), + request, clusterState, clusterService.getClusterSettings(), concreteIndices, awarenessAttribute, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); } @@ -514,10 +513,10 @@ private ClusterHealthResponse clusterHealth( ClusterHealthResponse response = new ClusterHealthResponse( clusterState.getClusterName().value(), Strings.EMPTY_ARRAY, + request, clusterState, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); response.setStatus(ClusterHealthStatus.RED); @@ -527,10 +526,10 @@ private ClusterHealthResponse clusterHealth( return new ClusterHealthResponse( clusterState.getClusterName().value(), concreteIndices, + request, clusterState, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index be7d41a7ba75e..a49ca2035783c 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -34,6 +34,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.action.FailedNodeException; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.node.info.NodeInfo; import org.opensearch.action.admin.cluster.node.stats.NodeStats; import org.opensearch.action.admin.indices.stats.CommonStats; @@ -209,7 +210,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq ClusterHealthStatus clusterStatus = null; if (clusterService.state().nodes().isLocalNodeElectedClusterManager()) { - clusterStatus = new ClusterStateHealth(clusterService.state()).getStatus(); + clusterStatus = new ClusterStateHealth(clusterService.state(), ClusterHealthRequest.Level.CLUSTER).getStatus(); } return new ClusterStatsNodeResponse( diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java index 19c64965e6941..f2ff208a91b56 100644 --- a/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java @@ -32,9 +32,11 @@ package org.opensearch.cluster.health; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.ParseField; import org.opensearch.core.common.io.stream.StreamInput; @@ -141,6 +143,7 @@ public final class ClusterIndexHealth implements Iterable, W private final int relocatingShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final int activePrimaryShards; private final ClusterHealthStatus status; private final Map shards; @@ -163,6 +166,7 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT int computeRelocatingShards = 0; int computeInitializingShards = 0; int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; for (ClusterShardHealth shardHealth : shards.values()) { if (shardHealth.isPrimaryActive()) { computeActivePrimaryShards++; @@ -171,13 +175,9 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT computeRelocatingShards += shardHealth.getRelocatingShards(); computeInitializingShards += shardHealth.getInitializingShards(); computeUnassignedShards += shardHealth.getUnassignedShards(); + computeDelayedUnassignedShards += shardHealth.getDelayedUnassignedShards(); - if (shardHealth.getStatus() == ClusterHealthStatus.RED) { - computeStatus = ClusterHealthStatus.RED; - } else if (shardHealth.getStatus() == ClusterHealthStatus.YELLOW && computeStatus != ClusterHealthStatus.RED) { - // do not override an existing red - computeStatus = ClusterHealthStatus.YELLOW; - } + computeStatus = computeIndexHealthStatus(shardHealth.getStatus(), computeStatus); } if (shards.isEmpty()) { // might be since none has been created yet (two phase index creation) computeStatus = ClusterHealthStatus.RED; @@ -189,6 +189,102 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; + } + + public ClusterIndexHealth( + final IndexMetadata indexMetadata, + final IndexRoutingTable indexRoutingTable, + final ClusterHealthRequest.Level healthLevel + ) { + this.index = indexMetadata.getIndex().getName(); + this.numberOfShards = indexMetadata.getNumberOfShards(); + this.numberOfReplicas = indexMetadata.getNumberOfReplicas(); + + shards = new HashMap<>(); + + // update the index status + ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN; + int computeActivePrimaryShards = 0; + int computeActiveShards = 0; + int computeRelocatingShards = 0; + int computeInitializingShards = 0; + int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; + + boolean isShardLevelHealthInfo = healthLevel == ClusterHealthRequest.Level.SHARDS; + if (!isShardLevelHealthInfo) { + for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { + int activeShardsPerShardId = 0; + + List shardRoutings = indexShardRoutingTable.shards(); + int shardRoutingCountPerShardId = shardRoutings.size(); + for (int index = 0; index < shardRoutingCountPerShardId; index++) { + ShardRouting shardRouting = shardRoutings.get(index); + if (shardRouting.active()) { + computeActiveShards++; + activeShardsPerShardId++; + } else if (shardRouting.relocating()) { + computeRelocatingShards++; + } else if (shardRouting.initializing()) { + computeInitializingShards++; + } else if (shardRouting.unassigned()) { + computeUnassignedShards++; + if (shardRouting.unassignedInfo().isDelayed()) { + computeDelayedUnassignedShards++; + } + } + } + ShardRouting primaryShard = indexShardRoutingTable.primaryShard(); + if (primaryShard.active()) { + computeActivePrimaryShards++; + } + ClusterHealthStatus shardHealth = ClusterShardHealth.computeShardHealth( + primaryShard, + activeShardsPerShardId, + shardRoutingCountPerShardId + ); + computeStatus = computeIndexHealthStatus(shardHealth, computeStatus); + } + } else { + for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { + int shardId = indexShardRoutingTable.shardId().id(); + ClusterShardHealth shardHealth = new ClusterShardHealth(shardId, indexShardRoutingTable); + if (shardHealth.isPrimaryActive()) { + computeActivePrimaryShards++; + } + computeActiveShards += shardHealth.getActiveShards(); + computeRelocatingShards += shardHealth.getRelocatingShards(); + computeInitializingShards += shardHealth.getInitializingShards(); + computeUnassignedShards += shardHealth.getUnassignedShards(); + computeDelayedUnassignedShards += shardHealth.getDelayedUnassignedShards(); + computeStatus = computeIndexHealthStatus(shardHealth.getStatus(), computeStatus); + shards.put(shardId, shardHealth); + } + } + + if (indexRoutingTable.shards().isEmpty()) { // might be since none has been created yet (two phase index creation) + computeStatus = ClusterHealthStatus.RED; + } + + this.status = computeStatus; + this.activePrimaryShards = computeActivePrimaryShards; + this.activeShards = computeActiveShards; + this.relocatingShards = computeRelocatingShards; + this.initializingShards = computeInitializingShards; + this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; + + } + + private static ClusterHealthStatus computeIndexHealthStatus(ClusterHealthStatus shardHealth, ClusterHealthStatus computeStatus) { + if (shardHealth == ClusterHealthStatus.RED) { + computeStatus = ClusterHealthStatus.RED; + } else if (shardHealth == ClusterHealthStatus.YELLOW && computeStatus != ClusterHealthStatus.RED) { + // do not override an existing red + computeStatus = ClusterHealthStatus.YELLOW; + } + return computeStatus; } public ClusterIndexHealth(final StreamInput in) throws IOException { @@ -269,6 +365,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + public ClusterHealthStatus getStatus() { return status; } diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java index 1fe88f65248c2..375ce6dcbd826 100644 --- a/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java @@ -50,6 +50,7 @@ import org.opensearch.core.xcontent.XContentParser; import java.io.IOException; +import java.util.List; import java.util.Locale; import java.util.Objects; @@ -109,6 +110,7 @@ public final class ClusterShardHealth implements Writeable, ToXContentFragment { private final int relocatingShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final boolean primaryActive; public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable) { @@ -117,7 +119,10 @@ public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardR int computeRelocatingShards = 0; int computeInitializingShards = 0; int computeUnassignedShards = 0; - for (ShardRouting shardRouting : shardRoutingTable) { + int computeDelayedUnassignedShards = 0; + List shardRoutings = shardRoutingTable.shards(); + for (int index = 0; index < shardRoutings.size(); index++) { + ShardRouting shardRouting = shardRoutings.get(index); if (shardRouting.active()) { computeActiveShards++; if (shardRouting.relocating()) { @@ -128,24 +133,18 @@ public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardR computeInitializingShards++; } else if (shardRouting.unassigned()) { computeUnassignedShards++; + if (shardRouting.unassignedInfo().isDelayed()) { + computeDelayedUnassignedShards++; + } } } - ClusterHealthStatus computeStatus; final ShardRouting primaryRouting = shardRoutingTable.primaryShard(); - if (primaryRouting.active()) { - if (computeActiveShards == shardRoutingTable.size()) { - computeStatus = ClusterHealthStatus.GREEN; - } else { - computeStatus = ClusterHealthStatus.YELLOW; - } - } else { - computeStatus = getInactivePrimaryHealth(primaryRouting); - } - this.status = computeStatus; + this.status = computeShardHealth(primaryRouting, computeActiveShards, shardRoutingTable.size()); this.activeShards = computeActiveShards; this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; this.primaryActive = primaryRouting.active(); } @@ -208,6 +207,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + @Override public void writeTo(final StreamOutput out) throws IOException { out.writeVInt(shardId); @@ -219,6 +222,18 @@ public void writeTo(final StreamOutput out) throws IOException { out.writeBoolean(primaryActive); } + public static ClusterHealthStatus computeShardHealth(final ShardRouting primaryRouting, final int activeShards, final int totalShards) { + if (primaryRouting.active()) { + if (activeShards == totalShards) { + return ClusterHealthStatus.GREEN; + } else { + return ClusterHealthStatus.YELLOW; + } + } else { + return getInactivePrimaryHealth(primaryRouting); + } + } + /** * Checks if an inactive primary shard should cause the cluster health to go RED. *

diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java index 083159bffdc2b..bcd91f01bcd0b 100644 --- a/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java @@ -31,6 +31,7 @@ package org.opensearch.cluster.health; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.IndexRoutingTable; @@ -63,6 +64,7 @@ public final class ClusterStateHealth implements Iterable, W private final int activePrimaryShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final double activeShardsPercent; private final ClusterHealthStatus status; private final Map indices; @@ -76,6 +78,10 @@ public ClusterStateHealth(final ClusterState clusterState) { this(clusterState, clusterState.metadata().getConcreteAllIndices()); } + public ClusterStateHealth(final ClusterState clusterState, final ClusterHealthRequest.Level clusterHealthLevel) { + this(clusterState, clusterState.metadata().getConcreteAllIndices(), clusterHealthLevel); + } + /** * Creates a new ClusterStateHealth instance considering the current cluster state and the provided index names. * @@ -129,6 +135,80 @@ public ClusterStateHealth(final ClusterState clusterState, final String[] concre this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeUnassignedShards; + + // shortcut on green + if (computeStatus.equals(ClusterHealthStatus.GREEN)) { + this.activeShardsPercent = 100; + } else { + List shardRoutings = clusterState.getRoutingTable().allShards(); + int activeShardCount = 0; + int totalShardCount = 0; + for (ShardRouting shardRouting : shardRoutings) { + if (shardRouting.active()) activeShardCount++; + totalShardCount++; + } + this.activeShardsPercent = (((double) activeShardCount) / totalShardCount) * 100; + } + } + + public ClusterStateHealth( + final ClusterState clusterState, + final String[] concreteIndices, + final ClusterHealthRequest.Level healthLevel + ) { + numberOfNodes = clusterState.nodes().getSize(); + numberOfDataNodes = clusterState.nodes().getDataNodes().size(); + hasDiscoveredClusterManager = clusterState.nodes().getClusterManagerNodeId() != null; + indices = new HashMap<>(); + boolean isIndexLevelHealthInfo = healthLevel == ClusterHealthRequest.Level.INDICES + || healthLevel == ClusterHealthRequest.Level.SHARDS; + + ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN; + int computeActivePrimaryShards = 0; + int computeActiveShards = 0; + int computeRelocatingShards = 0; + int computeInitializingShards = 0; + int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; + + for (String index : concreteIndices) { + IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(index); + IndexMetadata indexMetadata = clusterState.metadata().index(index); + if (indexRoutingTable == null) { + continue; + } + + ClusterIndexHealth indexHealth = new ClusterIndexHealth(indexMetadata, indexRoutingTable, healthLevel); + computeActivePrimaryShards += indexHealth.getActivePrimaryShards(); + computeActiveShards += indexHealth.getActiveShards(); + computeRelocatingShards += indexHealth.getRelocatingShards(); + computeInitializingShards += indexHealth.getInitializingShards(); + computeUnassignedShards += indexHealth.getUnassignedShards(); + computeDelayedUnassignedShards += indexHealth.getDelayedUnassignedShards(); + if (indexHealth.getStatus() == ClusterHealthStatus.RED) { + computeStatus = ClusterHealthStatus.RED; + } else if (indexHealth.getStatus() == ClusterHealthStatus.YELLOW && computeStatus != ClusterHealthStatus.RED) { + computeStatus = ClusterHealthStatus.YELLOW; + } + + if (isIndexLevelHealthInfo) { + // Store ClusterIndexHealth only when the health is requested at Index or Shard level + indices.put(indexHealth.getIndex(), indexHealth); + } + } + + if (clusterState.blocks().hasGlobalBlockWithStatus(RestStatus.SERVICE_UNAVAILABLE)) { + computeStatus = ClusterHealthStatus.RED; + } + + this.status = computeStatus; + this.activePrimaryShards = computeActivePrimaryShards; + this.activeShards = computeActiveShards; + this.relocatingShards = computeRelocatingShards; + this.initializingShards = computeInitializingShards; + this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; // shortcut on green if (computeStatus.equals(ClusterHealthStatus.GREEN)) { @@ -213,6 +293,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + public int getNumberOfNodes() { return this.numberOfNodes; } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java index e29a81a2c131f..78f17c9ff212b 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.Version; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.ClusterInfoService; import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; @@ -196,7 +197,11 @@ public ClusterState applyStartedShards(ClusterState clusterState, List