Fix ReactiveStorageIT#testScaleDuringSplitOrClone
Sometimes the autoscaling decider returns an empty response when
the service does not have enough information to provide an autoscaling
decision; for example, when a new node joins, the service has to fetch
the new node's memory info, which might take a while. This commit adds
a busy assertion to ensure that a valid autoscaling capacity is
eventually provided.

Closes elastic#88478
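For context, assertBusy comes from org.elasticsearch.test.ESTestCase: it re-runs a block of assertions, swallowing AssertionError, until the block passes or a timeout elapses (10 seconds by default). Below is a minimal standalone sketch of that retry idiom, not the actual ESTestCase implementation; the class name and the exponential backoff policy are assumptions for illustration.

import java.util.concurrent.TimeUnit;

public final class BusyAssertSketch {
    // Re-run the assertion block until it stops throwing AssertionError
    // or the deadline passes; on timeout, rethrow the last failure.
    // (Hypothetical sketch; ESTestCase's real backoff differs.)
    static void assertBusy(Runnable assertions, long maxWait, TimeUnit unit) throws InterruptedException {
        long deadline = System.nanoTime() + unit.toNanos(maxWait);
        long sleepMillis = 1;
        while (true) {
            try {
                assertions.run();
                return; // the cluster reached the expected state
            } catch (AssertionError e) {
                if (System.nanoTime() >= deadline) {
                    throw e; // surface the most recent assertion failure
                }
                TimeUnit.MILLISECONDS.sleep(sleepMillis);
                sleepMillis = Math.min(sleepMillis * 2, 500); // assumed capped backoff
            }
        }
    }
}

In the diff below, the whole capacity() validation block is wrapped in such a retry, so the test only fails if a valid capacity never appears.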
fcofdez committed Jul 19, 2022
1 parent 98b789c commit ce447dc
Showing 1 changed file with 17 additions and 11 deletions.
@@ -38,6 +38,8 @@
 import static org.elasticsearch.index.store.Store.INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
 
 @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
 public class ReactiveStorageIT extends AutoscalingStorageIntegTestCase {
@@ -356,7 +358,6 @@ public void testScaleWhileShrinking() throws Exception {
         ensureGreen();
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/88478")
     public void testScaleDuringSplitOrClone() throws Exception {
         internalCluster().startMasterOnlyNode();
         final String dataNode1Name = internalCluster().startDataOnlyNode();
@@ -392,15 +393,20 @@ public void testScaleDuringSplitOrClone() throws Exception {
         setTotalSpace(dataNode1Name, enoughSpace);
         setTotalSpace(dataNode2Name, enoughSpace);
 
-        // validate initial state looks good
-        GetAutoscalingCapacityAction.Response response = capacity();
-        assertThat(response.results().keySet(), equalTo(Set.of(policyName)));
-        assertThat(response.results().get(policyName).currentCapacity().total().storage().getBytes(), equalTo(enoughSpace * 2));
-        assertThat(response.results().get(policyName).requiredCapacity().total().storage().getBytes(), equalTo(enoughSpace * 2));
-        assertThat(
-            response.results().get(policyName).requiredCapacity().node().storage().getBytes(),
-            equalTo(used + LOW_WATERMARK_BYTES + ReactiveStorageDeciderService.NODE_DISK_OVERHEAD)
-        );
+        // It might take a while until the autoscaling service polls the node information of dataNode2 and
+        // provides a complete autoscaling capacity response
+        assertBusy(() -> {
+            // validate initial state looks good
+            GetAutoscalingCapacityAction.Response response = capacity();
+            assertThat(response.results().keySet(), equalTo(Set.of(policyName)));
+            assertThat(response.results().get(policyName).currentCapacity().total().storage().getBytes(), equalTo(enoughSpace * 2));
+            assertThat(response.results().get(policyName).requiredCapacity(), is(notNullValue()));
+            assertThat(response.results().get(policyName).requiredCapacity().total().storage().getBytes(), equalTo(enoughSpace * 2));
+            assertThat(
+                response.results().get(policyName).requiredCapacity().node().storage().getBytes(),
+                equalTo(used + LOW_WATERMARK_BYTES + ReactiveStorageDeciderService.NODE_DISK_OVERHEAD)
+            );
+        });
 
         assertAcked(
             client().admin()
@@ -430,7 +436,7 @@
         // * 2 since worst case is no hard links, see DiskThresholdDecider.getExpectedShardSize.
         long requiredSpaceForClone = used * 2 + LOW_WATERMARK_BYTES;
 
-        response = capacity();
+        GetAutoscalingCapacityAction.Response response = capacity();
         assertThat(response.results().keySet(), equalTo(Set.of(policyName)));
         assertThat(response.results().get(policyName).currentCapacity().total().storage().getBytes(), equalTo(enoughSpace * 2));
         // test that even when the shard cannot allocate due to disk space, we do not request a "total" scale up, only a node-level.
