Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Backport some fixes and compatibility commits from master to ozone-1.4 #7218

Merged
merged 16 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
2711d03
HDDS-11137. Removed locks from SnapshotPurge and SnapshotSetProperty …
hemantk-12 Aug 15, 2024
1906009
HDDS-9198. Maintain local cache in OMSnapshotPurgeRequest to get upda…
hemantk-12 Aug 26, 2024
23f8e0a
HDDS-11152. OMDoubleBuffer error when handling snapshot's background …
hemantk-12 Aug 28, 2024
c707b57
HDDS-11209. Avoid insufficient EC pipelines in the container pipeline…
whbing Aug 21, 2024
45b71c8
HDDS-9819. Recon - Potential memory overflow in Container Health Task…
devmadhuu Jan 14, 2024
2dd0c3a
HDDS-10293. IllegalArgumentException: containerSize Negative (#6178)
ArafatKhan2198 Feb 22, 2024
bbc2c46
HDDS-11309. Increase CONTAINER_STATE Column Length in UNHEALTHY_CONTA…
ArafatKhan2198 Aug 26, 2024
5cb839c
HDDS-10761. Add raft close threshold config to OM RaftProperties (#6594)
whbing Apr 28, 2024
c1c3983
HDDS-10773. Simplify OM RaftProperties formatting (#6605)
whbing Apr 29, 2024
d142f02
HDDS-11320. Update OM, SCM, Datanode conf for RATIS-2135. (#7080)
szetszwo Aug 16, 2024
3bc0128
HDDS-11331. Fix Datanode unable to report for a long time (#7090)
jianghuazhu Aug 21, 2024
63617f4
HDDS-10612. Add Robot test to verify Container Balancer for RATIS con…
afilpp Apr 2, 2024
2d30bba
HDDS-9889. Refactor tests related to dynamical adaptation for datanod…
Montura May 1, 2024
efcf80b
HDDS-11375. DN startup fails due to illegal configuration of raft.grp…
jojochuang Aug 28, 2024
4470ad3
HDDS-10985. EC Reconstruction failed because the size of currentChunk…
slfan1989 Sep 11, 2024
0b22d8f
HDDS-8900. Mark TestSecretKeysApi#testSecretKeyApiSuccess as flaky
adoroszlai Jul 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,13 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
Expand Down Expand Up @@ -141,8 +145,34 @@ ContainerCommandResponseProto> executePutBlock(boolean close,
}

if (checksumBlockData != null) {
List<ChunkInfo> currentChunks = getContainerBlockData().getChunksList();

// For the same BlockGroupLength, we need to find the larger value of Block DataSize.
// This is because we do not send empty chunks to the DataNode, so the larger value is more accurate.
Map<Long, Optional<BlockData>> maxDataSizeByGroup = Arrays.stream(blockData)
.filter(Objects::nonNull)
.collect(Collectors.groupingBy(BlockData::getBlockGroupLength,
Collectors.maxBy(Comparator.comparingLong(BlockData::getSize))));
BlockData maxBlockData = maxDataSizeByGroup.get(blockGroupLength).get();

// When calculating the checksum size,
// We need to consider both blockGroupLength and the actual size of blockData.
//
// We use the smaller value to determine the size of the ChunkList.
//
// 1. In most cases, blockGroupLength is equal to the size of blockData.
// 2. Occasionally, blockData is not fully filled; if a chunk is empty,
// it is not sent to the DN, resulting in blockData size being smaller than blockGroupLength.
// 3. In cases with 'dirty data',
// if an error occurs when writing to the EC-Stripe (e.g., DN reports Container Closed),
// and the length confirmed with OM is smaller, blockGroupLength may be smaller than blockData size.
long blockDataSize = Math.min(maxBlockData.getSize(), blockGroupLength);
int chunkSize = (int) Math.ceil(((double) blockDataSize / repConfig.getEcChunkSize()));
List<ChunkInfo> checksumBlockDataChunks = checksumBlockData.getChunks();
if (chunkSize > 0) {
checksumBlockDataChunks = checksumBlockData.getChunks().subList(0, chunkSize);
}

List<ChunkInfo> currentChunks = getContainerBlockData().getChunksList();

Preconditions.checkArgument(
currentChunks.size() == checksumBlockDataChunks.size(),
Expand Down Expand Up @@ -268,7 +298,7 @@ ContainerCommandResponseProto> executePutBlock(boolean close,
throw ce;
});
} catch (IOException | ExecutionException e) {
throw new IOException(EXCEPTION_MSG + e.toString(), e);
throw new IOException(EXCEPTION_MSG + e, e);
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
handleInterruptedException(ex, false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.conf;

import org.apache.ratis.conf.RaftProperties;
import org.apache.ratis.grpc.GrpcConfigKeys;
import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.util.Preconditions;
import org.apache.ratis.util.SizeInBytes;

/**
* Utilities for Ratis configurations.
*/
public final class RatisConfUtils {
  /** Grouping for {@link GrpcConfigKeys} related helpers. */
  public static final class Grpc {
    /**
     * Set {@link GrpcConfigKeys#setMessageSizeMax(RaftProperties, SizeInBytes)}
     * based on the given max value, after validating it against the
     * log-appender buffer byte limit configured in the same properties.
     *
     * @param properties the Ratis properties to update
     * @param max the desired maximum message size in bytes; must be positive
     *            and at least the configured log-appender buffer byte limit
     */
    public static void setMessageSizeMax(RaftProperties properties, int max) {
      Preconditions.assertTrue(max > 0, () -> "max = " + max + " <= 0");

      final long logAppenderBufferByteLimit = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties).getSize();
      Preconditions.assertTrue(max >= logAppenderBufferByteLimit,
          () -> "max = " + max + " < logAppenderBufferByteLimit = " + logAppenderBufferByteLimit);

      // Need a 1MB gap; see RATIS-2135
      GrpcConfigKeys.setMessageSizeMax(properties, SizeInBytes.valueOf(max + SizeInBytes.ONE_MB.getSize()));
    }

    private Grpc() {
      // utility class; no instances
    }
  }

  private RatisConfUtils() {
    // utility class; no instances
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.hadoop.hdds.utils.db.Codec;
import org.apache.hadoop.hdds.utils.db.DelegatedCodec;
import org.apache.hadoop.hdds.utils.db.Proto3Codec;
import org.apache.hadoop.ozone.OzoneConsts;

import java.io.IOException;
import java.util.Collections;
Expand Down Expand Up @@ -296,4 +297,14 @@ public void appendTo(StringBuilder sb) {
sb.append(", size=").append(size);
sb.append("]");
}

/**
 * Returns the EC block-group length stored in this block's metadata,
 * or 0 when the metadata entry is absent.
 */
public long getBlockGroupLength() {
  final String value = getMetadata()
      .get(OzoneConsts.BLOCK_GROUP_LEN_KEY_IN_PUT_BLOCK);
  // Every replica is expected to carry the length; a missing entry
  // indicates a problem with the stripe. Returning 0 causes the block
  // length to be treated as zero so no reconstruction is attempted.
  if (value == null) {
    return 0;
  }
  return Long.parseLong(value);
}
}
11 changes: 10 additions & 1 deletion hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2097,7 +2097,7 @@
</property>

<property>
<name>ozone.om.ratis.server.leaderelection.pre-vote </name>
<name>ozone.om.ratis.server.leaderelection.pre-vote</name>
<value>true</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>Enable/disable OM HA leader election pre-vote phase.
Expand All @@ -2114,6 +2114,15 @@
</description>
</property>

<property>
<name>ozone.om.ratis.server.close.threshold</name>
<value>60s</value>
<tag>OZONE, OM, RATIS</tag>
<description>
      The Raft server will close if a JVM pause lasts longer than this threshold.
</description>
</property>

<property>
<name>ozone.om.ratis.snapshot.dir</name>
<value/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.conf;

import org.apache.ratis.conf.RaftProperties;
import org.apache.ratis.grpc.GrpcConfigKeys;
import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.util.SizeInBytes;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Test {@link RatisConfUtils}.
*/
public class TestRatisConfUtils {
  private static final Logger LOG = LoggerFactory.getLogger(TestRatisConfUtils.class);

  @Test
  void testGrpcSetMessageSizeMax() {
    final RaftProperties conf = new RaftProperties();
    final int bufferLimit = 1000;

    // Calling setMessageSizeMax before setBufferByteLimit must fail.
    Assertions.assertThrows(IllegalStateException.class,
        () -> RatisConfUtils.Grpc.setMessageSizeMax(conf, bufferLimit));

    RaftServerConfigKeys.Log.Appender.setBufferByteLimit(conf, SizeInBytes.valueOf(bufferLimit));

    // A max smaller than the configured buffer byte limit must also fail.
    Assertions.assertThrows(IllegalStateException.class,
        () -> RatisConfUtils.Grpc.setMessageSizeMax(conf, bufferLimit - 1));

    // With a valid max, the resulting message size must exceed the
    // buffer limit by exactly 1 MB (the RATIS-2135 gap).
    RatisConfUtils.Grpc.setMessageSizeMax(conf, bufferLimit);
    final SizeInBytes messageSizeMax = GrpcConfigKeys.messageSizeMax(conf, LOG::info);
    Assertions.assertEquals(SizeInBytes.ONE_MB.getSize(), messageSizeMax.getSize() - bufferLimit);
  }
}
Loading
Loading