Skip to content

Commit

Permalink
[Enhancement] Support recovery FE metadata from meta dir (#51040)
Browse files Browse the repository at this point in the history
## Why I'm doing:
With this patch, it is possible to recover FE metadata if all machines are unavailable but the meta directory still exists.
1. Start a FE node with the backup meta using config `bdbje_reset_election_group = true` in fe.conf. 
2. If the node is started, there will be a frontend in the cluster, and the IP address is the IP address of the new host.
3. Check metadata.
4. If everything is ok, remove the config `bdbje_reset_election_group = true` from fe.conf, and restart the node.
5. Through the above steps, the cluster has been recovered and has only one follower node.

Signed-off-by: gengjun-git <[email protected]>
  • Loading branch information
gengjun-git authored Sep 29, 2024
1 parent 6b1c44c commit 437544d
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 17 deletions.
2 changes: 1 addition & 1 deletion fe/fe-core/src/main/java/com/starrocks/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ public class Config extends ConfigBase {
* (Because most of the follower data has been damaged).
*/
@ConfField
public static String bdbje_reset_election_group = "false";
public static boolean bdbje_reset_election_group = false;

/**
* If the bdb data is corrupted, and you want to start the cluster only with image, set this param to true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ public void readFields(DataInput in) throws IOException {
case OperationType.OP_ADD_FRONTEND_V2:
case OperationType.OP_ADD_FIRST_FRONTEND_V2:
case OperationType.OP_UPDATE_FRONTEND_V2:
case OperationType.OP_REMOVE_FRONTEND_V2: {
case OperationType.OP_REMOVE_FRONTEND_V2:
case OperationType.OP_RESET_FRONTENDS: {
data = GsonUtils.GSON.fromJson(Text.readString(in), Frontend.class);
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ protected void setup() throws JournalException, InterruptedException {

protected void initConfigs(boolean isElectable) throws JournalException {
// Almost never used, just in case the master can not restart
if (Config.bdbje_reset_election_group.equals("true")) {
if (Config.bdbje_reset_election_group) {
if (!isElectable) {
String errMsg = "Current node is not in the electable_nodes list. will exit";
LOG.error(errMsg);
Expand Down Expand Up @@ -339,7 +339,7 @@ protected void ensureHelperInLocal() throws JournalException, InterruptedExcepti
}

// Almost never used, just in case the master can not restart
if (Config.bdbje_reset_election_group.equals("true")) {
if (Config.bdbje_reset_election_group) {
LOG.info("skip check local environment because metadata_failure_recovery = true");
return;
}
Expand Down
9 changes: 9 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/persist/EditLog.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@ public void loadJournal(GlobalStateMgr globalStateMgr, JournalEntity journal)
globalStateMgr.getNodeMgr().replayUpdateFrontend(fe);
break;
}
case OperationType.OP_RESET_FRONTENDS: {
Frontend fe = (Frontend) journal.getData();
globalStateMgr.getNodeMgr().replayResetFrontends(fe);
break;
}
case OperationType.OP_TIMESTAMP_V2: {
Timestamp stamp = (Timestamp) journal.getData();
globalStateMgr.setSynchronizedTime(stamp.getTimestamp());
Expand Down Expand Up @@ -1391,6 +1396,10 @@ public void logLeaderInfo(LeaderInfo info) {
logJsonObject(OperationType.OP_LEADER_INFO_CHANGE_V2, info);
}

public void logResetFrontends(Frontend frontend) {
logEdit(OperationType.OP_RESET_FRONTENDS, frontend);
}

public void logMetaVersion(MetaVersion metaVersion) {
logEdit(OperationType.OP_META_VERSION_V2, metaVersion);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ public class OperationType {

public static final short OP_ALTER_MATERIALIZED_VIEW_BASE_TABLE_INFOS = 10098;

// manage system node info 10101 ~ 10120
// manage compute node 10201 ~ 10220

@IgnorableOnReplayFailed
public static final short OP_ADD_COMPUTE_NODE = 10201;

Expand Down Expand Up @@ -510,6 +513,9 @@ public class OperationType {

public static final short OP_LEADER_INFO_CHANGE_V2 = 13041;

@IgnorableOnReplayFailed
public static final short OP_RESET_FRONTENDS = 13042;

@IgnorableOnReplayFailed
public static final short OP_ADD_FUNCTION_V2 = 13050;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,10 @@ private void transferToLeader() {
editLog.logAddFirstFrontend(self);
}

if (Config.bdbje_reset_election_group) {
nodeMgr.resetFrontends();
}

// MUST set leader ip before starting checkpoint thread.
// because checkpoint thread need this info to select non-leader FE to push image
nodeMgr.setLeaderInfo();
Expand Down
22 changes: 21 additions & 1 deletion fe/fe-core/src/main/java/com/starrocks/server/NodeMgr.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ public NodeMgr() {
this.brokerMgr = new BrokerMgr();
}

// For test
protected NodeMgr(FrontendNodeType role, String nodeName, Pair<String, Integer> selfNode) {
this.role = role;
this.nodeName = nodeName;
this.selfNode = selfNode;
}

public void initialize(String[] args) throws Exception {
getCheckedSelfHostPort();
getHelperNodes(args);
Expand Down Expand Up @@ -664,7 +671,7 @@ public List<Pair<String, Integer>> getHelperNodes() {
* frontend log is deleted because of checkpoint.
*/
public void checkCurrentNodeExist() {
if (Config.bdbje_reset_election_group.equals("true")) {
if (Config.bdbje_reset_election_group) {
return;
}

Expand Down Expand Up @@ -1176,6 +1183,19 @@ public ConcurrentHashMap<String, Frontend> getFrontends() {
return frontends;
}

public void resetFrontends() {
frontends.clear();
Frontend self = new Frontend(role, nodeName, selfNode.first, selfNode.second);
frontends.put(self.getNodeName(), self);

GlobalStateMgr.getCurrentState().getEditLog().logResetFrontends(self);
}

public void replayResetFrontends(Frontend frontend) {
frontends.clear();
frontends.put(frontend.getNodeName(), frontend);
}

public void save(ImageWriter imageWriter) throws IOException, SRMetaBlockException {
SRMetaBlockWriter writer = imageWriter.getBlockWriter(SRMetaBlockID.NODE_MGR, 1);
writer.writeJson(this);
Expand Down
14 changes: 2 additions & 12 deletions fe/fe-core/src/main/java/com/starrocks/system/Frontend.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,13 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.gson.annotations.SerializedName;
import com.starrocks.common.Config;
import com.starrocks.common.io.Text;
import com.starrocks.common.io.Writable;
import com.starrocks.common.io.JsonWriter;
import com.starrocks.ha.BDBHA;
import com.starrocks.ha.FrontendNodeType;
import com.starrocks.persist.gson.GsonUtils;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.system.HeartbeatResponse.HbStatus;

import java.io.DataOutput;
import java.io.IOException;

public class Frontend implements Writable {
public class Frontend extends JsonWriter {
@SerializedName(value = "r")
private FrontendNodeType role;
@SerializedName(value = "n")
Expand Down Expand Up @@ -210,11 +205,6 @@ public boolean handleHbResponse(FrontendHbResponse hbResponse, boolean isReplay)
return isChanged;
}

@Override
public void write(DataOutput out) throws IOException {
Text.writeString(out, GsonUtils.GSON.toJson(this));
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ public void testRecoverableOperations() {
Assert.assertTrue(OperationType.IGNORABLE_OPERATIONS.contains(OperationType.OP_MODIFY_DICTIONARY_MGR));
Assert.assertTrue(OperationType.IGNORABLE_OPERATIONS.contains(OperationType.OP_REPLICATION_JOB));
Assert.assertTrue(OperationType.IGNORABLE_OPERATIONS.contains(OperationType.OP_DELETE_REPLICATION_JOB));
Assert.assertTrue(OperationType.IGNORABLE_OPERATIONS.contains(OperationType.OP_RESET_FRONTENDS));
}

@Test
Expand Down
27 changes: 27 additions & 0 deletions fe/fe-core/src/test/java/com/starrocks/server/NodeMgrTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,27 @@

package com.starrocks.server;

import com.starrocks.common.Pair;
import com.starrocks.ha.FrontendNodeType;
import com.starrocks.system.Frontend;
import com.starrocks.system.FrontendHbResponse;
import com.starrocks.utframe.UtFrameUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.net.UnknownHostException;
import java.util.List;
import java.util.UUID;

public class NodeMgrTest {

@BeforeClass
public static void setUp() {
UtFrameUtils.setUpForPersistTest();
}

@Test(expected = UnknownHostException.class)
public void testCheckFeExistByIpOrFqdnException() throws UnknownHostException {
NodeMgr nodeMgr = new NodeMgr();
Expand Down Expand Up @@ -71,4 +80,22 @@ public void testRemoveClusterIdAndRoleFile() throws Exception {
nodeMgr.removeClusterIdAndRole();
Assert.assertTrue(nodeMgr.isVersionAndRoleFilesNotExist());
}

@Test
public void testResetFrontends() throws Exception {
FrontendNodeType role = FrontendNodeType.FOLLOWER;
String nodeName = "node1";
Pair<String, Integer> selfNode = Pair.create("192.168.3.5", 9010);
NodeMgr leaderNodeMgr = new NodeMgr(role, nodeName, selfNode);
leaderNodeMgr.resetFrontends();

UtFrameUtils.PseudoJournalReplayer.replayJournalToEnd();

List<Frontend> frontends = GlobalStateMgr.getCurrentState().getNodeMgr().getFrontends(FrontendNodeType.FOLLOWER);
Assert.assertEquals(1, frontends.size());
Assert.assertEquals(role, frontends.get(0).getRole());
Assert.assertEquals(nodeName, frontends.get(0).getNodeName());
Assert.assertEquals(selfNode.first, frontends.get(0).getHost());
Assert.assertEquals((int) selfNode.second, frontends.get(0).getEditLogPort());
}
}

0 comments on commit 437544d

Please sign in to comment.