Skip to content

Commit

Permalink
[PLAT-15545] Simplify the frozen universe message for end user in YBA
Browse files Browse the repository at this point in the history
Summary: Refactored to make code more modular and keep the error message in one place.

Test Plan:
1. Aborted EditUniverse.
2. Ran a node action and verified that the correct error message was printed.
3. Gflags rerun.

Integration tests (itests) should cover more scenarios.

{F293989}

Reviewers: vkumar, yshchetinin, sanketh, cwang, nbhatia

Reviewed By: vkumar, cwang

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D38610
  • Loading branch information
nkhogen committed Oct 2, 2024
1 parent 9beb6dc commit 6128137
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ private InstallNodeAgent.Params createInstallParams(
params.setUniverseUUID(universe.getUniverseUUID());
params.nodeAgentInstallDir = installPath;
params.nodeAgentPort = serverPort;
params.reinstall = reinstall;
if (StringUtils.isNotEmpty(nodeDetails.sshUserOverride)) {
params.sshUser = nodeDetails.sshUserOverride;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ protected void validateUniverseState(Universe universe) {
// If PIT restore is enabled and the previous task was a volume resize, this task should not run.
@Override
protected boolean checkSafeToRunOnRestriction(
Universe universe, TaskInfo placementModificationTaskInfo) {
Universe universe, TaskInfo placementModificationTaskInfo, AllowedTasks allowedTasks) {
if (taskParams().scheduleParams.enablePointInTimeRestore
&& placementModificationTaskInfo.getTaskType() == TaskType.EditKubernetesUniverse) {
UniverseDefinitionTaskParams placementTaskParams =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,9 @@ public static AllowedTasks getAllowedTasksOnFailure(TaskInfo placementModificati
}

/**
* Returns the allowed task object when the universe is in a frozen failed state.
* Returns the allowed task object when the universe is in a frozen failed state. This does not
* check universe specific states. Consider using {@link #validateAllowedTasksOnFailure(Universe,
* TaskType)} if universe specific checks are required.
*
* @param lockedPlacementModificationTaskUuid the placement modification task UUID.
* @return the allowed tasks.
Expand All @@ -690,41 +692,46 @@ public static AllowedTasks getAllowedTasksOnFailure(UUID lockedPlacementModifica
return getAllowedTasksOnFailure(optional.get());
}

public AllowedTasks getAllowedTasksOnFailure(Universe universe, TaskType taskType) {
/**
* Validate and get the allowed tasks on a universe. This also checks universe specific states by
* calling {@link #checkSafeToRunOnRestriction(Universe, TaskInfo, AllowedTasks)}.
*
* @param universe the given universe.
* @param taskType the task type to be checked.
* @return the allowed tasks if validation passes.
*/
public AllowedTasks validateAllowedTasksOnFailure(Universe universe, TaskType taskType) {
Consumer<AllowedTasks> errorHandler =
allowedTasks -> {
log.error(
"Task {} cannot be run because a previously failed task {}({}) has frozen the"
+ " universe",
getUserTaskUUID(),
universe.getUniverseDetails().placementModificationTaskUuid,
allowedTasks.lockedTaskType);
throw new RuntimeException(
String.format(
"Task %s cannot be run because a previous task %s failed on the universe."
+ " Please retry the previous task first to fix the universe.",
taskType, allowedTasks.lockedTaskType));
};
AllowedTasks allowedTasks =
getAllowedTasksOnFailure(universe.getUniverseDetails().placementModificationTaskUuid);
if (allowedTasks.isRestricted() && !allowedTasks.getTaskTypes().contains(taskType)) {
String msg =
String.format(
"Universe %s placement update failed - can't run %s task until"
+ " placement update succeeds",
universe.getUniverseUUID(), taskType.name());
log.error(msg);
throw new RuntimeException(msg);
errorHandler.accept(allowedTasks);
}
if (universe.getUniverseDetails().placementModificationTaskUuid != null) {
Optional<TaskInfo> optPlacementModificationTask =
TaskInfo.maybeGet(universe.getUniverseDetails().placementModificationTaskUuid);
if (optPlacementModificationTask.isPresent()
&& !checkSafeToRunOnRestriction(universe, optPlacementModificationTask.get())) {
String msg =
String.format(
"Universe %s cannot run restricted %s task",
universe.getUniverseUUID(), taskType.name());
log.error(msg);
throw new RuntimeException(msg);
}
TaskInfo.maybeGet(universe.getUniverseDetails().placementModificationTaskUuid)
.ifPresent(
t -> {
if (!checkSafeToRunOnRestriction(universe, t, allowedTasks)) {
errorHandler.accept(allowedTasks);
}
});
}
return allowedTasks;
}

/**
* Validator method which is invoked when a re-run of a task is performed.
*
* @param previousTaskInfo the task info of the previous task for which the re-run is submitted.
*/
protected void validateRerunParams(TaskInfo previousTaskInfo) {}

@Override
public void validateParams(boolean isFirstTry) {
TaskType taskType = getTaskExecutor().getTaskType(getClass());
Expand All @@ -738,13 +745,7 @@ public void validateParams(boolean isFirstTry) {
.ifPresent(
universe -> {
if (isFirstTry) {
UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails();
AllowedTasks allowedTasks = getAllowedTasksOnFailure(universe, taskType);
if (allowedTasks.isRerun()) {
// Invoke the re-run validator.
TaskInfo.maybeGet(universeDetails.placementModificationTaskUuid)
.ifPresent(taskInfo -> validateRerunParams(taskInfo));
}
validateAllowedTasksOnFailure(universe, taskType);
Consumer<Universe> validator = getAdditionalValidator();
if (validator != null) {
validator.accept(universe);
Expand Down Expand Up @@ -784,7 +785,7 @@ protected void validateUniverseState(Universe universe) {
* task.
*/
protected boolean checkSafeToRunOnRestriction(
Universe universe, TaskInfo placementModificationTaskInfo) {
Universe universe, TaskInfo placementModificationTaskInfo, AllowedTasks allowedTasks) {
return true;
}

Expand Down Expand Up @@ -885,20 +886,7 @@ public void run(Universe universe) {
} else if (universeDetails.placementModificationTaskUuid != null) {
// If we're in the middle of a placement modification task (failed and waiting to be
// retried), only allow a subset of safe-to-execute tasks.
AllowedTasks allowedTasks =
getAllowedTasksOnFailure(universeDetails.placementModificationTaskUuid);
boolean isSafeToRun =
!allowedTasks.isRestricted() || allowedTasks.getTaskTypes().contains(owner);
if (!isSafeToRun) {
String msg =
"Universe "
+ universe.getUniverseUUID()
+ " placement update failed - can't run "
+ owner.name()
+ " task until placement update succeeds";
log.error(msg);
throw new RuntimeException(msg);
}
validateAllowedTasksOnFailure(universe, owner);
}
markUniverseUpdateInProgress(owner, universe, getConfig());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,12 @@ public void validateParams(boolean isFirstTry) {
}

@Override
protected void validateRerunParams(TaskInfo previousTaskInfo) {
Universe universe = getUniverse();
protected boolean checkSafeToRunOnRestriction(
Universe universe, TaskInfo previousTaskInfo, AllowedTasks allowedTasks) {
if (!allowedTasks.isRerun()) {
return true;
}
// Validate rerun task parameters against the old parameters from the previous task.
GFlagsUpgradeParams prevTaskParams =
Json.fromJson(previousTaskInfo.getTaskParams(), GFlagsUpgradeParams.class);
// Cluster with GFlags from the previous task params.
Expand Down Expand Up @@ -144,6 +148,7 @@ protected void validateRerunParams(TaskInfo previousTaskInfo) {
"Gflags upgrade rerun must affect all server types and nodes changed by the previously"
+ " failed gflags operation");
}
return true;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ public void run(Universe universe) {

RuntimeException ex = assertThrows(RuntimeException.class, () -> submitTask(params));
assertEquals(
"Universe "
+ defaultUniverse.getUniverseUUID().toString()
+ " cannot run restricted CreateBackupScheduleKubernetes task",
"Task CreateBackupScheduleKubernetes cannot be run because a previous task"
+ " EditKubernetesUniverse failed on the universe. Please retry the previous task first"
+ " to fix the universe.",
ex.getMessage());
}

Expand Down

0 comments on commit 6128137

Please sign in to comment.