Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: offline error message when node is shutting down #5797

Merged
merged 10 commits into from
Aug 8, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {IExecutionEngine} from "../../execution/engine/interface.js";
import {BlockError, BlockErrorCode} from "../errors/index.js";
import {IClock} from "../../util/clock.js";
import {BlockProcessOpts} from "../options.js";
import {ExecutePayloadStatus} from "../../execution/engine/interface.js";
import {ExecutionPayloadStatus} from "../../execution/engine/interface.js";
import {IEth1ForBlockProduction} from "../../eth1/index.js";
import {Metrics} from "../../metrics/metrics.js";
import {ImportBlockOpts} from "./types.js";
Expand Down Expand Up @@ -304,13 +304,13 @@ export async function verifyBlockExecutionPayload(
chain.metrics?.engineNotifyNewPayloadResult.inc({result: execResult.status});

switch (execResult.status) {
case ExecutePayloadStatus.VALID: {
case ExecutionPayloadStatus.VALID: {
const executionStatus: ExecutionStatus.Valid = ExecutionStatus.Valid;
const lvhResponse = {executionStatus, latestValidExecHash: execResult.latestValidHash};
return {executionStatus, lvhResponse, execError: null};
}

case ExecutePayloadStatus.INVALID: {
case ExecutionPayloadStatus.INVALID: {
const executionStatus: ExecutionStatus.Invalid = ExecutionStatus.Invalid;
const lvhResponse = {
executionStatus,
Expand All @@ -326,15 +326,15 @@ export async function verifyBlockExecutionPayload(
}

// Accepted and Syncing have the same treatment, as final validation of block is pending
case ExecutePayloadStatus.ACCEPTED:
case ExecutePayloadStatus.SYNCING: {
case ExecutionPayloadStatus.ACCEPTED:
case ExecutionPayloadStatus.SYNCING: {
// Check if the entire segment was deemed safe or, this block specifically itself if not in
// the safeSlotsToImportOptimistically window of current slot, then we can import else
// we need to throw and not import his block
if (!isOptimisticallySafe && block.message.slot + opts.safeSlotsToImportOptimistically >= currentSlot) {
const execError = new BlockError(block, {
code: BlockErrorCode.EXECUTION_ENGINE_ERROR,
execStatus: ExecutePayloadStatus.UNSAFE_OPTIMISTIC_STATUS,
execStatus: ExecutionPayloadStatus.UNSAFE_OPTIMISTIC_STATUS,
errorMessage: `not safe to import ${execResult.status} payload within ${opts.safeSlotsToImportOptimistically} of currentSlot`,
});
return {executionStatus: null, execError} as VerifyBlockExecutionResponse;
Expand All @@ -360,9 +360,9 @@ export async function verifyBlockExecutionPayload(
// back. But for now, lets assume other mechanisms like unknown parent block of a future
// child block will cause it to replay

case ExecutePayloadStatus.INVALID_BLOCK_HASH:
case ExecutePayloadStatus.ELERROR:
case ExecutePayloadStatus.UNAVAILABLE: {
case ExecutionPayloadStatus.INVALID_BLOCK_HASH:
case ExecutionPayloadStatus.ELERROR:
case ExecutionPayloadStatus.UNAVAILABLE: {
const execError = new BlockError(block, {
code: BlockErrorCode.EXECUTION_ENGINE_ERROR,
execStatus: execResult.status,
Expand Down
6 changes: 3 additions & 3 deletions packages/beacon-node/src/chain/errors/blockError.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {toHexString} from "@chainsafe/ssz";
import {allForks, RootHex, Slot, ValidatorIndex} from "@lodestar/types";
import {LodestarError} from "@lodestar/utils";
import {CachedBeaconStateAllForks} from "@lodestar/state-transition";
import {ExecutePayloadStatus} from "../../execution/engine/interface.js";
import {ExecutionPayloadStatus} from "../../execution/engine/interface.js";
import {QueueErrorCode} from "../../util/queue/index.js";
import {GossipActionError} from "./gossipValidation.js";

Expand Down Expand Up @@ -66,8 +66,8 @@ export enum BlockErrorCode {
}

type ExecutionErrorStatus = Exclude<
ExecutePayloadStatus,
ExecutePayloadStatus.VALID | ExecutePayloadStatus.ACCEPTED | ExecutePayloadStatus.SYNCING
ExecutionPayloadStatus,
ExecutionPayloadStatus.VALID | ExecutionPayloadStatus.ACCEPTED | ExecutionPayloadStatus.SYNCING
>;

export type BlockErrorType =
Expand Down
4 changes: 2 additions & 2 deletions packages/beacon-node/src/execution/engine/disabled.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {ExecutionEngineState, IExecutionEngine, PayloadIdCache} from "./interface.js";
import {ExecutionState, IExecutionEngine, PayloadIdCache} from "./interface.js";

export class ExecutionEngineDisabled implements IExecutionEngine {
readonly payloadIdCache = new PayloadIdCache();
Expand Down Expand Up @@ -27,7 +27,7 @@ export class ExecutionEngineDisabled implements IExecutionEngine {
throw Error("Execution engine disabled");
}

getState(): ExecutionEngineState {
getState(): ExecutionState {
throw Error("Execution engine disabled");
}
}
119 changes: 51 additions & 68 deletions packages/beacon-node/src/execution/engine/http.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
import {Root, RootHex, allForks, Wei} from "@lodestar/types";
import {SLOTS_PER_EPOCH, ForkName, ForkSeq} from "@lodestar/params";
import {Logger} from "@lodestar/logger";
import {isErrorAborted} from "@lodestar/utils";
import {ErrorJsonRpcResponse, HttpRpcError} from "../../eth1/provider/jsonRpcHttpClient.js";
import {IJsonRpcHttpClient, ReqOpts} from "../../eth1/provider/jsonRpcHttpClient.js";
import {
ErrorJsonRpcResponse,
HttpRpcError,
IJsonRpcHttpClient,
ReqOpts,
} from "../../eth1/provider/jsonRpcHttpClient.js";
import {Metrics} from "../../metrics/index.js";
import {JobItemQueue, isQueueErrorAborted} from "../../util/queue/index.js";
import {JobItemQueue} from "../../util/queue/index.js";
import {EPOCHS_PER_BATCH} from "../../sync/constants.js";
import {numToQuantity} from "../../eth1/provider/utils.js";
import {IJson, RpcPayload} from "../../eth1/interface.js";
import {
ExecutePayloadStatus,
ExecutionPayloadStatus,
ExecutePayloadResponse,
IExecutionEngine,
PayloadId,
PayloadAttributes,
BlobsBundle,
VersionedHashes,
ExecutionEngineState,
ExecutionState,
} from "./interface.js";
import {PayloadIdCache} from "./payloadIdCache.js";
import {
Expand Down Expand Up @@ -92,7 +95,7 @@ export class ExecutionEngineHttp implements IExecutionEngine {
// The default state is ONLINE, it will be updated to SYNCING once we receive the first payload
// This assumption is better than the OFFLINE state, since we can't be sure if the EL is offline and being offline may trigger some notifications
// It's safer to to avoid false positives and assume that the EL is syncing until we receive the first payload
private state: ExecutionEngineState = ExecutionEngineState.ONLINE;
private state: ExecutionState = ExecutionState.ONLINE;

readonly payloadIdCache = new PayloadIdCache();
/**
Expand Down Expand Up @@ -128,12 +131,15 @@ export class ExecutionEngineHttp implements IExecutionEngine {
protected async fetchWithRetries<R, P = IJson[]>(payload: RpcPayload<P>, opts?: ReqOpts): Promise<R> {
try {
const res = await this.rpc.fetchWithRetries<R, P>(payload, opts);
this.updateEngineState(ExecutionEngineState.ONLINE);
this.updateEngineState(ExecutionState.ONLINE);
return res;
} catch (err) {
if (!isErrorAborted(err)) {
this.updateEngineState(getExecutionEngineState({payloadError: err}));
}
this.updateEngineState(getExecutionEngineState({payloadError: err, oldState: this.state}));

/*
* TODO: For some error cases as abort, we may not want to escalate the error to the caller
* But for now the higher level code handles such cases so we can just rethrow the error
*/
throw err;
}
}
Expand Down Expand Up @@ -207,39 +213,34 @@ export class ExecutionEngineHttp implements IExecutionEngine {

const {status, latestValidHash, validationError} = await (
this.rpcFetchQueue.push(engineRequest) as Promise<EngineApiRpcReturnTypes[typeof method]>
)
// If there are errors by EL like connection refused, internal error, they need to be
// treated separate from being INVALID. For now, just pass the error upstream.
.catch((e: Error): EngineApiRpcReturnTypes[typeof method] => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this error handling is required upstream in processBlocks (as well as to not downgrade the peer in case of UNAVAILABLE)

if (!isErrorAborted(e) && !isQueueErrorAborted(e)) {
this.updateEngineState(getExecutionEngineState({payloadError: e}));
}
if (e instanceof HttpRpcError || e instanceof ErrorJsonRpcResponse) {
return {status: ExecutePayloadStatus.ELERROR, latestValidHash: null, validationError: e.message};
} else {
return {status: ExecutePayloadStatus.UNAVAILABLE, latestValidHash: null, validationError: e.message};
}
});
this.updateEngineState(getExecutionEngineState({payloadStatus: status}));
).catch((e: Error) => {
if (e instanceof HttpRpcError || e instanceof ErrorJsonRpcResponse) {
return {status: ExecutionPayloadStatus.ELERROR, latestValidHash: null, validationError: e.message};
} else {
return {status: ExecutionPayloadStatus.UNAVAILABLE, latestValidHash: null, validationError: e.message};
}
});

this.updateEngineState(getExecutionEngineState({payloadStatus: status, oldState: this.state}));

switch (status) {
case ExecutePayloadStatus.VALID:
case ExecutionPayloadStatus.VALID:
return {status, latestValidHash: latestValidHash ?? "0x0", validationError: null};

case ExecutePayloadStatus.INVALID:
case ExecutionPayloadStatus.INVALID:
// As per latest specs if latestValidHash can be null and it would mean only
// invalidate this block
return {status, latestValidHash, validationError};

case ExecutePayloadStatus.SYNCING:
case ExecutePayloadStatus.ACCEPTED:
case ExecutionPayloadStatus.SYNCING:
case ExecutionPayloadStatus.ACCEPTED:
return {status, latestValidHash: null, validationError: null};

case ExecutePayloadStatus.INVALID_BLOCK_HASH:
case ExecutionPayloadStatus.INVALID_BLOCK_HASH:
return {status, latestValidHash: null, validationError: validationError ?? "Malformed block"};

case ExecutePayloadStatus.UNAVAILABLE:
case ExecutePayloadStatus.ELERROR:
case ExecutionPayloadStatus.UNAVAILABLE:
case ExecutionPayloadStatus.ELERROR:
return {
status,
latestValidHash: null,
Expand All @@ -248,7 +249,7 @@ export class ExecutionEngineHttp implements IExecutionEngine {

default:
return {
status: ExecutePayloadStatus.ELERROR,
status: ExecutionPayloadStatus.ELERROR,
latestValidHash: null,
validationError: `Invalid EL status on executePayload: ${status}`,
};
Expand Down Expand Up @@ -312,25 +313,15 @@ export class ExecutionEngineHttp implements IExecutionEngine {
methodOpts: fcUReqOpts,
}) as Promise<EngineApiRpcReturnTypes[typeof method]>;

const response = await request
// If there are errors by EL like connection refused, internal error, they need to be
// treated separate from being INVALID. For now, just pass the error upstream.
.catch((e: Error): EngineApiRpcReturnTypes[typeof method] => {
if (!isErrorAborted(e) && !isQueueErrorAborted(e)) {
this.updateEngineState(getExecutionEngineState({payloadError: e}));
}
throw e;
});

const {
payloadStatus: {status, latestValidHash: _latestValidHash, validationError},
payloadId,
} = response;
} = await request;

this.updateEngineState(getExecutionEngineState({payloadStatus: status}));
this.updateEngineState(getExecutionEngineState({payloadStatus: status, oldState: this.state}));

switch (status) {
case ExecutePayloadStatus.VALID:
case ExecutionPayloadStatus.VALID:
// if payloadAttributes are provided, a valid payloadId is expected
if (payloadAttributesRpc) {
if (!payloadId || payloadId === "0x") {
Expand All @@ -342,15 +333,15 @@ export class ExecutionEngineHttp implements IExecutionEngine {
}
return payloadId !== "0x" ? payloadId : null;

case ExecutePayloadStatus.SYNCING:
case ExecutionPayloadStatus.SYNCING:
// Throw error on syncing if requested to produce a block, else silently ignore
if (payloadAttributes) {
throw Error("Execution Layer Syncing");
} else {
return null;
}

case ExecutePayloadStatus.INVALID:
case ExecutionPayloadStatus.INVALID:
throw Error(
`Invalid ${payloadAttributes ? "prepare payload" : "forkchoice request"}, validationError=${
validationError ?? ""
Expand Down Expand Up @@ -421,38 +412,30 @@ export class ExecutionEngineHttp implements IExecutionEngine {
return response.map(deserializeExecutionPayloadBody);
}

getState(): ExecutionEngineState {
getState(): ExecutionState {
return this.state;
}

private updateEngineState(newState: ExecutionEngineState): void {
private updateEngineState(newState: ExecutionState): void {
const oldState = this.state;

if (oldState === newState) return;

// The ONLINE is initial state and can reached from offline or auth failed error
if (
newState === ExecutionEngineState.ONLINE &&
!(oldState === ExecutionEngineState.OFFLINE || oldState === ExecutionEngineState.AUTH_FAILED)
) {
return;
}

switch (newState) {
case ExecutionEngineState.ONLINE:
this.logger.info("Execution client became online");
case ExecutionState.ONLINE:
this.logger.info("Execution client became online", {oldState, newState});
break;
case ExecutionEngineState.OFFLINE:
this.logger.error("Execution client went offline");
case ExecutionState.OFFLINE:
this.logger.error("Execution client went offline", {oldState, newState});
break;
case ExecutionEngineState.SYNCED:
this.logger.info("Execution client is synced");
case ExecutionState.SYNCED:
this.logger.info("Execution client is synced", {oldState, newState});
break;
case ExecutionEngineState.SYNCING:
this.logger.warn("Execution client is syncing");
case ExecutionState.SYNCING:
this.logger.warn("Execution client is syncing", {oldState, newState});
break;
case ExecutionEngineState.AUTH_FAILED:
this.logger.error("Execution client authentication failed");
case ExecutionState.AUTH_FAILED:
this.logger.error("Execution client authentication failed", {oldState, newState});
break;
}

Expand Down
27 changes: 17 additions & 10 deletions packages/beacon-node/src/execution/engine/interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {ExecutionPayloadBody} from "./types.js";

export {PayloadIdCache, PayloadId, WithdrawalV1};

export enum ExecutePayloadStatus {
export enum ExecutionPayloadStatus {
/** given payload is valid */
VALID = "VALID",
/** given payload is invalid */
Expand All @@ -30,7 +30,7 @@ export enum ExecutePayloadStatus {
UNSAFE_OPTIMISTIC_STATUS = "UNSAFE_OPTIMISTIC_STATUS",
}

export enum ExecutionEngineState {
export enum ExecutionState {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i am thinking if ExecutionState might cause confusion over reference to actual execution state, in that case original ExecutionEngineState is preferable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I use ExecutionEngineState, then would prefer to use the same vocabulary/wording to other places for variables and functions. That will lead to almost similar to what we had earlier. Will it be ok?

ONLINE = "ONLINE",
OFFLINE = "OFFLINE",
SYNCING = "SYNCING",
Expand All @@ -39,19 +39,26 @@ export enum ExecutionEngineState {
}

export type ExecutePayloadResponse =
| {status: ExecutePayloadStatus.SYNCING | ExecutePayloadStatus.ACCEPTED; latestValidHash: null; validationError: null}
| {status: ExecutePayloadStatus.VALID; latestValidHash: RootHex; validationError: null}
| {status: ExecutePayloadStatus.INVALID; latestValidHash: RootHex | null; validationError: string | null}
| {
status: ExecutePayloadStatus.INVALID_BLOCK_HASH | ExecutePayloadStatus.ELERROR | ExecutePayloadStatus.UNAVAILABLE;
status: ExecutionPayloadStatus.SYNCING | ExecutionPayloadStatus.ACCEPTED;
latestValidHash: null;
validationError: null;
}
| {status: ExecutionPayloadStatus.VALID; latestValidHash: RootHex; validationError: null}
| {status: ExecutionPayloadStatus.INVALID; latestValidHash: RootHex | null; validationError: string | null}
| {
status:
| ExecutionPayloadStatus.INVALID_BLOCK_HASH
| ExecutionPayloadStatus.ELERROR
| ExecutionPayloadStatus.UNAVAILABLE;
latestValidHash: null;
validationError: string;
};

export type ForkChoiceUpdateStatus =
| ExecutePayloadStatus.VALID
| ExecutePayloadStatus.INVALID
| ExecutePayloadStatus.SYNCING;
| ExecutionPayloadStatus.VALID
| ExecutionPayloadStatus.INVALID
| ExecutionPayloadStatus.SYNCING;

export type PayloadAttributes = {
timestamp: number;
Expand Down Expand Up @@ -141,5 +148,5 @@ export interface IExecutionEngine {

getPayloadBodiesByRange(start: number, count: number): Promise<(ExecutionPayloadBody | null)[]>;

getState(): ExecutionEngineState;
getState(): ExecutionState;
}
Loading
Loading