Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Separation of node health and initialization state from rln_relay #2612

Merged
merged 7 commits into from
Apr 23, 2024
75 changes: 48 additions & 27 deletions apps/wakunode2/wakunode2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ import
../../waku/common/logging,
../../waku/factory/external_config,
../../waku/factory/networks_config,
../../waku/factory/app
../../waku/factory/app,
../../waku/node/health_monitor

logScope:
topics = "wakunode main"
Expand Down Expand Up @@ -88,54 +89,74 @@ when isMainModule:
doInspectRlnDb(conf)
of noCommand:
case conf.clusterId
# cluster-id=0
of 0:
let clusterZeroConf = ClusterConf.ClusterZeroConf()
conf.pubsubTopics = clusterZeroConf.pubsubTopics
# TODO: Write some template to "merge" the configs
# cluster-id=1 (aka The Waku Network)
of 1:
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
if len(conf.shards) != 0:
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
else:
conf.pubsubTopics = twnClusterConf.pubsubTopics

# Override configuration
conf.maxMessageSize = twnClusterConf.maxMessageSize
conf.clusterId = twnClusterConf.clusterId
conf.rlnRelay = twnClusterConf.rlnRelay
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
conf.discv5Discovery = twnClusterConf.discv5Discovery
conf.discv5BootstrapNodes =
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
# cluster-id=0
of 0:
let clusterZeroConf = ClusterConf.ClusterZeroConf()
conf.pubsubTopics = clusterZeroConf.pubsubTopics
# TODO: Write some template to "merge" the configs
# cluster-id=1 (aka The Waku Network)
of 1:
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
if len(conf.shards) != 0:
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
else:
discard
conf.pubsubTopics = twnClusterConf.pubsubTopics

# Override configuration
conf.maxMessageSize = twnClusterConf.maxMessageSize
conf.clusterId = twnClusterConf.clusterId
conf.rlnRelay = twnClusterConf.rlnRelay
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
conf.discv5Discovery = twnClusterConf.discv5Discovery
conf.discv5BootstrapNodes =
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
else:
discard

info "Running nwaku node", version = app.git_version
logConfig(conf)

# NOTE: {.threadvar.} is used to make the global variable GC safe for the closure that uses it.
# It will always be called from the main thread anyway.
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor
nodeHealthMonitor = WakuNodeHealthMonitor()
nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING)

let restServerRes = startRestServerEsentials(nodeHealthMonitor, conf)
if restServerRes.isErr():
error "Starting REST server failed.", error = $restServerRes.error()
quit(QuitFailure)

var wakunode2 = App.init(conf).valueOr:
error "App initialization failed", error = error
quit(QuitFailure)

nodeHealthMonitor.setNode(wakunode2.node)

wakunode2.startApp().isOkOr:
error "Starting app failed", error = error
quit(QuitFailure)

if conf.rest and not restServerRes.isErr():
wakunode2.restServer = restServerRes.value

wakunode2.setupMonitoringAndExternalInterfaces().isOkOr:
error "Starting monitoring and external interfaces failed", error = error
quit(QuitFailure)

nodeHealthMonitor.setOverallHealth(HealthStatus.READY)

debug "Setting up shutdown hooks"
## Setup shutdown hooks for this process.
## Stop node gracefully on shutdown.

proc asyncStopper(node: App) {.async: (raises: [Exception]).} =
nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)
await node.stop()
quit(QuitSuccess)

Expand Down
55 changes: 55 additions & 0 deletions scripts/chkhealth.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env bash
#
# Query the /health endpoint of a node over HTTP and print the result.
#
# Usage: chkhealth.sh [-p|--plain] [host:port]
#   host:port    address to query (default: localhost:8645)
#   -p, --plain  print the raw response instead of pretty-printing it with jq
#
# Exits with status 1 on an unknown option, when curl is not installed, or
# when no response is received from the endpoint.

plain_text_out=false

# Parse command line arguments
POSITIONAL_ARGS=()

while [[ $# -gt 0 ]]; do
  case $1 in
    -p|--plain)
      plain_text_out=true
      shift # past argument
      ;;
    -*)
      # "-*" also matches long options such as "--foo", so no separate
      # "--*" pattern is needed.
      echo "Unknown option $1"
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1") # save positional arg
      shift # past argument
      ;;
  esac
done

set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters

# Use the first positional argument as the address, if one was given
if [[ -n "$1" ]]; then
  ip_address="$1"
else
  ip_address="localhost:8645"
fi

# check if curl is available
if ! command -v curl &> /dev/null; then
  echo "curl could not be found"
  exit 1
fi

# GET is curl's default method. A bare "GET" word on the command line would
# be treated as an additional URL to fetch, so it is intentionally omitted.
response=$(curl -s "http://${ip_address}/health")

# An empty response means the request failed or the endpoint returned nothing
if [[ -z "${response}" ]]; then
  echo -e "$(date +'%H:%M:%S')\tnode health status is: unknown\n"
  exit 1
fi

# Pretty-print with jq when it is installed, unless plain output was requested
if ! command -v jq &> /dev/null || [[ "$plain_text_out" = true ]]; then
  echo -e "$(date +'%H:%M:%S')\tnode health status is: ${response}\n"
else
  echo -e "$(date +'%H:%M:%S')\tnode health status is:\n"
  echo "${response}" | jq .
fi
3 changes: 2 additions & 1 deletion tests/all_tests_waku.nim
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ import
./wakunode_rest/test_rest_filter,
./wakunode_rest/test_rest_lightpush,
./wakunode_rest/test_rest_admin,
./wakunode_rest/test_rest_cors
./wakunode_rest/test_rest_cors,
./wakunode_rest/test_rest_health

import ./waku_rln_relay/test_all

Expand Down
25 changes: 17 additions & 8 deletions tests/wakunode_rest/test_rest_health.nim
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import
../../waku/waku_api/rest/health/handlers as health_api,
../../waku/waku_api/rest/health/client as health_api_client,
../../waku/waku_rln_relay,
../../waku/node/health_monitor,
../testlib/common,
../testlib/testutils,
../testlib/wakucore,
Expand All @@ -35,17 +36,20 @@ proc testWakuNode(): WakuNode =

suite "Waku v2 REST API - health":
# TODO: better test for health
xasyncTest "Get node health info - GET /health":
asyncTest "Get node health info - GET /health":
# Given
let node = testWakuNode()
let healthMonitor = WakuNodeHealthMonitor()
await node.start()
await node.mountRelay()

healthMonitor.setOverallHealth(HealthStatus.INITIALIZING)

let restPort = Port(58001)
let restAddress = parseIpAddress("0.0.0.0")
let restServer = WakuRestServerRef.init(restAddress, restPort).tryGet()

installHealthApiHandler(restServer.router, node)
installHealthApiHandler(restServer.router, healthMonitor)
restServer.start()
let client = newRestHttpClient(initTAddress(restAddress, restPort))

Expand All @@ -54,9 +58,10 @@ suite "Waku v2 REST API - health":

# Then
check:
response.status == 503
$response.contentType == $MIMETYPE_TEXT
response.data == "Node is not ready"
response.status == 200
$response.contentType == $MIMETYPE_JSON
response.data ==
HealthReport(nodeHealth: HealthStatus.INITIALIZING, protocolsHealth: @[])

# now kick in rln (currently the only check for health)
await node.mountRlnRelay(
Expand All @@ -67,15 +72,19 @@ suite "Waku v2 REST API - health":
rlnRelayTreePath: genTempPath("rln_tree", "wakunode"),
)
)

healthMonitor.setNode(node)
healthMonitor.setOverallHealth(HealthStatus.READY)
# When
response = await client.healthCheck()

# Then
check:
response.status == 200
$response.contentType == $MIMETYPE_TEXT
response.data == "Node is healthy"
$response.contentType == $MIMETYPE_JSON
response.data.nodeHealth == HealthStatus.READY
response.data.protocolsHealth.len() == 1
response.data.protocolsHealth[0].protocol == "Rln Relay"
response.data.protocolsHealth[0].health == HealthStatus.READY

await restServer.stop()
await restServer.closeWait()
Expand Down
Loading
Loading