diff --git a/deploy/self-test/Dockerfile b/deploy/self-test/Dockerfile index 5aa4230c..a8063bdd 100644 --- a/deploy/self-test/Dockerfile +++ b/deploy/self-test/Dockerfile @@ -4,7 +4,7 @@ LABEL org.opencontainers.image.source https://github.com/project-codeflare/codef ENV DEBIAN_FRONTEND=noninteractive -RUN apt update && apt -y install sudo curl python3 python3-pip python3-venv git \ +RUN apt update && apt -y install sudo curl python3 python3-pip python3-venv git uuid-runtime \ && pip3 install --no-cache-dir -U pip && pip3 install --no-cache-dir -U setuptools \ && apt -y clean && rm -rf /var/lib/apt/lists/* diff --git a/package-lock.json b/package-lock.json index 7b733f1c..2a4ad762 100644 --- a/package-lock.json +++ b/package-lock.json @@ -899,9 +899,9 @@ "license": "MIT" }, "node_modules/@guidebooks/store": { - "version": "3.3.9", - "resolved": "https://registry.npmjs.org/@guidebooks/store/-/store-3.3.9.tgz", - "integrity": "sha512-OkIor5QgeEmdumsIypqrqe5ak7jXaglsGShn6poGDmv/I5KYtWjE/HlKa/8skrNDHoFb8Tek3hOwHOrleqsC1A==" + "version": "3.3.12", + "resolved": "https://registry.npmjs.org/@guidebooks/store/-/store-3.3.12.tgz", + "integrity": "sha512-t8DRQTQLsh5fXm5CZUQEOH4C/OOG9FlB4QnVgX9MZqY8qNv9LDDRT/gjdqq4Ryim7610XaRbGwbhrPQK27fxhA==" }, "node_modules/@humanwhocodes/config-array": { "version": "0.11.8", @@ -8121,9 +8121,9 @@ } }, "node_modules/madwizard": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/madwizard/-/madwizard-6.1.2.tgz", - "integrity": "sha512-pHsXmJTpnjcrz8yr9dw3KlF+UAduyMH8b3l2mHV5Jj7TH79Uo5LpN/l/pMmMn7AQb737jnwuWWxBuTtPnHcPAQ==", + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/madwizard/-/madwizard-6.2.0.tgz", + "integrity": "sha512-tpxGwQpA1G3NbjEjLzbGJ04J/fSxLVMW6K797MU1rRyQcmbZCNvFc+j1Yr2KH3MyVRTKatAw7anpYV/xYUkOLw==", "dependencies": { "chalk": "^5.2.0", "columnify": "^1.6.0", @@ -14470,7 +14470,7 @@ }, "plugins/plugin-client-default": { "name": "@kui-shell/plugin-client", - "version": "2.6.1", + "version": "2.6.2", "license": "Apache-2.0" }, "plugins/plugin-codeflare": { @@ -14478,13 +14478,13 @@ "version": "0.0.1", "license": "Apache-2.0", "dependencies": { - "@guidebooks/store": "^3.3.9", + "@guidebooks/store": "^3.3.12", "@logdna/tail-file": "^3.0.1", "@patternfly/react-charts": "^6.94.18", "@patternfly/react-core": "^4.276.6", "asciinema-player": "^3.0.1", "chokidar": "^3.5.3", - "madwizard": "^6.1.2", + "madwizard": "^6.2.0", "needle": "^3.2.0", "open": "^8.4.0", "pretty-bytes": "^6.0.0", @@ -15088,9 +15088,9 @@ "dev": true }, "@guidebooks/store": { - "version": "3.3.9", - "resolved": "https://registry.npmjs.org/@guidebooks/store/-/store-3.3.9.tgz", - "integrity": "sha512-OkIor5QgeEmdumsIypqrqe5ak7jXaglsGShn6poGDmv/I5KYtWjE/HlKa/8skrNDHoFb8Tek3hOwHOrleqsC1A==" + "version": "3.3.12", + "resolved": "https://registry.npmjs.org/@guidebooks/store/-/store-3.3.12.tgz", + "integrity": "sha512-t8DRQTQLsh5fXm5CZUQEOH4C/OOG9FlB4QnVgX9MZqY8qNv9LDDRT/gjdqq4Ryim7610XaRbGwbhrPQK27fxhA==" }, "@humanwhocodes/config-array": { "version": "0.11.8", @@ -15336,7 +15336,7 @@ "@kui-shell/plugin-codeflare": { "version": "file:plugins/plugin-codeflare", "requires": { - "@guidebooks/store": "^3.3.9", + "@guidebooks/store": "^3.3.12", "@logdna/tail-file": "^3.0.1", "@patternfly/react-charts": "^6.94.18", "@patternfly/react-core": "^4.276.6", @@ -15347,7 +15347,7 @@ "@types/split2": "^3.2.1", "asciinema-player": "^3.0.1", "chokidar": "^3.5.3", - "madwizard": "^6.1.2", + "madwizard": "^6.2.0", "needle": "^3.2.0", "open": "^8.4.0", "pretty-bytes": "^6.0.0", @@ -19845,9 +19845,9 @@ } }, "madwizard": { - "version": "6.1.2", - "resolved": "https://registry.npmjs.org/madwizard/-/madwizard-6.1.2.tgz", - "integrity": "sha512-pHsXmJTpnjcrz8yr9dw3KlF+UAduyMH8b3l2mHV5Jj7TH79Uo5LpN/l/pMmMn7AQb737jnwuWWxBuTtPnHcPAQ==", + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/madwizard/-/madwizard-6.2.0.tgz", + "integrity": "sha512-tpxGwQpA1G3NbjEjLzbGJ04J/fSxLVMW6K797MU1rRyQcmbZCNvFc+j1Yr2KH3MyVRTKatAw7anpYV/xYUkOLw==", "requires": { "chalk": "^5.2.0", "columnify": "^1.6.0", diff --git a/plugins/plugin-codeflare/package.json b/plugins/plugin-codeflare/package.json index e6958c83..30342579 100644 --- a/plugins/plugin-codeflare/package.json +++ b/plugins/plugin-codeflare/package.json @@ -30,13 +30,13 @@ "@types/split2": "^3.2.1" }, "dependencies": { - "@guidebooks/store": "^3.3.9", + "@guidebooks/store": "^3.3.12", "@logdna/tail-file": "^3.0.1", "@patternfly/react-charts": "^6.94.18", "@patternfly/react-core": "^4.276.6", "asciinema-player": "^3.0.1", "chokidar": "^3.5.3", - "madwizard": "^6.1.2", + "madwizard": "^6.2.0", "needle": "^3.2.0", "open": "^8.4.0", "pretty-bytes": "^6.0.0", diff --git a/plugins/plugin-codeflare/src/controller/appName.ts b/plugins/plugin-codeflare/src/controller/appName.ts new file mode 100644 index 00000000..3ae393a1 --- /dev/null +++ b/plugins/plugin-codeflare/src/controller/appName.ts @@ -0,0 +1,20 @@ +/* + * Copyright 2022 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { productName } from "@kui-shell/client/config.d/name.json" + +const appName = productName.toLowerCase() +export default appName diff --git a/plugins/plugin-codeflare/src/controller/attach.ts b/plugins/plugin-codeflare/src/controller/attach.ts index 4dfbf653..bba10b3f 100644 --- a/plugins/plugin-codeflare/src/controller/attach.ts +++ b/plugins/plugin-codeflare/src/controller/attach.ts @@ -19,6 +19,7 @@ import Debug from "debug" import { MadWizardOptions } from "madwizard" import { Arguments, Capabilities, ParsedOptions } from "@kui-shell/core" +import appName from "./appName" import { DashboardOptions } from "./dashboard" export type Options = ParsedOptions & @@ -57,7 +58,6 @@ export async function attach( process.env.JOB_ID = jobId } - const appName = "codeflare" const options: MadWizardOptions = Object.assign( { appName, diff --git a/plugins/plugin-codeflare/src/controller/index.ts b/plugins/plugin-codeflare/src/controller/index.ts index 28304c28..2ebe8729 100644 --- a/plugins/plugin-codeflare/src/controller/index.ts +++ b/plugins/plugin-codeflare/src/controller/index.ts @@ -25,6 +25,7 @@ import events from "./events" import dashboard from "./dashboard" import description from "./description" import { Options as AttachOptions } from "./attach" +import { ProfileOptions, profileFlags } from "./options" function help() { return `Usage: @@ -42,6 +43,12 @@ export default function registerCodeflareCommands(registrar: Registrar) { description(registrar) registrar.listen("/help", help) + registrar.listen( + "/codeflare/logs", + (args) => import("./logs").then((_) => _.default(args)), + { flags: profileFlags } + ) + registrar.listen( "/codeflare/attach", (args) => import("./attach").then((_) => _.default(args)), diff --git a/plugins/plugin-codeflare/src/controller/logs.ts b/plugins/plugin-codeflare/src/controller/logs.ts new file mode 100644 index 00000000..e6f563f2 --- /dev/null +++ b/plugins/plugin-codeflare/src/controller/logs.ts @@ -0,0 +1,53 @@ +/* + * Copyright 2023 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Arguments } from "@kui-shell/core" +import { MadWizardOptions } from "madwizard" + +import appName from "./appName" +import { ProfileOptions } from "./options" + +export default async function logs(args: Arguments) { + // Display logs for this jobID; if not provided, the user will be + // prompted (via the guidebook) to choose one + const jobId = args.argvNoOptions[args.argvNoOptions.indexOf("logs") + 1] + if (jobId) { + process.env.JOB_ID = jobId + } + + // play this guidebook + const guidebook = jobId === undefined ? "ml/ray/aggregator" : "ml/ray/aggregator/with-jobid" + + // but only interactive starting here + const ifor = + jobId === undefined + ? ["ml/ray/cluster/choose", "ml/ray/cluster/choose/kubernetes", "ml/ray/run/choose/list-jobs"] + : undefined + + const options: MadWizardOptions = Object.assign({ + appName, + store: args.parsedOptions.s || process.env.GUIDEBOOK_STORE, + verbose: args.parsedOptions.V, + profile: args.parsedOptions.profile || (await import("madwizard").then((_) => _.Profiles.lastUsed())), + interactive: false, + ifor, + }) + + const { guide } = await import("madwizard/dist/fe/cli") + + await guide([appName, "guide", guidebook], undefined, options) + return true +} diff --git a/plugins/plugin-codeflare/src/controller/options.ts b/plugins/plugin-codeflare/src/controller/options.ts new file mode 100644 index 00000000..2a0223ed --- /dev/null +++ b/plugins/plugin-codeflare/src/controller/options.ts @@ -0,0 +1,35 @@ +/* + * Copyright 2023 The Kubernetes Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { CommandOptions, ParsedOptions } from "@kui-shell/core" + +export type ProfileOptions = ParsedOptions & { + p: string + profile: string + s: string + store: string + V: string + verbose: string +} + +export const profileFlags: CommandOptions["flags"] = { + boolean: ["V", "verbose"], + alias: { + verbose: ["V"], + profile: ["p"], + store: ["s"], + }, +} diff --git a/tests/kind/profiles/gpu1/keep-it-simple b/tests/kind/profiles/gpu1/keep-it-simple index d3013f7c..c68de727 100644 --- a/tests/kind/profiles/gpu1/keep-it-simple +++ b/tests/kind/profiles/gpu1/keep-it-simple @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } diff --git a/tests/kind/profiles/non-gpu1/keep-it-simple b/tests/kind/profiles/non-gpu1/keep-it-simple index a0f55f48..64aff92f 100644 --- a/tests/kind/profiles/non-gpu1/keep-it-simple +++ b/tests/kind/profiles/non-gpu1/keep-it-simple @@ -1,8 +1,8 @@ { "name": "keep-it-simple", "creationTime": 1664149787016, - "lastModifiedTime": 1664149821230, - "lastUsedTime": 1675364320141, + "lastModifiedTime": 1676828268772, + "lastUsedTime": 1676836671254, "choices": { "madwizard/apriori/use-gpu": "don't use gpus", "madwizard/apriori/arch": "x64", @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } \ No newline at end of file diff --git a/tests/kind/profiles/non-gpu1/mcad-coscheduler b/tests/kind/profiles/non-gpu1/mcad-coscheduler index 0b42b702..3e5f76be 100644 --- a/tests/kind/profiles/non-gpu1/mcad-coscheduler +++ b/tests/kind/profiles/non-gpu1/mcad-coscheduler @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler", "kubernetes/mcad/choose/scheduler": "MCAD with the Advanced Coscheduler" } diff --git a/tests/kind/profiles/non-gpu1/mcad-default b/tests/kind/profiles/non-gpu1/mcad-default index ed252fed..22f966b0 100644 --- a/tests/kind/profiles/non-gpu1/mcad-default +++ b/tests/kind/profiles/non-gpu1/mcad-default @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler", "kubernetes/mcad/choose/scheduler": "MCAD with the Default Kubernetes Scheduler" } diff --git a/tests/kind/profiles/non-gpu1/mcad-preinstalled b/tests/kind/profiles/non-gpu1/mcad-preinstalled index a8cf8dbf..11deaa07 100644 --- a/tests/kind/profiles/non-gpu1/mcad-preinstalled +++ b/tests/kind/profiles/non-gpu1/mcad-preinstalled @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Use the Multi-user Enhanced Kubernetes Scheduler", "kubernetes/mcad/choose/scheduler": "My administrator has already installed and configured MCAD" } diff --git a/tests/kind/profiles/non-gpu1/ray-autoscaler b/tests/kind/profiles/non-gpu1/ray-autoscaler index a200e149..0edd5c51 100644 --- a/tests/kind/profiles/non-gpu1/ray-autoscaler +++ b/tests/kind/profiles/non-gpu1/ray-autoscaler @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Use the Ray Autoscaler" } } diff --git a/tests/kind/profiles/non-gpu2/keep-it-simple b/tests/kind/profiles/non-gpu2/keep-it-simple index a72b481c..42a0a18b 100644 --- a/tests/kind/profiles/non-gpu2/keep-it-simple +++ b/tests/kind/profiles/non-gpu2/keep-it-simple @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } diff --git a/tests/kind/profiles/non-gpu3/keep-it-simple b/tests/kind/profiles/non-gpu3/keep-it-simple index a72b481c..42a0a18b 100644 --- a/tests/kind/profiles/non-gpu3/keep-it-simple +++ b/tests/kind/profiles/non-gpu3/keep-it-simple @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } diff --git a/tests/kind/profiles/non-gpu4/keep-it-simple b/tests/kind/profiles/non-gpu4/keep-it-simple index a72b481c..42a0a18b 100644 --- a/tests/kind/profiles/non-gpu4/keep-it-simple +++ b/tests/kind/profiles/non-gpu4/keep-it-simple @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } diff --git a/tests/kind/profiles/roberta-1gpu/keep-it-simple b/tests/kind/profiles/roberta-1gpu/keep-it-simple index ffca7b8e..14842a2f 100644 --- a/tests/kind/profiles/roberta-1gpu/keep-it-simple +++ b/tests/kind/profiles/roberta-1gpu/keep-it-simple @@ -19,6 +19,7 @@ "kubernetes/choose/ns": "default", "ml/ray/storage/s3/maybe": "My code does not use Ray Workflows", "ml/ray/cluster/choose": "codeflare-test-ray-cluster", + "ml/ray/cluster/choose/kubernetes": "codeflare-test-ray-cluster", "ml/ray/cluster/kubernetes/choose-pod-scheduler": "Keep It Simple" } } diff --git a/tests/kind/run.sh b/tests/kind/run.sh index da7db8bb..a609bcb0 100755 --- a/tests/kind/run.sh +++ b/tests/kind/run.sh @@ -21,6 +21,11 @@ export KUBE_POD_SCHEDULER=default # use a fixed cluster name export RAY_KUBE_CLUSTER_NAME=codeflare-test-ray-cluster +# this forces bin/codeflare to run in headless mode using a platform +# nodejs runtime (rather than using electron via ELECTRON_RUN_AS_NODE) +export NODE=node +export CODEFLARE_HEADLESS_HOME=${CODEFLARE_HEADLESS_HOME-$ROOT/dist/headless} + while getopts "ab:f:is:" opt do case $opt in @@ -52,16 +57,6 @@ function start_kind { fi } -# build docker image of log aggregator just for this test and load it -# into kind -function build { - if [ -n "$TEST_LOG_AGGREGATOR" ]; then - export LOG_AGGREGATOR_IMAGE=codeflare-log-aggregator:test - FAST=true npm run build:docker:log-aggregator - kind load docker-image $LOG_AGGREGATOR_IMAGE --name $CLUSTER - fi -} - # # !!!! This is the main work of the test !!!! # @@ -70,20 +65,15 @@ function build { # e.g. by looking for "succeeded" (see below) # function run { - local profileFull=$1 - local variant=$(dirname $profileFull) - local profile=$(basename $profileFull) + local profileFull="$1" + local variant=$(dirname "$profileFull") + local profile=$(basename "$profileFull") export MWPROFILES_PATH="$MWPROFILES_PATH_BASE"/$variant mkdir -p "$MWPROFILES_PATH" local guidebook=${2-$GUIDEBOOK} local yes=$([ -z "$FORCE_ALL" ] && [ "$FORCE" != "$profileFull" ] && [ -f "$MWPROFILES_PATH/$profile" ] && echo "--yes" || echo "") - # this forces bin/codeflare to run in headless mode using a platform - # nodejs runtime (rather than using electron via ELECTRON_RUN_AS_NODE) - export NODE=node - export CODEFLARE_HEADLESS_HOME=${CODEFLARE_HEADLESS_HOME-$ROOT/dist/headless} - local PRE="$MWPROFILES_PATH_BASE"/../profiles.d/$profile/pre if [ -f "$PRE" ]; then echo "[Test] Running pre guidebooks for profile=$profile" @@ -94,87 +84,40 @@ function run { GUIDEBOOK_NAME="main-job-run" "$ROOT"/bin/codeflare -p $profile $yes $guidebook } -# Undeploy any prior ray cluster -function cleanup_ray { - local profileFull=$1 - local variant=$(dirname $profileFull) - local profile=$(basename $profileFull) - export MWPROFILES_PATH="$MWPROFILES_PATH_BASE"/$variant - - echo "[Test] Undeploying any prior ray cluster with variant=$variant profile=$profile" - (GUIDEBOOK_NAME="ray-undeploy" "$ROOT"/bin/codeflare -p $profile -y ml/ray/stop/kubernetes \ - || exit 0) -} - -# Undeploy any prior log aggregator -function cleanup_log_aggregator { - local profileFull=$1 - local variant=$(dirname $profileFull) - local profile=$(basename $profileFull) - export MWPROFILES_PATH="$MWPROFILES_PATH_BASE"/$variant - - echo "[Test] Undeploying any prior ray cluster" - (GUIDEBOOK_NAME="ray-undeploy" "$ROOT"/bin/codeflare -p $profile -y ml/ray/aggregator/in-cluster/client-side/undeploy \ - || exit 0) -} - # -# Attach a log aggregator +# Attach a log streamer # - $1: variant/profile e.g. non-gpu1/keep-it-simple # - $2: JOB_ID # function attach { - local profileFull=$1 - local variant=$(dirname $profileFull) - local profile=$(basename $profileFull) + local profileFull="$1" + local variant=$(dirname "$profileFull") + local profile=$(basename "$profileFull") export MWPROFILES_PATH="$MWPROFILES_PATH_BASE"/$variant local jobId=$2 + LOGFILE=$(mktemp) + echo "[Test] Attaching variant=$variant profile=$profile jobId=$jobId" - GUIDEBOOK_NAME="log-aggregator-attach" "$ROOT"/bin/codeflare -V -p $profile attach -a $jobId --wait & + GUIDEBOOK_NAME="log-streamer" "$ROOT"/bin/codeflare -V -p $profile logs $jobId > $LOGFILE & ATTACH_PID=$! - echo "[Test] Attach underway" + echo "[Test] Attach underway, streaming to $LOGFILE" } -# @return path to locally captured logs for the given jobId, run in the given profile -function localpath { - local profile=$1 - local jobId=$2 - - local BASE=$(node -e "import('madwizard/dist/profiles/index.js').then(_ => _.guidebookJobDataPath({ profile: \"$profile\" })).then(console.log)") - echo "$BASE/$jobId" -} - -# Validate the output of the log aggregator +# Validate the output of the log streamer function validateAttach { - local profileFull=$1 - local variant=$(dirname $profileFull) - local profile=$(basename $profileFull) - export MWPROFILES_PATH="$MWPROFILES_PATH_BASE"/$variant + local LOGFILE=$1 - local jobId=$2 - - RUNDIR=$(localpath $profile $jobId) - - if [ ! -d "$RUNDIR" ]; then - echo "[Test] ❌ Logs were not captured locally: missing logdir" - exit 1 - elif [ ! -f "$RUNDIR/jobid.txt" ]; then - echo "[Test] ❌ Logs were not captured locally: missing jobid.txt" - exit 1 - elif [ ! -f "$RUNDIR/logs/job.txt" ]; then - echo "[Test] ❌ Logs were not captured locally: missing logs/job.txt" - exit 1 - elif [ ! -s "$RUNDIR/logs/job.txt" ]; then - echo "[Test] ❌ Logs were not captured locally: empty logs/job.txt" + if [ ! -f "$LOGFILE" ]; then + echo "[Test] ❌ Logs were not captured locally: missing log file" exit 1 fi # TODO the expected output is going to be profile-specific - grep -q 'Final result' "$RUNDIR/logs/job.txt" \ + grep -q 'Final result' "$LOGFILE" \ && echo "[Test] ✅ Logs seem good!" \ - || (echo "[Test] ❌ Logs were not captured locally: job logs incomplete" && exit 1) + || (echo "[Test] ❌ Logs were not captured locally to $LOGFILE: job logs incomplete" && ls -l "$LOGFILE" && cat "$LOGFILE" && exit 1) } function logpoller { @@ -203,12 +146,10 @@ function onexit { if [ -n "$EVENTS_PID" ]; then (pkill -P $EVENTS_PID || exit 0) fi - if [ -n "$AGGREGATOR_POLLER_PID" ]; then - (pkill -P $AGGREGATOR_POLLER_PID || exit 0) - fi if [ -z "$NO_KIND" ]; then # don't kill ourselves if we're running in a container + sleep 10 echo "[Test] pkilling ourselves to help with cleanup" pkill -P $$ fi @@ -225,9 +166,6 @@ function debug { logpoller ray-node-type=worker & WORKER_POLLER_PID=$! - logpoller app=guidebook-log-aggregator & - AGGREGATOR_POLLER_PID=$! - kubectl get events -w & EVENTS_PID=$! fi @@ -246,51 +184,45 @@ function test { # allocate JOB_ID (requires node and `uuid` npm; but we should # have both for codeflare-cli dev) - if [ -n "$TEST_LOG_AGGREGATOR" ]; then - export JOB_ID=$(node -e 'console.log(require("uuid").v4())') - echo "[Test] Using JOB_ID=$JOB_ID" - fi + export JOB_ID=$(uuidgen | tr '[:upper:]' '[:lower:]') + echo "[Test] Using JOB_ID=$JOB_ID" - # 0. clean up prior ray clusters - # cleanup_ray "$1" - # 1. launch codeflare guidebook run run "$1" | tee $OUTPUT & - RUN_PID=$! + local RUN_PID=$! + echo "[Test] Run submitted pid=$RUN_PID" - # 2. if asked, attach a log aggregator - if [ -n "$TEST_LOG_AGGREGATOR" ]; then - cleanup_log_aggregator "$1" + # wait to attach until the job has been submitted + while true; do + grep -q 'Run it' "$OUTPUT" && break + echo "[Test] Waiting to attach log streamer..." + sleep 1 + done - # wait to attach until the job has been submitted - # while true; do - # grep -q 'submitted successfully' "$OUTPUT" && break - # sleep 1 - # done - sleep 10 - - attach "$1" "$JOB_ID" - fi + echo "[Test] Preparing to attach log streamer jobid=$JOB_ID" + attach "$1" "$JOB_ID" wait $RUN_PID echo "[Test] Run has finished" # the job should be done now - # 3. if asked, now validate the log aggregator - if [ -n "$TEST_LOG_AGGREGATOR" ]; then - # TODO validate run status in captured logs; should be SUCCESSFUL - validateAttach "$1" "$JOB_ID" - fi + # 3. if asked, now validate the log streamer + # TODO validate run status in captured logs; should be SUCCESSFUL + validateAttach $LOGFILE # 4. validate the output of the job itself echo "[Test] Validating run output" - grep succeeded $OUTPUT + if grep -q succeeded $OUTPUT ; then + echo "[Test] ✅ Job submit output seems good!" + else + echo "[Test] ❌ Job submit output seems incomplete" + exit 1 + fi } trap onexit INT trap onexit EXIT start_kind -build debug test "$1"