Skip to content

Commit

Permalink
fix: dashboard should not show gpu grids for runs not using gpus
Browse files Browse the repository at this point in the history
  • Loading branch information
starpit committed Apr 1, 2023
1 parent 0c4c94c commit 8a3cdb3
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,42 @@ export async function jobIdFrom(args: Arguments<Options>, cmd: string, offset =
return { jobId, profile }
}

/**
 * Build the grid model of the given `kind` for `jobId` in `profile`.
 *
 * The tails for the requested kind are opened via `tailf`, then handed to
 * either the `status` model (which additionally gets a "colorbrewer"
 * default theme) or the `utilization` model.
 *
 * @param kind which grid to build
 * @param profile profile that owns the job
 * @param jobId job whose data is visualized
 * @param opts `demo` and `theme` settings forwarded to the grid model
 * @return the grid model for `kind`
 */
async function gridFor(
  kind: SupportedGrid,
  profile: string,
  jobId: string,
  opts: Pick<Options, "demo" | "theme">
): Promise<GridSpec> {
  const tails = await tailf(kind, profile, jobId)

  // every kind other than "status" is a utilization grid
  if (kind !== "status") {
    return utilization(kind, tails, opts)
  }

  const { demo, theme } = opts
  return status(tails, { demo, theme, themeDefault: "colorbrewer" })
}

/**
 * Build every grid model that is relevant for `jobId` in `profile`.
 *
 * The GPU grids ("gpu%" and "gpumem%") are only included when running in
 * demo mode, or when the job's recorded environment says it uses GPUs.
 *
 * @param profile profile that owns the job
 * @param jobId job whose data is visualized
 * @param opts `demo` and `theme` settings forwarded to each grid
 * @return the resolved grid models, in display order; the `null` entry is a
 *   placeholder (a newline, per the inline comment)
 */
async function allGridsFor(profile: string, jobId: string, opts: Pick<Options, "demo" | "theme">) {
  // demo mode short-circuits the lookup of the job's env
  const usesGpus = opts.demo || (await (await import("../env.js")).usesGpus(profile, jobId))

  return Promise.all([
    gridFor("status", profile, jobId, opts),
    null, // newline
    gridFor("cpu%", profile, jobId, opts),
    ...(usesGpus ? [gridFor("gpu%", profile, jobId, opts)] : []),
    gridFor("mem%", profile, jobId, opts),
    ...(usesGpus ? [gridFor("gpumem%", profile, jobId, opts)] : []),
  ])
}

export default async function dashboard(args: Arguments<Options>, cmd: "db" | "dashboard") {
const { theme } = args.parsedOptions

Expand All @@ -98,25 +134,11 @@ export default async function dashboard(args: Arguments<Options>, cmd: "db" | "d
throw new Error(usage(cmd, ["all"]))
}

const gridFor = async (kind: SupportedGrid): Promise<GridSpec> => {
const tails = await tailf(kind, profile, jobId)
return kind === "status"
? status(tails, { demo, theme, themeDefault: "colorbrewer" })
: utilization(kind, tails, { demo, theme })
}

const gridForA = async (kind: KindA): Promise<null | GridSpec | (null | GridSpec)[]> => {
if (kind === "all") {
return Promise.all([
gridFor("status"),
null, // newline
gridFor("cpu%"),
gridFor("gpu%"),
gridFor("mem%"),
gridFor("gpumem%"),
])
return allGridsFor(profile, jobId, { demo, theme })
} else if (isSupportedGrid(kind)) {
return gridFor(kind)
return gridFor(kind, profile, jobId, { demo, theme })
} else {
return null
}
Expand Down
8 changes: 5 additions & 3 deletions plugins/plugin-codeflare-dashboard/src/controller/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,18 @@ export default async function dump(args: Arguments<Options>) {
// and for which jobId and profile?
const { jobId, profile } = await jobIdFrom(args, "dump")

if (!(isValidKind(kind) || kind === "path")) {
if (!(isValidKind(kind) || kind === "path" || kind === "env")) {
throw new Error(usage())
} else if (!jobId) {
throw new Error(usage())
}

if (kind === "path") {
// print the path to the data captured for the given jobId in the given profile
const { dirname } = await import("path")
return Array.from(new Set(await pathsFor("cpu%", profile, jobId).then((_) => _.map((_) => dirname(dirname(_))))))[0]
return import("./path.js").then((_) => _.pathFor(profile, jobId))
} else if (kind === "env") {
// print job env vars
return JSON.stringify(await import("./env.js").then((_) => _.getJobEnv(profile, jobId)), undefined, 2)
} else if (!args.parsedOptions.f) {
const { createReadStream } = await import("fs")
await Promise.all(
Expand Down
59 changes: 59 additions & 0 deletions plugins/plugin-codeflare-dashboard/src/controller/env.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2023 The Kubernetes Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { join } from "path"

/** One `{ name, value }` entry of a job's `env.json` */
type NameValue = { name: string; value: unknown }

/**
 * Type guard for a single `env.json` entry.
 *
 * @param obj candidate entry, typically one element of a `JSON.parse` result
 * @return true iff `obj` is a non-null object with a string `name`
 */
function isNameValue(obj: object): obj is NameValue {
  // `typeof null === "object"`, so null has to be rejected explicitly;
  // otherwise the `name` lookup below throws a TypeError instead of letting
  // the caller report a malformed file.
  if (obj === null || typeof obj !== "object") {
    return false
  }

  // NOTE: `value` is intentionally left unconstrained. The previous check,
  // `typeof nv.value !== undefined`, compared a string against `undefined`
  // and was therefore always true, so entries without a `value` have always
  // been accepted; that permissive behavior is preserved here.
  return typeof (obj as NameValue).name === "string"
}

/**
 * Type guard for the top-level shape of `env.json`.
 *
 * @param obj candidate value, typically a `JSON.parse` result
 * @return true iff `obj` is an array whose elements all pass `isNameValue`
 */
function isNameValueArray(obj: object): obj is NameValue[] {
  if (!Array.isArray(obj)) {
    return false
  }

  const entries = obj as NameValue[]
  return entries.every(isNameValue)
}

/**
 * Convert a list of `{ name, value }` entries into a name-keyed record.
 * When a name occurs more than once, the last occurrence wins.
 *
 * @param nva entries to convert
 * @return record mapping each `name` to its `value`
 */
function toRecord(nva: NameValue[]): Record<string, unknown> {
  const record = {} as Record<string, unknown>

  for (const entry of nva) {
    record[entry.name] = entry.value
  }

  return record
}

/**
 * Load the environment variables recorded for a job.
 *
 * Reads `env.json` from the directory reported by `pathFor` and parses it
 * as an array of `{ name, value }` entries.
 *
 * @param profile profile that owns the job
 * @param jobId job whose env is loaded
 * @return record mapping each env var name to its recorded value
 * @throws Error if the parsed file is not an array of name/value entries;
 *   file-read and JSON-parse failures propagate unchanged
 */
export async function getJobEnv(profile: string, jobId: string): Promise<Record<string, unknown>> {
  const { pathFor } = await import("./path.js")
  const jobDir = await pathFor(profile, jobId)

  const { readFile } = await import("fs/promises")
  const contents = await readFile(join(jobDir, "env.json"))
  const parsed = JSON.parse(contents.toString())

  if (isNameValueArray(parsed)) {
    return toRecord(parsed)
  }

  throw new Error("Malformatted env.json file")
}

/**
 * Number of GPUs requested by a job, according to its recorded `NUM_GPUS`
 * env var.
 *
 * @param profile profile that owns the job
 * @param jobId job to inspect
 * @return the numeric `NUM_GPUS` value (strings are parsed base-10), or 0
 *   when the variable is absent, of another type, or not parseable
 * @throws whatever `getJobEnv` throws (e.g. unreadable or malformed env.json)
 */
export async function numGpus(profile: string, jobId: string): Promise<number> {
  const env = await getJobEnv(profile, jobId)

  const raw = env["NUM_GPUS"]
  const count = typeof raw === "number" ? raw : typeof raw === "string" ? parseInt(raw, 10) : 0

  // parseInt yields NaN for strings such as "" or "auto"; report those as
  // "no GPUs" rather than leaking NaN to callers
  return Number.isFinite(count) ? count : 0
}

/**
 * Does the job's recorded environment say it uses GPUs?
 *
 * @param profile profile that owns the job
 * @param jobId job to inspect
 * @return true iff `numGpus` reports a count greater than zero
 */
export async function usesGpus(profile: string, jobId: string): Promise<boolean> {
  const gpuCount = await numGpus(profile, jobId)
  return gpuCount > 0
}
23 changes: 23 additions & 0 deletions plugins/plugin-codeflare-dashboard/src/controller/path.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2023 The Kubernetes Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { pathsFor } from "./dashboard/tailf.js"

/**
 * Locate the directory holding the data captured for a job.
 *
 * Takes the "cpu%" paths reported by `pathsFor`, strips the last two path
 * segments from each, and returns the first distinct result.
 *
 * @param profile profile that owns the job
 * @param jobId job whose data directory is wanted
 * @return path to the data captured for the given jobId in the given
 *   profile; `undefined` at runtime if `pathsFor` reports no paths
 */
export async function pathFor(profile: string, jobId: string) {
  const { dirname } = await import("path")

  const cpuPaths = await pathsFor("cpu%", profile, jobId)
  const distinctRoots = new Set(cpuPaths.map((filepath) => dirname(dirname(filepath))))

  return Array.from(distinctRoots)[0]
}

0 comments on commit 8a3cdb3

Please sign in to comment.