Skip to content

Commit

Permalink
adding documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
eecsliu committed Sep 1, 2020
1 parent 5b292b4 commit 25dd59d
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 21 deletions.
10 changes: 6 additions & 4 deletions cli/determined_cli/tensorboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from determined_common.check import check_eq

from . import render
from .command import Command, launch_command, parse_config, render_event_stream
from .command import Command, parse_config, render_event_stream
from .declarative_argparse import Arg, Cmd

Tensorboard = namedtuple(
Expand All @@ -33,14 +33,16 @@ def to_tensorboard(command: Command) -> Tensorboard:

@authentication_required
def start_tensorboard(args: Namespace) -> None:
# this is the place where you add some stuff related to importing the config
# think about if you need to add the option for manual config options
if args.trial_ids is None and args.experiment_ids is None:
print("Either experiment_ids or trial_ids must be specified.")
sys.exit(1)

config = parse_config(args.config_file, None, [], [])
req_body = {"config": config, "trial_ids": args.trial_ids, "experiment_ids": args.experiment_ids}
req_body = {
"config": config,
"trial_ids": args.trial_ids,
"experiment_ids": args.experiment_ids,
}
resp = api.post(args.master, "tensorboard", body=req_body).json()

if args.detach:
Expand Down
26 changes: 26 additions & 0 deletions docs/how-to/tensorboard.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,32 @@ TensorBoard for multiple experiments use
metrics from persistent storage. It may take up to 5 minutes for TensorBoard
to receive data and render visualizations.

Customizing TensorBoard
-----------------------

Determined supports initializing TensorBoard with a YAML configuration file.
For example, this feature can be useful for running TensorBoard with a
specific container image or for enabling access to additional data with a
bind-mount.

.. code:: yaml

environment:
image: determinedai/environments:cuda-10.0-pytorch-1.4-tf-1.15-cpu-0.5.0
bind_mounts:
- host_path: /my/agent/path
container_path: /my/container/path
read_only: true

Details of configuration settings can be found in the
:ref:`command-notebook-configuration`.

To launch TensorBoard with a config file, use
``det tensorboard start <experiment-id> --config-file=my_config.yaml``.

To view the configuration of a running TensorBoard instance, use
``det tensorboard config <tensorboard_id>``.

Analyzing Specific Trials
-------------------------

Expand Down
10 changes: 9 additions & 1 deletion docs/reference/command-notebook-config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ when the workload is launched:
Options set via ``--config`` take precedence over values specified in
the configuration file.

TensorBoard workloads also support YAML configuration files, but they do not
accept individual configuration options passed directly via ``--config``:

.. code::

$ det tensorboard start experiment_id --config-file=my_config.yaml

Configuration Settings
**********************

Expand Down Expand Up @@ -69,7 +76,8 @@ The following configuration settings are supported:
number of slots on the agent in the cluster with the most slots.
For example, Determined will be unable to schedule a command that
requests 4 slots if the Determined cluster is composed of agents with 2
slots each.
slots each. The number of slots for TensorBoard is fixed at ``0`` and
cannot be changed.

- ``agent_label``: If set, the command/notebook will _only_ be scheduled on
agents that have the given label set. If this is not set (the default
Expand Down
7 changes: 5 additions & 2 deletions master/internal/command/tensorboard_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,11 @@ func (t *tensorboardManager) newTensorBoard(
)
config.Entrypoint = []string{tensorboardEntrypointFile, "--logdir", strings.Join(logDirs, ",")}
config.Resources.Slots = tensorboardResourcesSlots
config.Environment.EnvironmentVariables = model.RuntimeItems{CPU: envVars, GPU: envVars}
config.BindMounts = getMounts(uniqMounts)

cpuEnvVars := append(config.Environment.EnvironmentVariables.CPU, envVars...)
gpuEnvVars := append(config.Environment.EnvironmentVariables.GPU, envVars...)
config.Environment.EnvironmentVariables = model.RuntimeItems{CPU: cpuEnvVars, GPU: gpuEnvVars}
config.BindMounts = append(config.BindMounts, getMounts(uniqMounts)...)

setPodSpec(&config, t.taskContainerDefaults)

Expand Down
28 changes: 14 additions & 14 deletions tools/run-server.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def tail_db_logs():
def run_master():
return proc(
"master",
["../master/build/determined-master", "--config-file", "/usr/local/determined/etc/master.yaml"],
["../master/build/determined-master", "--config-file", "master.yaml"],
logs_handler=lambda line: f"{MAGENTA}determined-master |{CLEAR} {line}"
)

Expand Down Expand Up @@ -83,26 +83,26 @@ def main():
db, master, agent, db_logs = False, None, None, None
try:
master = run_master()
# agent = run_agent()
# db_logs = tail_db_logs()
# if not is_db_running():
# db = True
# subprocess.check_call(["docker-compose", "up", "-d"])

# wait_for_server(5432)
# db_logs.start()
agent = run_agent()
db_logs = tail_db_logs()
if not is_db_running():
db = True
subprocess.check_call(["docker-compose", "up", "-d"])

wait_for_server(5432)
db_logs.start()
master.start()
wait_for_server(8080)
# agent.start()
agent.start()

# Join the agent first so we can exit if the agent fails to connect to
# the master.
# agent.join()
# if agent.exitcode != 0:
# raise Exception(f"agent failed with non-zero exit code {agent.exitcode}")
agent.join()
if agent.exitcode != 0:
raise Exception(f"agent failed with non-zero exit code {agent.exitcode}")

master.join()
# db_logs.join()
db_logs.join()
except KeyboardInterrupt:
pass
finally:
Expand Down

0 comments on commit 25dd59d

Please sign in to comment.