From 25dd59da4de0b54b0f0b66b11c19c13e4b202486 Mon Sep 17 00:00:00 2001
From: Eric Liu <eliu@determined.ai>
Date: Mon, 31 Aug 2020 10:49:05 -0700
Subject: [PATCH] adding documentation

---
 cli/determined_cli/tensorboard.py             | 10 ++++---
 docs/how-to/tensorboard.txt                   | 26 +++++++++++++++++
 docs/reference/command-notebook-config.txt    | 10 ++++++-
 .../internal/command/tensorboard_manager.go   |  7 +++--
 tools/run-server.py                           | 28 +++++++++----------
 5 files changed, 60 insertions(+), 21 deletions(-)
diff --git a/cli/determined_cli/tensorboard.py b/cli/determined_cli/tensorboard.py
index 14aa5a349731..2c54ab44d3c2 100644
--- a/cli/determined_cli/tensorboard.py
+++ b/cli/determined_cli/tensorboard.py
@@ -10,7 +10,7 @@
 from determined_common.check import check_eq
 
 from . import render
-from .command import Command, launch_command, parse_config, render_event_stream
+from .command import Command, parse_config, render_event_stream
 from .declarative_argparse import Arg, Cmd
 
 Tensorboard = namedtuple(
@@ -33,14 +33,16 @@ def to_tensorboard(command: Command) -> Tensorboard:
 
 @authentication_required
 def start_tensorboard(args: Namespace) -> None:
-    # this is the place where you add some stuff related to importing the config
-    # think about if you need to add the option for manual config options
     if args.trial_ids is None and args.experiment_ids is None:
         print("Either experiment_ids or trial_ids must be specified.")
         sys.exit(1)
 
     config = parse_config(args.config_file, None, [], [])
-    req_body = {"config": config, "trial_ids": args.trial_ids, "experiment_ids": args.experiment_ids}
+    req_body = {
+        "config": config,
+        "trial_ids": args.trial_ids,
+        "experiment_ids": args.experiment_ids,
+    }
     resp = api.post(args.master, "tensorboard", body=req_body).json()
 
     if args.detach:
diff --git a/docs/how-to/tensorboard.txt b/docs/how-to/tensorboard.txt
index 31b0db007219..0b91e2d23ecc 100644
--- a/docs/how-to/tensorboard.txt
+++ b/docs/how-to/tensorboard.txt
@@ -51,6 +51,32 @@ TensorBoard for multiple experiments use
   metrics from persistent storage. It may take up to 5 minutes for TensorBoard
   to receive data and render visualizations.
 
+Customizing TensorBoards
+------------------------
+
+Determined supports initializing TensorBoard with a YAML configuration file.
+For example, this feature can be useful for running TensorBoard with a
+specific container image or for enabling access to additional data with a
+bind-mount.
+
+.. code:: yaml
+
+  environment:
+    image: determinedai/environments:cuda-10.0-pytorch-1.4-tf-1.15-cpu-0.5.0
+  bind_mounts:
+    - host_path: /my/agent/path
+      container_path: /my/container/path
+      read_only: true
+
+Details of configuration settings can be found in the
+:ref:`command-notebook-configuration`.
+
+To launch TensorBoard with a config file, use
+``det tensorboard start <experiment-id> --config-file=my_config.yaml``.
+
+To view the configuration of a running TensorBoard instance, use
+``det tensorboard config <tensorboard_id>``.
+
 Analyzing Specific Trials
 -------------------------
 
diff --git a/docs/reference/command-notebook-config.txt b/docs/reference/command-notebook-config.txt
index b87e37f42467..b806c5401f57 100644
--- a/docs/reference/command-notebook-config.txt
+++ b/docs/reference/command-notebook-config.txt
@@ -24,6 +24,13 @@ when the workload is launched:
 Options set via ``--config`` take precedence over values specified in
 the configuration file.
 
+TensorBoard workloads also support YAML configuration files, but do not
+allow configuration variables to be passed directly to them:
+
+.. code::
+
+    $ det tensorboard start <experiment-id> --config-file=my_config.yaml
+
 Configuration Settings
 **********************
 
@@ -69,7 +76,8 @@ The following configuration settings are supported:
     number of slots on the agent in the cluster with the most slots.
     For example, Determined will be unable to schedule a command that
     requests 4 slots if the Determined cluster is composed of agents with 2
-    slots each.
+    slots each. The number of slots for TensorBoard is fixed at ``0`` and
+    may not be changed.
 
   - ``agent_label``: If set, the command/notebook will _only_ be scheduled on
     agents that have the given label set. If this is not set (the default
diff --git a/master/internal/command/tensorboard_manager.go b/master/internal/command/tensorboard_manager.go
index be6fc38e32ac..fe6e21dce146 100644
--- a/master/internal/command/tensorboard_manager.go
+++ b/master/internal/command/tensorboard_manager.go
@@ -299,8 +299,11 @@ func (t *tensorboardManager) newTensorBoard(
 	)
 	config.Entrypoint = []string{tensorboardEntrypointFile, "--logdir", strings.Join(logDirs, ",")}
 	config.Resources.Slots = tensorboardResourcesSlots
-	config.Environment.EnvironmentVariables = model.RuntimeItems{CPU: envVars, GPU: envVars}
-	config.BindMounts = getMounts(uniqMounts)
+
+	cpuEnvVars := append(config.Environment.EnvironmentVariables.CPU, envVars...)
+	gpuEnvVars := append(config.Environment.EnvironmentVariables.GPU, envVars...)
+	config.Environment.EnvironmentVariables = model.RuntimeItems{CPU: cpuEnvVars, GPU: gpuEnvVars}
+	config.BindMounts = append(config.BindMounts, getMounts(uniqMounts)...)
 
 	setPodSpec(&config, t.taskContainerDefaults)
 
diff --git a/tools/run-server.py b/tools/run-server.py
index 291dbba3a312..c26edf85462b 100644
--- a/tools/run-server.py
+++ b/tools/run-server.py
@@ -50,7 +50,7 @@ def tail_db_logs():
 def run_master():
     return proc(
         "master",
-        ["../master/build/determined-master", "--config-file", "/usr/local/determined/etc/master.yaml"],
+        ["../master/build/determined-master", "--config-file", "master.yaml"],
         logs_handler=lambda line: f"{MAGENTA}determined-master  |{CLEAR} {line}"
     )
 
@@ -83,26 +83,26 @@ def main():
     db, master, agent, db_logs = False, None, None, None
     try:
         master = run_master()
-        # agent = run_agent()
-        # db_logs = tail_db_logs()
-        # if not is_db_running():
-        #     db = True
-        #     subprocess.check_call(["docker-compose", "up", "-d"])
-
-        # wait_for_server(5432)
-        # db_logs.start()
+        agent = run_agent()
+        db_logs = tail_db_logs()
+        if not is_db_running():
+            db = True
+            subprocess.check_call(["docker-compose", "up", "-d"])
+
+        wait_for_server(5432)
+        db_logs.start()
         master.start()
         wait_for_server(8080)
-        # agent.start()
+        agent.start()
 
         # Join the agent first so we can exit if the agent fails to connect to
         # the master.
-        # agent.join()
-        # if agent.exitcode != 0:
-        #     raise Exception(f"agent failed with non-zero exit code {agent.exitcode}")
+        agent.join()
+        if agent.exitcode != 0:
+            raise Exception(f"agent failed with non-zero exit code {agent.exitcode}")
 
         master.join()
-        # db_logs.join()
+        db_logs.join()
     except KeyboardInterrupt:
         pass
     finally: