Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revert: added cli logging to native. #821

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cli/determined_cli/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

from determined_cli import render
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_false

from .declarative_argparse import Arg, Cmd, Group
from .user import authentication_required


def local_id(address: str) -> str:
Expand Down
5 changes: 2 additions & 3 deletions cli/determined_cli/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
from typing import Any, Dict, List, Optional

from determined_common import api, constants, experimental
from determined_common.api.authentication import authentication_required
from determined_common.experimental import Determined

from . import render
from . import render, user
from .declarative_argparse import Arg, Cmd


Expand Down Expand Up @@ -59,7 +58,7 @@ def render_checkpoint(checkpoint: experimental.Checkpoint, path: Optional[str] =
render.tabulate_or_csv(headers, [values], False)


@authentication_required
@user.authentication_required
def list(args: Namespace) -> None:
params = {}
if args.best is not None:
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
from determined_cli.tensorboard import args_description as tensorboard_args_description
from determined_cli.trial import args_description as trial_args_description
from determined_cli.user import args_description as user_args_description
from determined_cli.user import authentication_required
from determined_cli.version import args_description as version_args_description
from determined_cli.version import check_version
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_not_none
from determined_common.util import chunks, debug_mode, get_default_master_address

Expand Down
104 changes: 101 additions & 3 deletions cli/determined_cli/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
import determined_common
from determined_cli import checkpoint, render
from determined_cli.declarative_argparse import Arg, Cmd, Group
from determined_cli.trial import logs
from determined_cli.user import authentication_required
from determined_common import api, constants, context
from determined_common.api.authentication import authentication_required
from determined_common.experimental import Determined

from .checkpoint import render_checkpoint
Expand Down Expand Up @@ -51,6 +52,103 @@ def cancel(args: Namespace) -> None:
print("Canceled experiment {}".format(args.experiment_id))


def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    """
    Wait for an experiment's first trial to appear, then stream its logs.

    The master is polled every 0.1 seconds until the experiment reports at
    least one trial. The trial with the lowest ID is then handed to ``logs``
    with ``follow=True``.

    Arguments:
        master_url: address of the master to query.
        exp_id: ID of the experiment whose first trial should be followed.
    """
    print("Waiting for first trial to begin...")
    endpoint = "experiments/{}".format(exp_id)

    # Poll until the experiment has at least one trial.
    trials = api.get(master_url, endpoint).json()["trials"]
    while len(trials) == 0:
        time.sleep(0.1)
        trials = api.get(master_url, endpoint).json()["trials"]

    # The "first" trial is the one with the lowest ID.
    first_trial_id = min(trial["id"] for trial in trials)
    print("Following first trial with ID {}".format(first_trial_id))

    # Equivalent to running `logs --follow` on the new trial.
    logs(Namespace(trial_id=first_trial_id, follow=True, master=master_url, tail=None))


def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    """
    Poll a test experiment and render a one-line progress report until it ends.

    The master's ``experiments/<exp_id>`` endpoint is queried every 0.2
    seconds. Progress is derived from the experiment state and from the first
    step of the first trial, and is redrawn in place on a single line.

    Arguments:
        master_url: address of the master to query.
        exp_id: ID of the test experiment to follow.

    Returns normally only when the experiment reaches the COMPLETED state.
    If the experiment is CANCELED or ends in ERROR, the process exits with
    status 1 via ``sys.exit``; in the ERROR case the first trial's logs are
    printed first when such a trial exists.
    """
    # There are four sequential stages of verification. The active stage is
    # tracked as an index into this list; an index of len(stages) means that
    # every stage has finished.
    stages = [
        "Scheduling task",
        "Testing training",
        "Testing validation",
        "Testing checkpointing",
    ]

    def print_progress(active_stage: int, ended: bool) -> None:
        # Render every stage on one line: finished stages in green, the
        # active one in yellow (or red once the experiment has ended), and
        # pending ones in white.
        for idx, stage in enumerate(stages):
            if active_stage > idx:
                color = "green"
                checkbox = "✔"
            elif active_stage == idx:
                color = "red" if ended else "yellow"
                checkbox = "✗" if ended else " "
            else:
                color = "white"
                checkbox = " "
            print(colored(stage + (25 - len(stage)) * ".", color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx == len(stages) - 1:
                # A carriage return lets the next poll overwrite this line;
                # a newline is emitted only for the final report.
                print("\n" if ended else "\r", end="")
            else:
                print(", ", end="")

    while True:
        r = api.get(master_url, "experiments/{}".format(exp_id)).json()
        trials = r["trials"]
        state = r["state"]

        # Wait for experiment to start and initialize a trial and step.
        if len(trials) < 1 or len(trials[0]["steps"]) < 1:
            step = {}  # type: Dict
        else:
            step = trials[0]["steps"][0]

        # Update the active_stage by examining the result from master
        # /experiments/<experiment-id> endpoint.
        if state == constants.COMPLETED:
            active_stage = 4
        elif step.get("checkpoint"):
            active_stage = 3
        elif step.get("validation"):
            active_stage = 2
        elif step:
            active_stage = 1
        else:
            active_stage = 0

        # If the experiment is in a terminal state, output the appropriate
        # message and exit. Otherwise, sleep and repeat.
        if state == constants.COMPLETED:
            print_progress(active_stage, ended=True)
            print(colored("Model definition test succeeded! 🎉", "green"))
            return
        elif state == constants.CANCELED:
            print_progress(active_stage, ended=True)
            print(
                colored(
                    "Model definition test (ID: {}) canceled before "
                    "model test could complete. Please re-run the "
                    "command.".format(exp_id),
                    "yellow",
                )
            )
            sys.exit(1)
        elif state == constants.ERROR:
            print_progress(active_stage, ended=True)
            if trials:
                logs_args = Namespace(
                    trial_id=trials[0]["id"], master=master_url, tail=None, follow=False
                )
                logs(logs_args)
            else:
                # The experiment can fail before any trial has been created;
                # in that case there are no trial logs to display, and
                # indexing the empty trial list would raise IndexError.
                print(
                    colored(
                        "Model definition test (ID: {}) failed before any "
                        "trial was created; no trial logs are "
                        "available.".format(exp_id),
                        "red",
                    )
                )
            sys.exit(1)
        else:
            print_progress(active_stage, ended=False)
            time.sleep(0.2)


def read_git_metadata(model_def_path: pathlib.Path) -> Tuple[str, str, str, str]:
"""
Attempt to read the git metadata from the model definition directory. If
Expand Down Expand Up @@ -159,7 +257,7 @@ def submit_experiment(args: Namespace) -> None:
additional_body_fields=additional_body_fields,
)
print(colored("Test experiment ID: {}".format(exp_id), "green"))
api.experiment.follow_test_experiment_logs(args.master, exp_id)
follow_test_experiment_logs(args.master, exp_id)
else:
exp_id = api.experiment.create_experiment(
master_url=args.master,
Expand All @@ -172,7 +270,7 @@ def submit_experiment(args: Namespace) -> None:
)
print("Created experiment {}".format(exp_id))
if not args.paused and args.follow_first_trial:
api.experiment.follow_experiment_logs(args.master, exp_id)
follow_experiment_logs(args.master, exp_id)


def local_experiment(args: Namespace) -> None:
Expand Down
6 changes: 3 additions & 3 deletions cli/determined_cli/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
from requests import Response

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_gt

from . import user
from .declarative_argparse import Arg, Cmd


@authentication_required
@user.authentication_required
def config(args: Namespace) -> None:
    """Fetch the master's ``config`` endpoint and print the JSON reply, indented by 4."""
    master_config = api.get(args.master, "config").json()
    print(json.dumps(master_config, indent=4))


@authentication_required
@user.authentication_required
def logs(args: Namespace) -> None:
def process_response(response: Response, latest_log_id: int) -> int:
for log in response.json():
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq

from . import render
Expand All @@ -21,6 +20,7 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


@authentication_required
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .command import (
Expand All @@ -21,6 +20,7 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


@authentication_required
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq, check_len

from . import render
Expand All @@ -24,6 +23,7 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


def get_agent_user(host: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .declarative_argparse import Arg, Cmd
from .user import authentication_required

TemplateClean = namedtuple("TemplateClean", ["name"])
TemplateAll = namedtuple("TemplateAll", ["name", "config"])
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/tensorboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq

from . import render
from .command import Command, render_event_stream
from .declarative_argparse import Arg, Cmd
from .user import authentication_required

Tensorboard = namedtuple(
"Tensorboard",
Expand Down
57 changes: 53 additions & 4 deletions cli/determined_cli/trial.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import distutils.util
import json
import time
from argparse import Namespace
from typing import Any, List
from typing import Any, List, Optional

from termcolor import colored

from determined_cli import render
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common import api, constants
from determined_common.experimental import Determined

from .checkpoint import format_checkpoint, format_validation, render_checkpoint
from .declarative_argparse import Arg, Cmd, Group
from .user import authentication_required


@authentication_required
Expand Down Expand Up @@ -77,6 +80,52 @@ def describe_trial(args: Namespace) -> None:
render.tabulate_or_csv(headers, values, args.csv)


@authentication_required
def logs(args: Namespace) -> None:
    """
    Print the logs of a trial, optionally tailing and/or following them.

    Reads ``args.trial_id``, ``args.master``, ``args.tail`` and
    ``args.follow``. When ``args.tail`` is set, a single request limited to
    that many entries is made; otherwise entries are fetched repeatedly until
    the offset stops advancing. With ``args.follow``, the trial is then polled
    every 0.2 seconds until the most recent log entry reports a state in
    ``constants.TERMINAL_STATES``.

    A ``KeyboardInterrupt`` is swallowed, and a hint on how to reopen the log
    stream is always printed on the way out.
    """
    last_offset = 0
    last_state = None

    def print_logs(offset: Optional[int], limit: Optional[int] = 5000) -> Any:
        # Fetch one batch of log entries, print them, and return the ID of
        # the last entry (or the current offset when the batch is empty).
        nonlocal last_state
        query = []
        if offset is not None:
            query.append("&offset={}".format(offset))
        if limit is not None:
            query.append("&limit={}".format(limit))
        endpoint = "trials/{}/logsv2?".format(args.trial_id) + "".join(query)

        entries = api.get(args.master, endpoint).json()
        for entry in entries:
            print(entry["message"], end="")
            # Remember the state carried by the most recent entry.
            last_state = entry["state"]

        if not entries:
            return last_offset
        return entries[-1]["id"]

    try:
        if args.tail is None:
            # Drain everything available: keep fetching until a request no
            # longer advances the offset.
            new_offset = print_logs(last_offset)
            while new_offset != last_offset:
                last_offset = new_offset
                new_offset = print_logs(last_offset)
        else:
            last_offset = print_logs(None, args.tail)

        if args.follow:
            while True:
                last_offset = print_logs(last_offset)
                if last_state in constants.TERMINAL_STATES:
                    break
                time.sleep(0.2)
    except KeyboardInterrupt:
        pass
    finally:
        hint = (
            "Trial is in the {} state. To reopen log stream, run: "
            "det trial logs -f {}".format(last_state, args.trial_id)
        )
        print(colored(hint, "green"))


def download(args: Namespace) -> None:
checkpoint = (
Determined(args.master, None)
Expand Down Expand Up @@ -179,7 +228,7 @@ def kill_trial(args: Namespace) -> None:
),
Cmd(
"logs",
api.experiment.logs,
logs,
"fetch trial logs",
[
Arg("trial_id", type=int, help="trial ID"),
Expand Down
11 changes: 10 additions & 1 deletion cli/determined_cli/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import determined_common.api.authentication as auth
from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .declarative_argparse import Arg, Cmd
Expand All @@ -34,6 +33,16 @@ def f(namespace: Namespace) -> Any:
return f


def authentication_required(func: Callable[[Namespace], Any]) -> Callable[..., Any]:
    """
    Decorate a CLI handler so a session is initialized before it runs.

    The returned wrapper calls ``auth.initialize_session`` with the
    namespace's ``master``, its ``user`` entry if present (otherwise
    ``None``), and ``try_reauth=True``, then invokes ``func`` with the same
    namespace and returns its result.
    """

    @wraps(func)
    def wrapper(namespace: Namespace) -> Any:
        # "user" may be absent from the parsed arguments, hence the dict lookup.
        username = vars(namespace).get("user")
        auth.initialize_session(namespace.master, username, try_reauth=True)
        return func(namespace)

    return wrapper


def update_user(
username: str,
master_address: str,
Expand Down
2 changes: 0 additions & 2 deletions common/determined_common/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
create_test_experiment,
make_test_experiment_config,
patch_experiment,
follow_experiment_logs,
follow_test_experiment_logs,
)
from determined_common.api.request import (
WebSocket,
Expand Down
Loading