From 913fdc3b81ef33c2760bdb128c8ce9179e4ab9b2 Mon Sep 17 00:00:00 2001 From: Yoni Ben-tzur Date: Fri, 31 Jul 2020 09:55:19 -0700 Subject: [PATCH] feat: add command infra endpoints (#986) --- master/internal/agent/proto.go | 76 ++----------- master/internal/api_command.go | 29 +++++ master/internal/api_notebook.go | 29 +++++ master/internal/api_shell.go | 29 +++++ master/internal/api_tensorboard.go | 31 +++++ master/internal/command/command.go | 107 ++++++++++++++++++ master/internal/command/command_manager.go | 9 ++ master/internal/command/notebook_manager.go | 9 ++ master/internal/command/shell_manager.go | 9 ++ .../internal/command/tensorboard_manager.go | 9 ++ master/pkg/container/container.go | 19 ++++ master/pkg/container/state.go | 19 ++++ master/pkg/device/device.go | 31 ++++- master/pkg/protoutils/struct.go | 16 +++ master/pkg/protoutils/timestamp.go | 15 +++ proto/src/determined/agent/v1/agent.proto | 60 ++-------- proto/src/determined/api/v1/api.proto | 68 +++++++++++ proto/src/determined/api/v1/command.proto | 65 +++++++++++ proto/src/determined/api/v1/notebook.proto | 65 +++++++++++ proto/src/determined/api/v1/shell.proto | 65 +++++++++++ proto/src/determined/api/v1/tensorboard.proto | 63 +++++++++++ proto/src/determined/command/v1/command.proto | 22 ++++ .../determined/container/v1/container.proto | 35 ++++++ proto/src/determined/device/v1/device.proto | 26 +++++ .../src/determined/notebook/v1/notebook.proto | 22 ++++ proto/src/determined/shell/v1/shell.proto | 26 +++++ .../tensorboard/v1/tensorboard.proto | 26 +++++ 27 files changed, 858 insertions(+), 122 deletions(-) create mode 100644 master/internal/api_command.go create mode 100644 master/internal/api_notebook.go create mode 100644 master/internal/api_shell.go create mode 100644 master/internal/api_tensorboard.go create mode 100644 master/pkg/protoutils/struct.go create mode 100644 master/pkg/protoutils/timestamp.go create mode 100644 proto/src/determined/api/v1/command.proto create mode 100644 
proto/src/determined/api/v1/notebook.proto create mode 100644 proto/src/determined/api/v1/shell.proto create mode 100644 proto/src/determined/api/v1/tensorboard.proto create mode 100644 proto/src/determined/command/v1/command.proto create mode 100644 proto/src/determined/container/v1/container.proto create mode 100644 proto/src/determined/device/v1/device.proto create mode 100644 proto/src/determined/notebook/v1/notebook.proto create mode 100644 proto/src/determined/shell/v1/shell.proto create mode 100644 proto/src/determined/tensorboard/v1/tensorboard.proto diff --git a/master/internal/agent/proto.go b/master/internal/agent/proto.go index c75f485517d..6211cf53861 100644 --- a/master/internal/agent/proto.go +++ b/master/internal/agent/proto.go @@ -1,10 +1,7 @@ package agent import ( - "github.com/golang/protobuf/ptypes/timestamp" - - "github.com/determined-ai/determined/master/pkg/container" - "github.com/determined-ai/determined/master/pkg/device" + "github.com/determined-ai/determined/master/pkg/protoutils" proto "github.com/determined-ai/determined/proto/pkg/agentv1" ) @@ -15,76 +12,19 @@ func ToProtoAgent(a AgentSummary) *proto.Agent { slots[s.ID] = toProtoSlot(s) } return &proto.Agent{ - Id: a.ID, - RegisteredTime: &timestamp.Timestamp{ - Seconds: a.RegisteredTime.Unix(), - Nanos: int32(a.RegisteredTime.Nanosecond()), - }, - Slots: slots, - Containers: nil, - Label: a.Label, + Id: a.ID, + RegisteredTime: protoutils.ToTimestamp(a.RegisteredTime), + Slots: slots, + Containers: nil, + Label: a.Label, } } func toProtoSlot(s SlotSummary) *proto.Slot { - var c *proto.Container - if s.Container != nil { - c = toProtoContainer(*s.Container) - } return &proto.Slot{ Id: s.ID, - Device: toProtoDevice(s.Device), + Device: s.Device.Proto(), Enabled: s.Enabled, - Container: c, - } -} - -func toProtoContainer(c container.Container) *proto.Container { - var devices []*proto.Device - for _, d := range c.Devices { - devices = append(devices, toProtoDevice(d)) - } - return 
&proto.Container{ - Parent: c.Parent.String(), - Id: c.ID.String(), - State: toProtoContainerState(c.State), - Devices: devices, - } -} - -func toProtoContainerState(s container.State) proto.Container_State { - switch s { - case container.Assigned: - return proto.Container_STATE_ASSIGNED - case container.Pulling: - return proto.Container_STATE_PULLING - case container.Starting: - return proto.Container_STATE_STARTING - case container.Running: - return proto.Container_STATE_RUNNING - case container.Terminated: - return proto.Container_STATE_TERMINATED - default: - return proto.Container_STATE_UNSPECIFIED - } -} - -func toProtoDevice(d device.Device) *proto.Device { - return &proto.Device{ - Id: int32(d.ID), - Brand: d.Brand, - Uuid: d.UUID, - Type: toProtoDeviceType(d.Type), - } -} - -func toProtoDeviceType(t device.Type) proto.Device_Type { - switch t { - case device.CPU: - return proto.Device_TYPE_CPU - case device.GPU: - return proto.Device_TYPE_GPU - default: - return proto.Device_TYPE_UNSPECIFIED + Container: s.Container.Proto(), } } diff --git a/master/internal/api_command.go b/master/internal/api_command.go new file mode 100644 index 00000000000..e3938d96e7f --- /dev/null +++ b/master/internal/api_command.go @@ -0,0 +1,29 @@ +package internal + +import ( + "context" + "fmt" + + "github.com/determined-ai/determined/proto/pkg/apiv1" +) + +func (a *apiServer) GetCommands( + _ context.Context, req *apiv1.GetCommandsRequest, +) (resp *apiv1.GetCommandsResponse, err error) { + err = a.actorRequest("/commands", req, &resp) + if err != nil { + return nil, err + } + a.sort(resp.Commands, req.OrderBy, req.SortBy, apiv1.GetCommandsRequest_SORT_BY_ID) + return resp, a.paginate(&resp.Pagination, &resp.Commands, req.Offset, req.Limit) +} + +func (a *apiServer) GetCommand( + _ context.Context, req *apiv1.GetCommandRequest) (resp *apiv1.GetCommandResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/commands/%s", req.CommandId), req, &resp) +} + +func (a 
*apiServer) KillCommand( + _ context.Context, req *apiv1.KillCommandRequest) (resp *apiv1.KillCommandResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/commands/%s", req.CommandId), req, &resp) +} diff --git a/master/internal/api_notebook.go b/master/internal/api_notebook.go new file mode 100644 index 00000000000..a23afb13bb3 --- /dev/null +++ b/master/internal/api_notebook.go @@ -0,0 +1,29 @@ +package internal + +import ( + "context" + "fmt" + + "github.com/determined-ai/determined/proto/pkg/apiv1" +) + +func (a *apiServer) GetNotebooks( + _ context.Context, req *apiv1.GetNotebooksRequest, +) (resp *apiv1.GetNotebooksResponse, err error) { + err = a.actorRequest("/notebooks", req, &resp) + if err != nil { + return nil, err + } + a.sort(resp.Notebooks, req.OrderBy, req.SortBy, apiv1.GetNotebooksRequest_SORT_BY_ID) + return resp, a.paginate(&resp.Pagination, &resp.Notebooks, req.Offset, req.Limit) +} + +func (a *apiServer) GetNotebook( + _ context.Context, req *apiv1.GetNotebookRequest) (resp *apiv1.GetNotebookResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/notebooks/%s", req.NotebookId), req, &resp) +} + +func (a *apiServer) KillNotebook( + _ context.Context, req *apiv1.KillNotebookRequest) (resp *apiv1.KillNotebookResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/notebooks/%s", req.NotebookId), req, &resp) +} diff --git a/master/internal/api_shell.go b/master/internal/api_shell.go new file mode 100644 index 00000000000..7ecc5c63924 --- /dev/null +++ b/master/internal/api_shell.go @@ -0,0 +1,29 @@ +package internal + +import ( + "context" + "fmt" + + "github.com/determined-ai/determined/proto/pkg/apiv1" +) + +func (a *apiServer) GetShells( + _ context.Context, req *apiv1.GetShellsRequest, +) (resp *apiv1.GetShellsResponse, err error) { + err = a.actorRequest("/shells", req, &resp) + if err != nil { + return nil, err + } + a.sort(resp.Shells, req.OrderBy, req.SortBy, apiv1.GetShellsRequest_SORT_BY_ID) + return 
resp, a.paginate(&resp.Pagination, &resp.Shells, req.Offset, req.Limit) +} + +func (a *apiServer) GetShell( + _ context.Context, req *apiv1.GetShellRequest) (resp *apiv1.GetShellResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/shells/%s", req.ShellId), req, &resp) +} + +func (a *apiServer) KillShell( + _ context.Context, req *apiv1.KillShellRequest) (resp *apiv1.KillShellResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/shells/%s", req.ShellId), req, &resp) +} diff --git a/master/internal/api_tensorboard.go b/master/internal/api_tensorboard.go new file mode 100644 index 00000000000..aefebeeafde --- /dev/null +++ b/master/internal/api_tensorboard.go @@ -0,0 +1,31 @@ +package internal + +import ( + "context" + "fmt" + + "github.com/determined-ai/determined/proto/pkg/apiv1" +) + +func (a *apiServer) GetTensorboards( + _ context.Context, req *apiv1.GetTensorboardsRequest, +) (resp *apiv1.GetTensorboardsResponse, err error) { + err = a.actorRequest("/tensorboards", req, &resp) + if err != nil { + return nil, err + } + a.sort(resp.Tensorboards, req.OrderBy, req.SortBy, apiv1.GetTensorboardsRequest_SORT_BY_ID) + return resp, a.paginate(&resp.Pagination, &resp.Tensorboards, req.Offset, req.Limit) +} + +func (a *apiServer) GetTensorboard( + _ context.Context, req *apiv1.GetTensorboardRequest, +) (resp *apiv1.GetTensorboardResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/tensorboards/%s", req.TensorboardId), req, &resp) +} + +func (a *apiServer) KillTensorboard( + _ context.Context, req *apiv1.KillTensorboardRequest, +) (resp *apiv1.KillTensorboardResponse, err error) { + return resp, a.actorRequest(fmt.Sprintf("/tensorboards/%s", req.TensorboardId), req, &resp) +} diff --git a/master/internal/command/command.go b/master/internal/command/command.go index fdfd427f31b..ccd60365485 100644 --- a/master/internal/command/command.go +++ b/master/internal/command/command.go @@ -13,7 +13,13 @@ import ( 
"github.com/determined-ai/determined/master/pkg/archive" "github.com/determined-ai/determined/master/pkg/container" "github.com/determined-ai/determined/master/pkg/model" + "github.com/determined-ai/determined/master/pkg/protoutils" "github.com/determined-ai/determined/master/pkg/tasks" + "github.com/determined-ai/determined/proto/pkg/apiv1" + "github.com/determined-ai/determined/proto/pkg/commandv1" + "github.com/determined-ai/determined/proto/pkg/notebookv1" + "github.com/determined-ai/determined/proto/pkg/shellv1" + "github.com/determined-ai/determined/proto/pkg/tensorboardv1" ) // terminatedDuration defines the amount of time the command stays in a @@ -103,6 +109,55 @@ func (c *command) Receive(ctx *actor.Context) error { ctx.Respond(newSummary(c)) } + case *notebookv1.Notebook: + ctx.Respond(c.toNotebook(ctx)) + + case *apiv1.GetNotebookRequest: + ctx.Respond(&apiv1.GetNotebookResponse{ + Notebook: c.toNotebook(ctx), + Config: protoutils.ToStruct(c.config), + }) + + case *apiv1.KillNotebookRequest: + c.terminate(ctx) + ctx.Respond(&apiv1.KillNotebookResponse{Notebook: c.toNotebook(ctx)}) + + case *commandv1.Command: + ctx.Respond(c.toCommand(ctx)) + + case *apiv1.GetCommandRequest: + ctx.Respond(&apiv1.GetCommandResponse{ + Command: c.toCommand(ctx), + Config: protoutils.ToStruct(c.config), + }) + + case *apiv1.KillCommandRequest: + c.terminate(ctx) + ctx.Respond(&apiv1.KillCommandResponse{Command: c.toCommand(ctx)}) + + case *shellv1.Shell: + ctx.Respond(c.toShell(ctx)) + + case *apiv1.GetShellRequest: + ctx.Respond(&apiv1.GetShellResponse{ + Shell: c.toShell(ctx), + Config: protoutils.ToStruct(c.config), + }) + + case *apiv1.KillShellRequest: + c.terminate(ctx) + ctx.Respond(&apiv1.KillShellResponse{Shell: c.toShell(ctx)}) + + case *tensorboardv1.Tensorboard: + ctx.Respond(c.toTensorboard(ctx)) + + case *apiv1.GetTensorboardRequest: + ctx.Respond(&apiv1.GetTensorboardResponse{Tensorboard: c.toTensorboard(ctx)}) + + case *apiv1.KillTensorboardRequest: + 
c.terminate(ctx) + ctx.Respond(&apiv1.KillTensorboardResponse{Tensorboard: c.toTensorboard(ctx)}) + case sproto.ContainerStateChanged: c.container = &msg.Container if msg.Container.State == container.Terminated { @@ -199,3 +254,55 @@ func (c *command) exit(ctx *actor.Context, exitStatus string) { ctx.Tell(c.eventStream, event{Snapshot: newSummary(c), ExitedEvent: c.exitStatus}) actors.NotifyAfter(ctx, terminatedDuration, terminateForGC{}) } + +func (c *command) toNotebook(ctx *actor.Context) *notebookv1.Notebook { + return &notebookv1.Notebook{ + Id: ctx.Self().Address().Local(), + Description: c.config.Description, + Container: c.container.Proto(), + StartTime: protoutils.ToTimestamp(ctx.Self().RegisteredTime()), + Username: c.owner.Username, + } +} + +func (c *command) toCommand(ctx *actor.Context) *commandv1.Command { + return &commandv1.Command{ + Id: ctx.Self().Address().Local(), + Description: c.config.Description, + Container: c.container.Proto(), + StartTime: protoutils.ToTimestamp(ctx.Self().RegisteredTime()), + Username: c.owner.Username, + } +} + +func (c *command) toShell(ctx *actor.Context) *shellv1.Shell { + return &shellv1.Shell{ + Id: ctx.Self().Address().Local(), + Description: c.config.Description, + StartTime: protoutils.ToTimestamp(ctx.Self().RegisteredTime()), + Container: c.container.Proto(), + PrivateKey: c.metadata["privateKey"].(string), + PublicKey: c.metadata["publicKey"].(string), + Username: c.owner.Username, + } +} + +func (c *command) toTensorboard(ctx *actor.Context) *tensorboardv1.Tensorboard { + var eids []int32 + for _, id := range c.metadata["experiment_ids"].([]int) { + eids = append(eids, int32(id)) + } + var tids []int32 + for _, id := range c.metadata["trial_ids"].([]int) { + tids = append(tids, int32(id)) + } + return &tensorboardv1.Tensorboard{ + Id: ctx.Self().Address().Local(), + Description: c.config.Description, + StartTime: protoutils.ToTimestamp(ctx.Self().RegisteredTime()), + Container: c.container.Proto(), + 
ExperimentIds: eids, + TrialIds: tids, + Username: c.owner.Username, + } +} diff --git a/master/internal/command/command_manager.go b/master/internal/command/command_manager.go index c0e01e073f8..4a80162e780 100644 --- a/master/internal/command/command_manager.go +++ b/master/internal/command/command_manager.go @@ -12,6 +12,8 @@ import ( "github.com/determined-ai/determined/master/pkg/actor" "github.com/determined-ai/determined/master/pkg/check" "github.com/determined-ai/determined/master/pkg/model" + "github.com/determined-ai/determined/proto/pkg/apiv1" + "github.com/determined-ai/determined/proto/pkg/commandv1" ) // If an entrypoint is specified as a singleton string, Determined will follow the "shell form" @@ -29,6 +31,13 @@ type commandManager struct { func (c *commandManager) Receive(ctx *actor.Context) error { switch msg := ctx.Message().(type) { + case *apiv1.GetCommandsRequest: + resp := &apiv1.GetCommandsResponse{} + for _, command := range ctx.AskAll(&commandv1.Command{}, ctx.Children()...).GetAll() { + resp.Commands = append(resp.Commands, command.(*commandv1.Command)) + } + ctx.Respond(resp) + case echo.Context: c.handleAPIRequest(ctx, msg) } diff --git a/master/internal/command/notebook_manager.go b/master/internal/command/notebook_manager.go index d2329ef4be7..45249fcea36 100644 --- a/master/internal/command/notebook_manager.go +++ b/master/internal/command/notebook_manager.go @@ -19,6 +19,8 @@ import ( "github.com/determined-ai/determined/master/pkg/check" "github.com/determined-ai/determined/master/pkg/etc" "github.com/determined-ai/determined/master/pkg/model" + "github.com/determined-ai/determined/proto/pkg/apiv1" + "github.com/determined-ai/determined/proto/pkg/notebookv1" ) const ( @@ -103,6 +105,13 @@ type notebookManager struct { func (n *notebookManager) Receive(ctx *actor.Context) error { switch msg := ctx.Message().(type) { + case *apiv1.GetNotebooksRequest: + resp := &apiv1.GetNotebooksResponse{} + for _, notebook := range 
ctx.AskAll(&notebookv1.Notebook{}, ctx.Children()...).GetAll() { + resp.Notebooks = append(resp.Notebooks, notebook.(*notebookv1.Notebook)) + } + ctx.Respond(resp) + case echo.Context: n.handleAPIRequest(ctx, msg) } diff --git a/master/internal/command/shell_manager.go b/master/internal/command/shell_manager.go index ff819b5c709..6e477d4473f 100644 --- a/master/internal/command/shell_manager.go +++ b/master/internal/command/shell_manager.go @@ -17,6 +17,8 @@ import ( "github.com/determined-ai/determined/master/pkg/etc" "github.com/determined-ai/determined/master/pkg/model" "github.com/determined-ai/determined/master/pkg/ssh" + "github.com/determined-ai/determined/proto/pkg/apiv1" + "github.com/determined-ai/determined/proto/pkg/shellv1" ) const ( @@ -44,6 +46,13 @@ type shellManager struct { func (n *shellManager) Receive(ctx *actor.Context) error { switch msg := ctx.Message().(type) { + case *apiv1.GetShellsRequest: + resp := &apiv1.GetShellsResponse{} + for _, shell := range ctx.AskAll(&shellv1.Shell{}, ctx.Children()...).GetAll() { + resp.Shells = append(resp.Shells, shell.(*shellv1.Shell)) + } + ctx.Respond(resp) + case echo.Context: n.handleAPIRequest(ctx, msg) } diff --git a/master/internal/command/tensorboard_manager.go b/master/internal/command/tensorboard_manager.go index 6f9b0c7a389..20372bb4fca 100644 --- a/master/internal/command/tensorboard_manager.go +++ b/master/internal/command/tensorboard_manager.go @@ -23,6 +23,8 @@ import ( "github.com/determined-ai/determined/master/pkg/check" "github.com/determined-ai/determined/master/pkg/etc" "github.com/determined-ai/determined/master/pkg/model" + "github.com/determined-ai/determined/proto/pkg/apiv1" + "github.com/determined-ai/determined/proto/pkg/tensorboardv1" ) const ( @@ -55,6 +57,13 @@ type tensorboardManager struct { func (t *tensorboardManager) Receive(ctx *actor.Context) error { switch msg := ctx.Message().(type) { + case *apiv1.GetTensorboardsRequest: + resp := &apiv1.GetTensorboardsResponse{} + for 
_, tensorboard := range ctx.AskAll(&tensorboardv1.Tensorboard{}, ctx.Children()...).GetAll() { + resp.Tensorboards = append(resp.Tensorboards, tensorboard.(*tensorboardv1.Tensorboard)) + } + ctx.Respond(resp) + case echo.Context: t.handleAPIRequest(ctx, msg) } diff --git a/master/pkg/container/container.go b/master/pkg/container/container.go index 4705ce04001..c251c77a0d6 100644 --- a/master/pkg/container/container.go +++ b/master/pkg/container/container.go @@ -4,6 +4,8 @@ import ( "github.com/determined-ai/determined/master/pkg/actor" "github.com/determined-ai/determined/master/pkg/check" "github.com/determined-ai/determined/master/pkg/device" + "github.com/determined-ai/determined/proto/pkg/containerv1" + "github.com/determined-ai/determined/proto/pkg/devicev1" ) // Container tracks a container running in the cluster. @@ -36,3 +38,20 @@ func (c Container) GPUDeviceUUIDs() []string { } return uuids } + +// Proto returns the proto representation of the container. +func (c *Container) Proto() *containerv1.Container { + if c == nil { + return nil + } + var devices []*devicev1.Device + for _, d := range c.Devices { + devices = append(devices, d.Proto()) + } + return &containerv1.Container{ + Parent: c.Parent.String(), + Id: c.ID.String(), + State: c.State.Proto(), + Devices: devices, + } +} diff --git a/master/pkg/container/state.go b/master/pkg/container/state.go index d2d6d8f05b5..8de319a30c8 100644 --- a/master/pkg/container/state.go +++ b/master/pkg/container/state.go @@ -4,6 +4,7 @@ import ( "github.com/pkg/errors" "github.com/determined-ai/determined/master/pkg/check" + "github.com/determined-ai/determined/proto/pkg/containerv1" ) // State represents the current state of the container. @@ -57,3 +58,21 @@ func (s *State) UnmarshalText(text []byte) error { *s = parsed return nil } + +// Proto returns the proto representation of the container state. 
+func (s State) Proto() containerv1.State { + switch s { + case Assigned: + return containerv1.State_STATE_ASSIGNED + case Pulling: + return containerv1.State_STATE_PULLING + case Starting: + return containerv1.State_STATE_STARTING + case Running: + return containerv1.State_STATE_RUNNING + case Terminated: + return containerv1.State_STATE_TERMINATED + default: + return containerv1.State_STATE_UNSPECIFIED + } +} diff --git a/master/pkg/device/device.go b/master/pkg/device/device.go index 7e6ffa10569..8198416406b 100644 --- a/master/pkg/device/device.go +++ b/master/pkg/device/device.go @@ -1,6 +1,10 @@ package device -import "fmt" +import ( + "fmt" + + "github.com/determined-ai/determined/proto/pkg/devicev1" +) // Type is a string holding the type of the Device. type Type string @@ -12,6 +16,18 @@ const ( GPU Type = "gpu" ) +// Proto returns the proto representation of the device type. +func (t Type) Proto() devicev1.Type { + switch t { + case CPU: + return devicev1.Type_TYPE_CPU + case GPU: + return devicev1.Type_TYPE_GPU + default: + return devicev1.Type_TYPE_UNSPECIFIED + } +} + // Device represents a single computational device on an agent. type Device struct { ID int `json:"id"` @@ -23,3 +39,16 @@ type Device struct { func (d *Device) String() string { return fmt.Sprintf("%s%d (%s)", d.Type, d.ID, d.Brand) } + +// Proto returns the proto representation of the device. 
+func (d *Device) Proto() *devicev1.Device { + if d == nil { + return nil + } + return &devicev1.Device{ + Id: int32(d.ID), + Brand: d.Brand, + Uuid: d.UUID, + Type: d.Type.Proto(), + } +} diff --git a/master/pkg/protoutils/struct.go b/master/pkg/protoutils/struct.go new file mode 100644 index 00000000000..4dbf49b8160 --- /dev/null +++ b/master/pkg/protoutils/struct.go @@ -0,0 +1,16 @@ +package protoutils + +import ( + "encoding/json" + + structpb "github.com/golang/protobuf/ptypes/struct" + "google.golang.org/protobuf/encoding/protojson" +) + +// ToStruct converts a Go interface to a protobuf struct. +func ToStruct(v interface{}) *structpb.Struct { + b, _ := json.Marshal(v) + configStruct := &structpb.Struct{} + _ = protojson.Unmarshal(b, configStruct) + return configStruct +} diff --git a/master/pkg/protoutils/timestamp.go b/master/pkg/protoutils/timestamp.go new file mode 100644 index 00000000000..8fd8920c48c --- /dev/null +++ b/master/pkg/protoutils/timestamp.go @@ -0,0 +1,15 @@ +package protoutils + +import ( + "time" + + "github.com/golang/protobuf/ptypes/timestamp" +) + +// ToTimestamp converts a Go time struct to a protobuf message. +func ToTimestamp(t time.Time) *timestamp.Timestamp { + return &timestamp.Timestamp{ + Seconds: t.Unix(), + Nanos: int32(t.Nanosecond()), + } +} diff --git a/proto/src/determined/agent/v1/agent.proto b/proto/src/determined/agent/v1/agent.proto index b429db04bd0..54b7cdf43c6 100644 --- a/proto/src/determined/agent/v1/agent.proto +++ b/proto/src/determined/agent/v1/agent.proto @@ -5,6 +5,10 @@ option go_package = "github.com/determined-ai/determined/proto/pkg/agentv1"; import "google/protobuf/timestamp.proto"; +import "determined/container/v1/container.proto"; +import "determined/device/v1/device.proto"; + + // Agent is a pool of resources where containers are run. message Agent { // The unique id of the agent. @@ -14,7 +18,7 @@ message Agent { // A map of slot id to each slot of this agent. 
map<string, Slot> slots = 3; // A map of container id to all containers assigned to this agent. - map<string, Container> containers = 4; + map<string, determined.container.v1.Container> containers = 4; // An optional label applied to the agent for scheduling restrictions. string label = 5; } @@ -24,60 +28,10 @@ message Slot { // The unqiue id of the slot for a given agent. string id = 1; // The individual resource this slot wraps. - Device device = 2; + determined.device.v1.Device device = 2; // Flag notifying if containers can be scheduled on this slot. bool enabled = 3; // Container that is currently running on this agent. It is unset if there is // no container currently running on this slot. - Container container = 4; -} - -// Container is a Docker container that is either scheduled to run or is currently running on a -// set of slots. -message Container { - // The current state of the container. - enum State { - // The container state is unknown. - STATE_UNSPECIFIED = 0; - // The container has been assigned to an agent but has not started yet. - STATE_ASSIGNED = 1; - // The container's base image is being pulled from the Docker registry. - STATE_PULLING = 2; - // The image has been built and the container is being started, but the service in the container is not ready yet. - STATE_STARTING = 3; - // The service in the container is able to accept requests. - STATE_RUNNING = 4; - // The container has completely exited or the container has been aborted prior to getting assigned. - STATE_TERMINATED = 5; - } - // The id of the task that is currently managing this container. - string parent = 1; - // The unique id of this instance of a container. - string id = 2; - // The current state that the container is currently in. - State state = 3; - // A list of devices that is being used by this container. - repeated Device devices = 4; -} - -// Device represents a single computational device on an agent. -message Device { - // The type of the Device. 
- enum Type { - // An unspecified device type. - TYPE_UNSPECIFIED = 0; - // A CPU device. 
- TYPE_CPU = 1; - // A GPU device. - TYPE_GPU = 2; - } - - // The index of the device. - int32 id = 1; - // The brand name of the device. - string brand = 2; - // The unique UUID of the device. - string uuid = 3; - // The type of the Device. - Type type = 4; + determined.container.v1.Container container = 4; } diff --git a/proto/src/determined/api/v1/api.proto b/proto/src/determined/api/v1/api.proto index 225df585f68..15c8de55152 100644 --- a/proto/src/determined/api/v1/api.proto +++ b/proto/src/determined/api/v1/api.proto @@ -8,11 +8,15 @@ import "protoc-gen-swagger/options/annotations.proto"; import "determined/api/v1/agent.proto"; import "determined/api/v1/auth.proto"; +import "determined/api/v1/command.proto"; import "determined/api/v1/experiment.proto"; import "determined/api/v1/master.proto"; import "determined/api/v1/model.proto"; +import "determined/api/v1/notebook.proto"; import "determined/api/v1/template.proto"; +import "determined/api/v1/tensorboard.proto"; import "determined/api/v1/trial.proto"; +import "determined/api/v1/shell.proto"; import "determined/api/v1/user.proto"; option (grpc.gateway.protoc_gen_swagger.options.openapiv2_swagger) = { @@ -159,6 +163,70 @@ service Determined { option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Templates"}; } + // Get a list of notebooks. + rpc GetNotebooks(GetNotebooksRequest) returns (GetNotebooksResponse) { + option (google.api.http) = {get: "/api/v1/notebooks"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Notebooks"}; + } + // Get the requested notebook. + rpc GetNotebook(GetNotebookRequest) returns (GetNotebookResponse) { + option (google.api.http) = {get: "/api/v1/notebooks/{notebook_id}"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Notebooks"}; + } + // Kill the requested notebook. 
+ rpc KillNotebook(KillNotebookRequest) returns (KillNotebookResponse) { + option (google.api.http) = {post: "/api/v1/notebooks/{notebook_id}/kill"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Notebooks"}; + } + + // Get a list of shells. + rpc GetShells(GetShellsRequest) returns (GetShellsResponse) { + option (google.api.http) = {get: "/api/v1/shells"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Shells"}; + } + // Get the requested shell. + rpc GetShell(GetShellRequest) returns (GetShellResponse) { + option (google.api.http) = {get: "/api/v1/shells/{shell_id}"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Shells"}; + } + // Kill the requested shell. + rpc KillShell(KillShellRequest) returns (KillShellResponse) { + option (google.api.http) = {post: "/api/v1/shells/{shell_id}/kill"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Shells"}; + } + + // Get a list of commands. + rpc GetCommands(GetCommandsRequest) returns (GetCommandsResponse) { + option (google.api.http) = {get: "/api/v1/commands"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Commands"}; + } + // Get the requested command. + rpc GetCommand(GetCommandRequest) returns (GetCommandResponse) { + option (google.api.http) = {get: "/api/v1/commands/{command_id}"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Commands"}; + } + // Kill the requested command. + rpc KillCommand(KillCommandRequest) returns (KillCommandResponse) { + option (google.api.http) = {post: "/api/v1/commands/{command_id}/kill"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Commands"}; + } + + // Get a list of tensorboards. 
+ rpc GetTensorboards(GetTensorboardsRequest) returns (GetTensorboardsResponse) { + option (google.api.http) = {get: "/api/v1/tensorboards"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Tensorboards"}; + } + // Get the requested tensorboard. + rpc GetTensorboard(GetTensorboardRequest) returns (GetTensorboardResponse) { + option (google.api.http) = {get: "/api/v1/tensorboards/{tensorboard_id}"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Tensorboards"}; + } + // Kill the requested tensorboard. + rpc KillTensorboard(KillTensorboardRequest) returns (KillTensorboardResponse) { + option (google.api.http) = {post: "/api/v1/tensorboards/{tensorboard_id}/kill"}; + option (grpc.gateway.protoc_gen_swagger.options.openapiv2_operation) = {tags: "Tensorboards"}; + } + // Get the requested model. rpc GetModel(GetModelRequest) returns (GetModelResponse) { option (google.api.http) = {get: "/api/v1/models/{model_name}"}; diff --git a/proto/src/determined/api/v1/command.proto b/proto/src/determined/api/v1/command.proto new file mode 100644 index 00000000000..57b960dd5ed --- /dev/null +++ b/proto/src/determined/api/v1/command.proto @@ -0,0 +1,65 @@ +syntax = "proto3"; + +package determined.api.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/apiv1"; + +import "google/protobuf/struct.proto"; + +import "determined/api/v1/pagination.proto"; +import "determined/command/v1/command.proto"; + +// Get a list of commands. +message GetCommandsRequest { + // Sorts commands by the given field. + enum SortBy { + // Returns commands in an unsorted list. + SORT_BY_UNSPECIFIED = 0; + // Returns commands sorted by id. + SORT_BY_ID = 1; + // Returns commands sorted by description. + SORT_BY_DESCRIPTION = 2; + // Return commands sorted by start time. + SORT_BY_START_TIME = 4; + + } + // Sort commands by the given field + SortBy sort_by = 1; + // Order commands in either ascending or descending order. 
+ OrderBy order_by = 2; + // Skip the number of commands before returning results. Negative values + // denote number of commands to skip from the end before returning results. + int32 offset = 3; + // Limit the number of commands. A value of 0 denotes no limit. + int32 limit = 4; +} +// Response to GetCommandsRequest. +message GetCommandsResponse { + // The list of returned commands. + repeated determined.command.v1.Command commands = 1; + // Pagination information of the full dataset. + Pagination pagination = 2; +} + +// Get the requested command. +message GetCommandRequest { + // The id of the command. + string command_id = 1; +} +// Response to GetCommandRequest. +message GetCommandResponse { + // The requested command. + determined.command.v1.Command command = 1; + // The command config. + google.protobuf.Struct config = 2; +} + +// Kill the requested command. +message KillCommandRequest { + // The id of the command. + string command_id = 1; +} +// Response to KillCommandRequest. +message KillCommandResponse { + // The requested command. + determined.command.v1.Command command = 1; +} diff --git a/proto/src/determined/api/v1/notebook.proto b/proto/src/determined/api/v1/notebook.proto new file mode 100644 index 00000000000..f83f403d4cd --- /dev/null +++ b/proto/src/determined/api/v1/notebook.proto @@ -0,0 +1,65 @@ +syntax = "proto3"; + +package determined.api.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/apiv1"; + +import "google/protobuf/struct.proto"; + +import "determined/api/v1/pagination.proto"; +import "determined/notebook/v1/notebook.proto"; + +// Get a list of notebooks. +message GetNotebooksRequest { + // Sorts notebooks by the given field. + enum SortBy { + // Returns notebooks in an unsorted list. + SORT_BY_UNSPECIFIED = 0; + // Returns notebooks sorted by id. + SORT_BY_ID = 1; + // Returns notebooks sorted by description. + SORT_BY_DESCRIPTION = 2; + // Return notebooks sorted by start time. 
+ SORT_BY_START_TIME = 4; + + } + // Sort notebooks by the given field. + SortBy sort_by = 1; + // Order notebooks in either ascending or descending order. + OrderBy order_by = 2; + // Skip the number of notebooks before returning results. Negative values + // denote the number of notebooks to skip from the end before returning results. + int32 offset = 3; + // Limit the number of notebooks. A value of 0 denotes no limit. + int32 limit = 4; +} +// Response to GetNotebooksRequest. +message GetNotebooksResponse { + // The list of returned notebooks. + repeated determined.notebook.v1.Notebook notebooks = 1; + // Pagination information of the full dataset. + Pagination pagination = 2; +} + +// Get the requested notebook. +message GetNotebookRequest { + // The id of the notebook. + string notebook_id = 1; +} +// Response to GetNotebookRequest. +message GetNotebookResponse { + // The requested notebook. + determined.notebook.v1.Notebook notebook = 1; + // The notebook config. + google.protobuf.Struct config = 2; +} + +// Kill the requested notebook. +message KillNotebookRequest { + // The id of the notebook. + string notebook_id = 1; +} +// Response to KillNotebookRequest. +message KillNotebookResponse { + // The requested notebook. + determined.notebook.v1.Notebook notebook = 1; +} diff --git a/proto/src/determined/api/v1/shell.proto b/proto/src/determined/api/v1/shell.proto new file mode 100644 index 00000000000..8e23d06b0d6 --- /dev/null +++ b/proto/src/determined/api/v1/shell.proto @@ -0,0 +1,65 @@ +syntax = "proto3"; + +package determined.api.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/apiv1"; + +import "google/protobuf/struct.proto"; + +import "determined/api/v1/pagination.proto"; +import "determined/shell/v1/shell.proto"; + +// Get a list of shells. +message GetShellsRequest { + // Sorts shells by the given field. + enum SortBy { + // Returns shells in an unsorted list. + SORT_BY_UNSPECIFIED = 0; + // Returns shells sorted by id.
+ SORT_BY_ID = 1; + // Returns shells sorted by description. + SORT_BY_DESCRIPTION = 2; + // Returns shells sorted by start time. + SORT_BY_START_TIME = 4; + + } + // Sort shells by the given field. + SortBy sort_by = 1; + // Order shells in either ascending or descending order. + OrderBy order_by = 2; + // Skip the number of shells before returning results. Negative values + // denote the number of shells to skip from the end before returning results. + int32 offset = 3; + // Limit the number of shells. A value of 0 denotes no limit. + int32 limit = 4; +} +// Response to GetShellsRequest. +message GetShellsResponse { + // The list of returned shells. + repeated determined.shell.v1.Shell shells = 1; + // Pagination information of the full dataset. + Pagination pagination = 2; +} + +// Get the requested shell. +message GetShellRequest { + // The id of the shell. + string shell_id = 1; +} +// Response to GetShellRequest. +message GetShellResponse { + // The requested shell. + determined.shell.v1.Shell shell = 1; + // The shell config. + google.protobuf.Struct config = 2; +} + +// Kill the requested shell. +message KillShellRequest { + // The id of the shell. + string shell_id = 1; +} +// Response to KillShellRequest. +message KillShellResponse { + // The requested shell. + determined.shell.v1.Shell shell = 1; +} diff --git a/proto/src/determined/api/v1/tensorboard.proto b/proto/src/determined/api/v1/tensorboard.proto new file mode 100644 index 00000000000..13fe301c6a8 --- /dev/null +++ b/proto/src/determined/api/v1/tensorboard.proto @@ -0,0 +1,63 @@ +syntax = "proto3"; + +package determined.api.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/apiv1"; + +import "google/protobuf/struct.proto"; + +import "determined/api/v1/pagination.proto"; +import "determined/tensorboard/v1/tensorboard.proto"; + +// Get a list of tensorboards. +message GetTensorboardsRequest { + // Sorts tensorboards by the given field.
+ enum SortBy { + // Returns tensorboards in an unsorted list. + SORT_BY_UNSPECIFIED = 0; + // Returns tensorboards sorted by id. + SORT_BY_ID = 1; + // Returns tensorboards sorted by description. + SORT_BY_DESCRIPTION = 2; + // Returns tensorboards sorted by start time. + SORT_BY_START_TIME = 4; + + } + // Sort tensorboards by the given field. + SortBy sort_by = 1; + // Order tensorboards in either ascending or descending order. + OrderBy order_by = 2; + // Skip the number of tensorboards before returning results. Negative values + // denote the number of tensorboards to skip from the end before returning results. + int32 offset = 3; + // Limit the number of tensorboards. A value of 0 denotes no limit. + int32 limit = 4; +} +// Response to GetTensorboardsRequest. +message GetTensorboardsResponse { + // The list of returned tensorboards. + repeated determined.tensorboard.v1.Tensorboard tensorboards = 1; + // Pagination information of the full dataset. + Pagination pagination = 2; +} + +// Get the requested tensorboard. +message GetTensorboardRequest { + // The id of the tensorboard. + string tensorboard_id = 1; +} +// Response to GetTensorboardRequest. +message GetTensorboardResponse { + // The requested tensorboard. + determined.tensorboard.v1.Tensorboard tensorboard = 1; +} + +// Kill the requested tensorboard. +message KillTensorboardRequest { + // The id of the tensorboard. + string tensorboard_id = 1; +} +// Response to KillTensorboardRequest. +message KillTensorboardResponse { + // The requested tensorboard.
+ determined.tensorboard.v1.Tensorboard tensorboard = 1; +} diff --git a/proto/src/determined/command/v1/command.proto b/proto/src/determined/command/v1/command.proto new file mode 100644 index 00000000000..3e61ccdd3f4 --- /dev/null +++ b/proto/src/determined/command/v1/command.proto @@ -0,0 +1,22 @@ +syntax = "proto3"; + +package determined.command.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/commandv1"; + +import "google/protobuf/timestamp.proto"; + +import "determined/container/v1/container.proto"; + +// Command is a single container running the configured command. +message Command { + // The id of the command. + string id = 1; + // The description of the command. + string description = 2; + // The time the command was started. + google.protobuf.Timestamp start_time = 4; + // The container running the command. + determined.container.v1.Container container = 6; + // The username of the user that created the command. + string username = 10; +} diff --git a/proto/src/determined/container/v1/container.proto b/proto/src/determined/container/v1/container.proto new file mode 100644 index 00000000000..050facc8099 --- /dev/null +++ b/proto/src/determined/container/v1/container.proto @@ -0,0 +1,35 @@ +syntax = "proto3"; + +package determined.container.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/containerv1"; + +import "determined/device/v1/device.proto"; + +// State is the current state of a container. +enum State { + // The container state is unknown. + STATE_UNSPECIFIED = 0; + // The container has been assigned to an agent but has not started yet. + STATE_ASSIGNED = 1; + // The container's base image is being pulled from the Docker registry. + STATE_PULLING = 2; + // The image has been built and the container is being started, but the service in the container is not ready yet. + STATE_STARTING = 3; + // The service in the container is able to accept requests.
+ STATE_RUNNING = 4; + // The container has completely exited or the container has been aborted prior to getting assigned. + STATE_TERMINATED = 5; +} + +// Container is a Docker container that is either scheduled to run or is currently running on a +// set of slots. +message Container { + // The id of the task that is currently managing this container. + string parent = 1; + // The unique id of this instance of a container. + string id = 2; + // The state that the container is currently in. + State state = 3; + // A list of devices that are being used by this container. + repeated determined.device.v1.Device devices = 4; +} diff --git a/proto/src/determined/device/v1/device.proto b/proto/src/determined/device/v1/device.proto new file mode 100644 index 00000000000..daa699c7341 --- /dev/null +++ b/proto/src/determined/device/v1/device.proto @@ -0,0 +1,26 @@ +syntax = "proto3"; + +package determined.device.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/devicev1"; + +// The type of the Device. +enum Type { + // An unspecified device type. + TYPE_UNSPECIFIED = 0; + // A CPU device. + TYPE_CPU = 1; + // A GPU device. + TYPE_GPU = 2; +} + +// Device represents a single computational device on an agent. +message Device { + // The index of the device. + int32 id = 1; + // The brand name of the device. + string brand = 2; + // The UUID of the device. + string uuid = 3; + // The type of the Device.
+ Type type = 4; +} diff --git a/proto/src/determined/notebook/v1/notebook.proto b/proto/src/determined/notebook/v1/notebook.proto new file mode 100644 index 00000000000..8bec624d907 --- /dev/null +++ b/proto/src/determined/notebook/v1/notebook.proto @@ -0,0 +1,22 @@ +syntax = "proto3"; + +package determined.notebook.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/notebookv1"; + +import "google/protobuf/timestamp.proto"; + +import "determined/container/v1/container.proto"; + +// Notebook is a Jupyter notebook in a containerized environment. +message Notebook { + // The id of the notebook. + string id = 1; + // The description of the notebook. + string description = 2; + // The time the notebook was started. + google.protobuf.Timestamp start_time = 4; + // The container running the notebook. + determined.container.v1.Container container = 6; + // The username of the user that created the notebook. + string username = 10; +} diff --git a/proto/src/determined/shell/v1/shell.proto b/proto/src/determined/shell/v1/shell.proto new file mode 100644 index 00000000000..5ceee308d2c --- /dev/null +++ b/proto/src/determined/shell/v1/shell.proto @@ -0,0 +1,26 @@ +syntax = "proto3"; + +package determined.shell.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/shellv1"; + +import "google/protobuf/timestamp.proto"; + +import "determined/container/v1/container.proto"; + +// Shell is an SSH server in a containerized environment. +message Shell { + // The id of the shell. + string id = 1; + // The description of the shell. + string description = 2; + // The time the shell was started. + google.protobuf.Timestamp start_time = 4; + // The container running the shell. + determined.container.v1.Container container = 6; + // The private key for this shell. + string private_key = 7; + // The public key for this shell. + string public_key = 8; + // The username of the user that created the shell.
+ string username = 10; +} diff --git a/proto/src/determined/tensorboard/v1/tensorboard.proto b/proto/src/determined/tensorboard/v1/tensorboard.proto new file mode 100644 index 00000000000..d1ed6071488 --- /dev/null +++ b/proto/src/determined/tensorboard/v1/tensorboard.proto @@ -0,0 +1,26 @@ +syntax = "proto3"; + +package determined.tensorboard.v1; +option go_package = "github.com/determined-ai/determined/proto/pkg/tensorboardv1"; + +import "google/protobuf/timestamp.proto"; + +import "determined/container/v1/container.proto"; + +// Tensorboard is a TensorBoard instance in a containerized environment. +message Tensorboard { + // The id of the tensorboard. + string id = 1; + // The description of the tensorboard. + string description = 2; + // The time the tensorboard was started. + google.protobuf.Timestamp start_time = 4; + // The container running the tensorboard. + determined.container.v1.Container container = 6; + // The experiment ids loaded into this tensorboard instance. + repeated int32 experiment_ids = 7; + // The trial ids loaded into this tensorboard instance. + repeated int32 trial_ids = 8; + // The username of the user that created the tensorboard. + string username = 10; +}