Skip to content

Commit

Permalink
The instance cannot be reused by another user (#1204)
Browse files Browse the repository at this point in the history
* Upload the user's SSH public key to the shim in addition to the job configuration

* Install only the project key on the instance

* Remove golangci-lint warnings from GitHub Actions

* Allow the use of Run.attach()
  • Loading branch information
Sergey Mezentsev authored May 10, 2024
1 parent b615a12 commit 789c4c8
Show file tree
Hide file tree
Showing 33 changed files with 479 additions and 144 deletions.
28 changes: 14 additions & 14 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ jobs:
working-directory: runner
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
go-version: "1.22"
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v6
with:
version: v1.51.2
args: --issues-exit-code=0 --timeout=20m
version: v1.58
args: --timeout=20m
working-directory: runner
- name: Test
run: |
Expand All @@ -85,7 +85,7 @@ jobs:
go test -race $(go list ./... | grep -v /vendor/)
runner-compile:
needs: [ runner-test ]
needs: [runner-test]
defaults:
run:
working-directory: runner
Expand All @@ -94,14 +94,14 @@ jobs:
strategy:
matrix:
include:
- {goos: "linux", goarch: "amd64", runson: "ubuntu-latest"}
- { goos: "linux", goarch: "amd64", runson: "ubuntu-latest" }
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
go-version: "1.22"
- name: build
env:
GOOS: ${{ matrix.goos }}
Expand All @@ -122,11 +122,11 @@ jobs:
retention-days: 1

runner-upload:
needs: [ runner-compile ]
needs: [runner-compile]
runs-on: ubuntu-latest
steps:
- name: Install AWS
run: pip install awscli
run: pip install awscli
- name: Download Runner
uses: actions/download-artifact@v3
with:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
go test -race $(go list ./... | grep -v /vendor/)
runner-compile:
needs: [ runner-test ]
needs: [runner-test]
defaults:
run:
working-directory: runner
Expand All @@ -77,14 +77,14 @@ jobs:
strategy:
matrix:
include:
- {goos: "linux", goarch: "amd64", runson: "ubuntu-latest"}
- { goos: "linux", goarch: "amd64", runson: "ubuntu-latest" }
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
go-version: "1.22"
- name: build
env:
GOOS: ${{ matrix.goos }}
Expand All @@ -107,7 +107,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Install AWS
run: pip install awscli
run: pip install awscli
- name: Download Runner
uses: actions/download-artifact@v3
with:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.1
rev: v0.4.4
hooks:
- id: ruff
name: ruff common
args: ['--fix']
- id: ruff-format
- repo: https://github.com/golangci/golangci-lint
rev: v1.56.2
rev: v1.58.1
hooks:
- id: golangci-lint-full
entry: bash -c 'cd runner && golangci-lint run -D depguard --presets import,module,unused "$@"'
Expand Down
2 changes: 1 addition & 1 deletion runner/cmd/runner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func main() {
}

func start(tempDir string, homeDir string, workingDir string, httpPort int, logLevel int, version string) error {
if err := os.MkdirAll(tempDir, 0755); err != nil {
if err := os.MkdirAll(tempDir, 0o755); err != nil {
return tracerr.Errorf("Failed to create temp directory: %w", err)
}

Expand Down
18 changes: 12 additions & 6 deletions runner/cmd/shim/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func main() {
Name: "ssh-key",
Usage: "Public SSH key",
Required: true,
Destination: &args.Docker.PublicSSHKey,
Destination: &args.Docker.ConcatinatedPublicSSHKeys,
EnvVars: []string{"DSTACK_PUBLIC_SSH_KEY"},
},
&cli.BoolFlag{
Expand All @@ -112,7 +112,6 @@ func main() {
},
},
Action: func(c *cli.Context) error {

if args.Runner.BinaryPath == "" {
if err := args.DownloadRunner(); err != nil {
return cli.Exit(err, 1)
Expand Down Expand Up @@ -230,7 +229,10 @@ func writeHostInfo() {
panic(err)
}

f.Sync()
err = f.Sync()
if err != nil {
panic(err)
}
}

func getGpuInfo() [][]string {
Expand Down Expand Up @@ -272,7 +274,7 @@ func getGpuInfo() [][]string {
if err != nil {
log.Fatal(err)
}
fmt.Printf("gpu record %v\n", record)

gpus = append(gpus, record)
}
return gpus
Expand All @@ -284,6 +286,7 @@ func getInterfaces() []string {
if err != nil {
panic("cannot get interfaces")
}

for _, i := range ifaces {
addrs, err := i.Addrs()
if err != nil {
Expand All @@ -293,10 +296,10 @@ func getInterfaces() []string {
for _, addr := range addrs {
switch v := addr.(type) {
case *net.IPNet:
fmt.Println(v.IP)
if v.IP.IsLoopback() {
continue
}

addresses = append(addresses, addr.String())
}
}
Expand All @@ -307,10 +310,13 @@ func getInterfaces() []string {
func getDiskSize() uint64 {
var stat unix.Statfs_t
wd, err := os.Getwd()
if err != nil {
panic("cannot get current disk")
}
err = unix.Statfs(wd, &stat)
if err != nil {
panic("cannot get disk size")
}
unix.Statfs(wd, &stat)
size := stat.Bavail * uint64(stat.Bsize)
return size
}
Expand Down
2 changes: 1 addition & 1 deletion runner/internal/runner/api/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (ds DummyRunner) GetState() (shim.RunnerStatus, shim.ContainerStatus, strin
return ds.State, ds.ContainerStatus, "", ds.JobResult
}

func (ds DummyRunner) Run(context.Context, shim.DockerImageConfig) error {
func (ds DummyRunner) Run(context.Context, shim.TaskConfig) error {
return nil
}

Expand Down
8 changes: 4 additions & 4 deletions runner/internal/shim/api/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ func (s *ShimServer) SubmitPostHandler(w http.ResponseWriter, r *http.Request) (
return nil, &api.Error{Status: http.StatusConflict}
}

var body DockerTaskBody
var body TaskConfigBody
if err := api.DecodeJSONBody(w, r, &body, true); err != nil {
log.Println("Failed to decode submit body", "err", err)
return nil, err
}

go func(taskParams shim.DockerImageConfig) {
err := s.runner.Run(context.Background(), taskParams)
go func(taskConfig shim.TaskConfig) {
err := s.runner.Run(context.Background(), taskConfig)
if err != nil {
fmt.Printf("failed Run %v\n", err)
}
}(body.TaskParams())
}(body.GetTaskConfig())

return nil, nil
}
Expand Down
22 changes: 14 additions & 8 deletions runner/internal/shim/api/schemas.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package api

import "github.com/dstackai/dstack/runner/internal/shim"

type DockerTaskBody struct {
Username string `json:"username"`
Password string `json:"password"`
ImageName string `json:"image_name"`
ContainerName string `json:"container_name"`
ShmSize int64 `json:"shm_size"`
type TaskConfigBody struct {
Username string `json:"username"`
Password string `json:"password"`
ImageName string `json:"image_name"`
ContainerName string `json:"container_name"`
ShmSize int64 `json:"shm_size"`
PublicKeys []string `json:"public_keys"`
SshUser string `json:"ssh_user"`
SshKey string `json:"ssh_key"`
}

type StopBody struct {
Expand Down Expand Up @@ -36,13 +39,16 @@ type StopResponse struct {
State string `json:"state"`
}

func (ra DockerTaskBody) TaskParams() shim.DockerImageConfig {
res := shim.DockerImageConfig{
func (ra TaskConfigBody) GetTaskConfig() shim.TaskConfig {
res := shim.TaskConfig{
ImageName: ra.ImageName,
Username: ra.Username,
Password: ra.Password,
ContainerName: ra.ContainerName,
ShmSize: ra.ShmSize,
PublicKeys: ra.PublicKeys,
SshUser: ra.SshUser,
SshKey: ra.SshKey,
}
return res
}
2 changes: 1 addition & 1 deletion runner/internal/shim/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

type TaskRunner interface {
Run(context.Context, shim.DockerImageConfig) error
Run(context.Context, shim.TaskConfig) error
GetState() (shim.RunnerStatus, shim.ContainerStatus, string, shim.JobResult)
Stop(bool)
}
Expand Down
Loading

0 comments on commit 789c4c8

Please sign in to comment.