Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

The instance cannot be reused by another user #1204

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ jobs:
working-directory: runner
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
go-version: "1.22"
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v6
with:
version: v1.51.2
args: --issues-exit-code=0 --timeout=20m
version: v1.58
args: --timeout=20m
working-directory: runner
- name: Test
run: |
Expand All @@ -85,7 +85,7 @@ jobs:
go test -race $(go list ./... | grep -v /vendor/)
runner-compile:
needs: [ runner-test ]
needs: [runner-test]
defaults:
run:
working-directory: runner
Expand All @@ -94,14 +94,14 @@ jobs:
strategy:
matrix:
include:
- {goos: "linux", goarch: "amd64", runson: "ubuntu-latest"}
- { goos: "linux", goarch: "amd64", runson: "ubuntu-latest" }
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
go-version: "1.22"
- name: build
env:
GOOS: ${{ matrix.goos }}
Expand All @@ -122,11 +122,11 @@ jobs:
retention-days: 1

runner-upload:
needs: [ runner-compile ]
needs: [runner-compile]
runs-on: ubuntu-latest
steps:
- name: Install AWS
run: pip install awscli
run: pip install awscli
- name: Download Runner
uses: actions/download-artifact@v3
with:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
go test -race $(go list ./... | grep -v /vendor/)
runner-compile:
needs: [ runner-test ]
needs: [runner-test]
defaults:
run:
working-directory: runner
Expand All @@ -77,14 +77,14 @@ jobs:
strategy:
matrix:
include:
- {goos: "linux", goarch: "amd64", runson: "ubuntu-latest"}
- { goos: "linux", goarch: "amd64", runson: "ubuntu-latest" }
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.21.1
go-version: "1.22"
- name: build
env:
GOOS: ${{ matrix.goos }}
Expand All @@ -107,7 +107,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Install AWS
run: pip install awscli
run: pip install awscli
- name: Download Runner
uses: actions/download-artifact@v3
with:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.1
rev: v0.4.4
hooks:
- id: ruff
name: ruff common
args: ['--fix']
- id: ruff-format
- repo: https://github.com/golangci/golangci-lint
rev: v1.56.2
rev: v1.58.1
hooks:
- id: golangci-lint-full
entry: bash -c 'cd runner && golangci-lint run -D depguard --presets import,module,unused "$@"'
Expand Down
2 changes: 1 addition & 1 deletion runner/cmd/runner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func main() {
}

func start(tempDir string, homeDir string, workingDir string, httpPort int, logLevel int, version string) error {
if err := os.MkdirAll(tempDir, 0755); err != nil {
if err := os.MkdirAll(tempDir, 0o755); err != nil {
return tracerr.Errorf("Failed to create temp directory: %w", err)
}

Expand Down
18 changes: 12 additions & 6 deletions runner/cmd/shim/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func main() {
Name: "ssh-key",
Usage: "Public SSH key",
Required: true,
Destination: &args.Docker.PublicSSHKey,
Destination: &args.Docker.ConcatinatedPublicSSHKeys,
EnvVars: []string{"DSTACK_PUBLIC_SSH_KEY"},
},
&cli.BoolFlag{
Expand All @@ -112,7 +112,6 @@ func main() {
},
},
Action: func(c *cli.Context) error {

if args.Runner.BinaryPath == "" {
if err := args.DownloadRunner(); err != nil {
return cli.Exit(err, 1)
Expand Down Expand Up @@ -230,7 +229,10 @@ func writeHostInfo() {
panic(err)
}

f.Sync()
err = f.Sync()
if err != nil {
panic(err)
}
}

func getGpuInfo() [][]string {
Expand Down Expand Up @@ -272,7 +274,7 @@ func getGpuInfo() [][]string {
if err != nil {
log.Fatal(err)
}
fmt.Printf("gpu record %v\n", record)

gpus = append(gpus, record)
}
return gpus
Expand All @@ -284,6 +286,7 @@ func getInterfaces() []string {
if err != nil {
panic("cannot get interfaces")
}

for _, i := range ifaces {
addrs, err := i.Addrs()
if err != nil {
Expand All @@ -293,10 +296,10 @@ func getInterfaces() []string {
for _, addr := range addrs {
switch v := addr.(type) {
case *net.IPNet:
fmt.Println(v.IP)
if v.IP.IsLoopback() {
continue
}

addresses = append(addresses, addr.String())
}
}
Expand All @@ -307,10 +310,13 @@ func getInterfaces() []string {
func getDiskSize() uint64 {
var stat unix.Statfs_t
wd, err := os.Getwd()
if err != nil {
panic("cannot get current disk")
}
err = unix.Statfs(wd, &stat)
if err != nil {
panic("cannot get disk size")
}
unix.Statfs(wd, &stat)
size := stat.Bavail * uint64(stat.Bsize)
return size
}
Expand Down
2 changes: 1 addition & 1 deletion runner/internal/runner/api/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (ds DummyRunner) GetState() (shim.RunnerStatus, shim.ContainerStatus, strin
return ds.State, ds.ContainerStatus, "", ds.JobResult
}

func (ds DummyRunner) Run(context.Context, shim.DockerImageConfig) error {
func (ds DummyRunner) Run(context.Context, shim.TaskConfig) error {
return nil
}

Expand Down
8 changes: 4 additions & 4 deletions runner/internal/shim/api/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ func (s *ShimServer) SubmitPostHandler(w http.ResponseWriter, r *http.Request) (
return nil, &api.Error{Status: http.StatusConflict}
}

var body DockerTaskBody
var body TaskConfigBody
if err := api.DecodeJSONBody(w, r, &body, true); err != nil {
log.Println("Failed to decode submit body", "err", err)
return nil, err
}

go func(taskParams shim.DockerImageConfig) {
err := s.runner.Run(context.Background(), taskParams)
go func(taskConfig shim.TaskConfig) {
err := s.runner.Run(context.Background(), taskConfig)
if err != nil {
fmt.Printf("failed Run %v\n", err)
}
}(body.TaskParams())
}(body.GetTaskConfig())

return nil, nil
}
Expand Down
22 changes: 14 additions & 8 deletions runner/internal/shim/api/schemas.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package api

import "github.com/dstackai/dstack/runner/internal/shim"

type DockerTaskBody struct {
Username string `json:"username"`
Password string `json:"password"`
ImageName string `json:"image_name"`
ContainerName string `json:"container_name"`
ShmSize int64 `json:"shm_size"`
type TaskConfigBody struct {
Username string `json:"username"`
Password string `json:"password"`
ImageName string `json:"image_name"`
ContainerName string `json:"container_name"`
ShmSize int64 `json:"shm_size"`
PublicKeys []string `json:"public_keys"`
SshUser string `json:"ssh_user"`
SshKey string `json:"ssh_key"`
}

type StopBody struct {
Expand Down Expand Up @@ -36,13 +39,16 @@ type StopResponse struct {
State string `json:"state"`
}

func (ra DockerTaskBody) TaskParams() shim.DockerImageConfig {
res := shim.DockerImageConfig{
func (ra TaskConfigBody) GetTaskConfig() shim.TaskConfig {
res := shim.TaskConfig{
ImageName: ra.ImageName,
Username: ra.Username,
Password: ra.Password,
ContainerName: ra.ContainerName,
ShmSize: ra.ShmSize,
PublicKeys: ra.PublicKeys,
SshUser: ra.SshUser,
SshKey: ra.SshKey,
}
return res
}
2 changes: 1 addition & 1 deletion runner/internal/shim/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

type TaskRunner interface {
Run(context.Context, shim.DockerImageConfig) error
Run(context.Context, shim.TaskConfig) error
GetState() (shim.RunnerStatus, shim.ContainerStatus, string, shim.JobResult)
Stop(bool)
}
Expand Down
Loading
Loading