From 9621d3f31046c1377277f241f067c4cb807e164a Mon Sep 17 00:00:00 2001
From: Tibor Vass
Date: Fri, 1 Mar 2019 00:37:11 +0000
Subject: [PATCH] [WIP] Add --gpus support

Signed-off-by: Tibor Vass
---
Reviewer notes (this section sits after the `---` separator and is not part
of the commit message or of the applied diff):
- cli/command/container/opts.go: registers a `--gpus` flag backed by
  opts.GpuOpts and copies its parsed requests into Resources.DeviceRequests.
  Resources.PidsLimit is now assigned only when copts.pidsLimit is non-zero.
- opts/gpus.go: new GpuOpts flag value. Comment wording in this file was
  corrected during review (the originals were copied from the mount option),
  so the `index` blob id below no longer matches the file content exactly.
- vendor/.../container/host_config.go: adds the DeviceRequest type and the
  Resources.DeviceRequests field, and changes Resources.PidsLimit from int64
  to *int64.

 cli/command/container/opts.go                 |  10 +-
 opts/gpus.go                                  | 112 ++++++++++++++++++
 .../docker/api/types/container/host_config.go |  13 +-
 3 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100644 opts/gpus.go

diff --git a/cli/command/container/opts.go b/cli/command/container/opts.go
index 4546839629d0..6e51a4c4a38a 100644
--- a/cli/command/container/opts.go
+++ b/cli/command/container/opts.go
@@ -46,6 +46,7 @@ type containerOptions struct {
 	labels             opts.ListOpts
 	deviceCgroupRules  opts.ListOpts
 	devices            opts.ListOpts
+	gpus               opts.GpuOpts
 	ulimits            *opts.UlimitOpt
 	sysctls            *opts.MapOpts
 	publish            opts.ListOpts
@@ -166,6 +167,7 @@ func addFlags(flags *pflag.FlagSet) *containerOptions {
 	flags.VarP(&copts.attach, "attach", "a", "Attach to STDIN, STDOUT or STDERR")
 	flags.Var(&copts.deviceCgroupRules, "device-cgroup-rule", "Add a rule to the cgroup allowed devices list")
 	flags.Var(&copts.devices, "device", "Add a host device to the container")
+	flags.Var(&copts.gpus, "gpus", "Request GPU devices for the container ('all' to pass all GPUs)")
 	flags.VarP(&copts.env, "env", "e", "Set environment variables")
 	flags.Var(&copts.envFile, "env-file", "Read in a file of environment variables")
 	flags.StringVar(&copts.entrypoint, "entrypoint", "", "Overwrite the default ENTRYPOINT of the image")
@@ -527,6 +529,8 @@ func parse(flags *pflag.FlagSet, copts *containerOptions, serverOS string) (*con
 		}
 	}
 
+	deviceRequests := copts.gpus.Value()
+
 	resources := container.Resources{
 		CgroupParent:         copts.cgroupParent,
 		Memory:               copts.memory.Value(),
@@ -545,7 +549,6 @@ func parse(flags *pflag.FlagSet, copts *containerOptions, serverOS string) (*con
 		CPUQuota:             copts.cpuQuota,
 		CPURealtimePeriod:    copts.cpuRealtimePeriod,
 		CPURealtimeRuntime:   copts.cpuRealtimeRuntime,
-		PidsLimit:            copts.pidsLimit,
 		BlkioWeight:          copts.blkioWeight,
 		BlkioWeightDevice:    copts.blkioWeightDevice.GetList(),
 		BlkioDeviceReadBps:   copts.deviceReadBps.GetList(),
@@ -557,6 +560,11 @@ func parse(flags *pflag.FlagSet, copts *containerOptions, serverOS string) (*con
 		Ulimits:              copts.ulimits.GetList(),
 		DeviceCgroupRules:    copts.deviceCgroupRules.GetAll(),
 		Devices:              deviceMappings,
+		DeviceRequests:       deviceRequests,
+	}
+
+	if copts.pidsLimit != 0 {
+		resources.PidsLimit = &copts.pidsLimit
 	}
 
 	config := &container.Config{
diff --git a/opts/gpus.go b/opts/gpus.go
new file mode 100644
index 000000000000..9a023d96b472
--- /dev/null
+++ b/opts/gpus.go
@@ -0,0 +1,112 @@
+package opts
+
+import (
+	"encoding/csv"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/api/types/container"
+	"github.com/pkg/errors"
+)
+
+// GpuOpts is a flag Value type that collects the GPU device requests parsed by Set
+type GpuOpts struct {
+	values []container.DeviceRequest
+}
+
+func parseCount(s string) (int, error) {
+	i := -1 // returned unchanged when s is "all"
+	var err error
+	if s != "all" {
+		i, err = strconv.Atoi(s)
+		if err != nil {
+			err = errors.Wrap(err, "count must be an integer")
+		}
+	}
+	return i, err
+}
+
+// Set parses one comma-separated value into a DeviceRequest and appends it to the list
+func (o *GpuOpts) Set(value string) error {
+	csvReader := csv.NewReader(strings.NewReader(value))
+	fields, err := csvReader.Read()
+	if err != nil {
+		return err
+	}
+
+	req := container.DeviceRequest{Options: make(map[string]string), Capabilities: [][]string{{"gpu"}}}
+
+	// A field without '=' is parsed as the device count; key=value fields go through the switch
+	for _, field := range fields {
+		parts := strings.SplitN(field, "=", 2)
+		key := strings.ToLower(parts[0])
+
+		if len(parts) == 1 {
+			req.Count, err = parseCount(key)
+			if err != nil {
+				return err
+			}
+			continue
+		}
+
+		if len(parts) != 2 { // NOTE(review): cannot trigger; SplitN(.., 2) yields 1 or 2 parts and 1 is handled above
+			return fmt.Errorf("invalid field '%s' must be a key=value pair", field)
+		}
+
+		value := parts[1]
+		switch key {
+		case "driver":
+			req.Driver = value
+		case "count":
+			req.Count, err = parseCount(value)
+			if err != nil {
+				return err
+			}
+		case "device": // NOTE(review): fields were already split on ','; several IDs presumably need a CSV-quoted field - confirm intended syntax
+			req.DeviceIDs = strings.Split(value, ",")
+		case "caps": // "gpu" is always appended to the listed capabilities
+			req.Capabilities = [][]string{append(strings.Split(value, ","), "gpu")}
+		case "options":
+			r := csv.NewReader(strings.NewReader(value))
+			optFields, err := r.Read()
+			if err != nil {
+				return errors.Wrap(err, "error reading gpu options")
+			}
+			req.Options = make(map[string]string)
+			for _, optField := range optFields {
+				optParts := strings.SplitN(optField, "=", 2)
+				key := strings.ToLower(optParts[0])
+				var value string // stays "" when the option has no '='
+				if len(optParts) > 1 {
+					value = optParts[1]
+				}
+				req.Options[key] = value
+			}
+		default:
+			return fmt.Errorf("unexpected key '%s' in '%s'", key, field)
+		}
+	}
+
+	o.values = append(o.values, req)
+	return nil
+}
+
+// Type returns the type of this option
+func (o *GpuOpts) Type() string {
+	return "gpuRequest"
+}
+
+// String returns a string repr of this option
+func (o *GpuOpts) String() string {
+	gpus := []string{}
+	for _, gpu := range o.values {
+		gpus = append(gpus, fmt.Sprintf("%v", gpu))
+	}
+	return strings.Join(gpus, ", ")
+}
+
+// Value returns the device requests accumulated by Set
+func (o *GpuOpts) Value() []container.DeviceRequest {
+	return o.values
+}
diff --git a/vendor/github.com/docker/docker/api/types/container/host_config.go b/vendor/github.com/docker/docker/api/types/container/host_config.go
index 05dd16a92568..c710107702b8 100644
--- a/vendor/github.com/docker/docker/api/types/container/host_config.go
+++ b/vendor/github.com/docker/docker/api/types/container/host_config.go
@@ -244,6 +244,16 @@ func (n PidMode) Container() string {
 	return ""
 }
 
+// DeviceRequest represents a request for devices from a device driver.
+// Used by GPU device drivers.
+type DeviceRequest struct {
+	Driver       string            // Name of device driver
+	Count        int               // Number of devices to request (-1 = All)
+	DeviceIDs    []string          // List of device IDs as recognizable by the device driver
+	Capabilities [][]string        // An OR list of AND lists of device capabilities (e.g. "gpu")
+	Options      map[string]string // Options to pass onto the device driver
+}
+
 // DeviceMapping represents the device mapping between the host and the container.
 type DeviceMapping struct {
 	PathOnHost        string
@@ -327,6 +337,7 @@ type Resources struct {
 	CpusetMems           string          // CpusetMems 0-2, 0,1
 	Devices              []DeviceMapping // List of devices to map inside the container
 	DeviceCgroupRules    []string        // List of rule to be added to the device cgroup
+	DeviceRequests       []DeviceRequest // List of device requests for device drivers
 	DiskQuota            int64           // Disk limit (in bytes)
 	KernelMemory         int64           // Kernel memory limit (in bytes)
 	KernelMemoryTCP      int64           // Hard limit for kernel TCP buffer memory (in bytes)
@@ -334,7 +345,7 @@ type Resources struct {
 	MemorySwap           int64           // Total memory usage (memory + swap); set `-1` to enable unlimited swap
 	MemorySwappiness     *int64          // Tuning container memory swappiness behaviour
 	OomKillDisable       *bool           // Whether to disable OOM Killer or not
-	PidsLimit            int64           // Setting pids limit for a container
+	PidsLimit            *int64          // Setting PIDs limit for a container; Set `0` or `-1` for unlimited, or `null` to not change.
 	Ulimits              []*units.Ulimit // List of ulimits to be set in the container
 
 	// Applicable to Windows