Skip to content

Commit

Permalink
fix: Fail attempts to mount under /run/determined on HPC [FOUNDENG-48…
Browse files Browse the repository at this point in the history
…2] (determined-ai#710)

Singularity/Enroot do not support overlapping mounts.   Allowing a user
mount under /run/determined conflicts with generated mounts, so fail
such attempts with a clean error message.
  • Loading branch information
jerryharrow authored and dzhu committed Apr 25, 2023
1 parent 6f0cefd commit 5b98817
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 8 deletions.
4 changes: 2 additions & 2 deletions e2e_tests/tests/cluster/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def test_unsupported_option() -> None:
exp.run_failure_test(
conf.fixtures_path("failures/unsupported-slurm-option.yaml"),
conf.fixtures_path("failures/"),
"resources failed with non-zero exit code: unable to create "
+ "the launcher manifest: slurm option -G is not configurable",
"resources failed with non-zero exit code: unable to launch job: "
+ "slurm option -G is not configurable",
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,7 @@ func (m *dispatcherResourceManager) startLauncherJob(
)
if err != nil {
sendResourceStateChangedErrorResponse(ctx, err, msg,
"unable to create the launcher manifest")
"unable to launch job")
return
}

Expand Down
16 changes: 13 additions & 3 deletions master/pkg/tasks/dispatcher_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,10 @@ func (t *TaskSpec) ToDispatcherManifest(
launchParameters := launcher.NewLaunchParameters()
launchParameters.SetMode("batch")

mounts, userWantsDirMountedOnTmp, varTmpExists := getDataVolumes(t.Mounts)
mounts, userWantsDirMountedOnTmp, varTmpExists, err := getDataVolumes(t.Mounts)
if err != nil {
return nil, "", "", err
}

// When the container run type is enroot, we need a binding for the
// "/var/tmp" folder.
Expand Down Expand Up @@ -721,12 +724,19 @@ func getPayloadName(taskSpec *TaskSpec) string {
// Provide all task mount points as data volumes, and return true if there is a bind for /tmp
// Launcher requires that a Data object has a name; source, target & read-only are all
// that matter to Singularity.
func getDataVolumes(mounts []mount.Mount) ([]launcher.Data, bool, bool) {
func getDataVolumes(mounts []mount.Mount) ([]launcher.Data, bool, bool, error) {
volumes := []launcher.Data{}
userWantsDirMountedOnTmp := false
varTmpExists := false
var err error

for i, mount := range mounts {
if strings.HasPrefix(mount.Target, RunDir) {
err = fmt.Errorf("bind_mounts.container_path: %s not supported."+
"HPC launcher cannot mount under %s", mount.Target, RunDir)
return volumes, userWantsDirMountedOnTmp, varTmpExists, err
}

volume := *launcher.NewData()
volume.SetName("ds" + strconv.Itoa(i))
volume.SetSource(mount.Source)
Expand All @@ -743,7 +753,7 @@ func getDataVolumes(mounts []mount.Mount) ([]launcher.Data, bool, bool) {
}
}

return volumes, userWantsDirMountedOnTmp, varTmpExists
return volumes, userWantsDirMountedOnTmp, varTmpExists, err
}

// Used for creating a tmpfs mount type at the target location.
Expand Down
17 changes: 15 additions & 2 deletions master/pkg/tasks/dispatcher_task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ func Test_getDataVolumns(t *testing.T) {
"yyy", "bbb", "/tmp",
}

volumns, mountOnTmp, _ := getDataVolumes(arg)
volumns, mountOnTmp, _, _ := getDataVolumes(arg)

assert.Equal(t, mountOnTmp, true)
for i, v := range volumns {
Expand All @@ -940,16 +940,29 @@ func Test_getDataVolumns(t *testing.T) {
}
}

func Test_preventRunDeterminedMount(t *testing.T) {
arg := []mount.Mount{
{
Source: "any",
Target: "/run/determined/workdir",
},
}
volumes, _, _, err := getDataVolumes(arg)
assert.Equal(t, len(volumes), 0)
assert.ErrorContains(t, err, "/run/determined/workdir")
}

func Test_addTmpFs(t *testing.T) {
arg := []mount.Mount{}
volumes, _, _ := getDataVolumes(arg)
volumes, _, _, err := getDataVolumes(arg)
name := "varTmp"
target := varTmp
volumes = addTmpFs(volumes, name, target)
v := volumes[0]
assert.Equal(t, *v.Name, "varTmp")
assert.Equal(t, *v.Source, "tmpfs")
assert.Equal(t, *v.Target, "/var/tmp:x-create=dir")
assert.NilError(t, err)
}

func Test_getPayloadName(t *testing.T) {
Expand Down

0 comments on commit 5b98817

Please sign in to comment.