Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YUNIKORN-2724] Improve the signature of methods notifyTaskComplete() and ensureAppAndTaskCreated() #873

Closed
wants to merge 15 commits into from
Closed
58 changes: 28 additions & 30 deletions pkg/cache/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,41 +295,41 @@
}

func (ctx *Context) updateYuniKornPod(appID string, pod *v1.Pod) {
var app *Application
taskID := string(pod.UID)
if app = ctx.getApplication(appID); app != nil {
app := ctx.getApplication(appID)
if app != nil {
if task := app.GetTask(taskID); task != nil {
task.setTaskPod(pod)
}
}

// treat terminated pods like a remove
if utils.IsPodTerminated(pod) {
ctx.notifyTaskComplete(appID, taskID)
if app != nil {
ryankert01 marked this conversation as resolved.
Show resolved Hide resolved
ryankert01 marked this conversation as resolved.
Show resolved Hide resolved
ctx.notifyTaskComplete(app, taskID)
}
log.Log(log.ShimContext).Debug("Request to update terminated pod, removing from cache", zap.String("podName", pod.Name))
ctx.schedulerCache.RemovePod(pod)
return
}

if ctx.schedulerCache.UpdatePod(pod) {
// pod was accepted; ensure the application and task objects have been created
ctx.ensureAppAndTaskCreated(pod)
ctx.ensureAppAndTaskCreated(pod, app)
}
}

func (ctx *Context) ensureAppAndTaskCreated(pod *v1.Pod) {
// get app metadata
appMeta, ok := getAppMetadata(pod)
if !ok {
log.Log(log.ShimContext).Warn("BUG: Unable to retrieve application metadata from YuniKorn-managed Pod",
zap.String("namespace", pod.Namespace),
zap.String("name", pod.Name))
return
}

func (ctx *Context) ensureAppAndTaskCreated(pod *v1.Pod, app *Application) {
// add app if it doesn't already exist
app := ctx.GetApplication(appMeta.ApplicationID)
if app == nil {
// get app metadata
appMeta, ok := getAppMetadata(pod)
if !ok {
log.Log(log.ShimContext).Warn("BUG: Unable to retrieve application metadata from YuniKorn-managed Pod",
zap.String("namespace", pod.Namespace),
zap.String("name", pod.Name))
return

Check warning on line 331 in pkg/cache/context.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/context.go#L328-L331

Added lines #L328 - L331 were not covered by tests
}
app = ctx.AddApplication(&AddApplicationRequest{
Metadata: appMeta,
})
Expand Down Expand Up @@ -435,9 +435,7 @@

func (ctx *Context) deleteYuniKornPod(pod *v1.Pod) {
if taskMeta, ok := getTaskMetadata(pod); ok {
if app := ctx.GetApplication(taskMeta.ApplicationID); app != nil {
ctx.notifyTaskComplete(taskMeta.ApplicationID, taskMeta.TaskID)
}
ctx.notifyTaskComplete(ctx.GetApplication(taskMeta.ApplicationID), taskMeta.TaskID)
}

log.Log(log.ShimContext).Debug("removing pod from cache", zap.String("podName", pod.Name))
Expand Down Expand Up @@ -873,19 +871,19 @@
return ctx.schedulerCache.StartPodAllocation(podKey, nodeID)
}

func (ctx *Context) notifyTaskComplete(appID, taskID string) {
log.Log(log.ShimContext).Debug("NotifyTaskComplete",
zap.String("appID", appID),
zap.String("taskID", taskID))
if app := ctx.GetApplication(appID); app != nil {
log.Log(log.ShimContext).Debug("release allocation",
zap.String("appID", appID),
func (ctx *Context) notifyTaskComplete(app *Application, taskID string) {
if app == nil {
log.Log(log.ShimContext).Debug("In notifyTaskComplete but app is nil",
zap.String("taskID", taskID))
ev := NewSimpleTaskEvent(appID, taskID, CompleteTask)
dispatcher.Dispatch(ev)
if app.GetApplicationState() == ApplicationStates().Resuming {
dispatcher.Dispatch(NewSimpleApplicationEvent(appID, AppTaskCompleted))
}
return
}
log.Log(log.ShimContext).Debug("notifyTaskComplete and release allocation",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can keep the original message, "release allocation"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the message to "notifyTaskComplete and release allocation" because I combined the two log statements so that it only logs once. Perhaps I could change it to "release allocation in notifyTaskComplete"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. It is fine to keep the current message :)

zap.String("appID", app.applicationID),
zap.String("taskID", taskID))
ev := NewSimpleTaskEvent(app.applicationID, taskID, CompleteTask)
dispatcher.Dispatch(ev)
if app.GetApplicationState() == ApplicationStates().Resuming {
dispatcher.Dispatch(NewSimpleApplicationEvent(app.applicationID, AppTaskCompleted))

Check warning on line 886 in pkg/cache/context.go

View check run for this annotation

Codecov / codecov/patch

pkg/cache/context.go#L886

Added line #L886 was not covered by tests
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/cache/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1034,7 +1034,7 @@ func TestTaskReleaseAfterRecovery(t *testing.T) {
assert.Equal(t, len(app.GetBoundTasks()), 2)

// release one of the tasks
context.notifyTaskComplete(appID, pod2UID)
context.notifyTaskComplete(app, pod2UID)

// wait for release
err = utils.WaitForCondition(func() bool {
Expand Down Expand Up @@ -2075,7 +2075,7 @@ func TestTaskRemoveOnCompletion(t *testing.T) {
assert.NilError(t, err)

// mark completion
context.notifyTaskComplete(appID, taskUID1)
context.notifyTaskComplete(app, taskUID1)
err = utils.WaitForCondition(func() bool {
return task.GetTaskState() == TaskStates().Completed
}, 100*time.Millisecond, time.Second)
Expand Down