From b0e7322941cd4ba646c39b7f5f8f603c3846c47c Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:55:00 -0600 Subject: [PATCH 1/6] UPSTREAM: 47599: Rerun init containers when the pod needs to be restarted --- .../apis/cri/testing/fake_runtime_service.go | 12 + .../kuberuntime/kuberuntime_container.go | 47 +- .../kuberuntime/kuberuntime_manager.go | 287 +++++------ .../kuberuntime/kuberuntime_manager_test.go | 454 ++++++++++++++++-- 4 files changed, 595 insertions(+), 205 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go index c26d13abcd3e..a053dea45cb8 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go @@ -55,6 +55,18 @@ type FakeRuntimeService struct { Sandboxes map[string]*FakePodSandbox } +func (r *FakeRuntimeService) GetContainerID(sandboxID, name string, attempt uint32) (string, error) { + r.Lock() + defer r.Unlock() + + for id, c := range r.Containers { + if c.SandboxID == sandboxID && c.Metadata.Name == name && c.Metadata.Attempt == attempt { + return id, nil + } + } + return "", fmt.Errorf("container (name, attempt, sandboxID)=(%q, %d, %q) not found", name, attempt, sandboxID) +} + func (r *FakeRuntimeService) SetFakeSandboxes(sandboxes []*FakePodSandbox) { r.Lock() defer r.Unlock() diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go index 7a999913e0a6..94e441d12574 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -624,10 +624,11 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru return } -// pruneInitContainers ensures that before we begin creating init containers, we have reduced the number -// of outstanding init containers still present. This reduces load on the container garbage collector -// by only preserving the most recent terminated init container. -func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) { +// pruneInitContainersBeforeStart ensures that before we begin creating init +// containers, we have reduced the number of outstanding init containers still +// present. This reduces load on the container garbage collector by only +// preserving the most recent terminated init container. +func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus) { // only the last execution of each init container should be preserved, and only preserve it if it is in the // list of init containers to keep. 
 	initContainerNames := sets.NewString()
@@ -645,14 +646,9 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
 			if count == 1 {
 				continue
 			}
-			// if there is a reason to preserve the older container, do so
-			if _, ok := initContainersToKeep[status.ID]; ok {
-				continue
-			}
-
 			// prune all other init containers that match this container name
 			glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
-			if err := m.runtimeService.RemoveContainer(status.ID.ID); err != nil {
+			if err := m.removeContainer(status.ID.ID); err != nil {
 				utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
 				continue
 			}
@@ -667,6 +663,37 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
 	}
 }
 
+// Remove all init containers. Note that this function does not check the state
+// of the container because it assumes all init containers have been stopped
+// before the call happens.
+func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
+	initContainerNames := sets.NewString()
+	for _, container := range pod.Spec.InitContainers {
+		initContainerNames.Insert(container.Name)
+	}
+	for name := range initContainerNames {
+		count := 0
+		for _, status := range podStatus.ContainerStatuses {
+			if status.Name != name || !initContainerNames.Has(status.Name) {
+				continue
+			}
+			count++
+			// Purge all init containers that match this container name
+			glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
+			if err := m.removeContainer(status.ID.ID); err != nil {
+				utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
+				continue
+			}
+			// Remove any references to this container
+			if _, ok := m.containerRefManager.GetRef(status.ID); ok {
+				m.containerRefManager.ClearRef(status.ID)
+			} else {
+				glog.Warningf("No ref for container %q", status.ID)
+			}
+		}
+	}
+}
+
 // findNextInitContainerToRun returns the status of the last failed container, the
 // next init container to start, or done if there are no further init containers.
 // Status is only returned if an init container is failed, in which case next will
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 636f4b724caa..7701cd42c2a9 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -328,34 +328,28 @@ type containerToKillInfo struct {
 	message string
 }
 
-// podContainerSpecChanges keeps information on changes that need to happen for a pod.
-type podContainerSpecChanges struct {
-	// Whether need to create a new sandbox.
+// podActions keeps information on what to do for a pod.
+type podActions struct {
+	// Stop all running (regular and init) containers and the sandbox for the pod.
+	KillPod bool
+	// Whether we need to create a new sandbox. If we need to kill the pod and create
+	// a new pod sandbox, all init containers need to be purged (i.e., removed).
 	CreateSandbox bool
 	// The id of existing sandbox. It is used for starting containers in ContainersToStart.
 	SandboxID string
 	// The attempt number of creating sandboxes for the pod.
Attempt uint32 - // ContainersToStart keeps a map of containers that need to be started, note that - // the key is index of the container inside pod.Spec.Containers, while - // the value is a message indicates why the container needs to start. - ContainersToStart map[int]string - // ContainersToKeep keeps a map of containers that need to be kept as is, note that - // the key is the container ID of the container, while - // the value is index of the container inside pod.Spec.Containers. - ContainersToKeep map[kubecontainer.ContainerID]int + // The next init container to start. + NextInitContainerToStart *v1.Container + // ContainersToStart keeps a list of indexes for the containers to start, + // where the index is the index of the specific container in the pod spec ( + // pod.Spec.Containers. + ContainersToStart []int // ContainersToKill keeps a map of containers that need to be killed, note that // the key is the container ID of the container, while // the value contains necessary information to kill a container. ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo - - // InitFailed indicates whether init containers are failed. - InitFailed bool - // InitContainersToKeep keeps a map of init containers that need to be kept as - // is, note that the key is the container ID of the container, while - // the value is index of the container inside pod.Spec.InitContainers. - InitContainersToKeep map[kubecontainer.ContainerID]int } // podSandboxChanged checks whether the spec of the pod is changed and returns @@ -394,141 +388,127 @@ func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *ku return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id } -// checkAndKeepInitContainers keeps all successfully completed init containers. If there -// are failing containers, only keep the first failing one. -func checkAndKeepInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) bool { - initFailed := false - - for i, container := range pod.Spec.InitContainers { - containerStatus := podStatus.FindContainerStatusByName(container.Name) - if containerStatus == nil { - continue - } - - if containerStatus.State == kubecontainer.ContainerStateRunning { - initContainersToKeep[containerStatus.ID] = i - continue - } +func containerChanged(container *v1.Container, containerStatus *kubecontainer.ContainerStatus) (uint64, uint64, bool) { + expectedHash := kubecontainer.HashContainer(container) + return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash +} - if containerStatus.State == kubecontainer.ContainerStateExited { - initContainersToKeep[containerStatus.ID] = i - } +func shouldRestartOnFailure(pod *v1.Pod) bool { + return pod.Spec.RestartPolicy != v1.RestartPolicyNever +} - if isContainerFailed(containerStatus) { - initFailed = true - break - } +func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool { + cStatus := podStatus.FindContainerStatusByName(c.Name) + if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning { + return false } - - return initFailed + return cStatus.ExitCode == 0 } -// computePodContainerChanges checks whether the pod spec has changed and returns the changes if true. -func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podContainerSpecChanges { +// computePodActions checks whether the pod spec has changed and returns the changes if true. 
+func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions { glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod) - sandboxChanged, attempt, sandboxID := m.podSandboxChanged(pod, podStatus) - changes := podContainerSpecChanges{ - CreateSandbox: sandboxChanged, - SandboxID: sandboxID, - Attempt: attempt, - ContainersToStart: make(map[int]string), - ContainersToKeep: make(map[kubecontainer.ContainerID]int), - InitContainersToKeep: make(map[kubecontainer.ContainerID]int), - ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo), + createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus) + changes := podActions{ + KillPod: createPodSandbox, + CreateSandbox: createPodSandbox, + SandboxID: sandboxID, + Attempt: attempt, + ContainersToStart: []int{}, + ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo), + } + + // If we need to (re-)create the pod sandbox, everything will need to be + // killed and recreated, and init containers should be purged. + if createPodSandbox { + if !shouldRestartOnFailure(pod) && attempt != 0 { + // Should not restart the pod, just return. + return changes + } + if len(pod.Spec.InitContainers) != 0 { + // Pod has init containers, return the first one. + changes.NextInitContainerToStart = &pod.Spec.InitContainers[0] + return changes + } + // Start all containers by default but exclude the ones that succeeded if + // RestartPolicy is OnFailure. + for idx, c := range pod.Spec.Containers { + if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure { + continue + } + changes.ContainersToStart = append(changes.ContainersToStart, idx) + } + return changes } - // check the status of init containers. - initFailed := false - // always reset the init containers if the sandbox is changed. - if !sandboxChanged { - // Keep all successfully completed containers. If there are failing containers, - // only keep the first failing one. - initFailed = checkAndKeepInitContainers(pod, podStatus, changes.InitContainersToKeep) + // Check initialization progress. + initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) + if !done { + if next != nil { + initFailed := initLastStatus != nil && isContainerFailed(initLastStatus) + if initFailed && !shouldRestartOnFailure(pod) { + changes.KillPod = true + } else { + changes.NextInitContainerToStart = next + } + } + // Initialization failed or still in progress. Skip inspecting non-init + // containers. + return changes } - changes.InitFailed = initFailed + // Number of running containers to keep. + keepCount := 0 // check the status of containers. - for index, container := range pod.Spec.Containers { + for idx, container := range pod.Spec.Containers { containerStatus := podStatus.FindContainerStatusByName(container.Name) + // If container does not exist, or is not running, check whether we + // need to restart it. 
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning { if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) { message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) glog.Info(message) - changes.ContainersToStart[index] = message + changes.ContainersToStart = append(changes.ContainersToStart, idx) } continue } - if sandboxChanged { - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Container %+v's pod sandbox is dead, the container will be recreated.", container) - glog.Info(message) - changes.ContainersToStart[index] = message - } - continue - } - - if initFailed { - // Initialization failed and Container exists. - // If we have an initialization failure everything will be killed anyway. - // If RestartPolicy is Always or OnFailure we restart containers that were running before. - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name) - glog.V(1).Info(message) - changes.ContainersToStart[index] = message - } + // The container is running, but kill the container if any of the following condition is met. + reason := "" + restart := shouldRestartOnFailure(pod) + if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed { + reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash) + // Restart regardless of the restart policy because the container + // spec changed. + restart = true + } else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure { + // If the container failed the liveness probe, we should kill it. + reason = "Container failed liveness probe." + } else { + // Keep the container. + keepCount += 1 continue } - expectedHash := kubecontainer.HashContainer(&container) - containerChanged := containerStatus.Hash != expectedHash - if containerChanged { - message := fmt.Sprintf("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", - pod.Name, container.Name, containerStatus.Hash, expectedHash) - glog.Info(message) - changes.ContainersToStart[index] = message - continue + // We need to kill the container, but if we also want to restart the + // container afterwards, make the intent clear in the message. Also do + // not kill the entire pod since we expect container to be running eventually. + message := reason + if restart { + message = fmt.Sprintf("%s. Container will be killed and recreated.", message) + changes.ContainersToStart = append(changes.ContainersToStart, idx) } - liveness, found := m.livenessManager.Get(containerStatus.ID) - if !found || liveness == proberesults.Success { - changes.ContainersToKeep[containerStatus.ID] = index - continue - } - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name) - glog.Info(message) - changes.ContainersToStart[index] = message + changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{ + name: containerStatus.Name, + container: &pod.Spec.Containers[idx], + message: message, } + glog.V(2).Infof("Container %q (%q) of pod %s: %s", container.Name, containerStatus.ID, format.Pod(pod), message) } - // Don't keep init containers if they are the only containers to keep. 
- if !sandboxChanged && len(changes.ContainersToStart) == 0 && len(changes.ContainersToKeep) == 0 { - changes.InitContainersToKeep = make(map[kubecontainer.ContainerID]int) - } - - // compute containers to be killed - runningContainerStatuses := podStatus.GetRunningContainerStatuses() - for _, containerStatus := range runningContainerStatuses { - _, keep := changes.ContainersToKeep[containerStatus.ID] - _, keepInit := changes.InitContainersToKeep[containerStatus.ID] - if !keep && !keepInit { - var podContainer *v1.Container - var killMessage string - for i, c := range pod.Spec.Containers { - if c.Name == containerStatus.Name { - podContainer = &pod.Spec.Containers[i] - killMessage = changes.ContainersToStart[i] - break - } - } - - changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{ - name: containerStatus.Name, - container: podContainer, - message: killMessage, - } - } + if keepCount == 0 && len(changes.ContainersToStart) == 0 { + changes.KillPod = true } return changes @@ -544,8 +524,8 @@ func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *v1.Pod, podS // 6. Create normal containers. func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) { // Step 1: Compute sandbox and container changes. - podContainerChanges := m.computePodContainerChanges(pod, podStatus) - glog.V(3).Infof("computePodContainerChanges got %+v for pod %q", podContainerChanges, format.Pod(pod)) + podContainerChanges := m.computePodActions(pod, podStatus) + glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod)) if podContainerChanges.CreateSandbox { ref, err := ref.GetReference(api.Scheme, pod) if err != nil { @@ -554,13 +534,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat if podContainerChanges.SandboxID != "" { m.recorder.Eventf(ref, v1.EventTypeNormal, "SandboxChanged", "Pod sandbox changed, it will be killed and re-created.") } else { - glog.V(4).Infof("SyncPod received new pod %q, will create a new sandbox for it", format.Pod(pod)) + glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod)) } } // Step 2: Kill the pod if the sandbox has changed. - if podContainerChanges.CreateSandbox || (len(podContainerChanges.ContainersToKeep) == 0 && len(podContainerChanges.ContainersToStart) == 0) { - if len(podContainerChanges.ContainersToKeep) == 0 && len(podContainerChanges.ContainersToStart) == 0 { + if podContainerChanges.KillPod { + if !podContainerChanges.CreateSandbox { glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod)) } else { glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod)) @@ -572,6 +552,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error()) return } + + if podContainerChanges.CreateSandbox { + m.purgeInitContainers(pod, podStatus) + } } else { // Step 3: kill any running containers in this pod which are not to keep. 
for containerID, containerInfo := range podContainerChanges.ContainersToKill { @@ -587,7 +571,9 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat } // Keep terminated init containers fairly aggressively controlled - m.pruneInitContainersBeforeStart(pod, podStatus, podContainerChanges.InitContainersToKeep) + // This is an optmization because container removals are typically handled + // by container garbage collector. + m.pruneInitContainersBeforeStart(pod, podStatus) // We pass the value of the podIP down to generatePodSandboxConfig and // generateContainerConfig, which in turn passes it to various other @@ -605,7 +591,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat // Step 4: Create a sandbox for the pod if necessary. podSandboxID := podContainerChanges.SandboxID - if podContainerChanges.CreateSandbox && len(podContainerChanges.ContainersToStart) > 0 { + if podContainerChanges.CreateSandbox { var msg string var err error @@ -647,30 +633,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat return } - // Step 5: start init containers. - status, next, done := findNextInitContainerToRun(pod, podStatus) - if status != nil && status.ExitCode != 0 { - // container initialization has failed, flag the pod as failed - initContainerResult := kubecontainer.NewSyncResult(kubecontainer.InitContainer, status.Name) - initContainerResult.Fail(kubecontainer.ErrRunInitContainer, fmt.Sprintf("init container %q exited with %d", status.Name, status.ExitCode)) - result.AddSyncResult(initContainerResult) - if pod.Spec.RestartPolicy == v1.RestartPolicyNever { - utilruntime.HandleError(fmt.Errorf("error running pod %q init container %q, restart=Never: %#v", format.Pod(pod), status.Name, status)) - return - } - utilruntime.HandleError(fmt.Errorf("Error running pod %q init container %q, restarting: %#v", format.Pod(pod), status.Name, status)) - } - if next != nil { - if len(podContainerChanges.ContainersToStart) == 0 { - glog.V(4).Infof("No containers to start, stopping at init container %+v in pod %v", next.Name, format.Pod(pod)) - return - } - - // If we need to start the next container, do so now then exit - container := next + // Step 5: start the init container. + if container := podContainerChanges.NextInitContainerToStart; container != nil { + // Start the next init container. startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) result.AddSyncResult(startContainerResult) - isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff) if isInBackOff { startContainerResult.Fail(err, msg) @@ -687,20 +654,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat // Successfully started the container; clear the entry in the failure glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod)) - return - } - if !done { - // init container still running - glog.V(4).Infof("An init container is still running in pod %v", format.Pod(pod)) - return - } - if podContainerChanges.InitFailed { - glog.V(4).Infof("Not all init containers have succeeded for pod %v", format.Pod(pod)) - return } // Step 6: start containers in podContainerChanges.ContainersToStart. 
- for idx := range podContainerChanges.ContainersToStart { + for _, idx := range podContainerChanges.ContainersToStart { container := &pod.Spec.Containers[idx] startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) result.AddSyncResult(startContainerResult) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 8ad095858f22..9a13c6b9e8fa 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -24,6 +24,7 @@ import ( cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -226,6 +227,40 @@ func verifyFakeContainerList(fakeRuntime *apitest.FakeRuntimeService, expected [ return actual, reflect.DeepEqual(expected, actual) } +type containerRecord struct { + container *v1.Container + attempt uint32 + state runtimeapi.ContainerState +} + +// Only extract the fields of interests. +type cRecord struct { + name string + attempt uint32 + state runtimeapi.ContainerState +} + +type cRecordList []*cRecord + +func (b cRecordList) Len() int { return len(b) } +func (b cRecordList) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b cRecordList) Less(i, j int) bool { + if b[i].name != b[j].name { + return b[i].name < b[j].name + } + return b[i].attempt < b[j].attempt +} + +func verifyContainerStatuses(t *testing.T, runtime *apitest.FakeRuntimeService, expected []*cRecord, desc string) { + actual := []*cRecord{} + for _, cStatus := range runtime.Containers { + actual = append(actual, &cRecord{name: cStatus.Metadata.Name, attempt: cStatus.Metadata.Attempt, state: cStatus.State}) + } + sort.Sort(cRecordList(expected)) + sort.Sort(cRecordList(actual)) + assert.Equal(t, expected, actual, desc) +} + func TestNewKubeRuntimeManager(t *testing.T) { _, _, _, err := createTestRuntimeManager() assert.NoError(t, err) @@ -582,8 +617,7 @@ func TestPruneInitContainers(t *testing.T) { podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - keep := map[kubecontainer.ContainerID]int{} - m.pruneInitContainersBeforeStart(pod, podStatus, keep) + m.pruneInitContainersBeforeStart(pod, podStatus) expectedContainers := []string{fakes[0].Id, fakes[2].Id} if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { t.Errorf("expected %q, got %q", expectedContainers, actual) @@ -625,18 +659,6 @@ func TestSyncPodWithInitContainers(t *testing.T) { }, } - // buildContainerID is an internal helper function to build container id from api pod - // and container with default attempt number 0. - buildContainerID := func(pod *v1.Pod, container v1.Container) string { - uid := string(pod.UID) - sandboxID := apitest.BuildSandboxName(&runtimeapi.PodSandboxMetadata{ - Name: pod.Name, - Uid: uid, - Namespace: pod.Namespace, - }) - return apitest.BuildContainerName(&runtimeapi.ContainerMetadata{Name: container.Name}, sandboxID) - } - backOff := flowcontrol.NewBackOff(time.Second, time.Minute) // 1. should only create the init container. 
@@ -644,34 +666,406 @@ func TestSyncPodWithInitContainers(t *testing.T) { assert.NoError(t, err) result := m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 1, len(fakeRuntime.Containers)) - initContainerID := buildContainerID(pod, initContainers[0]) - expectedContainers := []string{initContainerID} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) + expected := []*cRecord{ + {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, } + verifyContainerStatuses(t, fakeRuntime, expected, "start only the init container") // 2. should not create app container because init container is still running. podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 1, len(fakeRuntime.Containers)) - expectedContainers = []string{initContainerID} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) - } + verifyContainerStatuses(t, fakeRuntime, expected, "init container still running; do nothing") // 3. should create all app containers because init container finished. - fakeRuntime.StopContainer(initContainerID, 0) + // Stop init container instance 0. + sandboxIDs, err := m.getSandboxIDByPodUID(pod.UID, nil) + require.NoError(t, err) + sandboxID := sandboxIDs[0] + initID0, err := fakeRuntime.GetContainerID(sandboxID, initContainers[0].Name, 0) + require.NoError(t, err) + fakeRuntime.StopContainer(initID0, 0) + // Sync again. podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 3, len(fakeRuntime.Containers)) - expectedContainers = []string{initContainerID, buildContainerID(pod, containers[0]), - buildContainerID(pod, containers[1])} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) + expected = []*cRecord{ + {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, + {name: containers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + {name: containers[1].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + } + verifyContainerStatuses(t, fakeRuntime, expected, "init container completed; all app containers should be running") + + // 4. should restart the init container if needed to create a new podsandbox + // Stop the pod sandbox. + fakeRuntime.StopPodSandbox(sandboxID) + // Sync again. + podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) + assert.NoError(t, err) + result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) + assert.NoError(t, result.Error()) + expected = []*cRecord{ + // The first init container instance is purged and no longer visible. + // The second (attempt == 1) instance has been started and is running. + {name: initContainers[0].Name, attempt: 1, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + // All containers are killed. 
+		{name: containers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
+		{name: containers[1].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
+	}
+	verifyContainerStatuses(t, fakeRuntime, expected, "kill all app containers, purge the existing init container, and restart a new one")
+}
+
+// A helper function to get a basic pod and its status assuming all sandbox and
+// containers are running and ready.
+func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) {
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			UID: "12345678",
+			Name: "foo",
+			Namespace: "foo-ns",
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "foo1",
+					Image: "busybox",
+				},
+				{
+					Name: "foo2",
+					Image: "busybox",
+				},
+				{
+					Name: "foo3",
+					Image: "busybox",
+				},
+			},
+		},
+	}
+	status := &kubecontainer.PodStatus{
+		ID: pod.UID,
+		Name: pod.Name,
+		Namespace: pod.Namespace,
+		SandboxStatuses: []*runtimeapi.PodSandboxStatus{
+			{
+				Id: "sandboxID",
+				State: runtimeapi.PodSandboxState_SANDBOX_READY,
+				Metadata: &runtimeapi.PodSandboxMetadata{Name: pod.Name, Namespace: pod.Namespace, Uid: "sandboxuid", Attempt: uint32(0)},
+			},
+		},
+		ContainerStatuses: []*kubecontainer.ContainerStatus{
+			{
+				ID: kubecontainer.ContainerID{ID: "id1"},
+				Name: "foo1", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[0]),
+			},
+			{
+				ID: kubecontainer.ContainerID{ID: "id2"},
+				Name: "foo2", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[1]),
+			},
+			{
+				ID: kubecontainer.ContainerID{ID: "id3"},
+				Name: "foo3", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[2]),
+			},
+		},
+	}
+	return pod, status
+}
+
+func TestComputePodActions(t *testing.T) {
+	_, _, m, err := createTestRuntimeManager()
+	require.NoError(t, err)
+
+	// Creating a pair of reference pod and status for the test cases to refer
+	// to the specific fields.
+	basePod, baseStatus := makeBasePodAndStatus()
+	noAction := podActions{
+		SandboxID: baseStatus.SandboxStatuses[0].Id,
+		ContainersToStart: []int{},
+		ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{},
+	}
+
+	for desc, test := range map[string]struct {
+		mutatePodFn func(*v1.Pod)
+		mutateStatusFn func(*kubecontainer.PodStatus)
+		actions podActions
+	}{
+		"everything is good; do nothing": {
+			actions: noAction,
+		},
+		"start pod sandbox and all containers for a new pod": {
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				// No container or sandbox exists.
+ status.SandboxStatuses = []*runtimeapi.PodSandboxStatus{} + status.ContainerStatuses = []*kubecontainer.ContainerStatus{} + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + Attempt: uint32(0), + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart exited containers if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart failed containers if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, don't restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "don't restart containers if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // Don't restart any containers. 
+				status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[0].ExitCode = 0
+				status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[1].ExitCode = 111
+			},
+			actions: noAction,
+		},
+		"Kill pod and recreate everything if the pod sandbox is dead, and RestartPolicy == Always": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+			},
+			actions: podActions{
+				KillPod: true,
+				CreateSandbox: true,
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				Attempt: uint32(1),
+				ContainersToStart: []int{0, 1, 2},
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
+			},
+		},
+		"Kill pod and recreate all containers (except for the succeeded one) if the pod sandbox is dead, and RestartPolicy == OnFailure": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+				status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[1].ExitCode = 0
+			},
+			actions: podActions{
+				KillPod: true,
+				CreateSandbox: true,
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				Attempt: uint32(1),
+				ContainersToStart: []int{0, 2},
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
+			},
+		},
+
+		"Kill and recreate the container if the container's spec changed": {
+			mutatePodFn: func(pod *v1.Pod) {
+				pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+			},
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.ContainerStatuses[1].Hash = uint64(432423432)
+			},
+			actions: podActions{
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
+				ContainersToStart: []int{1},
+			},
+			// TODO: Add a test case for containers which failed the liveness
+			// check. Will need to fake the liveness check result.
+		},
+	} {
+		pod, status := makeBasePodAndStatus()
+		if test.mutatePodFn != nil {
+			test.mutatePodFn(pod)
+		}
+		if test.mutateStatusFn != nil {
+			test.mutateStatusFn(status)
+		}
+		actions := m.computePodActions(pod, status)
+		verifyActions(t, &test.actions, &actions, desc)
+	}
+}
+
+func getKillMap(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) map[kubecontainer.ContainerID]containerToKillInfo {
+	m := map[kubecontainer.ContainerID]containerToKillInfo{}
+	for _, i := range cIndexes {
+		m[status.ContainerStatuses[i].ID] = containerToKillInfo{
+			container: &pod.Spec.Containers[i],
+			name: pod.Spec.Containers[i].Name,
+		}
+	}
+	return m
+}
+
+func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
+	if actual.ContainersToKill != nil {
+		// Clear the message field since we don't need to verify the message.
+		for k, info := range actual.ContainersToKill {
+			info.message = ""
+			actual.ContainersToKill[k] = info
+		}
+	}
+	assert.Equal(t, expected, actual, desc)
+}
+
+func TestComputePodActionsWithInitContainers(t *testing.T) {
+	_, _, m, err := createTestRuntimeManager()
+	require.NoError(t, err)
+
+	// Creating a pair of reference pod and status for the test cases to refer
+	// to the specific fields.
+ basePod, baseStatus := makeBasePodAndStatusWithInitContainers() + noAction := podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + } + + for desc, test := range map[string]struct { + mutatePodFn func(*v1.Pod) + mutateStatusFn func(*kubecontainer.PodStatus) + actions podActions + }{ + "initialization completed; start all containers": { + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization in progress; do nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateRunning + }, + actions: noAction, + }, + "Kill pod and restart the first init container if the pod sandbox is dead": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + NextInitContainerToStart: &basePod.Spec.InitContainers[0], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; restart the last init container if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; restart the last init container if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; kill pod if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + KillPod: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + } { + pod, status := makeBasePodAndStatusWithInitContainers() + if test.mutatePodFn != nil { + test.mutatePodFn(pod) + } + if test.mutateStatusFn != nil { + test.mutateStatusFn(status) + } + actions := m.computePodActions(pod, status) + verifyActions(t, &test.actions, &actions, desc) + } +} + +func makeBasePodAndStatusWithInitContainers() (*v1.Pod, *kubecontainer.PodStatus) { + pod, status := makeBasePodAndStatus() + pod.Spec.InitContainers = []v1.Container{ + { + Name: "init1", + Image: "bar-image", + }, + { + Name: "init2", + 
Image: "bar-image", + }, + { + Name: "init3", + Image: "bar-image", + }, + } + // Replace the original statuses of the containers with those for the init + // containers. + status.ContainerStatuses = []*kubecontainer.ContainerStatus{ + { + ID: kubecontainer.ContainerID{ID: "initid1"}, + Name: "init1", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid2"}, + Name: "init2", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid3"}, + Name: "init3", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, } + return pod, status } From 55a74c867f65797febb89eb1211d53b5816f2d49 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:56:05 -0600 Subject: [PATCH 2/6] UPSTREAM: 48584: Move event type --- vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go | 1 + .../kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go index 0a0efc20cfdc..3eb2dab25f4d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go @@ -67,6 +67,7 @@ const ( FailedNodeAllocatableEnforcement = "FailedNodeAllocatableEnforcement" SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced" UnsupportedMountOption = "UnsupportedMountOption" + SandboxChanged = "SandboxChanged" // Image manager event reason list InvalidDiskCapacity = "InvalidDiskCapacity" diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 7701cd42c2a9..a7db18e44184 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -532,7 +532,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err) } if podContainerChanges.SandboxID != "" { - m.recorder.Eventf(ref, v1.EventTypeNormal, "SandboxChanged", "Pod sandbox changed, it will be killed and re-created.") + m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.") } else { glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod)) } From a6bf11f9a099541ad1ed467a46df1da7cfd99976 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:57:09 -0600 Subject: [PATCH 3/6] UPSTREAM: 48589: When faild create pod sandbox record event. 
--- vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go | 1 + .../pkg/kubelet/kuberuntime/kuberuntime_manager.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go index 3eb2dab25f4d..3c0ee08b3e3a 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go @@ -68,6 +68,7 @@ const ( SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced" UnsupportedMountOption = "UnsupportedMountOption" SandboxChanged = "SandboxChanged" + FailedCreatePodSandBox = "FailedCreatePodSandBox" // Image manager event reason list InvalidDiskCapacity = "InvalidDiskCapacity" diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index a7db18e44184..29141d2db77a 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -602,6 +602,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat if err != nil { createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg) glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err) + ref, err := ref.GetReference(api.Scheme, pod) + if err != nil { + glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err) + } + m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.") return } glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod)) From a1e92bc02ea47fb375fb48dd19228dd9217770c9 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:58:38 -0600 Subject: [PATCH 4/6] UPSTREAM: 48970: Recreate pod sandbox when the sandbox does not have an IP address. --- .../kubelet/kuberuntime/kuberuntime_manager.go | 6 ++++++ .../kuberuntime/kuberuntime_manager_test.go | 15 ++++++++++++++- .../k8s.io/kubernetes/pkg/kubelet/pleg/generic.go | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 29141d2db77a..93511729b2ac 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -385,6 +385,12 @@ func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *ku return true, sandboxStatus.Metadata.Attempt + 1, "" } + // Needs to create a new sandbox when the sandbox does not have an IP address. + if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" { + glog.V(2).Infof("Sandbox for pod %q has no IP address. 
Need to start a new one", format.Pod(pod)) + return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id + } + return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 9a13c6b9e8fa..f7655ea10a3b 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -752,6 +752,7 @@ func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) { Id: "sandboxID", State: runtimeapi.PodSandboxState_SANDBOX_READY, Metadata: &runtimeapi.PodSandboxMetadata{Name: pod.Name, Namespace: pod.Namespace, Uid: "sandboxuid", Attempt: uint32(0)}, + Network: &runtimeapi.PodSandboxNetworkStatus{Ip: "10.0.0.1"}, }, }, ContainerStatuses: []*kubecontainer.ContainerStatus{ @@ -885,7 +886,19 @@ func TestComputePodActions(t *testing.T) { ContainersToKill: getKillMap(basePod, baseStatus, []int{}), }, }, - + "Kill pod and recreate all containers if the PodSandbox does not have an IP": { + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].Network.Ip = "" + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, "Kill and recreate the container if the container's spec changed": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go b/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go index 6c6c980c3d8b..2d8a9a0c13d8 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go @@ -178,7 +178,7 @@ func (g *GenericPLEG) updateRelisTime(timestamp time.Time) { } // relist queries the container runtime for list of pods/containers, compare -// with the internal pods/containers, and generats events accordingly. +// with the internal pods/containers, and generates events accordingly. 
func (g *GenericPLEG) relist() { glog.V(5).Infof("GenericPLEG: Relisting") From b6a4fc25b842ae420c2937c1a307c03bd4ae5563 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 10:06:22 -0600 Subject: [PATCH 5/6] UPSTREAM: 50350: Wait for container cleanup before deletion --- .../k8s.io/kubernetes/pkg/kubelet/kubelet.go | 4 +-- .../kubernetes/pkg/kubelet/kubelet_pods.go | 27 +++++++++++++++++ .../kuberuntime/fake_kuberuntime_manager.go | 19 ++++++------ .../kuberuntime/kuberuntime_container.go | 15 ++-------- .../pkg/kubelet/kuberuntime/kuberuntime_gc.go | 29 +++++++------------ .../kuberuntime/kuberuntime_gc_test.go | 14 ++++----- .../kuberuntime/kuberuntime_manager.go | 10 +++---- .../kubelet/rkt/fake_rkt_interface_test.go | 17 ++++++----- .../k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go | 15 +++++----- .../kubernetes/pkg/kubelet/rkt/rkt_test.go | 8 ++--- 10 files changed, 83 insertions(+), 75 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go index 0dc725cf2026..7cfd4c23f6af 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go @@ -617,7 +617,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub klet.livenessManager, containerRefManager, machineInfo, - klet.podManager, + klet, kubeDeps.OSInterface, klet, httpClient, @@ -648,7 +648,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub klet, kubeDeps.Recorder, containerRefManager, - klet.podManager, + klet, klet.livenessManager, httpClient, klet.networkPlugin, diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go index e0fef3c95b5c..e5d499fee1cb 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go @@ -52,6 +52,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/envvars" + "k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/server/portforward" remotecommandserver "k8s.io/kubernetes/pkg/kubelet/server/remotecommand" @@ -743,6 +744,22 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool { return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) } +// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either: +// 1. The pod object is deleted +// 2. The pod's status is evicted +// 3. The pod's deletion timestamp is set, and containers are not running +func (kl *Kubelet) IsPodDeleted(uid types.UID) bool { + pod, podFound := kl.podManager.GetPodByUID(uid) + if !podFound { + return true + } + status, statusFound := kl.statusManager.GetPodStatus(pod.UID) + if !statusFound { + status = pod.Status + } + return eviction.PodIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) +} + // PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have // been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server. 
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool { @@ -751,6 +768,16 @@ func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bo glog.V(3).Infof("Pod %q is terminated, but some containers are still running", format.Pod(pod)) return false } + // pod's containers should be deleted + runtimeStatus, err := kl.podCache.Get(pod.UID) + if err != nil { + glog.V(3).Infof("Pod %q is terminated, Error getting runtimeStatus from the podCache: %s", format.Pod(pod), err) + return false + } + if len(runtimeStatus.ContainerStatuses) > 0 { + glog.V(3).Infof("Pod %q is terminated, but some containers have not been cleaned up: %+v", format.Pod(pod), runtimeStatus.ContainerStatuses) + return false + } if kl.podVolumesExist(pod.UID) && !kl.kubeletConfiguration.KeepTerminatedPodVolumes { // We shouldnt delete pods whose volumes have not been cleaned up if we are not keeping terminated pod volumes glog.V(3).Infof("Pod %q is terminated, but some volumes have not been cleaned up", format.Pod(pod)) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go index dca959a5e94e..ed9301afe00d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go @@ -24,7 +24,6 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/flowcontrol" - "k8s.io/kubernetes/pkg/api/v1" "k8s.io/kubernetes/pkg/credentialprovider" internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -43,17 +42,19 @@ func (f *fakeHTTP) Get(url string) (*http.Response, error) { return nil, f.err } -type fakePodGetter struct { - pods map[types.UID]*v1.Pod +type fakePodDeletionProvider struct { + pods map[types.UID]struct{} } -func newFakePodGetter() *fakePodGetter { - return &fakePodGetter{make(map[types.UID]*v1.Pod)} +func newFakePodDeletionProvider() *fakePodDeletionProvider { + return &fakePodDeletionProvider{ + pods: make(map[types.UID]struct{}), + } } -func (f *fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) { - pod, found := f.pods[uid] - return pod, found +func (f *fakePodDeletionProvider) IsPodDeleted(uid types.UID) bool { + _, found := f.pods[uid] + return !found } func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) { @@ -76,7 +77,7 @@ func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS return nil, err } - kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodGetter(), kubeRuntimeManager) + kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodDeletionProvider(), kubeRuntimeManager) kubeRuntimeManager.runtimeName = typedVersion.RuntimeName kubeRuntimeManager.imagePuller = images.NewImageManager( kubecontainer.FilterEventRecorder(recorder), diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go index 94e441d12574..31f0b8ed9ffe 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ 
b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -337,13 +337,12 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([]*runtimeapi.Container, error) { filter := &runtimeapi.ContainerFilter{} if !allContainers { - runningState := runtimeapi.ContainerState_CONTAINER_RUNNING filter.State = &runtimeapi.ContainerStateValue{ - State: runningState, + State: runtimeapi.ContainerState_CONTAINER_RUNNING, } } - containers, err := m.getContainersHelper(filter) + containers, err := m.runtimeService.ListContainers(filter) if err != nil { glog.Errorf("getKubeletContainers failed: %v", err) return nil, err @@ -352,16 +351,6 @@ func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([] return containers, nil } -// getContainers lists containers by filter. -func (m *kubeGenericRuntimeManager) getContainersHelper(filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) { - resp, err := m.runtimeService.ListContainers(filter) - if err != nil { - return nil, err - } - - return resp, err -} - // makeUID returns a randomly generated string. func makeUID() string { return fmt.Sprintf("%08x", rand.Uint32()) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go index fe8f1465945d..063a89d6b310 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -32,17 +32,17 @@ import ( // containerGC is the manager of garbage collection. type containerGC struct { - client internalapi.RuntimeService - manager *kubeGenericRuntimeManager - podGetter podGetter + client internalapi.RuntimeService + manager *kubeGenericRuntimeManager + podDeletionProvider podDeletionProvider } // NewContainerGC creates a new containerGC. -func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, manager *kubeGenericRuntimeManager) *containerGC { +func NewContainerGC(client internalapi.RuntimeService, podDeletionProvider podDeletionProvider, manager *kubeGenericRuntimeManager) *containerGC { return &containerGC{ - client: client, - manager: manager, - podGetter: podGetter, + client: client, + manager: manager, + podDeletionProvider: podDeletionProvider, } } @@ -52,8 +52,6 @@ type containerGCInfo struct { id string // The name of the container. name string - // The sandbox ID which this container belongs to - sandboxID string // Creation time for the container. createTime time.Time } @@ -159,12 +157,6 @@ func (cgc *containerGC) removeSandbox(sandboxID string) { } } -// isPodDeleted returns true if the pod is already deleted. -func (cgc *containerGC) isPodDeleted(podUID types.UID) bool { - _, found := cgc.podGetter.GetPodByUID(podUID) - return !found -} - // evictableContainers gets all containers that are evictable. Evictable containers are: not running // and created more than MinAge ago. 
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
index fe8f1465945d..063a89d6b310 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
@@ -32,17 +32,17 @@ import (
 
 // containerGC is the manager of garbage collection.
 type containerGC struct {
-	client    internalapi.RuntimeService
-	manager   *kubeGenericRuntimeManager
-	podGetter podGetter
+	client              internalapi.RuntimeService
+	manager             *kubeGenericRuntimeManager
+	podDeletionProvider podDeletionProvider
 }
 
 // NewContainerGC creates a new containerGC.
-func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, manager *kubeGenericRuntimeManager) *containerGC {
+func NewContainerGC(client internalapi.RuntimeService, podDeletionProvider podDeletionProvider, manager *kubeGenericRuntimeManager) *containerGC {
 	return &containerGC{
-		client:    client,
-		manager:   manager,
-		podGetter: podGetter,
+		client:              client,
+		manager:             manager,
+		podDeletionProvider: podDeletionProvider,
 	}
 }
@@ -52,8 +52,6 @@ type containerGCInfo struct {
 	id string
 	// The name of the container.
 	name string
-	// The sandbox ID which this container belongs to
-	sandboxID string
 	// Creation time for the container.
 	createTime time.Time
 }
@@ -159,12 +157,6 @@ func (cgc *containerGC) removeSandbox(sandboxID string) {
 	}
 }
 
-// isPodDeleted returns true if the pod is already deleted.
-func (cgc *containerGC) isPodDeleted(podUID types.UID) bool {
-	_, found := cgc.podGetter.GetPodByUID(podUID)
-	return !found
-}
-
 // evictableContainers gets all containers that are evictable. Evictable containers are: not running
 // and created more than MinAge ago.
 func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
@@ -191,7 +183,6 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
 			id:         container.Id,
 			name:       container.Metadata.Name,
 			createTime: createdAt,
-			sandboxID:  container.PodSandboxId,
 		}
 		key := evictUnit{
 			uid: labeledInfo.PodUID,
@@ -219,7 +210,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
 	// Remove deleted pod containers if all sources are ready.
 	if allSourcesReady {
 		for key, unit := range evictUnits {
-			if cgc.isPodDeleted(key.uid) || evictNonDeletedPods {
+			if cgc.podDeletionProvider.IsPodDeleted(key.uid) || evictNonDeletedPods {
 				cgc.removeOldestN(unit, len(unit)) // Remove all.
 				delete(evictUnits, key)
 			}
@@ -307,7 +298,7 @@ func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
 	}
 
 	for podUID, sandboxes := range sandboxesByPod {
-		if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
+		if cgc.podDeletionProvider.IsPodDeleted(podUID) || evictNonDeletedPods {
 			// Remove all evictable sandboxes if the pod has been removed.
 			// Note that the latest dead sandbox is also removed if there is
 			// already an active one.
@@ -333,7 +324,7 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
 		for _, dir := range dirs {
 			name := dir.Name()
 			podUID := types.UID(name)
-			if !cgc.isPodDeleted(podUID) {
+			if !cgc.podDeletionProvider.IsPodDeleted(podUID) {
 				continue
 			}
 			err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
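All three eviction paths above now ask one narrow question, IsPodDeleted, instead of fetching the whole pod object. Here is a self-contained sketch of that eviction loop; UID, staticProvider, and evictUnits are simplified stand-ins for illustration, not the kubelet's types.

    package main

    import "fmt"

    type UID string

    // podDeletionProvider matches the narrow dependency the GC keeps after this
    // change: it only needs to know whether a pod is gone from the API.
    type podDeletionProvider interface {
        IsPodDeleted(UID) bool
    }

    // staticProvider treats presence in the set as "not deleted".
    type staticProvider map[UID]struct{}

    func (s staticProvider) IsPodDeleted(uid UID) bool { _, ok := s[uid]; return !ok }

    // evictUnits drops every per-pod unit whose pod has been deleted (or all of
    // them when evictNonDeletedPods is set), mirroring the evictContainers loop.
    func evictUnits(units map[UID][]string, p podDeletionProvider, evictNonDeletedPods bool) {
        for uid := range units {
            if p.IsPodDeleted(uid) || evictNonDeletedPods {
                delete(units, uid)
            }
        }
    }

    func main() {
        units := map[UID][]string{"live": {"c1"}, "gone": {"c2", "c3"}}
        evictUnits(units, staticProvider{"live": {}}, false)
        fmt.Println(units) // only the "live" unit remains
    }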
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
index b32cd9428464..389b1e52cce0 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
@@ -34,11 +34,11 @@ func TestSandboxGC(t *testing.T) {
 	fakeRuntime, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 	makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool, createdAt int64) sandboxTemplate {
 		if withPodGetter {
 			// initialize the pod getter
-			fakePodGetter.pods[pod.UID] = pod
+			podDeletionProvider.pods[pod.UID] = struct{}{}
 		}
 		return sandboxTemplate{
 			pod:       pod,
@@ -162,13 +162,13 @@ func TestContainerGC(t *testing.T) {
 	fakeRuntime, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 	makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
 		container := makeTestContainer(containerName, "test-image")
 		pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
 		if podName != "deleted" {
 			// initialize the pod getter, explicitly exclude deleted pod
-			fakePodGetter.pods[pod.UID] = pod
+			podDeletionProvider.pods[pod.UID] = struct{}{}
 		}
 		return containerTemplate{
 			pod:       pod,
@@ -361,11 +361,11 @@ func TestPodLogDirectoryGC(t *testing.T) {
 	_, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 	fakeOS := m.osInterface.(*containertest.FakeOS)
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 
 	// pod log directories without corresponding pods should be removed.
-	fakePodGetter.pods["123"] = makeTestPod("foo1", "new", "123", nil)
-	fakePodGetter.pods["456"] = makeTestPod("foo2", "new", "456", nil)
+	podDeletionProvider.pods["123"] = struct{}{}
+	podDeletionProvider.pods["456"] = struct{}{}
 	files := []string{"123", "456", "789", "012"}
 	removed := []string{filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "012")}
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 93511729b2ac..d852c9a63100 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -64,9 +64,9 @@ var (
 	ErrVersionNotSupported = errors.New("Runtime api version is not supported")
 )
 
-// A subset of the pod.Manager interface extracted for garbage collection purposes.
-type podGetter interface {
-	GetPodByUID(kubetypes.UID) (*v1.Pod, bool)
+// podDeletionProvider can determine if a pod is deleted
+type podDeletionProvider interface {
+	IsPodDeleted(kubetypes.UID) bool
 }
 
 type kubeGenericRuntimeManager struct {
@@ -119,7 +119,7 @@ func NewKubeGenericRuntimeManager(
 	livenessManager proberesults.Manager,
 	containerRefManager *kubecontainer.RefManager,
 	machineInfo *cadvisorapi.MachineInfo,
-	podGetter podGetter,
+	podDeletionProvider podDeletionProvider,
 	osInterface kubecontainer.OSInterface,
 	runtimeHelper kubecontainer.RuntimeHelper,
 	httpClient types.HttpGetter,
@@ -182,7 +182,7 @@ func NewKubeGenericRuntimeManager(
 		imagePullQPS,
 		imagePullBurst)
 	kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
-	kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podGetter, kubeRuntimeManager)
+	kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podDeletionProvider, kubeRuntimeManager)
 
 	kubeRuntimeManager.versionCache = cache.NewObjectCache(
 		func() (interface{}, error) {
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
index 2684fad214a4..19b3665685df 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
@@ -28,7 +28,6 @@ import (
 	"google.golang.org/grpc"
 	"k8s.io/apimachinery/pkg/types"
 	kubetypes "k8s.io/apimachinery/pkg/types"
-	"k8s.io/kubernetes/pkg/api/v1"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )
 
@@ -179,17 +178,19 @@ func (f *fakeRktCli) Reset() {
 	f.err = nil
 }
 
-type fakePodGetter struct {
-	pods map[types.UID]*v1.Pod
+type fakePodDeletionProvider struct {
+	pods map[types.UID]struct{}
 }
 
-func newFakePodGetter() *fakePodGetter {
-	return &fakePodGetter{pods: make(map[types.UID]*v1.Pod)}
+func newFakePodDeletionProvider() *fakePodDeletionProvider {
+	return &fakePodDeletionProvider{
+		pods: make(map[types.UID]struct{}),
+	}
 }
 
-func (f fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) {
-	p, found := f.pods[uid]
-	return p, found
+func (f *fakePodDeletionProvider) IsPodDeleted(uid types.UID) bool {
+	_, found := f.pods[uid]
+	return !found
 }
 
 type fakeUnitGetter struct {
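The refactor narrows the dependency from a getter that returns the full pod to a single boolean query. A component that already exposes a GetPodByUID-style lookup can satisfy the new interface with a thin adapter; the sketch below is hypothetical (the real pod manager implementation has more to consider, such as mirror pods), but it shows the shape of the change.

    package main

    import "fmt"

    type UID string
    type Pod struct{ Name string }

    // podManager is a hypothetical stand-in for a component that historically
    // exposed the wider GetPodByUID lookup.
    type podManager struct{ pods map[UID]*Pod }

    func (m *podManager) GetPodByUID(uid UID) (*Pod, bool) { p, ok := m.pods[uid]; return p, ok }

    // deletionAdapter narrows the wide getter down to the one question the
    // garbage collectors ask after this refactor: "is the pod deleted?"
    type deletionAdapter struct{ m *podManager }

    func (a deletionAdapter) IsPodDeleted(uid UID) bool {
        _, found := a.m.GetPodByUID(uid)
        return !found
    }

    func main() {
        m := &podManager{pods: map[UID]*Pod{"u1": {Name: "web"}}}
        a := deletionAdapter{m}
        fmt.Println(a.IsPodDeleted("u1")) // false
        fmt.Println(a.IsPodDeleted("u2")) // true
    }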
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
index d16c92db311a..a366950675a3 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
@@ -158,7 +158,7 @@ type Runtime struct {
 	dockerKeyring credentialprovider.DockerKeyring
 
 	containerRefManager *kubecontainer.RefManager
-	podGetter           podGetter
+	podDeletionProvider podDeletionProvider
 	runtimeHelper       kubecontainer.RuntimeHelper
 	recorder            record.EventRecorder
 	livenessManager     proberesults.Manager
@@ -199,9 +199,9 @@ type podServiceDirective struct {
 var _ kubecontainer.Runtime = &Runtime{}
 var _ kubecontainer.DirectStreamingRuntime = &Runtime{}
 
-// TODO(yifan): This duplicates the podGetter in dockertools.
-type podGetter interface {
-	GetPodByUID(kubetypes.UID) (*v1.Pod, bool)
+// podDeletionProvider can determine if a pod is deleted
+type podDeletionProvider interface {
+	IsPodDeleted(kubetypes.UID) bool
 }
 
 // cliInterface wrapps the command line calls for testing purpose.
@@ -226,7 +226,7 @@ func New(
 	runtimeHelper kubecontainer.RuntimeHelper,
 	recorder record.EventRecorder,
 	containerRefManager *kubecontainer.RefManager,
-	podGetter podGetter,
+	podDeletionProvider podDeletionProvider,
 	livenessManager proberesults.Manager,
 	httpClient types.HttpGetter,
 	networkPlugin network.NetworkPlugin,
@@ -283,7 +283,7 @@ func New(
 		config:              config,
 		dockerKeyring:       credentialprovider.NewDockerKeyring(),
 		containerRefManager: containerRefManager,
-		podGetter:           podGetter,
+		podDeletionProvider: podDeletionProvider,
 		runtimeHelper:       runtimeHelper,
 		recorder:            recorder,
 		livenessManager:     livenessManager,
@@ -1988,8 +1988,7 @@ func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSo
 			removeCandidates = append(removeCandidates, pod)
 			continue
 		}
-		_, found := r.podGetter.GetPodByUID(uid)
-		if !found && allSourcesReady {
+		if r.podDeletionProvider.IsPodDeleted(uid) && allSourcesReady {
 			removeCandidates = append(removeCandidates, pod)
 			continue
 		}
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
index 16c661733bd6..a675fbffcbb7 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
@@ -1633,7 +1633,7 @@ func TestGarbageCollect(t *testing.T) {
 	fs := newFakeSystemd()
 	cli := newFakeRktCli()
 	fakeOS := kubetesting.NewFakeOS()
-	getter := newFakePodGetter()
+	deletionProvider := newFakePodDeletionProvider()
 	fug := newfakeUnitGetter()
 	frh := &containertesting.FakeRuntimeHelper{}
 
@@ -1641,7 +1641,7 @@ func TestGarbageCollect(t *testing.T) {
 		os:                  fakeOS,
 		cli:                 cli,
 		apisvc:              fr,
-		podGetter:           getter,
+		podDeletionProvider: deletionProvider,
 		systemd:             fs,
 		containerRefManager: kubecontainer.NewRefManager(),
 		unitGetter:          fug,
@@ -1827,7 +1827,7 @@ func TestGarbageCollect(t *testing.T) {
 		fr.pods = tt.pods
 
 		for _, p := range tt.apiPods {
-			getter.pods[p.UID] = p
+			deletionProvider.pods[p.UID] = struct{}{}
 		}
 
 		allSourcesReady := true
@@ -1859,7 +1859,7 @@ func TestGarbageCollect(t *testing.T) {
 		ctrl.Finish()
 		fakeOS.Removes = []string{}
 		fs.resetFailedUnits = []string{}
-		getter.pods = make(map[kubetypes.UID]*v1.Pod)
+		deletionProvider.pods = make(map[kubetypes.UID]struct{})
 	}
 }
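The rkt GarbageCollect hunk keeps the same guard as the CRI GC: a dead pod only becomes a removal candidate when it is deleted from the API and all config sources have synced, so a pod that simply has not arrived yet is not culled. A small stand-in sketch of that selection follows; deadPod, set, and removeCandidates are illustrative names, not rkt's types.

    package main

    import "fmt"

    type UID string

    type deletionProvider interface{ IsPodDeleted(UID) bool }

    // set treats presence as "the pod still exists in the API".
    type set map[UID]struct{}

    func (s set) IsPodDeleted(uid UID) bool { _, ok := s[uid]; return !ok }

    type deadPod struct{ uid UID }

    // removeCandidates mirrors the selection logic in the hunk above: deleted
    // from the API *and* all sources ready, otherwise the pod is kept for now.
    func removeCandidates(dead []deadPod, p deletionProvider, allSourcesReady bool) []deadPod {
        var out []deadPod
        for _, d := range dead {
            if p.IsPodDeleted(d.uid) && allSourcesReady {
                out = append(out, d)
            }
        }
        return out
    }

    func main() {
        dead := []deadPod{{"a"}, {"b"}}
        fmt.Println(removeCandidates(dead, set{"a": {}}, true)) // only "b" is removable
    }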
From ee7ec54b41a34eefaa550ef3672830d946890dad Mon Sep 17 00:00:00 2001
From: Joel Smith
Date: Fri, 13 Oct 2017 10:09:23 -0600
Subject: [PATCH 6/6] UPSTREAM: 53857: kubelet sync pod throws more detailed events

---
 vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go  |  7 +++++++
 vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go       | 11 +++++++++++
 .../pkg/kubelet/kuberuntime/kuberuntime_container.go  |  1 +
 .../pkg/kubelet/kuberuntime/kuberuntime_manager.go    |  5 +++++
 vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go   | 11 ++++-------
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
index 3c0ee08b3e3a..926938263dcf 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
@@ -32,6 +32,12 @@ const (
 	BackOffStartContainer = "BackOff"
 	ExceededGracePeriod   = "ExceededGracePeriod"
 
+	// Pod event reason list
+	FailedToKillPod                = "FailedKillPod"
+	FailedToCreatePodContainer     = "FailedCreatePodContainer"
+	FailedToMakePodDataDirectories = "Failed"
+	NetworkNotReady                = "NetworkNotReady"
+
 	// Image event reason list
 	PullingImage = "Pulling"
 	PulledImage  = "Pulled"
@@ -69,6 +75,7 @@ const (
 	UnsupportedMountOption = "UnsupportedMountOption"
 	SandboxChanged         = "SandboxChanged"
 	FailedCreatePodSandBox = "FailedCreatePodSandBox"
+	FailedStatusPodSandBox = "FailedPodSandBoxStatus"
 
 	// Image manager event reason list
 	InvalidDiskCapacity = "InvalidDiskCapacity"
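The new constants follow the existing convention in this file: the reason is a short, stable, machine-aggregatable string, while the human-readable detail goes into the event message. A toy sketch of that split is below; the recorder type here is a stand-in for illustration, not the real record.EventRecorder.

    package main

    import "fmt"

    // Event reason constants in the style of the list above: stable reasons,
    // variable detail in the formatted message.
    const (
        eventTypeWarning = "Warning"
        failedToKillPod  = "FailedKillPod"
        networkNotReady  = "NetworkNotReady"
    )

    // recorder is a toy stand-in for an event recorder; it only prints.
    type recorder struct{}

    func (recorder) Eventf(object interface{}, eventtype, reason, format string, args ...interface{}) {
        fmt.Printf("%s %s %v: %s\n", eventtype, reason, object, fmt.Sprintf(format, args...))
    }

    func main() {
        var r recorder
        r.Eventf("pod/web-0", eventTypeWarning, failedToKillPod, "error killing pod: %v", "grace period exceeded")
        r.Eventf("pod/web-0", eventTypeWarning, networkNotReady, "network is not ready: %v", []string{"cni plugin not initialized"})
    }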
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
index 7cfd4c23f6af..2d2b2e79eaad 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
@@ -1394,6 +1394,10 @@ func (kl *Kubelet) GetClusterDNS(pod *v1.Pod) ([]string, []string, bool, error)
 //
 // If any step of this workflow errors, the error is returned, and is repeated
 // on the next syncPod call.
+//
+// This operation writes all events that are dispatched in order to provide
+// the most accurate information possible about an error situation to aid debugging.
+// Callers should not throw an event if this operation returns an error.
 func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	// pull out the required options
 	pod := o.pod
@@ -1411,6 +1415,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		kl.statusManager.SetPodStatus(pod, apiPodStatus)
 		// we kill the pod with the specified grace period since this is a termination
 		if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 			// there was an error killing the pod, so we return that error directly
 			utilruntime.HandleError(err)
 			return err
@@ -1477,6 +1482,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
 		var syncErr error
 		if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 			syncErr = fmt.Errorf("error killing pod: %v", err)
 			utilruntime.HandleError(syncErr)
 		} else {
@@ -1491,6 +1497,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 
 	// If the network plugin is not ready, only start the pod if it uses the host network
 	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !kubecontainer.IsHostNetworkPod(pod) {
+		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "network is not ready: %v", rs)
 		return fmt.Errorf("network is not ready: %v", rs)
 	}
 
@@ -1533,6 +1540,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 			glog.V(2).Infof("Failed to update QoS cgroups while syncing pod: %v", err)
 		}
 		if err := pcm.EnsureExists(pod); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToCreatePodContainer, "unable to ensure pod container exists: %v", err)
 			return fmt.Errorf("failed to ensure that the pod: %v cgroups exist and are correctly applied: %v", pod.UID, err)
 		}
 	}
@@ -1565,6 +1573,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 
 	// Make data directories for the pod
 	if err := kl.makePodDataDirs(pod); err != nil {
+		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToMakePodDataDirectories, "error making pod data directories: %v", err)
 		glog.Errorf("Unable to make pod data directories for pod %q: %v", format.Pod(pod), err)
 		return err
 	}
@@ -1586,6 +1595,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	result := kl.containerRuntime.SyncPod(pod, apiPodStatus, podStatus, pullSecrets, kl.backOff)
 	kl.reasonCache.Update(pod.UID, result)
 	if err := result.Error(); err != nil {
+		// Do not record an event here, as we keep all event logging for sync pod failures
+		// local to container runtime so we get better errors
 		return err
 	}
 
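Each failing step in syncPod now records its own descriptive warning event and then returns the error, rather than relying on a generic catch-all in the pod worker. A minimal sketch of that convention under assumed, simplified types (eventSink, logSink, and syncStep are illustrative, not kubelet APIs):

    package main

    import (
        "errors"
        "fmt"
    )

    // eventSink is a narrow, assumed interface standing in for an event recorder.
    type eventSink interface {
        Eventf(object, eventtype, reason, format string, args ...interface{})
    }

    type logSink struct{}

    func (logSink) Eventf(object, eventtype, reason, format string, args ...interface{}) {
        fmt.Printf("%s %s %s: %s\n", eventtype, reason, object, fmt.Sprintf(format, args...))
    }

    // syncStep shows the convention adopted above: the failing step records a
    // specific event at the failure site and returns the error to the caller.
    func syncStep(rec eventSink, pod string, makeDataDirs func() error) error {
        if err := makeDataDirs(); err != nil {
            rec.Eventf(pod, "Warning", "Failed", "error making pod data directories: %v", err)
            return err
        }
        return nil
    }

    func main() {
        err := syncStep(logSink{}, "pod/web-0", func() error { return errors.New("read-only filesystem") })
        fmt.Println("sync returned:", err)
    }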
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
index 31f0b8ed9ffe..1f27afb33154 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
@@ -89,6 +89,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 	// Step 1: pull the image.
 	imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
 	if err != nil {
+		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
 		return msg, err
 	}
 
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index d852c9a63100..7e5d1d0d2937 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -619,6 +619,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat
 		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
 		if err != nil {
+			ref, err := ref.GetReference(api.Scheme, pod)
+			if err != nil {
+				glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
+			}
+			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
 			glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
 			result.Fail(err)
 			return
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go b/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
index ea2ace249cf3..bf5b50e4600b 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
@@ -162,6 +162,9 @@ func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
 		// the previous sync.
 		status, err := p.podCache.GetNewerThan(podUID, lastSyncTime)
 		if err != nil {
+			// This is the legacy event thrown by manage pod loop
+			// all other events are now dispatched from syncPodFn
+			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "error determining status: %v", err)
 			return err
 		}
 		err = p.syncPodFn(syncPodOptions{
@@ -179,14 +182,8 @@ func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
 			update.OnCompleteFunc(err)
 		}
 		if err != nil {
+			// IMPORTANT: we do not log errors here, the syncPodFn is responsible for logging errors
 			glog.Errorf("Error syncing pod %s (%q), skipping: %v", update.Pod.UID, format.Pod(update.Pod), err)
-			// if we failed sync, we throw more specific events for why it happened.
-			// as a result, i question the value of this event.
-			// TODO: determine if we can remove this in a future release.
-			// do not include descriptive text that can vary on why it failed so in a pathological
-			// scenario, kubelet does not create enough discrete events that miss default aggregation
-			// window.
-			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "Error syncing pod")
 		}
 		p.wrapUp(update.Pod.UID, err)
 	}
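With the sync function responsible for its own events, the pod worker keeps only the legacy "error determining status" event and otherwise just logs the outcome and hands it to wrap-up bookkeeping. A stripped-down sketch of that division of labor follows; managePod and its callbacks are illustrative stand-ins, not the real podWorkers API.

    package main

    import (
        "errors"
        "fmt"
    )

    // managePod models what the worker does after this change: run the sync
    // function, notify the completion callback, and log any error. It records
    // no generic event, because syncPodFn already emitted a specific one.
    func managePod(syncPodFn func() error, onComplete func(error)) {
        err := syncPodFn()
        if onComplete != nil {
            onComplete(err)
        }
        if err != nil {
            // No event here on purpose; the sync function owns event reporting.
            fmt.Println("Error syncing pod, skipping:", err)
        }
    }

    func main() {
        managePod(
            func() error { return errors.New("network is not ready") },
            func(err error) { fmt.Println("completed with:", err) },
        )
    }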