From b0e7322941cd4ba646c39b7f5f8f603c3846c47c Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:55:00 -0600 Subject: [PATCH 1/6] UPSTREAM: 47599: Rerun init containers when the pod needs to be restarted --- .../apis/cri/testing/fake_runtime_service.go | 12 + .../kuberuntime/kuberuntime_container.go | 47 +- .../kuberuntime/kuberuntime_manager.go | 287 +++++------ .../kuberuntime/kuberuntime_manager_test.go | 454 ++++++++++++++++-- 4 files changed, 595 insertions(+), 205 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go index c26d13abcd3e..a053dea45cb8 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/apis/cri/testing/fake_runtime_service.go @@ -55,6 +55,18 @@ type FakeRuntimeService struct { Sandboxes map[string]*FakePodSandbox } +func (r *FakeRuntimeService) GetContainerID(sandboxID, name string, attempt uint32) (string, error) { + r.Lock() + defer r.Unlock() + + for id, c := range r.Containers { + if c.SandboxID == sandboxID && c.Metadata.Name == name && c.Metadata.Attempt == attempt { + return id, nil + } + } + return "", fmt.Errorf("container (name, attempt, sandboxID)=(%q, %d, %q) not found", name, attempt, sandboxID) +} + func (r *FakeRuntimeService) SetFakeSandboxes(sandboxes []*FakePodSandbox) { r.Lock() defer r.Unlock() diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go index 7a999913e0a6..94e441d12574 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -624,10 +624,11 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru return } -// pruneInitContainers ensures that before we begin creating init containers, we have reduced the number -// of outstanding init containers still present. This reduces load on the container garbage collector -// by only preserving the most recent terminated init container. -func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) { +// pruneInitContainersBeforeStart ensures that before we begin creating init +// containers, we have reduced the number of outstanding init containers still +// present. This reduces load on the container garbage collector by only +// preserving the most recent terminated init container. +func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod, podStatus *kubecontainer.PodStatus) { // only the last execution of each init container should be preserved, and only preserve it if it is in the // list of init containers to keep. 
 	initContainerNames := sets.NewString()
@@ -645,14 +646,9 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
 			if count == 1 {
 				continue
 			}
-			// if there is a reason to preserve the older container, do so
-			if _, ok := initContainersToKeep[status.ID]; ok {
-				continue
-			}
-
 			// prune all other init containers that match this container name
 			glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
-			if err := m.runtimeService.RemoveContainer(status.ID.ID); err != nil {
+			if err := m.removeContainer(status.ID.ID); err != nil {
 				utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
 				continue
 			}
@@ -667,6 +663,37 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(pod *v1.Pod,
 	}
 }
 
+// Remove all init containers. Note that this function does not check the state
+// of the container because it assumes all init containers have been stopped
+// before the call happens.
+func (m *kubeGenericRuntimeManager) purgeInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus) {
+	initContainerNames := sets.NewString()
+	for _, container := range pod.Spec.InitContainers {
+		initContainerNames.Insert(container.Name)
+	}
+	for name := range initContainerNames {
+		count := 0
+		for _, status := range podStatus.ContainerStatuses {
+			if status.Name != name || !initContainerNames.Has(status.Name) {
+				continue
+			}
+			count++
+			// Purge all init containers that match this container name
+			glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
+			if err := m.removeContainer(status.ID.ID); err != nil {
+				utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
+				continue
+			}
+			// Remove any references to this container
+			if _, ok := m.containerRefManager.GetRef(status.ID); ok {
+				m.containerRefManager.ClearRef(status.ID)
+			} else {
+				glog.Warningf("No ref for container %q", status.ID)
+			}
+		}
+	}
+}
+
 // findNextInitContainerToRun returns the status of the last failed container, the
 // next init container to start, or done if there are no further init containers.
 // Status is only returned if an init container is failed, in which case next will
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 636f4b724caa..7701cd42c2a9 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -328,34 +328,28 @@ type containerToKillInfo struct {
 	message string
 }
 
-// podContainerSpecChanges keeps information on changes that need to happen for a pod.
-type podContainerSpecChanges struct {
-	// Whether need to create a new sandbox.
+// podActions keeps information on what to do for a pod.
+type podActions struct {
+	// Stop all running (regular and init) containers and the sandbox for the pod.
+	KillPod bool
+	// Whether we need to create a new sandbox. If we need to kill the pod and create
+	// a new pod sandbox, all init containers need to be purged (i.e., removed).
 	CreateSandbox bool
 	// The id of existing sandbox. It is used for starting containers in ContainersToStart.
 	SandboxID string
 	// The attempt number of creating sandboxes for the pod.
Attempt uint32 - // ContainersToStart keeps a map of containers that need to be started, note that - // the key is index of the container inside pod.Spec.Containers, while - // the value is a message indicates why the container needs to start. - ContainersToStart map[int]string - // ContainersToKeep keeps a map of containers that need to be kept as is, note that - // the key is the container ID of the container, while - // the value is index of the container inside pod.Spec.Containers. - ContainersToKeep map[kubecontainer.ContainerID]int + // The next init container to start. + NextInitContainerToStart *v1.Container + // ContainersToStart keeps a list of indexes for the containers to start, + // where the index is the index of the specific container in the pod spec ( + // pod.Spec.Containers. + ContainersToStart []int // ContainersToKill keeps a map of containers that need to be killed, note that // the key is the container ID of the container, while // the value contains necessary information to kill a container. ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo - - // InitFailed indicates whether init containers are failed. - InitFailed bool - // InitContainersToKeep keeps a map of init containers that need to be kept as - // is, note that the key is the container ID of the container, while - // the value is index of the container inside pod.Spec.InitContainers. - InitContainersToKeep map[kubecontainer.ContainerID]int } // podSandboxChanged checks whether the spec of the pod is changed and returns @@ -394,141 +388,127 @@ func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *ku return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id } -// checkAndKeepInitContainers keeps all successfully completed init containers. If there -// are failing containers, only keep the first failing one. -func checkAndKeepInitContainers(pod *v1.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.ContainerID]int) bool { - initFailed := false - - for i, container := range pod.Spec.InitContainers { - containerStatus := podStatus.FindContainerStatusByName(container.Name) - if containerStatus == nil { - continue - } - - if containerStatus.State == kubecontainer.ContainerStateRunning { - initContainersToKeep[containerStatus.ID] = i - continue - } +func containerChanged(container *v1.Container, containerStatus *kubecontainer.ContainerStatus) (uint64, uint64, bool) { + expectedHash := kubecontainer.HashContainer(container) + return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash +} - if containerStatus.State == kubecontainer.ContainerStateExited { - initContainersToKeep[containerStatus.ID] = i - } +func shouldRestartOnFailure(pod *v1.Pod) bool { + return pod.Spec.RestartPolicy != v1.RestartPolicyNever +} - if isContainerFailed(containerStatus) { - initFailed = true - break - } +func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool { + cStatus := podStatus.FindContainerStatusByName(c.Name) + if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning { + return false } - - return initFailed + return cStatus.ExitCode == 0 } -// computePodContainerChanges checks whether the pod spec has changed and returns the changes if true. -func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podContainerSpecChanges { +// computePodActions checks whether the pod spec has changed and returns the changes if true. 
+func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions { glog.V(5).Infof("Syncing Pod %q: %+v", format.Pod(pod), pod) - sandboxChanged, attempt, sandboxID := m.podSandboxChanged(pod, podStatus) - changes := podContainerSpecChanges{ - CreateSandbox: sandboxChanged, - SandboxID: sandboxID, - Attempt: attempt, - ContainersToStart: make(map[int]string), - ContainersToKeep: make(map[kubecontainer.ContainerID]int), - InitContainersToKeep: make(map[kubecontainer.ContainerID]int), - ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo), + createPodSandbox, attempt, sandboxID := m.podSandboxChanged(pod, podStatus) + changes := podActions{ + KillPod: createPodSandbox, + CreateSandbox: createPodSandbox, + SandboxID: sandboxID, + Attempt: attempt, + ContainersToStart: []int{}, + ContainersToKill: make(map[kubecontainer.ContainerID]containerToKillInfo), + } + + // If we need to (re-)create the pod sandbox, everything will need to be + // killed and recreated, and init containers should be purged. + if createPodSandbox { + if !shouldRestartOnFailure(pod) && attempt != 0 { + // Should not restart the pod, just return. + return changes + } + if len(pod.Spec.InitContainers) != 0 { + // Pod has init containers, return the first one. + changes.NextInitContainerToStart = &pod.Spec.InitContainers[0] + return changes + } + // Start all containers by default but exclude the ones that succeeded if + // RestartPolicy is OnFailure. + for idx, c := range pod.Spec.Containers { + if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure { + continue + } + changes.ContainersToStart = append(changes.ContainersToStart, idx) + } + return changes } - // check the status of init containers. - initFailed := false - // always reset the init containers if the sandbox is changed. - if !sandboxChanged { - // Keep all successfully completed containers. If there are failing containers, - // only keep the first failing one. - initFailed = checkAndKeepInitContainers(pod, podStatus, changes.InitContainersToKeep) + // Check initialization progress. + initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) + if !done { + if next != nil { + initFailed := initLastStatus != nil && isContainerFailed(initLastStatus) + if initFailed && !shouldRestartOnFailure(pod) { + changes.KillPod = true + } else { + changes.NextInitContainerToStart = next + } + } + // Initialization failed or still in progress. Skip inspecting non-init + // containers. + return changes } - changes.InitFailed = initFailed + // Number of running containers to keep. + keepCount := 0 // check the status of containers. - for index, container := range pod.Spec.Containers { + for idx, container := range pod.Spec.Containers { containerStatus := podStatus.FindContainerStatusByName(container.Name) + // If container does not exist, or is not running, check whether we + // need to restart it. 
if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning { if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) { message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container) glog.Info(message) - changes.ContainersToStart[index] = message + changes.ContainersToStart = append(changes.ContainersToStart, idx) } continue } - if sandboxChanged { - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Container %+v's pod sandbox is dead, the container will be recreated.", container) - glog.Info(message) - changes.ContainersToStart[index] = message - } - continue - } - - if initFailed { - // Initialization failed and Container exists. - // If we have an initialization failure everything will be killed anyway. - // If RestartPolicy is Always or OnFailure we restart containers that were running before. - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name) - glog.V(1).Info(message) - changes.ContainersToStart[index] = message - } + // The container is running, but kill the container if any of the following condition is met. + reason := "" + restart := shouldRestartOnFailure(pod) + if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed { + reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash) + // Restart regardless of the restart policy because the container + // spec changed. + restart = true + } else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure { + // If the container failed the liveness probe, we should kill it. + reason = "Container failed liveness probe." + } else { + // Keep the container. + keepCount += 1 continue } - expectedHash := kubecontainer.HashContainer(&container) - containerChanged := containerStatus.Hash != expectedHash - if containerChanged { - message := fmt.Sprintf("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", - pod.Name, container.Name, containerStatus.Hash, expectedHash) - glog.Info(message) - changes.ContainersToStart[index] = message - continue + // We need to kill the container, but if we also want to restart the + // container afterwards, make the intent clear in the message. Also do + // not kill the entire pod since we expect container to be running eventually. + message := reason + if restart { + message = fmt.Sprintf("%s. Container will be killed and recreated.", message) + changes.ContainersToStart = append(changes.ContainersToStart, idx) } - liveness, found := m.livenessManager.Get(containerStatus.ID) - if !found || liveness == proberesults.Success { - changes.ContainersToKeep[containerStatus.ID] = index - continue - } - if pod.Spec.RestartPolicy != v1.RestartPolicyNever { - message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name) - glog.Info(message) - changes.ContainersToStart[index] = message + changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{ + name: containerStatus.Name, + container: &pod.Spec.Containers[idx], + message: message, } + glog.V(2).Infof("Container %q (%q) of pod %s: %s", container.Name, containerStatus.ID, format.Pod(pod), message) } - // Don't keep init containers if they are the only containers to keep. 
- if !sandboxChanged && len(changes.ContainersToStart) == 0 && len(changes.ContainersToKeep) == 0 { - changes.InitContainersToKeep = make(map[kubecontainer.ContainerID]int) - } - - // compute containers to be killed - runningContainerStatuses := podStatus.GetRunningContainerStatuses() - for _, containerStatus := range runningContainerStatuses { - _, keep := changes.ContainersToKeep[containerStatus.ID] - _, keepInit := changes.InitContainersToKeep[containerStatus.ID] - if !keep && !keepInit { - var podContainer *v1.Container - var killMessage string - for i, c := range pod.Spec.Containers { - if c.Name == containerStatus.Name { - podContainer = &pod.Spec.Containers[i] - killMessage = changes.ContainersToStart[i] - break - } - } - - changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{ - name: containerStatus.Name, - container: podContainer, - message: killMessage, - } - } + if keepCount == 0 && len(changes.ContainersToStart) == 0 { + changes.KillPod = true } return changes @@ -544,8 +524,8 @@ func (m *kubeGenericRuntimeManager) computePodContainerChanges(pod *v1.Pod, podS // 6. Create normal containers. func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) { // Step 1: Compute sandbox and container changes. - podContainerChanges := m.computePodContainerChanges(pod, podStatus) - glog.V(3).Infof("computePodContainerChanges got %+v for pod %q", podContainerChanges, format.Pod(pod)) + podContainerChanges := m.computePodActions(pod, podStatus) + glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod)) if podContainerChanges.CreateSandbox { ref, err := ref.GetReference(api.Scheme, pod) if err != nil { @@ -554,13 +534,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat if podContainerChanges.SandboxID != "" { m.recorder.Eventf(ref, v1.EventTypeNormal, "SandboxChanged", "Pod sandbox changed, it will be killed and re-created.") } else { - glog.V(4).Infof("SyncPod received new pod %q, will create a new sandbox for it", format.Pod(pod)) + glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod)) } } // Step 2: Kill the pod if the sandbox has changed. - if podContainerChanges.CreateSandbox || (len(podContainerChanges.ContainersToKeep) == 0 && len(podContainerChanges.ContainersToStart) == 0) { - if len(podContainerChanges.ContainersToKeep) == 0 && len(podContainerChanges.ContainersToStart) == 0 { + if podContainerChanges.KillPod { + if !podContainerChanges.CreateSandbox { glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod)) } else { glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod)) @@ -572,6 +552,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error()) return } + + if podContainerChanges.CreateSandbox { + m.purgeInitContainers(pod, podStatus) + } } else { // Step 3: kill any running containers in this pod which are not to keep. 
for containerID, containerInfo := range podContainerChanges.ContainersToKill { @@ -587,7 +571,9 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat } // Keep terminated init containers fairly aggressively controlled - m.pruneInitContainersBeforeStart(pod, podStatus, podContainerChanges.InitContainersToKeep) + // This is an optmization because container removals are typically handled + // by container garbage collector. + m.pruneInitContainersBeforeStart(pod, podStatus) // We pass the value of the podIP down to generatePodSandboxConfig and // generateContainerConfig, which in turn passes it to various other @@ -605,7 +591,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat // Step 4: Create a sandbox for the pod if necessary. podSandboxID := podContainerChanges.SandboxID - if podContainerChanges.CreateSandbox && len(podContainerChanges.ContainersToStart) > 0 { + if podContainerChanges.CreateSandbox { var msg string var err error @@ -647,30 +633,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat return } - // Step 5: start init containers. - status, next, done := findNextInitContainerToRun(pod, podStatus) - if status != nil && status.ExitCode != 0 { - // container initialization has failed, flag the pod as failed - initContainerResult := kubecontainer.NewSyncResult(kubecontainer.InitContainer, status.Name) - initContainerResult.Fail(kubecontainer.ErrRunInitContainer, fmt.Sprintf("init container %q exited with %d", status.Name, status.ExitCode)) - result.AddSyncResult(initContainerResult) - if pod.Spec.RestartPolicy == v1.RestartPolicyNever { - utilruntime.HandleError(fmt.Errorf("error running pod %q init container %q, restart=Never: %#v", format.Pod(pod), status.Name, status)) - return - } - utilruntime.HandleError(fmt.Errorf("Error running pod %q init container %q, restarting: %#v", format.Pod(pod), status.Name, status)) - } - if next != nil { - if len(podContainerChanges.ContainersToStart) == 0 { - glog.V(4).Infof("No containers to start, stopping at init container %+v in pod %v", next.Name, format.Pod(pod)) - return - } - - // If we need to start the next container, do so now then exit - container := next + // Step 5: start the init container. + if container := podContainerChanges.NextInitContainerToStart; container != nil { + // Start the next init container. startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) result.AddSyncResult(startContainerResult) - isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff) if isInBackOff { startContainerResult.Fail(err, msg) @@ -687,20 +654,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat // Successfully started the container; clear the entry in the failure glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod)) - return - } - if !done { - // init container still running - glog.V(4).Infof("An init container is still running in pod %v", format.Pod(pod)) - return - } - if podContainerChanges.InitFailed { - glog.V(4).Infof("Not all init containers have succeeded for pod %v", format.Pod(pod)) - return } // Step 6: start containers in podContainerChanges.ContainersToStart. 
- for idx := range podContainerChanges.ContainersToStart { + for _, idx := range podContainerChanges.ContainersToStart { container := &pod.Spec.Containers[idx] startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name) result.AddSyncResult(startContainerResult) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 8ad095858f22..9a13c6b9e8fa 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -24,6 +24,7 @@ import ( cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -226,6 +227,40 @@ func verifyFakeContainerList(fakeRuntime *apitest.FakeRuntimeService, expected [ return actual, reflect.DeepEqual(expected, actual) } +type containerRecord struct { + container *v1.Container + attempt uint32 + state runtimeapi.ContainerState +} + +// Only extract the fields of interests. +type cRecord struct { + name string + attempt uint32 + state runtimeapi.ContainerState +} + +type cRecordList []*cRecord + +func (b cRecordList) Len() int { return len(b) } +func (b cRecordList) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b cRecordList) Less(i, j int) bool { + if b[i].name != b[j].name { + return b[i].name < b[j].name + } + return b[i].attempt < b[j].attempt +} + +func verifyContainerStatuses(t *testing.T, runtime *apitest.FakeRuntimeService, expected []*cRecord, desc string) { + actual := []*cRecord{} + for _, cStatus := range runtime.Containers { + actual = append(actual, &cRecord{name: cStatus.Metadata.Name, attempt: cStatus.Metadata.Attempt, state: cStatus.State}) + } + sort.Sort(cRecordList(expected)) + sort.Sort(cRecordList(actual)) + assert.Equal(t, expected, actual, desc) +} + func TestNewKubeRuntimeManager(t *testing.T) { _, _, _, err := createTestRuntimeManager() assert.NoError(t, err) @@ -582,8 +617,7 @@ func TestPruneInitContainers(t *testing.T) { podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - keep := map[kubecontainer.ContainerID]int{} - m.pruneInitContainersBeforeStart(pod, podStatus, keep) + m.pruneInitContainersBeforeStart(pod, podStatus) expectedContainers := []string{fakes[0].Id, fakes[2].Id} if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { t.Errorf("expected %q, got %q", expectedContainers, actual) @@ -625,18 +659,6 @@ func TestSyncPodWithInitContainers(t *testing.T) { }, } - // buildContainerID is an internal helper function to build container id from api pod - // and container with default attempt number 0. - buildContainerID := func(pod *v1.Pod, container v1.Container) string { - uid := string(pod.UID) - sandboxID := apitest.BuildSandboxName(&runtimeapi.PodSandboxMetadata{ - Name: pod.Name, - Uid: uid, - Namespace: pod.Namespace, - }) - return apitest.BuildContainerName(&runtimeapi.ContainerMetadata{Name: container.Name}, sandboxID) - } - backOff := flowcontrol.NewBackOff(time.Second, time.Minute) // 1. should only create the init container. 
@@ -644,34 +666,406 @@ func TestSyncPodWithInitContainers(t *testing.T) { assert.NoError(t, err) result := m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 1, len(fakeRuntime.Containers)) - initContainerID := buildContainerID(pod, initContainers[0]) - expectedContainers := []string{initContainerID} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) + expected := []*cRecord{ + {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, } + verifyContainerStatuses(t, fakeRuntime, expected, "start only the init container") // 2. should not create app container because init container is still running. podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 1, len(fakeRuntime.Containers)) - expectedContainers = []string{initContainerID} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) - } + verifyContainerStatuses(t, fakeRuntime, expected, "init container still running; do nothing") // 3. should create all app containers because init container finished. - fakeRuntime.StopContainer(initContainerID, 0) + // Stop init container instance 0. + sandboxIDs, err := m.getSandboxIDByPodUID(pod.UID, nil) + require.NoError(t, err) + sandboxID := sandboxIDs[0] + initID0, err := fakeRuntime.GetContainerID(sandboxID, initContainers[0].Name, 0) + require.NoError(t, err) + fakeRuntime.StopContainer(initID0, 0) + // Sync again. podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) assert.NoError(t, result.Error()) - assert.Equal(t, 3, len(fakeRuntime.Containers)) - expectedContainers = []string{initContainerID, buildContainerID(pod, containers[0]), - buildContainerID(pod, containers[1])} - if actual, ok := verifyFakeContainerList(fakeRuntime, expectedContainers); !ok { - t.Errorf("expected %q, got %q", expectedContainers, actual) + expected = []*cRecord{ + {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, + {name: containers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + {name: containers[1].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + } + verifyContainerStatuses(t, fakeRuntime, expected, "init container completed; all app containers should be running") + + // 4. should restart the init container if needed to create a new podsandbox + // Stop the pod sandbox. + fakeRuntime.StopPodSandbox(sandboxID) + // Sync again. + podStatus, err = m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) + assert.NoError(t, err) + result = m.SyncPod(pod, v1.PodStatus{}, podStatus, []v1.Secret{}, backOff) + assert.NoError(t, result.Error()) + expected = []*cRecord{ + // The first init container instance is purged and no longer visible. + // The second (attempt == 1) instance has been started and is running. + {name: initContainers[0].Name, attempt: 1, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, + // All containers are killed. 
+		{name: containers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
+		{name: containers[1].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
+	}
+	verifyContainerStatuses(t, fakeRuntime, expected, "kill all app containers, purge the existing init container, and restart a new one")
+}
+
+// A helper function to get a basic pod and its status assuming all sandbox and
+// containers are running and ready.
+func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) {
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			UID: "12345678",
+			Name: "foo",
+			Namespace: "foo-ns",
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Name: "foo1",
+					Image: "busybox",
+				},
+				{
+					Name: "foo2",
+					Image: "busybox",
+				},
+				{
+					Name: "foo3",
+					Image: "busybox",
+				},
+			},
+		},
+	}
+	status := &kubecontainer.PodStatus{
+		ID: pod.UID,
+		Name: pod.Name,
+		Namespace: pod.Namespace,
+		SandboxStatuses: []*runtimeapi.PodSandboxStatus{
+			{
+				Id: "sandboxID",
+				State: runtimeapi.PodSandboxState_SANDBOX_READY,
+				Metadata: &runtimeapi.PodSandboxMetadata{Name: pod.Name, Namespace: pod.Namespace, Uid: "sandboxuid", Attempt: uint32(0)},
+			},
+		},
+		ContainerStatuses: []*kubecontainer.ContainerStatus{
+			{
+				ID: kubecontainer.ContainerID{ID: "id1"},
+				Name: "foo1", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[0]),
+			},
+			{
+				ID: kubecontainer.ContainerID{ID: "id2"},
+				Name: "foo2", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[1]),
+			},
+			{
+				ID: kubecontainer.ContainerID{ID: "id3"},
+				Name: "foo3", State: kubecontainer.ContainerStateRunning,
+				Hash: kubecontainer.HashContainer(&pod.Spec.Containers[2]),
+			},
+		},
+	}
+	return pod, status
+}
+
+func TestComputePodActions(t *testing.T) {
+	_, _, m, err := createTestRuntimeManager()
+	require.NoError(t, err)
+
+	// Creating a pair of reference pod and status for the test cases to refer
+	// to the specific fields.
+	basePod, baseStatus := makeBasePodAndStatus()
+	noAction := podActions{
+		SandboxID: baseStatus.SandboxStatuses[0].Id,
+		ContainersToStart: []int{},
+		ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{},
+	}
+
+	for desc, test := range map[string]struct {
+		mutatePodFn func(*v1.Pod)
+		mutateStatusFn func(*kubecontainer.PodStatus)
+		actions podActions
+	}{
+		"everything is good; do nothing": {
+			actions: noAction,
+		},
+		"start pod sandbox and all containers for a new pod": {
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				// No container or sandbox exists.
+ status.SandboxStatuses = []*runtimeapi.PodSandboxStatus{} + status.ContainerStatuses = []*kubecontainer.ContainerStatus{} + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + Attempt: uint32(0), + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart exited containers if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart failed containers if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, don't restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "don't restart containers if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // Don't restart any containers. 
+				status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[0].ExitCode = 0
+				status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[1].ExitCode = 111
+			},
+			actions: noAction,
+		},
+		"Kill pod and recreate everything if the pod sandbox is dead, and RestartPolicy == Always": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+			},
+			actions: podActions{
+				KillPod: true,
+				CreateSandbox: true,
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				Attempt: uint32(1),
+				ContainersToStart: []int{0, 1, 2},
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
+			},
+		},
+		"Kill pod and recreate all containers (except for the succeeded one) if the pod sandbox is dead, and RestartPolicy == OnFailure": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+				status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
+				status.ContainerStatuses[1].ExitCode = 0
+			},
+			actions: podActions{
+				KillPod: true,
+				CreateSandbox: true,
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				Attempt: uint32(1),
+				ContainersToStart: []int{0, 2},
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
+			},
+		},
+
+		"Kill and recreate the container if the container's spec changed": {
+			mutatePodFn: func(pod *v1.Pod) {
+				pod.Spec.RestartPolicy = v1.RestartPolicyAlways
+			},
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.ContainerStatuses[1].Hash = uint64(432423432)
+			},
+			actions: podActions{
+				SandboxID: baseStatus.SandboxStatuses[0].Id,
+				ContainersToKill: getKillMap(basePod, baseStatus, []int{1}),
+				ContainersToStart: []int{1},
+			},
+			// TODO: Add a test case for containers which failed the liveness
+			// check. Will need to fake the liveness check result.
+		},
+	} {
+		pod, status := makeBasePodAndStatus()
+		if test.mutatePodFn != nil {
+			test.mutatePodFn(pod)
+		}
+		if test.mutateStatusFn != nil {
+			test.mutateStatusFn(status)
+		}
+		actions := m.computePodActions(pod, status)
+		verifyActions(t, &test.actions, &actions, desc)
+	}
+}
+
+func getKillMap(pod *v1.Pod, status *kubecontainer.PodStatus, cIndexes []int) map[kubecontainer.ContainerID]containerToKillInfo {
+	m := map[kubecontainer.ContainerID]containerToKillInfo{}
+	for _, i := range cIndexes {
+		m[status.ContainerStatuses[i].ID] = containerToKillInfo{
+			container: &pod.Spec.Containers[i],
+			name: pod.Spec.Containers[i].Name,
+		}
+	}
+	return m
+}
+
+func verifyActions(t *testing.T, expected, actual *podActions, desc string) {
+	if actual.ContainersToKill != nil {
+		// Clear the message field since we don't need to verify the message.
+		for k, info := range actual.ContainersToKill {
+			info.message = ""
+			actual.ContainersToKill[k] = info
+		}
+	}
+	assert.Equal(t, expected, actual, desc)
+}
+
+func TestComputePodActionsWithInitContainers(t *testing.T) {
+	_, _, m, err := createTestRuntimeManager()
+	require.NoError(t, err)
+
+	// Creating a pair of reference pod and status for the test cases to refer
+	// to the specific fields.
+ basePod, baseStatus := makeBasePodAndStatusWithInitContainers() + noAction := podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + } + + for desc, test := range map[string]struct { + mutatePodFn func(*v1.Pod) + mutateStatusFn func(*kubecontainer.PodStatus) + actions podActions + }{ + "initialization completed; start all containers": { + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization in progress; do nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateRunning + }, + actions: noAction, + }, + "Kill pod and restart the first init container if the pod sandbox is dead": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + NextInitContainerToStart: &basePod.Spec.InitContainers[0], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; restart the last init container if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; restart the last init container if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + NextInitContainerToStart: &basePod.Spec.InitContainers[2], + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; kill pod if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + KillPod: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + } { + pod, status := makeBasePodAndStatusWithInitContainers() + if test.mutatePodFn != nil { + test.mutatePodFn(pod) + } + if test.mutateStatusFn != nil { + test.mutateStatusFn(status) + } + actions := m.computePodActions(pod, status) + verifyActions(t, &test.actions, &actions, desc) + } +} + +func makeBasePodAndStatusWithInitContainers() (*v1.Pod, *kubecontainer.PodStatus) { + pod, status := makeBasePodAndStatus() + pod.Spec.InitContainers = []v1.Container{ + { + Name: "init1", + Image: "bar-image", + }, + { + Name: "init2", + 
Image: "bar-image", + }, + { + Name: "init3", + Image: "bar-image", + }, + } + // Replace the original statuses of the containers with those for the init + // containers. + status.ContainerStatuses = []*kubecontainer.ContainerStatus{ + { + ID: kubecontainer.ContainerID{ID: "initid1"}, + Name: "init1", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid2"}, + Name: "init2", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid3"}, + Name: "init3", State: kubecontainer.ContainerStateExited, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, } + return pod, status } From 55a74c867f65797febb89eb1211d53b5816f2d49 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:56:05 -0600 Subject: [PATCH 2/6] UPSTREAM: 48584: Move event type --- vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go | 1 + .../kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go index 0a0efc20cfdc..3eb2dab25f4d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go @@ -67,6 +67,7 @@ const ( FailedNodeAllocatableEnforcement = "FailedNodeAllocatableEnforcement" SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced" UnsupportedMountOption = "UnsupportedMountOption" + SandboxChanged = "SandboxChanged" // Image manager event reason list InvalidDiskCapacity = "InvalidDiskCapacity" diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 7701cd42c2a9..a7db18e44184 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -532,7 +532,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err) } if podContainerChanges.SandboxID != "" { - m.recorder.Eventf(ref, v1.EventTypeNormal, "SandboxChanged", "Pod sandbox changed, it will be killed and re-created.") + m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.") } else { glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod)) } From a6bf11f9a099541ad1ed467a46df1da7cfd99976 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:57:09 -0600 Subject: [PATCH 3/6] UPSTREAM: 48589: When faild create pod sandbox record event. 
--- vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go | 1 + .../pkg/kubelet/kuberuntime/kuberuntime_manager.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go index 3eb2dab25f4d..3c0ee08b3e3a 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go @@ -68,6 +68,7 @@ const ( SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced" UnsupportedMountOption = "UnsupportedMountOption" SandboxChanged = "SandboxChanged" + FailedCreatePodSandBox = "FailedCreatePodSandBox" // Image manager event reason list InvalidDiskCapacity = "InvalidDiskCapacity" diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index a7db18e44184..29141d2db77a 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -602,6 +602,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat if err != nil { createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg) glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err) + ref, err := ref.GetReference(api.Scheme, pod) + if err != nil { + glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err) + } + m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.") return } glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod)) From a1e92bc02ea47fb375fb48dd19228dd9217770c9 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 09:58:38 -0600 Subject: [PATCH 4/6] UPSTREAM: 48970: Recreate pod sandbox when the sandbox does not have an IP address. --- .../kubelet/kuberuntime/kuberuntime_manager.go | 6 ++++++ .../kuberuntime/kuberuntime_manager_test.go | 15 ++++++++++++++- .../k8s.io/kubernetes/pkg/kubelet/pleg/generic.go | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 29141d2db77a..93511729b2ac 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -385,6 +385,12 @@ func (m *kubeGenericRuntimeManager) podSandboxChanged(pod *v1.Pod, podStatus *ku return true, sandboxStatus.Metadata.Attempt + 1, "" } + // Needs to create a new sandbox when the sandbox does not have an IP address. + if !kubecontainer.IsHostNetworkPod(pod) && sandboxStatus.Network.Ip == "" { + glog.V(2).Infof("Sandbox for pod %q has no IP address. 
Need to start a new one", format.Pod(pod)) + return true, sandboxStatus.Metadata.Attempt + 1, sandboxStatus.Id + } + return false, sandboxStatus.Metadata.Attempt, sandboxStatus.Id } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 9a13c6b9e8fa..f7655ea10a3b 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -752,6 +752,7 @@ func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) { Id: "sandboxID", State: runtimeapi.PodSandboxState_SANDBOX_READY, Metadata: &runtimeapi.PodSandboxMetadata{Name: pod.Name, Namespace: pod.Namespace, Uid: "sandboxuid", Attempt: uint32(0)}, + Network: &runtimeapi.PodSandboxNetworkStatus{Ip: "10.0.0.1"}, }, }, ContainerStatuses: []*kubecontainer.ContainerStatus{ @@ -885,7 +886,19 @@ func TestComputePodActions(t *testing.T) { ContainersToKill: getKillMap(basePod, baseStatus, []int{}), }, }, - + "Kill pod and recreate all containers if the PodSandbox does not have an IP": { + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].Network.Ip = "" + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, "Kill and recreate the container if the container's spec changed": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go b/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go index 6c6c980c3d8b..2d8a9a0c13d8 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/pleg/generic.go @@ -178,7 +178,7 @@ func (g *GenericPLEG) updateRelisTime(timestamp time.Time) { } // relist queries the container runtime for list of pods/containers, compare -// with the internal pods/containers, and generats events accordingly. +// with the internal pods/containers, and generates events accordingly. 
func (g *GenericPLEG) relist() { glog.V(5).Infof("GenericPLEG: Relisting") From b6a4fc25b842ae420c2937c1a307c03bd4ae5563 Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Fri, 13 Oct 2017 10:06:22 -0600 Subject: [PATCH 5/6] UPSTREAM: 50350: Wait for container cleanup before deletion --- .../k8s.io/kubernetes/pkg/kubelet/kubelet.go | 4 +-- .../kubernetes/pkg/kubelet/kubelet_pods.go | 27 +++++++++++++++++ .../kuberuntime/fake_kuberuntime_manager.go | 19 ++++++------ .../kuberuntime/kuberuntime_container.go | 15 ++-------- .../pkg/kubelet/kuberuntime/kuberuntime_gc.go | 29 +++++++------------ .../kuberuntime/kuberuntime_gc_test.go | 14 ++++----- .../kuberuntime/kuberuntime_manager.go | 10 +++---- .../kubelet/rkt/fake_rkt_interface_test.go | 17 ++++++----- .../k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go | 15 +++++----- .../kubernetes/pkg/kubelet/rkt/rkt_test.go | 8 ++--- 10 files changed, 83 insertions(+), 75 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go index 0dc725cf2026..7cfd4c23f6af 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go @@ -617,7 +617,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub klet.livenessManager, containerRefManager, machineInfo, - klet.podManager, + klet, kubeDeps.OSInterface, klet, httpClient, @@ -648,7 +648,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub klet, kubeDeps.Recorder, containerRefManager, - klet.podManager, + klet, klet.livenessManager, httpClient, klet.networkPlugin, diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go index e0fef3c95b5c..e5d499fee1cb 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go @@ -52,6 +52,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/envvars" + "k8s.io/kubernetes/pkg/kubelet/eviction" "k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/pkg/kubelet/server/portforward" remotecommandserver "k8s.io/kubernetes/pkg/kubelet/server/remotecommand" @@ -743,6 +744,22 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool { return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) } +// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either: +// 1. The pod object is deleted +// 2. The pod's status is evicted +// 3. The pod's deletion timestamp is set, and containers are not running +func (kl *Kubelet) IsPodDeleted(uid types.UID) bool { + pod, podFound := kl.podManager.GetPodByUID(uid) + if !podFound { + return true + } + status, statusFound := kl.statusManager.GetPodStatus(pod.UID) + if !statusFound { + status = pod.Status + } + return eviction.PodIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) +} + // PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have // been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server. 
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool { @@ -751,6 +768,16 @@ func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bo glog.V(3).Infof("Pod %q is terminated, but some containers are still running", format.Pod(pod)) return false } + // pod's containers should be deleted + runtimeStatus, err := kl.podCache.Get(pod.UID) + if err != nil { + glog.V(3).Infof("Pod %q is terminated, Error getting runtimeStatus from the podCache: %s", format.Pod(pod), err) + return false + } + if len(runtimeStatus.ContainerStatuses) > 0 { + glog.V(3).Infof("Pod %q is terminated, but some containers have not been cleaned up: %+v", format.Pod(pod), runtimeStatus.ContainerStatuses) + return false + } if kl.podVolumesExist(pod.UID) && !kl.kubeletConfiguration.KeepTerminatedPodVolumes { // We shouldnt delete pods whose volumes have not been cleaned up if we are not keeping terminated pod volumes glog.V(3).Infof("Pod %q is terminated, but some volumes have not been cleaned up", format.Pod(pod)) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go index dca959a5e94e..ed9301afe00d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go @@ -24,7 +24,6 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/flowcontrol" - "k8s.io/kubernetes/pkg/api/v1" "k8s.io/kubernetes/pkg/credentialprovider" internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -43,17 +42,19 @@ func (f *fakeHTTP) Get(url string) (*http.Response, error) { return nil, f.err } -type fakePodGetter struct { - pods map[types.UID]*v1.Pod +type fakePodDeletionProvider struct { + pods map[types.UID]struct{} } -func newFakePodGetter() *fakePodGetter { - return &fakePodGetter{make(map[types.UID]*v1.Pod)} +func newFakePodDeletionProvider() *fakePodDeletionProvider { + return &fakePodDeletionProvider{ + pods: make(map[types.UID]struct{}), + } } -func (f *fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) { - pod, found := f.pods[uid] - return pod, found +func (f *fakePodDeletionProvider) IsPodDeleted(uid types.UID) bool { + _, found := f.pods[uid] + return !found } func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) { @@ -76,7 +77,7 @@ func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS return nil, err } - kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodGetter(), kubeRuntimeManager) + kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodDeletionProvider(), kubeRuntimeManager) kubeRuntimeManager.runtimeName = typedVersion.RuntimeName kubeRuntimeManager.imagePuller = images.NewImageManager( kubecontainer.FilterEventRecorder(recorder), diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go index 94e441d12574..31f0b8ed9ffe 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ 
b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -337,13 +337,12 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([]*runtimeapi.Container, error) { filter := &runtimeapi.ContainerFilter{} if !allContainers { - runningState := runtimeapi.ContainerState_CONTAINER_RUNNING filter.State = &runtimeapi.ContainerStateValue{ - State: runningState, + State: runtimeapi.ContainerState_CONTAINER_RUNNING, } } - containers, err := m.getContainersHelper(filter) + containers, err := m.runtimeService.ListContainers(filter) if err != nil { glog.Errorf("getKubeletContainers failed: %v", err) return nil, err @@ -352,16 +351,6 @@ func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([] return containers, nil } -// getContainers lists containers by filter. -func (m *kubeGenericRuntimeManager) getContainersHelper(filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) { - resp, err := m.runtimeService.ListContainers(filter) - if err != nil { - return nil, err - } - - return resp, err -} - // makeUID returns a randomly generated string. func makeUID() string { return fmt.Sprintf("%08x", rand.Uint32()) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go index fe8f1465945d..063a89d6b310 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -32,17 +32,17 @@ import ( // containerGC is the manager of garbage collection. type containerGC struct { - client internalapi.RuntimeService - manager *kubeGenericRuntimeManager - podGetter podGetter + client internalapi.RuntimeService + manager *kubeGenericRuntimeManager + podDeletionProvider podDeletionProvider } // NewContainerGC creates a new containerGC. -func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, manager *kubeGenericRuntimeManager) *containerGC { +func NewContainerGC(client internalapi.RuntimeService, podDeletionProvider podDeletionProvider, manager *kubeGenericRuntimeManager) *containerGC { return &containerGC{ - client: client, - manager: manager, - podGetter: podGetter, + client: client, + manager: manager, + podDeletionProvider: podDeletionProvider, } } @@ -52,8 +52,6 @@ type containerGCInfo struct { id string // The name of the container. name string - // The sandbox ID which this container belongs to - sandboxID string // Creation time for the container. createTime time.Time } @@ -159,12 +157,6 @@ func (cgc *containerGC) removeSandbox(sandboxID string) { } } -// isPodDeleted returns true if the pod is already deleted. -func (cgc *containerGC) isPodDeleted(podUID types.UID) bool { - _, found := cgc.podGetter.GetPodByUID(podUID) - return !found -} - // evictableContainers gets all containers that are evictable. Evictable containers are: not running // and created more than MinAge ago. 
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
index fe8f1465945d..063a89d6b310 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go
@@ -32,17 +32,17 @@ import (
 
 // containerGC is the manager of garbage collection.
 type containerGC struct {
-	client    internalapi.RuntimeService
-	manager   *kubeGenericRuntimeManager
-	podGetter podGetter
+	client              internalapi.RuntimeService
+	manager             *kubeGenericRuntimeManager
+	podDeletionProvider podDeletionProvider
 }
 
 // NewContainerGC creates a new containerGC.
-func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, manager *kubeGenericRuntimeManager) *containerGC {
+func NewContainerGC(client internalapi.RuntimeService, podDeletionProvider podDeletionProvider, manager *kubeGenericRuntimeManager) *containerGC {
 	return &containerGC{
-		client:    client,
-		manager:   manager,
-		podGetter: podGetter,
+		client:              client,
+		manager:             manager,
+		podDeletionProvider: podDeletionProvider,
 	}
 }
@@ -52,8 +52,6 @@ type containerGCInfo struct {
 	id string
 	// The name of the container.
 	name string
-	// The sandbox ID which this container belongs to
-	sandboxID string
 	// Creation time for the container.
 	createTime time.Time
 }
@@ -159,12 +157,6 @@ func (cgc *containerGC) removeSandbox(sandboxID string) {
 	}
 }
 
-// isPodDeleted returns true if the pod is already deleted.
-func (cgc *containerGC) isPodDeleted(podUID types.UID) bool {
-	_, found := cgc.podGetter.GetPodByUID(podUID)
-	return !found
-}
-
 // evictableContainers gets all containers that are evictable. Evictable containers are: not running
 // and created more than MinAge ago.
 func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
@@ -191,7 +183,6 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
 			id:         container.Id,
 			name:       container.Metadata.Name,
 			createTime: createdAt,
-			sandboxID:  container.PodSandboxId,
 		}
 		key := evictUnit{
 			uid: labeledInfo.PodUID,
@@ -219,7 +210,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
 	// Remove deleted pod containers if all sources are ready.
 	if allSourcesReady {
 		for key, unit := range evictUnits {
-			if cgc.isPodDeleted(key.uid) || evictNonDeletedPods {
+			if cgc.podDeletionProvider.IsPodDeleted(key.uid) || evictNonDeletedPods {
 				cgc.removeOldestN(unit, len(unit)) // Remove all.
 				delete(evictUnits, key)
 			}
@@ -307,7 +298,7 @@ func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
 	}
 
 	for podUID, sandboxes := range sandboxesByPod {
-		if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
+		if cgc.podDeletionProvider.IsPodDeleted(podUID) || evictNonDeletedPods {
 			// Remove all evictable sandboxes if the pod has been removed.
 			// Note that the latest dead sandbox is also removed if there is
 			// already an active one.
@@ -333,7 +324,7 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
 		for _, dir := range dirs {
 			name := dir.Name()
 			podUID := types.UID(name)
-			if !cgc.isPodDeleted(podUID) {
+			if !cgc.podDeletionProvider.IsPodDeleted(podUID) {
 				continue
 			}
 			err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
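All three eviction paths above now ask one narrow question, IsPodDeleted, instead of fetching the whole pod object. Here is a self-contained sketch of that eviction loop; UID, staticProvider, and evictUnits are simplified stand-ins for illustration, not the kubelet's types.

    package main

    import "fmt"

    type UID string

    // podDeletionProvider matches the narrow dependency the GC keeps after this
    // change: it only needs to know whether a pod is gone from the API.
    type podDeletionProvider interface {
        IsPodDeleted(UID) bool
    }

    // staticProvider treats presence in the set as "not deleted".
    type staticProvider map[UID]struct{}

    func (s staticProvider) IsPodDeleted(uid UID) bool { _, ok := s[uid]; return !ok }

    // evictUnits drops every per-pod unit whose pod has been deleted (or all of
    // them when evictNonDeletedPods is set), mirroring the evictContainers loop.
    func evictUnits(units map[UID][]string, p podDeletionProvider, evictNonDeletedPods bool) {
        for uid := range units {
            if p.IsPodDeleted(uid) || evictNonDeletedPods {
                delete(units, uid)
            }
        }
    }

    func main() {
        units := map[UID][]string{"live": {"c1"}, "gone": {"c2", "c3"}}
        evictUnits(units, staticProvider{"live": {}}, false)
        fmt.Println(units) // only the "live" unit remains
    }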
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
index b32cd9428464..389b1e52cce0 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
@@ -34,11 +34,11 @@ func TestSandboxGC(t *testing.T) {
 	fakeRuntime, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 	makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool, createdAt int64) sandboxTemplate {
 		if withPodGetter {
 			// initialize the pod getter
-			fakePodGetter.pods[pod.UID] = pod
+			podDeletionProvider.pods[pod.UID] = struct{}{}
 		}
 		return sandboxTemplate{
 			pod:       pod,
@@ -162,13 +162,13 @@ func TestContainerGC(t *testing.T) {
 	fakeRuntime, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 	makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
 		container := makeTestContainer(containerName, "test-image")
 		pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
 		if podName != "deleted" {
 			// initialize the pod getter, explicitly exclude deleted pod
-			fakePodGetter.pods[pod.UID] = pod
+			podDeletionProvider.pods[pod.UID] = struct{}{}
 		}
 		return containerTemplate{
 			pod:       pod,
@@ -361,11 +361,11 @@ func TestPodLogDirectoryGC(t *testing.T) {
 	_, _, m, err := createTestRuntimeManager()
 	assert.NoError(t, err)
 	fakeOS := m.osInterface.(*containertest.FakeOS)
-	fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
+	podDeletionProvider := m.containerGC.podDeletionProvider.(*fakePodDeletionProvider)
 
 	// pod log directories without corresponding pods should be removed.
-	fakePodGetter.pods["123"] = makeTestPod("foo1", "new", "123", nil)
-	fakePodGetter.pods["456"] = makeTestPod("foo2", "new", "456", nil)
+	podDeletionProvider.pods["123"] = struct{}{}
+	podDeletionProvider.pods["456"] = struct{}{}
 	files := []string{"123", "456", "789", "012"}
 	removed := []string{filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "012")}
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 93511729b2ac..d852c9a63100 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -64,9 +64,9 @@ var (
 	ErrVersionNotSupported = errors.New("Runtime api version is not supported")
 )
 
-// A subset of the pod.Manager interface extracted for garbage collection purposes.
-type podGetter interface {
-	GetPodByUID(kubetypes.UID) (*v1.Pod, bool)
+// podDeletionProvider can determine if a pod is deleted
+type podDeletionProvider interface {
+	IsPodDeleted(kubetypes.UID) bool
 }
 
 type kubeGenericRuntimeManager struct {
@@ -119,7 +119,7 @@ func NewKubeGenericRuntimeManager(
 	livenessManager proberesults.Manager,
 	containerRefManager *kubecontainer.RefManager,
 	machineInfo *cadvisorapi.MachineInfo,
-	podGetter podGetter,
+	podDeletionProvider podDeletionProvider,
 	osInterface kubecontainer.OSInterface,
 	runtimeHelper kubecontainer.RuntimeHelper,
 	httpClient types.HttpGetter,
@@ -182,7 +182,7 @@ func NewKubeGenericRuntimeManager(
 		imagePullQPS,
 		imagePullBurst)
 	kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
-	kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podGetter, kubeRuntimeManager)
+	kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podDeletionProvider, kubeRuntimeManager)
 
 	kubeRuntimeManager.versionCache = cache.NewObjectCache(
 		func() (interface{}, error) {
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
index 2684fad214a4..19b3665685df 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/fake_rkt_interface_test.go
@@ -28,7 +28,6 @@ import (
 	"google.golang.org/grpc"
 	"k8s.io/apimachinery/pkg/types"
 	kubetypes "k8s.io/apimachinery/pkg/types"
-	"k8s.io/kubernetes/pkg/api/v1"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 )
 
@@ -179,17 +178,19 @@ func (f *fakeRktCli) Reset() {
 	f.err = nil
 }
 
-type fakePodGetter struct {
-	pods map[types.UID]*v1.Pod
+type fakePodDeletionProvider struct {
+	pods map[types.UID]struct{}
 }
 
-func newFakePodGetter() *fakePodGetter {
-	return &fakePodGetter{pods: make(map[types.UID]*v1.Pod)}
+func newFakePodDeletionProvider() *fakePodDeletionProvider {
+	return &fakePodDeletionProvider{
+		pods: make(map[types.UID]struct{}),
+	}
 }
 
-func (f fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) {
-	p, found := f.pods[uid]
-	return p, found
+func (f *fakePodDeletionProvider) IsPodDeleted(uid types.UID) bool {
+	_, found := f.pods[uid]
+	return !found
 }
 
 type fakeUnitGetter struct {
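The refactor narrows the dependency from a getter that returns the full pod to a single boolean query. A component that already exposes a GetPodByUID-style lookup can satisfy the new interface with a thin adapter; the sketch below is hypothetical (the real pod manager implementation has more to consider, such as mirror pods), but it shows the shape of the change.

    package main

    import "fmt"

    type UID string
    type Pod struct{ Name string }

    // podManager is a hypothetical stand-in for a component that historically
    // exposed the wider GetPodByUID lookup.
    type podManager struct{ pods map[UID]*Pod }

    func (m *podManager) GetPodByUID(uid UID) (*Pod, bool) { p, ok := m.pods[uid]; return p, ok }

    // deletionAdapter narrows the wide getter down to the one question the
    // garbage collectors ask after this refactor: "is the pod deleted?"
    type deletionAdapter struct{ m *podManager }

    func (a deletionAdapter) IsPodDeleted(uid UID) bool {
        _, found := a.m.GetPodByUID(uid)
        return !found
    }

    func main() {
        m := &podManager{pods: map[UID]*Pod{"u1": {Name: "web"}}}
        a := deletionAdapter{m}
        fmt.Println(a.IsPodDeleted("u1")) // false
        fmt.Println(a.IsPodDeleted("u2")) // true
    }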
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
index d16c92db311a..a366950675a3 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt.go
@@ -158,7 +158,7 @@ type Runtime struct {
 	dockerKeyring credentialprovider.DockerKeyring
 
 	containerRefManager *kubecontainer.RefManager
-	podGetter           podGetter
+	podDeletionProvider podDeletionProvider
 	runtimeHelper       kubecontainer.RuntimeHelper
 	recorder            record.EventRecorder
 	livenessManager     proberesults.Manager
@@ -199,9 +199,9 @@ type podServiceDirective struct {
 var _ kubecontainer.Runtime = &Runtime{}
 var _ kubecontainer.DirectStreamingRuntime = &Runtime{}
 
-// TODO(yifan): This duplicates the podGetter in dockertools.
-type podGetter interface {
-	GetPodByUID(kubetypes.UID) (*v1.Pod, bool)
+// podDeletionProvider can determine if a pod is deleted
+type podDeletionProvider interface {
+	IsPodDeleted(kubetypes.UID) bool
 }
 
 // cliInterface wrapps the command line calls for testing purpose.
@@ -226,7 +226,7 @@ func New(
 	runtimeHelper kubecontainer.RuntimeHelper,
 	recorder record.EventRecorder,
 	containerRefManager *kubecontainer.RefManager,
-	podGetter podGetter,
+	podDeletionProvider podDeletionProvider,
 	livenessManager proberesults.Manager,
 	httpClient types.HttpGetter,
 	networkPlugin network.NetworkPlugin,
@@ -283,7 +283,7 @@ func New(
 		config:              config,
 		dockerKeyring:       credentialprovider.NewDockerKeyring(),
 		containerRefManager: containerRefManager,
-		podGetter:           podGetter,
+		podDeletionProvider: podDeletionProvider,
 		runtimeHelper:       runtimeHelper,
 		recorder:            recorder,
 		livenessManager:     livenessManager,
@@ -1988,8 +1988,7 @@ func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSo
 			removeCandidates = append(removeCandidates, pod)
 			continue
 		}
-		_, found := r.podGetter.GetPodByUID(uid)
-		if !found && allSourcesReady {
+		if r.podDeletionProvider.IsPodDeleted(uid) && allSourcesReady {
 			removeCandidates = append(removeCandidates, pod)
 			continue
 		}
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
index 16c661733bd6..a675fbffcbb7 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/rkt/rkt_test.go
@@ -1633,7 +1633,7 @@ func TestGarbageCollect(t *testing.T) {
 	fs := newFakeSystemd()
 	cli := newFakeRktCli()
 	fakeOS := kubetesting.NewFakeOS()
-	getter := newFakePodGetter()
+	deletionProvider := newFakePodDeletionProvider()
 	fug := newfakeUnitGetter()
 	frh := &containertesting.FakeRuntimeHelper{}
 
@@ -1641,7 +1641,7 @@ func TestGarbageCollect(t *testing.T) {
 		os:                  fakeOS,
 		cli:                 cli,
 		apisvc:              fr,
-		podGetter:           getter,
+		podDeletionProvider: deletionProvider,
 		systemd:             fs,
 		containerRefManager: kubecontainer.NewRefManager(),
 		unitGetter:          fug,
@@ -1827,7 +1827,7 @@ func TestGarbageCollect(t *testing.T) {
 		fr.pods = tt.pods
 
 		for _, p := range tt.apiPods {
-			getter.pods[p.UID] = p
+			deletionProvider.pods[p.UID] = struct{}{}
 		}
 
 		allSourcesReady := true
@@ -1859,7 +1859,7 @@ func TestGarbageCollect(t *testing.T) {
 		ctrl.Finish()
 		fakeOS.Removes = []string{}
 		fs.resetFailedUnits = []string{}
-		getter.pods = make(map[kubetypes.UID]*v1.Pod)
+		deletionProvider.pods = make(map[kubetypes.UID]struct{})
 	}
 }
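The rkt GarbageCollect hunk keeps the same guard as the CRI GC: a dead pod only becomes a removal candidate when it is deleted from the API and all config sources have synced, so a pod that simply has not arrived yet is not culled. A small stand-in sketch of that selection follows; deadPod, set, and removeCandidates are illustrative names, not rkt's types.

    package main

    import "fmt"

    type UID string

    type deletionProvider interface{ IsPodDeleted(UID) bool }

    // set treats presence as "the pod still exists in the API".
    type set map[UID]struct{}

    func (s set) IsPodDeleted(uid UID) bool { _, ok := s[uid]; return !ok }

    type deadPod struct{ uid UID }

    // removeCandidates mirrors the selection logic in the hunk above: deleted
    // from the API *and* all sources ready, otherwise the pod is kept for now.
    func removeCandidates(dead []deadPod, p deletionProvider, allSourcesReady bool) []deadPod {
        var out []deadPod
        for _, d := range dead {
            if p.IsPodDeleted(d.uid) && allSourcesReady {
                out = append(out, d)
            }
        }
        return out
    }

    func main() {
        dead := []deadPod{{"a"}, {"b"}}
        fmt.Println(removeCandidates(dead, set{"a": {}}, true)) // only "b" is removable
    }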
From ee7ec54b41a34eefaa550ef3672830d946890dad Mon Sep 17 00:00:00 2001
From: Joel Smith
Date: Fri, 13 Oct 2017 10:09:23 -0600
Subject: [PATCH 6/6] UPSTREAM: 53857: kubelet sync pod throws more detailed events

---
 vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go  |  7 +++++++
 vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go       | 11 +++++++++++
 .../pkg/kubelet/kuberuntime/kuberuntime_container.go  |  1 +
 .../pkg/kubelet/kuberuntime/kuberuntime_manager.go    |  5 +++++
 vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go   | 11 ++++-------
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
index 3c0ee08b3e3a..926938263dcf 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/events/event.go
@@ -32,6 +32,12 @@ const (
 	BackOffStartContainer = "BackOff"
 	ExceededGracePeriod   = "ExceededGracePeriod"
 
+	// Pod event reason list
+	FailedToKillPod                = "FailedKillPod"
+	FailedToCreatePodContainer     = "FailedCreatePodContainer"
+	FailedToMakePodDataDirectories = "Failed"
+	NetworkNotReady                = "NetworkNotReady"
+
 	// Image event reason list
 	PullingImage = "Pulling"
 	PulledImage  = "Pulled"
@@ -69,6 +75,7 @@ const (
 	UnsupportedMountOption = "UnsupportedMountOption"
 	SandboxChanged         = "SandboxChanged"
 	FailedCreatePodSandBox = "FailedCreatePodSandBox"
+	FailedStatusPodSandBox = "FailedPodSandBoxStatus"
 
 	// Image manager event reason list
 	InvalidDiskCapacity = "InvalidDiskCapacity"
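The new constants follow the existing convention in this file: the reason is a short, stable, machine-aggregatable string, while the human-readable detail goes into the event message. A toy sketch of that split is below; the recorder type here is a stand-in for illustration, not the real record.EventRecorder.

    package main

    import "fmt"

    // Event reason constants in the style of the list above: stable reasons,
    // variable detail in the formatted message.
    const (
        eventTypeWarning = "Warning"
        failedToKillPod  = "FailedKillPod"
        networkNotReady  = "NetworkNotReady"
    )

    // recorder is a toy stand-in for an event recorder; it only prints.
    type recorder struct{}

    func (recorder) Eventf(object interface{}, eventtype, reason, format string, args ...interface{}) {
        fmt.Printf("%s %s %v: %s\n", eventtype, reason, object, fmt.Sprintf(format, args...))
    }

    func main() {
        var r recorder
        r.Eventf("pod/web-0", eventTypeWarning, failedToKillPod, "error killing pod: %v", "grace period exceeded")
        r.Eventf("pod/web-0", eventTypeWarning, networkNotReady, "network is not ready: %v", []string{"cni plugin not initialized"})
    }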
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
index 7cfd4c23f6af..2d2b2e79eaad 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go
@@ -1394,6 +1394,10 @@ func (kl *Kubelet) GetClusterDNS(pod *v1.Pod) ([]string, []string, bool, error)
 //
 // If any step of this workflow errors, the error is returned, and is repeated
 // on the next syncPod call.
+//
+// This operation writes all events that are dispatched in order to provide
+// the most accurate information possible about an error situation to aid debugging.
+// Callers should not throw an event if this operation returns an error.
 func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	// pull out the required options
 	pod := o.pod
@@ -1411,6 +1415,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		kl.statusManager.SetPodStatus(pod, apiPodStatus)
 		// we kill the pod with the specified grace period since this is a termination
 		if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 			// there was an error killing the pod, so we return that error directly
 			utilruntime.HandleError(err)
 			return err
@@ -1477,6 +1482,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
 		var syncErr error
 		if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
 			syncErr = fmt.Errorf("error killing pod: %v", err)
 			utilruntime.HandleError(syncErr)
 		} else {
@@ -1491,6 +1497,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 
 	// If the network plugin is not ready, only start the pod if it uses the host network
 	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !kubecontainer.IsHostNetworkPod(pod) {
+		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "network is not ready: %v", rs)
 		return fmt.Errorf("network is not ready: %v", rs)
 	}
 
@@ -1533,6 +1540,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 			glog.V(2).Infof("Failed to update QoS cgroups while syncing pod: %v", err)
 		}
 		if err := pcm.EnsureExists(pod); err != nil {
+			kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToCreatePodContainer, "unable to ensure pod container exists: %v", err)
 			return fmt.Errorf("failed to ensure that the pod: %v cgroups exist and are correctly applied: %v", pod.UID, err)
 		}
 	}
@@ -1565,6 +1573,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 
 	// Make data directories for the pod
 	if err := kl.makePodDataDirs(pod); err != nil {
+		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToMakePodDataDirectories, "error making pod data directories: %v", err)
 		glog.Errorf("Unable to make pod data directories for pod %q: %v", format.Pod(pod), err)
 		return err
 	}
@@ -1586,6 +1595,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 	result := kl.containerRuntime.SyncPod(pod, apiPodStatus, podStatus, pullSecrets, kl.backOff)
 	kl.reasonCache.Update(pod.UID, result)
 	if err := result.Error(); err != nil {
+		// Do not record an event here, as we keep all event logging for sync pod failures
+		// local to container runtime so we get better errors
 		return err
 	}
 
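Each failing step in syncPod now records its own descriptive warning event and then returns the error, rather than relying on a generic catch-all in the pod worker. A minimal sketch of that convention under assumed, simplified types (eventSink, logSink, and syncStep are illustrative, not kubelet APIs):

    package main

    import (
        "errors"
        "fmt"
    )

    // eventSink is a narrow, assumed interface standing in for an event recorder.
    type eventSink interface {
        Eventf(object, eventtype, reason, format string, args ...interface{})
    }

    type logSink struct{}

    func (logSink) Eventf(object, eventtype, reason, format string, args ...interface{}) {
        fmt.Printf("%s %s %s: %s\n", eventtype, reason, object, fmt.Sprintf(format, args...))
    }

    // syncStep shows the convention adopted above: the failing step records a
    // specific event at the failure site and returns the error to the caller.
    func syncStep(rec eventSink, pod string, makeDataDirs func() error) error {
        if err := makeDataDirs(); err != nil {
            rec.Eventf(pod, "Warning", "Failed", "error making pod data directories: %v", err)
            return err
        }
        return nil
    }

    func main() {
        err := syncStep(logSink{}, "pod/web-0", func() error { return errors.New("read-only filesystem") })
        fmt.Println("sync returned:", err)
    }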
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
index 31f0b8ed9ffe..1f27afb33154 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_container.go
@@ -89,6 +89,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 	// Step 1: pull the image.
 	imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
 	if err != nil {
+		m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err))
 		return msg, err
 	}
 
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index d852c9a63100..7e5d1d0d2937 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -619,6 +619,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStat
 		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
 		if err != nil {
+			ref, err := ref.GetReference(api.Scheme, pod)
+			if err != nil {
+				glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
+			}
+			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
 			glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
 			result.Fail(err)
 			return
diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go b/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
index ea2ace249cf3..bf5b50e4600b 100644
--- a/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
+++ b/vendor/k8s.io/kubernetes/pkg/kubelet/pod_workers.go
@@ -162,6 +162,9 @@ func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
 		// the previous sync.
 		status, err := p.podCache.GetNewerThan(podUID, lastSyncTime)
 		if err != nil {
+			// This is the legacy event thrown by manage pod loop
+			// all other events are now dispatched from syncPodFn
+			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "error determining status: %v", err)
 			return err
 		}
 		err = p.syncPodFn(syncPodOptions{
@@ -179,14 +182,8 @@ func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
 			update.OnCompleteFunc(err)
 		}
 		if err != nil {
+			// IMPORTANT: we do not log errors here, the syncPodFn is responsible for logging errors
 			glog.Errorf("Error syncing pod %s (%q), skipping: %v", update.Pod.UID, format.Pod(update.Pod), err)
-			// if we failed sync, we throw more specific events for why it happened.
-			// as a result, i question the value of this event.
-			// TODO: determine if we can remove this in a future release.
-			// do not include descriptive text that can vary on why it failed so in a pathological
-			// scenario, kubelet does not create enough discrete events that miss default aggregation
-			// window.
-			p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "Error syncing pod")
 		}
 		p.wrapUp(update.Pod.UID, err)
 	}
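With the sync function responsible for its own events, the pod worker keeps only the legacy "error determining status" event and otherwise just logs the outcome and hands it to wrap-up bookkeeping. A stripped-down sketch of that division of labor follows; managePod and its callbacks are illustrative stand-ins, not the real podWorkers API.

    package main

    import (
        "errors"
        "fmt"
    )

    // managePod models what the worker does after this change: run the sync
    // function, notify the completion callback, and log any error. It records
    // no generic event, because syncPodFn already emitted a specific one.
    func managePod(syncPodFn func() error, onComplete func(error)) {
        err := syncPodFn()
        if onComplete != nil {
            onComplete(err)
        }
        if err != nil {
            // No event here on purpose; the sync function owns event reporting.
            fmt.Println("Error syncing pod, skipping:", err)
        }
    }

    func main() {
        managePod(
            func() error { return errors.New("network is not ready") },
            func(err error) { fmt.Println("completed with:", err) },
        )
    }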