This repository has been archived by the owner on Jun 20, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 670
Fix 2797 on Kubernetes - Cluster goes down because all IPs become unreachable #3149
Merged
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
f431ee2
Refactor: create config in main function
bboreham ed8d92c
RBAC to read and write ConfigMaps
bboreham 71b3948
Maintain a list of peers in a Kubernetes annotation
bboreham 497ddda
Improve config locking by using optimistic locking strategy
267bd8b
Pseudo-code to clean up dead peers
bboreham a552f19
Code to reclaim IP address space from removed peers
bboreham 6a0e39e
Minor refactor for clarity
cb0c9e5
Document: Add file-level docstrings for kube-peers
fd2f91e
Refactor: Improve naming
3afb8d3
Add log messages to detect dying peer edge case
70a13c9
Make kube-peers logging consistent and add -log-level flag
bboreham 3501ad4
Cope with a Kubernetes node being deleted and coming back
bboreham c67bc70
Temporarily raise kube-peers log level to debug
bboreham 49ed40a
Add test function to recover clean output from command run on remote …
987ac98
Add up command to integration tests to make launching cluster easier
90f4f7b
Add test to ensure that weave recovers unreachable IPs on launch
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
/* | ||
In order to keep track of active weave peers, we use annotations on the Kubernetes cluster. | ||
|
||
Kubernetes uses etcd to distribute and synchronise these annotations so we don't have to. | ||
|
||
This module deals with operations on the peerlist backed by Kubernetes' annotation mechanism. | ||
*/ | ||
package main | ||
|
||
import ( | ||
"encoding/json" | ||
"log" | ||
"time" | ||
|
||
"github.com/pkg/errors" | ||
|
||
v1 "k8s.io/api/core/v1" | ||
kubeErrors "k8s.io/apimachinery/pkg/api/errors" | ||
api "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
wait "k8s.io/apimachinery/pkg/util/wait" | ||
kubernetes "k8s.io/client-go/kubernetes" | ||
corev1client "k8s.io/client-go/kubernetes/typed/core/v1" | ||
) | ||
|
||
type configMapAnnotations struct { | ||
ConfigMapName string | ||
Namespace string | ||
Client corev1client.ConfigMapsGetter | ||
cm *v1.ConfigMap | ||
} | ||
|
||
func newConfigMapAnnotations(ns string, configMapName string, clientset *kubernetes.Clientset) *configMapAnnotations { | ||
return &configMapAnnotations{ | ||
Namespace: ns, | ||
ConfigMapName: configMapName, | ||
Client: clientset.CoreV1(), | ||
} | ||
} | ||
|
||
// peerList is the set of peers recorded in the annotation; it is the value
// that gets serialised to and from JSON.
type peerList struct {
	Peers []peerInfo
}

// peerInfo ties a weave peer to the Kubernetes node it was recorded with.
type peerInfo struct {
	PeerName string // Weave internal unique ID
	NodeName string // Kubernetes node name
}

// contains reports whether any entry in the list has the given peer name.
func (pl peerList) contains(peerName string) bool {
	for i := range pl.Peers {
		if pl.Peers[i].PeerName == peerName {
			return true
		}
	}
	return false
}

// add appends an entry for peerName on node name. No duplicate check is
// made; call contains first if one is wanted.
func (pl *peerList) add(peerName string, name string) {
	entry := peerInfo{
		PeerName: peerName,
		NodeName: name,
	}
	pl.Peers = append(pl.Peers, entry)
}

// remove deletes every entry whose peer name equals peerNameToRemove,
// keeping the remaining entries in their original order.
func (pl *peerList) remove(peerNameToRemove string) {
	i := 0
	for i < len(pl.Peers) {
		if pl.Peers[i].PeerName != peerNameToRemove {
			i++
			continue
		}
		// Shift the tail down over the removed entry and drop the last slot;
		// i is not advanced because a new element now sits at position i.
		copy(pl.Peers[i:], pl.Peers[i+1:])
		pl.Peers = pl.Peers[:len(pl.Peers)-1]
	}
}
|
||
const (
	// retryPeriod is the base delay between attempts in LoopUpdate.
	retryPeriod = 2 * time.Second
	// jitterFactor is the jitter factor handed to wait.JitterUntil in LoopUpdate.
	jitterFactor = 1.0

	// KubePeersAnnotationKey is the default annotation key
	KubePeersAnnotationKey = "kube-peers.weave.works/peers"
)
|
||
func (cml *configMapAnnotations) Init() error { | ||
for { | ||
// Since it's potentially racy to GET, then CREATE if not found, we wrap in a check loop | ||
// so that if the configmap is created after our GET but before or CREATE, we'll gracefully | ||
// re-try to get the configmap. | ||
var err error | ||
cml.cm, err = cml.Client.ConfigMaps(cml.Namespace).Get(cml.ConfigMapName, api.GetOptions{}) | ||
if err != nil { | ||
if !kubeErrors.IsNotFound(err) { | ||
return errors.Wrapf(err, "Unable to fetch ConfigMap %s/%s", cml.Namespace, cml.ConfigMapName) | ||
} | ||
cml.cm, err = cml.Client.ConfigMaps(cml.Namespace).Create(&v1.ConfigMap{ | ||
ObjectMeta: api.ObjectMeta{ | ||
Name: cml.ConfigMapName, | ||
Namespace: cml.Namespace, | ||
}, | ||
}) | ||
if err != nil { | ||
if kubeErrors.IsAlreadyExists(err) { | ||
continue | ||
} | ||
return errors.Wrapf(err, "Unable to create ConfigMap %s/%s", cml.Namespace, cml.ConfigMapName) | ||
} | ||
} | ||
break | ||
} | ||
if cml.cm.Annotations == nil { | ||
cml.cm.Annotations = make(map[string]string) | ||
} | ||
return nil | ||
} | ||
|
||
func (cml *configMapAnnotations) GetPeerList() (*peerList, error) { | ||
var record peerList | ||
if cml.cm == nil { | ||
return nil, errors.New("endpoint not initialized, call Init first") | ||
} | ||
if recordBytes, found := cml.cm.Annotations[KubePeersAnnotationKey]; found { | ||
if err := json.Unmarshal([]byte(recordBytes), &record); err != nil { | ||
return nil, err | ||
} | ||
} | ||
return &record, nil | ||
} | ||
|
||
func (cml *configMapAnnotations) UpdatePeerList(list peerList) error { | ||
recordBytes, err := json.Marshal(list) | ||
if err != nil { | ||
return err | ||
} | ||
return cml.UpdateAnnotation(KubePeersAnnotationKey, string(recordBytes)) | ||
} | ||
|
||
func (cml *configMapAnnotations) UpdateAnnotation(key, value string) error { | ||
if cml.cm == nil { | ||
return errors.New("endpoint not initialized, call Init first") | ||
} | ||
cm := cml.cm | ||
cm.Annotations[key] = value | ||
cm, err := cml.Client.ConfigMaps(cml.Namespace).Update(cml.cm) | ||
if err == nil { | ||
cml.cm = cm | ||
} | ||
return err | ||
} | ||
|
||
func (cml *configMapAnnotations) RemoveAnnotation(key string) error { | ||
if cml.cm == nil { | ||
return errors.New("endpoint not initialized, call Init first") | ||
} | ||
cm := cml.cm | ||
delete(cm.Annotations, key) | ||
cm, err := cml.Client.ConfigMaps(cml.Namespace).Update(cml.cm) | ||
if err == nil { | ||
cml.cm = cm | ||
} | ||
return err | ||
} | ||
|
||
func (cml *configMapAnnotations) RemoveAnnotationsWithValue(valueToRemove string) error { | ||
if cml.cm == nil { | ||
return errors.New("endpoint not initialized, call Init first") | ||
} | ||
cm := cml.cm | ||
for key, value := range cm.Annotations { | ||
if value == valueToRemove { | ||
delete(cm.Annotations, key) | ||
} | ||
} | ||
cm, err := cml.Client.ConfigMaps(cml.Namespace).Update(cml.cm) | ||
if err == nil { | ||
cml.cm = cm | ||
} | ||
return err | ||
} | ||
|
||
// Loop with jitter, fetching the cml data and calling f() until it | ||
// doesn't get an optimistic locking conflict. | ||
// If it succeeds or gets any other kind of error, stop the loop. | ||
func (cml *configMapAnnotations) LoopUpdate(f func() error) error { | ||
stop := make(chan struct{}) | ||
var err error | ||
wait.JitterUntil(func() { | ||
if err = cml.Init(); err != nil { | ||
close(stop) | ||
return | ||
} | ||
err = f() | ||
if err != nil && kubeErrors.IsConflict(err) { | ||
This comment was marked as abuse.
Sorry, something went wrong.
This comment was marked as abuse.
Sorry, something went wrong.
This comment was marked as abuse.
Sorry, something went wrong.
This comment was marked as abuse.
Sorry, something went wrong. |
||
log.Printf("Optimistic locking conflict: trying again: %s", err) | ||
return | ||
} | ||
close(stop) | ||
}, retryPeriod, jitterFactor, true, stop) | ||
return err | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This comment was marked as abuse.
Sorry, something went wrong.