diff --git a/bin/helm-build b/bin/helm-build index 4f4b9a477fbab..f4be94e7a26e4 100755 --- a/bin/helm-build +++ b/bin/helm-build @@ -20,6 +20,7 @@ bindir=$( cd "${BASH_SOURCE[0]%/*}" && pwd ) rootdir=$( cd "$bindir"/.. && pwd ) "$bindir"/helm lint "$rootdir"/charts/linkerd2-multicluster +"$bindir"/helm lint "$rootdir"/charts/linkerd2-multicluster-link "$bindir"/helm lint "$rootdir"/charts/partials "$bindir"/helm dep up "$rootdir"/charts/linkerd2-cni "$bindir"/helm lint "$rootdir"/charts/linkerd2-cni @@ -50,6 +51,7 @@ if [ "$1" = package ]; then "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2 "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-cni "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-multicluster + "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-multicluster-link mv "$rootdir"/target/helm/index-pre.yaml "$rootdir"/target/helm/index-pre-"$version".yaml "$bindir"/helm repo index --url "https://helm.linkerd.io/$repo/" --merge "$rootdir"/target/helm/index-pre-"$version".yaml "$rootdir"/target/helm diff --git a/charts/linkerd2-multicluster-link/.helmignore b/charts/linkerd2-multicluster-link/.helmignore new file mode 100644 index 0000000000000..79c90a8063116 --- /dev/null +++ b/charts/linkerd2-multicluster-link/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +OWNERS +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/linkerd2-multicluster-link/Chart.yaml b/charts/linkerd2-multicluster-link/Chart.yaml new file mode 100644 index 0000000000000..c6ec380a9b727 --- /dev/null +++ b/charts/linkerd2-multicluster-link/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +appVersion: edge-XX.X.X +description: A helm chart containing the resources to enable mirroring of services from a remote cluster +kubeVersion: ">=1.13.0-0" +icon: https://linkerd.io/images/logo-only-200h.png +name: "linkerd2-multicluster-link" +version: 0.1.0 diff --git a/charts/linkerd2-multicluster-link/README.md b/charts/linkerd2-multicluster-link/README.md new file mode 100644 index 0000000000000..3500c5285444a --- /dev/null +++ b/charts/linkerd2-multicluster-link/README.md @@ -0,0 +1,40 @@ + +# Linkerd2-multicluster-link Helm Chart + +Linkerd is a *service mesh*, designed to give platform-wide observability, +reliability, and security without requiring configuration or code changes. This +chart provides the components needed to enable communication between clusters. + +## Configuration + +The following table lists the configurable parameters of the +linkerd2-multicluster-link chart and their default values. + +| Parameter | Description | Default | +|---------------------------------|---------------------------------------------------------------------------------------------|----------------------------------------------| +|`controllerComponentLabel` | Control plane label. Do not edit |`linkerd.io/control-plane-component` | +|`controllerImage` | Docker image for the Service mirror component (uses the Linkerd controller image) |`gcr.io/linkerd-io/controller` | +|`controllerImageVersion` | Tag for the Service Mirror container Docker image |`latest version` | +|`createdByAnnotation` | Annotation label for the proxy create.
Do not edit. |`linkerd.io/created-by` | +|`gateway` | If the gateway component should be installed |`true` | +|`gatewayLocalProbePath` | The path that will be used by the local liveness checks to ensure the gateway is alive |`/health-local` | +|`gatewayLocalProbePort` | The port that will be used by the local liveness checks to ensure the gateway is alive |`8888` | +|`gatewayName` | The name of the gateway that will be installed |`linkerd-gateway` | +|`gatewayNginxImage` | The Nginx image |`nginx` | +|`gatewayNginxImageVersion` | The version of the Nginx image |`1.17` | +|`gatewayPort` | The port on which the gateway will accept all incoming traffic |`4143` | +|`gatewayProbePath` | The path that will be used by remote clusters for determining whether the gateway is alive |`/health` | +|`gatewayProbePort` | The port used for liveness probing |`4181` | +|`gatewayProbeSeconds` | The interval (in seconds) between liveness probes |`3` | +|`identityTrustDomain` | Trust domain used for identity of the existing linkerd installation |`cluster.local` | +|`installNamespace` | If the namespace should be installed |`true` | +|`linkerdNamespace` | The namespace of the existing Linkerd installation |`linkerd` | +|`linkerdVersion` | Control plane version | latest version | +|`namespace` | Service Mirror component namespace |`linkerd-multicluster` | +|`proxyOutboundPort` | The port on which the proxy accepts outbound traffic |`4140` | +|`remoteMirrorServiceAccountName` | The name of the service account used to allow remote clusters to mirror local services |`linkerd-service-mirror-remote-access-default`| +|`remoteMirrorServiceAccount` | If the remote mirror service account should be installed |`true` | +|`serviceMirror` | If the service mirror component should be installed |`true` | +|`logLevel` | Log level for the Multicluster components |`info` | +|`serviceMirrorRetryLimit` | Number of times update from the remote cluster is allowed to be requeued (retried) |`3` |
+|`serviceMirrorUID` | User id under which the Service Mirror shall be run |`2103` | diff --git a/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml b/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml new file mode 100644 index 0000000000000..7806d3e470760 --- /dev/null +++ b/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: probe-gateway-{{.Values.targetClusterName}} + namespace: {{.Values.namespace}} + labels: + mirror.linkerd.io/mirrored-gateway: "true" + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} +spec: + ports: + - name: mc-probe + port: {{.Values.gatewayProbePort}} + protocol: TCP diff --git a/charts/linkerd2-multicluster/templates/service-mirror.yaml b/charts/linkerd2-multicluster-link/templates/service-mirror.yaml similarity index 59% rename from charts/linkerd2-multicluster/templates/service-mirror.yaml rename to charts/linkerd2-multicluster-link/templates/service-mirror.yaml index cd797bb7fc2e7..a337660d23881 100644 --- a/charts/linkerd2-multicluster/templates/service-mirror.yaml +++ b/charts/linkerd2-multicluster-link/templates/service-mirror.yaml @@ -1,11 +1,11 @@ -{{if .Values.serviceMirror -}} --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} rules: - apiGroups: [""] resources: ["endpoints", "services"] @@ -17,72 +17,83 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror +
mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} subjects: - kind: ServiceAccount - name: linkerd-service-mirror + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} rules: - apiGroups: [""] resources: ["secrets"] + resourceNames: ["cluster-credentials-{{.Values.targetClusterName}}"] + verbs: ["list", "get", "watch"] + - apiGroups: ["multicluster.linkerd.io"] + resources: ["links"] verbs: ["list", "get", "watch"] --- kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} subjects: - kind: ServiceAccount - name: linkerd-service-mirror + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} --- kind: ServiceAccount apiVersion: v1 metadata: - name: linkerd-service-mirror + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + 
mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} --- apiVersion: apps/v1 kind: Deployment metadata: labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror - name: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} spec: replicas: 1 selector: matchLabels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} template: metadata: annotations: linkerd.io/inject: enabled labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} spec: containers: - args: @@ -90,6 +101,7 @@ spec: - -log-level={{.Values.logLevel}} - -event-requeue-limit={{.Values.serviceMirrorRetryLimit}} - -namespace={{.Values.namespace}} + - {{.Values.targetClusterName}} image: {{.Values.controllerImage}}:{{.Values.controllerImageVersion}} name: service-mirror securityContext: @@ -97,5 +109,4 @@ spec: ports: - containerPort: 9999 name: admin-http - serviceAccountName: linkerd-service-mirror -{{end -}} + serviceAccountName: linkerd-service-mirror-{{.Values.targetClusterName}} diff --git a/charts/linkerd2-multicluster-link/values.yaml b/charts/linkerd2-multicluster-link/values.yaml new file mode 100644 index 0000000000000..df34d0d83c42f --- /dev/null +++ b/charts/linkerd2-multicluster-link/values.yaml @@ -0,0 +1,9 @@ +controllerComponentLabel: linkerd.io/control-plane-component +controllerImage: gcr.io/linkerd-io/controller +controllerImageVersion: linkerdVersionValue +createdByAnnotation: linkerd.io/created-by +gatewayProbePort: 4181 +namespace: linkerd-multicluster +logLevel: info +serviceMirrorRetryLimit: 3 +serviceMirrorUID: 2103 diff --git a/charts/linkerd2-multicluster/Chart.yaml b/charts/linkerd2-multicluster/Chart.yaml index 377b564f958e4..0870b69269002 100644 --- 
a/charts/linkerd2-multicluster/Chart.yaml +++ b/charts/linkerd2-multicluster/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 appVersion: edge-XX.X.X -description: A helm chart containing the resources to enable mirroring of services on remote clusters +description: A helm chart containing the resources to support multicluster linking to remote clusters kubeVersion: ">=1.13.0-0" icon: https://linkerd.io/images/logo-only-200h.png name: "linkerd2-multicluster" -version: 0.1.0 \ No newline at end of file +version: 0.1.0 diff --git a/charts/linkerd2-multicluster/templates/link-crd.yaml b/charts/linkerd2-multicluster/templates/link-crd.yaml new file mode 100644 index 0000000000000..c2efffa956377 --- /dev/null +++ b/charts/linkerd2-multicluster/templates/link-crd.yaml @@ -0,0 +1,22 @@ +--- +### +### Link CRD +### +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: links.multicluster.linkerd.io + annotations: + {{.Values.createdByAnnotation}}: {{default (printf "linkerd/helm %s" .Values.linkerdVersion) .Values.cliVersion}} +spec: + group: multicluster.linkerd.io + versions: + - name: v1alpha1 + served: true + storage: true + scope: Namespaced + names: + plural: links + singular: link + kind: Link diff --git a/charts/linkerd2-multicluster/values.yaml b/charts/linkerd2-multicluster/values.yaml index 6d260c2416989..78d17be59e569 100644 --- a/charts/linkerd2-multicluster/values.yaml +++ b/charts/linkerd2-multicluster/values.yaml @@ -1,6 +1,3 @@ -controllerComponentLabel: linkerd.io/control-plane-component -controllerImage: gcr.io/linkerd-io/controller -controllerImageVersion: linkerdVersionValue createdByAnnotation: linkerd.io/created-by gateway: true gatewayLocalProbePath: /health-local @@ -12,15 +9,9 @@ gatewayPort: 4143 gatewayProbePath: /health gatewayProbePort: 4181 gatewayProbeSeconds: 3 -identityTrustDomain: cluster.local installNamespace: true -linkerdNamespace: linkerd linkerdVersion: linkerdVersionValue namespace: 
linkerd-multicluster proxyOutboundPort: 4140 -serviceMirror: true -logLevel: info -serviceMirrorRetryLimit: 3 -serviceMirrorUID: 2103 remoteMirrorServiceAccount: true remoteMirrorServiceAccountName: linkerd-service-mirror-remote-access-default diff --git a/cli/cmd/check.go b/cli/cmd/check.go index 3c16eed02c315..807ac167a1054 100644 --- a/cli/cmd/check.go +++ b/cli/cmd/check.go @@ -188,8 +188,7 @@ func configureAndRunChecks(wout io.Writer, werr io.Writer, stage string, options } checks = append(checks, healthcheck.LinkerdCNIPluginChecks) checks = append(checks, healthcheck.LinkerdHAChecks) - checks = append(checks, healthcheck.LinkerdMulticlusterSourceChecks) - checks = append(checks, healthcheck.LinkerdMulticlusterTargetChecks) + checks = append(checks, healthcheck.LinkerdMulticlusterChecks) checks = append(checks, healthcheck.AddOnCategories...) } @@ -208,8 +207,6 @@ func configureAndRunChecks(wout io.Writer, werr io.Writer, stage string, options RetryDeadline: time.Now().Add(options.wait), CNIEnabled: options.cniEnabled, InstallManifest: installManifest, - SourceCluster: options.multicluster, - TargetCluster: options.multicluster, }) success := runChecks(wout, werr, hc, options.output) diff --git a/cli/cmd/multicluster.go b/cli/cmd/multicluster.go index f6a50b9762ba7..4062c876ce669 100644 --- a/cli/cmd/multicluster.go +++ b/cli/cmd/multicluster.go @@ -18,6 +18,7 @@ import ( mccharts "github.com/linkerd/linkerd2/pkg/charts/multicluster" "github.com/linkerd/linkerd2/pkg/healthcheck" "github.com/linkerd/linkerd2/pkg/k8s" + mc "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/linkerd/linkerd2/pkg/version" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" @@ -33,10 +34,12 @@ import ( ) const ( - defaultMulticlusterNamespace = "linkerd-multicluster" - helmMulticlusterDefaultChartName = "linkerd2-multicluster" - tokenKey = "token" - defaultServiceAccountName = "linkerd-service-mirror-remote-access-default" + defaultMulticlusterNamespace = 
"linkerd-multicluster" + defaultGatewayName = "linkerd-gateway" + helmMulticlusterDefaultChartName = "linkerd2-multicluster" + helmMulticlusterLinkDefaultChartName = "linkerd2-multicluster-link" + tokenKey = "token" + defaultServiceAccountName = "linkerd-service-mirror-remote-access-default" ) type ( @@ -52,21 +55,23 @@ type ( gatewayProbeSeconds uint32 gatewayProbePort uint32 namespace string - serviceMirror bool - serviceMirrorRetryLimit uint32 - logLevel string gatewayNginxImage string gatewayNginxVersion string - controlPlaneVersion string dockerRegistry string remoteMirrorCredentials bool } linkOptions struct { - namespace string - clusterName string - apiServerAddress string - serviceAccountName string + namespace string + clusterName string + apiServerAddress string + serviceAccountName string + gatewayName string + gatewayNamespace string + serviceMirrorRetryLimit uint32 + logLevel string + controlPlaneVersion string + dockerRegistry string } exportServiceOptions struct { @@ -82,7 +87,7 @@ type ( ) func newMulticlusterInstallOptionsWithDefault() (*multiclusterInstallOptions, error) { - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewInstallValues() if err != nil { return nil, err } @@ -93,16 +98,26 @@ func newMulticlusterInstallOptionsWithDefault() (*multiclusterInstallOptions, er gatewayProbeSeconds: defaults.GatewayProbeSeconds, gatewayProbePort: defaults.GatewayProbePort, namespace: defaults.Namespace, - serviceMirror: defaults.ServiceMirror, - serviceMirrorRetryLimit: defaults.ServiceMirrorRetryLimit, - logLevel: defaults.LogLevel, gatewayNginxImage: defaults.GatewayNginxImage, gatewayNginxVersion: defaults.GatewayNginxImageVersion, - controlPlaneVersion: version.Version, dockerRegistry: defaultDockerRegistry, remoteMirrorCredentials: true, }, nil +} + +func newLinkOptionsWithDefault() (*linkOptions, error) { + defaults, err := mccharts.NewLinkValues() + if err != nil { + return nil, err + } + return &linkOptions{ + 
controlPlaneVersion: version.Version, + namespace: defaults.Namespace, + dockerRegistry: defaultDockerRegistry, + serviceMirrorRetryLimit: defaults.ServiceMirrorRetryLimit, + logLevel: defaults.LogLevel, + }, nil } func getLinkerdConfigMap() (*configPb.All, error) { @@ -119,15 +134,7 @@ func getLinkerdConfigMap() (*configPb.All, error) { return global, nil } -func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multicluster.Values, error) { - - global, err := getLinkerdConfigMap() - if err != nil { - if kerrors.IsNotFound(err) { - return nil, errors.New("you need Linkerd to be installed in order to install multicluster addons") - } - return nil, err - } +func buildServiceMirrorValues(opts *linkOptions) (*multicluster.Values, error) { if !alphaNumDashDot.MatchString(opts.controlPlaneVersion) { return nil, fmt.Errorf("%s is not a valid version", opts.controlPlaneVersion) @@ -145,13 +152,42 @@ func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multiclu return nil, fmt.Errorf("--log-level must be one of: panic, fatal, error, warn, info, debug") } - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewLinkValues() + if err != nil { + return nil, err + } + + defaults.TargetClusterName = opts.clusterName + defaults.Namespace = opts.namespace + defaults.ServiceMirrorRetryLimit = opts.serviceMirrorRetryLimit + defaults.LogLevel = opts.logLevel + defaults.ControllerImageVersion = opts.controlPlaneVersion + defaults.ControllerImage = fmt.Sprintf("%s/controller", opts.dockerRegistry) + + return defaults, nil +} + +func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multicluster.Values, error) { + + global, err := getLinkerdConfigMap() if err != nil { + if kerrors.IsNotFound(err) { + return nil, errors.New("you need Linkerd to be installed in order to install multicluster addons") + } return nil, err } - if opts.gatewayProbePort == defaults.GatewayLocalProbePort { - return nil, fmt.Errorf("The probe 
port needs to be different from %d which is the multicluster probe port", opts.gatewayProbePort) + if opts.namespace == "" { + return nil, errors.New("you need to specify a namespace") + } + + if opts.namespace == controlPlaneNamespace { + return nil, errors.New("you need to setup the multicluster addons in a namespace different than the Linkerd one") + } + + defaults, err := mccharts.NewInstallValues() + if err != nil { + return nil, err } defaults.Namespace = opts.namespace @@ -159,17 +195,12 @@ func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multiclu defaults.GatewayPort = opts.gatewayPort defaults.GatewayProbeSeconds = opts.gatewayProbeSeconds defaults.GatewayProbePort = opts.gatewayProbePort - defaults.ServiceMirror = opts.serviceMirror - defaults.ServiceMirrorRetryLimit = opts.serviceMirrorRetryLimit - defaults.LogLevel = opts.logLevel defaults.GatewayNginxImage = opts.gatewayNginxImage defaults.GatewayNginxImageVersion = opts.gatewayNginxVersion defaults.IdentityTrustDomain = global.Global.IdentityContext.TrustDomain defaults.LinkerdNamespace = controlPlaneNamespace defaults.ProxyOutboundPort = global.Proxy.OutboundPort.Port defaults.LinkerdVersion = version.Version - defaults.ControllerImageVersion = opts.controlPlaneVersion - defaults.ControllerImage = fmt.Sprintf("%s/controller", opts.dockerRegistry) defaults.RemoteMirrorServiceAccount = opts.remoteMirrorCredentials return defaults, nil @@ -194,7 +225,7 @@ func buildMulticlusterAllowValues(opts *allowOptions) (*mccharts.Values, error) return nil, errors.New("you need to setup the multicluster addons in a namespace different than the Linkerd one") } - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewInstallValues() if err != nil { return nil, err } @@ -336,8 +367,8 @@ func newMulticlusterInstallCommand() *cobra.Command { {Name: chartutil.ChartfileName}, {Name: "templates/namespace.yaml"}, {Name: "templates/gateway.yaml"}, - {Name: 
"templates/service-mirror.yaml"}, {Name: "templates/remote-access-service-mirror-rbac.yaml"}, + {Name: "templates/link-crd.yaml"}, } chart := &charts.Chart{ @@ -363,12 +394,8 @@ func newMulticlusterInstallCommand() *cobra.Command { cmd.Flags().Uint32Var(&options.gatewayPort, "gateway-port", options.gatewayPort, "The port on the gateway used for all incoming traffic") cmd.Flags().Uint32Var(&options.gatewayProbeSeconds, "gateway-probe-seconds", options.gatewayProbeSeconds, "The interval at which the gateway will be checked for being alive in seconds") cmd.Flags().Uint32Var(&options.gatewayProbePort, "gateway-probe-port", options.gatewayProbePort, "The liveness check port of the gateway") - cmd.Flags().BoolVar(&options.serviceMirror, "service-mirror", options.serviceMirror, "If the service-mirror component should be installed") - cmd.Flags().Uint32Var(&options.serviceMirrorRetryLimit, "service-mirror-retry-limit", options.serviceMirrorRetryLimit, "The number of times a failed update from the target cluster is allowed to be retried") - cmd.Flags().StringVar(&options.logLevel, "log-level", options.logLevel, "Log level for the Multicluster components") cmd.Flags().StringVar(&options.gatewayNginxImage, "gateway-nginx-image", options.gatewayNginxImage, "The nginx image to be used") cmd.Flags().StringVar(&options.gatewayNginxVersion, "gateway-nginx-image-version", options.gatewayNginxVersion, "The version of nginx to be used") - cmd.Flags().StringVarP(&options.controlPlaneVersion, "control-plane-version", "", options.controlPlaneVersion, "(Development) Tag to be used for the control plane component images") cmd.Flags().StringVar(&options.dockerRegistry, "registry", options.dockerRegistry, "Docker registry to pull images from") cmd.Flags().BoolVar(&options.remoteMirrorCredentials, "service-mirror-credentials", options.remoteMirrorCredentials, "Whether to install the service account which can be used by service mirror components in source clusters to discover exported 
servivces") @@ -387,7 +414,11 @@ func newMulticlusterInstallCommand() *cobra.Command { } func newLinkCommand() *cobra.Command { - opts := linkOptions{} + opts, err := newLinkOptionsWithDefault() + if err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + os.Exit(1) + } cmd := &cobra.Command{ Use: "link", @@ -497,11 +528,93 @@ func newLinkCommand() *cobra.Command { }, } - out, err := yaml.Marshal(creds) + credsOut, err := yaml.Marshal(creds) + if err != nil { + return err + } + + gateway, err := k.CoreV1().Services(opts.gatewayNamespace).Get(opts.gatewayName, metav1.GetOptions{}) + if err != nil { + return err + } + + gatewayAddresses := []string{} + for _, ingress := range gateway.Status.LoadBalancer.Ingress { + gatewayAddresses = append(gatewayAddresses, ingress.IP) + } + if len(gatewayAddresses) == 0 { + return fmt.Errorf("Gateway %s.%s has no ingress addresses", gateway.Name, gateway.Namespace) + } + + gatewayIdentity, ok := gateway.Annotations[k8s.GatewayIdentity] + if !ok || gatewayIdentity == "" { + return fmt.Errorf("Gatway %s.%s has no %s annotation", gateway.Name, gateway.Namespace, k8s.GatewayIdentity) + } + + probeSpec, err := mc.ExtractProbeSpec(gateway) + if err != nil { + return err + } + + gatewayPort, err := extractGatewayPort(gateway) + if err != nil { + return err + } + + link := mc.Link{ + Name: opts.clusterName, + Namespace: opts.namespace, + TargetClusterName: opts.clusterName, + TargetClusterDomain: configMap.Global.ClusterDomain, + TargetClusterLinkerdNamespace: controlPlaneNamespace, + ClusterCredentialsSecret: fmt.Sprintf("cluster-credentials-%s", opts.clusterName), + GatewayAddress: strings.Join(gatewayAddresses, ","), + GatewayPort: gatewayPort, + GatewayIdentity: gatewayIdentity, + ProbeSpec: probeSpec, + } + + linkOut, err := yaml.Marshal(link.ToUnstructured().Object) if err != nil { return err } - fmt.Println(string(out)) + + values, err := buildServiceMirrorValues(opts) + + if err != nil { + return err + } + + // Render raw values and 
create chart config + rawValues, err := yaml.Marshal(values) + if err != nil { + return err + } + + files := []*chartutil.BufferedFile{ + {Name: chartutil.ChartfileName}, + {Name: "templates/service-mirror.yaml"}, + {Name: "templates/gateway-mirror.yaml"}, + } + + chart := &charts.Chart{ + Name: helmMulticlusterLinkDefaultChartName, + Dir: helmMulticlusterLinkDefaultChartName, + Namespace: controlPlaneNamespace, + RawValues: rawValues, + Files: files, + } + serviceMirrorOut, err := chart.RenderNoPartials() + if err != nil { + return err + } + + stdout.Write(credsOut) + stdout.Write([]byte("---\n")) + stdout.Write(linkOut) + stdout.Write([]byte("---\n")) + stdout.Write(serviceMirrorOut.Bytes()) + stdout.Write([]byte("---\n")) return nil }, @@ -511,6 +624,12 @@ func newLinkCommand() *cobra.Command { cmd.Flags().StringVar(&opts.clusterName, "cluster-name", "", "Cluster name") cmd.Flags().StringVar(&opts.apiServerAddress, "api-server-address", "", "The api server address of the target cluster") cmd.Flags().StringVar(&opts.serviceAccountName, "service-account-name", defaultServiceAccountName, "The name of the service account associated with the credentials") + cmd.Flags().StringVar(&opts.controlPlaneVersion, "control-plane-version", opts.controlPlaneVersion, "(Development) Tag to be used for the service mirror controller image") + cmd.Flags().StringVar(&opts.gatewayName, "gateway-name", defaultGatewayName, "The name of the gateway service") + cmd.Flags().StringVar(&opts.gatewayNamespace, "gateway-namespace", defaultMulticlusterNamespace, "The namespace of the gateway service") + cmd.Flags().Uint32Var(&opts.serviceMirrorRetryLimit, "service-mirror-retry-limit", opts.serviceMirrorRetryLimit, "The number of times a failed update from the target cluster is allowed to be retried") + cmd.Flags().StringVar(&opts.logLevel, "log-level", opts.logLevel, "Log level for the Multicluster components") + cmd.Flags().StringVar(&opts.dockerRegistry, "registry", opts.dockerRegistry, 
"Docker registry to pull service mirror controller image from") return cmd } @@ -796,14 +915,12 @@ func renderGateways(rows []*pb.GatewaysTable_Row, w io.Writer) { } var ( - gatewayNameHeader = "NAME" - gatewayNamespaceHeader = "NAMESPACE" - clusterNameHeader = "CLUSTER" - aliveHeader = "ALIVE" - pairedServicesHeader = "NUM_SVC" - latencyP50Header = "LATENCY_P50" - latencyP95Header = "LATENCY_P95" - latencyP99Header = "LATENCY_P99" + clusterNameHeader = "CLUSTER" + aliveHeader = "ALIVE" + pairedServicesHeader = "NUM_SVC" + latencyP50Header = "LATENCY_P50" + latencyP95Header = "LATENCY_P95" + latencyP99Header = "LATENCY_P99" ) func buildGatewaysTable() table.Table { @@ -814,18 +931,6 @@ func buildGatewaysTable() table.Table { Flexible: true, LeftAlign: true, }, - table.Column{ - Header: gatewayNamespaceHeader, - Width: 9, - Flexible: true, - LeftAlign: true, - }, - table.Column{ - Header: gatewayNameHeader, - Width: 4, - Flexible: true, - LeftAlign: true, - }, table.Column{ Header: aliveHeader, Width: 5, @@ -869,8 +974,6 @@ func gatewaysRowToTableRow(row *pb.GatewaysTable_Row) []string { } return []string{ row.ClusterName, - row.Namespace, - row.Name, alive, fmt.Sprint(row.PairedServices), valueOrPlaceholder(fmt.Sprintf("%dms", row.LatencyMsP50)), @@ -879,3 +982,12 @@ func gatewaysRowToTableRow(row *pb.GatewaysTable_Row) []string { } } + +func extractGatewayPort(gateway *corev1.Service) (uint32, error) { + for _, port := range gateway.Spec.Ports { + if port.Name == k8s.GatewayPortName { + return uint32(port.Port), nil + } + } + return 0, fmt.Errorf("gateway service %s has no gateway port named %s", gateway.Name, k8s.GatewayPortName) +} diff --git a/controller/api/public/gateways.go b/controller/api/public/gateways.go index 564a444b8be76..9d2c8b7d9da8f 100644 --- a/controller/api/public/gateways.go +++ b/controller/api/public/gateways.go @@ -54,7 +54,8 @@ func buildGatewaysRequestLabels(req *pb.GatewaysRequest) (labels model.LabelSet, return labels, groupBy } -// 
this function returns a map of gateways to the number of services using them +// this function returns a map of target cluster to the number of services mirrored +// from it func (s *grpcServer) getNumServicesMap() (map[string]uint64, error) { results := make(map[string]uint64) @@ -66,11 +67,7 @@ func (s *grpcServer) getNumServicesMap() (map[string]uint64, error) { for _, svc := range services.Items { clusterName := svc.Labels[k8s.RemoteClusterNameLabel] - gatewayName := svc.Labels[k8s.RemoteGatewayNameLabel] - gatewayNs := svc.Labels[k8s.RemoteGatewayNsLabel] - key := fmt.Sprintf("%s-%s-%s", clusterName, gatewayName, gatewayNs) - - results[key]++ + results[clusterName]++ } return results, nil @@ -83,20 +80,14 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) for _, result := range results { for _, sample := range result.vec { - clusterName := sample.Metric[remoteClusterNameLabel] - gatewayName := sample.Metric[gatewayNameLabel] - gatewayNamespace := sample.Metric[gatewayNamespaceLabel] - numPairedSvc := numSvcMap[fmt.Sprintf("%s-%s-%s", clusterName, gatewayName, gatewayNamespace)] - - key := fmt.Sprintf("%s-%s-%s", clusterName, gatewayNamespace, gatewayName) + clusterName := string(sample.Metric[remoteClusterNameLabel]) + numPairedSvc := numSvcMap[clusterName] addRow := func() { - if rows[key] == nil { - rows[key] = &pb.GatewaysTable_Row{} - rows[key].ClusterName = string(clusterName) - rows[key].Name = string(gatewayName) - rows[key].Namespace = string(gatewayNamespace) - rows[key].PairedServices = numPairedSvc + if rows[clusterName] == nil { + rows[clusterName] = &pb.GatewaysTable_Row{} + rows[clusterName].ClusterName = clusterName + rows[clusterName].PairedServices = numPairedSvc } } @@ -105,16 +96,16 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) switch result.prom { case promGatewayAlive: addRow() - rows[key].Alive = value > 0 + rows[clusterName].Alive = value > 0 case promLatencyP50: 
addRow() - rows[key].LatencyMsP50 = value + rows[clusterName].LatencyMsP50 = value case promLatencyP95: addRow() - rows[key].LatencyMsP95 = value + rows[clusterName].LatencyMsP95 = value case promLatencyP99: addRow() - rows[key].LatencyMsP99 = value + rows[clusterName].LatencyMsP99 = value } } } @@ -125,13 +116,11 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) func (s *grpcServer) getGatewaysMetrics(ctx context.Context, req *pb.GatewaysRequest, timeWindow string) (map[string]*pb.GatewaysTable_Row, error) { labels, groupBy := buildGatewaysRequestLabels(req) - reqLabels := generateLabelStringWithExclusion(labels, string(gatewayNameLabel)) - promQueries := map[promType]string{ promGatewayAlive: gatewayAliveQuery, } - metricsResp, err := s.getPrometheusMetrics(ctx, promQueries, gatewayLatencyQuantileQuery, reqLabels, timeWindow, groupBy.String()) + metricsResp, err := s.getPrometheusMetrics(ctx, promQueries, gatewayLatencyQuantileQuery, labels.String(), timeWindow, groupBy.String()) if err != nil { return nil, err diff --git a/controller/cmd/service-mirror/cluster_watcher.go b/controller/cmd/service-mirror/cluster_watcher.go index 883ab190e36fe..890b34cfea9e1 100644 --- a/controller/cmd/service-mirror/cluster_watcher.go +++ b/controller/cmd/service-mirror/cluster_watcher.go @@ -1,15 +1,14 @@ package servicemirror import ( - "errors" "fmt" "net" - "strconv" "strings" "time" "github.com/linkerd/linkerd2/controller/k8s" consts "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" @@ -31,8 +30,7 @@ type ( // problems or general glitch in the Matrix. 
RemoteClusterServiceWatcher struct { serviceMirrorNamespace string - clusterName string - clusterDomain string + link *multicluster.Link remoteAPIClient *k8s.API localAPIClient *k8s.API stopper chan struct{} @@ -42,30 +40,10 @@ type ( repairPeriod time.Duration } - // ProbeConfig describes the configured probe on particular gateway (if presents) - ProbeConfig struct { - path string - port uint32 - periodInSeconds uint32 - } - - // GatewaySpec contains essential data about the gateway - GatewaySpec struct { - gatewayName string - gatewayNamespace string - clusterName string - addresses []corev1.EndpointAddress - incomingPort uint32 - resourceVersion string - identity string - *ProbeConfig - } - // RemoteServiceCreated is generated whenever a remote service is created Observing // this event means that the service in question is not mirrored atm RemoteServiceCreated struct { - service *corev1.Service - gatewayData gatewayMetadata + service *corev1.Service } // RemoteServiceUpdated is generated when we see something about an already @@ -76,7 +54,6 @@ type ( localService *corev1.Service localEndpoints *corev1.Endpoints remoteUpdate *corev1.Service - gatewayData gatewayMetadata } // RemoteServiceDeleted when a remote service is going away or it is not @@ -86,24 +63,7 @@ type ( Namespace string } - // RemoteGatewayDeleted is observed when a service that is a gateway is deleted - RemoteGatewayDeleted struct { - gatewayData gatewayMetadata - } - - // RemoteGatewayCreated is observed when a gateway service is created on the remote cluster - RemoteGatewayCreated struct { - gatewaySpec GatewaySpec - } - - // RemoteGatewayUpdated happens when a service that is updated. - RemoteGatewayUpdated struct { - gatewaySpec GatewaySpec - affectedServices []*corev1.Service - } - - // ClusterUnregistered is issued when the secret containing the remote cluster - // access information is deleted + // ClusterUnregistered is issued when this ClusterWatcher is shut down. 
ClusterUnregistered struct{} // OprhanedServicesGcTriggered is a self-triggered event which aims to delete any @@ -144,11 +104,6 @@ type ( // endpoints should be resolved based on the remote gateway and updated. RepairEndpoints struct{} - gatewayMetadata struct { - Name string - Namespace string - } - // RetryableError is an error that should be retried through requeuing events RetryableError struct{ Inner []error } ) @@ -161,26 +116,15 @@ func (re RetryableError) Error() string { return fmt.Sprintf("Inner errors:\n\t%s", strings.Join(errorStrings, "\n\t")) } -// When the gateway is resolved we need to produce a set of endpoint addresses that that -// contain the external IPs that this gateway exposes. Therefore we return the IP addresses -// as well as a single port on which the gateway is accessible. -func (rcsw *RemoteClusterServiceWatcher) resolveGateway(metadata *gatewayMetadata) (*GatewaySpec, error) { - gateway, err := rcsw.remoteAPIClient.Svc().Lister().Services(metadata.Namespace).Get(metadata.Name) - if err != nil { - return nil, err - } - return rcsw.extractGatewaySpec(gateway) -} - // NewRemoteClusterServiceWatcher constructs a new cluster watcher func NewRemoteClusterServiceWatcher( serviceMirrorNamespace string, localAPI *k8s.API, cfg *rest.Config, - clusterName string, + link *multicluster.Link, requeueLimit int, repairPeriod time.Duration, - clusterDomain string, + ) (*RemoteClusterServiceWatcher, error) { remoteAPI, err := k8s.InitializeAPIForConfig(cfg, false, k8s.Svc) if err != nil { @@ -189,8 +133,7 @@ func NewRemoteClusterServiceWatcher( stopper := make(chan struct{}) return &RemoteClusterServiceWatcher{ serviceMirrorNamespace: serviceMirrorNamespace, - clusterName: clusterName, - clusterDomain: clusterDomain, + link: link, remoteAPIClient: remoteAPI, localAPIClient: localAPI, stopper: stopper, @@ -205,26 +148,24 @@ func NewRemoteClusterServiceWatcher( } func (rcsw *RemoteClusterServiceWatcher) mirroredResourceName(remoteName string) string { - 
return fmt.Sprintf("%s-%s", remoteName, rcsw.clusterName) + return fmt.Sprintf("%s-%s", remoteName, rcsw.link.TargetClusterName) } func (rcsw *RemoteClusterServiceWatcher) originalResourceName(mirroredName string) string { - return strings.TrimSuffix(mirroredName, fmt.Sprintf("-%s", rcsw.clusterName)) + return strings.TrimSuffix(mirroredName, fmt.Sprintf("-%s", rcsw.link.TargetClusterName)) } -func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceLabels(gatewayData *gatewayMetadata) map[string]string { +func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceLabels() map[string]string { return map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: gatewayData.Namespace, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } } func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceAnnotations(remoteService *corev1.Service) map[string]string { return map[string]string{ consts.RemoteResourceVersionAnnotation: remoteService.ResourceVersion, // needed to detect real changes - consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.clusterDomain), + consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.link.TargetClusterDomain), } } @@ -239,7 +180,7 @@ func (rcsw *RemoteClusterServiceWatcher) mirrorNamespaceIfNecessary(namespace st ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, }, Name: namespace, }, @@ -261,13 +202,13 @@ func (rcsw *RemoteClusterServiceWatcher) mirrorNamespaceIfNecessary(namespace st // that we should send traffic to and create endpoint ports that bind to the mirrored service ports // (same name, etc) but send traffic to the gateway port. 
This way we do not need to do any remapping // on the service side of things. It all happens in the endpoints. -func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Service, gatewayPort int32) []corev1.EndpointPort { +func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Service) []corev1.EndpointPort { var endpointsPorts []corev1.EndpointPort for _, remotePort := range service.Spec.Ports { endpointsPorts = append(endpointsPorts, corev1.EndpointPort{ Name: remotePort.Name, Protocol: remotePort.Protocol, - Port: gatewayPort, + Port: int32(rcsw.link.GatewayPort), }) } return endpointsPorts @@ -276,7 +217,7 @@ func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Servi func (rcsw *RemoteClusterServiceWatcher) cleanupOrphanedServices() error { matchLabels := map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } servicesOnLocalCluster, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) @@ -318,10 +259,7 @@ func (rcsw *RemoteClusterServiceWatcher) cleanupOrphanedServices() error { // created. This piece of code is responsible for doing just that. 
It takes care of // services, endpoints and namespaces (if needed) func (rcsw *RemoteClusterServiceWatcher) cleanupMirroredResources() error { - matchLabels := map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - } + matchLabels := rcsw.getMirroredServiceLabels() services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) if err != nil { @@ -374,7 +312,7 @@ func (rcsw *RemoteClusterServiceWatcher) cleanupMirroredResources() error { // Deletes a locally mirrored service as it is not present on the remote cluster anymore func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteServiceDeleted) error { localServiceName := rcsw.mirroredResourceName(ev.Name) - rcsw.log.Infof("Deleting mirrored service %s/%s and its corresponding Endpoints", ev.Namespace, localServiceName) + rcsw.log.Infof("Deleting mirrored service %s/%s", ev.Namespace, localServiceName) var errors []error if err := rcsw.localAPIClient.Client.CoreV1().Services(ev.Namespace).Delete(localServiceName, &metav1.DeleteOptions{}); err != nil { if !kerrors.IsNotFound(err) { @@ -382,12 +320,6 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteSe } } - if err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ev.Namespace).Delete(localServiceName, &metav1.DeleteOptions{}); err != nil { - if !kerrors.IsNotFound(err) { - errors = append(errors, fmt.Errorf("could not delete Endpoints: %s/%s: %s", ev.Namespace, localServiceName, err)) - } - } - if len(errors) > 0 { return RetryableError{errors} } @@ -399,45 +331,29 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteSe // Updates a locally mirrored service. There might have been some pretty fundamental changes such as // new gateway being assigned or additional ports exposed. This method takes care of that. 
func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceUpdated(ev *RemoteServiceUpdated) error { - serviceInfo := fmt.Sprintf("%s/%s", ev.remoteUpdate.Namespace, ev.remoteUpdate.Name) rcsw.log.Infof("Updating mirror service %s/%s", ev.localService.Namespace, ev.localService.Name) - gatewaySpec, err := rcsw.resolveGateway(&ev.gatewayData) copiedEndpoints := ev.localEndpoints.DeepCopy() - if err == nil { - copiedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(ev.remoteUpdate, int32(gatewaySpec.incomingPort)), - }, - } - - if gatewaySpec.identity != "" { - copiedEndpoints.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity - } else { - delete(copiedEndpoints.Annotations, consts.RemoteGatewayIdentity) - } + copiedEndpoints.Subsets = []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: rcsw.getEndpointsPorts(ev.remoteUpdate), + }, + } - } else { - rcsw.log.Warnf("Could not resolve gateway for %s: %s, nulling endpoints", serviceInfo, err) - copiedEndpoints.Subsets = nil + if copiedEndpoints.Annotations == nil { + copiedEndpoints.Annotations = make(map[string]string) } - // we need to set the new name and ns data no matter whether they are valid or not - copiedEndpoints.Labels[consts.RemoteGatewayNameLabel] = ev.gatewayData.Name - copiedEndpoints.Labels[consts.RemoteGatewayNsLabel] = ev.gatewayData.Namespace + copiedEndpoints.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(copiedEndpoints.Namespace).Update(copiedEndpoints); err != nil { return RetryableError{[]error{err}} } - ev.localService.Labels = rcsw.getMirroredServiceLabels(&ev.gatewayData) + ev.localService.Labels = rcsw.getMirroredServiceLabels() ev.localService.Annotations = rcsw.getMirroredServiceAnnotations(ev.remoteUpdate) ev.localService.Spec.Ports = remapRemoteServicePorts(ev.remoteUpdate.Spec.Ports) - if 
gatewaySpec != nil { - ev.localService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - } - if _, err := rcsw.localAPIClient.Client.CoreV1().Services(ev.localService.Namespace).Update(ev.localService); err != nil { return RetryableError{[]error{err}} } @@ -473,7 +389,7 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe Name: localServiceName, Namespace: remoteService.Namespace, Annotations: rcsw.getMirroredServiceAnnotations(remoteService), - Labels: rcsw.getMirroredServiceLabels(&ev.gatewayData), + Labels: rcsw.getMirroredServiceLabels(), }, Spec: corev1.ServiceSpec{ Ports: remapRemoteServicePorts(remoteService.Spec.Ports), @@ -486,40 +402,30 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe Namespace: ev.service.Namespace, Labels: map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.RemoteGatewayNameLabel: ev.gatewayData.Name, - consts.RemoteGatewayNsLabel: ev.gatewayData.Namespace, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, }, Annotations: map[string]string{ - consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.clusterDomain), + consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.link.TargetClusterDomain), }, }, } - // Now we try to resolve the remote gateway - gatewaySpec, err := rcsw.resolveGateway(&ev.gatewayData) - if err == nil { - // only if we resolve it, we are updating the endpoints addresses and ports - rcsw.log.Infof("Resolved gateway [%v:%d] for %s", gatewaySpec.addresses, gatewaySpec.incomingPort, serviceInfo) + gatewayAddress := rcsw.resolveGatewayAddress() + // only if we resolve it, we are updating the endpoints addresses and ports + rcsw.log.Infof("Resolved gateway [%v:%d] for %s", gatewayAddress, rcsw.link.GatewayPort, serviceInfo) - if 
len(gatewaySpec.addresses) > 0 { - endpointsToCreate.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(ev.service, int32(gatewaySpec.incomingPort)), - }, - } - } else { - rcsw.log.Warnf("gateway for %s: %s does not have ready addresses, skipping subsets", serviceInfo, err) - } - serviceToCreate.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - if gatewaySpec.identity != "" { - endpointsToCreate.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity + if len(gatewayAddress) > 0 { + endpointsToCreate.Subsets = []corev1.EndpointSubset{ + { + Addresses: gatewayAddress, + Ports: rcsw.getEndpointsPorts(ev.service), + }, } - } else { - rcsw.log.Infof("Could not resolve gateway for %s: %s, skipping subsets", serviceInfo, err) - endpointsToCreate.Subsets = nil + rcsw.log.Warnf("gateway for %s does not have ready addresses, skipping subsets", serviceInfo) + } + if rcsw.link.GatewayIdentity != "" { + endpointsToCreate.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity } rcsw.log.Infof("Creating a new service mirror for %s", serviceInfo) @@ -540,274 +446,7 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe return nil } -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayDeleted(ev *RemoteGatewayDeleted) error { - - if err := rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Delete(rcsw.mirroredResourceName(ev.gatewayData.Name), &metav1.DeleteOptions{}); err != nil { - rcsw.log.Errorf("Could not delete gateway mirror %s", err) - } - - affectedEndpoints, err := rcsw.endpointsForGateway(&ev.gatewayData) - if err != nil { - // if we cannot find the endpoints, we can give up - if kerrors.IsNotFound(err) { - return err - } - // if it is another error, just retry - return RetryableError{[]error{err}} - } - - var errors []error - if len(affectedEndpoints) > 0 { - rcsw.log.Infof("Nulling %d 
endpoints due to gateway [%s/%s] deletion", len(affectedEndpoints), ev.gatewayData.Namespace, ev.gatewayData.Name) - for _, ep := range affectedEndpoints { - updated := ep.DeepCopy() - updated.Subsets = nil - if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Update(updated); err != nil { - errors = append(errors, err) - } - } - } - if len(errors) > 0 { - // if we have encountered any errors, we can retry the whole operation - return RetryableError{errors} - } - return nil -} - -// the logic here creates a mirror service for the gateway. The only port exposed there is the -// probes port. This enables us to discover the gateways probe endpoints through the dst service -// and apply proper identity -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayCreated(event *RemoteGatewayCreated) error { - localServiceName := rcsw.mirroredResourceName(event.gatewaySpec.gatewayName) - if event.gatewaySpec.ProbeConfig == nil { - rcsw.log.Infof("Skipping creation of gateway mirror as gateway does not specify probe config") - return nil - } - serviceToCreate := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: localServiceName, - Namespace: rcsw.serviceMirrorNamespace, - Annotations: map[string]string{ - consts.RemoteGatewayResourceVersionAnnotation: event.gatewaySpec.resourceVersion, - consts.MirroredGatewayRemoteName: event.gatewaySpec.gatewayName, - consts.MirroredGatewayRemoteNameSpace: event.gatewaySpec.gatewayNamespace, - consts.MirroredGatewayProbePath: event.gatewaySpec.ProbeConfig.path, - consts.MirroredGatewayProbePeriod: fmt.Sprint(event.gatewaySpec.ProbeConfig.periodInSeconds), - }, - Labels: map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.MirroredGatewayLabel: "true", - }, - }, - Spec: corev1.ServiceSpec{ - Ports: []corev1.ServicePort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(event.gatewaySpec.ProbeConfig.port), - }, - }, - }, - } - - 
endpointsToCreate := &corev1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: localServiceName, - Namespace: rcsw.serviceMirrorNamespace, - Labels: map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - }, - Annotations: map[string]string{ - consts.RemoteGatewayIdentity: event.gatewaySpec.identity, - }, - }, - } - - if len(event.gatewaySpec.addresses) > 0 { - endpointsToCreate.Subsets = []corev1.EndpointSubset{ - { - Addresses: event.gatewaySpec.addresses, - Ports: []corev1.EndpointPort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(event.gatewaySpec.ProbeConfig.port), - }, - }, - }, - } - } - - rcsw.log.Infof("Creating a new gateway mirror Service for %s", localServiceName) - if _, err := rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Create(serviceToCreate); err != nil { - if !kerrors.IsAlreadyExists(err) { - // we might have created it during earlier attempt, if that is not the case, we retry - return RetryableError{[]error{err}} - } - } - - rcsw.log.Infof("Creating a new gateway mirror Endpoints for %s", localServiceName) - if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(rcsw.serviceMirrorNamespace).Create(endpointsToCreate); err != nil { - // we clean up after ourselves - rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Delete(event.gatewaySpec.gatewayName, &metav1.DeleteOptions{}) - // and retry - return RetryableError{[]error{err}} - } - - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) updateAffectedServices(gatewaySpec GatewaySpec, affectedServices []*corev1.Service) error { - rcsw.log.Infof("Updating %d services due to gateway [%s/%s] update", len(affectedServices), gatewaySpec.gatewayNamespace, gatewaySpec.gatewayName) - var errors []error - for _, svc := range affectedServices { - updatedService := svc.DeepCopy() - if updatedService.Annotations != nil { - 
updatedService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - } - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(svc.Namespace).Get(svc.Name) - if err != nil { - errors = append(errors, fmt.Errorf("Could not get endpoints: %s", err)) - continue - } - - updatedEndpoints := endpoints.DeepCopy() - if len(gatewaySpec.addresses) > 0 { - updatedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(updatedService, int32(gatewaySpec.incomingPort)), - }, - } - } else { - updatedEndpoints.Subsets = nil - } - - if gatewaySpec.identity != "" { - updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity - } else { - delete(updatedEndpoints.Annotations, consts.RemoteGatewayIdentity) - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Update(updatedService) - if err != nil { - errors = append(errors, err) - continue - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(updatedService.Namespace).Update(updatedEndpoints) - if err != nil { - errors = append(errors, err) - } - } - - if len(errors) > 0 { - return RetryableError{errors} - } - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) updateGatewayMirrorService(spec *GatewaySpec) error { - localServiceName := rcsw.mirroredResourceName(spec.gatewayName) - service, err := rcsw.localAPIClient.Svc().Lister().Services(rcsw.serviceMirrorNamespace).Get(localServiceName) - if err != nil { - return err - } - - if service.Annotations != nil && service.Annotations[consts.RemoteGatewayResourceVersionAnnotation] != spec.resourceVersion { - updatedService := service.DeepCopy() - if updatedService.Annotations != nil { - updatedService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = spec.resourceVersion - updatedService.Annotations[consts.MirroredGatewayProbePath] = spec.ProbeConfig.path - 
updatedService.Annotations[consts.MirroredGatewayProbePeriod] = fmt.Sprint(spec.ProbeConfig.periodInSeconds) - } - - updatedService.Spec.Ports = []corev1.ServicePort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(spec.ProbeConfig.port), - }, - } - - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(rcsw.serviceMirrorNamespace).Get(localServiceName) - if err != nil { - return err - } - - updatedEndpoints := endpoints.DeepCopy() - if spec.addresses == nil { - updatedEndpoints.Subsets = nil - } else { - updatedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: spec.addresses, - Ports: []corev1.EndpointPort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(spec.ProbeConfig.port), - }, - }, - }, - } - } - - updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = spec.identity - - _, err = rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Update(updatedService) - if err != nil { - return err - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(rcsw.serviceMirrorNamespace).Update(updatedEndpoints) - if err != nil { - return err - } - rcsw.log.Infof("%s gateway mirror updated", localServiceName) - } - - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayUpdated(ev *RemoteGatewayUpdated) error { - if err := rcsw.updateAffectedServices(ev.gatewaySpec, ev.affectedServices); err != nil { - return err - } - - if err := rcsw.updateGatewayMirrorService(&ev.gatewaySpec); err != nil { - return err - } - - return nil -} - -// Retrieves the annotations that indicate this service can be mirrored. -// The values of these annotations help us resolve the gateway to which -// traffic should be sent. 
-func getGatewayMetadata(annotations map[string]string) *gatewayMetadata { - remoteGatewayName, hasGtwName := annotations[consts.GatewayNameAnnotation] - remoteGatewayNs, hasGtwNs := annotations[consts.GatewayNsAnnotation] - if hasGtwName && hasGtwNs { - return &gatewayMetadata{ - Name: remoteGatewayName, - Namespace: remoteGatewayNs, - } - } - return nil -} - -func isGateway(annotations map[string]string) bool { - if annotations != nil { - _, hasAnnotation := annotations[consts.MulticlusterGatewayAnnotation] - return hasAnnotation - } - return false -} - -func isMirroredService(annotations map[string]string) bool { +func isExportedService(annotations map[string]string) bool { if annotations != nil { _, hasGtwName := annotations[consts.GatewayNameAnnotation] _, hasGtwNs := annotations[consts.GatewayNsAnnotation] @@ -822,48 +461,12 @@ func isMirroredService(annotations map[string]string) bool { func (rcsw *RemoteClusterServiceWatcher) createOrUpdateService(service *corev1.Service) error { localName := rcsw.mirroredResourceName(service.Name) - if isGateway(service.Annotations) { - gatewaySpec, err := rcsw.extractGatewaySpec(service) - if err != nil { - return RetryableError{[]error{err}} - } - - _, err = rcsw.localAPIClient.Svc().Lister().Services(rcsw.serviceMirrorNamespace).Get(localName) - if err != nil { - if kerrors.IsNotFound(err) { - rcsw.eventsQueue.Add(&RemoteGatewayCreated{ - gatewaySpec: *gatewaySpec, - }) - return nil - } - return RetryableError{[]error{err}} - } - - affectedServices, err := rcsw.affectedMirroredServicesForGatewayUpdate(&gatewayMetadata{ - Name: service.Name, - Namespace: service.Namespace, - }, service.ResourceVersion) - if err != nil { - return RetryableError{[]error{err}} - } - - rcsw.eventsQueue.Add(&RemoteGatewayUpdated{ - affectedServices: affectedServices, - gatewaySpec: *gatewaySpec, - }) - return nil - - } else if isMirroredService(service.Annotations) { - gatewayData := getGatewayMetadata(service.Annotations) - if gatewayData == 
nil { - return fmt.Errorf("got service in invalid state, no gateway metadata %s", service) - } + if isExportedService(service.Annotations) { localService, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) if err != nil { if kerrors.IsNotFound(err) { rcsw.eventsQueue.Add(&RemoteServiceCreated{ - service: service, - gatewayData: *gatewayData, + service: service, }) return nil } @@ -878,53 +481,33 @@ func (rcsw *RemoteClusterServiceWatcher) createOrUpdateService(service *corev1.S localService: localService, localEndpoints: endpoints, remoteUpdate: service, - gatewayData: *gatewayData, }) return nil } return RetryableError{[]error{err}} } return nil - } else { - localSvc, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) - if err == nil { - if localSvc.Labels != nil { - _, isMirroredRes := localSvc.Labels[consts.MirroredResourceLabel] - clusterName := localSvc.Labels[consts.RemoteClusterNameLabel] - if isMirroredRes && (clusterName == rcsw.clusterName) { - rcsw.eventsQueue.Add(&RemoteServiceDeleted{ - Name: service.Name, - Namespace: service.Namespace, - }) - } - } - } - return nil - } -} - -func (rcsw *RemoteClusterServiceWatcher) affectedMirroredServicesForGatewayUpdate(gatewayData *gatewayMetadata, latestResourceVersion string) ([]*corev1.Service, error) { - services, err := rcsw.mirroredServicesForGateway(gatewayData) - if err != nil { - return nil, err } - - affectedServices := []*corev1.Service{} - for _, srv := range services { - ver, ok := srv.Annotations[consts.RemoteGatewayResourceVersionAnnotation] - if ok && ver != latestResourceVersion { - affectedServices = append(affectedServices, srv) + localSvc, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) + if err == nil { + if localSvc.Labels != nil { + _, isMirroredRes := localSvc.Labels[consts.MirroredResourceLabel] + clusterName := localSvc.Labels[consts.RemoteClusterNameLabel] + if isMirroredRes && 
(clusterName == rcsw.link.TargetClusterName) { + rcsw.eventsQueue.Add(&RemoteServiceDeleted{ + Name: service.Name, + Namespace: service.Namespace, + }) + } } } - return affectedServices, nil + return nil } -func (rcsw *RemoteClusterServiceWatcher) mirroredServicesForGateway(gatewayData *gatewayMetadata) ([]*corev1.Service, error) { +func (rcsw *RemoteClusterServiceWatcher) getMirrorServices() ([]*corev1.Service, error) { matchLabels := map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: gatewayData.Namespace, - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) @@ -934,33 +517,12 @@ func (rcsw *RemoteClusterServiceWatcher) mirroredServicesForGateway(gatewayData return services, nil } -func (rcsw *RemoteClusterServiceWatcher) endpointsForGateway(gatewayData *gatewayMetadata) ([]*corev1.Endpoints, error) { - matchLabels := map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: gatewayData.Namespace, - consts.RemoteClusterNameLabel: rcsw.clusterName, - } - - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().List(labels.Set(matchLabels).AsSelector()) - if err != nil { - return nil, err - } - return endpoints, nil -} - func (rcsw *RemoteClusterServiceWatcher) handleOnDelete(service *corev1.Service) { - if isMirroredService(service.Annotations) { + if isExportedService(service.Annotations) { rcsw.eventsQueue.Add(&RemoteServiceDeleted{ Name: service.Name, Namespace: service.Namespace, }) - } else if isGateway(service.Annotations) { - rcsw.eventsQueue.Add(&RemoteGatewayDeleted{ - gatewayData: gatewayMetadata{ - Name: service.Name, - Namespace: service.Namespace, - }}) } else { rcsw.log.Infof("Skipping OnDelete for service %s", service) } @@ 
-990,12 +552,6 @@ func (rcsw *RemoteClusterServiceWatcher) processNextEvent() (bool, interface{}, err = rcsw.handleRemoteServiceUpdated(ev) case *RemoteServiceDeleted: err = rcsw.handleRemoteServiceDeleted(ev) - case *RemoteGatewayUpdated: - err = rcsw.handleRemoteGatewayUpdated(ev) - case *RemoteGatewayDeleted: - err = rcsw.handleRemoteGatewayDeleted(ev) - case *RemoteGatewayCreated: - err = rcsw.handleRemoteGatewayCreated(ev) case *ClusterUnregistered: err = rcsw.cleanupMirroredResources() case *OprhanedServicesGcTriggered: @@ -1080,6 +636,11 @@ func (rcsw *RemoteClusterServiceWatcher) Start() error { ) go rcsw.processEvents() + // We need to issue a RepairEndpoints immediately to populate the gateway + // mirror endpoints. + ev := RepairEndpoints{} + rcsw.eventsQueue.Add(&ev) + go func() { ticker := time.NewTicker(rcsw.repairPeriod) for { @@ -1105,126 +666,117 @@ func (rcsw *RemoteClusterServiceWatcher) Stop(cleanupState bool) { rcsw.eventsQueue.ShutDown() } -func extractPort(port []corev1.ServicePort, portName string) (uint32, error) { - for _, p := range port { - if p.Name == portName { - return uint32(p.Port), nil +func (rcsw *RemoteClusterServiceWatcher) resolveGatewayAddress() []corev1.EndpointAddress { + var gatewayEndpoints []corev1.EndpointAddress + for _, addr := range strings.Split(rcsw.link.GatewayAddress, ",") { + resolved := addr + ipAddr, err := net.ResolveIPAddr("ip", addr) + if err == nil { + resolved = ipAddr.String() } + gatewayEndpoints = append(gatewayEndpoints, corev1.EndpointAddress{ + IP: resolved, + }) } - return 0, fmt.Errorf("could not find port with name %s", portName) + return gatewayEndpoints } -func extractProbeConfig(gateway *corev1.Service) (*ProbeConfig, error) { - probePath := gateway.Annotations[consts.GatewayProbePath] +func (rcsw *RemoteClusterServiceWatcher) repairEndpoints() { + endpointRepairCounter.With(prometheus.Labels{ + gatewayClusterName: rcsw.link.TargetClusterName, + }).Inc() - probePort, err := 
extractPort(gateway.Spec.Ports, consts.ProbePortName) + // Create or update gateway mirror endpoints. + gatewayMirrorName := fmt.Sprintf("probe-gateway-%s", rcsw.link.TargetClusterName) - if err != nil { - return nil, err + gatewayMirrorEndpoints := &corev1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: gatewayMirrorName, + Namespace: rcsw.serviceMirrorNamespace, + Labels: map[string]string{ + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, + }, + Annotations: map[string]string{ + consts.RemoteGatewayIdentity: rcsw.link.GatewayIdentity, + }, + }, + Subsets: []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: []corev1.EndpointPort{ + { + Name: "mc-probe", + Port: int32(rcsw.link.ProbeSpec.Port), + Protocol: "TCP", + }, + }, + }, + }, } - probePeriod, err := strconv.ParseUint(gateway.Annotations[consts.GatewayProbePeriod], 10, 32) + err := rcsw.createOrUpdateEndpoints(gatewayMirrorEndpoints) if err != nil { - return nil, err + rcsw.log.Errorf("Failed to create/update gateway mirror endpoints: %s", err) } - if probePath == "" { - return nil, errors.New("probe path is empty") + // Repair mirror service endpoints. 
+ mirrorServices, err := rcsw.getMirrorServices() + if err != nil { + rcsw.log.Errorf("Failed to list mirror services: %s", err) } + for _, svc := range mirrorServices { + updatedService := svc.DeepCopy() - return &ProbeConfig{ - path: probePath, - port: probePort, - periodInSeconds: uint32(probePeriod), - }, nil -} + endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(svc.Namespace).Get(svc.Name) + if err != nil { + rcsw.log.Errorf("Could not get endpoints: %s", err) + continue + } -func (rcsw *RemoteClusterServiceWatcher) extractGatewaySpec(gateway *corev1.Service) (*GatewaySpec, error) { - incomingPort, err := extractPort(gateway.Spec.Ports, consts.GatewayPortName) + updatedEndpoints := endpoints.DeepCopy() + updatedEndpoints.Subsets = []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: rcsw.getEndpointsPorts(updatedService), + }, + } - if err != nil { - return nil, err - } + if updatedEndpoints.Annotations == nil { + updatedEndpoints.Annotations = make(map[string]string) + } + updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity - var gatewayEndpoints []corev1.EndpointAddress - for _, ingress := range gateway.Status.LoadBalancer.Ingress { - ip := ingress.IP - if ip == "" { - ipAddr, err := net.ResolveIPAddr("ip", ingress.Hostname) - if err != nil { - return nil, err - } - ip = ipAddr.String() + _, err = rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Update(updatedService) + if err != nil { + rcsw.log.Error(err) + continue } - gatewayEndpoints = append(gatewayEndpoints, corev1.EndpointAddress{ - IP: ip, - }) - } - gatewayIdentity := gateway.Annotations[consts.GatewayIdentity] - probeConfig, err := extractProbeConfig(gateway) - if err != nil { - return nil, fmt.Errorf("could not parse probe config for gateway: %s/%s: %s", gateway.Namespace, gateway.Name, err) + _, err = 
rcsw.localAPIClient.Client.CoreV1().Endpoints(updatedService.Namespace).Update(updatedEndpoints) + if err != nil { + rcsw.log.Error(err) + } } - - return &GatewaySpec{ - clusterName: rcsw.clusterName, - gatewayName: gateway.Name, - gatewayNamespace: gateway.Namespace, - addresses: gatewayEndpoints, - incomingPort: incomingPort, - resourceVersion: gateway.ResourceVersion, - identity: gatewayIdentity, - ProbeConfig: probeConfig, - }, nil } -// repairEndpoints will look up all remote gateways and update the endpoints -// of all local mirror services for those gateways. Note that we ignore resource -// version and update ALL affected endpoints objects. This is because the -// remote gateway may be exposed as a DNS hostname and we want to re-resolve -// this DNS name in case its IP address has changed. By invoking repairEndpoints -// frequently, we can pick up any DNS changes fairly quickly. -// TODO: Replace this with a more robust solution that does not rely on -// frequently repairing endpoints to pick up DNS updates. -func (rcsw *RemoteClusterServiceWatcher) repairEndpoints() { - svcs, err := rcsw.remoteAPIClient.Svc().Lister().Services(metav1.NamespaceAll).List(labels.Everything()) +func (rcsw *RemoteClusterServiceWatcher) createOrUpdateEndpoints(ep *corev1.Endpoints) error { + _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Get(ep.Name, metav1.GetOptions{}) if err != nil { - rcsw.log.Errorf("failed to list remote gateways: %s", err) - return - } - rcsw.log.Errorf("During repair, found %d remote services", len(svcs)) - for _, svc := range svcs { - if isGateway(svc.Annotations) { - - // We omit a resource version here because we want to get ALL mirror - // services for this gateway. 
- affectedServices, err := rcsw.affectedMirroredServicesForGatewayUpdate(&gatewayMetadata{ - Name: svc.Name, - Namespace: svc.Namespace, - }, "") - if err != nil { - rcsw.log.Errorf("failed to determine mirror services for gateway %s.%s: %s", svc.Name, svc.Namespace, err) - continue - } - - spec, err := rcsw.extractGatewaySpec(svc) + if kerrors.IsNotFound(err) { + // Does not exist so we should create it. + _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Create(ep) if err != nil { - rcsw.log.Errorf("failed to extract spec for gateway %s.%s: %s", svc.Name, svc.Namespace, err) - continue + return err } - - endpointRepairCounter.With(prometheus.Labels{ - gatewayNameLabel: svc.Name, - gatewayNamespaceLabel: svc.Namespace, - gatewayClusterName: rcsw.clusterName, - }).Inc() - - rcsw.log.Errorf("adding gateway update event %s with %d mirrro services", svc.Name, len(affectedServices)) - rcsw.eventsQueue.Add(&RemoteGatewayUpdated{ - gatewaySpec: *spec, - affectedServices: affectedServices, - }) + } else { + return err } } + // Exists so we should update it. 
+ _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Update(ep) + if err != nil { + return err + } + + return nil } diff --git a/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go b/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go index ce434f8e29fe5..758b7b7bfb1e2 100644 --- a/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go +++ b/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go @@ -2,12 +2,10 @@ package servicemirror import ( "fmt" - "net" "reflect" "testing" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ) @@ -53,15 +51,8 @@ func (tc *mirroringTestCase) run(t *testing.T) { } if tc.expectedLocalEndpoints == nil { - // ensure the are no local endpoints - endpoints, err := localAPI.Client.CoreV1().Endpoints(corev1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - t.Fatal(err) - } - if len(endpoints.Items) > 0 { - t.Fatalf("Was expecting no local endpoints but instead found %d", len(endpoints.Items)) - - } + // In a real Kubernetes cluster, deleting the service is sufficient + // to delete the endpoints. 
} else { for _, expected := range tc.expectedLocalEndpoints { actual, err := localAPI.Client.CoreV1().Endpoints(expected.Namespace).Get(expected.Name, metav1.GetOptions{}) @@ -94,48 +85,25 @@ func (tc *mirroringTestCase) run(t *testing.T) { func TestRemoteServiceCreatedMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ { - description: "create service and endpoints when gateway cannot be resolved", - environment: serviceCreateWithMissingGateway, - expectedLocalServices: []*corev1.Service{ - mirroredService("service-one-remote", "ns1", "missing-gateway", "missing-namespace", "111", "", nil), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "missing-gateway", "missing-namespace", "", "", nil), - }, + description: "does not create service and endpoints when gateway address is missing", + environment: serviceCreateWithMissingGateway, + expectedLocalServices: []*corev1.Service{}, + expectedLocalEndpoints: []*corev1.Endpoints{}, }, { - description: "create service and endpoints without subsets when gateway spec is wrong", - environment: createServiceWrongGatewaySpec, - expectedLocalServices: []*corev1.Service{ - mirroredService("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "111", "", - []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 555, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 666, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "", "", nil), - }, + description: "does not create service and endpoints when gateway spec is wrong", + environment: createServiceWrongGatewaySpec, + expectedLocalServices: []*corev1.Service{}, + expectedLocalEndpoints: []*corev1.Endpoints{}, }, { description: "create service and endpoints when gateway can be resolved", environment: createServiceOkeGatewaySpec, expectedLocalServices: []*corev1.Service{ - mirroredService( + 
mirrorService( "service-one-remote", "ns1", - "existing-gateway", - "existing-namespace", "111", - "222", []corev1.ServicePort{ { Name: "port1", @@ -150,7 +118,7 @@ func TestRemoteServiceCreatedMirroring(t *testing.T) { }), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ + endpoints("service-one-remote", "ns1", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -184,50 +152,11 @@ func TestRemoteServiceDeletedMirroring(t *testing.T) { func TestRemoteServiceUpdatedMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ - { - description: "update to new gateway", - environment: updateServiceToNewGateway, - expectedLocalServices: []*corev1.Service{ - mirroredService( - "test-service-remote", - "test-namespace", - "gateway-new", - "gateway-ns", - "currentServiceResVersion", - "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port3", - Protocol: "TCP", - Port: 333, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway-new", "gateway-ns", "0.0.0.0", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 999, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 999, - Protocol: "TCP", - }, - }), - }, - }, { description: "updates service ports on both service and endpoints", environment: updateServiceWithChangedPorts, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentServiceResVersion", "currentGatewayResVersion", + mirrorService("test-service-remote", "test-namespace", "currentServiceResVersion", []corev1.ServicePort{ { Name: "port1", @@ -243,7 +172,7 @@ func TestRemoteServiceUpdatedMirroring(t *testing.T) { }, expectedLocalEndpoints: []*corev1.Endpoints{ - 
endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + endpoints("test-service-remote", "test-namespace", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -263,189 +192,6 @@ func TestRemoteServiceUpdatedMirroring(t *testing.T) { } } -func TestRemoteGatewayUpdatedMirroring(t *testing.T) { - - localhostIP, err := net.ResolveIPAddr("ip", "localhost") - if err != nil { - t.Fatal(err) - } - - for _, tt := range []mirroringTestCase{ - { - description: "endpoints ports are updated on gateway change", - environment: remoteGatewayUpdated, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 999, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 999, - Protocol: "TCP", - }}), - }, - }, - - { - description: "endpoints addresses are updated on gateway change", - environment: gatewayAddressChanged, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", 
"currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.1", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.1", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, - }, - - { - description: "identity is updated on gateway change", - environment: gatewayIdentityChanged, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "new-identity", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "new-identity", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, - }, - { - description: "gateway uses hostname address", - environment: remoteGatewayUpdatedWithHostnameAddress, - expectedEventsInQueue: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "remote", - addresses: []corev1.EndpointAddress{{IP: localhostIP.String()}}, - incomingPort: 999, - resourceVersion: "currentGatewayResVersion", - 
ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*v1.Service{}, - }, - }, - }, - } { - tc := tt // pin - tc.run(t) - } -} -func TestRemoteGatewayDeletedMirroring(t *testing.T) { - for _, tt := range []mirroringTestCase{ - { - description: "removes endpoint subsets when gateway is deleted", - environment: gatewayDeleted, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - }, - }, - } { - tc := tt // pin - tc.run(t) - } -} - func TestClusterUnregisteredMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ { @@ -464,11 +210,11 @@ func TestGcOrphanedServicesMirroring(t *testing.T) { description: "deletes mirrored resources that are no longer present on the remote cluster", environment: gcTriggered, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), + mirrorService("test-service-1-remote", "test-namespace", "", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "", "", "", "", nil), + endpoints("test-service-1-remote", "test-namespace", "", "", nil), }, }, } { @@ -490,39 +236,31 @@ func onAddOrUpdateTestCases(isAdd bool) []mirroringTestCase { environment: 
onAddOrUpdateExportedSvc(isAdd), expectedEventsInQueue: []interface{}{&RemoteServiceCreated{ service: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "resVersion", nil), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }}, }, { description: fmt.Sprintf("enqueue a RemoteServiceUpdated event if this is a service that we have already mirrored and its res version is different (%s)", testType), environment: onAddOrUpdateRemoteServiceUpdated(isAdd), expectedEventsInQueue: []interface{}{&RemoteServiceUpdated{ - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + localService: mirrorService("test-service-remote", "test-namespace", "pastResourceVersion", nil), + localEndpoints: endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), remoteUpdate: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }}, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "pastResourceVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, { description: fmt.Sprintf("not enqueue any events as this update does not really tell us anything new (res version is the same...) 
(%s)", testType), environment: onAddOrUpdateSameResVersion(isAdd), expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "currentResVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, { @@ -534,10 +272,10 @@ func onAddOrUpdateTestCases(isAdd bool) []mirroringTestCase { }}, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "currentResVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, } diff --git a/controller/cmd/service-mirror/cluster_watcher_test_util.go b/controller/cmd/service-mirror/cluster_watcher_test_util.go index b59dc0b61b2ed..a1f9ff24d630e 100644 --- a/controller/cmd/service-mirror/cluster_watcher_test_util.go +++ b/controller/cmd/service-mirror/cluster_watcher_test_util.go @@ -5,10 +5,12 @@ import ( "log" "reflect" "strings" + "time" "github.com/ghodss/yaml" "github.com/linkerd/linkerd2/controller/k8s" consts "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -23,10 +25,17 @@ const ( defaultProbePeriod = 60 ) +var defaultProbeSpec = multicluster.ProbeSpec{ + Path: defaultProbePath, + Port: defaultProbePort, + Period: time.Duration(defaultProbePeriod) * time.Second, +} + type 
testEnvironment struct { events []interface{} remoteResources []string localResources []string + link multicluster.Link } func (te *testEnvironment) runEnvironment(watcherQueue workqueue.RateLimitingInterface) (*k8s.API, error) { @@ -44,8 +53,7 @@ func (te *testEnvironment) runEnvironment(watcherQueue workqueue.RateLimitingInt localAPI.Sync(nil) watcher := RemoteClusterServiceWatcher{ - clusterName: clusterName, - clusterDomain: clusterDomain, + link: &te.link, remoteAPIClient: remoteAPI, localAPIClient: localAPI, stopper: nil, @@ -72,12 +80,12 @@ var serviceCreateWithMissingGateway = &testEnvironment{ events: []interface{}{ &RemoteServiceCreated{ service: remoteService("service-one", "ns1", "missing-gateway", "missing-namespace", "111", nil), - gatewayData: gatewayMetadata{ - Name: "missing-gateway", - Namespace: "missing-namespace", - }, }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + GatewayAddress: "", + }, } var createServiceWrongGatewaySpec = &testEnvironment{ @@ -96,15 +104,14 @@ var createServiceWrongGatewaySpec = &testEnvironment{ Port: 666, }, }), - - gatewayData: gatewayMetadata{ - Name: "existing-gateway", - Namespace: "existing-namespace", - }, }, }, remoteResources: []string{ - gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "", "mc-wrong", 888, "", 111, "/path", 666), + gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "mc-wrong", 888, "", 111, "/path", 666), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + GatewayAddress: "??????", }, } @@ -123,14 +130,18 @@ var createServiceOkeGatewaySpec = &testEnvironment{ Port: 666, }, }), - gatewayData: gatewayMetadata{ - Name: "existing-gateway", - Namespace: "existing-namespace", - }, }, }, remoteResources: []string{ - gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "", "mc-gateway", 888, "gateway-identity", defaultProbePort, defaultProbePath, defaultProbePeriod), + 
gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "mc-gateway", 888, "gateway-identity", defaultProbePort, defaultProbePath, defaultProbePeriod), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -142,84 +153,16 @@ var deleteMirroredService = &testEnvironment{ }, }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "", "", "", nil), - endpointsAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "", "", "gateway-identity", nil), - }, -} - -var updateServiceToNewGateway = &testEnvironment{ - events: []interface{}{ - &RemoteServiceUpdated{ - remoteUpdate: remoteService("test-service", "test-namespace", "gateway-new", "gateway-ns", "currentServiceResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastServiceResVersion", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 888, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 888, - Protocol: "TCP", - }, - }), - gatewayData: gatewayMetadata{ - Name: "gateway-new", - Namespace: "gateway-ns", - }, - }, + mirrorServiceAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", nil), + endpointsAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "gateway-identity", nil), }, - remoteResources: 
[]string{ - gatewayAsYaml("gateway-new", "gateway-ns", "currentGatewayResVersion", "0.0.0.0", "", "mc-gateway", 999, "", defaultProbePort, defaultProbePath, defaultProbePeriod), - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "past", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 888, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 888, - Protocol: "TCP", - }, - }), + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -238,7 +181,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 333, }, }), - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastServiceResVersion", "pastGatewayResVersion", []corev1.ServicePort{ + localService: mirrorService("test-service-remote", "test-namespace", "pastServiceResVersion", []corev1.ServicePort{ { Name: "port1", Protocol: "TCP", @@ -250,7 +193,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 222, }, }), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + localEndpoints: endpoints("test-service-remote", "test-namespace", "192.0.2.127", "", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -262,17 +205,13 @@ var updateServiceWithChangedPorts = &testEnvironment{ Protocol: "TCP", }, }), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }, }, remoteResources: []string{ - gatewayAsYaml("gateway", "gateway-ns", 
"currentGatewayResVersion", "192.0.2.127", "", "mc-gateway", 888, "", defaultProbePort, defaultProbePath, defaultProbePeriod), + gatewayAsYaml("gateway", "gateway-ns", "currentGatewayResVersion", "192.0.2.127", "mc-gateway", 888, "", defaultProbePort, defaultProbePath, defaultProbePeriod), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "past", "pastGatewayResVersion", []corev1.ServicePort{ + mirrorServiceAsYaml("test-service-remote", "test-namespace", "past", []corev1.ServicePort{ { Name: "port1", Protocol: "TCP", @@ -289,7 +228,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 333, }, }), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + endpointsAsYaml("test-service-remote", "test-namespace", "192.0.2.127", "", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -307,262 +246,13 @@ var updateServiceWithChangedPorts = &testEnvironment{ }, }), }, -} - -var remoteGatewayUpdated = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "remote", - addresses: []corev1.EndpointAddress{{IP: "0.0.0.0"}}, - incomingPort: 999, - resourceVersion: "currentGatewayResVersion", - ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - 
mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var remoteGatewayUpdatedWithHostnameAddress = &testEnvironment{ - events: []interface{}{ - &RepairEndpoints{}, - }, - remoteResources: []string{ - gatewayAsYaml("gateway", "gateway-ns", "currentGatewayResVersion", "", "localhost", "mc-gateway", 999, "", defaultProbePort, defaultProbePath, defaultProbePeriod), - }, -} - -var gatewayAddressChanged = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "some-cluster", - addresses: []corev1.EndpointAddress{{IP: "0.0.0.1"}}, - incomingPort: 888, - resourceVersion: "currentGatewayResVersion", - ProbeConfig: &ProbeConfig{ - path: "/p", - port: 1, - periodInSeconds: 222, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - 
Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var gatewayIdentityChanged = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: clusterName, - addresses: []corev1.EndpointAddress{{IP: "0.0.0.0"}}, - incomingPort: 888, - resourceVersion: "currentGatewayResVersion", - identity: "new-identity", - ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", 
"pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var gatewayDeleted = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayDeleted{ - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -571,10 +261,13 @@ var 
clusterUnregistered = &testEnvironment{ &ClusterUnregistered{}, }, localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), + mirrorServiceAsYaml("test-service-1-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", nil), + mirrorServiceAsYaml("test-service-2-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, }, } @@ -583,14 +276,17 @@ var gcTriggered = &testEnvironment{ &OprhanedServicesGcTriggered{}, }, localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), + mirrorServiceAsYaml("test-service-1-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", nil), + mirrorServiceAsYaml("test-service-2-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", nil), }, remoteResources: []string{ remoteServiceAsYaml("test-service-1", "test-namespace", "gateway", "gateway-ns", "", nil), }, + link: multicluster.Link{ + TargetClusterName: clusterName, + }, } func onAddOrUpdateExportedSvc(isAdd bool) *testEnvironment { @@ -598,6 +294,14 @@ func onAddOrUpdateExportedSvc(isAdd bool) *testEnvironment { events: []interface{}{ onAddOrUpdateEvent(isAdd, 
remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "resVersion", nil)), }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } } @@ -608,8 +312,16 @@ func onAddOrUpdateRemoteServiceUpdated(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "pastResourceVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -620,8 +332,16 @@ func onAddOrUpdateSameResVersion(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "currentResVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + 
GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -632,8 +352,16 @@ func serviceNotExportedAnymore(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "currentResVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -644,6 +372,14 @@ var onDeleteWithGatewayMetadata = &testEnvironment{ svc: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil), }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } var onDeleteNoGatewayMetadata = &testEnvironment{ @@ -652,6 +388,14 @@ var onDeleteNoGatewayMetadata = &testEnvironment{ svc: remoteService("gateway", "test-namespace", "", "", "currentResVersion", nil), }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } // the following tests ensure that onAdd, onUpdate and onDelete result in @@ -742,15 +486,11 @@ func remoteServiceAsYaml(name, namespace, 
gtwName, gtwNs, resourceVersion string return string(bytes) } -func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion string, ports []corev1.ServicePort) *corev1.Service { +func mirrorService(name, namespace, resourceVersion string, ports []corev1.ServicePort) *corev1.Service { annotations := make(map[string]string) annotations[consts.RemoteResourceVersionAnnotation] = resourceVersion annotations[consts.RemoteServiceFqName] = fmt.Sprintf("%s.%s.svc.cluster.local", strings.Replace(name, "-remote", "", 1), namespace) - if gatewayResourceVersion != "" { - annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewayResourceVersion - - } return &corev1.Service{ TypeMeta: metav1.TypeMeta{ Kind: "Service", @@ -760,10 +500,8 @@ func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayRe Name: name, Namespace: namespace, Labels: map[string]string{ - consts.RemoteClusterNameLabel: "remote", + consts.RemoteClusterNameLabel: clusterName, consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gtwName, - consts.RemoteGatewayNsLabel: gtwNs, }, Annotations: annotations, }, @@ -773,8 +511,8 @@ func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayRe } } -func mirroredServiceAsYaml(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion string, ports []corev1.ServicePort) string { - svc := mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion, ports) +func mirrorServiceAsYaml(name, namespace, resourceVersion string, ports []corev1.ServicePort) string { + svc := mirrorService(name, namespace, resourceVersion, ports) bytes, err := yaml.Marshal(svc) if err != nil { @@ -825,8 +563,8 @@ func gateway(name, namespace, resourceVersion, ip, hostname, portName string, po return &svc } -func gatewayAsYaml(name, namespace, resourceVersion, ip, hostname, portName string, port int32, identity string, probePort int32, probePath string, 
probePeriod int) string { - gtw := gateway(name, namespace, resourceVersion, ip, hostname, portName, port, identity, probePort, probePath, probePeriod) +func gatewayAsYaml(name, namespace, resourceVersion, ip, portName string, port int32, identity string, probePort int32, probePath string, probePeriod int) string { + gtw := gateway(name, namespace, resourceVersion, ip, "", portName, port, identity, probePort, probePath, probePeriod) bytes, err := yaml.Marshal(gtw) if err != nil { @@ -835,7 +573,7 @@ func gatewayAsYaml(name, namespace, resourceVersion, ip, hostname, portName stri return string(bytes) } -func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentity string, ports []corev1.EndpointPort) *corev1.Endpoints { +func endpoints(name, namespace, gatewayIP string, gatewayIdentity string, ports []corev1.EndpointPort) *corev1.Endpoints { var subsets []corev1.EndpointSubset if gatewayIP != "" { subsets = []corev1.EndpointSubset{ @@ -859,10 +597,8 @@ func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentit Name: name, Namespace: namespace, Labels: map[string]string{ - consts.RemoteClusterNameLabel: "remote", + consts.RemoteClusterNameLabel: clusterName, consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gtwName, - consts.RemoteGatewayNsLabel: gtwNs, }, Annotations: map[string]string{ consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.cluster.local", strings.Replace(name, "-remote", "", 1), namespace), @@ -878,8 +614,8 @@ func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentit return endpoints } -func endpointsAsYaml(name, namespace, gtwName, gtwNs, gatewayIP, gatewayIdentity string, ports []corev1.EndpointPort) string { - ep := endpoints(name, namespace, gtwName, gtwNs, gatewayIP, gatewayIdentity, ports) +func endpointsAsYaml(name, namespace, gatewayIP, gatewayIdentity string, ports []corev1.EndpointPort) string { + ep := endpoints(name, namespace, gatewayIP, gatewayIdentity, 
ports) bytes, err := yaml.Marshal(ep) if err != nil { diff --git a/controller/cmd/service-mirror/config_watcher.go b/controller/cmd/service-mirror/config_watcher.go deleted file mode 100644 index c71543acad976..0000000000000 --- a/controller/cmd/service-mirror/config_watcher.go +++ /dev/null @@ -1,161 +0,0 @@ -package servicemirror - -import ( - "fmt" - "sync" - "time" - - "github.com/linkerd/linkerd2/controller/k8s" - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/clientcmd" - - consts "github.com/linkerd/linkerd2/pkg/k8s" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" -) - -// RemoteClusterConfigWatcher watches for secrets of type MirrorSecretType -// and upon the detection of such secret created starts a RemoteClusterServiceWatcher -type RemoteClusterConfigWatcher struct { - serviceMirrorNamespace string - k8sAPI *k8s.API - clusterWatchers map[string]*RemoteClusterServiceWatcher - requeueLimit int - repairPeriod time.Duration - sync.RWMutex -} - -// NewRemoteClusterConfigWatcher Creates a new config watcher -func NewRemoteClusterConfigWatcher(serviceMirrorNamespace string, secretsInformer cache.SharedIndexInformer, k8sAPI *k8s.API, requeueLimit int, repairPeriod time.Duration) *RemoteClusterConfigWatcher { - rcw := &RemoteClusterConfigWatcher{ - serviceMirrorNamespace: serviceMirrorNamespace, - k8sAPI: k8sAPI, - clusterWatchers: map[string]*RemoteClusterServiceWatcher{}, - requeueLimit: requeueLimit, - repairPeriod: repairPeriod, - } - secretsInformer.AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch object := obj.(type) { - case *corev1.Secret: - return object.Type == consts.MirrorSecretType - - case cache.DeletedFinalStateUnknown: - if secret, ok := object.Obj.(*corev1.Secret); ok { - return secret.Type == consts.MirrorSecretType - } - return false - default: - return false - } - }, - - Handler: 
cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - secret := obj.(*corev1.Secret) - if err := rcw.registerRemoteCluster(secret); err != nil { - log.Errorf("Cannot register target cluster: %s", err) - } - }, - DeleteFunc: func(obj interface{}) { - secret, ok := obj.(*corev1.Secret) - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj) - return - } - secret, ok = tombstone.Obj.(*corev1.Secret) - if !ok { - log.Errorf("DeletedFinalStateUnknown contained object that is not a Secret %#v", obj) - return - } - } - if err := rcw.unregisterRemoteCluster(secret, true); err != nil { - log.Errorf("Cannot unregister target cluster: %s", err) - } - }, - UpdateFunc: func(old, new interface{}) { - oldSecret := old.(*corev1.Secret) - newSecret := new.(*corev1.Secret) - - if oldSecret.ResourceVersion != newSecret.ResourceVersion { - if err := rcw.unregisterRemoteCluster(oldSecret, false); err != nil { - log.Errorf("Cannot unregister target cluster: %s", err) - return - } - - if err := rcw.registerRemoteCluster(newSecret); err != nil { - log.Errorf("Cannot register target cluster: %s", err) - } - - } - - //TODO: Handle update (it might be that the credentials have changed...) 
- }, - }, - }, - ) - return rcw -} - -// Stop Shuts down all created config and cluster watchers -func (rcw *RemoteClusterConfigWatcher) Stop() { - rcw.Lock() - defer rcw.Unlock() - for _, watcher := range rcw.clusterWatchers { - watcher.Stop(false) - } -} - -func (rcw *RemoteClusterConfigWatcher) registerRemoteCluster(secret *corev1.Secret) error { - config, err := sm.ParseRemoteClusterSecret(secret) - - if err != nil { - return err - } - - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) - if err != nil { - return fmt.Errorf("unable to parse kube config: %s", err) - } - - rcw.Lock() - defer rcw.Unlock() - - if _, ok := rcw.clusterWatchers[config.ClusterName]; ok { - return fmt.Errorf("there is already a cluster with name %s being watcher. Please delete its config before attempting to register a new one", config.ClusterName) - } - - watcher, err := NewRemoteClusterServiceWatcher(rcw.serviceMirrorNamespace, rcw.k8sAPI, clientConfig, config.ClusterName, rcw.requeueLimit, rcw.repairPeriod, config.ClusterDomain) - if err != nil { - return err - } - - rcw.clusterWatchers[config.ClusterName] = watcher - if err := watcher.Start(); err != nil { - return err - } - return nil - -} - -func (rcw *RemoteClusterConfigWatcher) unregisterRemoteCluster(secret *corev1.Secret, cleanState bool) error { - config, err := sm.ParseRemoteClusterSecret(secret) - - if err != nil { - return err - } - rcw.Lock() - defer rcw.Unlock() - if watcher, ok := rcw.clusterWatchers[config.ClusterName]; ok { - watcher.Stop(cleanState) - } else { - return fmt.Errorf("cannot find watcher for cluser: %s", config.ClusterName) - } - delete(rcw.clusterWatchers, config.ClusterName) - - return nil -} diff --git a/controller/cmd/service-mirror/events_formatting.go b/controller/cmd/service-mirror/events_formatting.go index 02058024c54d3..486b4e30b83e4 100644 --- a/controller/cmd/service-mirror/events_formatting.go +++ b/controller/cmd/service-mirror/events_formatting.go @@ -51,48 +51,19 
@@ func formatEndpoints(endp *corev1.Endpoints) string { return fmt.Sprintf("Endpoints: {name: %s, namespace: %s, annotations: [%s], labels: [%s], subsets: [%s]}", endp.Name, endp.Namespace, formatMetadata(endp.Annotations), formatMetadata(endp.Labels), strings.Join(subsets, ",")) } -func (b ProbeConfig) String() string { - return fmt.Sprintf("ProbeConfig: {path: %s, port: %d, periodInSeconds: %d}", b.path, b.port, b.periodInSeconds) -} - -func (b GatewaySpec) String() string { - return fmt.Sprintf("GatewaySpec: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, addresses: [%s], incomingPort: %d, resourceVersion: %s, identity: %s, probeConfig: %s}", b.gatewayName, b.gatewayNamespace, b.clusterName, formatAddresses(b.addresses), b.incomingPort, b.resourceVersion, b.identity, b.ProbeConfig) -} - -func (gtm gatewayMetadata) String() string { - return fmt.Sprintf("gatewayMetadata: {name: %s, namespace: %s}", gtm.Name, gtm.Namespace) -} - // Events for cluster watcher func (rsc RemoteServiceCreated) String() string { - return fmt.Sprintf("RemoteServiceCreated: {service: %s, gatewayData: %s}", formatService(rsc.service), rsc.gatewayData) + return fmt.Sprintf("RemoteServiceCreated: {service: %s}", formatService(rsc.service)) } func (rsu RemoteServiceUpdated) String() string { - return fmt.Sprintf("RemoteServiceUpdated: {localService: %s, localEndpoints: %s, remoteUpdate: %s, gatewayData: %s}", formatService(rsu.localService), formatEndpoints(rsu.localEndpoints), formatService(rsu.remoteUpdate), rsu.gatewayData) + return fmt.Sprintf("RemoteServiceUpdated: {localService: %s, localEndpoints: %s, remoteUpdate: %s}", formatService(rsu.localService), formatEndpoints(rsu.localEndpoints), formatService(rsu.remoteUpdate)) } func (rsd RemoteServiceDeleted) String() string { return fmt.Sprintf("RemoteServiceDeleted: {name: %s, namespace: %s }", rsd.Name, rsd.Namespace) } -func (rgd RemoteGatewayDeleted) String() string { - return fmt.Sprintf("RemoteGatewayDeleted: 
{gatewayData: %s}", rgd.gatewayData) -} - -func (rgd *RemoteGatewayCreated) String() string { - return fmt.Sprintf("RemoteGatewayCreated: {gatewaySpec: %s}", rgd.gatewaySpec) -} - -func (rgu RemoteGatewayUpdated) String() string { - var services []string - - for _, s := range rgu.affectedServices { - services = append(services, formatService(s)) - } - return fmt.Sprintf("RemoteGatewayUpdated: {gatewaySpec: %s, affectedServices: [%s]}", rgu.gatewaySpec, strings.Join(services, ",")) -} - func (cgu ClusterUnregistered) String() string { return "ClusterUnregistered: {}" } @@ -116,21 +87,3 @@ func (od OnDeleteCalled) String() string { func (re RepairEndpoints) String() string { return "RepairEndpoints" } - -//Events for probe manager - -func (ps probeSpec) String() string { - return fmt.Sprintf("ProbeSpec: {path: %s, port: %d, period: %d}", ps.path, ps.port, ps.periodInSeconds) -} - -func (gmc GatewayMirrorCreated) String() string { - return fmt.Sprintf("GatewayMirrorCreated: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, probeSpec: %s}", gmc.gatewayName, gmc.gatewayNamespace, gmc.clusterName, gmc.probeSpec) -} - -func (gmd GatewayMirrorDeleted) String() string { - return fmt.Sprintf("GatewayMirrorDeleted: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s}", gmd.gatewayName, gmd.gatewayNamespace, gmd.clusterName) -} - -func (gmu GatewayMirrorUpdated) String() string { - return fmt.Sprintf("GatewayMirrorUpdated: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, probeSpec: %s}", gmu.gatewayName, gmu.gatewayNamespace, gmu.clusterName, gmu.probeSpec) -} diff --git a/controller/cmd/service-mirror/main.go b/controller/cmd/service-mirror/main.go index 3fc6476e317a7..378b5918cb3ae 100644 --- a/controller/cmd/service-mirror/main.go +++ b/controller/cmd/service-mirror/main.go @@ -1,7 +1,6 @@ package servicemirror import ( - "context" "flag" "fmt" "os" @@ -9,93 +8,166 @@ import ( "syscall" "time" - "k8s.io/client-go/informers" - 
"k8s.io/client-go/tools/cache" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + dynamic "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/tools/clientcmd" - "github.com/linkerd/linkerd2/controller/k8s" + controllerK8s "github.com/linkerd/linkerd2/controller/k8s" "github.com/linkerd/linkerd2/pkg/admin" "github.com/linkerd/linkerd2/pkg/flags" + "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" + "github.com/linkerd/linkerd2/pkg/servicemirror" log "github.com/sirupsen/logrus" - "k8s.io/client-go/kubernetes" ) -func initLocalResourceInformer(api kubernetes.Interface, namespace string, resource k8s.APIResource) (cache.SharedIndexInformer, error) { - sharedInformers := informers.NewSharedInformerFactoryWithOptions(api, 10*time.Minute, informers.WithNamespace(namespace)) - - var informer cache.SharedIndexInformer - - switch resource { - case k8s.Svc: - informer = sharedInformers.Core().V1().Services().Informer() - case k8s.Secret: - informer = sharedInformers.Core().V1().Secrets().Informer() - default: - return nil, fmt.Errorf("cannot instantiate local informer for %v", resource) - - } - - sharedInformers.Start(nil) - - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) - defer cancel() - - log.Infof("waiting for local namespaced %v informer caches to sync", resource) - if !cache.WaitForCacheSync(ctx.Done(), informer.HasSynced) { - return nil, fmt.Errorf("failed to sync local namespaced %v informer caches", resource) - } - log.Infof("local namespaced %v informer caches synced", resource) - return informer, nil -} +var ( + clusterWatcher *RemoteClusterServiceWatcher + probeWorker *ProbeWorker +) -// Main executes the tap service-mirror +// Main executes the service-mirror controller func Main(args []string) { cmd := flag.NewFlagSet("service-mirror", flag.ExitOnError) kubeConfigPath := cmd.String("kubeconfig", "", "path to the local kube config") requeueLimit 
:= cmd.Int("event-requeue-limit", 3, "requeue limit for events") metricsAddr := cmd.String("metrics-addr", ":9999", "address to serve scrapable metrics on") - namespace := cmd.String("namespace", "", "address to serve scrapable metrics on") + namespace := cmd.String("namespace", "", "namespace containing Link and credentials Secret") repairPeriod := cmd.Duration("endpoint-refresh-period", 1*time.Minute, "frequency to refresh endpoint resolution") flags.ConfigureAndParse(cmd, args) + linkName := cmd.Arg(0) stop := make(chan os.Signal, 1) signal.Notify(stop, os.Interrupt, syscall.SIGTERM) - k8sAPI, err := k8s.InitializeAPI( - *kubeConfigPath, - false, - k8s.Svc, - k8s.NS, - k8s.Endpoint, - ) - + // We create two different kubernetes API clients for the local cluster: + // k8sAPI is used as a dynamic client for unstrcutured access to Link custom + // resources. + // + // controllerK8sAPI is used by the cluster watcher to manage + // mirror resources such as services, namespaces, and endpoints. 
+ k8sAPI, err := k8s.NewAPI(*kubeConfigPath, "", "", []string{}, 0) //TODO: Use can-i to check for required permissions if err != nil { log.Fatalf("Failed to initialize K8s API: %s", err) } - secretsInformer, err := initLocalResourceInformer(k8sAPI.Client, *namespace, k8s.Secret) + controllerK8sAPI, err := controllerK8s.InitializeAPI(*kubeConfigPath, false, + controllerK8s.NS, + controllerK8s.Svc, + controllerK8s.Endpoint, + ) if err != nil { - log.Fatalf("Failed to initialize secret informer: %s", err) + log.Fatalf("Failed to initialize K8s API: %s", err) + } + + linkClient := k8sAPI.DynamicClient.Resource(multicluster.LinkGVR).Namespace(*namespace) + + metrics := newProbeMetricVecs() + go admin.StartServer(*metricsAddr) + + controllerK8sAPI.Sync(nil) + + for { + // Start link watch + linkWatch, err := linkClient.Watch(metav1.ListOptions{}) + if err != nil { + log.Fatalf("Failed to watch Link %s: %s", linkName, err) + } + results := linkWatch.ResultChan() + + // Each time the link resource is updated, reload the config and restart the + // cluster watcher. + for event := range results { + switch obj := event.Object.(type) { + case *dynamic.Unstructured: + if obj.GetName() == linkName { + switch event.Type { + case watch.Added, watch.Modified: + link, err := multicluster.NewLink(*obj) + if err != nil { + log.Errorf("Failed to parse link %s: %s", linkName, err) + continue + } + log.Infof("Got updated link %s: %+v", linkName, link) + creds, err := loadCredentials(link, *namespace, k8sAPI) + if err != nil { + log.Errorf("Failed to load remote cluster credentials: %s", err) + } + restartClusterWatcher(link, *namespace, creds, controllerK8sAPI, *requeueLimit, *repairPeriod, metrics) + case watch.Deleted: + log.Infof("Link %s deleted", linkName) + // TODO: should we delete all mirror resources? 
+ default: + log.Infof("Ignoring event type %s", event.Type) + } + } + default: + log.Errorf("Unknown object type detected: %+v", obj) + } + } + + log.Info("Link watch terminated; restarting watch") } - svcInformer, err := initLocalResourceInformer(k8sAPI.Client, *namespace, k8s.Svc) +} +func loadCredentials(link multicluster.Link, namespace string, k8sAPI *k8s.KubernetesAPI) (*servicemirror.WatchedClusterConfig, error) { + // Load the credentials secret + secret, err := k8sAPI.Interface.CoreV1().Secrets(namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) if err != nil { - log.Fatalf("Failed to initialize service informer: %s", err) + return nil, fmt.Errorf("Failed to load credentials secret %s: %s", link.ClusterCredentialsSecret, err) } + return servicemirror.ParseRemoteClusterSecret(secret) +} - probeManager := NewProbeManager(svcInformer) - probeManager.Start() +func restartClusterWatcher( + link multicluster.Link, + namespace string, + creds *servicemirror.WatchedClusterConfig, + controllerK8sAPI *controllerK8s.API, + requeueLimit int, + repairPeriod time.Duration, + metrics probeMetricVecs, +) { + if clusterWatcher != nil { + clusterWatcher.Stop(false) + } + if probeWorker != nil { + probeWorker.Stop() + } - k8sAPI.Sync(nil) - watcher := NewRemoteClusterConfigWatcher(*namespace, secretsInformer, k8sAPI, *requeueLimit, *repairPeriod) - log.Info("Started cluster config watcher") + cfg, err := clientcmd.RESTConfigFromKubeConfig(creds.APIConfig) + if err != nil { + log.Errorf("Unable to parse kube config: %s", err) + return + } - go admin.StartServer(*metricsAddr) + clusterWatcher, err = NewRemoteClusterServiceWatcher( + namespace, + controllerK8sAPI, + cfg, + &link, + requeueLimit, + repairPeriod, + ) + if err != nil { + log.Errorf("Unable to create cluster watcher: %s", err) + return + } + + err = clusterWatcher.Start() + if err != nil { + log.Errorf("Failed to start cluster watcher: %s", err) + return + } - <-stop - log.Info("Stopping cluster 
config watcher") - watcher.Stop() - probeManager.Stop() + workerMetrics, err := metrics.newWorkerMetrics(link.TargetClusterName) + if err != nil { + log.Errorf("Failed to create metrics for cluster watcher: %s", err) + } + probeWorker = NewProbeWorker(fmt.Sprintf("probe-gateway-%s", link.TargetClusterName), &link.ProbeSpec, workerMetrics, link.TargetClusterName) + go probeWorker.run() } diff --git a/controller/cmd/service-mirror/metrics.go b/controller/cmd/service-mirror/metrics.go index 97f027b5acce6..6f6f087aa6083 100644 --- a/controller/cmd/service-mirror/metrics.go +++ b/controller/cmd/service-mirror/metrics.go @@ -7,11 +7,9 @@ import ( ) const ( - gatewayNameLabel = "gateway_name" - gatewayNamespaceLabel = "gateway_namespace" - gatewayClusterName = "target_cluster_name" - eventTypeLabelName = "event_type" - probeSuccessfulLabel = "probe_successful" + gatewayClusterName = "target_cluster_name" + eventTypeLabelName = "event_type" + probeSuccessfulLabel = "probe_successful" ) type probeMetricVecs struct { @@ -37,19 +35,19 @@ func init() { Name: "service_mirror_endpoint_repairs", Help: "Increments when the service mirror controller attempts to repair mirror endpoints", }, - []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName}, + []string{gatewayClusterName}, ) } func newProbeMetricVecs() probeMetricVecs { - labelNames := []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName} + labelNames := []string{gatewayClusterName} probes := promauto.NewCounterVec( prometheus.CounterOpts{ Name: "gateway_probes", Help: "A counter for the number of actual performed probes to a gateway", }, - []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName, probeSuccessfulLabel}, + []string{gatewayClusterName, probeSuccessfulLabel}, ) enqueues := promauto.NewCounterVec( @@ -98,12 +96,10 @@ func newProbeMetricVecs() probeMetricVecs { probes: probes, } } -func (mv probeMetricVecs) newWorkerMetrics(gatewayNamespace, gatewayName, remoteClusterName 
string) (*probeMetrics, error) { +func (mv probeMetricVecs) newWorkerMetrics(remoteClusterName string) (*probeMetrics, error) { labels := prometheus.Labels{ - gatewayNameLabel: gatewayName, - gatewayNamespaceLabel: gatewayNamespace, - gatewayClusterName: remoteClusterName, + gatewayClusterName: remoteClusterName, } curriedProbes, err := mv.probes.CurryWith(labels) @@ -115,16 +111,14 @@ func (mv probeMetricVecs) newWorkerMetrics(gatewayNamespace, gatewayName, remote latencies: mv.latencies.With(labels), probes: curriedProbes, unregister: func() { - mv.unregister(gatewayNamespace, gatewayName, remoteClusterName) + mv.unregister(remoteClusterName) }, }, nil } -func (mv probeMetricVecs) unregister(gatewayNamespace, gatewayName, remoteClusterName string) { +func (mv probeMetricVecs) unregister(remoteClusterName string) { labels := prometheus.Labels{ - gatewayNameLabel: gatewayName, - gatewayNamespaceLabel: gatewayNamespace, - gatewayClusterName: remoteClusterName, + gatewayClusterName: remoteClusterName, } if !mv.alive.Delete(labels) { diff --git a/controller/cmd/service-mirror/probe_manager.go b/controller/cmd/service-mirror/probe_manager.go deleted file mode 100644 index c2b850fedea7a..0000000000000 --- a/controller/cmd/service-mirror/probe_manager.go +++ /dev/null @@ -1,271 +0,0 @@ -package servicemirror - -import ( - "fmt" - "strconv" - - consts "github.com/linkerd/linkerd2/pkg/k8s" - "github.com/prometheus/client_golang/prometheus" - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/tools/cache" -) - -const probeChanBufferSize = 500 - -// ProbeManager takes care of managing the lifecycle of probe workers -type ProbeManager struct { - probeWorkers map[string]*ProbeWorker - mirroredGatewayInformer cache.SharedIndexInformer - events chan interface{} - metricVecs *probeMetricVecs - done chan struct{} -} - -// GatewayMirrorCreated is observed when a mirror of a remote gateway is created locally -type GatewayMirrorCreated struct { - 
gatewayName string - gatewayNamespace string - clusterName string - probeSpec -} - -// GatewayMirrorDeleted is emitted when a mirror of a remote gateway is deleted -type GatewayMirrorDeleted struct { - gatewayName string - gatewayNamespace string - clusterName string -} - -// GatewayMirrorUpdated is emitted when the mirror of a remote gateway has changed -type GatewayMirrorUpdated struct { - gatewayName string - gatewayNamespace string - clusterName string - probeSpec -} - -// NewProbeManager creates a new probe manager -func NewProbeManager(mirroredGatewayInformer cache.SharedIndexInformer) *ProbeManager { - metricVecs := newProbeMetricVecs() - return &ProbeManager{ - mirroredGatewayInformer: mirroredGatewayInformer, - probeWorkers: make(map[string]*ProbeWorker), - events: make(chan interface{}, probeChanBufferSize), - metricVecs: &metricVecs, - done: make(chan struct{}), - } -} - -func eventTypeString(ev interface{}) string { - switch ev.(type) { - case *GatewayMirrorCreated: - return "GatewayMirrorCreated" - case *GatewayMirrorDeleted: - return "GatewayMirrorDeleted" - case *GatewayMirrorUpdated: - return "GatewayMirrorUpdated" - default: - return "Unknown" - } -} - -func (m *ProbeManager) enqueueEvent(event interface{}) { - m.metricVecs.enqueues.With(prometheus.Labels{eventTypeLabelName: eventTypeString(event)}).Inc() - m.events <- event -} - -func probeKey(gatewayNamespace string, gatewayName string, clusterName string) string { - return fmt.Sprintf("%s-%s-%s", gatewayNamespace, gatewayName, clusterName) -} - -func (m *ProbeManager) handleEvent(ev interface{}) { - switch ev := ev.(type) { - case *GatewayMirrorCreated: - m.handleGatewayMirrorCreated(ev) - case *GatewayMirrorUpdated: - m.handleGatewayMirrorUpdated(ev) - case *GatewayMirrorDeleted: - m.handleGatewayMirrorDeleted(ev) - default: - log.Errorf("Received unknown event: %v", ev) - } -} - -func (m *ProbeManager) handleGatewayMirrorDeleted(event *GatewayMirrorDeleted) { - probeKey := 
probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - m.stopProbe(probeKey) -} - -func (m *ProbeManager) handleGatewayMirrorCreated(event *GatewayMirrorCreated) { - probeKey := probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - worker, ok := m.probeWorkers[probeKey] - if ok { - log.Infof("There is already a probe worker for %s. Updating instead of creating", probeKey) - worker.UpdateProbeSpec(&event.probeSpec) - } else { - log.Infof("Creating probe worker %s", probeKey) - probeMetrics, err := m.metricVecs.newWorkerMetrics(event.gatewayNamespace, event.gatewayName, event.clusterName) - if err != nil { - log.Errorf("Could not crete probe metrics: %s", err) - } else { - localGatewayName := fmt.Sprintf("%s-%s", event.gatewayName, event.clusterName) - worker = NewProbeWorker(localGatewayName, &event.probeSpec, probeMetrics, probeKey) - m.probeWorkers[probeKey] = worker - worker.Start() - } - } -} - -func (m *ProbeManager) handleGatewayMirrorUpdated(event *GatewayMirrorUpdated) { - probeKey := probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - worker, ok := m.probeWorkers[probeKey] - if ok { - if worker.probeSpec.port != event.port || worker.probeSpec.periodInSeconds != event.periodInSeconds || worker.probeSpec.path != event.path { - worker.UpdateProbeSpec(&event.probeSpec) - } - } else { - log.Infof("Could not find a worker for %s while handling GatewayMirrorUpdated event", probeKey) - } -} - -func (m *ProbeManager) stopProbe(key string) { - if worker, ok := m.probeWorkers[key]; ok { - worker.Stop() - delete(m.probeWorkers, key) - } else { - log.Infof("Could not find probe worker with key %s", key) - } -} - -func (m *ProbeManager) run() { - for { - select { - case event := <-m.events: - log.Infof("Probe Manager: received event: %s", event) - m.metricVecs.dequeues.With(prometheus.Labels{eventTypeLabelName: eventTypeString(event)}).Inc() - m.handleEvent(event) - case <-m.done: - log.Infof("Shutting down 
ProbeManager") - for key := range m.probeWorkers { - m.stopProbe(key) - } - return - } - } -} - -func extractProbeSpec(svc *corev1.Service) (*probeSpec, error) { - path, hasPath := svc.Annotations[consts.MirroredGatewayProbePath] - if !hasPath { - return nil, fmt.Errorf("mirrored Gateway service is missing %s annotation", consts.MirroredGatewayProbePath) - } - - probePort, err := extractPort(svc.Spec.Ports, consts.ProbePortName) - if err != nil { - return nil, fmt.Errorf("%s: %s", svc.Name, err) - } - - period, hasPeriod := svc.Annotations[consts.MirroredGatewayProbePeriod] - if !hasPeriod { - return nil, fmt.Errorf("mirrored Gateway service is missing %s annotation", consts.MirroredGatewayProbePeriod) - } - - probePeriod, err := strconv.ParseUint(period, 10, 32) - if err != nil { - return nil, err - } - - return &probeSpec{ - path: path, - port: probePort, - periodInSeconds: uint32(probePeriod), - }, nil - -} - -// Start starts the probe manager -func (m *ProbeManager) Start() { - m.mirroredGatewayInformer.AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch object := obj.(type) { - case *corev1.Service: - _, isMirrorGateway := object.Labels[consts.MirroredGatewayLabel] - return isMirrorGateway - - case cache.DeletedFinalStateUnknown: - if svc, ok := object.Obj.(*corev1.Service); ok { - _, isMirrorGateway := svc.Labels[consts.MirroredGatewayLabel] - return isMirrorGateway - } - return false - default: - return false - } - }, - - Handler: cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - service := obj.(*corev1.Service) - spec, err := extractProbeSpec(service) - if err != nil { - log.Errorf("Could not parse probe spec %s", err) - } else { - m.enqueueEvent(&GatewayMirrorCreated{ - gatewayName: service.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: service.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: service.Labels[consts.RemoteClusterNameLabel], - 
probeSpec: *spec, - }) - } - }, - DeleteFunc: func(obj interface{}) { - service, ok := obj.(*corev1.Service) - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj) - return - } - service, ok = tombstone.Obj.(*corev1.Service) - if !ok { - log.Errorf("DeletedFinalStateUnknown contained object that is not a Secret %#v", obj) - return - } - } - - m.enqueueEvent(&GatewayMirrorDeleted{ - gatewayName: service.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: service.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: service.Labels[consts.RemoteClusterNameLabel], - }) - }, - UpdateFunc: func(old, new interface{}) { - oldService := old.(*corev1.Service) - newService := new.(*corev1.Service) - - if oldService.ResourceVersion != newService.ResourceVersion { - spec, err := extractProbeSpec(newService) - if err != nil { - log.Errorf("Could not parse probe spec %s", err) - } else { - m.enqueueEvent(&GatewayMirrorUpdated{ - gatewayName: newService.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: newService.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: newService.Labels[consts.RemoteClusterNameLabel], - probeSpec: *spec, - }) - } - } - }, - }, - }, - ) - go m.run() -} - -// Stop stops the probe manager -func (m *ProbeManager) Stop() { - close(m.done) -} diff --git a/controller/cmd/service-mirror/probe_worker.go b/controller/cmd/service-mirror/probe_worker.go index 62a4afbb42e35..ed349c434b2b1 100644 --- a/controller/cmd/service-mirror/probe_worker.go +++ b/controller/cmd/service-mirror/probe_worker.go @@ -6,30 +6,25 @@ import ( "sync" "time" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" ) const httpGatewayTimeoutMillis = 50000 -type probeSpec struct { - path string - port uint32 - periodInSeconds uint32 -} - // ProbeWorker is 
responsible for monitoring gateways using a probe specification type ProbeWorker struct { localGatewayName string *sync.RWMutex - probeSpec *probeSpec + probeSpec *multicluster.ProbeSpec stopCh chan struct{} metrics *probeMetrics log *logging.Entry } // NewProbeWorker creates a new probe worker associated with a particular gateway -func NewProbeWorker(localGatewayName string, spec *probeSpec, metrics *probeMetrics, probekey string) *ProbeWorker { +func NewProbeWorker(localGatewayName string, spec *multicluster.ProbeSpec, metrics *probeMetrics, probekey string) *ProbeWorker { return &ProbeWorker{ localGatewayName: localGatewayName, RWMutex: &sync.RWMutex{}, @@ -43,7 +38,7 @@ func NewProbeWorker(localGatewayName string, spec *probeSpec, metrics *probeMetr } // UpdateProbeSpec is used to update the probe specification when something about the gateway changes -func (pw *ProbeWorker) UpdateProbeSpec(spec *probeSpec) { +func (pw *ProbeWorker) UpdateProbeSpec(spec *multicluster.ProbeSpec) { pw.Lock() pw.probeSpec = spec pw.Unlock() @@ -63,9 +58,8 @@ func (pw *ProbeWorker) Start() { } func (pw *ProbeWorker) run() { - periodInMillis := pw.probeSpec.periodInSeconds * 1000 - probeTickerPeriod := time.Duration(periodInMillis) * time.Millisecond - maxJitter := time.Duration(periodInMillis/10) * time.Millisecond // max jitter is 10% of period + probeTickerPeriod := pw.probeSpec.Period + maxJitter := pw.probeSpec.Period / 10 // max jitter is 10% of period probeTicker := NewTicker(probeTickerPeriod, maxJitter) probeLoop: @@ -90,7 +84,7 @@ func (pw *ProbeWorker) doProbe() { Timeout: httpGatewayTimeoutMillis * time.Millisecond, } - req, err := http.NewRequest("GET", fmt.Sprintf("http://%s:%d/%s", pw.localGatewayName, pw.probeSpec.port, pw.probeSpec.path), nil) + req, err := http.NewRequest("GET", fmt.Sprintf("http://%s:%d/%s", pw.localGatewayName, pw.probeSpec.Port, pw.probeSpec.Path), nil) if err != nil { pw.log.Errorf("Could not create a GET request to gateway: %s", err) return 
diff --git a/pkg/charts/multicluster/values.go b/pkg/charts/multicluster/values.go index 158265a50c2aa..d95aa5d6b431d 100644 --- a/pkg/charts/multicluster/values.go +++ b/pkg/charts/multicluster/values.go @@ -9,7 +9,10 @@ import ( "sigs.k8s.io/yaml" ) -const helmDefaultChartDir = "linkerd2-multicluster" +const ( + helmDefaultChartDir = "linkerd2-multicluster" + helmDefaultLinkChartDir = "linkerd2-multicluster-link" +) // Values contains the top-level elements in the Helm charts type Values struct { @@ -40,10 +43,11 @@ type Values struct { ServiceMirrorUID int64 `json:"serviceMirrorUID"` RemoteMirrorServiceAccount bool `json:"remoteMirrorServiceAccount"` RemoteMirrorServiceAccountName string `json:"remoteMirrorServiceAccountName"` + TargetClusterName string `json:"targetClusterName"` } -// NewValues returns a new instance of the Values type. -func NewValues() (*Values, error) { +// NewInstallValues returns a new instance of the Values type. +func NewInstallValues() (*Values, error) { chartDir := fmt.Sprintf("%s/", helmDefaultChartDir) v, err := readDefaults(chartDir) if err != nil { @@ -54,6 +58,18 @@ func NewValues() (*Values, error) { return v, nil } +// NewLinkValues returns a new instance of the Values type. +func NewLinkValues() (*Values, error) { + chartDir := fmt.Sprintf("%s/", helmDefaultLinkChartDir) + v, err := readDefaults(chartDir) + if err != nil { + return nil, err + } + + v.CliVersion = k8s.CreatedByAnnotationValue() + return v, nil +} + // readDefaults read all the default variables from the values.yaml file. // chartDir is the root directory of the Helm chart where values.yaml is. 
func readDefaults(chartDir string) (*Values, error) { diff --git a/pkg/flags/flags.go b/pkg/flags/flags.go index a7ac7952d6b87..b7f74df2a8939 100644 --- a/pkg/flags/flags.go +++ b/pkg/flags/flags.go @@ -51,7 +51,7 @@ func setLogLevel(logLevel string) { if level == log.DebugLevel { flag.Set("stderrthreshold", "INFO") flag.Set("logtostderr", "true") - flag.Set("v", "6") // At 7 and higher, authorization tokens get logged. + flag.Set("v", "12") // At 7 and higher, authorization tokens get logged. // pipe klog entries to logrus klog.SetOutput(log.StandardLogger().Writer()) } diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index 876db1c0fd976..067736e111aac 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -19,7 +19,7 @@ import ( "github.com/linkerd/linkerd2/pkg/identity" "github.com/linkerd/linkerd2/pkg/issuercerts" "github.com/linkerd/linkerd2/pkg/k8s" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/linkerd/linkerd2/pkg/tls" "github.com/linkerd/linkerd2/pkg/version" log "github.com/sirupsen/logrus" @@ -188,11 +188,6 @@ var ExpectedServiceAccountNames = []string{ "linkerd-tap", } -type expectedPolicy struct { - resources []string - verbs []string -} - var ( retryWindow = 5 * time.Second requestTimeout = 30 * time.Second @@ -344,8 +339,6 @@ type Options struct { RetryDeadline time.Time CNIEnabled bool InstallManifest string - SourceCluster bool - TargetCluster bool MultiCluster bool } @@ -356,20 +349,19 @@ type HealthChecker struct { *Options // these fields are set in the process of running checks - kubeAPI *k8s.KubernetesAPI - kubeVersion *k8sVersion.Info - controlPlanePods []corev1.Pod - apiClient public.APIClient - latestVersions version.Channels - serverVersion string - linkerdConfig *configPb.All - uuid string - issuerCert *tls.Cred - trustAnchors []*x509.Certificate - cniDaemonSet *appsv1.DaemonSet - serviceMirrorNs string - 
remoteClusterConfigs []*sm.WatchedClusterConfig - addOns map[string]interface{} + kubeAPI *k8s.KubernetesAPI + kubeVersion *k8sVersion.Info + controlPlanePods []corev1.Pod + apiClient public.APIClient + latestVersions version.Channels + serverVersion string + linkerdConfig *configPb.All + uuid string + issuerCert *tls.Cred + trustAnchors []*x509.Certificate + cniDaemonSet *appsv1.DaemonSet + links []multicluster.Link + addOns map[string]interface{} } // NewHealthChecker returns an initialized HealthChecker diff --git a/pkg/healthcheck/healthcheck_multicluster.go b/pkg/healthcheck/healthcheck_multicluster.go index 26e42e503211a..fc91757a5b5c1 100644 --- a/pkg/healthcheck/healthcheck_multicluster.go +++ b/pkg/healthcheck/healthcheck_multicluster.go @@ -8,50 +8,28 @@ import ( "sort" "strings" - pb "github.com/linkerd/linkerd2/controller/gen/public" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" - tsclient "github.com/servicemeshinterface/smi-sdk-go/pkg/gen/client/split/clientset/versioned" + "github.com/linkerd/linkerd2/controller/gen/public" + "github.com/linkerd/linkerd2/pkg/multicluster" + "github.com/linkerd/linkerd2/pkg/servicemirror" corev1 "k8s.io/api/core/v1" "github.com/linkerd/linkerd2/pkg/k8s" "github.com/linkerd/linkerd2/pkg/tls" - v1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/clientcmd" ) const ( - // LinkerdMulticlusterSourceChecks adds a series of checks to validate - // the source side of a multicluster setup - LinkerdMulticlusterSourceChecks CategoryID = "linkerd-multicluster-source" - - // LinkerdMulticlusterTargetChecks add a series of checks to validate the - // targetside of a multicluster setup - LinkerdMulticlusterTargetChecks CategoryID = "linkerd-multicluster-target" - - linkerdServiceMirrorComponentName = "linkerd-service-mirror" - linkerdServiceMirrorClusterRoleName = "linkerd-service-mirror-access-local-resources" - linkerdServiceMirrorRoleName = 
"linkerd-service-mirror-read-remote-creds" + // LinkerdMulticlusterChecks adds a series of checks to validate a + // multicluster setup. + LinkerdMulticlusterChecks CategoryID = "linkerd-multicluster" + + linkerdServiceMirrorComponentName = "linkerd-service-mirror" + linkerdServiceMirrorSerivceAccountName = "linkerd-service-mirror-%s" + linkerdServiceMirrorClusterRoleName = "linkerd-service-mirror-access-local-resources-%s" + linkerdServiceMirrorRoleName = "linkerd-service-mirror-read-remote-creds-%s" ) -var expectedServiceMirrorClusterRolePolicies = []expectedPolicy{ - { - resources: []string{"endpoints", "services"}, - verbs: []string{"list", "get", "watch", "create", "delete", "update"}, - }, - { - resources: []string{"namespaces"}, - verbs: []string{"create", "list", "get", "watch"}, - }, -} - -var expectedServiceMirrorRolePolicies = []expectedPolicy{ - { - resources: []string{"secrets"}, - verbs: []string{"list", "get", "watch"}, - }, -} - var expectedServiceMirrorRemoteClusterPolicyVerbs = []string{ "get", "list", @@ -61,149 +39,103 @@ var expectedServiceMirrorRemoteClusterPolicyVerbs = []string{ func (hc *HealthChecker) multiClusterCategory() []category { return []category{ { - id: LinkerdMulticlusterSourceChecks, + id: LinkerdMulticlusterChecks, checkers: []checker{ + /* Link checks */ { - description: "service mirror controller is running", - hintAnchor: "l5d-multicluster-service-mirror-running", - retryDeadline: hc.RetryDeadline, - fatal: true, - check: func(context.Context) error { - return hc.checkServiceMirrorController() - }, - }, - { - description: "service mirror controller ClusterRoles exist", - hintAnchor: "l5d-multicluster-cluster-role-exist", + description: "Link CRD exists", + hintAnchor: "l5d-multicluster-link-crd-exists", + fatal: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkClusterRoles(true, []string{linkerdServiceMirrorClusterRoleName}, hc.serviceMirrorComponentsSelector()) - } - return 
&SkipError{Reason: "not checking muticluster"} + return hc.checkLinkCRD() }, }, { - description: "service mirror controller ClusterRoleBindings exist", - hintAnchor: "l5d-multicluster-cluster-role-binding-exist", + description: "Link resources are valid", + hintAnchor: "l5d-multicluster-links-are-valid", + fatal: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkClusterRoleBindings(true, []string{linkerdServiceMirrorClusterRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkLinks() } return &SkipError{Reason: "not checking muticluster"} }, }, + /* Service mirror controller checks */ { - description: "service mirror controller Roles exist", - hintAnchor: "l5d-multicluster-role-exist", + description: "service mirror controller has required permissions", + hintAnchor: "l5d-multicluster-source-rbac-correct", check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkRoles(true, hc.serviceMirrorNs, []string{linkerdServiceMirrorRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkServiceMirrorLocalRBAC() } return &SkipError{Reason: "not checking muticluster"} }, }, { - description: "service mirror controller RoleBindings exist", - hintAnchor: "l5d-multicluster-role-binding-exist", + description: "service mirror controllers are running", + hintAnchor: "l5d-multicluster-service-mirror-running", + retryDeadline: hc.RetryDeadline, + surfaceErrorOnRetry: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkRoleBindings(true, hc.serviceMirrorNs, []string{linkerdServiceMirrorRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkServiceMirrorController() } return &SkipError{Reason: "not checking muticluster"} }, }, + /* Target cluster access checks */ { - description: "service mirror controller ServiceAccounts exist", - hintAnchor: 
"l5d-multicluster-service-account-exist", + description: "remote cluster access credentials are valid", + hintAnchor: "l5d-smc-target-clusters-access", check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkServiceAccounts([]string{linkerdServiceMirrorComponentName}, hc.serviceMirrorNs, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkRemoteClusterConnectivity() } return &SkipError{Reason: "not checking muticluster"} }, }, - { - description: "service mirror controller has required permissions", - hintAnchor: "l5d-multicluster-source-rbac-correct", - check: func(context.Context) error { - return hc.checkServiceMirrorLocalRBAC() - }, - }, - { - description: "service mirror controller can access target clusters", - hintAnchor: "l5d-smc-target-clusters-access", - check: func(context.Context) error { - return hc.checkRemoteClusterConnectivity() - }, - }, - { - description: "all target cluster gateways are alive", - hintAnchor: "l5d-multicluster-target-gateways-alive", - check: func(ctx context.Context) error { - return hc.checkRemoteClusterGatewaysHealth(ctx) - }, - }, { description: "clusters share trust anchors", hintAnchor: "l5d-multicluster-clusters-share-anchors", check: func(ctx context.Context) error { - return hc.checkRemoteClusterAnchors() + if hc.Options.MultiCluster { + return hc.checkRemoteClusterAnchors() + } + return &SkipError{Reason: "not checking muticluster"} }, }, + /* Gateway mirror checks */ { - description: "multicluster daisy chaining is avoided", - hintAnchor: "l5d-multicluster-daisy-chaining", - warning: true, + description: "all gateway mirrors are healthy", + hintAnchor: "l5d-multicluster-gateways-endpoints", check: func(ctx context.Context) error { - return hc.checkDaisyChains() + if hc.Options.MultiCluster { + return hc.checkIfGatewayMirrorsHaveEndpoints(ctx) + } + return &SkipError{Reason: "not checking muticluster"} }, }, + /* Mirror service checks */ { description: "all 
mirror services have endpoints", hintAnchor: "l5d-multicluster-services-endpoints", - warning: true, - check: func(ctx context.Context) error { - return hc.checkIfMirrorServicesHaveEndpoints() - }, - }, - { - description: "all gateway mirrors have endpoints", - hintAnchor: "l5d-multicluster-gateways-endpoints", - warning: true, - check: func(ctx context.Context) error { - return hc.checkIfGatewayMirrorsHaveEndpoints() - }, - }, - { - description: "remote: all referenced gateways are valid", - hintAnchor: "l5d-multicluster-gateways-exist", - warning: true, check: func(ctx context.Context) error { - return hc.checkRemoteGateways() + if hc.Options.MultiCluster { + return hc.checkIfMirrorServicesHaveEndpoints() + } + return &SkipError{Reason: "not checking muticluster"} }, }, - }, - }, - { - id: LinkerdMulticlusterTargetChecks, - checkers: []checker{ { - description: "all cluster gateways are valid", - hintAnchor: "l5d-multicluster-gateways-exist", + description: "all mirror services are part of a Link", + hintAnchor: "l5d-multicluster-orphaned-services", warning: true, check: func(ctx context.Context) error { - targetCluster, err := hc.isTargetCluster() - if err != nil { - return err + if hc.Options.MultiCluster { + return hc.checkForOrphanedServices() } - if targetCluster || hc.TargetCluster { - err := hc.checkLocalGateways() - if err != nil { - return err - } - return hc.checkIfGatewaysHaveEndpoints() - } - return &SkipError{Reason: "not checking target cluster"} + return &SkipError{Reason: "not checking muticluster"} }, }, }, @@ -211,148 +143,250 @@ func (hc *HealthChecker) multiClusterCategory() []category { } } -func (hc *HealthChecker) serviceMirrorComponentsSelector() string { - return fmt.Sprintf("%s=%s", k8s.ControllerComponentLabel, linkerdServiceMirrorComponentName) -} +/* Link checks */ -func (hc *HealthChecker) checkServiceMirrorController() error { - options := metav1.ListOptions{ - LabelSelector: hc.serviceMirrorComponentsSelector(), +func (hc 
*HealthChecker) checkLinkCRD() error { + err := k8s.LinkAccess(hc.kubeAPI.Interface) + if err == nil { + hc.Options.MultiCluster = true + return nil } - result, err := hc.kubeAPI.AppsV1().Deployments(corev1.NamespaceAll).List(options) + if !hc.Options.MultiCluster { + return &SkipError{Reason: "not checking muticluster"} + } + return fmt.Errorf("multicluster.linkerd.io/Link CRD is missing: %s", err) +} + +func (hc *HealthChecker) checkLinks() error { + links, err := multicluster.GetLinks(hc.kubeAPI.DynamicClient) if err != nil { return err } - - // if we have explicitly requested for multicluster to be checked, error out - if len(result.Items) == 0 && hc.Options.SourceCluster { - return errors.New("Service mirror controller is not present") + if len(links) == 0 { + return &SkipError{Reason: "no links detected"} } + linkNames := []string{} + for _, l := range links { + linkNames = append(linkNames, fmt.Sprintf("\t* %s", l.TargetClusterName)) + } + hc.links = links + return &VerboseSuccess{Message: strings.Join(linkNames, "\n")} +} - if len(result.Items) > 0 { - hc.Options.SourceCluster = true +/* Service mirror controller checks */ - if len(result.Items) > 1 { - var errors []error - for _, smc := range result.Items { - errors = append(errors, fmt.Errorf("%s/%s", smc.Namespace, smc.Name)) - } - return fmt.Errorf("There are more than one service mirror controllers:\n%s", joinErrors(errors, 1)) +func (hc *HealthChecker) checkServiceMirrorLocalRBAC() error { + links := []string{} + errors := []string{} + + for _, link := range hc.links { + + err := hc.checkServiceAccounts( + []string{fmt.Sprintf(linkerdServiceMirrorSerivceAccountName, link.TargetClusterName)}, + link.Namespace, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) } - controller := result.Items[0] - if controller.Status.AvailableReplicas < 1 { - return fmt.Errorf("Service mirror controller is not available: %s/%s", controller.Namespace, 
controller.Name) + err = hc.checkClusterRoles( + true, + []string{fmt.Sprintf(linkerdServiceMirrorClusterRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) } - hc.serviceMirrorNs = controller.Namespace - return nil - } - return &SkipError{Reason: "not checking muticluster"} -} + err = hc.checkClusterRoleBindings( + true, + []string{fmt.Sprintf(linkerdServiceMirrorClusterRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } -func comparePermissions(expected, actual []string) error { - sort.Strings(expected) - sort.Strings(actual) + err = hc.checkRoles( + true, + link.Namespace, + []string{fmt.Sprintf(linkerdServiceMirrorRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } - expectedStr := strings.Join(expected, ",") - actualStr := strings.Join(actual, ",") + err = hc.checkRoleBindings( + true, + link.Namespace, + []string{fmt.Sprintf(linkerdServiceMirrorRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } - if expectedStr != actualStr { - return fmt.Errorf("expected %s, got %s", expectedStr, actualStr) + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) + } + if len(errors) > 0 { + return fmt.Errorf(strings.Join(errors, "\n")) } - return nil + if len(links) == 0 { + return &SkipError{Reason: "no links"} + } + + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func verifyRule(expected expectedPolicy, actual []v1.PolicyRule) error { - for _, rule := range actual { - if err := comparePermissions(expected.resources, rule.Resources); err == nil { - if err := comparePermissions(expected.verbs, rule.Verbs); err != nil { - 
return fmt.Errorf("unexpected verbs %s", err) - } - return nil +func (hc *HealthChecker) checkServiceMirrorController() error { + + errors := []error{} + clusterNames := []string{} + + for _, link := range hc.links { + options := metav1.ListOptions{ + LabelSelector: serviceMirrorComponentsSelector(link.TargetClusterName), + } + result, err := hc.kubeAPI.AppsV1().Deployments(corev1.NamespaceAll).List(options) + if err != nil { + return err + } + + if len(result.Items) > 1 { + errors = append(errors, fmt.Errorf("* too many service mirror controller deployments for Link %s", link.TargetClusterName)) + continue + } + if len(result.Items) == 0 { + errors = append(errors, fmt.Errorf("* no service mirror controller deployment for Link %s", link.TargetClusterName)) + continue } + + controller := result.Items[0] + if controller.Status.AvailableReplicas < 1 { + errors = append(errors, fmt.Errorf("* service mirror controller is not available: %s/%s", controller.Namespace, controller.Name)) + continue + } + clusterNames = append(clusterNames, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - return fmt.Errorf("could not fine rule for %s", strings.Join(expected.resources, ",")) + if len(errors) > 0 { + return joinErrors(errors, 2) + } + + if len(clusterNames) == 0 { + return &SkipError{Reason: "no links"} + } + + return &VerboseSuccess{Message: strings.Join(clusterNames, "\n")} } -func (hc *HealthChecker) checkServiceMirrorLocalRBAC() error { - if hc.Options.SourceCluster { - var errors []string +/* Target cluster access checks */ - clusterRole, err := hc.kubeAPI.RbacV1().ClusterRoles().Get(linkerdServiceMirrorClusterRoleName, metav1.GetOptions{}) +func (hc *HealthChecker) checkRemoteClusterConnectivity() error { + errors := []error{} + links := []string{} + for _, link := range hc.links { + // Load the credentials secret + secret, err := hc.kubeAPI.Interface.CoreV1().Secrets(link.Namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) if err != nil { - return 
fmt.Errorf("Could not obtain service mirror ClusterRole %s: %s", linkerdServiceMirrorClusterRoleName, err) + errors = append(errors, fmt.Errorf("* secret: [%s/%s]: %s", link.Namespace, link.ClusterCredentialsSecret, err)) + continue } - role, err := hc.kubeAPI.RbacV1().Roles(hc.serviceMirrorNs).Get(linkerdServiceMirrorRoleName, metav1.GetOptions{}) + config, err := servicemirror.ParseRemoteClusterSecret(secret) + if err != nil { - return fmt.Errorf("Could not obtain service mirror Role %s : %s", linkerdServiceMirrorRoleName, err) + errors = append(errors, fmt.Errorf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) + continue } - if len(clusterRole.Rules) != len(expectedServiceMirrorClusterRolePolicies) { - return fmt.Errorf("Service mirror ClusterRole %s has %d policy rules, expected %d", clusterRole.Name, len(clusterRole.Rules), len(expectedServiceMirrorClusterRolePolicies)) + clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) + if err != nil { + errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) + continue } - for _, rule := range expectedServiceMirrorClusterRolePolicies { - if err := verifyRule(rule, clusterRole.Rules); err != nil { - errors = append(errors, fmt.Sprintf("Service mirror ClusterRole %s: %s", clusterRole.Name, err)) - } + remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) + if err != nil { + errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) + continue } - if len(role.Rules) != len(expectedServiceMirrorRolePolicies) { - return fmt.Errorf("Service mirror Role %s has %d policy rules, expected %d", role.Name, len(role.Rules), len(expectedServiceMirrorRolePolicies)) + var verbs []string + if err := 
hc.checkCanPerformAction(remoteAPI, "get", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "get") } - for _, rule := range expectedServiceMirrorRolePolicies { - if err := verifyRule(rule, role.Rules); err != nil { - errors = append(errors, fmt.Sprintf("Service mirror Role %s: %s", role.Name, err)) - } + if err := hc.checkCanPerformAction(remoteAPI, "list", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "list") } - if len(errors) > 0 { - return fmt.Errorf(strings.Join(errors, "\n")) + if err := hc.checkCanPerformAction(remoteAPI, "watch", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "watch") } - return nil + if err := comparePermissions(expectedServiceMirrorRemoteClusterPolicyVerbs, verbs); err != nil { + errors = append(errors, fmt.Errorf("* cluster: [%s]: Insufficient Service permissions: %s", config.ClusterName, err)) + } + + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - return &SkipError{Reason: "not checking muticluster"} -} -func (hc *HealthChecker) checkRemoteClusterAnchors() error { - if len(hc.remoteClusterConfigs) == 0 { - return &SkipError{Reason: "no target cluster configs"} + if len(errors) > 0 { + return joinErrors(errors, 2) + } + + if len(links) == 0 { + return &SkipError{Reason: "no links"} } + return &VerboseSuccess{Message: strings.Join(links, "\n")} +} + +func (hc *HealthChecker) checkRemoteClusterAnchors() error { localAnchors, err := tls.DecodePEMCertificates(hc.linkerdConfig.Global.IdentityContext.TrustAnchorsPem) if err != nil { return fmt.Errorf("Cannot parse source trust anchors: %s", err) } + errors := []string{} + links := []string{} + for _, link := range hc.links { + // Load the credentials secret + secret, err := hc.kubeAPI.Interface.CoreV1().Secrets(link.Namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) + if err != nil { + errors = append(errors, fmt.Sprintf("* secret: [%s/%s]: %s", 
link.Namespace, link.ClusterCredentialsSecret, err)) + continue + } - var offendingClusters []string - for _, cfg := range hc.remoteClusterConfigs { + config, err := servicemirror.ParseRemoteClusterSecret(secret) + if err != nil { + errors = append(errors, fmt.Sprintf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) + continue + } - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(cfg.APIConfig) + clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to parse api config", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) continue } remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to instantiate api", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) continue } - _, cfMap, err := FetchLinkerdConfigMap(remoteAPI, cfg.LinkerdNamespace) + _, cfMap, err := FetchLinkerdConfigMap(remoteAPI, link.TargetClusterLinkerdNamespace) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to fetch anchors: %s", cfg.ClusterName, err)) + errors = append(errors, fmt.Sprintf("* %s: unable to fetch anchors: %s", link.TargetClusterName, err)) continue } + remoteAnchors, err := tls.DecodePEMCertificates(cfMap.Global.IdentityContext.TrustAnchorsPem) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: cannot parse trust anchors", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s: cannot parse trust anchors", link.TargetClusterName)) continue } @@ -360,7 +394,7 
@@ func (hc *HealthChecker) checkRemoteClusterAnchors() error { // same, we can only compare certs one way and be sure we have // identical anchors if len(remoteAnchors) != len(localAnchors) { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s", link.TargetClusterName)) continue } @@ -372,363 +406,165 @@ func (hc *HealthChecker) checkRemoteClusterAnchors() error { for _, remote := range remoteAnchors { local, ok := localAnchorsMap[string(remote.Signature)] if !ok || !local.Equal(remote) { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s", link.TargetClusterName)) break } } + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - if len(offendingClusters) > 0 { - return fmt.Errorf("Problematic clusters:\n %s", strings.Join(offendingClusters, "\n ")) + if len(errors) > 0 { + return fmt.Errorf("Problematic clusters:\n %s", strings.Join(errors, "\n ")) } - return nil -} + if len(links) == 0 { + return &SkipError{Reason: "no links"} + } -func serviceExported(svc corev1.Service) bool { - _, hasGtwName := svc.Annotations[k8s.GatewayNameAnnotation] - _, hasGtwNs := svc.Annotations[k8s.GatewayNsAnnotation] - return hasGtwName && hasGtwNs + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func (hc *HealthChecker) checkDaisyChains() error { - if hc.Options.SourceCluster { - errs := []error{} +/* Gateway mirror checks */ - svcs, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return err - } - for _, svc := range svcs.Items { - _, isMirror := svc.Labels[k8s.MirroredResourceLabel] - if isMirror && serviceExported(svc) { - errs = append(errs, fmt.Errorf("mirror service %s.%s is exported", svc.Name, svc.Namespace)) - } - } +func (hc *HealthChecker) checkIfGatewayMirrorsHaveEndpoints(ctx context.Context) error { + links := []string{} + 
errors := []error{} - ts, err := tsclient.NewForConfig(hc.kubeAPI.Config) + for _, link := range hc.links { + selector := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s,%s=%s", k8s.MirroredGatewayLabel, k8s.RemoteClusterNameLabel, link.TargetClusterName)} + gatewayMirrors, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(selector) if err != nil { - return err - } - splits, err := ts.SplitV1alpha1().TrafficSplits(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return err - } - for _, split := range splits.Items { - apex, err := hc.kubeAPI.CoreV1().Services(split.Namespace).Get(split.Spec.Service, metav1.GetOptions{}) - if err != nil { - return err - } - if serviceExported(*apex) { - for _, backend := range split.Spec.Backends { - if backend.Weight.IsZero() { - continue - } - leaf, err := hc.kubeAPI.CoreV1().Services(split.Namespace).Get(backend.Service, metav1.GetOptions{}) - if err != nil { - return err - } - _, isMirror := leaf.Labels[k8s.MirroredResourceLabel] - if isMirror { - errs = append(errs, fmt.Errorf("exported service %s.%s routes to mirror service %s.%s via traffic split %s.%s", - apex.Name, apex.Namespace, leaf.Name, leaf.Namespace, split.Name, split.Namespace, - )) - } - } - } - } - if len(errs) > 0 { - messages := []string{} - for _, err := range errs { - messages = append(messages, fmt.Sprintf("* %s", err.Error())) - } - return errors.New(strings.Join(messages, "\n")) + errors = append(errors, err) + continue } - return nil - } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkLocalGateways() error { - - errs := checkGateways(hc.kubeAPI) - if len(errs) > 0 { - return joinErrors(errs, 1) - } - return nil -} -func (hc *HealthChecker) checkRemoteGateways() error { + if len(gatewayMirrors.Items) != 1 { + errors = append(errors, fmt.Errorf("wrong number (%d) of probe gateways for target cluster %s", len(gatewayMirrors.Items), link.TargetClusterName)) + continue + } - if 
len(hc.remoteClusterConfigs) == 0 { - return &SkipError{Reason: "no target cluster configs"} - } + svc := gatewayMirrors.Items[0] - var offendingClusters []error - for _, cfg := range hc.remoteClusterConfigs { - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(cfg.APIConfig) - if err != nil { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: unable to parse api config", cfg.ClusterName)) + // Check if there is a relevant end-point + endpoints, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) + if err != nil || len(endpoints.Subsets) == 0 { + errors = append(errors, fmt.Errorf("%s.%s mirrored from cluster [%s] has no endpoints", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel])) continue } - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) + // Check gateway liveness according to probes + req := public.GatewaysRequest{ + TimeWindow: "1m", + RemoteClusterName: link.TargetClusterName, + } + rsp, err := hc.apiClient.Gateways(ctx, &req) if err != nil { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: unable to instantiate api", cfg.ClusterName)) + errors = append(errors, fmt.Errorf("failed to fetch gateway metrics for %s.%s: %s", svc.Name, svc.Namespace, err)) continue } - - errs := checkGateways(remoteAPI) - if len(errs) > 0 { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: remote cluster has invalid gateways:\n%s", cfg.ClusterName, joinErrors(errs, 2).Error())) + table := rsp.GetOk().GetGatewaysTable() + if table == nil { + errors = append(errors, fmt.Errorf("failed to fetch gateway metrics for %s.%s: %s", svc.Name, svc.Namespace, rsp.GetError().GetError())) continue } - } - if len(offendingClusters) > 0 { - return joinErrors(offendingClusters, 1) - } - return nil -} - -func checkGateways(api *k8s.KubernetesAPI) []error { - errs := []error{} - services, err := 
api.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return []error{err} - } - - for _, svc := range services.Items { - if serviceExported(svc) { - // Check if there is a relevant gateway - gatewayName := svc.Annotations[k8s.GatewayNameAnnotation] - gatewayNamespace := svc.Annotations[k8s.GatewayNsAnnotation] - gateway, err := api.CoreV1().Services(gatewayNamespace).Get(gatewayName, metav1.GetOptions{}) - if err != nil { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway that does not exist: %s.%s", svc.Name, svc.Namespace, gatewayName, gatewayNamespace)) - continue - } - - // check if there is an external IP for the gateway service - if len(gateway.Status.LoadBalancer.Ingress) <= 0 { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway with no external IP: %s.%s", svc.Name, svc.Namespace, gatewayName, gatewayNamespace)) - } - - // check if the gateway service has relevant ports - portNames := []string{k8s.GatewayPortName, k8s.ProbePortName} - for _, portName := range portNames { - if !ifPortExists(gateway.Spec.Ports, portName) { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway that is missing port %s: %s.%s", svc.Name, svc.Namespace, portName, gatewayName, gatewayNamespace)) - } - } - + if len(table.Rows) != 1 { + errors = append(errors, fmt.Errorf("wrong number of (%d) gateway metrics entries for %s.%s", len(table.Rows), svc.Name, svc.Namespace)) + continue } - } - return errs -} -func ifPortExists(ports []corev1.ServicePort, portName string) bool { - for _, port := range ports { - if port.Name == portName { - return true + row := table.Rows[0] + if !row.Alive { + errors = append(errors, fmt.Errorf("liveness checks failed for %s", link.TargetClusterName)) + continue } - } - return false -} - -func (hc *HealthChecker) isTargetCluster() (bool, error) { - services, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - 
if err != nil { - return false, err + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) + } + if len(errors) > 0 { + return joinErrors(errors, 1) } - for _, service := range services.Items { - if serviceExported(service) { - return true, nil - } + if len(links) == 0 { + return &SkipError{Reason: "no links"} } - return false, nil + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func (hc *HealthChecker) checkRemoteClusterConnectivity() error { - if hc.Options.SourceCluster { - options := metav1.ListOptions{ - FieldSelector: fmt.Sprintf("%s=%s", "type", k8s.MirrorSecretType), - } - secrets, err := hc.kubeAPI.CoreV1().Secrets(corev1.NamespaceAll).List(options) - if err != nil { - return err - } - - if len(secrets.Items) == 0 { - return &SkipError{Reason: "no target cluster configs"} - } +/* Mirror service checks */ - var errors []error - for _, s := range secrets.Items { - secret := s - config, err := sm.ParseRemoteClusterSecret(&secret) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) - continue - } - - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) - continue - } - - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) - continue - } - - var verbs []string - if err := hc.checkCanPerformAction(remoteAPI, "get", corev1.NamespaceAll, "", "v1", "services"); err == nil { - verbs = append(verbs, "get") - } - - if err := hc.checkCanPerformAction(remoteAPI, "list", corev1.NamespaceAll, "", "v1", "services"); err == nil 
{ - verbs = append(verbs, "list") - } - - if err := hc.checkCanPerformAction(remoteAPI, "watch", corev1.NamespaceAll, "", "v1", "services"); err == nil { - verbs = append(verbs, "watch") - } - - if err := comparePermissions(expectedServiceMirrorRemoteClusterPolicyVerbs, verbs); err != nil { - errors = append(errors, fmt.Errorf("* cluster: [%s]: Insufficient Service permissions: %s", config.ClusterName, err)) - } - - hc.remoteClusterConfigs = append(hc.remoteClusterConfigs, config) - - } +func (hc *HealthChecker) checkIfMirrorServicesHaveEndpoints() error { - if len(errors) > 0 { - return joinErrors(errors, 2) - } - return nil + var servicesWithNoEndpoints []string + selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) + mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return err } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkRemoteClusterGatewaysHealth(ctx context.Context) error { - if hc.Options.SourceCluster { - if hc.apiClient == nil { - return errors.New("public api client uninitialized") - } - req := &pb.GatewaysRequest{ - TimeWindow: "1m", - } - rsp, err := hc.apiClient.Gateways(ctx, req) - if err != nil { - return err - } - var deadGateways []string - var aliveGateways []string - if len(rsp.GetOk().GatewaysTable.Rows) == 0 { - return &SkipError{Reason: "no target gateways"} - } - for _, gtw := range rsp.GetOk().GatewaysTable.Rows { - if gtw.Alive { - aliveGateways = append(aliveGateways, fmt.Sprintf(" * cluster: [%s], gateway: [%s/%s]", gtw.ClusterName, gtw.Namespace, gtw.Name)) - } else { - deadGateways = append(deadGateways, fmt.Sprintf("* cluster: [%s], gateway: [%s/%s]", gtw.ClusterName, gtw.Namespace, gtw.Name)) - } + for _, svc := range mirrorServices.Items { + // Check if there is a relevant end-point + endpoint, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, 
metav1.GetOptions{}) + if err != nil || len(endpoint.Subsets) == 0 { + servicesWithNoEndpoints = append(servicesWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s] (gateway: [%s/%s])", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel], svc.Labels[k8s.RemoteGatewayNsLabel], svc.Labels[k8s.RemoteGatewayNameLabel])) } - - if len(deadGateways) > 0 { - return fmt.Errorf("Some gateways are not alive:\n %s", strings.Join(deadGateways, "\n ")) - } - return &VerboseSuccess{Message: strings.Join(aliveGateways, "\n")} } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkIfMirrorServicesHaveEndpoints() error { - if hc.Options.SourceCluster { - - var servicesWithNoEndpoints []string - selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) - mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) - if err != nil { - return err - } - for _, svc := range mirrorServices.Items { - // Check if there is a relevant end-point - endpoint, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) - if err != nil || len(endpoint.Subsets) == 0 { - servicesWithNoEndpoints = append(servicesWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s] (gateway: [%s/%s])", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel], svc.Labels[k8s.RemoteGatewayNsLabel], svc.Labels[k8s.RemoteGatewayNameLabel])) - } - } - - if len(servicesWithNoEndpoints) > 0 { - return fmt.Errorf("Some mirror services do not have endpoints:\n %s", strings.Join(servicesWithNoEndpoints, "\n ")) - } - return nil + if len(servicesWithNoEndpoints) > 0 { + return fmt.Errorf("Some mirror services do not have endpoints:\n %s", strings.Join(servicesWithNoEndpoints, "\n ")) } - return &SkipError{Reason: "not checking muticluster"} -} -func (hc *HealthChecker) checkIfGatewayMirrorsHaveEndpoints() error { - if 
hc.Options.SourceCluster { + if len(mirrorServices.Items) == 0 { + return &SkipError{Reason: "no mirror services"} + } - var gatewayMirrorsWithNoEndpoints []string - gatewayServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: k8s.MirroredGatewayLabel}) - if err != nil { - return err - } + return nil +} - for _, svc := range gatewayServices.Items { - // Check if there is a relevant end-point - endpoints, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) - if err != nil || len(endpoints.Subsets) == 0 { - gatewayMirrorsWithNoEndpoints = append(gatewayMirrorsWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s]", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel])) - } - } +func (hc *HealthChecker) checkForOrphanedServices() error { + errors := []error{} - if len(gatewayMirrorsWithNoEndpoints) > 0 { - return fmt.Errorf("Some gateway mirrors do not have endpoints:\n %s", strings.Join(gatewayMirrorsWithNoEndpoints, "\n ")) - } - return nil + selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) + mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return err } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkIfGatewaysHaveEndpoints() error { - var gatewaysWithNoEndpoints []string - services, err := hc.kubeAPI.CoreV1().Services(corev1.NamespaceAll).List(metav1.ListOptions{}) + links, err := multicluster.GetLinks(hc.kubeAPI.DynamicClient) if err != nil { return err } - for _, service := range services.Items { - if gatewayService(service) { - // Check if there is a relevant end-point - endpoints, err := hc.kubeAPI.CoreV1().Endpoints(service.Namespace).Get(service.Name, metav1.GetOptions{}) - if err != nil || len(endpoints.Subsets) == 0 { - gatewaysWithNoEndpoints = append(gatewaysWithNoEndpoints, 
fmt.Sprintf("%s.%s", service.Name, service.Namespace)) + for _, svc := range mirrorServices.Items { + targetCluster := svc.Labels[k8s.RemoteClusterNameLabel] + hasLink := false + for _, link := range links { + if link.TargetClusterName == targetCluster { + hasLink = true + break } } + if !hasLink { + errors = append(errors, fmt.Errorf("mirror service %s.%s is not part of any Link", svc.Name, svc.Namespace)) + } + } + if len(mirrorServices.Items) == 0 { + return &SkipError{Reason: "no mirror services"} } - if len(gatewaysWithNoEndpoints) > 0 { - return fmt.Errorf("Some gateway services do not have endpoints:\n %s", strings.Join(gatewaysWithNoEndpoints, "\n ")) + if len(errors) > 0 { + return joinErrors(errors, 1) } return nil - } -func gatewayService(svc corev1.Service) bool { - _, isGtw := svc.Annotations[k8s.MulticlusterGatewayAnnotation] - return isGtw +/* util */ + +func serviceMirrorComponentsSelector(targetCluster string) string { + return fmt.Sprintf("%s=%s,%s=%s", + k8s.ControllerComponentLabel, linkerdServiceMirrorComponentName, + k8s.RemoteClusterNameLabel, targetCluster) } func joinErrors(errs []error, tabDepth int) error { @@ -739,3 +575,17 @@ func joinErrors(errs []error, tabDepth int) error { } return errors.New(strings.Join(errStrings, "\n")) } + +func comparePermissions(expected, actual []string) error { + sort.Strings(expected) + sort.Strings(actual) + + expectedStr := strings.Join(expected, ",") + actualStr := strings.Join(actual, ",") + + if expectedStr != actualStr { + return fmt.Errorf("expected %s, got %s", expectedStr, actualStr) + } + + return nil +} diff --git a/pkg/k8s/authz.go b/pkg/k8s/authz.go index f8f7cf4d76bdb..09eeb63426f47 100644 --- a/pkg/k8s/authz.go +++ b/pkg/k8s/authz.go @@ -140,6 +140,25 @@ func checkEndpointSlicesExist(k8sClient kubernetes.Interface) error { return errors.New("no EndpointSlice resources exist in the cluster") } +// LinkAccess checks whether the Link CRD is installed on the cluster and the +// client is 
authorized to access Links. +func LinkAccess(k8sClient kubernetes.Interface) error { + res, err := k8sClient.Discovery().ServerResourcesForGroupVersion(LinkAPIGroupVersion) + if err != nil { + return err + } + + if res.GroupVersion == LinkAPIGroupVersion { + for _, apiRes := range res.APIResources { + if apiRes.Kind == LinkKind { + return ResourceAuthz(k8sClient, "", "list", LinkAPIGroup, LinkAPIVersion, "links", "") + } + } + } + + return errors.New("Link CRD not found") +} + // ClusterAccess verifies whether k8sClient is authorized to access all pods in // all namespaces in the cluster. func ClusterAccess(k8sClient kubernetes.Interface) error { diff --git a/pkg/k8s/k8s.go b/pkg/k8s/k8s.go index 892a9e888f03e..e480d3375b8c1 100644 --- a/pkg/k8s/k8s.go +++ b/pkg/k8s/k8s.go @@ -28,6 +28,11 @@ const ( ServiceProfileAPIVersion = "linkerd.io/v1alpha2" ServiceProfileKind = "ServiceProfile" + LinkAPIGroup = "multicluster.linkerd.io" + LinkAPIVersion = "v1alpha1" + LinkAPIGroupVersion = "multicluster.linkerd.io/v1alpha1" + LinkKind = "Link" + // special case k8s job label, to not conflict with Prometheus' job label l5dJob = "k8s_job" ) diff --git a/pkg/multicluster/link.go b/pkg/multicluster/link.go new file mode 100644 index 0000000000000..2503301a23880 --- /dev/null +++ b/pkg/multicluster/link.go @@ -0,0 +1,273 @@ +package multicluster + +import ( + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/linkerd/linkerd2/pkg/k8s" + consts "github.com/linkerd/linkerd2/pkg/k8s" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +type ( + // ProbeSpec defines how a gateway should be queried for health. Once per + // period, the probe workers will send an HTTP request to the remote gateway + // on the given port with the given path and expect a HTTP 200 response. 
+ ProbeSpec struct { + Path string + Port uint32 + Period time.Duration + } + + // Link is an internal representation of the link.multicluster.linkerd.io + // custom resource. It defines a multicluster link to a gateway in a + // target cluster and is configures the behavior of a service mirror + // controller. + Link struct { + Name string + Namespace string + TargetClusterName string + TargetClusterDomain string + TargetClusterLinkerdNamespace string + ClusterCredentialsSecret string + GatewayAddress string + GatewayPort uint32 + GatewayIdentity string + ProbeSpec ProbeSpec + } +) + +// LinkGVR is the Group Version and Resource of the Link custom resource. +var LinkGVR = schema.GroupVersionResource{ + Group: k8s.LinkAPIGroup, + Version: k8s.LinkAPIVersion, + Resource: "links", +} + +func (ps ProbeSpec) String() string { + return fmt.Sprintf("ProbeSpec: {path: %s, port: %d, period: %s}", ps.Path, ps.Port, ps.Period) +} + +// NewLink parses an unstructured link.multicluster.linkerd.io resource and +// converts it to a structured internal representation. 
+func NewLink(u unstructured.Unstructured) (Link, error) { + + spec, ok := u.Object["spec"] + if !ok { + return Link{}, errors.New("Field 'spec' is missing") + } + specObj, ok := spec.(map[string]interface{}) + if !ok { + return Link{}, errors.New("Field 'spec' is not an object") + } + + ps, ok := specObj["probeSpec"] + if !ok { + return Link{}, errors.New("Field 'probeSpec' is missing") + } + psObj, ok := ps.(map[string]interface{}) + if !ok { + return Link{}, errors.New("Field 'probeSpec' it not an object") + } + + probeSpec, err := newProbeSpec(psObj) + if err != nil { + return Link{}, err + } + + targetClusterName, err := stringField(specObj, "targetClusterName") + if err != nil { + return Link{}, err + } + + targetClusterDomain, err := stringField(specObj, "targetClusterDomain") + if err != nil { + return Link{}, err + } + + targetClusterLinkerdNamespace, err := stringField(specObj, "targetClusterLinkerdNamespace") + if err != nil { + return Link{}, err + } + + clusterCredentialsSecret, err := stringField(specObj, "clusterCredentialsSecret") + if err != nil { + return Link{}, err + } + + gatewayAddress, err := stringField(specObj, "gatewayAddress") + if err != nil { + return Link{}, err + } + + portStr, err := stringField(specObj, "gatewayPort") + if err != nil { + return Link{}, err + } + gatewayPort, err := strconv.ParseUint(portStr, 10, 32) + if err != nil { + return Link{}, err + } + + gatewayIdentity, err := stringField(specObj, "gatewayIdentity") + if err != nil { + return Link{}, err + } + + return Link{ + Name: u.GetName(), + Namespace: u.GetNamespace(), + TargetClusterName: targetClusterName, + TargetClusterDomain: targetClusterDomain, + TargetClusterLinkerdNamespace: targetClusterLinkerdNamespace, + ClusterCredentialsSecret: clusterCredentialsSecret, + GatewayAddress: gatewayAddress, + GatewayPort: uint32(gatewayPort), + GatewayIdentity: gatewayIdentity, + ProbeSpec: probeSpec, + }, nil +} + +// ToUnstructured converts a Link struct into an 
unstructured resource that can +// be used by a kubernetes dynamic client. +func (l Link) ToUnstructured() unstructured.Unstructured { + return unstructured.Unstructured{ + + Object: map[string]interface{}{ + "apiVersion": k8s.LinkAPIGroupVersion, + "kind": k8s.LinkKind, + "metadata": map[string]interface{}{ + "name": l.Name, + "namespace": l.Namespace, + }, + "spec": map[string]interface{}{ + "targetClusterName": l.TargetClusterName, + "targetClusterDomain": l.TargetClusterDomain, + "targetClusterLinkerdNamespace": l.TargetClusterLinkerdNamespace, + "clusterCredentialsSecret": l.ClusterCredentialsSecret, + "gatewayAddress": l.GatewayAddress, + "gatewayPort": fmt.Sprintf("%d", l.GatewayPort), + "gatewayIdentity": l.GatewayIdentity, + "probeSpec": map[string]interface{}{ + "path": l.ProbeSpec.Path, + "port": fmt.Sprintf("%d", l.ProbeSpec.Port), + "period": l.ProbeSpec.Period.String(), + }, + }, + }, + } +} + +// ExtractProbeSpec parses the ProbSpec from a gateway service's annotations. +func ExtractProbeSpec(gateway *corev1.Service) (ProbeSpec, error) { + path := gateway.Annotations[consts.GatewayProbePath] + if path == "" { + return ProbeSpec{}, errors.New("probe path is empty") + } + + port, err := extractPort(gateway.Spec.Ports, consts.ProbePortName) + if err != nil { + return ProbeSpec{}, err + } + + period, err := strconv.ParseUint(gateway.Annotations[consts.GatewayProbePeriod], 10, 32) + if err != nil { + return ProbeSpec{}, err + } + + return ProbeSpec{ + Path: path, + Port: port, + Period: time.Duration(period) * time.Second, + }, nil +} + +// GetLinks fetchs a list of all Link objects in the cluster. 
+func GetLinks(client dynamic.Interface) ([]Link, error) { + list, err := client.Resource(LinkGVR).List(metav1.ListOptions{}) + if err != nil { + return nil, err + } + links := []Link{} + errs := []string{} + for _, u := range list.Items { + link, err := NewLink(u) + if err != nil { + errs = append(errs, fmt.Sprintf("failed to parse Link %s: %s", u.GetName(), err)) + } else { + links = append(links, link) + } + } + if len(errs) > 0 { + return nil, errors.New(strings.Join(errs, "\n")) + } + return links, nil +} + +// GetLink fetches a Link object from Kubernetes by name/namespace. +func GetLink(client dynamic.Interface, namespace, name string) (Link, error) { + unstructured, err := client.Resource(LinkGVR).Namespace(namespace).Get(name, metav1.GetOptions{}) + if err != nil { + return Link{}, err + } + return NewLink(*unstructured) +} + +func extractPort(port []corev1.ServicePort, portName string) (uint32, error) { + for _, p := range port { + if p.Name == portName { + return uint32(p.Port), nil + } + } + return 0, fmt.Errorf("could not find port with name %s", portName) +} + +func newProbeSpec(obj map[string]interface{}) (ProbeSpec, error) { + periodStr, err := stringField(obj, "period") + if err != nil { + return ProbeSpec{}, err + } + period, err := time.ParseDuration(periodStr) + if err != nil { + return ProbeSpec{}, err + } + + path, err := stringField(obj, "path") + if err != nil { + return ProbeSpec{}, err + } + + portStr, err := stringField(obj, "port") + if err != nil { + return ProbeSpec{}, err + } + port, err := strconv.ParseUint(portStr, 10, 32) + if err != nil { + return ProbeSpec{}, err + } + + return ProbeSpec{ + Path: path, + Port: uint32(port), + Period: period, + }, nil +} + +func stringField(obj map[string]interface{}, key string) (string, error) { + value, ok := obj[key] + if !ok { + return "", fmt.Errorf("Field '%s' is missing", key) + } + str, ok := value.(string) + if !ok { + return "", fmt.Errorf("Field '%s' is not a string", key) + } + return 
str, nil +} diff --git a/test/integration/install_test.go b/test/integration/install_test.go index d36e10dc51eca..b2b8d2506db21 100644 --- a/test/integration/install_test.go +++ b/test/integration/install_test.go @@ -863,7 +863,6 @@ func TestUninstallMulticluster(t *testing.T) { } else { exec := append([]string{"multicluster"}, []string{ "install", - "--log-level", "debug", "--namespace", TestHelper.GetMulticlusterNamespace(), }...) out, stderr, err := TestHelper.LinkerdRun(exec...) diff --git a/test/integration/testdata/check.multicluster.golden b/test/integration/testdata/check.multicluster.golden index 30dc2dfdb69f0..2d02d8d1260a7 100644 --- a/test/integration/testdata/check.multicluster.golden +++ b/test/integration/testdata/check.multicluster.golden @@ -68,17 +68,8 @@ linkerd-grafana √ grafana add-on config map exists √ grafana pod is running -linkerd-multicluster-source ---------------------------- -√ service mirror controller is running -√ service mirror controller ClusterRoles exist -√ service mirror controller ClusterRoleBindings exist -√ service mirror controller Roles exist -√ service mirror controller RoleBindings exist -√ service mirror controller ServiceAccounts exist -√ service mirror controller has required permissions -√ multicluster daisy chaining is avoided -√ all mirror services have endpoints -√ all gateway mirrors have endpoints +linkerd-multicluster +-------------------- +√ Link CRD exists Status check results are √ diff --git a/test/integration/testdata/check.multicluster.proxy.golden b/test/integration/testdata/check.multicluster.proxy.golden index 4e93d3031a467..5849a56d319d8 100644 --- a/test/integration/testdata/check.multicluster.proxy.golden +++ b/test/integration/testdata/check.multicluster.proxy.golden @@ -75,17 +75,8 @@ linkerd-grafana √ grafana add-on config map exists √ grafana pod is running -linkerd-multicluster-source ---------------------------- -√ service mirror controller is running -√ service mirror controller ClusterRoles 
exist -√ service mirror controller ClusterRoleBindings exist -√ service mirror controller Roles exist -√ service mirror controller RoleBindings exist -√ service mirror controller ServiceAccounts exist -√ service mirror controller has required permissions -√ multicluster daisy chaining is avoided -√ all mirror services have endpoints -√ all gateway mirrors have endpoints +linkerd-multicluster +-------------------- +√ Link CRD exists Status check results are √ diff --git a/testutil/test_helper.go b/testutil/test_helper.go index 4c9501b76eb3c..3c9356947ba3b 100644 --- a/testutil/test_helper.go +++ b/testutil/test_helper.go @@ -113,8 +113,7 @@ func NewGenericTestHelper( // MulticlusterDeployReplicas is a map containing the number of replicas for each Deployment and the main // container name for multicluster components var MulticlusterDeployReplicas = map[string]DeploySpec{ - "linkerd-gateway": {1, []string{"nginx"}}, - "linkerd-service-mirror": {1, []string{"service-mirror"}}, + "linkerd-gateway": {1, []string{"nginx"}}, } // NewTestHelper creates a new instance of TestHelper for the current test run.