Adding an allocator service that acts as a reverse proxy. #768

Merged · 1 commit · May 21, 2019

22 changes: 20 additions & 2 deletions build/Makefile
@@ -86,6 +86,7 @@ build_tag = agones-build:$(build_version)
controller_tag = $(REGISTRY)/agones-controller:$(VERSION)
sidecar_tag = $(REGISTRY)/agones-sdk:$(VERSION)
ping_tag = $(REGISTRY)/agones-ping:$(VERSION)
allocator_tag = $(REGISTRY)/agones-allocator:$(VERSION)

gomod_on = GO111MODULE=on

@@ -197,7 +198,7 @@ include ./includes/sdk.mk
build: build-images build-sdks

# build the docker images
build-images: build-controller-image build-agones-sdk-image build-ping-image
build-images: build-controller-image build-agones-sdk-image build-ping-image build-allocator-image

# package the current agones helm chart
build-chart: RELEASE_VERSION ?= $(base_version)
@@ -262,12 +263,13 @@ test-install-yaml:
diff /tmp/agones-install/install.yaml.sorted /tmp/agones-install/install.current.yaml.sorted

# Push all the images up to $(REGISTRY)
push: push-controller-image push-agones-sdk-image push-ping-image
push: push-controller-image push-agones-sdk-image push-ping-image push-allocator-image

# Installs the current development version of Agones into the Kubernetes cluster
install: ALWAYS_PULL_SIDECAR := true
install: IMAGE_PULL_POLICY := "Always"
install: PING_SERVICE_TYPE := "LoadBalancer"
install: ALLOCATOR_SERVICE_TYPE := "LoadBalancer"
install: CRD_CLEANUP := true
install: $(ensure-build-image) install-custom-pull-secret
$(DOCKER_RUN) \
@@ -276,6 +278,7 @@ install: $(ensure-build-image) install-custom-pull-secret
--set agones.image.controller.pullPolicy=$(IMAGE_PULL_POLICY),agones.image.sdk.alwaysPull=$(ALWAYS_PULL_SIDECAR) \
--set agones.image.controller.pullSecret=$(IMAGE_PULL_SECRET) \
--set agones.ping.http.serviceType=$(PING_SERVICE_TYPE),agones.ping.udp.serviceType=$(PING_SERVICE_TYPE) \
--set agones.allocator.http.serviceType=$(ALLOCATOR_SERVICE_TYPE) \
--set agones.crds.cleanupOnDelete=$(CRD_CLEANUP) \
agones $(mount_path)/install/helm/agones/

@@ -351,6 +354,20 @@ push-ping-image: $(ensure-build-image)
build-ping-image: $(ensure-build-image) build-ping-binary build-licenses build-required-src-dist
docker build $(agones_path)/cmd/ping/ --tag=$(ping_tag) $(DOCKER_BUILD_ARGS)

# Build a static binary for the allocator service
build-allocator-binary: $(ensure-build-image)
$(GO_BUILD_LINUX_AMD64) \
-tags $(GO_BUILD_TAGS) -o $(go_build_base_path)/cmd/allocator/bin/allocator \
$(go_rebuild_flags) $(go_version_flags) -installsuffix cgo $(agones_package)/cmd/allocator

# Pushes up the allocator image
push-allocator-image: $(ensure-build-image)
docker push $(allocator_tag)

# Build the image for the allocator service
build-allocator-image: $(ensure-build-image) build-allocator-binary build-licenses build-required-src-dist
docker build $(agones_path)/cmd/allocator/ --tag=$(allocator_tag) $(DOCKER_BUILD_ARGS)

# push the gameservers sidecar image
push-agones-sdk-image: $(ensure-build-image)
docker push $(sidecar_tag)
@@ -360,6 +377,7 @@ gen-install: $(ensure-build-image)
docker run --rm $(common_mounts) $(DOCKER_RUN_ARGS) $(build_tag) bash -c \
'helm template --name=agones-manual --namespace agones-system $(mount_path)/install/helm/agones \
--set agones.controller.generateTLS=false \
--set agones.allocator.generateTLS=false \
--set agones.crds.cleanupOnDelete=false \
> $(mount_path)/install/yaml/install.yaml'

2 changes: 1 addition & 1 deletion build/build-required-src-dist.sh
@@ -26,7 +26,7 @@ tar -zcf ${TMP_DEPS_SRC} -C ${SRC_ROOT}/vendor/ \
github.com/hashicorp/golang-lru \
github.com/hashicorp/hcl

for ddir in ${SRC_ROOT}/cmd/controller/bin/ ${SRC_ROOT}/cmd/ping/bin/ ${SRC_ROOT}/cmd/sdk-server/bin/ ; do
for ddir in ${SRC_ROOT}/cmd/controller/bin/ ${SRC_ROOT}/cmd/ping/bin/ ${SRC_ROOT}/cmd/sdk-server/bin/ ${SRC_ROOT}/cmd/allocator/bin/ ; do
mkdir -p ${ddir}
cp ${TMP_DEPS_SRC} ${ddir}
done
2 changes: 1 addition & 1 deletion build/extract-licenses.sh
@@ -47,7 +47,7 @@ while read -r entry; do
append_license ${LIBRARY} ${entry}
done <<< "$(find vendor/ -regextype posix-extended -iregex '.*LICENSE(\.txt)?')"

for ddir in ${SRC_ROOT}/cmd/controller/bin/ ${SRC_ROOT}/cmd/ping/bin/ ${SRC_ROOT}/cmd/sdk-server/bin/ ; do
for ddir in ${SRC_ROOT}/cmd/controller/bin/ ${SRC_ROOT}/cmd/ping/bin/ ${SRC_ROOT}/cmd/sdk-server/bin/ ${SRC_ROOT}/cmd/allocator/bin/ ; do
mkdir -p ${ddir}
cp ${TMP_LICENSES} ${ddir}
done
7 changes: 7 additions & 0 deletions build/helm.tf
@@ -68,6 +68,9 @@ variable "image_pull_secret" {
variable "ping_service_type" {
default = "LoadBalancer"
}
variable "allocator_service_type" {
default = "LoadBalancer"
}

variable "values_file" {
default = "../install/helm/agones/values.yaml"
@@ -177,6 +180,10 @@ resource "helm_release" "agones" {
name = "agones.ping.udp.serviceType"
value = "${var.ping_service_type}"
}
set {
name = " agones.allocator.http.serviceType"
value = "${var.allocator_service_type}"
}
version = "${var.agones_version}"
namespace = "agones-system"
}
24 changes: 24 additions & 0 deletions cmd/allocator/Dockerfile
@@ -0,0 +1,24 @@
# Copyright 2019 Google LLC All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM alpine:3.8

RUN apk --update add ca-certificates && \
adduser -D agones

COPY --chown=agones:root ./bin/allocator /home/agones/allocator
COPY --chown=agones:root ./bin/LICENSES ./bin/dependencies-src.tgz /home/agones/

USER agones
ENTRYPOINT ["/home/agones/allocator"]
169 changes: 169 additions & 0 deletions cmd/allocator/main.go
@@ -0,0 +1,169 @@
// Copyright 2019 Google LLC All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main

import (
"crypto/tls"
"crypto/x509"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"

allocationv1alpha1 "agones.dev/agones/pkg/apis/allocation/v1alpha1"
"agones.dev/agones/pkg/client/clientset/versioned"
"agones.dev/agones/pkg/util/runtime"
k8serror "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/rest"
)

var (
logger = runtime.NewLoggerWithSource("main")
)

const (
certDir = "/home/allocator/client-ca/"
tlsDir = "/home/allocator/tls/"
port = "8443"
)

// A handler for the web server
type handler func(w http.ResponseWriter, r *http.Request)

func main() {
agonesClient, err := getAgonesClient()
if err != nil {
logger.WithError(err).Fatal("could not create agones client")
}

h := httpHandler{
agonesClient: agonesClient,
namespace: os.Getenv("NAMESPACE"),
Member:
Any particular reason to tie this to a specific namespace? We could grab the namespace from the GameServerAllocation.ObjectMeta on deserialisation -- might be more flexible?

Contributor Author:
I am introducing a gRPC interface and that will not support k8s APIs, including ObjectMeta. The idea is that an allocator service deployed to a namespace is responsible for serving allocations in that namespace. Then matchmaking does not need to be aware of the internal structure of k8s and namespaces, and only calls the allocator's endpoint.

Member:
But if a user wants to run GameServers in more than one namespace, then they have to run two different services - which brings us back to the same place - the matchmaker being namespace aware (and we have to run twice the infrastructure, and potentially redeploy if we want to be dynamic about the namespaces being used).

Seems simpler to me to allow the endpoint to take a namespace as an argument? (Maybe "default" as the default namespace?)

Wdyt?

Contributor Author:
If there are two allocator services deployed to k8s in different namespaces, the matchmaker only needs to know the endpoints. However, if we want one allocator service deployed to a cluster to handle allocation requests for two namespaces, potentially for two different purposes, then there will not be enough isolation between the traffic for the two namespaces. IOW, QPS for one namespace may impact allocator service performance for another namespace. So there is a trade-off.

I would prefer not to expose the namespace unless we know for sure that it is needed, because adding additional fields is easy, but removing them in the future is hard as it will be a breaking change. WDYT?

Member:
> IOW, QPS for one namespace may impact allocator service performance for another namespace. So there is a trade-off.

Everything comes back to the k8s api anyway, so we'll always have that bottleneck?

> I would prefer not to expose the namespace unless we know for sure that it is needed, because adding additional fields is easy, but removing them in the future is hard as it will be a breaking change. WDYT?

I can't disagree with that statement 👍

The only other devil's advocate statement I can make is that I think this makes things a tad more complicated for the end user. Up until this point, everything is installed in the agones-system namespace - now we have Agones system components bleeding into other areas of Kubernetes, whereas before they were pretty tightly contained in the agones-system namespace.

The other thing is - we're saying we're a reverse proxy for this CRD, but we are changing the expected behaviour of that CRD with the reverse proxy. So it might be a bit confusing for users.

But given your excellent point above - I think we'll be okay to have the namespace defined in the env var -- and see how users like it. Much easier to add later 👍

Contributor Author:
> Everything comes back to the k8s api anyway, so we'll always have that bottleneck?

Good point. I think you are right.

> The other thing is - we're saying we're a reverse proxy for this CRD, but we are changing the expected behaviour of that CRD with the reverse proxy. So it might be a bit confusing for users.

Can you please explain more? How are we changing the expected behavior?

Member:
> Can you please explain more? How are we changing the expected behavior?

Sure - basically, through the k8s api I can pass the namespace I'm working with as part of the URL path - so I can access whatever namespace I want. That being said, in this instance we aren't doing that - so it's probably not that huge a deal.

The only potentially confusing thing I see is if a user sets the namespace in ObjectMeta.Namespace and it doesn't translate through to the namespace the service applies.

But I don't see either of these things as blocking issues. As you say above, we can add this functionality later if we need it.

(Also, if someone wants to work with a new namespace, we have to provision a service account and RBAC rules anyway, so it's not like you can dynamically add/remove namespace support that quickly.)

Contributor Author:
In the gRPC interface that I will introduce next, the namespace is not exposed, so ObjectMeta.Namespace will not be relevant.

}

// TODO: add liveness probe
http.HandleFunc("/v1alpha1/gameserverallocation", h.postOnly(h.allocateHandler))

caCertPool, err := getCACertPool(certDir)
if err != nil {
logger.WithError(err).Fatal("could not get CA certs")
}

cfg := &tls.Config{
ClientAuth: tls.RequireAndVerifyClientCert,
ClientCAs: caCertPool,
}
srv := &http.Server{
Addr: ":" + port,
TLSConfig: cfg,
}

err = srv.ListenAndServeTLS(tlsDir+"tls.crt", tlsDir+"tls.key")
logger.WithError(err).Fatal("allocation service crashed")
}
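
For illustration, a matchmaker-side client for this endpoint might look like the minimal sketch below. It is not part of this change: the host name, certificate file paths and fleet label are placeholders, and the allocation spec is only an example. Note that the request body carries no namespace -- the service resolves it from its NAMESPACE environment variable -- and the client must present a certificate signed by one of the CAs mounted under the allocator's client-ca directory, since the server requires client certificate verification.

package main

import (
	"bytes"
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

func main() {
	// Client certificate signed by one of the CAs the allocator has mounted
	// under /home/allocator/client-ca/ (RequireAndVerifyClientCert).
	cert, err := tls.LoadX509KeyPair("client.crt", "client.key")
	if err != nil {
		log.Fatal(err)
	}

	// CA used to verify the allocator's serving certificate (tls.crt).
	serverCA, err := ioutil.ReadFile("server-ca.crt")
	if err != nil {
		log.Fatal(err)
	}
	pool := x509.NewCertPool()
	pool.AppendCertsFromPEM(serverCA)

	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{
				Certificates: []tls.Certificate{cert},
				RootCAs:      pool,
			},
		},
	}

	// Illustrative allocation request; no namespace is set here -- the
	// allocator applies the namespace from its NAMESPACE env var.
	body := []byte(`{"spec":{"required":{"matchLabels":{"agones.dev/fleet":"simple-udp"}}}}`)

	resp, err := client.Post(
		"https://allocator.example.com:8443/v1alpha1/gameserverallocation",
		"application/json", bytes.NewBuffer(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	allocated, _ := ioutil.ReadAll(resp.Body)
	fmt.Printf("%d: %s\n", resp.StatusCode, allocated)
}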

// Set up our client which we will use to call the API
func getAgonesClient() (*versioned.Clientset, error) {
// Create the in-cluster config
config, err := rest.InClusterConfig()
if err != nil {
return nil, errors.New("Could not create in cluster config")
}

// Access to the Agones resources through the Agones Clientset
agonesClient, err := versioned.NewForConfig(config)
if err != nil {
return nil, errors.New("Could not create the agones api clientset")
}

return agonesClient, nil
}

func getCACertPool(path string) (*x509.CertPool, error) {
// Add all certificates under client-certs path because there could be multiple clusters
// and all client certs should be added.
Contributor:
Normally this should be one CA certificate regardless of the number of clusters; clients will use leaf certificates signed by this CA, so you technically only need one PEM file with the CA certificate bundle, not all the client certs.

Contributor:
IOW - we need to make sure we have the certificates example set up correctly, because people will be cargo-culting this a lot.

Contributor Author:
This also acts as a revocation list for certificates. If a secret is compromised, its CA can be revoked without impacting other clients calling the allocation service. Having this pattern is valuable for matchmaking rather than for cluster-to-cluster calls: if a cluster secret is compromised, then all secrets used to talk to another cluster are compromised. The matchmaker, however, is an independent entity, and this solution helps secure the system against bad-acting matchmakers.

caCertPool := x509.NewCertPool()
filesInfo, err := ioutil.ReadDir(path)
if err != nil {
return nil, fmt.Errorf("error reading certs from dir %s: %s", path, err.Error())
}

for _, file := range filesInfo {
if strings.HasSuffix(file.Name(), ".crt") || strings.HasSuffix(file.Name(), ".pem") {
certFile := filepath.Join(path, file.Name())
caCert, err := ioutil.ReadFile(certFile)
if err != nil {
return nil, fmt.Errorf("ca cert is not readable or missing: %s", err.Error())
}
if !caCertPool.AppendCertsFromPEM(caCert) {
return nil, fmt.Errorf("client cert %s cannot be installed", certFile)
}
logger.Infof("client cert %s is installed", certFile)
}
}

return caCertPool, nil
}

// Limit verbs the web server handles
func (h *httpHandler) postOnly(in handler) handler {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method == "POST" {
in(w, r)
return
}
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}
}

type httpHandler struct {
agonesClient versioned.Interface
namespace string
}

func (h *httpHandler) allocateHandler(w http.ResponseWriter, r *http.Request) {
gsa := allocationv1alpha1.GameServerAllocation{}
if err := json.NewDecoder(r.Body).Decode(&gsa); err != nil {
http.Error(w, "invalid request", http.StatusBadRequest)
return
}

allocation := h.agonesClient.AllocationV1alpha1().GameServerAllocations(h.namespace)
allocatedGsa, err := allocation.Create(&gsa)
if err != nil {
http.Error(w, err.Error(), httpCode(err))
logger.Debug(err)
return
}
w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(allocatedGsa)
if err != nil {
Contributor:
At this point returning http.Error will probably have no effect, since we already sent the headers; I would just log a warning.

Contributor Author:
This also returns the http status and body with the error message.

http.Error(w, "internal server error", http.StatusInternalServerError)
logger.Error(err)
return
}
}
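
As one possible follow-up to the review comment above (not part of this change), the response could be marshalled before any headers are written, so that an encoding failure can still produce a clean 500 rather than a half-written body. A sketch, reusing this file's imports and logger:

func (h *httpHandler) writeJSON(w http.ResponseWriter, v interface{}) {
	payload, err := json.Marshal(v)
	if err != nil {
		// Nothing has been written yet, so a proper error response is still possible.
		http.Error(w, "internal server error", http.StatusInternalServerError)
		logger.Error(err)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if _, err := w.Write(payload); err != nil {
		// Headers are already sent at this point; all that is left is to log.
		logger.Warn(err)
	}
}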

func httpCode(err error) int {
code := http.StatusInternalServerError
switch t := err.(type) {
case k8serror.APIStatus:
code = int(t.Status().Code)
}
return code
}
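
To make the error mapping concrete, here is a small self-contained illustration (not part of this change) of how the httpCode helper above translates Kubernetes API errors for the caller; the group/resource values are only examples:

package main

import (
	"errors"
	"fmt"
	"net/http"

	k8serror "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Same mapping as the allocator's httpCode helper.
func httpCode(err error) int {
	code := http.StatusInternalServerError
	switch t := err.(type) {
	case k8serror.APIStatus:
		code = int(t.Status().Code)
	}
	return code
}

func main() {
	gr := schema.GroupResource{Group: "allocation.agones.dev", Resource: "gameserverallocations"}

	// A NotFound returned by the Kubernetes API surfaces to the caller as 404.
	fmt.Println(httpCode(k8serror.NewNotFound(gr, "example"))) // 404

	// Anything that does not implement APIStatus falls back to 500.
	fmt.Println(httpCode(errors.New("boom"))) // 500
}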