Skip to content

Commit

Permalink
Wiring up proxies (#78)
Browse files Browse the repository at this point in the history
* initial wiring for http reverse proxy

* server config updates

* lint

* remove enable proxy

* docker compose updates

* Update envoy processing so each route has a unique name and is not set to the model name.

* Update set state method to take an expected state to transition from

* add missing Docker compose files

* lint

* Add model lock/unlock

* lint

* Add test for unexpected state change

* lint

* review comments
  • Loading branch information
ukclivecox authored Feb 7, 2022
1 parent 53d10cd commit cfee09c
Show file tree
Hide file tree
Showing 29 changed files with 558 additions and 229 deletions.
21 changes: 16 additions & 5 deletions operator/config/serverconfigs/mlserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ spec:
value: "9000"
- name: SELDON_SERVER_GRPC_PORT
value: "9500"
- name: SELDON_REVERSE_PROXY_HTTP_PORT
value: "9001"
- name: SELDON_REVERSE_PROXY_GRPC_PORT
value: "9501"
- name: SELDON_SCHEDULER_HOST
value: "seldon-scheduler"
- name: SELDON_SCHEDULER_PORT
Expand All @@ -53,6 +57,13 @@ spec:
resourceFieldRef:
containerName: mlserver
resource: requests.memory
ports:
- containerPort: 9501
name: grpc
protocol: TCP
- containerPort: 9001
name: http
protocol: TCP
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
Expand Down Expand Up @@ -83,26 +94,26 @@ spec:
livenessProbe:
httpGet:
path: /v2/health/live
port: http
port: server-http
readinessProbe:
httpGet:
path: /v2/health/ready
port: http
port: server-http
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: /v2/health/ready
port: http
port: server-http
failureThreshold: 10
periodSeconds: 10
name: mlserver
ports:
- containerPort: 9500
name: grpc
name: server-grpc
protocol: TCP
- containerPort: 9000
name: http
name: server-http
protocol: TCP
readinessProbe:
failureThreshold: 3
Expand Down
21 changes: 16 additions & 5 deletions operator/config/serverconfigs/triton.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ spec:
value: "9000"
- name: SELDON_SERVER_GRPC_PORT
value: "9500"
- name: SELDON_REVERSE_PROXY_HTTP_PORT
value: "9001"
- name: SELDON_REVERSE_PROXY_GRPC_PORT
value: "9501"
- name: SELDON_SCHEDULER_HOST
value: "seldon-scheduler"
- name: SELDON_SCHEDULER_PORT
Expand All @@ -53,6 +57,13 @@ spec:
resourceFieldRef:
containerName: triton
resource: requests.memory
ports:
- containerPort: 9501
name: grpc
protocol: TCP
- containerPort: 9001
name: http
protocol: TCP
volumeMounts:
- mountPath: /mnt/agent
name: triton-models
Expand Down Expand Up @@ -82,26 +93,26 @@ spec:
livenessProbe:
httpGet:
path: /v2/health/live
port: http
port: server-http
readinessProbe:
httpGet:
path: /v2/health/ready
port: http
port: server-http
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: /v2/health/ready
port: http
port: server-http
failureThreshold: 10
periodSeconds: 10
name: triton
ports:
- containerPort: 9500
name: grpc
name: server-grpc
protocol: TCP
- containerPort: 9000
name: http
name: server-http
protocol: TCP
- containerPort: 8002
name: metrics
Expand Down
3 changes: 3 additions & 0 deletions scheduler/Dockerfile.envoy-compose
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM envoyproxy/envoy:v1.19.1
COPY config/envoy-compose.yaml /etc/envoy.yaml
CMD /usr/local/bin/envoy -c /etc/envoy.yaml
59 changes: 57 additions & 2 deletions scheduler/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ AGENT_IMG ?= seldonio/seldon-agent:latest
RCLONE_IMG ?= seldonio/seldon-rclone:latest
ENVOY_IMG ?= seldonio/seldon-envoy:latest
ENVOY_LOCAL_IMG ?= seldonio/seldon-envoy-local:latest
ENVOY_COMPOSE_IMG ?= seldonio/seldon-envoy-compose:latest
MLSERVER_IMG ?= seldonio/mlserver:1.0.0.rc1
TRITON_IMG ?= nvcr.io/nvidia/tritonserver:21.12-py3
KIND_NAME=ansible
Expand Down Expand Up @@ -64,6 +65,9 @@ docker-build-envoy:
docker-build-envoy-local:
docker build -t ${ENVOY_LOCAL_IMG} -f Dockerfile.envoy-local .

docker-build-envoy-compose:
docker build -t ${ENVOY_COMPOSE_IMG} -f Dockerfile.envoy-compose .

docker-push-envoy: ## Push docker image with the manager.
docker push ${ENVOY_IMG}

Expand Down Expand Up @@ -95,13 +99,13 @@ kind-image-install-all: kind-image-install-scheduler kind-image-install-envoy ki
DOCKER_COMPOSE_COMMON_IMAGES = \
SCHEDULER_IMAGE_AND_TAG=${SCHEDULER_IMG} \
AGENT_IMAGE_AND_TAG=${AGENT_IMG} \
ENVOY_IMAGE_AND_TAG=${ENVOY_LOCAL_IMG} \
ENVOY_IMAGE_AND_TAG=${ENVOY_COMPOSE_IMG} \
RCLONE_IMAGE_AND_TAG=${RCLONE_IMG}

DOCKER_COMPOSE_MLSERVER_IMAGES = \
${DOCKER_COMPOSE_COMMON_IMAGES} \
SERVER_IMAGE_AND_TAG=${MLSERVER_IMG}

DOCKER_COMPOSE_TRITON_IMAGES = \
${DOCKER_COMPOSE_COMMON_IMAGES} \
SERVER_IMAGE_AND_TAG=${TRITON_IMG}
Expand Down Expand Up @@ -159,6 +163,57 @@ stop-triton:
${DOCKER_COMPOSE_MLSERVER_IMAGES} \
docker-compose -f all-base.yaml -f all-triton.yaml --env-file env.all -p scv2_triton rm --stop --force ${DOCKER_COMPOSE_REMOVE_VOLUMES}

#####################################
# Start with Docker
#####################################

start-scheduler:
docker run --name scheduler -d --rm --network host -p 9004:9004 -p 9002:9002 -p 9005:9005 ${SCHEDULER_IMG}


start-agent-mlserver:
docker run --name agent -d --rm --network host -v ${PWD}/config:/mnt/config -v ${PWD}/mnt:/mnt/agent -e SELDON_SERVER_CAPABILITIES='sklearn,xgboost' -e SELDON_OVERCOMMIT='false' -e SELDON_SERVER_HTTP_PORT='8080' -e SELDON_SERVER_GRPC_PORT='8081' -e SELDON_SCHEDULER_HOST='0.0.0.0' -e SELDON_SCHEDULER_PORT='9005' -e SELDON_SERVER_TYPE='mlserver' -e MEMORY_REQUEST='1000000' ${AGENT_IMG} /bin/agent --log-level debug --config-path /mnt/config

start-agent-triton:
docker run --name agent -d --rm --network host -v ${PWD}/config:/mnt/config -v ${PWD}/mnt:/mnt/agent -e SELDON_SERVER_CAPABILITIES='tensorflow,onnx,pytorch' -e SELDON_OVERCOMMIT='false' -e SELDON_SERVER_HTTP_PORT='8080' -e SELDON_SERVER_GRPC_PORT='8081' -e SELDON_SCHEDULER_HOST='0.0.0.0' -e SELDON_SCHEDULER_PORT='9005' -e SELDON_SERVER_TYPE='triton' -e MEMORY_REQUEST='1000000' ${AGENT_IMG} /bin/agent --log-level debug --config-path /mnt/config

start-envoy:
docker run --name envoy -d --rm --network host -p 9003:9003 -p 9000:9000 ${ENVOY_LOCAL_IMG}

start-rclone:
docker run --name rclone -d --rm -v ${PWD}/mnt:/mnt/agent -p 5572:5572 seldonio/seldon-rclone:latest

start-docker-mlserver:
mkdir -p mnt/models
rm -rf mnt/models/*
docker run --name mlserver -d --rm -v ${PWD}/mnt/models:/mnt/models -p 8080:8080 -p 8081:8081 seldonio/mlserver:1.0.0.rc1 mlserver start /mnt/models

start-triton-debug:
mkdir -p mnt/models
rm -rf mnt/models/*
docker run --name triton -d --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8080:8080 -p8081:8081 -v ${PWD}/mnt/models:/mnt/models nvcr.io/nvidia/tritonserver:21.12-py3 /opt/tritonserver/bin/tritonserver --model-repository=/mnt/models --http-port=8080 --grpc-port=8081 --log-verbose=1 --model-control-mode=explicit

start-docker-triton:
mkdir -p mnt/models
rm -rf mnt/models/*
docker run --name triton -d --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8080:8080 -p8081:8081 -v ${PWD}/mnt/models:/mnt/models nvcr.io/nvidia/tritonserver:21.12-py3 /opt/tritonserver/bin/tritonserver --model-repository=/mnt/models --http-port=8080 --grpc-port=8081 --model-control-mode=explicit

start-all-docker-mlserver: start-scheduler start-agent-mlserver start-mlserver start-envoy start-rclone
stop-all-docker-mlserver:
docker rm -f scheduler
docker rm -f agent
docker rm -f mlserver
docker rm -f envoy
docker rm -f rclone

start-all-docker-triton: start-scheduler start-agent-triton start-triton start-envoy start-rclone
stop-all-docker-triton:
docker rm -f scheduler
docker rm -f agent
docker rm -f triton
docker rm -f envoy
docker rm -f rclone


#####################################
# Start local binaries
Expand Down
23 changes: 16 additions & 7 deletions scheduler/all-base.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
version: "3.9"

volumes:
downloads:
models:

services:
Expand All @@ -14,26 +13,36 @@ services:
ports:
- "${AGENT_HTTP_PORT}:${AGENT_HTTP_PORT}"
- "${AGENT_GRPC_PORT}:${AGENT_GRPC_PORT}"
- "${SELDON_REVERSE_PROXY_HTTP_PORT}:${SELDON_REVERSE_PROXY_HTTP_PORT}"
- "${SELDON_REVERSE_PROXY_GRPC_PORT}:${SELDON_REVERSE_PROXY_GRPC_PORT}"
command:
- "/bin/agent"
- "--log-level"
- "debug"
- "--config-path"
- "/mnt/config"
- "--rclone-host"
- "rclone"
- "--inference-host"
- "server"
- "--agent-host"
- "agent"
environment:
- SELDON_OVERCOMMIT=${AGENT_ENABLE_OVERCOMMIT}
- SELDON_SERVER_HTTP_PORT=${AGENT_HTTP_PORT}
- SELDON_SERVER_GRPC_PORT=${AGENT_GRPC_PORT}
- SELDON_REVERSE_PROXY_HTTP_PORT=${SELDON_REVERSE_PROXY_HTTP_PORT}
- SELDON_REVERSE_PROXY_GRPC_PORT=${SELDON_REVERSE_PROXY_GRPC_PORT}
- SELDON_SERVER_HTTP_PORT=${SERVER_HTTP_PORT}
- SELDON_SERVER_GRPC_PORT=${SERVER_GRPC_PORT}
- SELDON_DEBUG_GRPC_PORT=${AGENT_DEBUG_PORT}
- SELDON_SCHEDULER_HOST=0.0.0.0
- SELDON_SCHEDULER_HOST=scheduler
- SELDON_SCHEDULER_PORT=${SCHEDULER_AGENT_PORT}
- MEMORY_REQUEST=${AGENT_MEMORY_REQUEST}
volumes:
- type: bind
source: ./config
target: /mnt/config
- type: volume
source: downloads
source: models
target: /mnt/agent

envoy:
Expand All @@ -54,7 +63,7 @@ services:
- "${RCLONE_HTTP_PORT}:${RCLONE_HTTP_PORT}"
volumes:
- type: volume
source: downloads
source: models
target: /mnt/agent

scheduler:
Expand All @@ -75,4 +84,4 @@ services:
volumes:
- type: volume
source: models
target: /mnt/models
target: /mnt/agent
2 changes: 1 addition & 1 deletion scheduler/all-mlserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ services:
command:
- "mlserver"
- "start"
- "/mnt/models"
- "/mnt/agent/models"
environment:
- MLSERVER_LOAD_MODELS_AT_STARTUP=${MLSERVER_LOAD_MODELS_AT_STARTUP}
2 changes: 1 addition & 1 deletion scheduler/all-triton.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
server:
command:
- "/opt/tritonserver/bin/tritonserver"
- "--model-repository=/mnt/models"
- "--model-repository=/mnt/agent/models"
- "--http-port=${SERVER_HTTP_PORT}"
- "--grpc-port=${SERVER_GRPC_PORT}"
- "--model-control-mode=explicit"
Expand Down
35 changes: 26 additions & 9 deletions scheduler/cmd/agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
)

var (
agentHost string
serverName string
replicaIdx uint
schedulerHost string
Expand Down Expand Up @@ -58,6 +59,9 @@ var (
)

const (
DefaultInferenceSvcHttpPort = 9000
DefaultInferenceSvcGrpcPort = 9500

EnvServerHttpPort = "SELDON_SERVER_HTTP_PORT"
EnvServerGrpcPort = "SELDON_SERVER_GRPC_PORT"
EnvReverseProxyHttpPort = "SELDON_REVERSE_PROXY_HTTP_PORT"
Expand Down Expand Up @@ -93,6 +97,7 @@ const (
func init() {
rand.Seed(time.Now().UnixNano())

flag.StringVar(&agentHost, "agent-host", "0.0.0.0", "Agent hostname")
flag.StringVar(&serverName, FlagServerName, "mlserver", "Server name")
flag.UintVar(&replicaIdx, "server-idx", 0, "Server index")
flag.StringVar(&schedulerHost, FlagSchedulerHost, "0.0.0.0", "Scheduler host")
Expand All @@ -112,7 +117,7 @@ func init() {
flag.StringVar(&serverType, FlagServerType, serverTypes[0], "Server type. Default mlserver")
flag.IntVar(&memoryBytes, FlagMemoryBytes, 1000000, "Memory available for server")
flag.StringVar(&capabilitiesList, FlagCapabilities, "sklearn,xgboost", "Server capabilities")
flag.BoolVar(&overCommit, FlagOverCommit, false, "Overcommit memory")
flag.BoolVar(&overCommit, FlagOverCommit, true, "Overcommit memory")
flag.StringVar(&logLevel, FlagLogLevel, "debug", "Log level - examples: debug, info, error")
}

Expand Down Expand Up @@ -304,7 +309,7 @@ func setInferenceSvcName() {
if podName != "" {
inferenceSvcName = podName
} else {
inferenceSvcName = inferenceHost
inferenceSvcName = agentHost
}
log.Infof("Setting inference svc name to %s", inferenceSvcName)
}
Expand Down Expand Up @@ -341,25 +346,37 @@ func getRepositoryHandler(logger log.FieldLogger) repository.ModelRepositoryHand
}

func createReplicaConfig() *agent2.ReplicaConfig {
var rc *agent2.ReplicaConfig
if isFlagPassed(FlagReplicaConfig) {
rc, err := agent.ParseReplicaConfig(replicaConfigStr)
var err error
rc, err = agent.ParseReplicaConfig(replicaConfigStr)
if err != nil {
log.WithError(err).Fatalf("Failed to parse replica config %s", replicaConfigStr)
}
log.Infof("Created replicaConfig from command line %+v", rc)
return rc
log.Infof("Created replicaConfig from command line")
} else {
rc := &agent2.ReplicaConfig{
rc = &agent2.ReplicaConfig{
InferenceSvc: inferenceSvcName,
InferenceHttpPort: int32(inferenceHttpPort),
InferenceGrpcPort: int32(inferenceGrpcPort),
MemoryBytes: memoryBytes64,
Capabilities: capabilities,
OverCommit: overCommit,
}
log.Infof("Created replicaConfig from environment %+v", rc)
return rc
log.Infof("Created replicaConfig from environment")
}
//Setup ports correctly
if runningInsideK8s() {
// Inside k8s these will be fixed ports on a headless SVC pointing to the http and grpc named ports in this pod
rc.InferenceHttpPort = int32(DefaultInferenceSvcHttpPort)
rc.InferenceGrpcPort = int32(DefaultInferenceSvcGrpcPort)
} else {
		// If not in k8s then we take whatever is set for the reverse proxy ports
rc.InferenceHttpPort = int32(reverseProxyHttpPort)
rc.InferenceGrpcPort = int32(reverseProxyGrpcPort)
}
log.Infof("replicaConfig %+v", rc)
return rc
}

func main() {
Expand Down Expand Up @@ -419,7 +436,7 @@ func main() {

rpHTTP := agent.NewReverseHTTPProxy(logger, uint(reverseProxyHttpPort))

rpGRPC := agent.NewReverseGRPCProxy(logger, uint(inferenceGrpcPort), uint(reverseProxyGrpcPort))
rpGRPC := agent.NewReverseGRPCProxy(logger, inferenceHost, uint(inferenceGrpcPort), uint(reverseProxyGrpcPort))

clientDebugService := agent.NewClientDebug(logger, uint(debugGrpcPort))

Expand Down
Loading

0 comments on commit cfee09c

Please sign in to comment.