Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove workers from ray-cluster #1397

Merged
merged 4 commits into from
Aug 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ data:
- emptyDir:
name: log-volume
- configMap:
name: fluentbit-config
name: fluentbit-config
# Secret `ca-tls` has the information of CA's private key and certificate.
- name: ca-tls
Expand Down Expand Up @@ -272,259 +271,4 @@ data:
items:
- key: iptables.sh
path: iptables.sh
{{- end }}
workerGroupSpecs:
- groupName: g
maxReplicas: {{`{{ max_workers }}`}}
minReplicas: {{`{{ min_workers }}`}}
rayStartParams:
block: 'true'
replicas: {{`{{ workers }}`}}
template:
{{- if .Values.application.ray.scrapeWithPrometheus }}
metadata:
annotations:
prometheus.io/scrape: "true"
{{- end }}
spec:
initContainers:
# Generate worker's private key and certificate before `ray start`.
{{- if .Values.useCertManager }}
- name: ray-worker-cert
image: {{ .Values.application.ray.kubectlImage }}
command: ["/bin/sh", "-c", "/etc/gen/tls/gencert_cert_worker.sh /tmp/tls {{`{{ cluster_name }}`}} $POD_IP"]
volumeMounts:
- mountPath: /tmp/tls
name: cert-tls
- mountPath: /etc/gen/tls
name: gen-tls-script
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
{{- else }}
- name: ray-worker-tls
image: {{ .Values.application.ray.opensslImage }}
command: ["/bin/sh", "-c", "cp -R /etc/ca/tls /etc/ray && /etc/gen/tls/gencert_worker.sh"]
volumeMounts:
- mountPath: /etc/ca/tls
name: ca-tls
readOnly: true
- mountPath: /etc/ray/tls
name: ray-tls
- mountPath: /etc/gen/tls
name: gen-tls-script
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
{{- end }}
{{- if .Values.application.proxy.enabled }}
- name: iptables
image: {{ .Values.application.proxy.initImage }}
command: ["sh", "-c", "/var/iptables/iptables.sh"]
volumeMounts:
- mountPath: /var/iptables
name: iptables
securityContext:
privileged: true
capabilities:
add: ["NET_ADMIN"]

{{- end }}
affinity:
containers:
- resources:
limits:
cpu: 1
memory: 2G
requests:
cpu: 1
memory: 2G
securityContext: {}
volumeMounts:
- mountPath: /tmp/ray
name: log-volume
- mountPath: /etc/ca/tls
name: ca-tls
readOnly: true
- mountPath: /etc/ray/tls
name: ray-tls
- mountPath: /data
name: user-storage
subPath: {{`{{ user_id }}`}}
- mountPath: /function_data
name: user-storage
subPath: {{`{{ function_data }}`}}
{{- if .Values.useCertManager }}
- mountPath: /tmp/tls
name: cert-tls
{{- end }}
env:
# Environment variables for Ray TLS authentication.
# See https://docs.ray.io/en/latest/ray-core/configure.html#tls-authentication for more details.
{{- if .Values.application.ray.useTLS }}
- name: RAY_USE_TLS
value: "1"
{{- end }}
{{- if .Values.useCertManager }}
- name: RAY_TLS_SERVER_CERT
value: "/tmp/tls/tls.crt"
- name: RAY_TLS_SERVER_KEY
value: "/tmp/tls/tls.key"
- name: RAY_TLS_CA_CERT
value: "/tmp/tls/ca.crt"
{{- else }}
- name: RAY_TLS_SERVER_CERT
value: "/etc/ray/tls/tls.crt"
- name: RAY_TLS_SERVER_KEY
value: "/etc/ray/tls/tls.key"
- name: RAY_TLS_CA_CERT
value: "/etc/ca/tls/ca.crt"
{{- end }}
{{- if .Values.application.ray.openTelemetry }}
- name: OT_RAY_TRACER
value: "1"
{{- end }}
{{- if .Values.application.ray.openTelemetryCollector.local }}
- name: OT_JAEGER_HOST_KEY
value: {{ .Values.application.ray.openTelemetryCollector.host }}
- name: OT_JAEGER_PORT_KEY
value: {{ .Values.application.ray.openTelemetryCollector.port | quote }}
- name: OT_INSECURE
value: {{ .Values.application.ray.openTelemetryCollector.insecure | quote }}
- name: OT_ENABLED
value: {{ .Values.application.ray.openTelemetryCollector.enabled | quote }}
{{- end }}
image: {{`{{ node_image }}`| quote}}
imagePullPolicy: IfNotPresent
name: ray-worker
resources:
limits:
cpu: {{ .Values.application.ray.cpu }}
memory: {{ .Values.application.ray.memory }}Gi
requests:
cpu: {{ .Values.application.ray.cpu }}
memory: {{ .Values.application.ray.memory }}Gi
securityContext:
{{- if .Values.application.proxy.enabled }}
- resources:
limits:
cpu: 1
memory: 2G
requests:
cpu: 1
memory: 2G
volumeMounts:
- mountPath: /tmp/ray
name: log-volume
- mountPath: /etc/ca/tls
name: ca-tls
readOnly: true
- mountPath: /etc/ray/tls
name: ray-tls
- mountPath: /data
name: user-storage
subPath: {{`{{ user_id }}`}}
{{- if .Values.useCertManager }}
- mountPath: /tmp/tls
name: cert-tls
{{- end }}
env:
# Environment variables for Ray TLS authentication.
# See https://docs.ray.io/en/latest/ray-core/configure.html#tls-authentication for more details.
- name: DST_PROTOCOL
value: "https"
- name: GATEWAY_URL
value: "gateway:8000"
{{- if .Values.application.ray.useTLS }}
- name: RAY_USE_TLS
value: "1"
{{- end }}
{{- if .Values.useCertManager }}
- name: RAY_TLS_SERVER_CERT
value: "/tmp/tls/tls.crt"
- name: RAY_TLS_SERVER_KEY
value: "/tmp/tls/tls.key"
- name: RAY_TLS_CA_CERT
value: "/tmp/tls/ca.crt"
{{- else }}
- name: RAY_TLS_SERVER_CERT
value: "/etc/ray/tls/tls.crt"
- name: RAY_TLS_SERVER_KEY
value: "/etc/ray/tls/tls.key"
- name: RAY_TLS_CA_CERT
value: "/etc/ca/tls/ca.crt"
{{- end }}
{{- if .Values.application.ray.openTelemetry }}
- name: OT_RAY_TRACER
value: "1"
{{- end }}
{{- if .Values.application.ray.openTelemetryCollector.local }}
- name: OT_JAEGER_HOST_KEY
value: {{ .Values.application.ray.openTelemetryCollector.host }}
- name: OT_JAEGER_PORT_KEY
value: {{ .Values.application.ray.openTelemetryCollector.port | quote }}
- name: OT_INSECURE
value: {{ .Values.application.ray.openTelemetryCollector.insecure | quote }}
- name: OT_ENABLED
value: {{ .Values.application.ray.openTelemetryCollector.enabled | quote }}
{{- end }}
image: {{ .Values.application.ray.proxyImage }}
imagePullPolicy: IfNotPresent
name: proxy
resources:
limits:
cpu: {{ .Values.application.proxy.cpu }}
memory: {{ .Values.application.proxy.memory }}Gi
securityContext:
runAsGroup: 123
{{- end }}
imagePullSecrets: []
{{- if .Values.useCertManager }}
serviceAccountName: ray-cluster-sa
serviceAccount: ray-cluster-sa
{{- end }}
nodeSelector:
tolerations: []
securityContext:
fsGroup: 123
volumes:
- emptyDir:
name: log-volume
- name: ca-tls
secret:
secretName: ca-tls
- name: ray-tls
emptyDir: {}
{{- if .Values.useCertManager }}
- name: cert-tls
emptyDir: {}
{{- end }}
# `gencert_worker.sh` is a script to generate worker Pod's private key and worker's certificate.
- name: gen-tls-script
configMap:
name: tls
defaultMode: 0777
# An array of keys from the ConfigMap to create as files
items:
- key: gencert_worker.sh
path: gencert_worker.sh
{{- if .Values.useCertManager }}
- key: gencert_cert_worker.sh
path: gencert_cert_worker.sh
{{- end }}
- name: user-storage
persistentVolumeClaim:
claimName: {{ .Values.cos.claimName }}
{{- if .Values.application.proxy.enabled }}
- name: iptables
configMap:
name: proxy-iptables
defaultMode: 0777
items:
- key: iptables.sh
path: iptables.sh
{{- end }}
Loading