Clean up example samples #434

Merged
104 changes: 18 additions & 86 deletions ray-operator/config/samples/ray-cluster.autoscaler.large.yaml
@@ -16,8 +16,8 @@ kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler-large
# A unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler
spec:
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
rayVersion: '1.13.0'
@@ -51,7 +51,7 @@ spec:
requests:
cpu: "500m"
memory: "512Mi"
######################headGroupSpecs#################################
######################headGroupSpec#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
@@ -61,16 +61,13 @@ spec:
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --port=6379 ...
rayStartParams:
# Flag "no-monitor" must be set when running the autoscaler in
# a sidecar container.
port: '6379'
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
dashboard-host: '0.0.0.0'
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
num-cpus: '1' # can be auto-completed from the limits
# num-cpus: '14' # can be auto-completed from the limits
# Use `resources` to optionally specify custom resource annotations for the Ray node.
# The value of `resources` is a string-integer mapping.
# Currently, `resources` must be provided in the unfortunate format demonstrated below:
# Currently, `resources` must be provided in the specific format demonstrated below:
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
#pod template
template:
@@ -86,36 +83,11 @@ spec:
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
cpu: 14
memory: 54Gi
requests:
cpu: "14"
memory: "54Gi"
env:
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.memory
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
cpu: 14
memory: 54Gi
ports:
- containerPort: 6379
name: gcs
@@ -132,22 +104,19 @@ spec:
- replicas: 1
minReplicas: 1
maxReplicas: 10
# logical group name, for this called small-group, also can be functional
# logical group name, for this called large-group, also can be functional
groupName: large-group
# if worker pods need to be added, we can simply increment the replicas
# if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list
# the operator will remove pods from the list until the number of replicas is satisfied
# when a pod is confirmed to be deleted, its name will be removed from the list below
#scaleStrategy:
# workersToDelete:
# - raycluster-complete-worker-small-group-bdtwh
# - raycluster-complete-worker-small-group-hv457
# - raycluster-complete-worker-small-group-k8tj7
# - raycluster-complete-worker-large-group-bdtwh
# - raycluster-complete-worker-large-group-hv457
# - raycluster-complete-worker-large-group-k8tj7
# the following params are used to complete the ray start: ray start --block --node-ip-address= ...
rayStartParams:
#redis-password: '5241590000000000'
redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
@@ -172,48 +141,11 @@ spec:
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
cpu: 14
memory: 54Gi
requests:
cpu: "14"
memory: "54Gi"
# environment variables to set in the container.Optional.
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
env:
- name: RAY_DISABLE_DOCKER_CPU_WARNING
value: "1"
- name: TYPE
value: "worker"
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.memory
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 80
cpu: 14
memory: 54Gi
lifecycle:
preStop:
exec:
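For context on the `rayStartParams` edits in this sample: the operator assembles these key/value pairs into the `ray start` command line for the container, which is why auto-completed entries such as `node-ip-address` can be dropped from the sample. A minimal head-group sketch under that assumption (values are illustrative and mirror the sample, not a complete spec):

# Hypothetical head-group fragment, not part of the sample being edited.
# The operator is expected to expand rayStartParams into something like:
#   ray start --head --block --port=6379 --dashboard-host=0.0.0.0 ...
headGroupSpec:
  serviceType: ClusterIP
  rayStartParams:
    port: '6379'              # GCS port on the head node
    dashboard-host: '0.0.0.0' # expose the dashboard beyond localhost
    block: 'true'             # keep ray start in the foreground
    # Custom resource annotations use the escaped-JSON string format:
    # resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
  template:
    spec:
      containers:
        - name: ray-head
          image: rayproject/ray:1.13.0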
80 changes: 7 additions & 73 deletions ray-operator/config/samples/ray-cluster.autoscaler.yaml
@@ -7,7 +7,7 @@ kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
# A unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler
spec:
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
@@ -42,26 +42,23 @@ spec:
requests:
cpu: "500m"
memory: "512Mi"
######################headGroupSpecs#################################
######################headGroupSpec#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
serviceType: ClusterIP
# logical group name, for this called head-group, also can be functional
# pod type head or worker
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --port=6379 ...
# the following params are used to complete the ray start: ray start --head --block ...
rayStartParams:
# Flag "no-monitor" must be set when running the autoscaler in
# a sidecar container.
port: '6379'
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
dashboard-host: '0.0.0.0'
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
num-cpus: '1' # can be auto-completed from the limits
# num-cpus: '1' # can be auto-completed from the limits
# Use `resources` to optionally specify custom resource annotations for the Ray node.
# The value of `resources` is a string-integer mapping.
# Currently, `resources` must be provided in the unfortunate format demonstrated below:
# Currently, `resources` must be provided in the specific format demonstrated below:
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
#pod template
template:
@@ -71,31 +68,6 @@ spec:
- name: ray-head
image: rayproject/ray:1.13.0
imagePullPolicy: Always
env:
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.memory
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 6379
name: gcs
@@ -130,11 +102,8 @@ spec:
# - raycluster-complete-worker-small-group-bdtwh
# - raycluster-complete-worker-small-group-hv457
# - raycluster-complete-worker-small-group-k8tj7
# the following params are used to complete the ray start: ray start --block --node-ip-address= ...
# the following params are used to complete the ray start: ray start --block ...
rayStartParams:
#redis-password: '5241590000000000'
redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
@@ -155,41 +124,6 @@ spec:
image: rayproject/ray:1.13.0
# environment variables to set in the container.Optional.
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
env:
- name: RAY_DISABLE_DOCKER_CPU_WARNING
value: "1"
- name: TYPE
value: "worker"
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.memory
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 80
lifecycle:
preStop:
exec:
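The environment-variable blocks removed from both samples used the standard Kubernetes downward API (`fieldRef` and `resourceFieldRef`). If a cluster spec still needs the pod IP or the container's resource limits exposed inside a Ray container, the usual pattern looks roughly like this sketch (container and variable names are illustrative):

# Downward-API sketch, illustrative only; not part of the samples above.
env:
  - name: MY_POD_IP
    valueFrom:
      fieldRef:
        fieldPath: status.podIP      # pod IP assigned at runtime
  - name: CPU_LIMITS
    valueFrom:
      resourceFieldRef:
        containerName: ray-head      # must match the container's name
        resource: limits.cpu         # resolves to the container's CPU limit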