ray-operator/config/samples/ray-cluster.complete.yaml

# The resource requests and limits in this config are too small for production!
# For examples with more realistic resource configuration, see
# ray-cluster.complete.large.yaml and
# ray-cluster.autoscaler.large.yaml.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
    # A unique identifier for the head node and workers of this cluster.
  name: raycluster-complete
spec:
  rayVersion: '2.3.0'
  # Ray head pod configuration
  headGroupSpec:
    # Kubernetes Service Type. This is an optional field, and the default value is ClusterIP.
    # Refer to https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types.
    serviceType: ClusterIP
    # the following params are used to complete the ray start: ray start --head --block --dashboard-host: '0.0.0.0' ...
    rayStartParams:
      dashboard-host: '0.0.0.0'
      block: 'true'
    # pod template
    template:
      metadata:
        # Custom labels. NOTE: To avoid conflicts with KubeRay operator, do not define custom labels start with `raycluster`.
        # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        labels: {}
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray:2.3.0
          ports:
          - containerPort: 6379
            name: gcs
          - containerPort: 8265
            name: dashboard
          - containerPort: 10001
            name: client
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
          volumeMounts:
            - mountPath: /tmp/ray
              name: ray-logs
          # The resource requests and limits in this config are too small for production!
          # For an example with more realistic resource configuration, see
          # ray-cluster.autoscaler.large.yaml.
          # It is better to use a few large Ray pod than many small ones.
          # For production, it is ideal to size each Ray pod to take up the
          # entire Kubernetes node on which it is scheduled.
          resources:
            limits:
              cpu: "1"
              memory: "2G"
            requests:
              # For production use-cases, we recommend specifying integer CPU reqests and limits.
              # We also recommend setting requests equal to limits for both CPU and memory.
              # For this example, we use a 500m CPU request to accomodate resource-constrained local
              # Kubernetes testing environments such as KinD and minikube.
              cpu: "500m"
              memory: "2G"
        volumes:
          - name: ray-logs
            emptyDir: {}
  workerGroupSpecs:
  # the pod replicas in this group typed worker
  - replicas: 1
    minReplicas: 1
    maxReplicas: 10
    # logical group name, for this called small-group, also can be functional
    groupName: small-group
    # If worker pods need to be added, we can increment the replicas.
    # If worker pods need to be removed, we decrement the replicas, and populate the workersToDelete list.
    # The operator will remove pods from the list until the desired number of replicas is satisfied.
    # If the difference between the current replica count and the desired replicas is greater than the
    # number of entries in workersToDelete, random worker pods will be deleted.
    #scaleStrategy:
    #  workersToDelete:
    #  - raycluster-complete-worker-small-group-bdtwh
    #  - raycluster-complete-worker-small-group-hv457
    #  - raycluster-complete-worker-small-group-k8tj7
    # the following params are used to complete the ray start: ray start --block
    rayStartParams:
      block: 'true'
    #pod template
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray:2.3.0
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
          # use volumeMounts.Optional.
          # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
          volumeMounts:
            - mountPath: /tmp/ray
              name: ray-logs
          # The resource requests and limits in this config are too small for production!
          # For an example with more realistic resource configuration, see
          # ray-cluster.autoscaler.large.yaml.
          # It is better to use a few large Ray pod than many small ones.
          # For production, it is ideal to size each Ray pod to take up the
          # entire Kubernetes node on which it is scheduled.
          resources:
            limits:
              cpu: "1"
              memory: "1G"
            # For production use-cases, we recommend specifying integer CPU reqests and limits.
            # We also recommend setting requests equal to limits for both CPU and memory.
            # For this example, we use a 500m CPU request to accomodate resource-constrained local
            # Kubernetes testing environments such as KinD and minikube.
            requests:
              # For production use-cases, we recommend specifying integer CPU reqests and limits.
              # We also recommend setting requests equal to limits for both CPU and memory.
              # For this example, we use a 500m CPU request to accomodate resource-constrained local
              # Kubernetes testing environments such as KinD and minikube.
              cpu: "500m"
              # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container.
              memory: "1G"
        initContainers:
        # the env var $FQ_RAY_IP is set by the operator if missing, with the value of the head service name
        - name: init
          image: busybox:1.28
          # Change the cluster postfix if you don't have a default setting
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for K8s Service $RAY_IP; sleep 2; done"]
        # use volumes
        # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
        volumes:
          - name: ray-logs
            emptyDir: {}
######################status#################################