Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP #1132

Closed
Closed

WIP #1132

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion ray-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ help: ## Display this help.
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=kuberay-operator webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# The "## ..." help text stays on the target line, matching every other target in this Makefile.
generate: controller-gen conversion-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
	$(CONVERSION_GEN) \
		--input-dirs "./apis/ray/v1beta1" \
		--go-header-file "./hack/boilerplate.go.txt" \
		--output-file-base "zz_generated.conversion" --v 2

helm: manifests kustomize ## Sync the CRDs into the Helm chart
rm -r ../helm-chart/kuberay-operator/crds/
Expand Down Expand Up @@ -117,6 +122,10 @@ GOFUMPT = $(shell pwd)/bin/gofumpt
gofumpt: ## Download gofumpt locally if necessary.
$(call go-get-tool,$(GOFUMPT),mvdan.cc/gofumpt@latest)

CONVERSION_GEN = $(shell pwd)/bin/conversion-gen
conversion-gen: ## Download conversion-gen locally if necessary.
$(call go-get-tool,$(CONVERSION_GEN),k8s.io/code-generator/cmd/[email protected])

# go-get-tool will 'go get' any package $2 and install it to $1.
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
define go-get-tool
Expand Down
23 changes: 23 additions & 0 deletions ray-operator/PROJECT
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Code generated by tool. DO NOT EDIT.
# This file is used to track the info used to scaffold your project
# and allow the plugins properly work.
# More info: https://book.kubebuilder.io/reference/project-config.html
domain: io
layout:
- go.kubebuilder.io/v3
Expand All @@ -23,6 +27,9 @@ resources:
kind: RayService
path: github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1
version: v1alpha1
webhooks:
conversion: true
webhookVersion: v1
- api:
crdVersion: v1
namespaced: true
Expand All @@ -32,4 +39,20 @@ resources:
kind: RayJob
path: github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
domain: io
group: ray
kind: RayService
path: github.com/ray-project/kuberay/ray-operator/apis/ray/v1beta1
version: v1beta1
- api:
crdVersion: v1
namespaced: true
domain: io
group: ray
kind: RayCluster
path: github.com/ray-project/kuberay/ray-operator/apis/ray/v1beta1
version: v1beta1
version: "3"
1 change: 1 addition & 0 deletions ray-operator/apis/ray/v1alpha1/doc.go
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
// +groupName=ray.io
// +kubebuilder:object:generate=true
package v1alpha1
6 changes: 5 additions & 1 deletion ray-operator/apis/ray/v1alpha1/raycluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ import (

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// var app appsv1.Deployment{}

// Hub marks the v1alpha1 RayCluster as a conversion hub.
// The method body is empty; it exists only so that the type carries a Hub method.
func (*RayCluster) Hub() {}

// RayClusterSpec defines the desired state of RayCluster
type RayClusterSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Expand Down Expand Up @@ -155,6 +158,7 @@ const (
// RayCluster is the Schema for the RayClusters API
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:storageversion
// +kubebuilder:printcolumn:name="desired workers",type=integer,JSONPath=".status.desiredWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="available workers",type=integer,JSONPath=".status.availableWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="status",type="string",JSONPath=".status.state",priority=0
Expand Down
10 changes: 7 additions & 3 deletions ray-operator/apis/ray/v1alpha1/rayservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// Hub marks the v1alpha1 RayService as a conversion hub.
// The method body is empty; it exists only so that the type carries a Hub method.
func (*RayService) Hub() {}

type ServiceStatus string

const (
Expand Down Expand Up @@ -142,9 +145,10 @@ type ServeDeploymentStatus struct {
HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+genclient
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:storageversion
// +genclient
// RayService is the Schema for the rayservices API
type RayService struct {
metav1.TypeMeta `json:",inline"`
Expand Down
17 changes: 17 additions & 0 deletions ray-operator/apis/ray/v1alpha1/rayservice_webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package v1alpha1

import (
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
)

// rayservicelog is the logger for this package, named "rayservice-resource".
// NOTE(review): nothing in this file references it yet.
var rayservicelog = logf.Log.WithName("rayservice-resource")

// SetupWebhookWithManager builds a controller-runtime webhook for RayService
// that is managed by mgr. It returns whatever error the builder's Complete
// call reports.
func (r *RayService) SetupWebhookWithManager(mgr ctrl.Manager) error {
	webhookBuilder := ctrl.NewWebhookManagedBy(mgr).For(r)
	return webhookBuilder.Complete()
}

// TODO(user): EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
3 changes: 3 additions & 0 deletions ray-operator/apis/ray/v1beta1/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// +groupName=ray.io
// +k8s:conversion-gen=github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1
package v1beta1
23 changes: 23 additions & 0 deletions ray-operator/apis/ray/v1beta1/groupversion_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Package v1beta1 contains API Schema definitions for the ray v1beta1 API group
// +kubebuilder:object:generate=true
// +groupName=ray.io
package v1beta1

import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
// GroupVersion is the group version used to register these objects.
GroupVersion = schema.GroupVersion{Group: "ray.io", Version: "v1beta1"}

// SchemeBuilder is used to add go types to the GroupVersionKind scheme.
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme

// localSchemeBuilder is a runtime.SchemeBuilder seeded with SchemeBuilder.AddToScheme.
// NOTE(review): nothing in this file references it; presumably the conversion-gen
// output (zz_generated.conversion) registers its conversion functions through it — confirm.
localSchemeBuilder = runtime.NewSchemeBuilder(SchemeBuilder.AddToScheme)
)
24 changes: 24 additions & 0 deletions ray-operator/apis/ray/v1beta1/raycluster_conversion.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package v1beta1

import (
rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/conversion"
)

// ConvertTo copies this v1beta1 RayCluster into the Hub version (v1alpha1).
//
// dstRaw must hold a *rayv1alpha1.RayCluster: the type assertion is unchecked
// and panics for any other type. The conversion itself is delegated to
// Convert_v1beta1_RayCluster_To_v1alpha1_RayCluster, called with a nil scope,
// and its error (if any) is returned unchanged.
func (src *RayCluster) ConvertTo(dstRaw conversion.Hub) error {
	hub := dstRaw.(*rayv1alpha1.RayCluster)
	return Convert_v1beta1_RayCluster_To_v1alpha1_RayCluster(src, hub, nil)
}

// ConvertFrom populates this v1beta1 RayCluster from the Hub version (v1alpha1).
//
// srcRaw must hold a *rayv1alpha1.RayCluster: the type assertion is unchecked
// and panics for any other type. The conversion itself is delegated to
// Convert_v1alpha1_RayCluster_To_v1beta1_RayCluster, called with a nil scope,
// and its error (if any) is returned unchanged.
func (dst *RayCluster) ConvertFrom(srcRaw conversion.Hub) error {
	hub := srcRaw.(*rayv1alpha1.RayCluster)
	return Convert_v1alpha1_RayCluster_To_v1beta1_RayCluster(hub, dst, nil)
}
193 changes: 193 additions & 0 deletions ray-operator/apis/ray/v1beta1/raycluster_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
package v1beta1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// RayClusterSpec defines the desired state of RayCluster.
type RayClusterSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file
// HeadGroupSpec is the spec for the head pod.
HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"`
// WorkerGroupSpecs are the specs for the worker pods.
WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"`
// RayVersion is the version of ray being used. This determines the autoscaler's image version.
RayVersion string `json:"rayVersion,omitempty"`
// EnableInTreeAutoscaling indicates whether the operator should create in-tree autoscaling configs.
EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"`
// AutoscalerOptions specifies optional configuration for the Ray autoscaler.
AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"`
HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"`
}

// HeadGroupSpec is the spec for the head pod.
type HeadGroupSpec struct {
// ServiceType is the Kubernetes service type of the head service. It will be used by the workers to connect to the head pod.
ServiceType v1.ServiceType `json:"serviceType,omitempty"`
// HeadService is the Kubernetes service of the head pod.
HeadService *v1.Service `json:"headService,omitempty"`
// EnableIngress indicates whether the operator should create an ingress object for the head service or not.
EnableIngress *bool `json:"enableIngress,omitempty"`
// HeadGroupSpec.Replicas is deprecated and ignored; there can only be one head pod per Ray cluster.
Replicas *int32 `json:"replicas,omitempty"`
// RayStartParams are the params of the start command: node-manager-port, object-store-memory, ...
RayStartParams map[string]string `json:"rayStartParams"`
// Template is the exact pod template used in K8s deployments, statefulsets, etc.
Template v1.PodTemplateSpec `json:"template"`
}

// WorkerGroupSpec is the spec for one group of worker pods.
type WorkerGroupSpec struct {
// GroupName is the name of this worker group. There can be multiple worker groups; they are distinguished by name.
GroupName string `json:"groupName"`
// Replicas is the number of desired pods in this pod group. This is a pointer to distinguish between explicit
// zero and not specified. Defaults to 1.
Replicas *int32 `json:"replicas"`
// MinReplicas defaults to 1
MinReplicas *int32 `json:"minReplicas"`
// MaxReplicas defaults to maxInt32
MaxReplicas *int32 `json:"maxReplicas"`
// RayStartParams are the params of the start command: address, object-store-memory, ...
RayStartParams map[string]string `json:"rayStartParams"`
// Template is a pod template for the worker
Template v1.PodTemplateSpec `json:"template"`
// ScaleStrategy defines which pods to remove
ScaleStrategy ScaleStrategy `json:"scaleStrategy,omitempty"`
}

// ScaleStrategy describes which workers to remove.
type ScaleStrategy struct {
// WorkersToDelete lists the workers to be deleted.
WorkersToDelete []string `json:"workersToDelete,omitempty"`
}

// AutoscalerOptions specifies optional configuration for the Ray autoscaler.
type AutoscalerOptions struct {
// Resources specifies optional resource request and limit overrides for the autoscaler container.
// Default values: 500m CPU request and limit. 512Mi memory request and limit.
Resources *v1.ResourceRequirements `json:"resources,omitempty"`
// Image optionally overrides the autoscaler's container image. This override is provided for autoscaler testing and development.
Image *string `json:"image,omitempty"`
// ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is provided for autoscaler testing and development.
ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"`
// Env is an optional list of environment variables to set in the autoscaler container.
Env []v1.EnvVar `json:"env,omitempty"`
// EnvFrom is an optional list of sources to populate environment variables in the autoscaler container.
EnvFrom []v1.EnvFromSource `json:"envFrom,omitempty"`
// VolumeMounts is an optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container.
VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"`
// SecurityContext defines the security options the container should be run with.
// If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
// More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"`
// IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
// Defaults to 60 (one minute).
IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"`
// UpscalingMode is "Conservative", "Default", or "Aggressive."
// Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
// Default: Upscaling is not rate-limited.
// Aggressive: An alias for Default; upscaling is not rate-limited.
UpscalingMode *UpscalingMode `json:"upscalingMode,omitempty"`
}

// UpscalingMode is the autoscaler upscaling mode. The validation marker below
// restricts it to Default, Aggressive, or Conservative.
// +kubebuilder:validation:Enum=Default;Aggressive;Conservative
type UpscalingMode string

// ClusterState is the overall state of the Ray cluster.
type ClusterState string

// Possible values of ClusterState.
const (
Ready ClusterState = "ready"
Unhealthy ClusterState = "unhealthy"
Failed ClusterState = "failed"
)

// RayClusterStatus defines the observed state of RayCluster.
type RayClusterStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
// State reflects the status of the cluster.
State ClusterState `json:"state,omitempty"`
// AvailableWorkerReplicas indicates how many replicas are available in the cluster
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"`
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level.
DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"`
// MinWorkerReplicas indicates sum of minimum replicas of each node group.
MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"`
// MaxWorkerReplicas indicates sum of maximum replicas of each node group.
MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"`
// LastUpdateTime indicates last update timestamp for this cluster status.
// +nullable
LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"`
// Endpoints holds the service endpoints.
Endpoints map[string]string `json:"endpoints,omitempty"`
// Head holds information about the head.
Head HeadInfo `json:"head,omitempty"`
// Reason provides more information about current State
Reason string `json:"reason,omitempty"`
// observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the
// RayCluster's generation, which is updated on mutation by the API Server.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

// HeadInfo gives information about the head.
type HeadInfo struct {
// PodIP is the head pod IP.
PodIP string `json:"podIP,omitempty"`
// ServiceIP is the head service IP.
ServiceIP string `json:"serviceIP,omitempty"`
}

// RayNodeType is the type of a ray node: head or worker.
type RayNodeType string

const (
// HeadNode means that this pod will be the ray cluster head.
HeadNode RayNodeType = "head"
// WorkerNode means that this pod will be a ray cluster worker.
WorkerNode RayNodeType = "worker"
)

// RayCluster is the Schema for the RayClusters API
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="desired workers",type=integer,JSONPath=".status.desiredWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="available workers",type=integer,JSONPath=".status.availableWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="status",type="string",JSONPath=".status.state",priority=0
// +kubebuilder:printcolumn:name="age",type="date",JSONPath=".metadata.creationTimestamp",priority=0
// +kubebuilder:printcolumn:name="head pod IP",type="string",JSONPath=".status.head.podIP",priority=1
// +kubebuilder:printcolumn:name="head service IP",type="string",JSONPath=".status.head.serviceIP",priority=1
// +genclient
type RayCluster struct {
// Standard object metadata.
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Specification of the desired behavior of the RayCluster.
Spec RayClusterSpec `json:"spec,omitempty"`
// Observed state of the RayCluster.
Status RayClusterStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// RayClusterList contains a list of RayCluster.
type RayClusterList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
// Items is the list of RayCluster objects.
Items []RayCluster `json:"items"`
}

// init registers the RayCluster and RayClusterList types with this package's SchemeBuilder.
func init() {
SchemeBuilder.Register(&RayCluster{}, &RayClusterList{})
}

// EventReason is a string-typed reason code.
// NOTE(review): presumably used as the reason on emitted Kubernetes events — confirm against callers.
type EventReason string

// Known EventReason values.
const (
RayConfigError EventReason = "RayConfigError"
PodReconciliationError EventReason = "PodReconciliationError"
)
Loading