planetscale · siadat · Aug 30, 2024 · Sep 10, 2024 · Sep 12, 2024 · Sep 18, 2024
diff --git a/Makefile b/Makefile
@@ -77,3 +77,7 @@ vtorc-vtadmin-test: build e2e-test-setup
 unmanaged-tablet-test: build e2e-test-setup
 	echo "Running Unmanaged Tablet test"
 	test/endtoend/unmanaged_tablet_test.sh
+
+hpa-test: build e2e-test-setup
+	echo "Running HPA test"
+	test/endtoend/hpa_test.sh
diff --git a/deploy/crds/planetscale.com_vitesscells.yaml b/deploy/crds/planetscale.com_vitesscells.yaml
diff --git a/deploy/crds/planetscale.com_vitessclusters.yaml b/deploy/crds/planetscale.com_vitessclusters.yaml
diff --git a/deploy/role.yaml b/deploy/role.yaml
@@ -78,4 +78,10 @@ rules:
   resources:
   - jobs
   verbs:
-  - '*'
+  - '*'
+- apiGroups:
+  - autoscaling
+  resources:
+  - horizontalpodautoscalers
+  verbs:
+  - '*'
diff --git a/docs/api/index.html b/docs/api/index.html
@@ -506,6 +506,78 @@ <h3 id="planetscale.com/v2.VitessCluster">VitessCluster
 </tr>
 </tbody>
 </table>
+<h3 id="planetscale.com/v2.AutoscalerSpec">AutoscalerSpec
+</h3>
+<p>
+(<em>Appears on:</em>
+<a href="#planetscale.com/v2.VitessCellGatewaySpec">VitessCellGatewaySpec</a>)
+</p>
+<p>
+<p>AutoscalerSpec defines the vtgate&rsquo;s pod autoscaling specification.</p>
+</p>
+<table class="table table-striped">
+<thead class="thead-dark">
+<tr>
+<th>Field</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>
+<code>minReplicas</code></br>
+<em>
+int32
+</em>
+</td>
+<td>
+<p>MinReplicas is the minimum number of instances of vtgate to run in
+this cell when autoscaling is enabled.</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>maxReplicas</code></br>
+<em>
+int32
+</em>
+</td>
+<td>
+<p>MaxReplicas is the maximum number of instances of vtgate to run in
+this cell when autoscaling is enabled.</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>behavior</code></br>
+<em>
+<a href="https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#horizontalpodautoscalerbehavior-v2-autoscaling">
+Kubernetes autoscaling/v2.HorizontalPodAutoscalerBehavior
+</a>
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+</td>
+</tr>
+<tr>
+<td>
+<code>metrics</code></br>
+<em>
+<a href="https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#metricspec-v2-autoscaling">
+[]Kubernetes autoscaling/v2.MetricSpec
+</a>
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>Metrics is meant to provide a customizable way to configure HPA metrics.
+currently the only supported custom metrics is type=Pod.
+Use TargetCPUUtilization or TargetMemoryUtilization instead if scaling on these common resource metrics.</p>
+</td>
+</tr>
+</tbody>
+</table>
 <h3 id="planetscale.com/v2.AzblobBackupLocation">AzblobBackupLocation
 </h3>
 <p>
@@ -3337,6 +3409,21 @@ <h3 id="planetscale.com/v2.VitessCellGatewaySpec">VitessCellGatewaySpec
 </tr>
 <tr>
 <td>
+<code>autoscaler</code></br>
+<em>
+<a href="#planetscale.com/v2.AutoscalerSpec">
+AutoscalerSpec
+</a>
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>Autoscaler specifies the pod autoscaling configuration to use
+for the vtgate workload.</p>
+</td>
+</tr>
+<tr>
+<td>
 <code>resources</code></br>
 <em>
 <a href="https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#resourcerequirements-v1-core">
@@ -3614,6 +3701,30 @@ <h3 id="planetscale.com/v2.VitessCellGatewayStatus">VitessCellGatewayStatus
 <p>ServiceName is the name of the Service for this cell&rsquo;s vtgate.</p>
 </td>
 </tr>
+<tr>
+<td>
+<code>labelSelector</code></br>
+<em>
+string
+</em>
+</td>
+<td>
+<p>LabelSelector is required by the Scale subresource, which is used by
+HorizontalPodAutoscaler when reading pod metrics.</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>replicas</code></br>
+<em>
+int32
+</em>
+</td>
+<td>
+<p>Replicas is required by the Scale subresource, which is used by
+HorizontalPodAutoscaler to determine the current number of replicas.</p>
+</td>
+</tr>
 </tbody>
 </table>
 <h3 id="planetscale.com/v2.VitessCellImages">VitessCellImages

diff --git a/pkg/apis/planetscale/v2/vitesscell_types.go b/pkg/apis/planetscale/v2/vitesscell_types.go
@@ -17,6 +17,7 @@ limitations under the License.
 package v2
 
 import (
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -39,6 +40,7 @@ import (
 // just like a Deployment can manage Pods that run on multiple Nodes.
 // +kubebuilder:resource:path=vitesscells,shortName=vtc
 // +kubebuilder:subresource:status
+// +kubebuilder:subresource:scale:specpath=.spec.gateway.replicas,statuspath=.status.gateway.replicas,selectorpath=.status.gateway.labelSelector
 type VitessCell struct {
 	metav1.TypeMeta   `json:",inline"`
 	metav1.ObjectMeta `json:"metadata,omitempty"`
@@ -117,12 +119,39 @@ type VitessCellImages struct {
 	Vtgate string `json:"vtgate,omitempty"`
 }
 
+// AutoscalerSpec defines the vtgate's pod autoscaling specification.
+type AutoscalerSpec struct {
+	// MinReplicas is the minimum number of instances of vtgate to run in
+	// this cell when autoscaling is enabled.
+	// +kubebuilder:validation:Minimum=0
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+
+	// MaxReplicas is the maximum number of instances of vtgate to run in
+	// this cell when autoscaling is enabled.
+	// +kubebuilder:validation:Minimum=0
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+
+	// +optional
+	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
+
+	// Metrics is meant to provide a customizable way to configure HPA metrics.
+	// currently the only supported custom metrics is type=Pod.
+	// Use TargetCPUUtilization or TargetMemoryUtilization instead if scaling on these common resource metrics.
+	// +optional
+	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
+}
+
 // VitessCellGatewaySpec specifies the per-cell deployment parameters for vtgate.
 type VitessCellGatewaySpec struct {
 	// Replicas is the number of vtgate instances to deploy in this cell.
 	// +kubebuilder:validation:Minimum=0
 	Replicas *int32 `json:"replicas,omitempty"`
 
+	// Autoscaler specifies the pod autoscaling configuration to use
+	// for the vtgate workload.
+	// +optional
+	Autoscaler *AutoscalerSpec `json:"autoscaler,omitempty"`
+
 	// Resources determines the compute resources reserved for each vtgate replica.
 	Resources corev1.ResourceRequirements `json:"resources,omitempty"`
 
@@ -252,6 +281,13 @@ type VitessCellGatewayStatus struct {
 	Available corev1.ConditionStatus `json:"available,omitempty"`
 	// ServiceName is the name of the Service for this cell's vtgate.
 	ServiceName string `json:"serviceName,omitempty"`
+	// LabelSelector is required by the Scale subresource, which is used by
+	// HorizontalPodAutoscaler when reading pod metrics.
+	LabelSelector string `json:"labelSelector,omitempty"`
+	// Replicas is required by the Scale subresource, which is used by
+	// HorizontalPodAutoscaler to determine the current number of replicas.
+	// +kubebuilder:validation:Minimum=0
+	Replicas int32 `json:"replicas,omitempty"`
 }
 
 // VitessCellStatus defines the observed state of VitessCell

diff --git a/pkg/apis/planetscale/v2/zz_generated.deepcopy.go b/pkg/apis/planetscale/v2/zz_generated.deepcopy.go
diff --git a/pkg/controller/vitesscell/reconcile_vtgate.go b/pkg/controller/vitesscell/reconcile_vtgate.go
@@ -20,6 +20,7 @@ import (
 	"context"
 
 	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	apitypes "k8s.io/apimachinery/pkg/types"
@@ -161,6 +162,10 @@ func (r *ReconcileVitessCell) reconcileVtgate(ctx context.Context, vtc *planetsc
 			curObj := obj.(*appsv1.Deployment)
 
 			status := &vtc.Status.Gateway
+			if replicas := curObj.Spec.Replicas; replicas != nil {
+				status.Replicas = *replicas
+			}
+			status.LabelSelector = curObj.Spec.Selector.String()
 			if available := conditions.Deployment(curObj.Status.Conditions, appsv1.DeploymentAvailable); available != nil {
 				status.Available = available.Status
 			}
@@ -170,5 +175,35 @@ func (r *ReconcileVitessCell) reconcileVtgate(ctx context.Context, vtc *planetsc
 		resultBuilder.Error(err)
 	}
 
+	var wantHpa bool
+	var hpaSpec *vtgate.HpaSpec
+
+	if vtc.Spec.Gateway.Autoscaler != nil {
+		wantHpa = vtc.Spec.Gateway.Autoscaler.MaxReplicas != nil
+		hpaSpec = &vtgate.HpaSpec{
+			Labels:      labels,
+			MinReplicas: vtc.Spec.Gateway.Autoscaler.MinReplicas,
+			MaxReplicas: vtc.Spec.Gateway.Autoscaler.MaxReplicas,
+			Behavior:    vtc.Spec.Gateway.Autoscaler.Behavior,
+			Metrics:     vtc.Spec.Gateway.Autoscaler.Metrics,
+		}
+	}
+
+	// Reconcile vtgate HorizontalPodAutoscaler.
+	err = r.reconciler.ReconcileObject(ctx, vtc, key, labels, wantHpa, reconciler.Strategy{
+		Kind: &autoscalingv2.HorizontalPodAutoscaler{},
+
+		New: func(key client.ObjectKey) runtime.Object {
+			return vtgate.NewHorizontalPodAutoscaler(key, hpaSpec)
+		},
+		UpdateInPlace: func(key client.ObjectKey, obj runtime.Object) {
+			newObj := obj.(*autoscalingv2.HorizontalPodAutoscaler)
+			vtgate.UpdateHorizontalPodAutoscaler(newObj, hpaSpec)
+		},
+	})
+	if err != nil {
+		resultBuilder.Error(err)
+	}
+
 	return resultBuilder.Result()
 }
diff --git a/pkg/controller/vitesscell/vitesscell_controller.go b/pkg/controller/vitesscell/vitesscell_controller.go
@@ -37,6 +37,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 	"sigs.k8s.io/controller-runtime/pkg/source"
 
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	planetscalev2 "planetscale.dev/vitess-operator/pkg/apis/planetscale/v2"
 	"planetscale.dev/vitess-operator/pkg/operator/environment"
 	"planetscale.dev/vitess-operator/pkg/operator/metrics"
@@ -60,6 +61,7 @@ var log = logrus.WithField("controller", "VitessCell")
 var watchResources = []client.Object{
 	&corev1.Service{},
 	&appsv1.Deployment{},
+	&autoscalingv2.HorizontalPodAutoscaler{},
 
 	&planetscalev2.EtcdLockserver{},
 }

diff --git a/pkg/controller/vitesscluster/reconcile_cells.go b/pkg/controller/vitesscluster/reconcile_cells.go
@@ -152,9 +152,12 @@ func updateVitessCellInPlace(key client.ObjectKey, vtc *planetscalev2.VitessCell
 	// Update labels, but ignore existing ones we don't set.
 	update.Labels(&vtc.Labels, newCell.Labels)
 
-	// We allow immediate update of replica counts for stateless workloads,
-	// like Deployment does.
-	vtc.Spec.Gateway.Replicas = newCell.Spec.Gateway.Replicas
+	// Only update replicas if autoscaling is disabled; with it enabled the HPA sets spec.gateway.replicas via the scale subresource.
+	if vtc.Spec.Gateway.Autoscaler == nil || vtc.Spec.Gateway.Autoscaler.MaxReplicas == nil {
+		// We allow immediate update of replica counts for stateless workloads,
+		// like Deployment does.
+		vtc.Spec.Gateway.Replicas = newCell.Spec.Gateway.Replicas
+	}
 }
 
 func updateVitessCell(key client.ObjectKey, vtc *planetscalev2.VitessCell, vt *planetscalev2.VitessCluster, parentLabels map[string]string, cell *planetscalev2.VitessCellTemplate) {