feat: scaleUpCooldownSeconds and scaleDownCooldownSeconds to replace cooldownSeconds (#1008)

Signed-off-by: Derek Wang <[email protected]>
numaproj · Aug 31, 2023 · aabb8af · aabb8af
1 parent 9d06332
commit aabb8af
Show file tree

Hide file tree

Showing 15 changed files with 373 additions and 154 deletions.
diff --git a/api/json-schema/schema.json b/api/json-schema/schema.json
@@ -19063,7 +19063,7 @@
       "description": "Scale defines the parameters for autoscaling.",
       "properties": {
         "cooldownSeconds": {
-          "description": "Cooldown seconds after a scaling operation before another one.",
+          "description": "Deprecated: Use scaleUpCooldownSeconds and scaleDownCooldownSeconds instead. Cooldown seconds after a scaling operation before another one.",
           "format": "int64",
           "type": "integer"
         },
@@ -19091,6 +19091,16 @@
           "format": "int64",
           "type": "integer"
         },
+        "scaleDownCooldownSeconds": {
+          "description": "ScaleDownCooldownSeconds defines the cooldown seconds after a scaling operation, before a follow-up scaling down. It defaults to the CooldownSeconds if not set.",
+          "format": "int64",
+          "type": "integer"
+        },
+        "scaleUpCooldownSeconds": {
+          "description": "ScaleUpCooldownSeconds defines the cooldown seconds after a scaling operation, before a follow-up scaling up. It defaults to the CooldownSeconds if not set.",
+          "format": "int64",
+          "type": "integer"
+        },
         "targetBufferAvailability": {
           "description": "TargetBufferAvailability is used to define the target percentage of the buffer availability. A valid and meaningful value should be less than the BufferUsageLimit defined in the Edge spec (or Pipeline spec), for example, 50. It only applies to UDF and Sink vertices because only they have buffers to read.",
           "format": "int64",

diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json
@@ -19050,7 +19050,7 @@
       "type": "object",
       "properties": {
         "cooldownSeconds": {
-          "description": "Cooldown seconds after a scaling operation before another one.",
+          "description": "Deprecated: Use scaleUpCooldownSeconds and scaleDownCooldownSeconds instead. Cooldown seconds after a scaling operation before another one.",
           "type": "integer",
           "format": "int64"
         },
@@ -19078,6 +19078,16 @@
           "type": "integer",
           "format": "int64"
         },
+        "scaleDownCooldownSeconds": {
+          "description": "ScaleDownCooldownSeconds defines the cooldown seconds after a scaling operation, before a follow-up scaling down. It defaults to the CooldownSeconds if not set.",
+          "type": "integer",
+          "format": "int64"
+        },
+        "scaleUpCooldownSeconds": {
+          "description": "ScaleUpCooldownSeconds defines the cooldown seconds after a scaling operation, before a follow-up scaling up. It defaults to the CooldownSeconds if not set.",
+          "type": "integer",
+          "format": "int64"
+        },
         "targetBufferAvailability": {
           "description": "TargetBufferAvailability is used to define the target percentage of the buffer availability. A valid and meaningful value should be less than the BufferUsageLimit defined in the Edge spec (or Pipeline spec), for example, 50. It only applies to UDF and Sink vertices because only they have buffers to read.",
           "type": "integer",

diff --git a/config/base/crds/full/numaflow.numaproj.io_pipelines.yaml b/config/base/crds/full/numaflow.numaproj.io_pipelines.yaml
@@ -5687,6 +5687,12 @@ spec:
                         replicasPerScale:
                           format: int32
                           type: integer
+                        scaleDownCooldownSeconds:
+                          format: int32
+                          type: integer
+                        scaleUpCooldownSeconds:
+                          format: int32
+                          type: integer
                         targetBufferAvailability:
                           format: int32
                           type: integer

diff --git a/config/base/crds/full/numaflow.numaproj.io_vertices.yaml b/config/base/crds/full/numaflow.numaproj.io_vertices.yaml
@@ -1530,6 +1530,12 @@ spec:
                   replicasPerScale:
                     format: int32
                     type: integer
+                  scaleDownCooldownSeconds:
+                    format: int32
+                    type: integer
+                  scaleUpCooldownSeconds:
+                    format: int32
+                    type: integer
                   targetBufferAvailability:
                     format: int32
                     type: integer

diff --git a/config/install.yaml b/config/install.yaml
@@ -8216,6 +8216,12 @@ spec:
                         replicasPerScale:
                           format: int32
                           type: integer
+                        scaleDownCooldownSeconds:
+                          format: int32
+                          type: integer
+                        scaleUpCooldownSeconds:
+                          format: int32
+                          type: integer
                         targetBufferAvailability:
                           format: int32
                           type: integer
@@ -12871,6 +12877,12 @@ spec:
                   replicasPerScale:
                     format: int32
                     type: integer
+                  scaleDownCooldownSeconds:
+                    format: int32
+                    type: integer
+                  scaleUpCooldownSeconds:
+                    format: int32
+                    type: integer
                   targetBufferAvailability:
                     format: int32
                     type: integer

diff --git a/config/namespace-install.yaml b/config/namespace-install.yaml
@@ -8216,6 +8216,12 @@ spec:
                         replicasPerScale:
                           format: int32
                           type: integer
+                        scaleDownCooldownSeconds:
+                          format: int32
+                          type: integer
+                        scaleUpCooldownSeconds:
+                          format: int32
+                          type: integer
                         targetBufferAvailability:
                           format: int32
                           type: integer
@@ -12871,6 +12877,12 @@ spec:
                   replicasPerScale:
                     format: int32
                     type: integer
+                  scaleDownCooldownSeconds:
+                    format: int32
+                    type: integer
+                  scaleUpCooldownSeconds:
+                    format: int32
+                    type: integer
                   targetBufferAvailability:
                     format: int32
                     type: integer

diff --git a/docs/APIs.md b/docs/APIs.md
@@ -4054,7 +4054,8 @@ processing rate.
 <td>
 <em>(Optional)</em>
 <p>
-Cooldown seconds after a scaling operation before another one.
+Deprecated: Use scaleUpCooldownSeconds and scaleDownCooldownSeconds
+instead. Cooldown seconds after a scaling operation before another one.
 </p>
 </td>
 </tr>
@@ -4112,6 +4113,32 @@ once. The is use to prevent too aggressive scaling operations
 </p>
 </td>
 </tr>
+<tr>
+<td>
+<code>scaleUpCooldownSeconds</code><br/> <em> uint32 </em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>
+ScaleUpCooldownSeconds defines the cooldown seconds after a scaling
+operation, before a follow-up scaling up. It defaults to the
+CooldownSeconds if not set.
+</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>scaleDownCooldownSeconds</code><br/> <em> uint32 </em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>
+ScaleDownCooldownSeconds defines the cooldown seconds after a scaling
+operation, before a follow-up scaling down. It defaults to the
+CooldownSeconds if not set.
+</p>
+</td>
+</tr>
 </tbody>
 </table>
 <h3 id="numaflow.numaproj.io/v1alpha1.SideInput">

diff --git a/docs/user-guide/reference/autoscaling.md b/docs/user-guide/reference/autoscaling.md
@@ -32,7 +32,8 @@ spec:
         min: 0 # Optional, minimum replicas, defaults to 0.
         max: 20 # Optional, maximum replicas, defaults to 50.
         lookbackSeconds: 120 # Optional, defaults to 120.
-        cooldownSeconds: 90 # Optional, defaults to 90.
+        scaleUpCooldownSeconds: 90 # Optional, defaults to 90.
+        scaleDownCooldownSeconds: 90 # Optional, defaults to 90.
         zeroReplicaSleepSeconds: 120 # Optional, defaults to 120.
         targetProcessingSeconds: 20 # Optional, defaults to 20.
         targetBufferAvailability: 50 # Optional, defaults to 50.
@@ -43,7 +44,9 @@ spec:
 - `min` - Minimum replicas, valid value could be an integer >= 0. Defaults to `0`, which means it could be scaled down to 0.
 - `max` - Maximum replicas, positive integer which should not be less than `min`, defaults to `50`. if `max` and `min` are the same, that will be the fixed replica number.
 - `lookbackSeconds` - How many seconds to look back for vertex average processing rate (tps) and pending messages calculation, defaults to `120`. Rate and pending messages metrics are critical for autoscaling, you might need to tune this parameter a bit to see better results. For example, your data source only has 1 minute of data input in every 5 minutes, and you don't want the vertices to be scaled down to `0`. In this case, you need to increase `lookbackSeconds` to cover all the 5 minutes, so that the calculated average rate and pending messages won't be `0` during the silent period, to prevent scaling down to 0 from happening.
-- `cooldownSeconds` - After a scaling operation, how many seconds to wait before doing another scaling on the same vertex. This is to give some time for a vertex to stabilize, defaults to 90 seconds.
+- `cooldownSeconds` - **Deprecated**, use `scaleUpCooldownSeconds` and `scaleDownCooldownSeconds` instead. After a scaling operation, how many seconds to wait before doing another scaling on the same vertex. This is to give some time for a vertex to stabilize, defaults to 90 seconds.
+- `scaleUpCooldownSeconds` - After a scaling operation, how many seconds to wait before scaling up the same vertex. Defaults to the value of `cooldownSeconds` if that is set, otherwise `90`.
+- `scaleDownCooldownSeconds` - After a scaling operation, how many seconds to wait before scaling down the same vertex. Defaults to the value of `cooldownSeconds` if that is set, otherwise `90`.
 - `zeroReplicaSleepSeconds` - How many seconds it will wait after scaling down to `0`, defaults to `120`. Numaflow autoscaler periodically scales up a vertex pod to "peek" the incoming data, this is the period of time to wait before peeking.
 - `targetProcessingSeconds` - It is used to tune the aggressiveness of autoscaling for source vertices, it measures how fast you want the vertex to process all the pending messages, defaults to `20`. It is only effective for the `Source` vertices which support autoscaling. Typically, increasing the value leads to a lower processing rate, and thus fewer replicas.
 - `targetBufferAvailability` - Targeted buffer availability in percentage, defaults to `50`. It is only effective for `UDF` and `Sink` vertices. It determines how aggressive the autoscaling is; increasing the value will bring more replicas.