Skip to content

Commit

Permalink
zebra: Modify dplane loop to allow backpressure to filter up
Browse files Browse the repository at this point in the history
Currently when the dplane_thread_loop is run, it moves contexts
from the dg_update_list and puts the contexts on the input queue
of the first provider.  This provider is given a chance to run
and then the items on the output queue are pulled off and placed
on the input queue of the next provider.  Rinse/Repeat down through
the entire list of providers.  Now imagine that we have a list
of multiple providers and the last provider is getting backed up.
Contexts will end up sticking in the input queue of the `slow`
provider.  This can grow without bounds.  This is a real problem
when you have a situation where an interface is flapping and an
upper-level protocol is sending a continuous stream of route
updates to reflect the change in ecmp.  You can end up with
a very very large backlog of contexts.  This is bad because
zebra can easily grow to a very very large memory size and on
restricted systems you can run out of memory.  Fortunately
for us, the MetaQ already participates with this process
by not doing more route processing until the dg_update_list
goes below the working limit of dg_updates_per_cycle.  Thus,
if FRR modifies the behavior of this loop to not move more
contexts onto the input queue when either the input queue
or output queue of the next provider has reached this limit,
FRR will naturally start auto-handling backpressure for the dplane
context system and memory will not grow out of control.

Signed-off-by: Donald Sharp <[email protected]>
  • Loading branch information
donaldsharp committed Jun 14, 2024
1 parent 7b59adc commit e91d5d0
Showing 1 changed file with 87 additions and 20 deletions.
107 changes: 87 additions & 20 deletions zebra/zebra_dplane.c
Original file line number Diff line number Diff line change
Expand Up @@ -7301,10 +7301,10 @@ static void dplane_thread_loop(struct event *event)
{
struct dplane_ctx_list_head work_list;
struct dplane_ctx_list_head error_list;
struct zebra_dplane_provider *prov;
struct zebra_dplane_provider *prov, *next_prov;
struct zebra_dplane_ctx *ctx;
int limit, counter, error_counter;
uint64_t curr, high;
uint64_t curr, out_curr, high;
bool reschedule = false;

/* Capture work limit per cycle */
Expand All @@ -7328,18 +7328,46 @@ static void dplane_thread_loop(struct event *event)
/* Locate initial registered provider */
prov = dplane_prov_list_first(&zdplane_info.dg_providers);

/* Move new work from incoming list to temp list */
for (counter = 0; counter < limit; counter++) {
ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
if (ctx) {
ctx->zd_provider = prov->dp_id;
curr = atomic_load_explicit(&prov->dp_in_queued, memory_order_relaxed);
out_curr = atomic_load_explicit(&prov->dp_out_queued,
memory_order_relaxed);

dplane_ctx_list_add_tail(&work_list, ctx);
} else {
break;
if (curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Current first provider(%s) Input queue is %" PRIu64
", holding off work",
__func__, prov->dp_name, curr);
counter = 0;
} else if (out_curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Current first provider(%s) Output queue is %" PRIu64
", holding off work",
__func__, prov->dp_name, out_curr);
counter = 0;
} else {
/*
* Let's limit the work to how what can be put on the
* in or out queue without going over
*/
limit -= (curr > out_curr) ? curr : out_curr;
if ((uint32_t)limit != zdplane_info.dg_updates_per_cycle)
reschedule = true;
/* Move new work from incoming list to temp list */
for (counter = 0; counter < limit; counter++) {
ctx = dplane_ctx_list_pop(&zdplane_info.dg_update_list);
if (ctx) {
ctx->zd_provider = prov->dp_id;

dplane_ctx_list_add_tail(&work_list, ctx);
} else {
break;
}
}
}


DPLANE_UNLOCK();

atomic_fetch_sub_explicit(&zdplane_info.dg_routes_queued, counter,
Expand All @@ -7358,8 +7386,9 @@ static void dplane_thread_loop(struct event *event)
* items.
*/
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("dplane enqueues %d new work to provider '%s'",
counter, dplane_provider_get_name(prov));
zlog_debug("dplane enqueues %d new work to provider '%s' curr is %" PRIu64,
counter, dplane_provider_get_name(prov),
curr);

/* Capture current provider id in each context; check for
* error status.
Expand Down Expand Up @@ -7420,16 +7449,54 @@ static void dplane_thread_loop(struct event *event)
if (!zdplane_info.dg_run)
break;

/* Locate next provider */
next_prov = dplane_prov_list_next(&zdplane_info.dg_providers,
prov);
if (next_prov) {
curr = atomic_load_explicit(&next_prov->dp_in_queued,
memory_order_relaxed);
out_curr =
atomic_load_explicit(&next_prov->dp_out_queued,
memory_order_relaxed);
} else
out_curr = curr = 0;

/* Dequeue completed work from the provider */
dplane_provider_lock(prov);

while (counter < limit) {
ctx = dplane_provider_dequeue_out_ctx(prov);
if (ctx) {
dplane_ctx_list_add_tail(&work_list, ctx);
counter++;
} else
break;
if (curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Next Provider(%s) Input queue is %" PRIu64
", holding off work",
__func__, next_prov->dp_name, curr);
counter = 0;
} else if (out_curr >= (uint64_t)limit) {
reschedule = true;
if (IS_ZEBRA_DEBUG_DPLANE_DETAIL)
zlog_debug("%s: Next Provider(%s) Output queue is %" PRIu64
", holding off work",
__func__, next_prov->dp_name,
out_curr);
counter = 0;
} else {
/*
* Let's limit the work to how what can be put on the
* in or out queue without going over
*/
limit -= (curr > out_curr) ? curr : out_curr;
if ((uint32_t)limit != zdplane_info.dg_updates_per_cycle)
reschedule = true;

while (counter < limit) {
ctx = dplane_provider_dequeue_out_ctx(prov);
if (ctx) {
dplane_ctx_list_add_tail(&work_list,
ctx);
counter++;
} else
break;
}
}

dplane_provider_unlock(prov);
Expand All @@ -7442,7 +7509,7 @@ static void dplane_thread_loop(struct event *event)
counter, dplane_provider_get_name(prov));

/* Locate next provider */
prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov);
prov = next_prov;
}

/*
Expand Down

0 comments on commit e91d5d0

Please sign in to comment.