From fb0a5fd8d2d7a7758297ae8177d10da6076f9f05 Mon Sep 17 00:00:00 2001 From: Stephen Sun <5379172+stephenxs@users.noreply.github.com> Date: Wed, 24 Nov 2021 21:06:23 +0800 Subject: [PATCH] Don't handle buffer pool watermark during warm reboot reconciling (#1987) - What I did Don't handle buffer pool watermark during warm reboot reconciling - Why I did it This is to fix the community issues Azure/sonic-sairedis#862 and Azure/sonic-buildimage#8722 - How I verified it Perform a warm reboot. Check whether buffer pool watermark handling is skipped during reconciling and handled after it. Other watermark handling is still performed during reconciling, as it was before. Details if related The warm reboot flow is as follows: System starts. Orchagent fetches the items from database stored before warm reboot and pushes them into m_toSync of all orchagents. This is done by bake, which can be overridden by a sub orchagent. All sub orchagents handle the items in m_toSync. At this point, any notification from redis-db is blocked. Warm reboot converges. Orchagent starts to handle notifications from redis-db. The fix is as follows: in FlexCounterOrch::bake, the buffer pool watermark handling is skipped. 
Signed-off-by: Stephen Sun --- orchagent/flexcounterorch.cpp | 44 ++++++++++++++++++++++++++++++++++- orchagent/flexcounterorch.h | 3 +++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/orchagent/flexcounterorch.cpp b/orchagent/flexcounterorch.cpp index 7ccc52e06ca3..dea2fcd0a375 100644 --- a/orchagent/flexcounterorch.cpp +++ b/orchagent/flexcounterorch.cpp @@ -1,5 +1,4 @@ #include -#include "flexcounterorch.h" #include "portsorch.h" #include "fabricportsorch.h" #include "select.h" @@ -49,6 +48,7 @@ unordered_map flexCounterGroupMap = FlexCounterOrch::FlexCounterOrch(DBConnector *db, vector &tableNames): Orch(db, tableNames), + m_flexCounterConfigTable(db, CFG_FLEX_COUNTER_TABLE_NAME), m_flexCounterDb(new DBConnector("FLEX_COUNTER_DB", 0)), m_flexCounterGroupTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_GROUP_TABLE)) { @@ -188,3 +188,45 @@ bool FlexCounterOrch::getPortBufferDropCountersState() const { return m_port_buffer_drop_counter_enabled; } + +bool FlexCounterOrch::bake() +{ + /* + * bake is called during the warm reboot reconciling procedure. + * By default, it should fetch items from the tables the sub agents listen to, + * and then push them into m_toSync of each sub agent, so that saved entries are handled before upcoming ones. + * This override queues the keys of m_flexCounterConfigTable as SET entries via addToSync; BUFFER_POOL_WATERMARK_KEY and keys absent from flexCounterGroupMap are logged and skipped, and keys whose get() returns false are skipped silently. NOTE(review): the dynamic_cast result below is dereferenced without a null check. 
+ */ + + std::deque entries; + vector keys; + m_flexCounterConfigTable.getKeys(keys); + for (const auto &key: keys) + { + if (!flexCounterGroupMap.count(key)) + { + SWSS_LOG_NOTICE("FlexCounterOrch: Invalid flex counter group intput %s is skipped during reconciling", key.c_str()); + continue; + } + + if (key == BUFFER_POOL_WATERMARK_KEY) + { + SWSS_LOG_NOTICE("FlexCounterOrch: Do not handle any FLEX_COUNTER table for %s update during reconciling", + BUFFER_POOL_WATERMARK_KEY); + continue; + } + + KeyOpFieldsValuesTuple kco; + + kfvKey(kco) = key; + kfvOp(kco) = SET_COMMAND; + + if (!m_flexCounterConfigTable.get(key, kfvFieldsValues(kco))) + { + continue; + } + entries.push_back(kco); + } + Consumer* consumer = dynamic_cast(getExecutor(CFG_FLEX_COUNTER_TABLE_NAME)); + return consumer->addToSync(entries); +} diff --git a/orchagent/flexcounterorch.h b/orchagent/flexcounterorch.h index 0fb9f70e4b07..9ae7e90aadc6 100644 --- a/orchagent/flexcounterorch.h +++ b/orchagent/flexcounterorch.h @@ -4,6 +4,7 @@ #include "orch.h" #include "port.h" #include "producertable.h" +#include "table.h" extern "C" { #include "sai.h" @@ -17,12 +18,14 @@ class FlexCounterOrch: public Orch virtual ~FlexCounterOrch(void); bool getPortCountersState() const; bool getPortBufferDropCountersState() const; + bool bake() override; private: std::shared_ptr m_flexCounterDb = nullptr; std::shared_ptr m_flexCounterGroupTable = nullptr; bool m_port_counter_enabled = false; bool m_port_buffer_drop_counter_enabled = false; + Table m_flexCounterConfigTable; }; #endif