From 2b1869c1bc120c534847223053e1d7b4999c333d Mon Sep 17 00:00:00 2001 From: Sudharsan Dhamal Gopalarathnam Date: Thu, 19 Jan 2023 14:25:25 -0800 Subject: [PATCH] [refactor]Refactoring sai handle status (#2621) * [refactor]Refactoring sai handle status --- orchagent/cbf/cbfnhgorch.cpp | 4 +- orchagent/crmorch.cpp | 1 + orchagent/fabricportsorch.cpp | 1 + orchagent/nhgbase.h | 5 - orchagent/nhgorch.cpp | 4 +- orchagent/orch.cpp | 199 --------------------------- orchagent/orch.h | 7 - orchagent/p4orch/tests/test_main.cpp | 26 ++++ orchagent/saihelper.cpp | 198 ++++++++++++++++++++++++++ orchagent/saihelper.h | 9 ++ orchagent/switchorch.cpp | 1 + 11 files changed, 240 insertions(+), 215 deletions(-) diff --git a/orchagent/cbf/cbfnhgorch.cpp b/orchagent/cbf/cbfnhgorch.cpp index 76435ad12d..fe396b207c 100644 --- a/orchagent/cbf/cbfnhgorch.cpp +++ b/orchagent/cbf/cbfnhgorch.cpp @@ -343,10 +343,10 @@ bool CbfNhg::sync() SWSS_LOG_ERROR("Failed to create CBF next hop group %s, rv %d", m_key.c_str(), status); - task_process_status handle_status = gCbfNhgOrch->handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status); + task_process_status handle_status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status); if (handle_status != task_success) { - return gCbfNhgOrch->parseHandleSaiStatusFailure(handle_status); + return parseHandleSaiStatusFailure(handle_status); } } diff --git a/orchagent/crmorch.cpp b/orchagent/crmorch.cpp index f02bbd68f2..44be1e85b8 100644 --- a/orchagent/crmorch.cpp +++ b/orchagent/crmorch.cpp @@ -4,6 +4,7 @@ #include "crmorch.h" #include "converter.h" #include "timer.h" +#include "saihelper.h" #define CRM_POLLING_INTERVAL "polling_interval" #define CRM_COUNTERS_TABLE_KEY "STATS" diff --git a/orchagent/fabricportsorch.cpp b/orchagent/fabricportsorch.cpp index 54d815aaaa..5fabdfc8f5 100644 --- a/orchagent/fabricportsorch.cpp +++ b/orchagent/fabricportsorch.cpp @@ -9,6 +9,7 @@ #include "schema.h" #include "sai_serialize.h" #include "timer.h" +#include "saihelper.h" #define FABRIC_POLLING_INTERVAL_DEFAULT (30) #define FABRIC_PORT_PREFIX "PORT" diff --git a/orchagent/nhgbase.h b/orchagent/nhgbase.h index 65f0690555..1dbf2f7762 100644 --- a/orchagent/nhgbase.h +++ b/orchagent/nhgbase.h @@ -451,11 +451,6 @@ class NhgOrchCommon : public Orch } inline void decSyncedNhgCount() { NhgBase::decSyncedCount(); } - /* Handling SAI status*/ - using Orch::handleSaiCreateStatus; - using Orch::handleSaiRemoveStatus; - using Orch::parseHandleSaiStatusFailure; - protected: /* * Map of synced next hop groups. diff --git a/orchagent/nhgorch.cpp b/orchagent/nhgorch.cpp index 32ddb27eb5..cefc2efbb1 100644 --- a/orchagent/nhgorch.cpp +++ b/orchagent/nhgorch.cpp @@ -576,10 +576,10 @@ bool NextHopGroup::sync() SWSS_LOG_ERROR("Failed to create next hop group %s, rv:%d", m_key.to_string().c_str(), status); - task_process_status handle_status = gNhgOrch->handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status); + task_process_status handle_status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status); if (handle_status != task_success) { - return gNhgOrch->parseHandleSaiStatusFailure(handle_status); + return parseHandleSaiStatusFailure(handle_status); } } diff --git a/orchagent/orch.cpp b/orchagent/orch.cpp index 3c34e28f0a..26093354c1 100644 --- a/orchagent/orch.cpp +++ b/orchagent/orch.cpp @@ -856,205 +856,6 @@ Executor *Orch::getExecutor(string executorName) return NULL; } -task_process_status Orch::handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context) -{ - /* - * This function aims to provide coarse handling of failures in sairedis create - * operation (i.e., notify users by throwing excepions when failures happen). - * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. - * task_need_retry - Cannot handle the status. Need to retry the SAI operation. - * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. - * TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_ITEM_ALREADY_EXISTS) - * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling - * in each orch. - * 3. Take the type of sai api into consideration. - */ - switch (api) - { - case SAI_API_FDB: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - case SAI_STATUS_ITEM_ALREADY_EXISTS: - /* - * In FDB creation, there are scenarios where the hardware learns an FDB entry before orchagent. - * In such cases, the FDB SAI creation would report the status of SAI_STATUS_ITEM_ALREADY_EXISTS, - * and orchagent should ignore the error and treat it as entry was explicitly created. - */ - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - break; - case SAI_API_HOSTIF: - switch (status) - { - case SAI_STATUS_SUCCESS: - return task_success; - case SAI_STATUS_FAILURE: - /* - * Host interface maybe failed due to lane not available. - * In some scenarios, like SONiC virtual machine, the invalid lane may be not enabled by VM configuration, - * So just ignore the failure and report an error log. - */ - return task_ignore; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - default: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - } - return task_need_retry; -} - -task_process_status Orch::handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context) -{ - /* - * This function aims to provide coarse handling of failures in sairedis set - * operation (i.e., notify users by throwing excepions when failures happen). - * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. - * task_need_retry - Cannot handle the status. Need to retry the SAI operation. - * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. - * TODO: 1. Add general handling logic for specific statuses - * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling - * in each orch. - * 3. Take the type of sai api into consideration. - */ - if (status == SAI_STATUS_SUCCESS) - { - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus"); - return task_success; - } - - switch (api) - { - case SAI_API_PORT: - switch (status) - { - case SAI_STATUS_INVALID_ATTR_VALUE_0: - /* - * If user gives an invalid attribute value, no need to retry or exit orchagent, just fail the current task - * and let user correct the configuration. - */ - SWSS_LOG_ERROR("Encountered SAI_STATUS_INVALID_ATTR_VALUE_0 in set operation, task failed, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - return task_failed; - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - case SAI_API_TUNNEL: - switch (status) - { - case SAI_STATUS_ATTR_NOT_SUPPORTED_0: - SWSS_LOG_ERROR("Encountered SAI_STATUS_ATTR_NOT_SUPPORTED_0 in set operation, task failed, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - return task_failed; - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - - return task_need_retry; -} - -task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context) -{ - /* - * This function aims to provide coarse handling of failures in sairedis remove - * operation (i.e., notify users by throwing excepions when failures happen). - * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. - * task_need_retry - Cannot handle the status. Need to retry the SAI operation. - * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. - * TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_OBJECT_IN_USE, - * SAI_STATUS_ITEM_NOT_FOUND) - * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling - * in each orch. - * 3. Take the type of sai api into consideration. - */ - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus"); - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - abort(); - } - return task_need_retry; -} - -task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context) -{ - /* - * This function aims to provide coarse handling of failures in sairedis get - * operation (i.e., notify users by throwing excepions when failures happen). - * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. - * task_need_retry - Cannot handle the status. Need to retry the SAI operation. - * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. - * TODO: 1. Add general handling logic for specific statuses - * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling - * in each orch. - * 3. Take the type of sai api into consideration. - */ - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus"); - return task_success; - case SAI_STATUS_NOT_IMPLEMENTED: - SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s", - sai_serialize_api(api).c_str()); - throw std::logic_error("SAI get function not implemented"); - default: - SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - } - return task_failed; -} - -bool Orch::parseHandleSaiStatusFailure(task_process_status status) -{ - /* - * This function parses task process status from SAI failure handling function to whether a retry is needed. - * Return value: true - no retry is needed. - * false - retry is needed. - */ - switch (status) - { - case task_need_retry: - return false; - case task_failed: - return true; - default: - SWSS_LOG_WARN("task_process_status %d is not expected in parseHandleSaiStatusFailure", status); - } - return true; -} - void Orch2::doTask(Consumer &consumer) { SWSS_LOG_ENTER(); diff --git a/orchagent/orch.h b/orchagent/orch.h index 7e803bbd93..6c620a3ef4 100644 --- a/orchagent/orch.h +++ b/orchagent/orch.h @@ -246,13 +246,6 @@ class Orch void addExecutor(Executor* executor); Executor *getExecutor(std::string executorName); - /* Handling SAI status*/ - virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr); - virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); - virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr); - virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); - bool parseHandleSaiStatusFailure(task_process_status status); - ResponsePublisher m_publisher; private: void addConsumer(swss::DBConnector *db, std::string tableName, int pri = default_orch_pri); diff --git a/orchagent/p4orch/tests/test_main.cpp b/orchagent/p4orch/tests/test_main.cpp index 91b4296a29..08b72a9377 100644 --- a/orchagent/p4orch/tests/test_main.cpp +++ b/orchagent/p4orch/tests/test_main.cpp @@ -80,6 +80,32 @@ sai_my_mac_api_t *sai_my_mac_api; sai_counter_api_t *sai_counter_api; sai_generic_programmable_api_t *sai_generic_programmable_api; +task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context) +{ + return task_success; +} + +task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context) +{ + return task_success; +} + +task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context) +{ + return task_success; +} + +task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context) +{ + return task_success; +} + +bool parseHandleSaiStatusFailure(task_process_status status) +{ + return true; +} + + namespace { diff --git a/orchagent/saihelper.cpp b/orchagent/saihelper.cpp index 2df9ebce22..6abb011b69 100644 --- a/orchagent/saihelper.cpp +++ b/orchagent/saihelper.cpp @@ -465,3 +465,201 @@ sai_status_t initSaiPhyApi(swss::gearbox_phy_t *phy) return status; } +task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context) +{ + /* + * This function aims to provide coarse handling of failures in sairedis create + * operation (i.e., notify users by throwing excepions when failures happen). + * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. + * task_need_retry - Cannot handle the status. Need to retry the SAI operation. + * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. + * TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_ITEM_ALREADY_EXISTS) + * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling + * in each orch. + * 3. Take the type of sai api into consideration. + */ + switch (api) + { + case SAI_API_FDB: + switch (status) + { + case SAI_STATUS_SUCCESS: + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); + return task_success; + case SAI_STATUS_ITEM_ALREADY_EXISTS: + /* + * In FDB creation, there are scenarios where the hardware learns an FDB entry before orchagent. + * In such cases, the FDB SAI creation would report the status of SAI_STATUS_ITEM_ALREADY_EXISTS, + * and orchagent should ignore the error and treat it as entry was explicitly created. + */ + return task_success; + default: + SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + break; + case SAI_API_HOSTIF: + switch (status) + { + case SAI_STATUS_SUCCESS: + return task_success; + case SAI_STATUS_FAILURE: + /* + * Host interface maybe failed due to lane not available. + * In some scenarios, like SONiC virtual machine, the invalid lane may be not enabled by VM configuration, + * So just ignore the failure and report an error log. + */ + return task_ignore; + default: + SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + default: + switch (status) + { + case SAI_STATUS_SUCCESS: + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); + return task_success; + default: + SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + } + return task_need_retry; +} + +task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context) +{ + /* + * This function aims to provide coarse handling of failures in sairedis set + * operation (i.e., notify users by throwing excepions when failures happen). + * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. + * task_need_retry - Cannot handle the status. Need to retry the SAI operation. + * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. + * TODO: 1. Add general handling logic for specific statuses + * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling + * in each orch. + * 3. Take the type of sai api into consideration. + */ + if (status == SAI_STATUS_SUCCESS) + { + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus"); + return task_success; + } + + switch (api) + { + case SAI_API_PORT: + switch (status) + { + case SAI_STATUS_INVALID_ATTR_VALUE_0: + /* + * If user gives an invalid attribute value, no need to retry or exit orchagent, just fail the current task + * and let user correct the configuration. + */ + SWSS_LOG_ERROR("Encountered SAI_STATUS_INVALID_ATTR_VALUE_0 in set operation, task failed, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + return task_failed; + default: + SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + case SAI_API_TUNNEL: + switch (status) + { + case SAI_STATUS_ATTR_NOT_SUPPORTED_0: + SWSS_LOG_ERROR("Encountered SAI_STATUS_ATTR_NOT_SUPPORTED_0 in set operation, task failed, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + return task_failed; + default: + SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + default: + SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + + return task_need_retry; +} + +task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context) +{ + /* + * This function aims to provide coarse handling of failures in sairedis remove + * operation (i.e., notify users by throwing excepions when failures happen). + * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. + * task_need_retry - Cannot handle the status. Need to retry the SAI operation. + * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. + * TODO: 1. Add general handling logic for specific statuses (e.g., SAI_STATUS_OBJECT_IN_USE, + * SAI_STATUS_ITEM_NOT_FOUND) + * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling + * in each orch. + * 3. Take the type of sai api into consideration. + */ + switch (status) + { + case SAI_STATUS_SUCCESS: + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus"); + return task_success; + default: + SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + abort(); + } + return task_need_retry; +} + +task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context) +{ + /* + * This function aims to provide coarse handling of failures in sairedis get + * operation (i.e., notify users by throwing excepions when failures happen). + * Return value: task_success - Handled the status successfully. No need to retry this SAI operation. + * task_need_retry - Cannot handle the status. Need to retry the SAI operation. + * task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure. + * TODO: 1. Add general handling logic for specific statuses + * 2. Develop fine-grain failure handling mechanisms and replace this coarse handling + * in each orch. + * 3. Take the type of sai api into consideration. + */ + switch (status) + { + case SAI_STATUS_SUCCESS: + SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus"); + return task_success; + case SAI_STATUS_NOT_IMPLEMENTED: + SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s", + sai_serialize_api(api).c_str()); + throw std::logic_error("SAI get function not implemented"); + default: + SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s", + sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + } + return task_failed; +} + +bool parseHandleSaiStatusFailure(task_process_status status) +{ + /* + * This function parses task process status from SAI failure handling function to whether a retry is needed. + * Return value: true - no retry is needed. + * false - retry is needed. + */ + switch (status) + { + case task_need_retry: + return false; + case task_failed: + return true; + default: + SWSS_LOG_WARN("task_process_status %d is not expected in parseHandleSaiStatusFailure", status); + } + return true; +} diff --git a/orchagent/saihelper.h b/orchagent/saihelper.h index a0b2aa2fac..335bb5fa9f 100644 --- a/orchagent/saihelper.h +++ b/orchagent/saihelper.h @@ -3,6 +3,7 @@ #include "gearboxutils.h" #include +#include "orch.h" #define IS_ATTR_ID_IN_RANGE(attrId, objectType, attrPrefix) \ ((attrId) >= SAI_ ## objectType ## _ATTR_ ## attrPrefix ## _START && (attrId) <= SAI_ ## objectType ## _ATTR_ ## attrPrefix ## _END) @@ -10,3 +11,11 @@ void initSaiApi(); void initSaiRedis(const std::string &record_location, const std::string &record_filename); sai_status_t initSaiPhyApi(swss::gearbox_phy_t *phy); + +/* Handling SAI status*/ +task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr); +task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); +task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr); +task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); +bool parseHandleSaiStatusFailure(task_process_status status); + diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp index 951358774f..b5aed63adc 100644 --- a/orchagent/switchorch.cpp +++ b/orchagent/switchorch.cpp @@ -9,6 +9,7 @@ #include "notificationproducer.h" #include "macaddress.h" #include "return_code.h" +#include "saihelper.h" using namespace std; using namespace swss;