From 4097a18d0b30b756a3e633377849ad4aea482151 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Thu, 9 Jul 2020 00:01:32 -0700 Subject: [PATCH 01/24] platform daemon changes for multi asic platform --- sonic-ledd/scripts/ledd | 33 ++++--- sonic-xcvrd/scripts/xcvrd | 195 ++++++++++++++++++++++++++------------ 2 files changed, 157 insertions(+), 71 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index f27296ce6..b89ee4942 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -65,13 +65,18 @@ class DaemonLedd(daemon_base.DaemonBase): self.log_error("Failed to load ledutil: %s" % (str(e)), True) sys.exit(LEDUTIL_LOAD_ERROR) - # Open a handle to the Application database - appl_db = daemon_base.db_connect("APPL_DB") + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() # Subscribe to PORT table notifications in the Application DB + appl_db, sst = {}, {} sel = swsscommon.Select() - sst = swsscommon.SubscriberStateTable(appl_db, swsscommon.APP_PORT_TABLE_NAME) - sel.addSelectable(sst) + + for namespace in namespaces: + # Open a handle to the Application database + appl_db[namespace] = daemon_base.db_connect("APPL_DB", namespace=namespace) + sst[namespace] = swsscommon.SubscriberStateTable(appl_db[namespace], swsscommon.APP_PORT_TABLE_NAME) + sel.addSelectable(sst[namespace]) # Listen indefinitely for changes to the PORT table in the Application DB while True: @@ -86,17 +91,19 @@ class DaemonLedd(daemon_base.DaemonBase): self.log_warning("sel.select() did not return swsscommon.Select.OBJECT") continue - (key, op, fvp) = sst.pop() - - # TODO: Once these flag entries have been removed from the DB, - # we can remove this check - if key in ["PortConfigDone", "PortInitDone"]: - continue + for namespace in namespaces: + (key, op, fvp) = sst[namespace].pop() + if fvp: + # TODO: Once these flag entries have been removed from the DB, + # we can remove this check + if key in ["PortConfigDone", "PortInitDone"]: + 
continue - fvp_dict = dict(fvp) + fvp_dict = dict(fvp) - if op == "SET" and "oper_status" in fvp_dict: - led_control.port_link_state_change(key, fvp_dict["oper_status"]) + if op == "SET" and "oper_status" in fvp_dict: + if not key.startswith(INTERNAL_INTERFACE_PREFIX): + led_control.port_link_state_change(key, fvp_dict["oper_status"]) return 1 diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 3ed032272..ed6b17dea 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -95,6 +95,9 @@ platform_chassis = None # by DaemonXcvrd helper_logger = logger.Logger(SYSLOG_IDENTIFIER) +# MultiAsic class instance +multi_asic = multiAsic() + # # Helper functions ============================================================= # @@ -421,14 +424,19 @@ def post_port_dom_info_to_db(logical_port_name, table, stop_event=threading.Even # Update port dom/sfp info in db def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): # Connect to STATE_DB and create transceiver dom/sfp info tables - transceiver_dict = {} - state_db = daemon_base.db_connect("STATE_DB") - int_tbl = swsscommon.Table(state_db, TRANSCEIVER_INFO_TABLE) - dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) + transceiver_dict, state_db, appl_db, int_tbl, dom_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} - appl_db = daemon_base.db_connect("APPL_DB") - app_port_tbl = swsscommon.ProducerStateTable(appl_db, - swsscommon.APP_PORT_TABLE_NAME) + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() + namespaceIDs = multi_asic.get_namespaceIDs() + + for namespace in namespaces: + asic_id = multi_asic.get_asic_id_from_namespace(namespace) + state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) + appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) + int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) + dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) 
+ app_port_tbl[asic_id] = swsscommon.ProducerStateTable(appl_db[asic_id], swsscommon.APP_PORT_TABLE_NAME) # Post all the current interface dom/sfp info to STATE_DB logical_port_list = platform_sfputil.logical @@ -436,13 +444,18 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): if stop_event.is_set(): break - post_port_sfp_info_to_db(logical_port_name, int_tbl, transceiver_dict, stop_event) - post_port_dom_info_to_db(logical_port_name, dom_tbl, stop_event) - post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl, stop_event) + # Get the asic to which this port belongs + asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + if asic_index is None or asic_index not in namespaceIDs: + continue + + post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) + post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], stop_event) + post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], stop_event) ## Do not notify media settings during warm reboot to avoid dataplane traffic impact if is_warm_start == False: - notify_media_setting(logical_port_name, transceiver_dict, app_port_tbl) + notify_media_setting(logical_port_name, transceiver_dict, app_port_tbl[asic_index]) transceiver_dict.clear() # Delete port dom/sfp info from db @@ -473,16 +486,22 @@ def del_port_sfp_dom_info_from_db(logical_port_name, int_tbl, dom_tbl): sys.exit(NOT_IMPLEMENTED_ERROR) # recover missing sfp table entries if any -def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event): +def recover_missing_sfp_table_entries(self, sfp_util, stop_event): transceiver_dict = {} - keys = int_tbl.getKeys() + keys = self.int_tbl.getKeys() logical_port_list = sfp_util.logical for logical_port_name in logical_port_list: if stop_event.is_set(): break - if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl): - 
post_port_sfp_info_to_db(logical_port_name, int_tbl, transceiver_dict, stop_event) + + # Get the asic to which this port belongs + asic_index = sfp_util.get_asicId_for_logical_port(logical_port_name) + if asic_index is None or asic_index not in namespaceIDs: + continue + + if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, self.status_tbl[asic_index]): + post_port_sfp_info_to_db(logical_port_name, self.int_tbl[asic_index], transceiver_dict, stop_event) def check_port_in_range(range_str, physical_port): @@ -719,28 +738,41 @@ def detect_port_in_error_status(logical_port_name, status_tbl): # Init TRANSCEIVER_STATUS table def init_port_sfp_status_tbl(stop_event=threading.Event()): # Connect to STATE_DB and create transceiver status table - state_db = daemon_base.db_connect("STATE_DB") - status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE) + state_db, status_tbl = {},{} + + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() + namespaceIDs = multi_asic.get_namespaceIDs() + + for namespace in namespaces: + asic_id = multi_asic.get_asic_id_from_namespace(namespace) + state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) + status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) # Init TRANSCEIVER_STATUS table logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: if stop_event.is_set(): break + + # Get the asic to which this port belongs + asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + if asic_index is None or asic_index not in namespaceIDs: + continue + physical_port_list = logical_port_name_to_physical_port_list(logical_port_name) if physical_port_list is None: helper_logger.log_error("No physical ports found for logical port '%s'" % logical_port_name) - update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED) + 
update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) for physical_port in physical_port_list: if stop_event.is_set(): break if not _wrapper_get_presence(physical_port): - update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_REMOVED) else: - update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_INSERTED) - + update_port_transceiver_status_table(logical_port_name, status_tbl[asic_index], SFP_STATUS_INSERTED) # # Helper classes =============================================================== @@ -756,17 +788,30 @@ class DomInfoUpdateTask(object): helper_logger.log_info("Start DOM monitoring loop") # Connect to STATE_DB and create transceiver dom info table - state_db = daemon_base.db_connect("STATE_DB") - dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) - status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE) + state_db, dom_tbl, status_tbl = {}, {}, {} + + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() + namespaceIDs = multi_asic.get_namespaceIDs() + + for namespace in namespaces: + asic_id = multi_asic.get_asic_id_from_namespace(namespace) + state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) + dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) + status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) # Start loop to update dom info in DB periodically while not self.task_stopping_event.wait(DOM_INFO_UPDATE_PERIOD_SECS): logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: - if not detect_port_in_error_status(logical_port_name, status_tbl): - post_port_dom_info_to_db(logical_port_name, dom_tbl, self.task_stopping_event) - post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl, 
self.task_stopping_event) + # Get the asic to which this port belongs + asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + if asic_index is None or asic_index not in namespaceIDs: + continue + + if not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) + post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], self.task_stopping_event) helper_logger.log_info("Stop DOM monitoring loop") @@ -816,15 +861,22 @@ class SfpStateUpdateTask(object): transceiver_dict = {} # Connect to STATE_DB and create transceiver dom/sfp info tables - state_db = daemon_base.db_connect("STATE_DB") - int_tbl = swsscommon.Table(state_db, TRANSCEIVER_INFO_TABLE) - dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) - status_tbl = swsscommon.Table(state_db, TRANSCEIVER_STATUS_TABLE) + state_db, appl_db, int_tbl, dom_tbl, status_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} + + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() + namespaceIDs = multi_asic.get_namespaceIDs() - # Connect to APPL_DB to notify Media notifications - appl_db = daemon_base.db_connect("APPL_DB") - app_port_tbl = swsscommon.ProducerStateTable(appl_db, - swsscommon.APP_PORT_TABLE_NAME) + for namespace in namespaces: + asic_id = multi_asic.get_asic_id_from_namespace(namespace) + state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) + int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) + dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) + status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) + + # Connect to APPL_DB to notify Media notifications + appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) + app_port_tbl[asic_id] = swsscommon.ProducerStateTable(appl_db[asic_id], swsscommon.APP_PORT_TABLE_NAME) # Start 
main loop to listen to the SFP change event. # The state migrating sequence: @@ -949,37 +1001,43 @@ class SfpStateUpdateTask(object): helper_logger.log_warning("Got unknown FP port index {}, ignored".format(key)) continue for logical_port in logical_port_list: + + # Get the asic to which this port belongs + asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port) + if asic_index is None or asic_index not in namespaceIDs: + continue + if value == SFP_STATUS_INSERTED: helper_logger.log_info("Got SFP inserted event") # A plugin event will clear the error state. - update_port_transceiver_status_table(logical_port, status_tbl, SFP_STATUS_INSERTED) + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], SFP_STATUS_INSERTED) helper_logger.log_info("receive plug in and update port sfp status table.") - rc = post_port_sfp_info_to_db(logical_port, int_tbl, transceiver_dict) + rc = post_port_sfp_info_to_db(logical_port, int_tbl[asic_index], transceiver_dict) # If we didn't get the sfp info, assuming the eeprom is not ready, give a try again. if rc == SFP_EEPROM_NOT_READY: helper_logger.log_warning("SFP EEPROM is not ready. 
One more try...") time.sleep(TIME_FOR_SFP_READY_SECS) - post_port_sfp_info_to_db(logical_port, int_tbl, transceiver_dict) - post_port_dom_info_to_db(logical_port, dom_tbl) - post_port_dom_threshold_info_to_db(logical_port, dom_tbl) - notify_media_setting(logical_port, transceiver_dict, app_port_tbl) + post_port_sfp_info_to_db(logical_port, int_tbl[asic_index], transceiver_dict) + post_port_dom_info_to_db(logical_port, dom_tbl[asic_index]) + post_port_dom_threshold_info_to_db(logical_port, dom_tbl[asic_index]) + notify_media_setting(logical_port, transceiver_dict, app_port_tbl[asic_index]) transceiver_dict.clear() elif value == SFP_STATUS_REMOVED: helper_logger.log_info("Got SFP removed event") - update_port_transceiver_status_table(logical_port, status_tbl, SFP_STATUS_REMOVED) + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], SFP_STATUS_REMOVED) helper_logger.log_info("receive plug out and pdate port sfp status table.") - del_port_sfp_dom_info_from_db(logical_port, int_tbl, dom_tbl) + del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index]) elif value in errors_block_eeprom_reading: helper_logger.log_info("Got SFP Error event") # Add port to error table to stop accessing eeprom of it # If the port already in the error table, the stored error code will # be updated to the new one. - update_port_transceiver_status_table(logical_port, status_tbl, value) + update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value) helper_logger.log_info("receive error update port sfp status table.") # In this case EEPROM is not accessible, so remove the DOM info # since it will be outdated if long time no update. # but will keep the interface info in the DB since it static. - del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl) + del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index]) else: # SFP return unkown event, just ignore for now. 
@@ -1042,6 +1100,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): super(DaemonXcvrd, self).__init__(log_identifier) self.timeout = XCVRD_MAIN_THREAD_SLEEP_SECS + self.num_asics = multi_asic.get_num_asics() self.stop_event = threading.Event() self.sfp_error_event = multiprocessing.Event() @@ -1059,9 +1118,9 @@ class DaemonXcvrd(daemon_base.DaemonBase): self.log_warning("Caught unhandled signal '" + sig + "'") # Wait for port config is done - def wait_for_port_config_done(self): + def wait_for_port_config_done(self, namespace): # Connect to APPL_DB and subscribe to PORT table notifications - appl_db = daemon_base.db_connect("APPL_DB") + appl_db = daemon_base.db_connect("APPL_DB", namespace=namespace) sel = swsscommon.Select() sst = swsscommon.SubscriberStateTable(appl_db, swsscommon.APP_PORT_TABLE_NAME) @@ -1126,17 +1185,31 @@ class DaemonXcvrd(daemon_base.DaemonBase): # Load port info try: - port_config_file_path = device_info.get_path_to_port_config_file() - platform_sfputil.read_porttab_mappings(port_config_file_path) - except Exception as e: + if multi_asic.is_multi_asic(): + # For multi ASIC platforms we pass DIR of port_config_file_path and the number of asics + (platform_path, hwsku_path) = device_info.get_path_to_platform_and_hwsku() + platform_sfputil.read_all_porttab_mappings(hwsku_path, self.num_asics) + else: + # For single ASIC platforms we pass port_config_file_path and the asic_inst as 0 + port_config_file_path = device_info.get_path_to_port_config_file() + platform_sfputil.read_porttab_mappings(port_config_file_path, 0) + except Exception, e: self.log_error("Failed to read port info: %s" % (str(e)), True) sys.exit(PORT_CONFIG_LOAD_ERROR) # Connect to STATE_DB and create transceiver dom/sfp info tables - state_db = daemon_base.db_connect("STATE_DB") - self.int_tbl = swsscommon.Table(state_db, TRANSCEIVER_INFO_TABLE) - self.dom_tbl = swsscommon.Table(state_db, TRANSCEIVER_DOM_SENSOR_TABLE) - self.status_tbl = swsscommon.Table(state_db, 
TRANSCEIVER_STATUS_TABLE) + state_db, self.int_tbl, self.dom_tbl, self.status_tbl = {}, {}, {}, {} + + # Get the namespaces in the platform + namespaces = multi_asic.get_namespaces() + namespaceIDs = multi_asic.get_namespaceIDs() + + for namespace in namespaces: + asic_id = multi_asic.get_asic_id_from_namespace(namespace) + state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) + self.int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) + self.dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) + self.status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) self.load_media_settings() warmstart = swsscommon.WarmStart() @@ -1146,7 +1219,8 @@ class DaemonXcvrd(daemon_base.DaemonBase): # Make sure this daemon started after all port configured self.log_info("Wait for port config is done") - self.wait_for_port_config_done() + for namespace in namespaces: + self.wait_for_port_config_done(namespace) # Post all the current interface dom/sfp info to STATE_DB self.log_info("Post all port DOM/SFP info to DB") @@ -1163,8 +1237,13 @@ class DaemonXcvrd(daemon_base.DaemonBase): # Delete all the information from DB and then exit logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: - del_port_sfp_dom_info_from_db(logical_port_name, self.int_tbl, self.dom_tbl) - delete_port_from_status_table(logical_port_name, self.status_tbl) + # Get the asic to which this port belongs + asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + if asic_index is None: + continue + + del_port_sfp_dom_info_from_db(logical_port_name, self.int_tbl[asic_index], self.dom_tbl[asic_index]) + delete_port_from_status_table(logical_port_name, self.status_tbl[asic_index]) # Run daemon def run(self): @@ -1186,7 +1265,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): while not self.stop_event.wait(self.timeout): # Check the integrity of the sfp info table and 
recover the missing entries if any - recover_missing_sfp_table_entries(platform_sfputil, self.int_tbl, self.status_tbl, self.stop_event) + recover_missing_sfp_table_entries(self, platform_sfputil, self.stop_event) self.log_info("Stop daemon main loop") From 3d3c9ed3a5c2ca90b02e432c21d0228e225cf1bb Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Thu, 9 Jul 2020 23:35:59 -0700 Subject: [PATCH 02/24] Updates to initial commit --- sonic-ledd/scripts/ledd | 6 +++--- sonic-xcvrd/scripts/xcvrd | 27 +++++++++++---------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index b89ee4942..8c356b3b8 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -73,12 +73,12 @@ class DaemonLedd(daemon_base.DaemonBase): sel = swsscommon.Select() for namespace in namespaces: - # Open a handle to the Application database + # Open a handle to the Application database, in all namespaces appl_db[namespace] = daemon_base.db_connect("APPL_DB", namespace=namespace) sst[namespace] = swsscommon.SubscriberStateTable(appl_db[namespace], swsscommon.APP_PORT_TABLE_NAME) sel.addSelectable(sst[namespace]) - # Listen indefinitely for changes to the PORT table in the Application DB + # Listen indefinitely for changes to the PORT table in the Application DB's while True: # Use timeout to prevent ignoring the signals we want to handle # in signal_handler() (e.g. 
SIGTERM for graceful shutdown) @@ -102,7 +102,7 @@ class DaemonLedd(daemon_base.DaemonBase): fvp_dict = dict(fvp) if op == "SET" and "oper_status" in fvp_dict: - if not key.startswith(INTERNAL_INTERFACE_PREFIX): + if not key.startswith(daemon_base.get_internal_interface_prefix()): led_control.port_link_state_change(key, fvp_dict["oper_status"]) return 1 diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index ed6b17dea..881fa4fee 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -428,8 +428,6 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): # Get the namespaces in the platform namespaces = multi_asic.get_namespaces() - namespaceIDs = multi_asic.get_namespaceIDs() - for namespace in namespaces: asic_id = multi_asic.get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) @@ -446,9 +444,9 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): # Get the asic to which this port belongs asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) - if asic_index is None or asic_index not in namespaceIDs: + if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue - post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) post_port_dom_info_to_db(logical_port_name, dom_tbl[asic_index], stop_event) post_port_dom_threshold_info_to_db(logical_port_name, dom_tbl[asic_index], stop_event) @@ -497,7 +495,8 @@ def recover_missing_sfp_table_entries(self, sfp_util, stop_event): # Get the asic to which this port belongs asic_index = sfp_util.get_asicId_for_logical_port(logical_port_name) - if asic_index is None or asic_index not in namespaceIDs: + if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue if logical_port_name not in keys and not 
detect_port_in_error_status(logical_port_name, self.status_tbl[asic_index]): @@ -742,8 +741,6 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): # Get the namespaces in the platform namespaces = multi_asic.get_namespaces() - namespaceIDs = multi_asic.get_namespaceIDs() - for namespace in namespaces: asic_id = multi_asic.get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) @@ -757,7 +754,8 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): # Get the asic to which this port belongs asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) - if asic_index is None or asic_index not in namespaceIDs: + if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue physical_port_list = logical_port_name_to_physical_port_list(logical_port_name) @@ -792,8 +790,6 @@ class DomInfoUpdateTask(object): # Get the namespaces in the platform namespaces = multi_asic.get_namespaces() - namespaceIDs = multi_asic.get_namespaceIDs() - for namespace in namespaces: asic_id = multi_asic.get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) @@ -806,7 +802,8 @@ class DomInfoUpdateTask(object): for logical_port_name in logical_port_list: # Get the asic to which this port belongs asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) - if asic_index is None or asic_index not in namespaceIDs: + if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue if not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): @@ -865,8 +862,6 @@ class SfpStateUpdateTask(object): # Get the namespaces in the platform namespaces = multi_asic.get_namespaces() - namespaceIDs = multi_asic.get_namespaceIDs() - for namespace in namespaces: asic_id = multi_asic.get_asic_id_from_namespace(namespace) state_db[asic_id] = 
daemon_base.db_connect("STATE_DB", namespace) @@ -1004,7 +999,8 @@ class SfpStateUpdateTask(object): # Get the asic to which this port belongs asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port) - if asic_index is None or asic_index not in namespaceIDs: + if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port)) continue if value == SFP_STATUS_INSERTED: @@ -1202,8 +1198,6 @@ class DaemonXcvrd(daemon_base.DaemonBase): # Get the namespaces in the platform namespaces = multi_asic.get_namespaces() - namespaceIDs = multi_asic.get_namespaceIDs() - for namespace in namespaces: asic_id = multi_asic.get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) @@ -1240,6 +1234,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): # Get the asic to which this port belongs asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) if asic_index is None: + logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue del_port_sfp_dom_info_from_db(logical_port_name, self.int_tbl[asic_index], self.dom_tbl[asic_index]) From 81dddf17bae21bd674f00a13d317898685fce238 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Tue, 14 Jul 2020 00:41:51 -0700 Subject: [PATCH 03/24] Updates for review comments. 
--- sonic-xcvrd/scripts/xcvrd | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 881fa4fee..5aa70a9f0 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -96,7 +96,7 @@ platform_chassis = None helper_logger = logger.Logger(SYSLOG_IDENTIFIER) # MultiAsic class instance -multi_asic = multiAsic() +multi_asic = MultiAsic() # # Helper functions ============================================================= @@ -443,7 +443,7 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): break # Get the asic to which this port belongs - asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + asic_index = platform_sfputil.get_asic_id_for_logical_port(logical_port_name) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue @@ -484,23 +484,23 @@ def del_port_sfp_dom_info_from_db(logical_port_name, int_tbl, dom_tbl): sys.exit(NOT_IMPLEMENTED_ERROR) # recover missing sfp table entries if any -def recover_missing_sfp_table_entries(self, sfp_util, stop_event): +def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event): transceiver_dict = {} - keys = self.int_tbl.getKeys() + keys = int_tbl.getKeys() logical_port_list = sfp_util.logical for logical_port_name in logical_port_list: if stop_event.is_set(): break # Get the asic to which this port belongs - asic_index = sfp_util.get_asicId_for_logical_port(logical_port_name) + asic_index = sfp_util.get_asic_id_for_logical_port(logical_port_name) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue - if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, self.status_tbl[asic_index]): - post_port_sfp_info_to_db(logical_port_name, self.int_tbl[asic_index], transceiver_dict, stop_event) + if 
logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): + post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) def check_port_in_range(range_str, physical_port): @@ -753,7 +753,7 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): break # Get the asic to which this port belongs - asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + asic_index = platform_sfputil.get_asic_id_for_logical_port(logical_port_name) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue @@ -801,7 +801,7 @@ class DomInfoUpdateTask(object): logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: # Get the asic to which this port belongs - asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + asic_index = platform_sfputil.get_asic_id_for_logical_port(logical_port_name) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue @@ -998,7 +998,7 @@ class SfpStateUpdateTask(object): for logical_port in logical_port_list: # Get the asic to which this port belongs - asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port) + asic_index = platform_sfputil.get_asic_id_for_logical_port(logical_port) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port)) continue @@ -1232,7 +1232,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): logical_port_list = platform_sfputil.logical for logical_port_name in logical_port_list: # Get the asic to which this port belongs - asic_index = platform_sfputil.get_asicId_for_logical_port(logical_port_name) + asic_index = platform_sfputil.get_asic_id_for_logical_port(logical_port_name) if asic_index is None: logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue 
@@ -1260,7 +1260,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): while not self.stop_event.wait(self.timeout): # Check the integrity of the sfp info table and recover the missing entries if any - recover_missing_sfp_table_entries(self, platform_sfputil, self.stop_event) + recover_missing_sfp_table_entries(platform_sfputil, self.int_tbl, self.status_tbl, self.stop_event) self.log_info("Stop daemon main loop") From 0e70e2f83bd3203e798b7edf33451d5f6fbf880f Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 20 Jul 2020 23:49:01 -0700 Subject: [PATCH 04/24] Updates in ledd daemon to get the namespace from selector object. --- sonic-ledd/scripts/ledd | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 8c356b3b8..87cbc51d2 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -91,19 +91,20 @@ class DaemonLedd(daemon_base.DaemonBase): self.log_warning("sel.select() did not return swsscommon.Select.OBJECT") continue - for namespace in namespaces: - (key, op, fvp) = sst[namespace].pop() - if fvp: - # TODO: Once these flag entries have been removed from the DB, - # we can remove this check - if key in ["PortConfigDone", "PortInitDone"]: - continue - - fvp_dict = dict(fvp) - - if op == "SET" and "oper_status" in fvp_dict: - if not key.startswith(daemon_base.get_internal_interface_prefix()): - led_control.port_link_state_change(key, fvp_dict["oper_status"]) + # Get the namespace from the selectable object and use it to index the SubscriberStateTable handle. 
+ ns=c.getDbNamespace() + (key, op, fvp) = sst[ns].pop() + if fvp: + # TODO: Once these flag entries have been removed from the DB, + # we can remove this check + if key in ["PortConfigDone", "PortInitDone"]: + continue + + fvp_dict = dict(fvp) + + if op == "SET" and "oper_status" in fvp_dict: + if not key.startswith(daemon_base.get_internal_interface_prefix()): + led_control.port_link_state_change(key, fvp_dict["oper_status"]) return 1 From 332dfff9711ba2d8673131934601c257c30b7ff2 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Sun, 9 Aug 2020 15:37:32 -0700 Subject: [PATCH 05/24] Updated based on sonic-py-common --- sonic-ledd/scripts/ledd | 18 +++++++++++--- sonic-xcvrd/scripts/xcvrd | 49 ++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 87cbc51d2..9c9b6c5d7 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -10,6 +10,8 @@ try: import sys from sonic_py_common import daemon_base + from sonic_py_common import multi_asic + from sonic_py_common.interface import backplane_prefix from swsscommon import swsscommon except ImportError as e: raise ImportError (str(e) + " - required module not found") @@ -35,6 +37,9 @@ SELECT_TIMEOUT = 1000 LEDUTIL_LOAD_ERROR = 1 +# The empty namespace refers to linux host namespace. +EMPTY_NAMESPACE = '' + class DaemonLedd(daemon_base.DaemonBase): # Run daemon @@ -65,8 +70,15 @@ class DaemonLedd(daemon_base.DaemonBase): self.log_error("Failed to load ledutil: %s" % (str(e)), True) sys.exit(LEDUTIL_LOAD_ERROR) - # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + # Load the namespace details first from the database_global.json file. + swsscommon.SonicDBConfig.initializeGlobalConfig() + + # Get the namespaces in the platform. For multi-asic devices we get the namespaces + # of front-end ascis which have front-panel interfaces. 
+ namespaces = [EMPTY_NAMESPACE] + if multi_asic.is_multi_asic(): + ns_list = multi_asic.get_all_namespaces() + namespaces = ns_list['front_ns'] # Subscribe to PORT table notifications in the Application DB appl_db, sst = {}, {} @@ -103,7 +115,7 @@ class DaemonLedd(daemon_base.DaemonBase): fvp_dict = dict(fvp) if op == "SET" and "oper_status" in fvp_dict: - if not key.startswith(daemon_base.get_internal_interface_prefix()): + if not key.startswith(backplane_prefix()): led_control.port_link_state_change(key, fvp_dict["oper_status"]) return 1 diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 5aa70a9f0..30a1ee6c0 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -19,7 +19,8 @@ try: from enum import Enum from sonic_py_common import daemon_base, device_info, logger from swsscommon import swsscommon -except ImportError as e: + from sonic_py_common import multi_asic +except ImportError, e: raise ImportError (str(e) + " - required module not found") # @@ -95,13 +96,32 @@ platform_chassis = None # by DaemonXcvrd helper_logger = logger.Logger(SYSLOG_IDENTIFIER) -# MultiAsic class instance -multi_asic = MultiAsic() +# The empty namespace refers to linux host namespace. +EMPTY_NAMESPACE = '' # # Helper functions ============================================================= # +# Get namespaces, for single ASIC platform namespace is EMPTY_NAMESPACE +def get_front_end_namesapces(): + # Get the namespaces in the platform. For multi-asic devices we get the namespaces + # of front-end ascis which have front-panel interfaces. 
+ namespaces = [EMPTY_NAMESPACE] + if multi_asic.is_multi_asic(): + ns_list = multi_asic.get_all_namespaces() + namespaces = ns_list['front_ns'] + + return namespaces + +# Get asic index from the namespace name +# With single ASIC platform, namespace is EMPTY_NAMESPACE, return index 0 +def get_asic_id_from_namespace(namespace): + if namespace == EMPTY_NAMESPACE: + return 0 + else: + return multi_asic.get_asic_id_from_name(namespace) + # Find out the underneath physical port list by logical name def logical_port_name_to_physical_port_list(port_name): if port_name.startswith("Ethernet"): @@ -427,9 +447,9 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): transceiver_dict, state_db, appl_db, int_tbl, dom_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + namespaces = get_front_end_namesapces() for namespace in namespaces: - asic_id = multi_asic.get_asic_id_from_namespace(namespace) + asic_id = get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) @@ -740,9 +760,9 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): state_db, status_tbl = {},{} # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + namespaces = get_front_end_namesapces() for namespace in namespaces: - asic_id = multi_asic.get_asic_id_from_namespace(namespace) + asic_id = get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) @@ -789,9 +809,9 @@ class DomInfoUpdateTask(object): state_db, dom_tbl, status_tbl = {}, {}, {} # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + namespaces = get_front_end_namesapces() for 
namespace in namespaces: - asic_id = multi_asic.get_asic_id_from_namespace(namespace) + asic_id = get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) @@ -861,9 +881,9 @@ class SfpStateUpdateTask(object): state_db, appl_db, int_tbl, dom_tbl, status_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + namespaces = get_front_end_namesapces() for namespace in namespaces: - asic_id = multi_asic.get_asic_id_from_namespace(namespace) + asic_id = get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) @@ -1179,6 +1199,9 @@ class DaemonXcvrd(daemon_base.DaemonBase): self.log_error("Failed to load sfputil: %s" % (str(e)), True) sys.exit(SFPUTIL_LOAD_ERROR) + # Load the namespace details first from the database_global.json file. 
+ swsscommon.SonicDBConfig.initializeGlobalConfig() + # Load port info try: if multi_asic.is_multi_asic(): @@ -1197,9 +1220,9 @@ class DaemonXcvrd(daemon_base.DaemonBase): state_db, self.int_tbl, self.dom_tbl, self.status_tbl = {}, {}, {}, {} # Get the namespaces in the platform - namespaces = multi_asic.get_namespaces() + namespaces = get_front_end_namesapces() for namespace in namespaces: - asic_id = multi_asic.get_asic_id_from_namespace(namespace) + asic_id = get_asic_id_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) self.int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) self.dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) From 37cec198e763f6c070c5066bae1c045487c487da Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Fri, 14 Aug 2020 10:45:01 -0700 Subject: [PATCH 06/24] Invoke initializeGlobalConfig() in the SfpUpdate/DomInfoUpdate processes as well. --- sonic-xcvrd/scripts/xcvrd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 30a1ee6c0..ee86de041 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -802,6 +802,9 @@ class DomInfoUpdateTask(object): self.task_thread = None self.task_stopping_event = threading.Event() + # Load the namespace details first from the database_global.json file. + swsscommon.SonicDBConfig.initializeGlobalConfig() + def task_worker(self): helper_logger.log_info("Start DOM monitoring loop") @@ -849,6 +852,9 @@ class SfpStateUpdateTask(object): self.task_process = None self.task_stopping_event = multiprocessing.Event() + # Load the namespace details first from the database_global.json file. 
+ swsscommon.SonicDBConfig.initializeGlobalConfig() + def _mapping_event_from_change_event(self, status, port_dict): """ mapping from what get_transceiver_change_event returns to event defined in the state machine From 9323fa05787fb21ebeef26f9058d7d9557554be3 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Wed, 19 Aug 2020 16:45:21 -0700 Subject: [PATCH 07/24] Fixes in xcvrd. --- sonic-xcvrd/scripts/xcvrd | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index ee86de041..53e11f9e0 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -117,10 +117,13 @@ def get_front_end_namesapces(): # Get asic index from the namespace name # With single ASIC platform, namespace is EMPTY_NAMESPACE, return index 0 def get_asic_id_from_namespace(namespace): - if namespace == EMPTY_NAMESPACE: - return 0 - else: - return multi_asic.get_asic_id_from_name(namespace) + asic_id_string = multi_asic.get_asic_id_from_name(namespace) + if asic_id_string is not None: + return int(asic_id_string) + + # Default we return back 0, handles single asic platform and check when the func + # get_asic_id_from_name() returns back None. 
+ return 0 # Find out the underneath physical port list by logical name def logical_port_name_to_physical_port_list(port_name): @@ -1212,7 +1215,7 @@ class DaemonXcvrd(daemon_base.DaemonBase): try: if multi_asic.is_multi_asic(): # For multi ASIC platforms we pass DIR of port_config_file_path and the number of asics - (platform_path, hwsku_path) = device_info.get_path_to_platform_and_hwsku() + (platform_path, hwsku_path) = device_info.get_paths_to_platform_and_hwsku_dirs() platform_sfputil.read_all_porttab_mappings(hwsku_path, self.num_asics) else: # For single ASIC platforms we pass port_config_file_path and the asic_inst as 0 From 17572dead98a7d4549d0b86b840d1afc1103b8e6 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Thu, 20 Aug 2020 19:41:50 -0700 Subject: [PATCH 08/24] Use common API's to get asic index and get namespaces with front-panel interfaces. --- sonic-ledd/scripts/ledd | 5 +---- sonic-xcvrd/scripts/xcvrd | 42 ++++++++++----------------------------- 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 9c9b6c5d7..52812dbaa 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -75,10 +75,7 @@ class DaemonLedd(daemon_base.DaemonBase): # Get the namespaces in the platform. For multi-asic devices we get the namespaces # of front-end ascis which have front-panel interfaces. 
- namespaces = [EMPTY_NAMESPACE] - if multi_asic.is_multi_asic(): - ns_list = multi_asic.get_all_namespaces() - namespaces = ns_list['front_ns'] + namespaces = multi_asic.get_front_end_namespaces() # Subscribe to PORT table notifications in the Application DB appl_db, sst = {}, {} diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 53e11f9e0..533f97fff 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -103,28 +103,6 @@ EMPTY_NAMESPACE = '' # Helper functions ============================================================= # -# Get namespaces, for single ASIC platform namespace is EMPTY_NAMESPACE -def get_front_end_namesapces(): - # Get the namespaces in the platform. For multi-asic devices we get the namespaces - # of front-end ascis which have front-panel interfaces. - namespaces = [EMPTY_NAMESPACE] - if multi_asic.is_multi_asic(): - ns_list = multi_asic.get_all_namespaces() - namespaces = ns_list['front_ns'] - - return namespaces - -# Get asic index from the namespace name -# With single ASIC platform, namespace is EMPTY_NAMESPACE, return index 0 -def get_asic_id_from_namespace(namespace): - asic_id_string = multi_asic.get_asic_id_from_name(namespace) - if asic_id_string is not None: - return int(asic_id_string) - - # Default we return back 0, handles single asic platform and check when the func - # get_asic_id_from_name() returns back None. 
- return 0 - # Find out the underneath physical port list by logical name def logical_port_name_to_physical_port_list(port_name): if port_name.startswith("Ethernet"): @@ -450,9 +428,9 @@ def post_port_sfp_dom_info_to_db(is_warm_start, stop_event=threading.Event()): transceiver_dict, state_db, appl_db, int_tbl, dom_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} # Get the namespaces in the platform - namespaces = get_front_end_namesapces() + namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: - asic_id = get_asic_id_from_namespace(namespace) + asic_id = multi_asic.get_asic_index_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) appl_db[asic_id] = daemon_base.db_connect("APPL_DB", namespace) int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) @@ -763,9 +741,9 @@ def init_port_sfp_status_tbl(stop_event=threading.Event()): state_db, status_tbl = {},{} # Get the namespaces in the platform - namespaces = get_front_end_namesapces() + namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: - asic_id = get_asic_id_from_namespace(namespace) + asic_id = multi_asic.get_asic_index_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) @@ -815,9 +793,9 @@ class DomInfoUpdateTask(object): state_db, dom_tbl, status_tbl = {}, {}, {} # Get the namespaces in the platform - namespaces = get_front_end_namesapces() + namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: - asic_id = get_asic_id_from_namespace(namespace) + asic_id = multi_asic.get_asic_index_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) status_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_STATUS_TABLE) @@ -890,9 +868,9 
@@ class SfpStateUpdateTask(object): state_db, appl_db, int_tbl, dom_tbl, status_tbl, app_port_tbl = {}, {}, {}, {}, {}, {} # Get the namespaces in the platform - namespaces = get_front_end_namesapces() + namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: - asic_id = get_asic_id_from_namespace(namespace) + asic_id = multi_asic.get_asic_index_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) @@ -1229,9 +1207,9 @@ class DaemonXcvrd(daemon_base.DaemonBase): state_db, self.int_tbl, self.dom_tbl, self.status_tbl = {}, {}, {}, {} # Get the namespaces in the platform - namespaces = get_front_end_namesapces() + namespaces = multi_asic.get_front_end_namespaces() for namespace in namespaces: - asic_id = get_asic_id_from_namespace(namespace) + asic_id = multi_asic.get_asic_index_from_namespace(namespace) state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) self.int_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_INFO_TABLE) self.dom_tbl[asic_id] = swsscommon.Table(state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) From 710689f01ace8f2a159ecc92a5f1d2f10dce5077 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 24 Aug 2020 08:19:54 -0700 Subject: [PATCH 09/24] Fix the space in initialization. 
--- sonic-xcvrd/scripts/xcvrd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 533f97fff..b59acdbc3 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -738,7 +738,7 @@ def detect_port_in_error_status(logical_port_name, status_tbl): # Init TRANSCEIVER_STATUS table def init_port_sfp_status_tbl(stop_event=threading.Event()): # Connect to STATE_DB and create transceiver status table - state_db, status_tbl = {},{} + state_db, status_tbl = {}, {} # Get the namespaces in the platform namespaces = multi_asic.get_front_end_namespaces() From fb86bc2bfa3a3a81d57ae45e03652269c01193bb Mon Sep 17 00:00:00 2001 From: junchao Date: Wed, 26 Aug 2020 15:33:52 +0800 Subject: [PATCH 10/24] Update pmon daemons for SONiC Physical Entity MIB feature --- sonic-psud/scripts/psud | 67 ++++++++++++---- sonic-thermalctld/scripts/thermalctld | 108 ++++++++++++++++++++++---- sonic-xcvrd/scripts/xcvrd | 13 +++- 3 files changed, 156 insertions(+), 32 deletions(-) diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index 9ea271c53..d9acc9964 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -29,18 +29,25 @@ PLATFORM_SPECIFIC_MODULE_NAME = "psuutil" PLATFORM_SPECIFIC_CLASS_NAME = "PsuUtil" CHASSIS_INFO_TABLE = 'CHASSIS_INFO' -CHASSIS_INFO_KEY_TEMPLATE = 'chassis {}' +CHASSIS_INFO_KEY = 'chassis 1' CHASSIS_INFO_PSU_NUM_FIELD = 'psu_num' PSU_INFO_TABLE = 'PSU_INFO' PSU_INFO_KEY_TEMPLATE = 'PSU {}' PSU_INFO_PRESENCE_FIELD = 'presence' +PSU_INFO_MODEL_FIELD = 'model' +PSU_INFO_SERIAL_FIELD = 'serial' PSU_INFO_STATUS_FIELD = 'status' PSU_INFO_TEMP_FIELD = 'temp' PSU_INFO_TEMP_TH_FIELD = 'temp_threshold' PSU_INFO_VOLTAGE_FIELD = 'voltage' PSU_INFO_VOLTAGE_MAX_TH_FIELD = 'voltage_max_threshold' PSU_INFO_VOLTAGE_MIN_TH_FIELD = 'voltage_min_threshold' +PSU_INFO_CURRENT_FIELD = 'current' +PSU_INFO_POWER_FIELD = 'power' +PSU_INFO_FRU_FIELD = 'is_replaceable' + 
+PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO' FAN_INFO_TABLE = 'FAN_INFO' FAN_INFO_PRESENCE_FIELD = 'presence' @@ -93,13 +100,17 @@ def _wrapper_get_psus_status(psu_index): # Helper functions ============================================================= # +def get_psu_key(psu_index): + return PSU_INFO_KEY_TEMPLATE.format(psu_index) + + def psu_db_update(psu_tbl, psu_num): for psu_index in range(1, psu_num + 1): fvs = swsscommon.FieldValuePairs([(PSU_INFO_PRESENCE_FIELD, 'true' if _wrapper_get_psus_presence(psu_index) else 'false'), (PSU_INFO_STATUS_FIELD, 'true' if _wrapper_get_psus_status(psu_index) else 'false')]) - psu_tbl.set(PSU_INFO_KEY_TEMPLATE.format(psu_index), fvs) + psu_tbl.set(get_psu_key(psu_index), fvs) # try get information from platform API and return a default value if caught NotImplementedError @@ -256,16 +267,18 @@ class DaemonPsud(daemon_base.DaemonBase): chassis_tbl = swsscommon.Table(state_db, CHASSIS_INFO_TABLE) psu_tbl = swsscommon.Table(state_db, PSU_INFO_TABLE) self.fan_tbl = swsscommon.Table(state_db, FAN_INFO_TABLE) + self.phy_entity_tbl = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE) # Post psu number info to STATE_DB psu_num = _wrapper_get_num_psus() fvs = swsscommon.FieldValuePairs([(CHASSIS_INFO_PSU_NUM_FIELD, str(psu_num))]) - chassis_tbl.set(CHASSIS_INFO_KEY_TEMPLATE.format(1), fvs) + chassis_tbl.set(CHASSIS_INFO_KEY, fvs) # Start main loop self.log_info("Start daemon main loop") while not self.stop.wait(PSU_INFO_UPDATE_PERIOD_SECS): + self._update_psu_entity_info() psu_db_update(psu_tbl, psu_num) self.update_psu_data(psu_tbl) self._update_led_color(psu_tbl) @@ -274,9 +287,9 @@ class DaemonPsud(daemon_base.DaemonBase): # Delete all the information from DB and then exit for psu_index in range(1, psu_num + 1): - psu_tbl._del(PSU_INFO_KEY_TEMPLATE.format(psu_index)) + psu_tbl._del(get_psu_key(psu_index)) - chassis_tbl._del(CHASSIS_INFO_KEY_TEMPLATE.format(1)) + chassis_tbl._del(CHASSIS_INFO_KEY) self.log_info("Shutting 
down...") @@ -291,9 +304,7 @@ class DaemonPsud(daemon_base.DaemonBase): self.log_warning("Failed to update PSU data - {}".format(e)) def _update_single_psu_data(self, index, psu, psu_tbl): - name = try_get(psu.get_name) - if not name: - name = PSU_INFO_KEY_TEMPLATE.format(index) + name = get_psu_key(index) presence = _wrapper_get_psus_presence(index) power_good = False voltage = None @@ -301,6 +312,9 @@ class DaemonPsud(daemon_base.DaemonBase): voltage_low_threshold = None temperature = None temperature_threshold = None + current = None + power = None + is_replaceable = try_get(psu.is_replaceable, False) if presence: power_good = _wrapper_get_psus_status(index) voltage = try_get(psu.get_voltage) @@ -308,6 +322,8 @@ class DaemonPsud(daemon_base.DaemonBase): voltage_low_threshold = try_get(psu.get_voltage_low_threshold) temperature = try_get(psu.get_temperature) temperature_threshold = try_get(psu.get_temperature_high_threshold) + current = try_get(psu.get_current) + power = try_get(psu.get_power) if index not in self.psu_status_dict: self.psu_status_dict[index] = PsuStatus(psu) @@ -354,22 +370,44 @@ class DaemonPsud(daemon_base.DaemonBase): self._set_psu_led(psu, psu_status) fvs = swsscommon.FieldValuePairs( - [(PSU_INFO_TEMP_FIELD, str(temperature)), + [(PSU_INFO_MODEL_FIELD, str(try_get(psu.get_model))), + (PSU_INFO_SERIAL_FIELD, str(try_get(psu.get_serial))), + (PSU_INFO_TEMP_FIELD, str(temperature)), (PSU_INFO_TEMP_TH_FIELD, str(temperature_threshold)), (PSU_INFO_VOLTAGE_FIELD, str(voltage)), (PSU_INFO_VOLTAGE_MIN_TH_FIELD, str(voltage_low_threshold)), (PSU_INFO_VOLTAGE_MAX_TH_FIELD, str(voltage_high_threshold)), + (PSU_INFO_CURRENT_FIELD, str(current)), + (PSU_INFO_POWER_FIELD, str(power)), + (PSU_INFO_FRU_FIELD, str(is_replaceable)), + ]) + psu_tbl.set(name, fvs) + + def _update_psu_entity_info(self): + if not platform_chassis: + return + + for index, psu in enumerate(platform_chassis.get_all_psus()): + try: + self._update_single_psu_entity_info(index + 1, 
psu) + except Exception as e: + self.log_warning("Failed to update PSU data - {}".format(e)) + + def _update_single_psu_entity_info(self, psu_index, psu): + position = try_get(psu.get_position_in_parent, psu_index) + fvs = swsscommon.FieldValuePairs( + [('position_in_parent', str(position)), + ('parent_name', CHASSIS_INFO_KEY), ]) - psu_tbl.set(PSU_INFO_KEY_TEMPLATE.format(index), fvs) + self.phy_entity_tbl.set(get_psu_key(psu_index), fvs) def _update_psu_fan_data(self, psu, psu_index): """ - :param psu: :param psu_index: :return: """ - psu_name = try_get(psu.get_name, 'PSU {}'.format(psu_index)) + psu_name = get_psu_key(psu_index) presence = _wrapper_get_psus_presence(psu_index) fan_list = psu.get_all_fans() for index, fan in enumerate(fan_list): @@ -407,11 +445,11 @@ class DaemonPsud(daemon_base.DaemonBase): fvs = swsscommon.FieldValuePairs([ ('led_status', NOT_AVAILABLE) ]) - psu_tbl.set(PSU_INFO_KEY_TEMPLATE.format(index), fvs) + psu_tbl.set(get_psu_key(index), fvs) self._update_psu_fan_led_status(psu_status.psu, index) def _update_psu_fan_led_status(self, psu, psu_index): - psu_name = try_get(psu.get_name, 'PSU {}'.format(psu_index)) + psu_name = get_psu_key(psu_index) fan_list = psu.get_all_fans() for index, fan in enumerate(fan_list): fan_name = try_get(fan.get_name, '{} FAN {}'.format(psu_name, index + 1)) @@ -426,7 +464,6 @@ class DaemonPsud(daemon_base.DaemonBase): ]) self.fan_tbl.set(fan_name, fvs) - # # Main ========================================================================= # diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index d70c9c69d..36a70ae93 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -23,6 +23,7 @@ except ImportError as e: SYSLOG_IDENTIFIER = 'thermalctld' NOT_AVAILABLE = 'N/A' +CHASSIS_INFO_KEY = 'chassis 1' # utility functions @@ -44,6 +45,13 @@ def try_get(callback, default=NOT_AVAILABLE): return ret +def update_entity_info(table, 
parent_name, key, device, device_index): + fvs = swsscommon.FieldValuePairs( + [('position_in_parent', str(try_get(device.get_position_in_parent, device_index))), + ('parent_name', parent_name)]) + table.set(key, fvs) + + class FanStatus(logger.Logger): absence_fan_count = 0 fault_fan_count = 0 @@ -171,6 +179,7 @@ class FanStatus(logger.Logger): class FanUpdater(logger.Logger): # Fan information table name in database FAN_INFO_TABLE_NAME = 'FAN_INFO' + FAN_DRAWER_INFO_TABLE_NAME = 'FAN_DRAWER_INFO' def __init__(self, log_identifier, chassis): """ @@ -183,6 +192,8 @@ class FanUpdater(logger.Logger): self.fan_status_dict = {} state_db = daemon_base.db_connect("STATE_DB") self.table = swsscommon.Table(state_db, FanUpdater.FAN_INFO_TABLE_NAME) + self.drawer_table = swsscommon.Table(state_db, FanUpdater.FAN_DRAWER_INFO_TABLE_NAME) + self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE) def deinit(self): """ @@ -214,20 +225,18 @@ class FanUpdater(logger.Logger): old_bad_fan_count = FanStatus.get_bad_fan_count() FanStatus.reset_fan_counter() - fan_index = 0 - for drawer in self.chassis.get_all_fan_drawers(): - for fan in drawer.get_all_fans(): + for drawer_index, drawer in enumerate(self.chassis.get_all_fan_drawers()): + self._refresh_fan_drawer_status(drawer, drawer_index) + for fan_index, fan in enumerate(drawer.get_all_fans()): try: - self._refresh_fan_status(drawer, fan, fan_index) + self._refresh_fan_status(drawer, drawer_index, fan, fan_index) except Exception as e: self.log_warning('Failed to update FAN status - {}'.format(e)) - fan_index += 1 for psu_index, psu in enumerate(self.chassis.get_all_psus()): - psu_name = try_get(psu.get_name, 'PSU {}'.format(psu_index)) for fan_index, fan in enumerate(psu.get_all_fans()): try: - self._refresh_fan_status(None, fan, fan_index, '{} FAN'.format(psu_name), True) + self._refresh_fan_status(psu, psu_index, fan, fan_index, True) except Exception as e: self.log_warning('Failed to update PSU FAN status - 
{}'.format(e)) @@ -243,17 +252,40 @@ class FanUpdater(logger.Logger): self.log_debug("End fan updating") - def _refresh_fan_status(self, fan_drawer, fan, index, name_prefix='FAN', is_psu_fan=False): + def _refresh_fan_drawer_status(self, fan_drawer, drawer_index): + drawer_name = try_get(fan_drawer.get_name) + if drawer_name == NOT_AVAILABLE: + return + + update_entity_info(self.phy_entity_table, CHASSIS_INFO_KEY, drawer_name, fan_drawer, drawer_index) + + fvs = swsscommon.FieldValuePairs( + [('presence', str(try_get(fan_drawer.get_presence, False))), + ('model', str(try_get(fan_drawer.get_model))), + ('serial', str(try_get(fan_drawer.get_serial))), + ('status', str(try_get(fan_drawer.get_status))), + ('is_replaceable', str(try_get(fan_drawer.is_replaceable, False))), + ]) + + self.drawer_table.set(drawer_name, fvs) + + def _refresh_fan_status(self, parent, parent_index, fan, fan_index, is_psu_fan=False): """ Get Fan status by platform API and write to database for a given Fan - :param fan_drawer: Object representing a platform Fan drawer + :param parent: Parent device of this fan + :param parent_index: Parent device index :param fan: Object representing a platform Fan - :param index: Index of the Fan object in the platform + :param fan_index: Index of the Fan object in its parent device :param name_prefix: name prefix of Fan object if Fan.get_name not presented :return: """ - drawer_name = NOT_AVAILABLE if is_psu_fan else str(try_get(fan_drawer.get_name)) - fan_name = try_get(fan.get_name, '{} {}'.format(name_prefix, index + 1)) + drawer_name = NOT_AVAILABLE if is_psu_fan else str(try_get(parent.get_name)) + if is_psu_fan: + parent_name = 'PSU {}'.format(parent_index) + else: + parent_name = drawer_name if drawer_name != NOT_AVAILABLE else CHASSIS_INFO_KEY + fan_name = try_get(fan.get_name, '{} FAN {}'.format(parent_name, fan_index + 1)) + update_entity_info(self.phy_entity_table, parent_name, fan_name, fan, fan_index + 1) if fan_name not in self.fan_status_dict: 
self.fan_status_dict[fan_name] = FanStatus(SYSLOG_IDENTIFIER, fan, is_psu_fan) @@ -264,6 +296,7 @@ class FanUpdater(logger.Logger): speed_target = NOT_AVAILABLE fan_fault_status = NOT_AVAILABLE fan_direction = NOT_AVAILABLE + is_replaceable = try_get(fan.is_replaceable, False) presence = try_get(fan.get_presence, False) if presence: speed = try_get(fan.get_speed) @@ -321,6 +354,7 @@ class FanUpdater(logger.Logger): ('speed', str(speed)), ('speed_tolerance', str(speed_tolerance)), ('speed_target', str(speed_target)), + ('is_replaceable', str(is_replaceable)), ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S')) ]) @@ -360,6 +394,21 @@ class FanUpdater(logger.Logger): ]) self.table.set(fan_name, fvs) + for drawer in self.chassis.get_all_fan_drawers(): + drawer_name = try_get(drawer.get_name) + if drawer_name == NOT_AVAILABLE: + continue + try: + fvs = swsscommon.FieldValuePairs([ + ('led_status', str(try_get(drawer.get_status_led))) + ]) + except Exception as e: + self.log_warning('Failed to get led status for fan drawer') + fvs = swsscommon.FieldValuePairs([ + ('led_status', NOT_AVAILABLE) + ]) + self.drawer_table.set(drawer_name, fvs) + class TemperatureStatus(logger.Logger): TEMPERATURE_DIFF_THRESHOLD = 10 @@ -459,6 +508,7 @@ class TemperatureUpdater(logger.Logger): self.temperature_status_dict = {} state_db = daemon_base.db_connect("STATE_DB") self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME) + self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE) def deinit(self): """ @@ -489,20 +539,44 @@ class TemperatureUpdater(logger.Logger): self.log_debug("Start temperature updating") for index, thermal in enumerate(self.chassis.get_all_thermals()): try: - self._refresh_temperature_status(thermal, index) + self._refresh_temperature_status(CHASSIS_INFO_KEY, thermal, index) except Exception as e: self.log_warning('Failed to update thermal status - {}'.format(e)) + for psu_index, psu in 
enumerate(self.chassis.get_all_psus()): + parent_name = 'PSU {}'.format(psu_index + 1) + for thermal_index, thermal in enumerate(psu.get_all_thermals()): + try: + self._refresh_temperature_status(parent_name, thermal, thermal_index) + except Exception as e: + self.log_warning('Failed to update thermal status - {}'.format(e)) + + for sfp_index, sfp in enumerate(self.chassis.get_all_sfps()): + parent_name = 'SFP {}'.format(sfp_index + 1) + for thermal_index, thermal in enumerate(sfp.get_all_thermals()): + try: + self._refresh_temperature_status(parent_name, thermal, thermal_index) + except Exception as e: + self.log_warning('Failed to update thermal status - {}'.format(e)) + self.log_debug("End temperature updating") - def _refresh_temperature_status(self, thermal, index): + def _refresh_temperature_status(self, parent_name, thermal, thermal_index): """ Get temperature status by platform API and write to database + :param parent_name: Name of parent device of the thermal object :param thermal: Object representing a platform thermal zone - :param index: Index of the thermal object in platform chassis + :param thermal_index: Index of the thermal object in platform chassis :return: """ - name = try_get(thermal.get_name, 'Thermal {}'.format(index + 1)) + name = try_get(thermal.get_name, '{} Thermal {}'.format(parent_name, thermal_index + 1)) + + # Only save entity info for thermals that belong to chassis + # for PSU and SFP thermal, they don't need save entity info because snmp can deduce the relation from PSU_INFO + # and TRANSCEIVER_DOM_SENSOR + if parent_name == CHASSIS_INFO_KEY: + update_entity_info(self.phy_entity_table, parent_name, name, thermal, thermal_index + 1) + if name not in self.temperature_status_dict: self.temperature_status_dict[name] = TemperatureStatus(SYSLOG_IDENTIFIER) @@ -513,6 +587,7 @@ class TemperatureUpdater(logger.Logger): high_critical_threshold = NOT_AVAILABLE low_critical_threshold = NOT_AVAILABLE temperature = 
try_get(thermal.get_temperature) + is_replaceable = try_get(thermal.is_replaceable, False) if temperature != NOT_AVAILABLE: temperature_status.set_temperature(name, temperature) high_threshold = try_get(thermal.get_high_threshold) @@ -546,6 +621,7 @@ class TemperatureUpdater(logger.Logger): ('warning_status', str(warning)), ('critical_high_threshold', str(high_critical_threshold)), ('critical_low_threshold', str(low_critical_threshold)), + ('is_replaceable', str(is_replaceable)), ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S')) ]) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 3ed032272..36dab37c5 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -138,6 +138,14 @@ def _wrapper_get_presence(physical_port): pass return platform_sfputil.get_presence(physical_port) +def _wrapper_is_replaceable(physical_port): + if platform_chassis is not None: + try: + return platform_chassis.get_sfp(physical_port).is_replaceable() + except NotImplementedError: + pass + return False + def _wrapper_get_transceiver_info(physical_port): if platform_chassis is not None: try: @@ -250,6 +258,7 @@ def post_port_sfp_info_to_db(logical_port_name, table, transceiver_dict, try: port_info_dict = _wrapper_get_transceiver_info(physical_port) if port_info_dict is not None: + is_replaceable = _wrapper_is_replaceable(physical_port) transceiver_dict[physical_port]=port_info_dict fvs = swsscommon.FieldValuePairs([('type', port_info_dict['type']), ('hardware_rev', port_info_dict['hardware_rev']), @@ -266,7 +275,9 @@ def post_port_sfp_info_to_db(logical_port_name, table, transceiver_dict, ('cable_length',port_info_dict['cable_length']), ('specification_compliance',port_info_dict['specification_compliance']), ('nominal_bit_rate',port_info_dict['nominal_bit_rate']), - ('application_advertisement',port_info_dict['application_advertisement'] if 'application_advertisement' in port_info_dict else 'N/A')]) + 
('application_advertisement',port_info_dict['application_advertisement'] if 'application_advertisement' in port_info_dict else 'N/A'), + ('is_replaceable',str(is_replaceable)), + ]) table.set(port_name, fvs) else: return SFP_EEPROM_NOT_READY From 068d09bd6e5a334b719ea0bbf46f1fe7ee7441d9 Mon Sep 17 00:00:00 2001 From: junchao Date: Fri, 28 Aug 2020 10:20:00 +0800 Subject: [PATCH 11/24] Fix unit test failures --- sonic-thermalctld/scripts/thermalctld | 5 ++-- sonic-thermalctld/tests/mock_platform.py | 32 +++++++++++++++------ sonic-thermalctld/tests/test_thermalctld.py | 2 +- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 36a70ae93..e4b8c8802 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -24,6 +24,7 @@ except ImportError as e: SYSLOG_IDENTIFIER = 'thermalctld' NOT_AVAILABLE = 'N/A' CHASSIS_INFO_KEY = 'chassis 1' +PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO' # utility functions @@ -342,7 +343,7 @@ class FanUpdater(logger.Logger): # We don't set PSU led here, PSU led will be handled in psud if set_led: if not is_psu_fan: - self._set_fan_led(fan_drawer, fan, fan_name, fan_status) + self._set_fan_led(parent, fan, fan_name, fan_status) fvs = swsscommon.FieldValuePairs( [('presence', str(presence)), @@ -398,7 +399,7 @@ class FanUpdater(logger.Logger): drawer_name = try_get(drawer.get_name) if drawer_name == NOT_AVAILABLE: continue - try: + try: fvs = swsscommon.FieldValuePairs([ ('led_status', str(try_get(drawer.get_status_led))) ]) diff --git a/sonic-thermalctld/tests/mock_platform.py b/sonic-thermalctld/tests/mock_platform.py index b6fae1eaf..a3eec225f 100644 --- a/sonic-thermalctld/tests/mock_platform.py +++ b/sonic-thermalctld/tests/mock_platform.py @@ -17,6 +17,15 @@ def get_model(self): def get_serial(self): return self.serial + def get_position_in_parent(self): + return 1 + + def is_replaceable(self): + 
return True + + def get_status(self): + return True + class MockFan(MockDevice): STATUS_LED_COLOR_RED = 'red' @@ -75,6 +84,7 @@ def get_speed(self): class MockPsu(MockDevice): def __init__(self): + MockDevice.__init__(self) self.fan_list = [] def get_all_fans(self): @@ -82,8 +92,9 @@ def get_all_fans(self): class MockFanDrawer(MockDevice): - def __init__(self): - self.name = 'FanDrawer' + def __init__(self, index): + MockDevice.__init__(self) + self.name = 'FanDrawer {}'.format(index) self.fan_list = [] self.led_status = 'red' @@ -100,8 +111,9 @@ def set_status_led(self, value): self.led_status = value -class MockThermal: +class MockThermal(MockDevice): def __init__(self): + MockDevice.__init__(self) self.name = None self.temperature = 2 self.high_threshold = 3 @@ -154,6 +166,7 @@ def __init__(self): self.psu_list = [] self.thermal_list = [] self.fan_drawer_list = [] + self.sfp_list = [] def get_all_fans(self): return self.fan_list @@ -167,10 +180,13 @@ def get_all_thermals(self): def get_all_fan_drawers(self): return self.fan_drawer_list + def get_all_sfps(self): + return self.sfp_list + def make_absence_fan(self): fan = MockFan() fan.presence = False - fan_drawer = MockFanDrawer() + fan_drawer = MockFanDrawer(len(self.fan_drawer_list)) fan_drawer.fan_list.append(fan) self.fan_list.append(fan) self.fan_drawer_list.append(fan_drawer) @@ -178,7 +194,7 @@ def make_absence_fan(self): def make_fault_fan(self): fan = MockFan() fan.status = False - fan_drawer = MockFanDrawer() + fan_drawer = MockFanDrawer(len(self.fan_drawer_list)) fan_drawer.fan_list.append(fan) self.fan_list.append(fan) self.fan_drawer_list.append(fan_drawer) @@ -186,7 +202,7 @@ def make_fault_fan(self): def make_under_speed_fan(self): fan = MockFan() fan.make_under_speed() - fan_drawer = MockFanDrawer() + fan_drawer = MockFanDrawer(len(self.fan_drawer_list)) fan_drawer.fan_list.append(fan) self.fan_list.append(fan) self.fan_drawer_list.append(fan_drawer) @@ -194,14 +210,14 @@ def 
make_under_speed_fan(self): def make_over_speed_fan(self): fan = MockFan() fan.make_over_speed() - fan_drawer = MockFanDrawer() + fan_drawer = MockFanDrawer(len(self.fan_drawer_list)) fan_drawer.fan_list.append(fan) self.fan_list.append(fan) self.fan_drawer_list.append(fan_drawer) def make_error_fan(self): fan = MockErrorFan() - fan_drawer = MockFanDrawer() + fan_drawer = MockFanDrawer(len(self.fan_drawer_list)) fan_drawer.fan_list.append(fan) self.fan_list.append(fan) self.fan_drawer_list.append(fan_drawer) diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index fbfcf3c7e..38cc8b064 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -179,7 +179,7 @@ def test_insufficient_fan_number(): chassis.make_fault_fan() fan_updater = FanUpdater(SYSLOG_IDENTIFIER, chassis) fan_updater.update() - assert fan_updater.log_warning.call_count == 3 + assert fan_updater.log_warning.call_count == 3 fan_updater.log_warning.assert_called_with('Insufficient number of working fans warning: 2 fans are not working.') fan_list = chassis.get_all_fans() From e057e3545d207c0bf3aab394d810df442ccc90b2 Mon Sep 17 00:00:00 2001 From: junchao Date: Mon, 31 Aug 2020 16:17:17 +0800 Subject: [PATCH 12/24] Fix issues found in manual test --- sonic-psud/scripts/psud | 26 ++++++++++++++------------ sonic-thermalctld/scripts/thermalctld | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index d9acc9964..470d3f155 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -14,7 +14,7 @@ try: import sys import threading - from sonic_py_common import daemon_base + from sonic_py_common import daemon_base, logger from swsscommon import swsscommon except ImportError as e: raise ImportError (str(e) + " - required module not found") @@ -131,26 +131,28 @@ def try_get(callback, default=None): return ret -def 
log_on_status_changed(normal_status, normal_log, abnormal_log): +def log_on_status_changed(logger, normal_status, normal_log, abnormal_log): """ Log when any status changed + :param logger: Logger object. :param normal_status: Expected status. :param normal_log: Log string for expected status. :param abnormal_log: Log string for unexpected status :return: """ if normal_status: - self.log_notice(normal_log) + logger.log_notice(normal_log) else: - self.log_warning(abnormal_log) + logger.log_warning(abnormal_log) # # PSU status =================================================================== # -class PsuStatus(object): - def __init__(self, psu): +class PsuStatus(logger.Logger): + def __init__(self, psu, log_identifier): + super(PsuStatus, self).__init__(log_identifier) self.psu = psu self.presence = True self.power_good = True @@ -326,13 +328,13 @@ class DaemonPsud(daemon_base.DaemonBase): power = try_get(psu.get_power) if index not in self.psu_status_dict: - self.psu_status_dict[index] = PsuStatus(psu) + self.psu_status_dict[index] = PsuStatus(psu, SYSLOG_IDENTIFIER) psu_status = self.psu_status_dict[index] set_led = False if psu_status.set_presence(presence): set_led = True - log_on_status_changed(psu_status.presence, + log_on_status_changed(self, psu_status.presence, 'PSU absence warning cleared: {} is inserted back.'.format(name), 'PSU absence warning: {} is not present.'.format(name) ) @@ -345,14 +347,14 @@ class DaemonPsud(daemon_base.DaemonBase): if presence and psu_status.set_power_good(power_good): set_led = True - log_on_status_changed(psu_status.power_good, + log_on_status_changed(self, psu_status.power_good, 'Power absence warning cleared: {} power is back to normal.'.format(name), 'Power absence warning: {} is out of power.'.format(name) ) if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold): set_led = True - log_on_status_changed(psu_status.voltage_good, + log_on_status_changed(self, psu_status.voltage_good, 
'PSU voltage warning cleared: {} voltage is back to normal.'.format(name), 'PSU voltage warning: {} voltage out of range, current voltage={}, valid range=[{}, {}].'.format( name, voltage, voltage_high_threshold, voltage_low_threshold) @@ -360,7 +362,7 @@ class DaemonPsud(daemon_base.DaemonBase): if presence and psu_status.set_temperature(temperature, temperature_threshold): set_led = True - log_on_status_changed(psu_status.temperature_good, + log_on_status_changed(self, psu_status.temperature_good, 'PSU temperature warning cleared: {} temperature is back to normal.'.format(name), 'PSU temperature warning: {} temperature too hot, temperature={}, threshold={}.'.format( name, temperature, temperature_threshold) @@ -458,7 +460,7 @@ class DaemonPsud(daemon_base.DaemonBase): (FAN_INFO_LED_STATUS_FIELD, str(try_get(fan.get_status_led))) ]) except Exception as e: - logger.log_warning('Failed to get led status for fan {}'.format(fan_name)) + self.log_warning('Failed to get led status for fan {}'.format(fan_name)) fvs = swsscommon.FieldValuePairs([ (FAN_INFO_LED_STATUS_FIELD, NOT_AVAILABLE) ]) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index e4b8c8802..5e67e3d68 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -282,7 +282,7 @@ class FanUpdater(logger.Logger): """ drawer_name = NOT_AVAILABLE if is_psu_fan else str(try_get(parent.get_name)) if is_psu_fan: - parent_name = 'PSU {}'.format(parent_index) + parent_name = 'PSU {}'.format(parent_index + 1) else: parent_name = drawer_name if drawer_name != NOT_AVAILABLE else CHASSIS_INFO_KEY fan_name = try_get(fan.get_name, '{} FAN {}'.format(parent_name, fan_index + 1)) From 1d77bc9ca49dfa199056c618ecaf3339ab17844f Mon Sep 17 00:00:00 2001 From: Sujin Kang Date: Thu, 3 Sep 2020 12:05:42 -0700 Subject: [PATCH 13/24] remove sonic-utilities dependency from pcied (#88) --- sonic-pcied/scripts/pcied | 41 ++++++++++++++++++++++++++------------- 
1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 47b7a496e..8cc208321 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -53,20 +53,35 @@ class DaemonPcied(DaemonBase): self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP) self.state_db.connect("STATE_DB") + # Check the PCIe devices def check_pcie_devices(self): - cmd = [ 'sudo', 'pcieutil', 'pcie-check' ] - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - resultInfo, err = p.communicate() - pcie_db_state = self.read_state_db("PCIE_STATUS|", "PCIE_DEVICES") - - for line in resultInfo.splitlines(): - if PCIE_RESULT_REGEX in line: - if "PASSED" in line and "PASSED" not in pcie_db_state: - self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "PASSED") - self.log_info("PCIe device status check : PASSED") - elif "FAILED" in line and "PASSED" in pcie_db_state: - self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "FAILED") - self.log_info("PCIe device status check : FAILED") + try: + platform_path, _ = device_info.get_paths_to_platform_and_hwsku_dirs() + platform_plugins_path = os.path.join(platform_path, "plugins") + sys.path.append(os.path.abspath(platform_plugins_path)) + from pcieutil import PcieUtil + except ImportError as e: + self.log_warning("Failed to load platform-specific PcieUtil module. Falling back to the common implementation") + try: + from sonic_platform_base.sonic_pcie.pcie_common import PcieUtil + platform_pcieutil = PcieUtil(platform_plugins_path) + except ImportError as e: + self.log_error("Failed to load default PcieUtil module. 
Error : {}".format(str(e)), True) + raise e + + resultInfo = platform_pcieutil.get_pcie_check() + + for item in resultInfo: + if item["result"] == "Failed": + self.log_warning("PCIe Device: " + item["name"] + " Not Found") + err += 1 + + if err: + self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "FAILED") + self.log_error("PCIe device status check : FAILED") + else: + self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "PASSED") + self.log_info("PCIe device status check : PASSED") def read_state_db(self, key1, key2): return self.state_db.get('STATE_DB', key1, key2) From e4ee8528025fb08e7e330ae66dd77d9f723dbcc5 Mon Sep 17 00:00:00 2001 From: judyjoseph <53951155+judyjoseph@users.noreply.github.com> Date: Thu, 3 Sep 2020 13:59:16 -0700 Subject: [PATCH 14/24] [ledd][multi-ASIC] Update to ledd based on sonic-swss-common updates (#87) * Updated based on sonic-swss-common changes to get the namespace (PR#378) --- sonic-ledd/scripts/ledd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sonic-ledd/scripts/ledd b/sonic-ledd/scripts/ledd index 52812dbaa..237712da0 100644 --- a/sonic-ledd/scripts/ledd +++ b/sonic-ledd/scripts/ledd @@ -91,7 +91,7 @@ class DaemonLedd(daemon_base.DaemonBase): while True: # Use timeout to prevent ignoring the signals we want to handle # in signal_handler() (e.g. SIGTERM for graceful shutdown) - (state, c) = sel.select(SELECT_TIMEOUT) + (state, selectableObj) = sel.select(SELECT_TIMEOUT) if state == swsscommon.Select.TIMEOUT: # Do not flood log when select times out @@ -100,9 +100,12 @@ class DaemonLedd(daemon_base.DaemonBase): self.log_warning("sel.select() did not return swsscommon.Select.OBJECT") continue - # Get the namespace from the selectable object and use it to index the SubscriberStateTable handle. 
- ns=c.getDbNamespace() - (key, op, fvp) = sst[ns].pop() + # Get the redisselect object from selectable object + redisSelectObj = swsscommon.CastSelectableToRedisSelectObj(selectableObj) + # Get the corresponding namespace from redisselect db connector object + namespace = redisSelectObj.getDbConnector().getNamespace() + + (key, op, fvp) = sst[namespace].pop() if fvp: # TODO: Once these flag entries have been removed from the DB, # we can remove this check From 1893c4049531194c8937af31a7cd25cb19852834 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Tue, 8 Sep 2020 09:46:44 -0700 Subject: [PATCH 15/24] Fix the xcvrd throwing error on sfprecover function on getKeys() not valid. Signed-off-by: Abhishek Dosi --- sonic-xcvrd/scripts/xcvrd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index b59acdbc3..3095ebe2a 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -488,7 +488,6 @@ def del_port_sfp_dom_info_from_db(logical_port_name, int_tbl, dom_tbl): def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event): transceiver_dict = {} - keys = int_tbl.getKeys() logical_port_list = sfp_util.logical for logical_port_name in logical_port_list: if stop_event.is_set(): @@ -500,6 +499,7 @@ def recover_missing_sfp_table_entries(sfp_util, int_tbl, status_tbl, stop_event) logger.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name)) continue + keys = int_tbl[asic_index].getKeys() if logical_port_name not in keys and not detect_port_in_error_status(logical_port_name, status_tbl[asic_index]): post_port_sfp_info_to_db(logical_port_name, int_tbl[asic_index], transceiver_dict, stop_event) From ca414df6a4732e59a12b210b3f7b30d73f1637fd Mon Sep 17 00:00:00 2001 From: junchao Date: Wed, 9 Sep 2020 10:17:39 +0800 Subject: [PATCH 16/24] Put PSU thermal to PHYSICAL_ENTITY_INFO table --- sonic-thermalctld/scripts/thermalctld | 9 +++++---- 1 file changed,
5 insertions(+), 4 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 5e67e3d68..39d944cc2 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -572,10 +572,11 @@ class TemperatureUpdater(logger.Logger): """ name = try_get(thermal.get_name, '{} Thermal {}'.format(parent_name, thermal_index + 1)) - # Only save entity info for thermals that belong to chassis - # for PSU and SFP thermal, they don't need save entity info because snmp can deduce the relation from PSU_INFO - # and TRANSCEIVER_DOM_SENSOR - if parent_name == CHASSIS_INFO_KEY: + # Only save entity info for thermals that belong to chassis and PSU + # for SFP thermal, they don't need save entity info because snmp can deduce the relation from TRANSCEIVER_DOM_SENSOR + # and as we save logical port in TRANSCEIVER_INFO table, for split cable, a SFP thermal might have multiple parent + # logical port + if 'SFP' not in parent_name: update_entity_info(self.phy_entity_table, parent_name, name, thermal, thermal_index + 1) if name not in self.temperature_status_dict: From 7f812c93b19fc456ac1ecd66510d403b8ff775c3 Mon Sep 17 00:00:00 2001 From: ChiouRung Haung Date: Wed, 9 Sep 2020 14:35:35 +0800 Subject: [PATCH 17/24] [xcvrd] Don't log unnecessary messages upon empty transceiver change event (#53) When port_dict of transceiver_change is empty, do nothing rather than log messages like ``` xcvrd: Got event True {} in state 1 xcvrd: mapping from True {'-1': 'system_become_ready'} to system_become_ready xcvrd: Got system_become_ready in normal state, ignored ``` --- sonic-xcvrd/scripts/xcvrd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sonic-xcvrd/scripts/xcvrd b/sonic-xcvrd/scripts/xcvrd index 3095ebe2a..48f110a73 100644 --- a/sonic-xcvrd/scripts/xcvrd +++ b/sonic-xcvrd/scripts/xcvrd @@ -954,6 +954,8 @@ class SfpStateUpdateTask(object): next_state = state time_start = time.time() status, port_dict = 
_wrapper_get_transceiver_change_event(timeout) + if not port_dict: + continue helper_logger.log_debug("Got event {} {} in state {}".format(status, port_dict, state)) event = self._mapping_event_from_change_event(status, port_dict) if event == SYSTEM_NOT_READY: From 096920273b685e451bc20035a0e3ab8dbba7df31 Mon Sep 17 00:00:00 2001 From: Petro Bratash <68950226+bratashX@users.noreply.github.com> Date: Thu, 10 Sep 2020 01:18:17 +0300 Subject: [PATCH 18/24] Fix pcied daemon failure (#91) Signed-off-by: Petro Bratash --- sonic-pcied/scripts/pcied | 1 + 1 file changed, 1 insertion(+) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 8cc208321..646bb771c 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -70,6 +70,7 @@ class DaemonPcied(DaemonBase): raise e resultInfo = platform_pcieutil.get_pcie_check() + err = 0 for item in resultInfo: if item["result"] == "Failed": From a6c00714374dc0953632c4582240e4b5b44d9cce Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Wed, 16 Sep 2020 01:52:09 +0800 Subject: [PATCH 19/24] [thermalctld] Fix issue: fan status should not be True when fan is absent (#92) If fan is not present, keep fan status value as "N/A". 
--- sonic-thermalctld/scripts/thermalctld | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index d70c9c69d..0e2797f89 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -311,12 +311,15 @@ class FanUpdater(logger.Logger): if not is_psu_fan: self._set_fan_led(fan_drawer, fan, fan_name, fan_status) + if fan_fault_status != NOT_AVAILABLE: + fan_fault_status = fan_status.is_ok() + fvs = swsscommon.FieldValuePairs( [('presence', str(presence)), ('drawer_name', drawer_name), ('model', str(try_get(fan.get_model))), ('serial', str(try_get(fan.get_serial))), - ('status', str(fan_fault_status and not(fan_status.under_speed or fan_status.over_speed))), + ('status', str(fan_fault_status)), ('direction', str(fan_direction)), ('speed', str(speed)), ('speed_tolerance', str(speed_tolerance)), From e1842b2dff481735ab42a7b0fd5594c24b7018e9 Mon Sep 17 00:00:00 2001 From: Petro Bratash <68950226+bratashX@users.noreply.github.com> Date: Wed, 16 Sep 2020 01:22:15 +0300 Subject: [PATCH 20/24] Change STATE_DB key (PCIE_STATUS|PCIE_DEVICES -> PCIE_DEVICES) (#93) Signed-off-by: Petro Bratash --- sonic-pcied/scripts/pcied | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sonic-pcied/scripts/pcied b/sonic-pcied/scripts/pcied index 646bb771c..31312dd4c 100644 --- a/sonic-pcied/scripts/pcied +++ b/sonic-pcied/scripts/pcied @@ -78,10 +78,10 @@ class DaemonPcied(DaemonBase): err += 1 if err: - self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "FAILED") + self.update_state_db("PCIE_DEVICES", "status", "FAILED") self.log_error("PCIe device status check : FAILED") else: - self.update_state_db("PCIE_STATUS|", "PCIE_DEVICES", "PASSED") + self.update_state_db("PCIE_DEVICES", "status", "PASSED") self.log_info("PCIe device status check : PASSED") def read_state_db(self, key1, key2): From 4fdf975f91193ea0897352af1bfdfb4bd0b6f244 Mon Sep 17 
00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Fri, 18 Sep 2020 13:19:05 +0800 Subject: [PATCH 21/24] [thermalctld] Fix issue: thermalctld should be auto restarted when being killed (#94) Part of thermalctld function is to handle user space thermal policies for events like fan/PSU removing, it works together with kernel thermal algorithm to make sure the switch won't overheat. Recently, we found that commit Azure/sonic-buildimage@cbc75fe changes its autorestart configuration in supervisord, and it won't be auto restarted after being killed. This PR is to make sure that thermalctld will be always restarted when it is killed. --- sonic-thermalctld/scripts/thermalctld | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 0e2797f89..d305dd101 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -622,6 +622,11 @@ class ThermalControlDaemon(daemon_base.DaemonBase): super(ThermalControlDaemon, self).__init__(log_identifier) self.stop_event = threading.Event() + # Thermal control daemon is designed to never exit, it must always + # return non zero exit code when exiting and so that supervisord will + # restart it automatically.
+ self.exit_code = 1 + # Signal handler def signal_handler(self, sig, frame): """ @@ -632,11 +637,9 @@ class ThermalControlDaemon(daemon_base.DaemonBase): """ if sig == signal.SIGHUP: self.log_info("Caught SIGHUP - ignoring...") - elif sig == signal.SIGINT: - self.log_info("Caught SIGINT - exiting...") - self.stop_event.set() - elif sig == signal.SIGTERM: - self.log_info("Caught SIGTERM - exiting...") + elif sig == signal.SIGINT or sig == signal.SIGTERM: + self.log_info("Caught signal {} - exiting...".format(sig)) + self.exit_code = sig + 128 self.stop_event.set() else: self.log_warning("Caught unhandled signal '" + sig + "'") @@ -690,7 +693,8 @@ class ThermalControlDaemon(daemon_base.DaemonBase): thermal_monitor.task_stop() - self.log_info("Shutdown...") + self.log_info("Shutdown with exit code {}...".format(self.exit_code)) + exit(self.exit_code) # From 1aaffcc98fc56574a2d8b3ce3a7c50c4a6e8f8fc Mon Sep 17 00:00:00 2001 From: Joe LeVeque Date: Tue, 22 Sep 2020 14:58:16 -0700 Subject: [PATCH 22/24] Add 'wheel' package to 'setup_requires' list (#95) Add 'wheel' to the list of packages required for building the package. This way it will be implicitly installed at build time, preventing the need to install the 'wheel' package explicitly in our build environment. 
--- sonic-ledd/setup.py | 3 +++ sonic-pcied/setup.py | 3 +++ sonic-psud/setup.py | 3 +++ sonic-syseepromd/setup.py | 3 +++ sonic-thermalctld/setup.py | 3 ++- sonic-xcvrd/setup.py | 3 +++ 6 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sonic-ledd/setup.py b/sonic-ledd/setup.py index d995d3d28..b88d87d56 100644 --- a/sonic-ledd/setup.py +++ b/sonic-ledd/setup.py @@ -13,6 +13,9 @@ scripts=[ 'scripts/ledd', ], + setup_requires= [ + 'wheel' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', diff --git a/sonic-pcied/setup.py b/sonic-pcied/setup.py index b5b2577ae..8d6070056 100644 --- a/sonic-pcied/setup.py +++ b/sonic-pcied/setup.py @@ -13,6 +13,9 @@ scripts=[ 'scripts/pcied', ], + setup_requires= [ + 'wheel' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', diff --git a/sonic-psud/setup.py b/sonic-psud/setup.py index 51771a676..3a465b901 100644 --- a/sonic-psud/setup.py +++ b/sonic-psud/setup.py @@ -13,6 +13,9 @@ scripts=[ 'scripts/psud', ], + setup_requires= [ + 'wheel' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', diff --git a/sonic-syseepromd/setup.py b/sonic-syseepromd/setup.py index 274369c32..b90dc66a7 100644 --- a/sonic-syseepromd/setup.py +++ b/sonic-syseepromd/setup.py @@ -13,6 +13,9 @@ scripts=[ 'scripts/syseepromd', ], + setup_requires= [ + 'wheel' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', diff --git a/sonic-thermalctld/setup.py b/sonic-thermalctld/setup.py index b2f9a4333..ada08a1d3 100644 --- a/sonic-thermalctld/setup.py +++ b/sonic-thermalctld/setup.py @@ -17,7 +17,8 @@ 'scripts/thermalctld', ], setup_requires= [ - 'pytest-runner' + 'pytest-runner', + 'wheel' ], tests_require = [ 'pytest', diff --git a/sonic-xcvrd/setup.py b/sonic-xcvrd/setup.py index 247331925..2f8e6d64f 100644 --- a/sonic-xcvrd/setup.py +++ b/sonic-xcvrd/setup.py @@ -13,6 +13,9 @@ 
scripts=[ 'scripts/xcvrd', ], + setup_requires= [ + 'wheel' + ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: No Input/Output (Daemon)', From 8507085946bc8f01f11fcd4029c09184a20b8d15 Mon Sep 17 00:00:00 2001 From: Volodymyr Boiko <66446128+vboykox@users.noreply.github.com> Date: Mon, 5 Oct 2020 18:46:18 +0300 Subject: [PATCH 23/24] [psud] Fix psud logging (#98) Fix `Failed to update PSU data - global name 'self' is not defined` Signed-off-by: Volodymyr Boyko --- sonic-psud/scripts/psud | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index 9ea271c53..0bc7526ce 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -120,7 +120,7 @@ def try_get(callback, default=None): return ret -def log_on_status_changed(normal_status, normal_log, abnormal_log): +def log_on_status_changed(logger, normal_status, normal_log, abnormal_log): """ Log when any status changed :param normal_status: Expected status. 
@@ -129,9 +129,9 @@ def log_on_status_changed(normal_status, normal_log, abnormal_log): :return: """ if normal_status: - self.log_notice(normal_log) + logger.log_notice(normal_log) else: - self.log_warning(abnormal_log) + logger.log_warning(abnormal_log) # @@ -139,12 +139,13 @@ def log_on_status_changed(normal_status, normal_log, abnormal_log): # class PsuStatus(object): - def __init__(self, psu): + def __init__(self, logger, psu): self.psu = psu self.presence = True self.power_good = True self.voltage_good = True self.temperature_good = True + self.logger = logger def set_presence(self, presence): """ @@ -173,7 +174,7 @@ class PsuStatus(object): def set_voltage(self, voltage, high_threshold, low_threshold): if not voltage or not high_threshold or not low_threshold: if self.voltage_good is not True: - self.log_warning('PSU voltage or high_threshold or low_threshold become unavailable, ' + self.logger.log_warning('PSU voltage or high_threshold or low_threshold become unavailable, ' 'voltage={}, high_threshold={}, low_threshold={}'.format(voltage, high_threshold, low_threshold)) self.voltage_good = True return False @@ -188,7 +189,7 @@ class PsuStatus(object): def set_temperature(self, temperature, high_threshold): if not temperature or not high_threshold: if self.temperature_good is not True: - self.log_warning('PSU temperature or high_threshold become unavailable, ' + self.logger.log_warning('PSU temperature or high_threshold become unavailable, ' 'temperature={}, high_threshold={}'.format(temperature, high_threshold)) self.temperature_good = True return False @@ -310,13 +311,13 @@ class DaemonPsud(daemon_base.DaemonBase): temperature_threshold = try_get(psu.get_temperature_high_threshold) if index not in self.psu_status_dict: - self.psu_status_dict[index] = PsuStatus(psu) + self.psu_status_dict[index] = PsuStatus(self, psu) psu_status = self.psu_status_dict[index] set_led = False if psu_status.set_presence(presence): set_led = True - 
log_on_status_changed(psu_status.presence, + log_on_status_changed(self, psu_status.presence, 'PSU absence warning cleared: {} is inserted back.'.format(name), 'PSU absence warning: {} is not present.'.format(name) ) @@ -329,14 +330,14 @@ class DaemonPsud(daemon_base.DaemonBase): if presence and psu_status.set_power_good(power_good): set_led = True - log_on_status_changed(psu_status.power_good, + log_on_status_changed(self, psu_status.power_good, 'Power absence warning cleared: {} power is back to normal.'.format(name), 'Power absence warning: {} is out of power.'.format(name) ) if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold): set_led = True - log_on_status_changed(psu_status.voltage_good, + log_on_status_changed(self, psu_status.voltage_good, 'PSU voltage warning cleared: {} voltage is back to normal.'.format(name), 'PSU voltage warning: {} voltage out of range, current voltage={}, valid range=[{}, {}].'.format( name, voltage, voltage_high_threshold, voltage_low_threshold) @@ -344,7 +345,7 @@ class DaemonPsud(daemon_base.DaemonBase): if presence and psu_status.set_temperature(temperature, temperature_threshold): set_led = True - log_on_status_changed(psu_status.temperature_good, + log_on_status_changed(self, psu_status.temperature_good, 'PSU temperature warning cleared: {} temperature is back to normal.'.format(name), 'PSU temperature warning: {} temperature too hot, temperature={}, threshold={}.'.format( name, temperature, temperature_threshold) @@ -420,7 +421,7 @@ class DaemonPsud(daemon_base.DaemonBase): (FAN_INFO_LED_STATUS_FIELD, str(try_get(fan.get_status_led))) ]) except Exception as e: - logger.log_warning('Failed to get led status for fan {}'.format(fan_name)) + self.log_warning('Failed to get led status for fan {}'.format(fan_name)) fvs = swsscommon.FieldValuePairs([ (FAN_INFO_LED_STATUS_FIELD, NOT_AVAILABLE) ]) From 8ae63a7c69b4f5967dd9996bcdfc1832b7701f99 Mon Sep 17 00:00:00 2001 From: junchao Date: Fri, 
16 Oct 2020 14:39:20 +0800 Subject: [PATCH 24/24] Fix LGTM warning --- sonic-psud/scripts/psud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index ccfa1814e..1ca4dbf56 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -14,7 +14,7 @@ try: import sys import threading - from sonic_py_common import daemon_base, logger + from sonic_py_common import daemon_base from swsscommon import swsscommon except ImportError as e: raise ImportError (str(e) + " - required module not found")