Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect asic info and store in CHASSIS_STATE_DB #175

Merged
merged 5 commits into from
Jun 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion sonic-chassisd/scripts/chassisd
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ CHASSIS_MODULE_INFO_NAME_FIELD = 'name'
CHASSIS_MODULE_INFO_DESC_FIELD = 'desc'
CHASSIS_MODULE_INFO_SLOT_FIELD = 'slot'
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD = 'oper_status'
# Module-table field holding the number of ASICs on the module; it is
# written as str(len(<module's ASIC list>)).
CHASSIS_MODULE_INFO_NUM_ASICS_FIELD = 'num_asics'
# Key into the in-memory module info dict under which the module's ASIC list
# is stored; each list entry is unpacked as (global ASIC id, PCI address).
CHASSIS_MODULE_INFO_ASICS = 'asics'

# Name of the per-ASIC table, opened on the CHASSIS_STATE_DB connection.
CHASSIS_ASIC_INFO_TABLE = 'CHASSIS_ASIC_TABLE'
# Prefix of every ASIC table key: '<prefix><global ASIC id>', e.g. 'asic4'.
CHASSIS_ASIC = 'asic'
CHASSIS_ASIC_PCI_ADDRESS_FIELD = 'asic_pci_address'
# Position of the ASIC within its module's ASIC list (the enumerate() index).
CHASSIS_ASIC_ID_IN_MODULE_FIELD = 'asic_id_in_module'

CHASSIS_MIDPLANE_INFO_TABLE = 'CHASSIS_MIDPLANE_TABLE'
CHASSIS_MIDPLANE_INFO_KEY_TEMPLATE = 'CHASSIS_MIDPLANE {}'
Expand Down Expand Up @@ -163,6 +170,9 @@ class ModuleUpdater(logger.Logger):
CHASSIS_MODULE_INFO_SLOT_FIELD,
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]

chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new table is kept in CHASSIS_STATE_DB, unlike other tables such as MIDPLANE_TABLE, which are kept in STATE_DB?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the comment in the PR description that the info is kept in CHASSIS_STATE_DB because it's accessible by swss containers. Does swss use this ASIC presence info?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a couple of places we can keep asic info like STATE_DB (redis instance) or CHASSIS_STATE_DB (redis chassis instance).

CHASSIS_STATE_DB is accessible from everywhere in swss, such as `/usr/bin/swss.sh` or `/usr/bin/local/swss.sh` (the script creating the swss container) and `docker_init.sh` inside the swss container. Inside the swss container, STATE_DB is not accessible (the asic database is mapped, instead of database). So CHASSIS_STATE_DB is a safer choice.

self.asic_table = swsscommon.Table(chassis_state_db, CHASSIS_ASIC_INFO_TABLE)

self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False)
if not self.midplane_initialized:
self.log_error("Chassisd midplane intialization failed")
Expand All @@ -182,6 +192,11 @@ class ModuleUpdater(logger.Logger):
if self.chassis_table is not None:
self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1))

if self.asic_table is not None:
asics = list(self.asic_table.getKeys())
for asic in asics:
self.asic_table._del(asic)

def modules_num_update(self):
# Check if module list is populated
num_modules = self.chassis.get_num_modules()
Expand All @@ -194,6 +209,8 @@ class ModuleUpdater(logger.Logger):
self.chassis_table.set(CHASSIS_INFO_KEY_TEMPLATE.format(1), fvs)

def module_db_update(self):
notOnlineModules = []

for module_index in range(0, self.num_modules):
module_info_dict = self._get_module_info(module_index)
if module_info_dict is not None:
Expand All @@ -211,9 +228,29 @@ class ModuleUpdater(logger.Logger):
fvs = swsscommon.FieldValuePairs([(CHASSIS_MODULE_INFO_DESC_FIELD, module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD]),
(CHASSIS_MODULE_INFO_SLOT_FIELD,
module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD]),
(CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD])])
(CHASSIS_MODULE_INFO_OPERSTATUS_FIELD, module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]),
(CHASSIS_MODULE_INFO_NUM_ASICS_FIELD, str(len(module_info_dict[CHASSIS_MODULE_INFO_ASICS])))])
self.module_table.set(key, fvs)

if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE):
notOnlineModules.append(key)
continue

for asic_id, asic in enumerate(module_info_dict[CHASSIS_MODULE_INFO_ASICS]):
asic_global_id, asic_pci_addr = asic
asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id)
asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr),
(CHASSIS_MODULE_INFO_NAME_FIELD, key),
(CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))])
self.asic_table.set(asic_key, asic_fvs)
judyjoseph marked this conversation as resolved.
Show resolved Hide resolved

judyjoseph marked this conversation as resolved.
Show resolved Hide resolved
# Asics that are on the "not online" modules need to be cleaned up
asics = list(self.asic_table.getKeys())
for asic in asics:
fvs = self.asic_table.get(asic)
if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ngoc-do
Looks like there is a bug here? Are you seeing this as well? fvs is in the format below:

 [True, (('asic_pci_address', 'n/a'), ('name', 'FABRIC-CARD6'), ('asic_id_in_module', '1'))]

However, if the code is changed to below, it works:

fvp = dict(fvs[1])
if fvp[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I don't see that. But if this happens, the unit test will fail, I think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unit-tests use mock_swsscommon.py, where it uses a simple dictionary. So, unit-tests are passing. However, I am seeing the behavior I mentioned previously in the real hw environment. Are you testing on the near latest github? Will try and investigate. Will help if you can confirm/investigate as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK.

Yes, I tested on our chassis when committing the change. But it could be that there were new changes upstream, and I didn't have the latest upstream in our local repos at the time I tested. I will double check, but I'm afraid that our local repos haven't been updated at this moment. So if you see that it needs to be fixed, feel free to do that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I found it happened on our latest update.

I have a fix in #203. Please review.

self.asic_table._del(asic)

def _get_module_info(self, module_index):
"""
Retrieves module info of this module
Expand All @@ -225,11 +262,14 @@ class ModuleUpdater(logger.Logger):
slot = try_get(self.chassis.get_module(module_index).get_slot, default=INVALID_SLOT)
status = try_get(self.chassis.get_module(module_index).get_oper_status,
default=ModuleBase.MODULE_STATUS_OFFLINE)
asics = try_get(self.chassis.get_module(module_index).get_all_asics,
default=[])

module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] = name
module_info_dict[CHASSIS_MODULE_INFO_DESC_FIELD] = str(desc)
module_info_dict[CHASSIS_MODULE_INFO_SLOT_FIELD] = str(slot)
module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] = str(status)
module_info_dict[CHASSIS_MODULE_INFO_ASICS] = asics

return module_info_dict

Expand Down
9 changes: 7 additions & 2 deletions sonic-chassisd/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def get_serial(self):


class MockModule(MockDevice):
def __init__(self, module_index, module_name, module_desc, module_type, module_slot):
def __init__(self, module_index, module_name, module_desc, module_type, module_slot,
asic_list=[]):
self.module_index = module_index
self.module_name = module_name
self.module_desc = module_desc
Expand All @@ -29,7 +30,8 @@ def __init__(self, module_index, module_name, module_desc, module_type, module_s
self.admin_state = 1
self.supervisor_slot = 16
self.midplane_access = False

self.asic_list = asic_list

def get_name(self):
return self.module_name

Expand Down Expand Up @@ -69,6 +71,9 @@ def is_midplane_reachable(self):
def set_midplane_reachable(self, up):
self.midplane_access = up

def get_all_asics(self):
    """Return the ASIC list stored on this mock module."""
    asics = self.asic_list
    return asics

class MockChassis:
def __init__(self):
self.module_list = []
Expand Down
3 changes: 3 additions & 0 deletions sonic-chassisd/tests/mock_swsscommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def get(self, key):
return self.mock_dict[key]
return None

def getKeys(self):
    """Return the keys currently held by the mock table as a new list."""
    return list(self.mock_dict.keys())

def size(self):
return len(self.mock_dict)

Expand Down
75 changes: 75 additions & 0 deletions sonic-chassisd/tests/test_chassisd.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
CHASSIS_INFO_KEY_TEMPLATE = 'CHASSIS {}'
CHASSIS_INFO_CARD_NUM_FIELD = 'module_num'

# Field names of the per-ASIC table entries that test_asic_presence checks.
CHASSIS_ASIC_PCI_ADDRESS_FIELD = 'asic_pci_address'
CHASSIS_ASIC_ID_IN_MODULE_FIELD = 'asic_id_in_module'

def setup_function():
ModuleUpdater.log_notice = MagicMock()
Expand Down Expand Up @@ -366,3 +368,76 @@ def test_midplane_presence_supervisor():
module_updater.deinit()
fvs = midplane_table.get(name)
assert fvs == None

def test_asic_presence():
    """Check that ASIC table entries follow the fabric card's oper status.

    Entries must exist while the fabric card is online, be removed when it
    goes offline, and be removed again when the updater is deinitialized.
    """
    chassis = MockChassis()

    # Supervisor
    index = 0
    name = "SUPERVISOR0"
    desc = "Supervisor card"
    slot = 16
    module_type = ModuleBase.MODULE_TYPE_SUPERVISOR
    supervisor = MockModule(index, name, desc, module_type, slot)
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    index = 1
    name = "LINE-CARD0"
    desc = "36 port 400G card"
    slot = 1
    module_type = ModuleBase.MODULE_TYPE_LINE
    module = MockModule(index, name, desc, module_type, slot)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Fabric card with asics. It gets its own module index; the original
    # reused the line card's index (1).
    index = 2
    name = "FABRIC-CARD0"
    desc = "Switch fabric card"
    slot = 17
    module_type = ModuleBase.MODULE_TYPE_FABRIC
    fabric_asic_list = [("4", "0000:04:00.0"), ("5", "0000:05:00.0")]
    fabric = MockModule(index, name, desc, module_type, slot, fabric_asic_list)
    chassis.module_list.append(fabric)

    # Run on supervisor
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    module_updater.supervisor_slot = supervisor.get_slot()
    module_updater.my_slot = supervisor.get_slot()
    module_updater.modules_num_update()
    module_updater.module_db_update()
    module_updater.check_midplane_reachability()

    # Asic presence on fabric module
    fabric.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE)
    module_updater.module_db_update()
    fabric_asic_table = module_updater.asic_table
    assert len(fabric_asic_table.getKeys()) == 2

    def verify_fabric_asic(asic_name, asic_pci_address, module_name, asic_id_in_module):
        # Each ASIC entry must carry its PCI address, owning module name and
        # its index within that module.
        fvs = fabric_asic_table.get(asic_name)
        assert fvs[CHASSIS_ASIC_PCI_ADDRESS_FIELD] == asic_pci_address
        assert fvs[CHASSIS_MODULE_INFO_NAME_FIELD] == module_name
        assert fvs[CHASSIS_ASIC_ID_IN_MODULE_FIELD] == asic_id_in_module

    # `name` still holds the fabric card's name at this point.
    verify_fabric_asic("asic4", "0000:04:00.0", name, "0")
    verify_fabric_asic("asic5", "0000:05:00.0", name, "1")

    # Card goes down and asics should be gone
    fabric.set_oper_status(ModuleBase.MODULE_STATUS_OFFLINE)
    module_updater.module_db_update()
    assert len(fabric_asic_table.getKeys()) == 0

    # Deinit: bring the card back so there are entries for deinit() to clean
    fabric.set_oper_status(ModuleBase.MODULE_STATUS_ONLINE)
    module_updater.module_db_update()
    module_updater.deinit()
    midplane_table = module_updater.midplane_table
    fvs = midplane_table.get(name)
    assert fvs is None
    fvs = fabric_asic_table.get("asic4")
    assert fvs is None
    fvs = fabric_asic_table.get("asic5")
    assert fvs is None