-
Notifications
You must be signed in to change notification settings - Fork 520
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[fabricportsorch] Add fabric support #1459
Changes from all commits
45b2c19
6271876
086fe26
06dcfbc
c7244c4
03d9e26
f054f8b
38ddc7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,268 @@ | ||
#include "fabricportsorch.h" | ||
|
||
#include <inttypes.h> | ||
#include <fstream> | ||
#include <sstream> | ||
#include <tuple> | ||
|
||
#include "logger.h" | ||
#include "schema.h" | ||
#include "sai_serialize.h" | ||
#include "timer.h" | ||
|
||
// Period of the fabric polling timer; used below as the tv_sec field of the
// timespec handed to SelectableTimer, i.e. the value is in seconds.
#define FABRIC_POLLING_INTERVAL_DEFAULT (30) | ||
// Return codes of FabricPortsOrch::getFabricPortList().
#define FABRIC_PORT_ERROR 0 | ||
#define FABRIC_PORT_SUCCESS 1 | ||
// Flex counter group name and polling interval passed to port_stat_manager.
#define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER" | ||
#define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000 | ||
// Flex counter group name and polling interval passed to queue_stat_manager.
// NOTE(review): 100000 ms is ten times the port interval above - confirm this
// is intentional and not a stray extra zero.
#define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER" | ||
#define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000 | ||
// Table name used for both the STATE_DB table and the FLEX_COUNTER_DB
// producer table created in the constructor.
#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE" | ||
|
||
// Globals defined outside this file: the switch object id and the SAI
// switch / port API tables used for every attribute query below.
extern sai_object_id_t gSwitchId; | ||
extern sai_switch_api_t *sai_switch_api; | ||
extern sai_port_api_t *sai_port_api; | ||
|
||
// SAI port statistics intended for the fabric port flex counter group.
// Not referenced yet in this file: generatePortStats() is still a stub.
static const vector<sai_port_stat_t> port_stat_ids
{
    // Ingress counters
    SAI_PORT_STAT_IF_IN_OCTETS,
    SAI_PORT_STAT_IF_IN_ERRORS,
    SAI_PORT_STAT_IF_IN_FABRIC_DATA_UNITS,
    SAI_PORT_STAT_IF_IN_FEC_CORRECTABLE_FRAMES,
    SAI_PORT_STAT_IF_IN_FEC_NOT_CORRECTABLE_FRAMES,
    SAI_PORT_STAT_IF_IN_FEC_SYMBOL_ERRORS,
    // Egress counters
    SAI_PORT_STAT_IF_OUT_OCTETS,
    SAI_PORT_STAT_IF_OUT_FABRIC_DATA_UNITS,
};
|
||
// SAI queue statistics intended for the fabric queue flex counter group.
// Not referenced yet in this file: generateQueueStats() is still a stub.
static const vector<sai_queue_stat_t> queue_stat_ids
{
    SAI_QUEUE_STAT_WATERMARK_LEVEL,
    SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
    SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL,
};
|
||
// Constructs the fabric port orchestrator.
//
// appl_db and tableNames are forwarded unchanged to the Orch base class.
// The constructor then:
//   - creates the port and queue flex counter managers,
//   - creates the polling timer (FABRIC_POLLING_INTERVAL_DEFAULT seconds),
//   - opens STATE_DB, COUNTERS_DB and FLEX_COUNTER_DB and the tables on them,
//   - makes a first attempt at fabric port discovery,
//   - registers the timer as the "FABRIC_POLL" executor and starts it.
//
// NOTE(review): m_timer is declared before the two stat managers in the class,
// so it is constructed first even though it is listed last here - confirm
// that no reorder warning is promoted to an error in this build.
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames) : | ||
Orch(appl_db, tableNames), | ||
// NOTE(review): the meaning of the trailing true argument is not visible
// here - confirm against FlexCounterManager.
port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, | ||
FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), | ||
queue_stat_manager(FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i assume this is for queue_stat_ids ? So again is this needed for fabric port. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, once jupiter-sai supports queue stats query, |
||
FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), | ||
m_timer(new SelectableTimer(timespec { .tv_sec = FABRIC_POLLING_INTERVAL_DEFAULT, .tv_nsec = 0 })) | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
SWSS_LOG_NOTICE( "FabricPortsOrch constructor" ); | ||
|
||
// Per-port link state is published to this STATE_DB table by
// updateFabricPortState().
m_state_db = shared_ptr<DBConnector>(new DBConnector("STATE_DB", 0)); | ||
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), FABRIC_PORT_TABLE)); | ||
|
||
// The two COUNTERS_DB map tables are created here but not written anywhere
// in this file yet.
m_counter_db = shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0)); | ||
m_laneQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what does COUNTERS_QUEUE_NAME_MAP and COUNTERS_QUEUE_PORT_MAP looks like for fabric port ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once jupiter-sai supports port and queue stats completely for fabric ports on both fabric and voq asic, this will look like below in
|
||
m_lanePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP)); | ||
|
||
m_flex_db = shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0)); | ||
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE)); | ||
|
||
// First discovery attempt. The return value is ignored on purpose: when it
// fails, doTask(SelectableTimer&) retries on every timer tick.
getFabricPortList(); | ||
|
||
// NOTE(review): executor is allocated with a raw new; presumably
// Orch::addExecutor takes ownership - confirm.
auto executor = new ExecutableTimer(m_timer, this, "FABRIC_POLL"); | ||
Orch::addExecutor(executor); | ||
m_timer->start(); | ||
} | ||
|
||
int FabricPortsOrch::getFabricPortList() | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
if (m_getFabricPortListDone) { | ||
return FABRIC_PORT_SUCCESS; | ||
} | ||
|
||
uint32_t i; | ||
sai_status_t status; | ||
sai_attribute_t attr; | ||
|
||
attr.id = SAI_SWITCH_ATTR_NUMBER_OF_FABRIC_PORTS; | ||
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
SWSS_LOG_ERROR("Failed to get fabric port number, rv:%d", status); | ||
return FABRIC_PORT_ERROR; | ||
} | ||
m_fabricPortCount = attr.value.u32; | ||
SWSS_LOG_NOTICE("Get %d fabric ports", m_fabricPortCount); | ||
|
||
vector<sai_object_id_t> fabric_port_list; | ||
fabric_port_list.resize(m_fabricPortCount); | ||
attr.id = SAI_SWITCH_ATTR_FABRIC_PORT_LIST; | ||
attr.value.objlist.count = (uint32_t)fabric_port_list.size(); | ||
attr.value.objlist.list = fabric_port_list.data(); | ||
status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
throw runtime_error("FabricPortsOrch get port list failure"); | ||
} | ||
|
||
for (i = 0; i < m_fabricPortCount; i++) | ||
{ | ||
sai_uint32_t lanes[1] = { 0 }; | ||
attr.id = SAI_PORT_ATTR_HW_LANE_LIST; | ||
attr.value.u32list.count = 1; | ||
attr.value.u32list.list = lanes; | ||
status = sai_port_api->get_port_attribute(fabric_port_list[i], 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
throw runtime_error("FabricPortsOrch get port lane failure"); | ||
} | ||
int lane = attr.value.u32list.list[0]; | ||
m_fabricLanePortMap[lane] = fabric_port_list[i]; | ||
} | ||
|
||
generatePortStats(); | ||
|
||
m_getFabricPortListDone = true; | ||
|
||
updateFabricPortState(); | ||
|
||
return FABRIC_PORT_SUCCESS; | ||
} | ||
|
||
bool FabricPortsOrch::allPortsReady() | ||
{ | ||
return m_getFabricPortListDone; | ||
} | ||
|
||
/**
 * Placeholder for installing port stat flex counters on fabric ports.
 * Currently a no-op.
 */
void FabricPortsOrch::generatePortStats()
{
    // FIX_ME: Intended behavior - install flex counters for port stats on
    // fabric ports, both on fabric asics and on voq asics (which connect to
    // fabric asics via fabric ports). The counters would be installed in
    // FLEX_COUNTER_DB, queried by syncd, and written to COUNTERS_DB.
    //
    // Why it is empty for now:
    //   * BCM SAI has not yet updated its code to query the port stats
    //     listed in port_stat_ids.
    //   * BCM sets "Max logical port count" too low (256), which makes syncd
    //     crash on voq asics that now include regular front panel ports,
    //     fabric ports, and multiple logical ports.
    //
    // The code that installs port stat counters will be re-added once BCM
    // supports this completely.
}
|
||
void FabricPortsOrch::generateQueueStats() | ||
{ | ||
if (m_isQueueStatsGenerated) return; | ||
if (!m_getFabricPortListDone) return; | ||
|
||
// FIX_ME: Similar to generatePortStats(), generateQueueStats() installs | ||
// flex counters for queue stats on fabric ports for fabric asics and voq asics. | ||
// However, currently BCM SAI doesn't fully support queue stats query. | ||
// Query on queue type and index is not supported for fabric asics while | ||
// voq asics are not completely supported. | ||
// So, this function will just do nothing for now, and we will readd | ||
// code to install queue stats counters when BCM completely supports. | ||
|
||
m_isQueueStatsGenerated = true; | ||
} | ||
|
||
// Polls every discovered fabric port and publishes its state to m_stateTable.
//
// For each (lane, port) entry of m_fabricLanePortMap the function:
//   - reads SAI_PORT_ATTR_FABRIC_ATTACHED as the up/down status,
//   - on an up -> down transition increments m_portDownCount[lane] and
//     records the current time in m_portDownSeenLastTime[lane],
//   - when the port is up, reads the attached switch id and port index,
//   - writes the key PORT<lane> with STATUS, plus REMOTE_MOD and REMOTE_PORT
//     when up, plus PORT_DOWN_COUNT and PORT_DOWN_SEEN_LAST_TIME once at
//     least one down transition has been seen.
//
// Does nothing before fabric port discovery has completed or when the clock
// cannot be read. Throws runtime_error when the remote id or remote port
// index query fails on a port that reports attached.
//
// NOTE(review): the timestamp comes from CLOCK_MONOTONIC, so the published
// PORT_DOWN_SEEN_LAST_TIME is not a wall-clock time - confirm that consumers
// expect this.
void FabricPortsOrch::updateFabricPortState() | ||
{ | ||
if (!m_getFabricPortListDone) return; | ||
|
||
SWSS_LOG_ENTER(); | ||
|
||
sai_status_t status; | ||
sai_attribute_t attr; | ||
|
||
time_t now; | ||
struct timespec time_now; | ||
if (clock_gettime(CLOCK_MONOTONIC, &time_now) < 0) | ||
{ | ||
return; | ||
} | ||
now = time_now.tv_sec; | ||
|
||
// NOTE(review): p is taken by value, so each map entry is copied per
// iteration; a const reference would avoid that.
for (auto p : m_fabricLanePortMap) | ||
{ | ||
int lane = p.first; | ||
sai_object_id_t port = p.second; | ||
|
||
string key = "PORT" + to_string(lane); | ||
std::vector<FieldValueTuple> values; | ||
// Assigned and read only inside the port-up branches below.
uint32_t remote_peer; | ||
uint32_t remote_port; | ||
|
||
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED; | ||
status = sai_port_api->get_port_attribute(port, 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
// Port may not be ready for query | ||
// NOTE(review): this return also skips every remaining port for this
// polling round, not only the failing one - confirm this is intended.
SWSS_LOG_ERROR("Failed to get fabric port (%d) status, rv:%d", lane, status); | ||
return; | ||
} | ||
|
||
// Count a flap only when a previously recorded up state turns into down.
if (m_portStatus.find(lane) != m_portStatus.end() && | ||
m_portStatus[lane] && !attr.value.booldata) | ||
{ | ||
m_portDownCount[lane] ++; | ||
m_portDownSeenLastTime[lane] = now; | ||
} | ||
m_portStatus[lane] = attr.value.booldata; | ||
|
||
if (m_portStatus[lane]) | ||
{ | ||
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_SWITCH_ID; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add the crc error collection here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we detect if a link flapped between the polling period? Is that a count that can be maintained in STATE_DB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add counter list, one counter for one port, that counts the number of times the link flaps. I also maintain the last time the link flaps. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CRC error collection will be added in another PR. It's because sonic-sairedis now doesn't understand attribute type which is used for getting fabric port error list. So we will need to modify sonic-sairedis, and then add CRC in sonic-swss. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. Sounds good to me. |
||
status = sai_port_api->get_port_attribute(port, 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
throw runtime_error("FabricPortsOrch get remote id failure"); | ||
} | ||
remote_peer = attr.value.u32; | ||
|
||
attr.id = SAI_PORT_ATTR_FABRIC_ATTACHED_PORT_INDEX; | ||
status = sai_port_api->get_port_attribute(port, 1, &attr); | ||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
throw runtime_error("FabricPortsOrch get remote port index failure"); | ||
} | ||
remote_port = attr.value.u32; | ||
} | ||
|
||
values.emplace_back("STATUS", m_portStatus[lane] ? "up" : "down"); | ||
if (m_portStatus[lane]) | ||
{ | ||
values.emplace_back("REMOTE_MOD", to_string(remote_peer)); | ||
values.emplace_back("REMOTE_PORT", to_string(remote_port)); | ||
} | ||
if (m_portDownCount[lane] > 0) | ||
{ | ||
values.emplace_back("PORT_DOWN_COUNT", to_string(m_portDownCount[lane])); | ||
values.emplace_back("PORT_DOWN_SEEN_LAST_TIME", | ||
to_string(m_portDownSeenLastTime[lane])); | ||
} | ||
m_stateTable->set(key, values); | ||
} | ||
} | ||
|
||
/**
 * Argument-less task hook. Intentionally empty: in this file all work is
 * done in the timer overload, doTask(swss::SelectableTimer &).
 */
void FabricPortsOrch::doTask()
{
    // Nothing to do.
}
|
||
/**
 * Consumer task hook. Intentionally empty: nothing is read from the
 * consumer in this file.
 *
 * @param consumer Unused.
 */
void FabricPortsOrch::doTask(Consumer &consumer)
{
    // Nothing to do.
}
|
||
// Timer task hook, run when the polling timer registered in the constructor
// fires. Retries fabric port discovery while it has not succeeded, and once
// the port list is known refreshes the published port state. The timer
// argument itself is not used.
void FabricPortsOrch::doTask(swss::SelectableTimer &timer) | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
// Discovery can fail while the port count is not readable yet, so keep
// retrying on every tick until it succeeds.
if (!m_getFabricPortListDone) | ||
{ | ||
getFabricPortList(); | ||
} | ||
|
||
// NOTE(review): on the tick where discovery first succeeds the state is
// refreshed twice, once inside getFabricPortList() and once here.
if (m_getFabricPortListDone) | ||
{ | ||
updateFabricPortState(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add link to the PR description where these State DB fields are define. Also looks like current VS test case added as part of this do not cover state db changes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. I updated the description with the link. The VS test already verifies the number of fabric ports in
Below is what we see from a container of a linecard created by a vs test.
In addition to the change in the test that verifies Note that we don't test fabric asics because it will require significant changes in the tests - all tests now assume testing asic is NPU with front panel ports. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#ifndef SWSS_FABRICPORTSORCH_H | ||
#define SWSS_FABRICPORTSORCH_H | ||
|
||
#include <map> | ||
|
||
#include "orch.h" | ||
#include "observer.h" | ||
#include "observer.h" | ||
#include "producertable.h" | ||
#include "flex_counter_manager.h" | ||
|
||
class FabricPortsOrch : public Orch, public Subject | ||
{ | ||
public: | ||
FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames); | ||
bool allPortsReady(); | ||
void generateQueueStats(); | ||
|
||
private: | ||
shared_ptr<DBConnector> m_state_db; | ||
shared_ptr<DBConnector> m_counter_db; | ||
shared_ptr<DBConnector> m_flex_db; | ||
|
||
unique_ptr<Table> m_stateTable; | ||
unique_ptr<Table> m_laneQueueCounterTable; | ||
unique_ptr<Table> m_lanePortCounterTable; | ||
unique_ptr<ProducerTable> m_flexCounterTable; | ||
|
||
swss::SelectableTimer *m_timer = nullptr; | ||
|
||
FlexCounterManager port_stat_manager; | ||
FlexCounterManager queue_stat_manager; | ||
|
||
sai_uint32_t m_fabricPortCount; | ||
map<int, sai_object_id_t> m_fabricLanePortMap; | ||
unordered_map<int, bool> m_portStatus; | ||
unordered_map<int, size_t> m_portDownCount; | ||
unordered_map<int, time_t> m_portDownSeenLastTime; | ||
|
||
bool m_getFabricPortListDone = false; | ||
bool m_isQueueStatsGenerated = false; | ||
int getFabricPortList(); | ||
void generatePortStats(); | ||
void updateFabricPortState(); | ||
|
||
void doTask() override; | ||
void doTask(Consumer &consumer); | ||
void doTask(swss::SelectableTimer &timer); | ||
}; | ||
|
||
#endif /* SWSS_FABRICPORTSORCH_H */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these valid for fabric ports?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, those are valid for fabric ports. However, we cannot query those stats until jupiter-sai completely supports them. I keep them here anyway.
Those stats will be set up in generateQueueStats(), which is currently empty. For reference, generateQueueStats() will do the following.
For each port:
I'll update generateQueueStats() to use queue_stat_ids once we have full support from jupiter-sai.