Skip to content

Commit

Permalink
[voq/systemlag]Code review comments fix 2
Browse files Browse the repository at this point in the history
Signed-off-by: vedganes <[email protected]>

Code review comments fix changes: (1)Enclosed single line if block by {}
(2) Added system lag negative tests in vs tests and added code to log
error on system lag id release failure to help vs tests.
In addition to these review comments fixes, changes are done to remove
redundant error logs after addNeighbor() and removeNeighbor() in
system neighbor processing
  • Loading branch information
vedganes committed Feb 18, 2021
1 parent b3f9299 commit ebdba8c
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 19 deletions.
3 changes: 2 additions & 1 deletion orchagent/intfsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1363,9 +1363,10 @@ bool IntfsOrch::isRemoteSystemPortIntf(string alias)
Port port;
if(gPortsOrch->getPort(alias, port))
{

if (port.m_type == Port::LAG)
{
return(port.m_system_lag_info.switch_id != gVoqMySwitchId);
}

return(port.m_system_port_info.type == SAI_SYSTEM_PORT_TYPE_REMOTE);
}
Expand Down
2 changes: 0 additions & 2 deletions orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,6 @@ void NeighOrch::doVoqSystemNeighTask(Consumer &consumer)
}
else
{
SWSS_LOG_ERROR("Failed to add voq neighbor %s to SAI", kfvKey(t).c_str());
it++;
}
}
Expand All @@ -1118,7 +1117,6 @@ void NeighOrch::doVoqSystemNeighTask(Consumer &consumer)
}
else
{
SWSS_LOG_ERROR("Failed to remove voq neighbor %s from SAI", kfvKey(t).c_str());
it++;
}
}
Expand Down
11 changes: 10 additions & 1 deletion orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4058,7 +4058,16 @@ bool PortsOrch::removeLag(Port lag)

if (lag.m_system_lag_info.switch_id == gVoqMySwitchId)
{
m_lagIdAllocator->lagIdDel(lag.m_system_lag_info.alias);
int32_t rv;
int32_t spa_id = lag.m_system_lag_info.spa_id;

rv = m_lagIdAllocator->lagIdDel(lag.m_system_lag_info.alias);

if (rv != spa_id)
{
SWSS_LOG_ERROR("Failed to delete LAG id %d of local lag %s rv:%d", spa_id, lag.m_alias.c_str(), rv);
return false;
}

// Sync to SYSTEM_LAG_TABLE of CHASSIS_APP_DB

Expand Down
257 changes: 242 additions & 15 deletions tests/test_virtual_chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def set_lag_id_boundaries(self, vct):
The lag id boundaries are used by lag id allocator while adding a PortChannel to the asic db.
Note:
In real systems, the lag id boundries are taken from a platform specific file. For testing
we assume the chassis capability with maximum 512 lags.
we assume the chassis capability with maximum 2 lags so that we can test the lag id allocator
table full error with less number of PortChannel configuration
"""

dvss = vct.dvss
Expand All @@ -22,7 +23,7 @@ def set_lag_id_boundaries(self, vct):
dvs = dvss[name]
chassis_app_db = DVSDatabase(swsscommon.CHASSIS_APP_DB, dvs.redis_chassis_sock)
chassis_app_db.db_connection.set("SYSTEM_LAG_ID_START", "1")
chassis_app_db.db_connection.set("SYSTEM_LAG_ID_END", "512")
chassis_app_db.db_connection.set("SYSTEM_LAG_ID_END", "2")
break

def test_connectivity(self, vct):
Expand Down Expand Up @@ -266,7 +267,7 @@ def test_chassis_system_neigh(self, vct):
break

def test_chassis_system_lag(self, vct):
"""Test portchannel in VOQ based chassis systems.
"""Test PortChannel in VOQ based chassis systems.
This test validates that
(i) PortChannel is created in local asic with system port aggregator id (system lag id)
Expand All @@ -282,8 +283,8 @@ def test_chassis_system_lag(self, vct):
if vct is None:
return

test_lag_name = "PortChannel0001"
test_lag_member = "Ethernet4"
test_lag1_name = "PortChannel0001"
test_lag1_member = "Ethernet4"

# Set the lag id boundaries in the chassis ap db
self.set_lag_id_boundaries(vct)
Expand All @@ -305,31 +306,33 @@ def test_chassis_system_lag(self, vct):

cfg_switch_type = metatbl.get("switch_type")

# Neighbor record verifiation done in line card
# Portchannel record verifiation done in line card
if cfg_switch_type == "voq":
lc_switch_id = metatbl.get("switch_id")
assert lc_switch_id != "", "Got error in getting switch_id from CONFIG_DB DEVICE_METADATA"
if lc_switch_id == "0":
# Create PortChannel

# Connect to app db: lag table and lag member table
app_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
psTbl_lag = swsscommon.ProducerStateTable(app_db, "LAG_TABLE")
psTbl_lagMember = swsscommon.ProducerStateTable(app_db, "LAG_MEMBER_TABLE")

# Create PortChannel
fvs = swsscommon.FieldValuePairs([("admin", "up"), ("mtu", "9100")])
psTbl_lag.set(f"{test_lag_name}", fvs)
psTbl_lag.set(f"{test_lag1_name}", fvs)

time.sleep(1)

# Add port channel member
psTbl_lagMember = swsscommon.ProducerStateTable(app_db, "LAG_MEMBER_TABLE")
fvs = swsscommon.FieldValuePairs([("status", "enabled")])
psTbl_lagMember.set(f"{test_lag_name}:{test_lag_member}", fvs)
psTbl_lagMember.set(f"{test_lag1_name}:{test_lag1_member}", fvs)

time.sleep(1)

# Verify creation of the PorChannel with voq system port aggregator id in asic db
asic_db = dvs.get_asic_db()
lagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(lagkeys) == 1, "The LAG entry for configured portchannel is not available in asic db"
assert len(lagkeys) == 1, "The LAG entry for configured PortChannel is not available in asic db"

# Check for the presence of voq system port aggregate id attribute
lag_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_LAG", lagkeys[0])
Expand All @@ -338,7 +341,7 @@ def test_chassis_system_lag(self, vct):

# Check for presence of lag member added
lagmemberkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER")
assert len(lagmemberkeys) == 1, "The LAG member for configured portchannel is not available in asic db"
assert len(lagmemberkeys) == 1, "The LAG member for configured PortChannel is not available in asic db"

break

Expand All @@ -351,9 +354,9 @@ def test_chassis_system_lag(self, vct):
assert len(syslagkeys) == 1, "System lag entry is not available in chassis app db"

# system lag alias (key) should be unique across chassis. To ensure such uniqueness,
# the system lag name is derived from hostname, asic_name and portchannel name
# the system lag name is derived from hostname, asic_name and PortChannel name
# Verify for correct name
assert f"{cfg_hostname}|{cfg_asic_name}|{test_lag_name}" in syslagkeys[0], "Invalid unique system lag name"
assert f"{cfg_hostname}|{cfg_asic_name}|{test_lag1_name}" in syslagkeys[0], "Invalid unique system lag name"

# Verify lag id of the system lag in chassis app db
syslag_entry = chassis_app_db.get_entry("SYSTEM_LAG_TABLE", syslagkeys[0])
Expand Down Expand Up @@ -402,6 +405,230 @@ def test_chassis_system_lag(self, vct):
assert "oid:0x5d" in member_port_id, "System LAG member is not system port"

break

def test_chassis_system_lag_id_allocator_table_full(self, vct):
"""Test lag id allocator table full.
Pre-requisite:
(i) Test case: test_chassis_system_lag
This test validates that
(i) If PortChannel configuration goes beyond the platfrom capacitty boundary, lag id
allocator returns table full error
"""

if vct is None:
return

test_lag2_name = "PortChannel0002"
test_lag3_name = "PortChannel0003"

# Create a PortChannel in a line card 1 (owner line card)
dvss = vct.dvss
for name in dvss.keys():
dvs = dvss[name]

config_db = dvs.get_config_db()
metatbl = config_db.get_entry("DEVICE_METADATA", "localhost")

# Get the host name and asic name for the system lag alias verification
cfg_hostname = metatbl.get("hostname")
assert cfg_hostname != "", "Got error in getting hostname from CONFIG_DB DEVICE_METADATA"

cfg_asic_name = metatbl.get("asic_name")
assert cfg_asic_name != "", "Got error in getting asic_name from CONFIG_DB DEVICE_METADATA"

cfg_switch_type = metatbl.get("switch_type")

# Portchannel record verifiation done in line card
if cfg_switch_type == "voq":
lc_switch_id = metatbl.get("switch_id")
assert lc_switch_id != "", "Got error in getting switch_id from CONFIG_DB DEVICE_METADATA"
if lc_switch_id == "0":

# Connect to app db: lag table
app_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
psTbl_lag = swsscommon.ProducerStateTable(app_db, "LAG_TABLE")

# Create PortChannel 2. This should be successfully configured
fvs = swsscommon.FieldValuePairs([("admin", "up"), ("mtu", "9100")])
psTbl_lag.set(f"{test_lag2_name}", fvs)

time.sleep(1)

# Verify creation of the PorChannels with voq system port aggregator id in asic db
asic_db = dvs.get_asic_db()
lagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(lagkeys) == 2, "Two configured LAG entries are not available in asic db"

# Check for the presence of voq system port aggregate id attribute for 2 LAGs
lag_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_LAG", lagkeys[0])
spa_id = lag_entry.get("SAI_LAG_ATTR_SYSTEM_PORT_AGGREGATE_ID")
assert spa_id != "", "VOQ System port aggregate id not present for the LAG 1"

lag_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_LAG", lagkeys[1])
spa_id = lag_entry.get("SAI_LAG_ATTR_SYSTEM_PORT_AGGREGATE_ID")
assert spa_id != "", "VOQ System port aggregate id not present for the LAG 2"

# Create PortChannel 3. This should not be configured since lag id limit reached
fvs = swsscommon.FieldValuePairs([("admin", "up"), ("mtu", "9100")])
psTbl_lag.set(f"{test_lag3_name}", fvs)

# Check syslog for the table full error
marker = "ERR #orchagent"
srch_str = f"addLag: Failed to allocate unique LAG id for local lag {test_lag3_name} rv:-1"
_, num = dvs.runcmd(["sh", "-c", "awk '/%s/,ENDFILE {print;}' /var/log/syslog \
| grep \"%s\" | wc -l" % (marker, srch_str)])
assert num.strip() == '1', "LAG ID allocator table full error is not returned"

# Clean up the app db for the PortChannel creation failure
psTbl_lag.delete(f"{test_lag3_name}")

break

def test_chassis_system_lag_id_allocator_del_id(self, vct):
"""Test lag id allocator's release id and re-use id processing.
Pre-requisite:
(i) Test case: test_chassis_system_lag
(ii) Test case: test_chassis_system_lag_id_allocator_table_full
This test validates that
(i) Portchannel is deleted and id allocator does not return error
(ii) Should be able to add PortChannel to re-use released id
(iii) Deleted portchaneels are removed from chassis app db
(iv) Remote asics remove the system lag corresponding to the deleted PortChannels
"""

if vct is None:
return

test_lag1_name = "PortChannel0001"
test_lag1_member = "Ethernet4"
test_lag2_name = "PortChannel0002"
test_lag3_name = "PortChannel0003"

# Create a PortChannel in a line card 1 (owner line card)
dvss = vct.dvss
for name in dvss.keys():
dvs = dvss[name]

config_db = dvs.get_config_db()
metatbl = config_db.get_entry("DEVICE_METADATA", "localhost")

# Get the host name and asic name for the system lag alias verification
cfg_hostname = metatbl.get("hostname")
assert cfg_hostname != "", "Got error in getting hostname from CONFIG_DB DEVICE_METADATA"

cfg_asic_name = metatbl.get("asic_name")
assert cfg_asic_name != "", "Got error in getting asic_name from CONFIG_DB DEVICE_METADATA"

cfg_switch_type = metatbl.get("switch_type")

# Portchannel record verifiation done in line card
if cfg_switch_type == "voq":
lc_switch_id = metatbl.get("switch_id")
assert lc_switch_id != "", "Got error in getting switch_id from CONFIG_DB DEVICE_METADATA"
if lc_switch_id == "0":

# At this point we have 2 port channels test_lag1_name and test_lag2_name.
# These were created by the above two test cases. Now delete the PortChannel
# test_lag1_name and verify that the lag is removed and add test_lag3_name to
# test for lag id allocator allocating newly available lag id

# Connect to app db: lag table and lag member table
app_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
psTbl_lag = swsscommon.ProducerStateTable(app_db, "LAG_TABLE")
psTbl_lagMember = swsscommon.ProducerStateTable(app_db, "LAG_MEMBER_TABLE")

# Delete port channel member of PortChannel test_lag1_name
psTbl_lagMember.delete(f"{test_lag1_name}:{test_lag1_member}")

time.sleep(1)

# Delete PortChannel test_lag1_name
psTbl_lag.delete(f"{test_lag1_name}")

time.sleep(1)

# Verify deletion of the PorChannel
asic_db = dvs.get_asic_db()
lagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(lagkeys) == 1, "Two LAG entries in asic db even after deleting a PortChannel"

# Create PortChannel test_lag3_name. This should be addedd successfully since deleting
# PortChannel test_lag1_name made a lag id available for allocation
fvs = swsscommon.FieldValuePairs([("admin", "up"), ("mtu", "9100")])
psTbl_lag.set(f"{test_lag3_name}", fvs)

time.sleep(1)

# Verify creation of the additional PortChannel after making space for more
# PortChannels by deleting some PortChannels
asic_db = dvs.get_asic_db()
lagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(lagkeys) == 2, "Two configured LAG entries are not available in asic db"

# Check for the presence of voq system port aggregate id attribute for 2 LAGs
lag_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_LAG", lagkeys[0])
spa_id = lag_entry.get("SAI_LAG_ATTR_SYSTEM_PORT_AGGREGATE_ID")
assert spa_id != "", "VOQ System port aggregate id not present for the LAG 1"

lag_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_LAG", lagkeys[1])
spa_id = lag_entry.get("SAI_LAG_ATTR_SYSTEM_PORT_AGGREGATE_ID")
assert spa_id != "", "VOQ System port aggregate id not present for the LAG 2"

# Now delete all the PortChannels so that we can veify the chassis app db
# clearing and remote asics clearing
psTbl_lag.delete(f"{test_lag2_name}")

psTbl_lag.delete(f"{test_lag3_name}")

time.sleep(1)

# Verify deletion of all PortChannels
asic_db = dvs.get_asic_db()
lagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(lagkeys) == 0, "LAG entries in asic db even after deleting all PortChannels"

break

# Check syncing deletion of the PortChannels and PortChannel member in chasiss app db
for name in dvss.keys():
if name.startswith("supervisor"):
dvs = dvss[name]
chassis_app_db = DVSDatabase(swsscommon.CHASSIS_APP_DB, dvs.redis_chassis_sock)
syslagkeys = chassis_app_db.get_keys("SYSTEM_LAG_TABLE")
assert len(syslagkeys) == 0, "Stale system lag entries in chassis app db"

syslagmemberkeys = chassis_app_db.get_keys("SYSTEM_LAG_MEMBER_TABLE")
assert len(syslagmemberkeys) == 0, "Stale system lag member entries in chassis app db"

break

# Verify removal of remote system lag in non-owner line card
# Verify removal of system lag menbers in non-owner line card
for name in dvss.keys():
dvs = dvss[name]

config_db = dvs.get_config_db()
metatbl = config_db.get_entry("DEVICE_METADATA", "localhost")

cfg_switch_type = metatbl.get("switch_type")

# System LAG info verifiation done in non-owner line card
if cfg_switch_type == "voq":
lc_switch_id = metatbl.get("switch_id")
assert lc_switch_id != "", "Got error in getting switch_id from CONFIG_DB DEVICE_METADATA"
if lc_switch_id != "0":
# Linecard other than linecard 1 (owner line card)
asic_db = dvs.get_asic_db()
remotesyslagkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG")
assert len(remotesyslagkeys) == 0, "Stale remote system lag entries in asic db"

# Verify cleaning of system lag members
lagmemberkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER")
assert len(lagmemberkeys) == 0, "Stale system lag member entries in asic db"

break

# Add Dummy always-pass test at end as workaroud
# for issue when Flaky fail on final test it invokes module tear-down before retrying
Expand Down

0 comments on commit ebdba8c

Please sign in to comment.