From 3c230d2b51ebf2ffc7163b2641ffab7ef358bfd4 Mon Sep 17 00:00:00 2001 From: Pavan Naregundi <92989231+pavannaregundi@users.noreply.github.com> Date: Wed, 25 Sep 2024 06:07:36 +0530 Subject: [PATCH] [Orchagent] Add optional create_switch timeout parameter (#3258) What I did This change adds an optional create_switch timeout as a command-line parameter to orchagent. Why I did it Older platforms are seeing an increase in the time required to create the switch on bookworm-based branches. --- orchagent/main.cpp | 20 +++++++++++++++----- tests/test_zmq.py | 3 ++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 0a804eb38c..556bb70892 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -69,10 +69,11 @@ uint32_t gCfgSystemPorts = 0; string gMyHostName = ""; string gMyAsicName = ""; bool gTraditionalFlexCounter = false; +uint32_t create_switch_timeout = 0; void usage() { - cout << "usage: orchagent [-h] [-r record_type] [-d record_location] [-f swss_rec_filename] [-j sairedis_rec_filename] [-b batch_size] [-m MAC] [-i INST_ID] [-s] [-z mode] [-k bulk_size] [-q zmq_server_address] [-c mode]" << endl; + cout << "usage: orchagent [-h] [-r record_type] [-d record_location] [-f swss_rec_filename] [-j sairedis_rec_filename] [-b batch_size] [-m MAC] [-i INST_ID] [-s] [-z mode] [-k bulk_size] [-q zmq_server_address] [-c mode] [-t create_switch_timeout]" << endl; cout << " -h: display this message" << endl; cout << " -r record_type: record orchagent logs with type (default 3)" << endl; cout << " Bit 0: sairedis.rec, Bit 1: swss.rec, Bit 2: responsepublisher.rec. 
For example:" << endl; @@ -92,6 +93,7 @@ void usage() cout << " -k max bulk size in bulk mode (default 1000)" << endl; cout << " -q zmq_server_address: ZMQ server address (default disable ZMQ)" << endl; cout << " -c counter mode (traditional|asic_db), default: asic_db" << endl; + cout << " -t Override create switch timeout, in sec" << endl; } void sighup_handler(int signo) @@ -346,7 +348,7 @@ int main(int argc, char **argv) string responsepublisher_rec_filename = Recorder::RESPPUB_FNAME; int record_type = 3; // Only swss and sairedis recordings enabled by default. - while ((opt = getopt(argc, argv, "b:m:r:f:j:d:i:hsz:k:q:c:")) != -1) + while ((opt = getopt(argc, argv, "b:m:r:f:j:d:i:hsz:k:q:c:t:")) != -1) { switch (opt) { @@ -437,6 +439,9 @@ int main(int argc, char **argv) enable_zmq = true; } break; + case 't': + create_switch_timeout = atoi(optarg); + break; default: /* '?' */ exit(EXIT_FAILURE); } @@ -629,7 +634,7 @@ int main(int argc, char **argv) delay_factor = 2; } - if (gMySwitchType == "voq" || gMySwitchType == "fabric" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu" || asan_enabled) + if (gMySwitchType == "voq" || gMySwitchType == "fabric" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu" || asan_enabled || create_switch_timeout) { /* We set this long timeout in order for orchagent to wait enough time for * response from syncd. 
It is needed since switch create takes more time @@ -637,7 +642,12 @@ int main(int argc, char **argv) * and systems ports to initialize */ - if (gMySwitchType == "voq" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu") + if (create_switch_timeout) + { + /* Convert timeout to milliseconds from seconds */ + attr.value.u64 = (create_switch_timeout * 1000); + } + else if (gMySwitchType == "voq" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu") { attr.value.u64 = (5 * SAI_REDIS_DEFAULT_SYNC_OPERATION_RESPONSE_TIMEOUT); } @@ -672,7 +682,7 @@ int main(int argc, char **argv) } SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId); - if (gMySwitchType == "voq" || gMySwitchType == "fabric" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu") + if (gMySwitchType == "voq" || gMySwitchType == "fabric" || gMySwitchType == "chassis-packet" || gMySwitchType == "dpu" || create_switch_timeout) { /* Set syncd response timeout back to the default value */ attr.id = SAI_REDIS_SWITCH_ATTR_SYNC_OPERATION_RESPONSE_TIMEOUT; diff --git a/tests/test_zmq.py b/tests/test_zmq.py index 8a3dc49894..4894df0751 100644 --- a/tests/test_zmq.py +++ b/tests/test_zmq.py @@ -57,8 +57,9 @@ class TestZmqDash(object): @pytest.fixture(scope="class") def enable_orchagent_zmq(self, dvs): # change orchagent to use ZMQ + # change orchagent to use custom create_switch_timeout dvs.runcmd("cp /usr/bin/orchagent.sh /usr/bin/orchagent.sh_zmq_ut_backup") - dvs.runcmd("sed -i.bak 's/\/usr\/bin\/orchagent /\/usr\/bin\/orchagent -q tcp:\/\/127.0.0.1:8100 /g' /usr/bin/orchagent.sh") + dvs.runcmd("sed -i.bak 's/\/usr\/bin\/orchagent /\/usr\/bin\/orchagent -q tcp:\/\/127.0.0.1:8100 -t 60 /g' /usr/bin/orchagent.sh") dvs.stop_swss() dvs.start_swss()