Skip to content

Commit

Permalink
[auto-ts] add memory check (#10433) (#12291)
Browse files Browse the repository at this point in the history
#### Why I did it

To support automatic techsupport invocation when memory usage is too high.

#### How I did it

Implemented according to sonic-net/SONiC#939

#### How to verify it

Unit tests (UT) and manual testing on the switch.

*DEPENDS* on sonic-net/sonic-utilities#2116
  • Loading branch information
stepanblyschak authored Oct 6, 2022
1 parent 2b36f81 commit 06f8b1f
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 10 deletions.
5 changes: 4 additions & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"available_mem_threshold": "10.0",
"min_available_mem": "200",
"since" : "2 days ago"
}
},
Expand All @@ -93,7 +95,8 @@
{%- if enable_auto_tech_support == "y" %}
"state" : "enabled", {% else %}
"state" : "disabled", {% endif %}
"rate_limit_interval" : "600"
"rate_limit_interval" : "600",
"available_mem_threshold": "10.0"
}{%if not loop.last %},{% endif -%}
{% endfor %}
},
Expand Down
3 changes: 3 additions & 0 deletions files/image_config/monit/conf.d/sonic-host
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,6 @@ check program vnetRouteCheck with path "/usr/local/bin/vnet_route_check.py"
every 5 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles

# memory_check verifies that memory usage does not cross the configured threshold; if it does, the handler below is run to invoke techsupport.
check program memory_check with path "/usr/local/bin/memory_threshold_check.py"
if status == 2 for 10 times within 20 cycles then exec "/usr/local/bin/memory_threshold_check_handler.py"
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
"desc" : "Configure cooloff with a value of invalid format",
"eStrKey": "InvalidValue"
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"desc" : "Configure a value for core-uage outside the range [0, 100)",
Expand All @@ -19,9 +19,23 @@
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
}
}
}
},
Expand All @@ -20,8 +20,8 @@
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
"since" : "2 days ago"
}
}
}
},
Expand All @@ -30,7 +30,7 @@
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"rate_limit_interval" : "whatever"
}
}
}
}
},
Expand All @@ -40,7 +40,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_core_limit" : "100.00",
"rate_limit_interval" : "180"
}
}
}
}
},
Expand All @@ -50,7 +50,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.23",
"max_core_limit" : "99.99"
}
}
}
}
},
Expand All @@ -60,7 +60,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.111",
"max_core_limit" : "99.99"
}
}
}
}
},
Expand All @@ -81,5 +81,60 @@
]
}
}
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"available_mem_threshold": "10.0",
"min_available_mem": "900"
}
}
}
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"available_mem_threshold": "11.111"
}
}
}
},
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
"AUTO_TECHSUPPORT_FEATURE_LIST": [
{
"feature_name" : "bgp",
"state" : "enabled",
"available_mem_threshold": "10.0"
},
{
"feature_name" : "swss",
"state" : "disabled",
"available_mem_threshold": "10.0"
}
]
}
}
},
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
"AUTO_TECHSUPPORT_FEATURE_LIST": [
{
"feature_name" : "bgp",
"state" : "enabled",
"available_mem_threshold": "11.111"
},
{
"feature_name" : "swss",
"state" : "disabled",
"available_mem_threshold": "10.0"
}
]
}
}
}
}
18 changes: 18 additions & 0 deletions src/sonic-yang-models/yang-models/sonic-auto_techsupport.yang
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ module sonic-auto_techsupport {
description "Max Limit in percentage for the cumulative size of core dumps. No cleanup is performed if the value isn't configured or is 0.0";
type decimal-repr;
}

leaf available_mem_threshold {
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
type decimal-repr;
default 10.0;
}

leaf min_available_mem {
description "Minimum Free memory (in MB) that should be available for the techsupport execution to start";
type uint32;
default 200;
}

leaf since {
/*
Expand Down Expand Up @@ -96,6 +108,12 @@ module sonic-auto_techsupport {
type stypes:admin_mode;
}

leaf available_mem_threshold {
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
type decimal-repr;
default 10.0;
}

leaf rate_limit_interval {
description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
type uint16;
Expand Down

0 comments on commit 06f8b1f

Please sign in to comment.