From 109a13cc03135de7e758a31031c4b763a5cdac03 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Tue, 10 Mar 2020 01:41:10 +0800 Subject: [PATCH] Add thermal control support for SONiC (#3949) --- .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 72 +++++ .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../thermal_policy.json | 1 + .../docker-pmon.supervisord.conf.j2 | 11 + dockers/docker-platform-monitor/start.sh.j2 | 4 + ...-Disable-thermal-policy-running-in-h.patch | 31 ++ platform/mellanox/mlnx-platform-api.mk | 1 + .../mellanox/mlnx-platform-api/.gitignore | 2 + .../mellanox/mlnx-platform-api/pytest.ini | 3 + platform/mellanox/mlnx-platform-api/setup.cfg | 2 + platform/mellanox/mlnx-platform-api/setup.py | 9 + .../sonic_platform/__init__.py | 2 +- .../sonic_platform/chassis.py | 5 + .../mlnx-platform-api/sonic_platform/fan.py | 18 +- .../sonic_platform/platform.py | 1 + .../mlnx-platform-api/sonic_platform/psu.py | 6 +- .../sonic_platform/thermal.py | 52 +++- .../sonic_platform/thermal_actions.py | 108 +++++++ .../sonic_platform/thermal_conditions.py | 63 ++++ .../sonic_platform/thermal_infos.py | 136 +++++++++ .../sonic_platform/thermal_manager.py | 50 ++++ .../mlnx-platform-api/tests/__init__.py | 0 .../mlnx-platform-api/tests/mock_platform.py | 44 +++ .../tests/test_thermal_policy.py | 272 ++++++++++++++++++ .../tests/thermal_policy.json | 72 +++++ rules/docker-platform-monitor.mk | 2 +- rules/sonic-thermalctld.mk | 6 + .../sonic_daemon_base/task_base.py | 50 ++++ 36 files changed, 1019 insertions(+), 13 deletions(-) create mode 120000 device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json create mode 120000 
device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json create mode 100644 device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json create mode 120000 device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json create mode 100644 platform/mellanox/hw-management/0002-hw-management.sh-Disable-thermal-policy-running-in-h.patch create mode 100644 platform/mellanox/mlnx-platform-api/.gitignore create mode 100644 platform/mellanox/mlnx-platform-api/pytest.ini create mode 100644 platform/mellanox/mlnx-platform-api/setup.cfg create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/thermal_infos.py create mode 100644 platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/__init__.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/mock_platform.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py create mode 100644 platform/mellanox/mlnx-platform-api/tests/thermal_policy.json create mode 100644 rules/sonic-thermalctld.mk create mode 100644 src/sonic-daemon-base/sonic_daemon_base/task_base.py diff --git a/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null 
+++ b/device/mellanox/x86_64-mlnx_lssn2700-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json new file mode 100644 index 000000000000..054d797be951 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json @@ -0,0 +1,72 @@ +{ + "thermal_control_algorithm": { + "run_at_boot_up": "false", + "fan_speed_when_suspend": "60" + }, + "info_types": [ + { + "type": "fan_info" + }, + { + "type": "psu_info" + }, + { + "type": "chassis_info" + } + ], + "policies": [ + { + "name": "any fan absence", + "conditions": [ + { + "type": "fan.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "any psu 
absence", + "conditions": [ + { + "type": "psu.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "all fan and psu presence", + "conditions": [ + { + "type": "fan.all.presence" + }, + { + "type": "psu.all.presence" + } + ], + "actions": [ + { + "type": "fan.all.set_speed", + "speed": "60" + } + ] + } + ] +} \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2700_simx-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700_simx-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json 
b/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700c-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json b/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json new file mode 120000 index 000000000000..5a25cd87f70c --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json @@ -0,0 +1 @@ +../x86_64-mlnx_msn2700-r0/thermal_policy.json \ No newline at end of file diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index df1c9c9ec3cc..87b6808a744c 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -91,3 +91,14 @@ stdout_logfile=syslog stderr_logfile=syslog startsecs=0 {% endif %} + +{% if not skip_thermalctld %} +[program:thermalctld] +command=/usr/bin/thermalctld +priority=9 +autostart=false +autorestart=true +stdout_logfile=syslog +stderr_logfile=syslog +startsecs=0 +{% endif %} diff --git a/dockers/docker-platform-monitor/start.sh.j2 b/dockers/docker-platform-monitor/start.sh.j2 index 5b4fe4588819..03e0b49b8c4d 100644 --- a/dockers/docker-platform-monitor/start.sh.j2 +++ b/dockers/docker-platform-monitor/start.sh.j2 @@ -75,3 +75,7 @@ supervisorctl start psud supervisorctl start syseepromd {% endif %} +{% if not skip_thermalctld %} +supervisorctl start thermalctld +{% endif %} + diff --git a/platform/mellanox/hw-management/0002-hw-management.sh-Disable-thermal-policy-running-in-h.patch b/platform/mellanox/hw-management/0002-hw-management.sh-Disable-thermal-policy-running-in-h.patch new file mode 100644 index 000000000000..d1c34fd16ec0 --- /dev/null +++ 
b/platform/mellanox/hw-management/0002-hw-management.sh-Disable-thermal-policy-running-in-h.patch @@ -0,0 +1,31 @@ +From 76b02916794be2e2558fcff1d11609a594f633d7 Mon Sep 17 00:00:00 2001 +From: Stephen Sun +Date: Fri, 14 Feb 2020 13:48:00 +0800 +Subject: [PATCH] Disable thermal policy running in hw-mgmt service SONiC + thermal control algorithm has been supported. + +Signed-off-by: Stephen Sun +--- + usr/usr/bin/hw-management.sh | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh +index 2cdbfb2..48b41d5 100755 +--- a/usr/usr/bin/hw-management.sh ++++ b/usr/usr/bin/hw-management.sh +@@ -799,7 +799,11 @@ do_start() + #disabled for leopard chipless bringup. + echo 1 > $config_path/suspend + +- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus& ++# ++# Disable thermal control algorithm in hw-management service ++# because there has already been that in SONiC ++# ++# $THERMAL_CONTROL $thermal_type $max_tachos $max_psus& + } + + do_stop() +-- +1.9.1 + diff --git a/platform/mellanox/mlnx-platform-api.mk b/platform/mellanox/mlnx-platform-api.mk index 4b70e59debc1..7bbbc3c70b0e 100644 --- a/platform/mellanox/mlnx-platform-api.mk +++ b/platform/mellanox/mlnx-platform-api.mk @@ -3,6 +3,7 @@ SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl $(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api $(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2 +$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE) SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2) export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))" diff --git a/platform/mellanox/mlnx-platform-api/.gitignore b/platform/mellanox/mlnx-platform-api/.gitignore new file mode 100644 index 000000000000..07f8a98e1f4a --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/.gitignore @@ -0,0 +1,2 @@ +*.pyc +.cache/ 
diff --git a/platform/mellanox/mlnx-platform-api/pytest.ini b/platform/mellanox/mlnx-platform-api/pytest.ini new file mode 100644 index 000000000000..c24fe5bb9e65 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore::DeprecationWarning diff --git a/platform/mellanox/mlnx-platform-api/setup.cfg b/platform/mellanox/mlnx-platform-api/setup.cfg new file mode 100644 index 000000000000..b7e478982ccf --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/setup.cfg @@ -0,0 +1,2 @@ +[aliases] +test=pytest diff --git a/platform/mellanox/mlnx-platform-api/setup.py b/platform/mellanox/mlnx-platform-api/setup.py index 12809c4085ed..f10f84924d2c 100644 --- a/platform/mellanox/mlnx-platform-api/setup.py +++ b/platform/mellanox/mlnx-platform-api/setup.py @@ -12,6 +12,14 @@ maintainer_email='kevinw@mellanox.com', packages=[ 'sonic_platform', + 'tests' + ], + setup_requires= [ + 'pytest-runner' + ], + tests_require = [ + 'pytest', + 'mock>=2.0.0' ], classifiers=[ 'Development Status :: 3 - Alpha', @@ -26,5 +34,6 @@ 'Topic :: Utilities', ], keywords='sonic SONiC platform PLATFORM', + test_suite='setup.get_test_suite' ) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/__init__.py b/platform/mellanox/mlnx-platform-api/sonic_platform/__init__.py index d94d4c9ec820..d82f3749319c 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/__init__.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/__init__.py @@ -1,2 +1,2 @@ __all__ = ["platform", "chassis"] -from sonic_platform import * \ No newline at end of file +from sonic_platform import * diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index c693b93462de..78f8dbc3c48e 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -442,3 +442,8 @@ def 
get_change_event(self, timeout=0): return True, {'sfp':port_dict} else: return True, {'sfp':{}} + + def get_thermal_manager(self): + from .thermal_manager import ThermalManager + return ThermalManager + diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py index 8b057e4123a2..ea8104e4883a 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py @@ -38,10 +38,12 @@ def __init__(self, fan_index, drawer_index = 1, psu_fan = False): self.fan_speed_set_path = "fan{}_speed_set".format(self.index) self.fan_presence_path = "fan{}_status".format(self.drawer_index) self.fan_max_speed_path = "fan{}_max".format(self.index) + self._name = "fan{}".format(fan_index + 1) else: self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index) self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) - self.fan_max_speed_path = "psu{}_max".format(self.index) + self._name = 'psu_{}_fan_{}'.format(self.index, fan_index) + self.fan_max_speed_path = None self.fan_status_path = "fan{}_fault".format(self.index) self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index) self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index) @@ -49,6 +51,9 @@ def __init__(self, fan_index, drawer_index = 1, psu_fan = False): self.fan_pwm_path = "pwm1" self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index) + def get_name(self): + return self._name + def get_status(self): """ Retrieves the operational status of fan @@ -123,7 +128,11 @@ def get_speed(self): speed_in_rpm = int(fan_curr_speed.read()) except (ValueError, IOError): speed_in_rpm = 0 - + + if self.fan_max_speed_path is None: + # in case of max speed unsupported, we just return speed in unit of RPM. 
+ return speed_in_rpm + max_speed_in_rpm = self._get_max_speed_in_rpm() speed = 100*speed_in_rpm/max_speed_in_rpm @@ -136,11 +145,10 @@ def get_target_speed(self): Returns: int: percentage of the max fan speed """ - speed = 0 - if self.is_psu_fan: # Not like system fan, psu fan speed can not be modified, so target speed is N/A - return speed + return self.get_speed() + try: with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm: pwm = int(fan_pwm.read()) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py index 25461986f37a..6d81ca3e7b51 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/platform.py @@ -24,6 +24,7 @@ def __init__(self): self._chassis.initialize_psu() self._chassis.initialize_fan() self._chassis.initialize_eeprom() + self._chassis.initialize_thermals() def _is_host(self): """ diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py index f403678a6698..22091474e437 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py @@ -60,6 +60,7 @@ def __init__(self, psu_index, sku): psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) #psu_oper_status should always be present for all SKUs self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status) + self._name = "PSU{}".format(psu_index + 1) if sku in hwsku_dict_psu: filemap = psu_profile_list[hwsku_dict_psu[sku]] @@ -92,7 +93,10 @@ def __init__(self, psu_index, sku): fan = Fan(psu_index, psu_index, True) if fan.get_presence(): - self._fan = fan + self._fan_list.append(fan) + + def get_name(self): + return self._name def _read_generic_file(self, filename, len): """ diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py 
b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py index 1d03016af4ef..f445c3b25058 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -36,24 +36,31 @@ THERMAL_API_GET_TEMPERATURE = "get_temperature" THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" +THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD = "get_high_critical_threshold" + +THERMAL_API_INVALID_HIGH_THRESHOLD = 0.0 HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" thermal_api_handler_cpu_core = { THERMAL_API_GET_TEMPERATURE:"cpu_core{}", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max" + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_core{}_crit" } thermal_api_handler_cpu_pack = { THERMAL_API_GET_TEMPERATURE:"cpu_pack", - THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max" + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"cpu_pack_crit" } thermal_api_handler_module = { THERMAL_API_GET_TEMPERATURE:"module{}_temp_input", - THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit" + THERMAL_API_GET_HIGH_THRESHOLD:"module{}_temp_crit", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:"module{}_temp_emergency" } thermal_api_handler_psu = { THERMAL_API_GET_TEMPERATURE:"psu{}_temp", - THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max" + THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_temp_max", + THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD:None } thermal_api_handler_gearbox = { THERMAL_API_GET_TEMPERATURE:"temp_input_gearbox{}", @@ -281,6 +288,7 @@ } ] + def initialize_thermals(sku, thermal_list, psu_list): # create thermal objects for all categories of sensors tp_index = hwsku_dict_thermal[sku] @@ -310,6 +318,8 @@ def initialize_thermals(sku, thermal_list, psu_list): thermal = Thermal(category, start + index, True) thermal_list.append(thermal) + + class Thermal(ThermalBase): def __init__(self, category, index, has_index, dependency = None, hint = 
None): """ @@ -328,9 +338,11 @@ def __init__(self, category, index, has_index, dependency = None, hint = None): self.category = category self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) + self.high_critical_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_CRITICAL_THRESHOLD) self.dependency = dependency self.dependent_hint = hint + def get_name(self): """ Retrieves the name of the device @@ -340,6 +352,7 @@ def get_name(self): """ return self.name + def _read_generic_file(self, filename, len): """ Read a generic file, returns the contents of the file @@ -352,6 +365,7 @@ def _read_generic_file(self, filename, len): logger.log_info("Fail to read file {} due to {}".format(filename, repr(e))) return result + def _get_file_from_api(self, api_name): if self.category == THERMAL_DEV_CATEGORY_AMBIENT: if api_name == THERMAL_API_GET_TEMPERATURE: @@ -363,9 +377,13 @@ def _get_file_from_api(self, api_name): if self.category in thermal_device_categories_singleton: filename = handler else: - filename = handler.format(self.index) + if handler: + filename = handler.format(self.index) + else: + return None return join(HW_MGMT_THERMAL_ROOT, filename) + def get_temperature(self): """ Retrieves current temperature reading from thermal @@ -385,8 +403,11 @@ def get_temperature(self): if value_str is None: return None value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None return value_float / 1000.0 + def get_high_threshold(self): """ Retrieves the high threshold temperature of thermal @@ -401,4 +422,25 @@ def get_high_threshold(self): if value_str is None: return None value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None + return value_float / 1000.0 + + + def 
get_high_critical_threshold(self): + """ + Retrieves the high critical threshold temperature of thermal + + Returns: + A float number, the high critical threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + if self.high_critical_threshold is None: + return None + value_str = self._read_generic_file(self.high_critical_threshold, 0) + if value_str is None: + return None + value_float = float(value_str) + if self.category == THERMAL_DEV_CATEGORY_MODULE and value_float == THERMAL_API_INVALID_HIGH_THRESHOLD: + return None return value_float / 1000.0 diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py new file mode 100644 index 000000000000..72729287d1c5 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py @@ -0,0 +1,108 @@ +from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object + + +class SetFanSpeedAction(ThermalPolicyActionBase): + """ + Base thermal action class to set speed for fans + """ + # JSON field definition + JSON_FIELD_SPEED = 'speed' + + def __init__(self): + """ + Constructor of SetFanSpeedAction which only initializes the speed to None. + """ + self.speed = None + + def load_from_json(self, json_obj): + """ + Construct SetFanSpeedAction via JSON. JSON example: + { + "type": "fan.all.set_speed", + "speed": "100" + } + :param json_obj: A JSON object representing a SetFanSpeedAction action. + :return: + """ + if SetFanSpeedAction.JSON_FIELD_SPEED in json_obj: + speed = float(json_obj[SetFanSpeedAction.JSON_FIELD_SPEED]) + if speed < 0 or speed > 100: + raise ValueError('SetFanSpeedAction invalid speed value {} in JSON policy file, valid value should be [0, 100]'. 
+ format(speed)) + self.speed = float(json_obj[SetFanSpeedAction.JSON_FIELD_SPEED]) + else: + raise ValueError('SetFanSpeedAction missing mandatory field {} in JSON policy file'. + format(SetFanSpeedAction.JSON_FIELD_SPEED)) + + +@thermal_json_object('fan.all.set_speed') +class SetAllFanSpeedAction(SetFanSpeedAction): + """ + Action to set speed for all fans + """ + def execute(self, thermal_info_dict): + """ + Set speed for all fans that are currently present + :param thermal_info_dict: A dictionary that stores all thermal information. + :return: + """ + from .thermal_infos import FanInfo + if FanInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[FanInfo.INFO_NAME], FanInfo): + fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME] + for fan in fan_info_obj.get_presence_fans(): + fan.set_speed(self.speed) + + +@thermal_json_object('thermal_control.control') +class ControlThermalAlgoAction(ThermalPolicyActionBase): + """ + Action to control the thermal control algorithm + """ + # JSON field definition + JSON_FIELD_STATUS = 'status' + + def __init__(self): + self.status = True + + def load_from_json(self, json_obj): + """ + Construct ControlThermalAlgoAction via JSON. JSON example: + { + "type": "thermal_control.control", + "status": "true" + } + :param json_obj: A JSON object representing a ControlThermalAlgoAction action. + :return: + """ + if ControlThermalAlgoAction.JSON_FIELD_STATUS in json_obj: + status_str = json_obj[ControlThermalAlgoAction.JSON_FIELD_STATUS].lower() + if status_str == 'true': + self.status = True + elif status_str == 'false': + self.status = False + else: + raise ValueError('Invalid {} field value, please specify true of false'. + format(ControlThermalAlgoAction.JSON_FIELD_STATUS)) + else: + raise ValueError('ControlThermalAlgoAction ' + 'missing mandatory field {} in JSON policy file'. 
+ format(ControlThermalAlgoAction.JSON_FIELD_STATUS)) + + def execute(self, thermal_info_dict): + """ + Start or stop the thermal control algorithm according to the configured status + :param thermal_info_dict: A dictionary that stores all thermal information. + :return: + """ + from .thermal_infos import ChassisInfo + if ChassisInfo.INFO_NAME in thermal_info_dict: + chassis_info_obj = thermal_info_dict[ChassisInfo.INFO_NAME] + chassis = chassis_info_obj.get_chassis() + thermal_manager = chassis.get_thermal_manager() + if self.status: + thermal_manager.start_thermal_control_algorithm() + else: + thermal_manager.stop_thermal_control_algorithm() + + diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py new file mode 100644 index 000000000000..2df59acc9bf1 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py @@ -0,0 +1,63 @@ +from sonic_platform_base.sonic_thermal_control.thermal_condition_base import ThermalPolicyConditionBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object + + +class FanCondition(ThermalPolicyConditionBase): + def get_fan_info(self, thermal_info_dict): + from .thermal_infos import FanInfo + if FanInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[FanInfo.INFO_NAME], FanInfo): + return thermal_info_dict[FanInfo.INFO_NAME] + else: + return None + + +@thermal_json_object('fan.any.absence') +class AnyFanAbsenceCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_absence_fans()) > 0 if fan_info_obj else False + + +@thermal_json_object('fan.all.absence') +class AllFanAbsenceCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_presence_fans()) == 0 if fan_info_obj else False + + 
+@thermal_json_object('fan.all.presence') +class AllFanPresenceCondition(FanCondition): + def is_match(self, thermal_info_dict): + fan_info_obj = self.get_fan_info(thermal_info_dict) + return len(fan_info_obj.get_absence_fans()) == 0 if fan_info_obj else False + + +class PsuCondition(ThermalPolicyConditionBase): + def get_psu_info(self, thermal_info_dict): + from .thermal_infos import PsuInfo + if PsuInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[PsuInfo.INFO_NAME], PsuInfo): + return thermal_info_dict[PsuInfo.INFO_NAME] + else: + return None + + +@thermal_json_object('psu.any.absence') +class AnyPsuAbsenceCondition(PsuCondition): + def is_match(self, thermal_info_dict): + psu_info_obj = self.get_psu_info(thermal_info_dict) + return len(psu_info_obj.get_absence_psus()) > 0 if psu_info_obj else False + + +@thermal_json_object('psu.all.absence') +class AllPsuAbsenceCondition(PsuCondition): + def is_match(self, thermal_info_dict): + psu_info_obj = self.get_psu_info(thermal_info_dict) + return len(psu_info_obj.get_presence_psus()) == 0 if psu_info_obj else False + + +@thermal_json_object('psu.all.presence') +class AllPsuPresenceCondition(PsuCondition): + def is_match(self, thermal_info_dict): + psu_info_obj = self.get_psu_info(thermal_info_dict) + return len(psu_info_obj.get_absence_psus()) == 0 if psu_info_obj else False + diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_infos.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_infos.py new file mode 100644 index 000000000000..34d31e47d24c --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_infos.py @@ -0,0 +1,136 @@ +from sonic_platform_base.sonic_thermal_control.thermal_info_base import ThermalPolicyInfoBase +from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object + + +@thermal_json_object('fan_info') +class FanInfo(ThermalPolicyInfoBase): + """ + Fan information needed by thermal policy + """ + 
+ # Fan information name + INFO_NAME = 'fan_info' + + def __init__(self): + self._absence_fans = set() + self._presence_fans = set() + self._status_changed = False + + def collect(self, chassis): + """ + Collect absence and presence fans. + :param chassis: The chassis object + :return: + """ + self._status_changed = False + for fan in chassis.get_all_fans(): + if fan.get_presence() and fan not in self._presence_fans: + self._presence_fans.add(fan) + self._status_changed = True + if fan in self._absence_fans: + self._absence_fans.remove(fan) + elif not fan.get_presence() and fan not in self._absence_fans: + self._absence_fans.add(fan) + self._status_changed = True + if fan in self._presence_fans: + self._presence_fans.remove(fan) + + def get_absence_fans(self): + """ + Retrieves absence fans + :return: A set of absence fans + """ + return self._absence_fans + + def get_presence_fans(self): + """ + Retrieves presence fans + :return: A set of presence fans + """ + return self._presence_fans + + def is_status_changed(self): + """ + Retrieves if the status of fan information changed + :return: True if status changed else False + """ + return self._status_changed + + +@thermal_json_object('psu_info') +class PsuInfo(ThermalPolicyInfoBase): + """ + PSU information needed by thermal policy + """ + INFO_NAME = 'psu_info' + + def __init__(self): + self._absence_psus = set() + self._presence_psus = set() + self._status_changed = False + + def collect(self, chassis): + """ + Collect absence and presence PSUs. 
+ :param chassis: The chassis object + :return: + """ + self._status_changed = False + for psu in chassis.get_all_psus(): + if psu.get_presence() and psu not in self._presence_psus: + self._presence_psus.add(psu) + self._status_changed = True + if psu in self._absence_psus: + self._absence_psus.remove(psu) + elif not psu.get_presence() and psu not in self._absence_psus: + self._absence_psus.add(psu) + self._status_changed = True + if psu in self._presence_psus: + self._presence_psus.remove(psu) + + def get_absence_psus(self): + """ + Retrieves absence PSUs + :return: A set of absence PSUs + """ + return self._absence_psus + + def get_presence_psus(self): + """ + Retrieves presence PSUs + :return: A set of presence PSUs + """ + return self._presence_psus + + def is_status_changed(self): + """ + Retrieves if the status of PSU information changed + :return: True if status changed else False + """ + return self._status_changed + + +@thermal_json_object('chassis_info') +class ChassisInfo(ThermalPolicyInfoBase): + """ + Chassis information needed by thermal policy + """ + INFO_NAME = 'chassis_info' + + def __init__(self): + self._chassis = None + + def collect(self, chassis): + """ + Collect platform chassis. + :param chassis: The chassis object + :return: + """ + self._chassis = chassis + + def get_chassis(self): + """ + Retrieves platform chassis object + :return: A platform chassis object. 
+        """
+        return self._chassis
diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py
new file mode 100644
index 000000000000..133bb078ca20
--- /dev/null
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py
@@ -0,0 +1,50 @@
+import os
+from sonic_platform_base.sonic_thermal_control.thermal_manager_base import ThermalManagerBase
+from .thermal_actions import *
+from .thermal_conditions import *
+from .thermal_infos import *
+
+
+class ThermalManager(ThermalManagerBase):
+    THERMAL_ALGORITHM_CONTROL_PATH = '/var/run/hw-management/config/suspend'
+
+    @classmethod
+    def start_thermal_control_algorithm(cls):
+        """
+        Start thermal control algorithm (writes 0 to the suspend control file)
+
+        Returns:
+            bool: True if set success, False if fail.
+        """
+        return cls._control_thermal_control_algorithm(False)
+
+    @classmethod
+    def stop_thermal_control_algorithm(cls):
+        """
+        Stop thermal control algorithm (writes 1 to the suspend control file)
+
+        Returns:
+            bool: True if set success, False if fail.
+        """
+        return cls._control_thermal_control_algorithm(True)
+
+    @classmethod
+    def _control_thermal_control_algorithm(cls, suspend):
+        """
+        Control thermal control algorithm
+
+        Args:
+            suspend: Bool, indicate suspend the algorithm or not
+
+        Returns:
+            bool: True if set success, False if fail.
+        """
+        status = True
+        write_value = 1 if suspend else 0
+        try:
+            with open(cls.THERMAL_ALGORITHM_CONTROL_PATH, 'w') as control_file:
+                control_file.write(str(write_value))
+        except (ValueError, IOError):
+            status = False
+
+        return status
diff --git a/platform/mellanox/mlnx-platform-api/tests/__init__.py b/platform/mellanox/mlnx-platform-api/tests/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/platform/mellanox/mlnx-platform-api/tests/mock_platform.py b/platform/mellanox/mlnx-platform-api/tests/mock_platform.py
new file mode 100644
index 000000000000..b8d070d44955
--- /dev/null
+++ b/platform/mellanox/mlnx-platform-api/tests/mock_platform.py
@@ -0,0 +1,44 @@
+class MockFan:
+    def __init__(self):
+        self.presence = True
+        self.speed = 60
+
+    def get_presence(self):
+        return self.presence
+
+    def set_speed(self, speed):
+        self.speed = speed
+
+
+class MockPsu:
+    def __init__(self):
+        self.presence = True
+
+    def get_presence(self):
+        return self.presence
+
+
+class MockChassis:
+    def __init__(self):
+        self.fan_list = []
+        self.psu_list = []
+
+    def get_all_psus(self):
+        return self.psu_list
+
+    def get_all_fans(self):
+        return self.fan_list
+
+    def get_thermal_manager(self):
+        from sonic_platform.thermal_manager import ThermalManager
+        return ThermalManager
+
+    def make_fan_absence(self):
+        fan = MockFan()
+        fan.presence = False
+        self.fan_list.append(fan)
+
+    def make_psu_absence(self):
+        psu = MockPsu()
+        psu.presence = False
+        self.psu_list.append(psu)
diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py
new file mode 100644
index 000000000000..ba9e502d4f74
--- /dev/null
+++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py
@@ -0,0 +1,272 @@
+import os
+import sys
+import pytest
+import json
+from mock import MagicMock
+from .mock_platform import MockChassis, MockFan, MockPsu
+
+test_path =
os.path.dirname(os.path.abspath(__file__))
+modules_path = os.path.dirname(test_path)
+sys.path.insert(0, modules_path)
+
+from sonic_platform.thermal_manager import ThermalManager
+from sonic_platform.thermal_infos import FanInfo, PsuInfo
+
+
+@pytest.fixture(scope='session', autouse=True)
+def thermal_manager():
+    policy_file = os.path.join(test_path, 'thermal_policy.json')
+    ThermalManager.load(policy_file)
+    return ThermalManager
+
+
+def test_load_policy(thermal_manager):
+    assert 'psu_info' in thermal_manager._thermal_info_dict
+    assert 'fan_info' in thermal_manager._thermal_info_dict
+    assert 'chassis_info' in thermal_manager._thermal_info_dict
+
+    assert 'any fan absence' in thermal_manager._policy_dict
+    assert 'any psu absence' in thermal_manager._policy_dict
+    assert 'all fan and psu presence' in thermal_manager._policy_dict
+
+    assert thermal_manager._fan_speed_when_suspend == 60
+    assert thermal_manager._run_thermal_algorithm_at_boot_up == False
+
+
+def test_fan_info():
+    chassis = MockChassis()
+    chassis.make_fan_absence()
+    fan_info = FanInfo()
+    fan_info.collect(chassis)
+    assert len(fan_info.get_absence_fans()) == 1
+    assert len(fan_info.get_presence_fans()) == 0
+    assert fan_info.is_status_changed()
+
+    fan_list = chassis.get_all_fans()
+    fan_list[0].presence = True
+    fan_info.collect(chassis)
+    assert len(fan_info.get_absence_fans()) == 0
+    assert len(fan_info.get_presence_fans()) == 1
+    assert fan_info.is_status_changed()
+
+
+def test_psu_info():
+    chassis = MockChassis()
+    chassis.make_psu_absence()
+    psu_info = PsuInfo()
+    psu_info.collect(chassis)
+    assert len(psu_info.get_absence_psus()) == 1
+    assert len(psu_info.get_presence_psus()) == 0
+    assert psu_info.is_status_changed()
+
+    psu_list = chassis.get_all_psus()
+    psu_list[0].presence = True
+    psu_info.collect(chassis)
+    assert len(psu_info.get_absence_psus()) == 0
+    assert len(psu_info.get_presence_psus()) == 1
+    assert psu_info.is_status_changed()
+
+
+def test_fan_policy(thermal_manager):
+    chassis = MockChassis()
+    chassis.make_fan_absence()
+    chassis.fan_list.append(MockFan())
+    thermal_manager.start_thermal_control_algorithm = MagicMock()
+    thermal_manager.stop_thermal_control_algorithm = MagicMock()
+    thermal_manager.run_policy(chassis)
+
+    fan_list = chassis.get_all_fans()
+    assert fan_list[1].speed == 100
+    thermal_manager.stop_thermal_control_algorithm.assert_called_once()
+
+    fan_list[0].presence = True
+    thermal_manager.run_policy(chassis)
+    thermal_manager.start_thermal_control_algorithm.assert_called_once()
+
+
+def test_psu_policy(thermal_manager):
+    chassis = MockChassis()
+    chassis.make_psu_absence()
+    chassis.fan_list.append(MockFan())
+    thermal_manager.start_thermal_control_algorithm = MagicMock()
+    thermal_manager.stop_thermal_control_algorithm = MagicMock()
+    thermal_manager.run_policy(chassis)
+
+    fan_list = chassis.get_all_fans()
+    assert fan_list[0].speed == 100
+    thermal_manager.stop_thermal_control_algorithm.assert_called_once()
+
+    psu_list = chassis.get_all_psus()
+    psu_list[0].presence = True
+    thermal_manager.run_policy(chassis)
+    thermal_manager.start_thermal_control_algorithm.assert_called_once()
+
+
+def test_any_fan_absence_condition():
+    chassis = MockChassis()
+    chassis.make_fan_absence()
+    fan_info = FanInfo()
+    fan_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AnyFanAbsenceCondition
+    condition = AnyFanAbsenceCondition()
+    assert condition.is_match({'fan_info': fan_info})
+
+    fan = chassis.get_all_fans()[0]
+    fan.presence = True
+    fan_info.collect(chassis)
+    assert not condition.is_match({'fan_info': fan_info})
+
+
+def test_all_fan_absence_condition():
+    chassis = MockChassis()
+    chassis.make_fan_absence()
+    fan = MockFan()
+    fan_list = chassis.get_all_fans()
+    fan_list.append(fan)
+    fan_info = FanInfo()
+    fan_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AllFanAbsenceCondition
+    condition = AllFanAbsenceCondition()
+    assert not condition.is_match({'fan_info': fan_info})
+
+    fan.presence = False
+    fan_info.collect(chassis)
+    assert condition.is_match({'fan_info': fan_info})
+
+
+def test_all_fan_presence_condition():
+    chassis = MockChassis()
+    chassis.make_fan_absence()
+    fan = MockFan()
+    fan_list = chassis.get_all_fans()
+    fan_list.append(fan)
+    fan_info = FanInfo()
+    fan_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AllFanPresenceCondition
+    condition = AllFanPresenceCondition()
+    assert not condition.is_match({'fan_info': fan_info})
+
+    fan_list[0].presence = True
+    fan_info.collect(chassis)
+    assert condition.is_match({'fan_info': fan_info})
+
+
+def test_any_psu_absence_condition():
+    chassis = MockChassis()
+    chassis.make_psu_absence()
+    psu_info = PsuInfo()
+    psu_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AnyPsuAbsenceCondition
+    condition = AnyPsuAbsenceCondition()
+    assert condition.is_match({'psu_info': psu_info})
+
+    psu = chassis.get_all_psus()[0]
+    psu.presence = True
+    psu_info.collect(chassis)
+    assert not condition.is_match({'psu_info': psu_info})
+
+
+def test_all_psu_absence_condition():
+    chassis = MockChassis()
+    chassis.make_psu_absence()
+    psu = MockPsu()
+    psu_list = chassis.get_all_psus()
+    psu_list.append(psu)
+    psu_info = PsuInfo()
+    psu_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AllPsuAbsenceCondition
+    condition = AllPsuAbsenceCondition()
+    assert not condition.is_match({'psu_info': psu_info})
+
+    psu.presence = False
+    psu_info.collect(chassis)
+    assert condition.is_match({'psu_info': psu_info})
+
+
+def test_all_psu_presence_condition():
+    chassis = MockChassis()
+    chassis.make_psu_absence()
+    psu = MockPsu()
+    psu_list = chassis.get_all_psus()
+    psu_list.append(psu)
+    psu_info = PsuInfo()
+    psu_info.collect(chassis)
+
+    from sonic_platform.thermal_conditions import AllPsuPresenceCondition
+    condition = AllPsuPresenceCondition()
+    assert not condition.is_match({'psu_info': psu_info})
+
+    psu_list[0].presence = True
+    psu_info.collect(chassis)
+    assert condition.is_match({'psu_info': psu_info})
+
+
+def test_load_set_fan_speed_action():
+    from sonic_platform.thermal_actions import SetAllFanSpeedAction
+    action = SetAllFanSpeedAction()
+    json_str = '{\"speed\": \"50\"}'
+    json_obj = json.loads(json_str)
+    action.load_from_json(json_obj)
+    assert action.speed == 50
+
+    json_str = '{\"speed\": \"-1\"}'
+    json_obj = json.loads(json_str)
+    with pytest.raises(ValueError):
+        action.load_from_json(json_obj)
+
+    json_str = '{\"speed\": \"101\"}'
+    json_obj = json.loads(json_str)
+    with pytest.raises(ValueError):
+        action.load_from_json(json_obj)
+
+    json_str = '{\"invalid\": \"101\"}'
+    json_obj = json.loads(json_str)
+    with pytest.raises(ValueError):
+        action.load_from_json(json_obj)
+
+
+def test_execute_set_fan_speed_action():
+    chassis = MockChassis()
+    fan_list = chassis.get_all_fans()
+    fan_list.append(MockFan())
+    fan_list.append(MockFan())
+    fan_info = FanInfo()
+    fan_info.collect(chassis)
+
+    from sonic_platform.thermal_actions import SetAllFanSpeedAction
+    action = SetAllFanSpeedAction()
+    action.speed = 99
+    action.execute({'fan_info': fan_info})
+    assert fan_list[0].speed == 99
+    assert fan_list[1].speed == 99
+
+
+def test_load_control_thermal_algo_action():
+    from sonic_platform.thermal_actions import ControlThermalAlgoAction
+    action = ControlThermalAlgoAction()
+    json_str = '{\"status\": \"false\"}'
+    json_obj = json.loads(json_str)
+    action.load_from_json(json_obj)
+    assert not action.status
+
+    json_str = '{\"status\": \"true\"}'
+    json_obj = json.loads(json_str)
+    action.load_from_json(json_obj)
+    assert action.status
+
+    json_str = '{\"status\": \"invalid\"}'
+    json_obj = json.loads(json_str)
+    with pytest.raises(ValueError):
+        action.load_from_json(json_obj)
+
+    json_str = '{\"invalid\": \"true\"}'
+    json_obj = json.loads(json_str)
+    with pytest.raises(ValueError):
+        action.load_from_json(json_obj)
+
+
diff --git
a/platform/mellanox/mlnx-platform-api/tests/thermal_policy.json b/platform/mellanox/mlnx-platform-api/tests/thermal_policy.json new file mode 100644 index 000000000000..5d31b2abd875 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/thermal_policy.json @@ -0,0 +1,72 @@ +{ + "thermal_control_algorithm": { + "run_at_boot_up": "false", + "fan_speed_when_suspend": "60" + }, + "info_types": [ + { + "type": "fan_info" + }, + { + "type": "psu_info" + }, + { + "type": "chassis_info" + } + ], + "policies": [ + { + "name": "any fan absence", + "conditions": [ + { + "type": "fan.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "any psu absence", + "conditions": [ + { + "type": "psu.any.absence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "false" + }, + { + "type": "fan.all.set_speed", + "speed": "100" + } + ] + }, + { + "name": "all fan and psu presence", + "conditions": [ + { + "type": "fan.all.presence" + }, + { + "type": "psu.all.presence" + } + ], + "actions": [ + { + "type": "thermal_control.control", + "status": "true" + } + ] + } + ] +} \ No newline at end of file diff --git a/rules/docker-platform-monitor.mk b/rules/docker-platform-monitor.mk index a37f4d2e9ee7..db1c8c5a0289 100644 --- a/rules/docker-platform-monitor.mk +++ b/rules/docker-platform-monitor.mk @@ -10,7 +10,7 @@ $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(LIBSENSORS) $(LM_SENSORS) $(FANCONTROL) ifeq ($(CONFIGURED_PLATFORM),barefoot) $(DOCKER_PLATFORM_MONITOR)_DEPENDS += $(PYTHON_THRIFT) endif -$(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) +$(DOCKER_PLATFORM_MONITOR)_PYTHON_DEBS += $(SONIC_LEDD) $(SONIC_XCVRD) $(SONIC_PSUD) $(SONIC_SYSEEPROMD) $(SONIC_THERMALCTLD) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_COMMON_PY2) $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS 
+= $(SWSSSDK_PY2)
 $(DOCKER_PLATFORM_MONITOR)_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)
diff --git a/rules/sonic-thermalctld.mk b/rules/sonic-thermalctld.mk
new file mode 100644
index 000000000000..775082e7bbce
--- /dev/null
+++ b/rules/sonic-thermalctld.mk
@@ -0,0 +1,6 @@
+# sonic-thermalctld (SONiC Thermal control daemon) Debian package
+
+SONIC_THERMALCTLD = python-sonic-thermalctld_1.0-1_all.deb
+$(SONIC_THERMALCTLD)_SRC_PATH = $(SRC_PATH)/sonic-platform-daemons/sonic-thermalctld
+$(SONIC_THERMALCTLD)_WHEEL_DEPENDS = $(SONIC_DAEMON_BASE_PY2)
+SONIC_PYTHON_STDEB_DEBS += $(SONIC_THERMALCTLD)
diff --git a/src/sonic-daemon-base/sonic_daemon_base/task_base.py b/src/sonic-daemon-base/sonic_daemon_base/task_base.py
new file mode 100644
index 000000000000..e1738ffba213
--- /dev/null
+++ b/src/sonic-daemon-base/sonic_daemon_base/task_base.py
@@ -0,0 +1,66 @@
+import multiprocessing
+import os
+import signal
+import threading
+
+
+#
+# ProcessTaskBase =====================================================================
+#
+class ProcessTaskBase(object):  # TODO: put this class to swss-platform-common
+    """Base class for a task whose worker runs in a separate process."""
+
+    def __init__(self):
+        self.task_process = None
+        self.task_stopping_event = multiprocessing.Event()
+
+    def task_worker(self):
+        """Body executed in the child process; a no-op in this base class."""
+        pass
+
+    def task_run(self):
+        """Start the worker process unless a stop was already requested."""
+        if self.task_stopping_event.is_set():
+            return
+
+        self.task_process = multiprocessing.Process(target=self.task_worker)
+        self.task_process.start()
+
+    def task_stop(self):
+        """Set the stopping event and SIGKILL the worker process, if any."""
+        self.task_stopping_event.set()
+        # task_process is still None when task_run() was never called or
+        # returned early, so there is nothing to kill in that case.
+        if self.task_process is not None:
+            os.kill(self.task_process.pid, signal.SIGKILL)
+
+
+#
+# ThreadTaskBase =====================================================================
+#
+class ThreadTaskBase(object):  # TODO: put this class to swss-platform-common;
+    """Base class for a task whose worker runs in a separate thread."""
+
+    def __init__(self):
+        self.task_thread = None
+        self.task_stopping_event = threading.Event()
+
+    def task_worker(self):
+        """Body executed in the worker thread; a no-op in this base class."""
+        pass
+
+    def task_run(self):
+        """Start the worker thread unless a stop was already requested."""
+        if self.task_stopping_event.is_set():
+            return
+
+        self.task_thread = threading.Thread(target=self.task_worker)
+        self.task_thread.start()
+
+    def task_stop(self):
+        """Set the stopping event and wait for the worker thread, if any."""
+        self.task_stopping_event.set()
+        # task_thread is still None when task_run() was never called or
+        # returned early, so there is nothing to join in that case.
+        if self.task_thread is not None:
+            self.task_thread.join()