From 6e87e2d4783519d5d4dd0d9443b4c54ac126ad8e Mon Sep 17 00:00:00 2001
From: gyptazy
Date: Thu, 17 Oct 2024 13:25:34 +0200
Subject: [PATCH] fix: Fix offline node evaluation & maintenance mode comparison of different object types

- Fix evaluation of a node (and its objects) when it is not reachable (e.g., during maintenance).
- Fix maintenance mode evaluation where comparing a list with a string resulted in a crash (by @glitchvern).
- Set ProxLB version to 1.0.5b.

Fixes: #160
Fixes: #107
Contributed-by: @glitchvern
---
 ..._fix_maintenance_mode_compare_str_list.yml |   2 +
 .../1.0.5/107_fix_offline_node_eval.yml       |   2 +
 proxlb                                        | 266 +++++++++---------
 3 files changed, 139 insertions(+), 131 deletions(-)
 create mode 100644 .changelogs/1.0.5/106_fix_maintenance_mode_compare_str_list.yml
 create mode 100644 .changelogs/1.0.5/107_fix_offline_node_eval.yml

diff --git a/.changelogs/1.0.5/106_fix_maintenance_mode_compare_str_list.yml b/.changelogs/1.0.5/106_fix_maintenance_mode_compare_str_list.yml
new file mode 100644
index 0000000..3b9a68c
--- /dev/null
+++ b/.changelogs/1.0.5/106_fix_maintenance_mode_compare_str_list.yml
@@ -0,0 +1,2 @@
+fixed:
+  - Fix evaluation of maintenance mode where comparing list & string resulted in a crash (by @glitchvern). [#106]
diff --git a/.changelogs/1.0.5/107_fix_offline_node_eval.yml b/.changelogs/1.0.5/107_fix_offline_node_eval.yml
new file mode 100644
index 0000000..9d2037e
--- /dev/null
+++ b/.changelogs/1.0.5/107_fix_offline_node_eval.yml
@@ -0,0 +1,2 @@
+fixed:
+  - Fix node (and its objects) evaluation when not reachable (e.g., maintenance). [#107]
diff --git a/proxlb b/proxlb
index 8290698..0a7e24d 100755
--- a/proxlb
+++ b/proxlb
@@ -42,7 +42,7 @@ import urllib3
 
 # Constants
 __appname__ = "ProxLB"
-__version__ = "1.0.4"
+__version__ = "1.0.5b"
 __config_version__ = 3
 __author__ = "Florian Paul Azim Hoberg @gyptazy"
 __errors__ = False
@@ -543,133 +543,136 @@ def get_vm_statistics(api_object, ignore_vms, balancing_type):
 
     for node in api_object.nodes.get():
 
-        # Add all virtual machines if type is vm or all.
-        if balancing_type == 'vm' or balancing_type == 'all':
-            for vm in api_object.nodes(node['node']).qemu.get():
-
-                # Get the VM tags from API.
-                vm_tags = __get_vm_tags(api_object, node, vm['vmid'], 'vm')
-                if vm_tags is not None:
-                    group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)
-
-                # Get wildcard match for VMs to ignore if a wildcard pattern was
-                # previously found. Wildcards may slow down the task when using
-                # many patterns in the ignore list. Therefore, run this only if
-                # a wildcard pattern was found. We also do not need to validate
-                # this if the VM is already being ignored by a defined tag.
-                if vm_ignore_wildcard and not vm_ignore:
-                    vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)
-
-                if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
-                    vm_statistics[vm['name']] = {}
-                    vm_statistics[vm['name']]['group_include'] = group_include
-                    vm_statistics[vm['name']]['group_exclude'] = group_exclude
-                    vm_statistics[vm['name']]['cpu_total'] = vm['cpus']
-                    vm_statistics[vm['name']]['cpu_used'] = vm['cpu']
-                    vm_statistics[vm['name']]['memory_total'] = vm['maxmem']
-                    vm_statistics[vm['name']]['memory_used'] = vm['mem']
-                    vm_statistics[vm['name']]['disk_total'] = vm['maxdisk']
-                    vm_statistics[vm['name']]['disk_used'] = vm['disk']
-                    vm_statistics[vm['name']]['vmid'] = vm['vmid']
-                    vm_statistics[vm['name']]['node_parent'] = node['node']
-                    vm_statistics[vm['name']]['node_rebalance'] = node['node']
-                    vm_statistics[vm['name']]['storage'] = {}
-                    vm_statistics[vm['name']]['type'] = 'vm'
-
-                    # Get disk details of the related object.
-                    _vm_details = api_object.nodes(node['node']).qemu(vm['vmid']).config.get()
-                    logging.info(f'{info_prefix} Getting disk information for vm {vm["name"]}.')
-
-                    for vm_detail_key, vm_detail_value in _vm_details.items():
-                        # vm_detail_key_validator = re.sub('\d+$', '', vm_detail_key)
-                        vm_detail_key_validator = re.sub(r'\d+$', '', vm_detail_key)
-
-                        if vm_detail_key_validator in _vm_details_storage_allowed:
-                            vm_statistics[vm['name']]['storage'][vm_detail_key] = {}
-                            match = re.match(r'([^:]+):[^/]+/(.+),iothread=\d+,size=(\d+G)', _vm_details[vm_detail_key])
-
-                            # Create an efficient match group and split the strings to assign them to the storage information.
-                            if match:
-                                _volume = match.group(1)
-                                _disk_name = match.group(2)
-                                _disk_size = match.group(3)
-
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['name'] = _disk_name
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['device_name'] = vm_detail_key
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['volume'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_parent'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_rebalance'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['size'] = _disk_size[:-1]
-                                logging.info(f'{info_prefix} Added disk for {vm["name"]}: Name {_disk_name} on volume {_volume} with size {_disk_size}.')
-                            else:
-                                logging.info(f'{info_prefix} No (or unsupported) disk(s) for {vm["name"]} found.')
-
-                    logging.info(f'{info_prefix} Added vm {vm["name"]}.')
-
-        # Add all containers if type is ct or all.
-        if balancing_type == 'ct' or balancing_type == 'all':
-            for vm in api_object.nodes(node['node']).lxc.get():
-
-                logging.warning(f'{warn_prefix} Rebalancing on LXC containers (CT) always requires them to shut down.')
-                logging.warning(f'{warn_prefix} {vm["name"]} is from type CT and cannot be live migrated!')
-                # Get the VM tags from API.
-                vm_tags = __get_vm_tags(api_object, node, vm['vmid'], 'ct')
-                if vm_tags is not None:
-                    group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)
-
-                # Get wildcard match for VMs to ignore if a wildcard pattern was
-                # previously found. Wildcards may slow down the task when using
-                # many patterns in the ignore list. Therefore, run this only if
-                # a wildcard pattern was found. We also do not need to validate
-                # this if the VM is already being ignored by a defined tag.
-                if vm_ignore_wildcard and not vm_ignore:
-                    vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)
-
-                if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
-                    vm_statistics[vm['name']] = {}
-                    vm_statistics[vm['name']]['group_include'] = group_include
-                    vm_statistics[vm['name']]['group_exclude'] = group_exclude
-                    vm_statistics[vm['name']]['cpu_total'] = vm['cpus']
-                    vm_statistics[vm['name']]['cpu_used'] = vm['cpu']
-                    vm_statistics[vm['name']]['memory_total'] = vm['maxmem']
-                    vm_statistics[vm['name']]['memory_used'] = vm['mem']
-                    vm_statistics[vm['name']]['disk_total'] = vm['maxdisk']
-                    vm_statistics[vm['name']]['disk_used'] = vm['disk']
-                    vm_statistics[vm['name']]['vmid'] = vm['vmid']
-                    vm_statistics[vm['name']]['node_parent'] = node['node']
-                    vm_statistics[vm['name']]['node_rebalance'] = node['node']
-                    vm_statistics[vm['name']]['storage'] = {}
-                    vm_statistics[vm['name']]['type'] = 'ct'
-
-                    # Get disk details of the related object.
-                    _vm_details = api_object.nodes(node['node']).lxc(vm['vmid']).config.get()
-                    logging.info(f'{info_prefix} Getting disk information for vm {vm["name"]}.')
-
-                    for vm_detail_key, vm_detail_value in _vm_details.items():
-                        # vm_detail_key_validator = re.sub('\d+$', '', vm_detail_key)
-                        vm_detail_key_validator = re.sub(r'\d+$', '', vm_detail_key)
-
-                        if vm_detail_key_validator in _vm_details_storage_allowed:
-                            vm_statistics[vm['name']]['storage'][vm_detail_key] = {}
-                            match = re.match(r'(?P[^:]+):(?P[^,]+),size=(?P\S+)', _vm_details[vm_detail_key])
-
-                            # Create an efficient match group and split the strings to assign them to the storage information.
-                            if match:
-                                _volume = match.group(1)
-                                _disk_name = match.group(2)
-                                _disk_size = match.group(3)
-
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['name'] = _disk_name
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['device_name'] = vm_detail_key
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['volume'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_parent'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_rebalance'] = _volume
-                                vm_statistics[vm['name']]['storage'][vm_detail_key]['size'] = _disk_size[:-1]
-                                logging.info(f'{info_prefix} Added disk for {vm["name"]}: Name {_disk_name} on volume {_volume} with size {_disk_size}.')
-                            else:
-                                logging.info(f'{info_prefix} No disks for {vm["name"]} found.')
-
-                    logging.info(f'{info_prefix} Added vm {vm["name"]}.')
+        # Get VM/CT objects only when the node is online and reachable.
+        if node['status'] == 'online':
+
+            # Add all virtual machines if type is vm or all.
+            if balancing_type == 'vm' or balancing_type == 'all':
+                for vm in api_object.nodes(node['node']).qemu.get():
+
+                    # Get the VM tags from API.
+                    vm_tags = __get_vm_tags(api_object, node, vm['vmid'], 'vm')
+                    if vm_tags is not None:
+                        group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)
+
+                    # Get wildcard match for VMs to ignore if a wildcard pattern was
+                    # previously found. Wildcards may slow down the task when using
+                    # many patterns in the ignore list. Therefore, run this only if
+                    # a wildcard pattern was found. We also do not need to validate
+                    # this if the VM is already being ignored by a defined tag.
+                    if vm_ignore_wildcard and not vm_ignore:
+                        vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)
+
+                    if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
+                        vm_statistics[vm['name']] = {}
+                        vm_statistics[vm['name']]['group_include'] = group_include
+                        vm_statistics[vm['name']]['group_exclude'] = group_exclude
+                        vm_statistics[vm['name']]['cpu_total'] = vm['cpus']
+                        vm_statistics[vm['name']]['cpu_used'] = vm['cpu']
+                        vm_statistics[vm['name']]['memory_total'] = vm['maxmem']
+                        vm_statistics[vm['name']]['memory_used'] = vm['mem']
+                        vm_statistics[vm['name']]['disk_total'] = vm['maxdisk']
+                        vm_statistics[vm['name']]['disk_used'] = vm['disk']
+                        vm_statistics[vm['name']]['vmid'] = vm['vmid']
+                        vm_statistics[vm['name']]['node_parent'] = node['node']
+                        vm_statistics[vm['name']]['node_rebalance'] = node['node']
+                        vm_statistics[vm['name']]['storage'] = {}
+                        vm_statistics[vm['name']]['type'] = 'vm'
+
+                        # Get disk details of the related object.
+                        _vm_details = api_object.nodes(node['node']).qemu(vm['vmid']).config.get()
+                        logging.info(f'{info_prefix} Getting disk information for vm {vm["name"]}.')
+
+                        for vm_detail_key, vm_detail_value in _vm_details.items():
+                            # vm_detail_key_validator = re.sub('\d+$', '', vm_detail_key)
+                            vm_detail_key_validator = re.sub(r'\d+$', '', vm_detail_key)
+
+                            if vm_detail_key_validator in _vm_details_storage_allowed:
+                                vm_statistics[vm['name']]['storage'][vm_detail_key] = {}
+                                match = re.match(r'([^:]+):[^/]+/(.+),iothread=\d+,size=(\d+G)', _vm_details[vm_detail_key])
+
+                                # Create an efficient match group and split the strings to assign them to the storage information.
+                                if match:
+                                    _volume = match.group(1)
+                                    _disk_name = match.group(2)
+                                    _disk_size = match.group(3)
+
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['name'] = _disk_name
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['device_name'] = vm_detail_key
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['volume'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_parent'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_rebalance'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['size'] = _disk_size[:-1]
+                                    logging.info(f'{info_prefix} Added disk for {vm["name"]}: Name {_disk_name} on volume {_volume} with size {_disk_size}.')
+                                else:
+                                    logging.info(f'{info_prefix} No (or unsupported) disk(s) for {vm["name"]} found.')
+
+                        logging.info(f'{info_prefix} Added vm {vm["name"]}.')
+
+            # Add all containers if type is ct or all.
+            if balancing_type == 'ct' or balancing_type == 'all':
+                for vm in api_object.nodes(node['node']).lxc.get():
+
+                    logging.warning(f'{warn_prefix} Rebalancing on LXC containers (CT) always requires them to shut down.')
+                    logging.warning(f'{warn_prefix} {vm["name"]} is from type CT and cannot be live migrated!')
+                    # Get the VM tags from API.
+                    vm_tags = __get_vm_tags(api_object, node, vm['vmid'], 'ct')
+                    if vm_tags is not None:
+                        group_include, group_exclude, vm_ignore = __get_proxlb_groups(vm_tags)
+
+                    # Get wildcard match for VMs to ignore if a wildcard pattern was
+                    # previously found. Wildcards may slow down the task when using
+                    # many patterns in the ignore list. Therefore, run this only if
+                    # a wildcard pattern was found. We also do not need to validate
+                    # this if the VM is already being ignored by a defined tag.
+                    if vm_ignore_wildcard and not vm_ignore:
+                        vm_ignore = __check_vm_name_wildcard_pattern(vm['name'], ignore_vms_list)
+
+                    if vm['status'] == 'running' and vm['name'] not in ignore_vms_list and not vm_ignore:
+                        vm_statistics[vm['name']] = {}
+                        vm_statistics[vm['name']]['group_include'] = group_include
+                        vm_statistics[vm['name']]['group_exclude'] = group_exclude
+                        vm_statistics[vm['name']]['cpu_total'] = vm['cpus']
+                        vm_statistics[vm['name']]['cpu_used'] = vm['cpu']
+                        vm_statistics[vm['name']]['memory_total'] = vm['maxmem']
+                        vm_statistics[vm['name']]['memory_used'] = vm['mem']
+                        vm_statistics[vm['name']]['disk_total'] = vm['maxdisk']
+                        vm_statistics[vm['name']]['disk_used'] = vm['disk']
+                        vm_statistics[vm['name']]['vmid'] = vm['vmid']
+                        vm_statistics[vm['name']]['node_parent'] = node['node']
+                        vm_statistics[vm['name']]['node_rebalance'] = node['node']
+                        vm_statistics[vm['name']]['storage'] = {}
+                        vm_statistics[vm['name']]['type'] = 'ct'
+
+                        # Get disk details of the related object.
+                        _vm_details = api_object.nodes(node['node']).lxc(vm['vmid']).config.get()
+                        logging.info(f'{info_prefix} Getting disk information for vm {vm["name"]}.')
+
+                        for vm_detail_key, vm_detail_value in _vm_details.items():
+                            # vm_detail_key_validator = re.sub('\d+$', '', vm_detail_key)
+                            vm_detail_key_validator = re.sub(r'\d+$', '', vm_detail_key)
+
+                            if vm_detail_key_validator in _vm_details_storage_allowed:
+                                vm_statistics[vm['name']]['storage'][vm_detail_key] = {}
+                                match = re.match(r'(?P[^:]+):(?P[^,]+),size=(?P\S+)', _vm_details[vm_detail_key])
+
+                                # Create an efficient match group and split the strings to assign them to the storage information.
+                                if match:
+                                    _volume = match.group(1)
+                                    _disk_name = match.group(2)
+                                    _disk_size = match.group(3)
+
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['name'] = _disk_name
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['device_name'] = vm_detail_key
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['volume'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_parent'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['storage_rebalance'] = _volume
+                                    vm_statistics[vm['name']]['storage'][vm_detail_key]['size'] = _disk_size[:-1]
+                                    logging.info(f'{info_prefix} Added disk for {vm["name"]}: Name {_disk_name} on volume {_volume} with size {_disk_size}.')
+                                else:
+                                    logging.info(f'{info_prefix} No disks for {vm["name"]} found.')
+
+                        logging.info(f'{info_prefix} Added vm {vm["name"]}.')
 
     logging.info(f'{info_prefix} Created VM statistics.')
     return vm_statistics
@@ -882,7 +885,7 @@ def balancing_vm_maintenance(proxlb_config, app_args, node_statistics, vm_statis
         return node_statistics, vm_statistics
 
     for node_name in maintenance_nodes_list:
-        node_vms = sorted(vm_statistics.items(), key=lambda item: item[0] if item[1]['node_parent'] == node_name else [])
+        node_vms = list(filter(lambda item: item[0] if item[1]['node_parent'] == node_name else [], vm_statistics.items()))
         # Update resource statistics for VMs and nodes.
         for vm in node_vms:
             resources_node_most_free = __get_most_free_resources_node(balancing_method, balancing_mode, balancing_mode_option, node_statistics)
@@ -954,8 +957,9 @@ def __validate_balanciness(balanciness, balancing_method, balancing_mode, node_s
             return False
 
         # Add node information to resource list.
-        node_resource_percent_list.append(int(node_info[f'{balancing_method}_{node_resource_selector}_percent']))
-        logging.debug(f'{info_prefix} Node: {node_name} with values: {node_info}')
+        if not node_statistics[node_name]['maintenance']:
+            node_resource_percent_list.append(int(node_info[f'{balancing_method}_{node_resource_selector}_percent']))
+            logging.debug(f'{info_prefix} Node: {node_name} with values: {node_info}')
 
     # Create a sorted list of the delta + balanciness between the node resources.
     node_resource_percent_list_sorted = sorted(node_resource_percent_list)
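
Background notes on the two fixes (not part of the patch itself):

In get_vm_statistics(), the new `if node['status'] == 'online':` guard simply skips any node that the Proxmox API does not report as online, so guests on unreachable nodes (for example while a node is down for maintenance) are no longer queried and the statistics run no longer fails there.

In balancing_vm_maintenance(), the removed line used sorted() with a key function that returned a string (the VM name) for guests on the maintenance node and an empty list for everything else. As soon as both key types occurred, Python 3 raised a TypeError (str and list cannot be ordered), which is the crash described in the changelog; sorted() also never filtered anything. The replacement keeps only the matching guests via filter(), whose predicate is only evaluated for truthiness. Below is a minimal, self-contained sketch of the difference; the simplified vm_statistics dictionary and the node/VM names are illustrative only, not ProxLB's full data structure.

vm_statistics = {
    'vm01': {'node_parent': 'node01'},
    'vm02': {'node_parent': 'node02'},
}
node_name = 'node01'  # node assumed to be in maintenance mode

# Old approach: mixed key types (str vs. list) make sorted() raise TypeError.
try:
    sorted(vm_statistics.items(),
           key=lambda item: item[0] if item[1]['node_parent'] == node_name else [])
except TypeError as error:
    print(f'old code crashes: {error}')

# New approach: filter() only checks the truthiness of the returned value, so
# the mixed return types are harmless and only guests whose parent node is the
# maintenance node are kept.
node_vms = list(filter(lambda item: item[0] if item[1]['node_parent'] == node_name else [],
                       vm_statistics.items()))
print(node_vms)  # [('vm01', {'node_parent': 'node01'})]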