diff --git a/documentation/Troubleshooting.md b/documentation/Troubleshooting.md index b7241f168..561e8870e 100644 --- a/documentation/Troubleshooting.md +++ b/documentation/Troubleshooting.md @@ -4,7 +4,7 @@ This section provides troubleshooting information for Kubemarine and Kubernetes - [KME0001: Unexpected exception](#kme0001-unexpected-exception) - [KME0002: Remote group exception](#kme0002-remote-group-exception) - [Command did not complete within a number of seconds](#command-did-not-complete-within-a-number-of-seconds) - - [KME0004: There are no workers defined in the cluster scheme](#kme0004-there-are-no-workers-defined-in-the-cluster-scheme) + - [KME0004: There are no control planes defined in the cluster scheme](#kme0004-there-are-no-control-planes-defined-in-the-cluster-scheme) - [KME0005: {hostnames} are not sudoers](#kme0005-hostnames-are-not-sudoers) - [Troubleshooting Tools](#troubleshooting-tools) - [etcdctl Script](#etcdctl-script) @@ -168,18 +168,18 @@ frozen stage of the procedure. It will be useful to check the cluster with [IAAS checker](Kubecheck.md#iaas-procedure) to detect problems with network connectivity. -## KME0004: There are no workers defined in the cluster scheme +## KME0004: There are no control planes defined in the cluster scheme ``` FAILURE! -KME0004: There are no workers defined in the cluster scheme +KME0004: There are no control planes defined in the cluster scheme ``` -An error related with the absence of any worker role in the inventory file. The error occurs before +An error related to the absence of any control plane role in the inventory file. The error occurs before the payload is executed on the cluster. -To fix it, you need to either specify new nodes with the `worker` role, or add the `worker` role to -the existing control-planes nodes. +To fix it, you need to either specify new nodes with the `control-plane` role, or add the `control-plane` role to +the existing worker nodes. 
An example of specifying different nodes with separate `control-plane` and `worker` roles is as follows. diff --git a/kubemarine/core/errors.py b/kubemarine/core/errors.py index 18fe71eff..a4a67708c 100644 --- a/kubemarine/core/errors.py +++ b/kubemarine/core/errors.py @@ -33,7 +33,7 @@ def get_kme_dictionary() -> dict: "name": "Remote group exception\n{reason}" }, "KME0004": { - "name": "There are no workers defined in the cluster scheme" + "name": "There are no control planes defined in the cluster scheme" }, "KME0005": { "name": "{hostnames} are not sudoers" diff --git a/kubemarine/core/resources.py b/kubemarine/core/resources.py index 8dcc06e56..7fd82c7be 100644 --- a/kubemarine/core/resources.py +++ b/kubemarine/core/resources.py @@ -431,6 +431,7 @@ def enrichment_functions(self) -> List[c.EnrichmentFunction]: kubemarine.core.defaults.apply_connection_defaults, kubemarine.core.defaults.calculate_nodegroups, kubemarine.core.defaults.remove_service_roles, + kubemarine.kubernetes.verify_roles, # Enrichment of inventory for LIGHT stage should be finished at this step. # Should be just after compilation, but currently not necessary for LIGHT stage. 
diff --git a/kubemarine/kubernetes/__init__.py b/kubemarine/kubernetes/__init__.py index fb7a363e0..4c478b5ac 100644 --- a/kubemarine/kubernetes/__init__.py +++ b/kubemarine/kubernetes/__init__.py @@ -106,6 +106,12 @@ def enrich_reconfigure_inventory(cluster: KubernetesCluster) -> None: default_merger.merge(cluster.inventory.setdefault('services', {}), utils.deepcopy_yaml(kubeadm_sections)) +@enrichment(EnrichmentStage.ALL) +def verify_roles(cluster: KubernetesCluster) -> None: + if cluster.make_group_from_roles(['control-plane']).is_empty(): + raise KME("KME0004") + + @enrichment(EnrichmentStage.FULL) def enrich_inventory(cluster: KubernetesCluster) -> None: inventory = cluster.inventory @@ -136,8 +142,6 @@ def enrich_inventory(cluster: KubernetesCluster) -> None: if name not in certsans: certsans.append(name) - any_worker_found = False - # validating node labels and configuring additional labels for node in inventory["nodes"]: if "control-plane" not in node["roles"] and "worker" not in node["roles"]: @@ -150,8 +154,6 @@ def enrich_inventory(cluster: KubernetesCluster) -> None: continue if "worker" in node["roles"]: - any_worker_found = True - if "labels" not in node: node["labels"] = {} node["labels"]["node-role.kubernetes.io/worker"] = "worker" @@ -175,9 +177,6 @@ def enrich_inventory(cluster: KubernetesCluster) -> None: if control_plane_item != 'kubelet' and ('control-plane' not in node['roles']): raise Exception(ERROR_CONTROL_PLANE_PATCH_NOT_CONTROL_PLANE_NODE % control_plane_item) - if not any_worker_found: - raise KME("KME0004") - # check ignorePreflightErrors value and add mandatory errors from defaults.yaml if they're absent default_preflight_errors = static.DEFAULTS["services"]["kubeadm_flags"]["ignorePreflightErrors"].split(",") preflight_errors = inventory["services"]["kubeadm_flags"]["ignorePreflightErrors"].split(",") @@ -851,7 +850,7 @@ def verify_upgrade_versions(cluster: KubernetesCluster) -> None: first_control_plane = 
cluster.nodes['control-plane'].get_first_member() upgrade_version = get_kubernetes_version(cluster.inventory) - k8s_nodes_group = cluster.nodes["worker"].include_group(cluster.nodes['control-plane']) + k8s_nodes_group = cluster.make_group_from_roles(['control-plane', 'worker']) for node in k8s_nodes_group.get_ordered_members_list(): cluster.log.debug(f"Verifying current k8s version for node {node.get_node_name()}") result = first_control_plane.sudo("kubectl get nodes " diff --git a/test/unit/test_defaults.py b/test/unit/test_defaults.py index 3979f65cb..7e31d663c 100755 --- a/test/unit/test_defaults.py +++ b/test/unit/test_defaults.py @@ -242,6 +242,22 @@ def test_controlplain_skip_vrrp_ips_assigned_to_removed_balancer(self): self.assertEqual(inventory['control_plain']['internal'], inventory['vrrp_ips'][1]['ip']) self.assertEqual(inventory['control_plain']['external'], inventory['vrrp_ips'][1]['floating_ip']) + def test_single_control_plane(self): + inventory = demo.generate_inventory(master=['node-1'], worker=['node-1'], balancer=0) + inventory['nodes'][0]['roles'].remove('worker') + cluster = demo.new_cluster(inventory) + self.assertTrue(cluster.make_group_from_roles(['worker']).is_empty()) + + def test_error_no_control_planes_balancers(self): + inventory = demo.generate_inventory(master=0, worker=1, balancer=0) + with test_utils.assert_raises_kme(self, 'KME0004'): + demo.new_cluster(inventory) + + def test_error_no_control_planes(self): + inventory = demo.generate_inventory(master=0, worker=1, balancer=1) + with test_utils.assert_raises_kme(self, 'KME0004'): + demo.new_cluster(inventory) + class PrimitiveValuesAsString(unittest.TestCase): def test_default_enrichment(self): diff --git a/test/unit/test_inventory.py b/test/unit/test_inventory.py index 4d7892e6e..63cc70ab6 100755 --- a/test/unit/test_inventory.py +++ b/test/unit/test_inventory.py @@ -25,16 +25,20 @@ class TestInventoryValidation(unittest.TestCase): def test_labels_check(self): - inventory = 
demo.generate_inventory(master=0, balancer=1, worker=0) - inventory["nodes"][0]["labels"] = {"should": "fail"} + inventory = demo.generate_inventory(master=1, balancer=1, worker=0) + for node in inventory['nodes']: + if 'balancer' in node['roles']: + node["labels"] = {"should": "fail"} with self.assertRaises(Exception) as context: demo.new_cluster(inventory) self.assertIn("Only 'worker' or 'control-plane' nodes can have labels", str(context.exception)) def test_taints_check(self): - inventory = demo.generate_inventory(master=0, balancer=1, worker=0) - inventory["nodes"][0]["taints"] = ["should fail"] + inventory = demo.generate_inventory(master=1, balancer=1, worker=0) + for node in inventory['nodes']: + if 'balancer' in node['roles']: + node["taints"] = ["should fail"] with self.assertRaises(Exception) as context: demo.new_cluster(inventory)