Apply PR feedback

Happy-Algorithms-League · Sep 4, 2020 · 50f887c · 50f887c
1 parent daf0a65
commit 50f887c
Show file tree

Hide file tree

Showing 4 changed files with 72 additions and 127 deletions.
diff --git a/cgp/genome.py b/cgp/genome.py
@@ -138,7 +138,7 @@ def __repr__(self) -> str:
         return s
 
     def determine_permissible_values_per_gene(self, gene_idx: int) -> np.ndarray:
-        region_idx = gene_idx // self._length_per_region
+        region_idx = self._get_region_idx(gene_idx)
 
         if self._is_input_region(region_idx):
             return self._determine_permissible_values_input_region(gene_idx)
@@ -273,16 +273,10 @@ def reorder(self, rng: np.random.RandomState) -> None:
         Shuffle node ordering of internal (hidden) nodes in genome without changing node behavior.
         (Goldman 2015, DOI: 10.1109/TEVC.2014.2324539)
 
-        From a set of nodes with no dependencies (addable_nodes)
-        a (pseudo-) random node is placed at the first internal position.
-        After updating the set of addable nodes,
-        a (pseudo-) random node for the next position is picked.
-        This procedure is repeated until all nodes are placed.
-
-        Gene values are updated to correspond to the new address of the corresponding node.
-
-        Invalid gene values (from unused arities of eg. constant operator nodes)
-        are replaced by (pseudo-) random picks of permissible values for that position.
+        During reordering, inactive genes, e.g., input genes of nodes with arity zero, are not taken
+        into account and can hence have invalid values after reordering.
+        These invalid values are replaced by random values
+        for the respective gene after reordering.
 
         Parameters
         ----------
@@ -300,38 +294,38 @@ def reorder(self, rng: np.random.RandomState) -> None:
             Warning("levels_back must be equal n_columns, reorder is not applied")
             return
 
-        dna_for_reorder = self._dna.copy()
+        dna = self._dna.copy()
 
-        dependencies = self._determine_dependencies()
+        node_dependencies = self._determine_node_dependencies()
 
-        addable_nodes = set(int(idx_str) for idx_str, item in dependencies.items() if not item)
+        addable_nodes = self._get_addable_nodes(node_dependencies)
 
         node_idx = self._n_inputs  # First position to be placed is after inputs
-        used_idx_list: List = []
+        used_node_indices: List[int] = []
 
         while len(addable_nodes) > 0:
-            # pick one of the addable nodes
-            current_idx = rng.choice(list(addable_nodes))
 
-            dna_for_reorder = self._exchange_dna_segments(
-                dna_for_reorder, idx_old_location=current_idx, idx_new_location=node_idx
+            current_node_idx = rng.choice(list(addable_nodes))
+
+            dna = self._copy_dna_segment(
+                dna, idx_old_location=current_node_idx, idx_new_location=node_idx
             )
 
-            for _, current_dependencies in dependencies.items():
-                current_dependencies.discard(current_idx)
+            for dependencies in node_dependencies.values():
+                dependencies.discard(current_node_idx)
 
-            addable_nodes = self._update_addable_nodes(
-                dependencies, used_idx_list, idx_used=current_idx
+            [addable_nodes, used_node_indices] = self._update_addable_nodes(
+                node_dependencies, used_node_indices, current_idx=current_node_idx
             )
             node_idx += 1
 
-        self._update_gene_values(dna_for_reorder, used_idx_list)
-        self._replace_invalid_gene_values(dna_for_reorder, rng)
+        self._update_gene_values(dna, used_node_indices)
+        self._replace_invalid_gene_values(dna, rng)
 
-        self.dna = dna_for_reorder
+        self.dna = dna
 
-    def _exchange_dna_segments(
-        self, dna_new: List[int], idx_old_location: int, idx_new_location: int
+    def _copy_dna_segment(
+        self, dna: List[int], idx_old_location: int, idx_new_location: int
     ) -> List[int]:
         """ Moves a nodes dna from its old node location to a new location. """
 
@@ -340,55 +334,67 @@ def _exchange_dna_segments(
             * self._length_per_region : (idx_old_location + 1)
             * self._length_per_region
         ]
-        dna_new[
+        dna[
             idx_new_location
             * self._length_per_region : (idx_new_location + 1)
             * self._length_per_region
         ] = node_dna
 
-        return dna_new
+        return dna
 
     def _update_addable_nodes(
-        self, dependencies: Dict, used_idx_list: List[int], idx_used: int
-    ) -> set:
+        self, node_dependencies: Dict, used_node_indices: List[int], current_idx: int
+    ) -> [set, List[int]]:
         """ Update the set of addable nodes,
         by reevaluating which nodes haves no dependencies
         and remove nodes which were already placed from the list.
         """
-        addable_nodes = set(int(idx_str) for idx_str, item in dependencies.items() if not item)
-        used_idx_list.append(idx_used)
-        addable_nodes = addable_nodes.difference(used_idx_list)
+        addable_nodes = self._get_addable_nodes(node_dependencies)
+        used_node_indices.append(current_idx)
+        addable_nodes = addable_nodes.difference(used_node_indices)
 
-        return addable_nodes
+        return addable_nodes, used_node_indices
 
-    def _update_gene_values(self, dna_for_reorder: List[int], used_idx_list: List[int]) -> None:
+    def _update_gene_values(self, dna: List[int], used_node_indices: List[int]) -> None:
         """ Update gene values to correspond to the new position of the corresponding node"""
-        for gene_idx, gene_value in enumerate(dna_for_reorder):
-            region_idx = gene_idx // self._length_per_region
+        for gene_idx, gene_value in enumerate(dna):
+            region_idx = self._get_region_idx(gene_idx)
             if self._is_hidden_input_gene(gene_idx, region_idx) or self._is_output_input_gene(
                 gene_idx
             ):
-                try:
-                    gene_value = self._n_inputs + used_idx_list.index(gene_value)
-                except ValueError:  # means the input is from an input node -> do nothing
-                    pass
-            dna_for_reorder[gene_idx] = gene_value
+                if gene_value >= self._n_inputs:
+                    gene_value = self._n_inputs + used_node_indices.index(gene_value)
+            dna[gene_idx] = gene_value
 
     def _replace_invalid_gene_values(
         self, dna_for_reorder: List[int], rng: np.random.RandomState
     ) -> None:
         """ Replace gene values of unused arities
-        by (pseudo-) random picks of permissible values for that node
+        by (pseudo-) random picks of permissible values for that node.
+        Works only if self.n_rows == 1.
         """
+        if not self._n_rows == 1:
+            raise ValueError("Replacing invalid gene values only implemented for n_rows = 1")
+
         for gene_idx, gene_value in enumerate(dna_for_reorder):
-            region_idx = gene_idx // self._length_per_region
+            region_idx = self._get_region_idx(gene_idx)
             if self._is_hidden_input_gene(gene_idx, region_idx) and gene_value > region_idx:
-                # replace value that is too large by smaller value
+                # replace value that is too large by valid value
                 permissible_values = self.determine_permissible_values_per_gene(gene_idx)
                 gene_value = rng.choice(permissible_values)
                 dna_for_reorder[gene_idx] = gene_value
 
-    def _determine_dependencies(self) -> Dict:
+    def _get_addable_nodes(self, node_dependencies: Dict) -> set:
+
+        return set(
+            idx for idx, dependencies in node_dependencies.items() if len(dependencies) == 0
+        )
+
+    def _get_region_idx(self, gene_idx):
+
+        return gene_idx // self._length_per_region
+
+    def _determine_node_dependencies(self) -> Dict:
         """ Determines for every node a set of dependencies.
 
             The set of dependencies for a node is given by the indices of the nodes,
@@ -416,31 +422,28 @@ def _determine_dependencies(self) -> Dict:
 
                 operator_idx = region_idx * self._length_per_region
 
-                # only consider genes which are used in current node
                 current_arity = self._determine_operator_arity(operator_idx)
 
-                # append each input to the dependencies
                 for idx_gene in range(
                     1, current_arity + 1
                 ):  # shift by 1 since first gene is the operator gene
                     input_node_idx = self._dna[operator_idx + idx_gene]
                     if not self._is_input_region(
                         input_node_idx
                     ):  # not necessary to add input regions, since they are first anyway
-                        current_node_dependencies.add(input_node_idx)  # append the input node &
+                        current_node_dependencies.add(input_node_idx)
 
-                dependencies[str(region_idx)] = current_node_dependencies
+                dependencies[region_idx] = current_node_dependencies
 
             else:
                 assert False  # should never be reached
 
         return dependencies
 
-    def _determine_operator_arity(self, operator_idx):
+    def _determine_operator_arity(self, operator_idx: int) -> int:
 
         operator_value = self._dna[operator_idx]
-        arity = self._primitives[operator_value]._arity
-        return arity
+        return self._primitives[operator_value]._arity
 
     def _permissible_inputs(self, region_idx: int) -> List[int]:
 
@@ -630,7 +633,7 @@ def mutate(self, mutation_rate: float, rng: np.random.RandomState):
 
         for (gene_idx, allele) in zip(selected_gene_indices, np.array(dna)[selected_gene_indices]):
 
-            region_idx = gene_idx // self._length_per_region
+            region_idx = self._get_region_idx(gene_idx)
 
             permissible_values = self._permissible_values[gene_idx]
             permissible_alternative_values = permissible_values[permissible_values != allele]

diff --git a/cgp/hl_api.py b/cgp/hl_api.py
@@ -47,10 +47,9 @@ def evolve(
         not implemented. Defaults to 1.
     reorder_genome: bool, optional
         Whether genome reordering should be applied.
-        If True, reorder is applied to the parents (genome) before creating offspring.
-        Since the parents genome changes to a reordered genome at every generation,
-        this creates a neutral drift through the search space.
-        Defaults to False
+        If True, reorder is applied to the parents' genomes
+        at every generation before creating offspring.
+        Reorder randomizes the genotype of an individual without changing the phenotype.
     Returns
     -------
     None
@@ -65,7 +64,6 @@ def evolve(
     # Main loop: -1 offset since the last loop iteration will still increase generation by one
     while pop.generation < max_generations - 1:
 
-        # reorder the genome of the parents before creating offspring
         if reorder_genome:
             pop.reorder_genome()
 

diff --git a/cgp/population.py b/cgp/population.py
@@ -132,6 +132,9 @@ def fitness_parents(self) -> List[Union[None, float]]:
     def reorder_genome(self) -> None:
         """ Reorders the genome for all parents in the population
 
+        Creates a neutral drift through the search space,
+        by reordering parents and not offspring.
+
         Returns
         ---------
         None

diff --git a/test/test_genome.py b/test/test_genome.py
@@ -555,66 +555,6 @@ def test_genome_reordering_empirically(rng):
 
     pytest.importorskip("sympy")
 
-    # target: f(x_0) = x_0 ** 2 - x_0 + 1
-
-    genome_params = {
-        "n_inputs": 1,
-        "n_outputs": 1,
-        "n_columns": 10,
-        "n_rows": 1,
-        "levels_back": None,
-        "primitives": (cgp.Mul, cgp.Sub, cgp.Add, cgp.ConstantFloat),
-    }
-
-    genome = cgp.Genome(**genome_params)
-
-    dna_fixed = [
-        ID_INPUT_NODE,
-        ID_NON_CODING_GENE,
-        ID_NON_CODING_GENE,
-        0,  # Mul -> outs x² (address 1)
-        0,  # x
-        0,  # x
-        1,  # Sub -> x² - x (address 2)
-        1,  # x²
-        0,  # x
-        1,  # Sub -> outs 0 (address 3)
-        0,  # x
-        0,  # x
-        3,  # const -> outs 1 (address 4)
-        1,  # address of x² (unused)
-        2,  # address of 0 (unused)
-        3,  # const -> outs 1 (address 5)
-        0,
-        0,
-        2,  # Add -> x² - x + 1 (address 6)
-        2,  # x² - x
-        4,  # 1
-        3,  # const (address 7)
-        0,
-        0,
-        3,  # const (address 8)
-        0,
-        0,
-        3,  # const (address 9)
-        0,
-        0,
-        3,  # const (address 10)
-        0,
-        0,
-        ID_OUTPUT_NODE,
-        6,
-        ID_NON_CODING_GENE,
-    ]
-
-    genome.dna = dna_fixed
-    sympy_expression = cgp.CartesianGraph(genome).to_sympy()
-    n_reorderings = 100
-    for _ in range(n_reorderings):
-        genome.reorder(rng)
-        sympy_expression_after_reorder = cgp.CartesianGraph(genome).to_sympy()
-        assert sympy_expression_after_reorder == sympy_expression
-
     # target: f(x_0,x_1) = x_0 ** 2 - x_1 + 1
 
     genome_params = {
@@ -635,23 +575,23 @@ def test_genome_reordering_empirically(rng):
         ID_INPUT_NODE,
         ID_NON_CODING_GENE,
         ID_NON_CODING_GENE,
-        0,  # Mul -> outs x² (address 2)
+        0,  # Mul -> outs x_0² (address 2)
         0,  # x
         0,  # x
-        1,  # Sub -> x² - y (address 3)
+        1,  # Sub -> x_0² - x_1 (address 3)
         2,  # x²
-        1,  # x
+        1,  # y
         1,  # Sub -> outs 0 (address 4)
         0,  # x
         0,  # x
         3,  # const -> outs 1 (address 5)
-        2,  # address of x² (unused)
+        2,  # address of x_0² (unused)
         3,  # address of 0 (unused)
         3,  # const -> outs 1 (address 6)
         0,
         0,
-        2,  # Add -> x² - x + 1 (address 7)
-        3,  # x² - x
+        2,  # Add -> x_0² - x_1 + 1 (address 7)
+        3,  # x_0² - x_1
         5,  # 1
         3,  # const (address 8)
         0,
@@ -672,6 +612,7 @@ def test_genome_reordering_empirically(rng):
 
     genome.dna = dna_fixed
     sympy_expression = cgp.CartesianGraph(genome).to_sympy()
+    n_reorderings = 100
     for _ in range(n_reorderings):
         genome.reorder(rng)
         sympy_expression_after_reorder = cgp.CartesianGraph(genome).to_sympy()