diff --git a/flare/env.py b/flare/env.py index 350f46bd0..58f06b02a 100644 --- a/flare/env.py +++ b/flare/env.py @@ -2,7 +2,7 @@ environment of an atom. :class:`AtomicEnvironment` objects are inputs to the 2-, 3-, and 2+3-body kernels.""" import numpy as np -from math import sqrt +from math import sqrt, ceil from numba import njit from flare.struc import Structure @@ -21,12 +21,16 @@ class AtomicEnvironment: :type cutoffs: np.ndarray """ - def __init__(self, structure: Structure, atom: int, cutoffs, sweep=1): + def __init__(self, structure: Structure, atom: int, cutoffs): self.structure = structure self.positions = structure.wrapped_positions self.cell = structure.cell self.species = structure.coded_species - self.sweep_array = np.arange(-sweep, sweep+1, 1) + + # Set the sweep array based on the max cutoff. + sweep_val = ceil(np.max(cutoffs) / structure.max_cutoff) + self.sweep_val = sweep_val + self.sweep_array = np.arange(-sweep_val, sweep_val + 1, 1) self.atom = atom self.ctype = structure.coded_species[atom] @@ -107,7 +111,7 @@ def from_dict(dictionary): cutoffs = dictionary['cutoffs'] else: cutoffs = [] - for cutoff in ['cutoff_2','cutoff_3','cutoff_mb']: + for cutoff in ['cutoff_2', 'cutoff_3', 'cutoff_mb']: if dictionary.get(cutoff): cutoffs.append(dictionary[cutoff]) @@ -212,10 +216,12 @@ class to allow for njit acceleration with Numba. @njit -def get_2_body_arrays_ind(positions, atom: int, cell, cutoff_2: float, species): - """Returns distances, coordinates, species of atoms, and indexes of neighbors - in the 2-body local environment. This method is implemented outside - the AtomicEnvironment class to allow for njit acceleration with Numba. +def get_2_body_arrays_ind(positions, atom: int, cell, cutoff_2: float, + species: np.ndarray): + """Returns distances, coordinates, species of atoms, and indexes of + neighbors in the 2-body local environment. This method is implemented + outside the AtomicEnvironment class to allow for njit acceleration + with Numba. :param positions: Positions of atoms in the structure. :type positions: np.ndarray @@ -377,10 +383,11 @@ def get_3_body_arrays(bond_array_2, bond_positions_2, cutoff_3: float): @njit def get_m_body_arrays(positions, atom: int, cell, cutoff_mb: float, species, sweep: np.ndarray): - """Returns distances, and species of atoms in the many-body - local environment, and returns distances and numbers of neighbours for atoms in the one - many-body local environment. This method is implemented outside the AtomicEnvironment - class to allow for njit acceleration with Numba. + """Returns distances, and species of atoms in the many-body local + environment, and returns distances and numbers of neighbours for atoms + in the one many-body local environment. This method is implemented + outside the AtomicEnvironment class to allow for njit acceleration + with Numba. :param positions: Positions of atoms in the structure. :type positions: np.ndarray @@ -393,7 +400,8 @@ class to allow for njit acceleration with Numba. :type cutoff_mb: float :param species: Numpy array of species represented by their atomic numbers. :type species: np.ndarray - :param indexes: Boolean indicating whether indexes of neighbours are returned + :param indexes: Boolean indicating whether indexes of neighbours are + returned :type indexes: boolean :return: Tuple of arrays describing pairs of atoms in the 2-body local environment. @@ -412,11 +420,13 @@ class to allow for njit acceleration with Numba. num_neighs_mb: number of neighbours of each atom in the local environment - etypes_mb_array: species of neighbours of each atom in the local environment + etypes_mb_array: species of neighbours of each atom in the local + environment :rtype: np.ndarray, np.ndarray, np.ndarray, np.ndarray """ - # TODO: this can be probably improved using stored arrays, redundant calls to get_2_body_arrays + # TODO: this can be probably improved using stored arrays, redundant calls + # to get_2_body_arrays # Get distances, positions, species and indexes of neighbouring atoms bond_array_mb, __, etypes, bond_inds = get_2_body_arrays_ind( positions, atom, cell, cutoff_mb, species) @@ -443,7 +453,8 @@ class to allow for njit acceleration with Numba. neigh_dists_mb[i, :num_neighs_mb[i]] = neighbouring_dists[i] etypes_mb_array[i, :num_neighs_mb[i]] = neighbouring_etypes[i] - return bond_array_mb, neigh_dists_mb, num_neighs_mb, etypes_mb_array, etypes + return bond_array_mb, neigh_dists_mb, num_neighs_mb, etypes_mb_array, \ + etypes if __name__ == '__main__': diff --git a/flare/gp.py b/flare/gp.py index 79c9d6c63..18a13a8c1 100644 --- a/flare/gp.py +++ b/flare/gp.py @@ -344,8 +344,7 @@ def check_instantiation(self): self.hyps_mask = None def update_db(self, struc: Structure, forces: List, - custom_range: List[int] = (), energy: float = None, - sweep: int = 1): + custom_range: List[int] = (), energy: float = None): """Given a structure and forces, add local environments from the structure to the training set of the GP. If energy is given, add the entire structure to the training set. @@ -370,7 +369,7 @@ def update_db(self, struc: Structure, forces: List, if forces is not None: for atom in update_indices: env_curr = \ - AtomicEnvironment(struc, atom, self.cutoffs, sweep=sweep) + AtomicEnvironment(struc, atom, self.cutoffs) forces_curr = np.array(forces[atom]) self.training_data.append(env_curr) @@ -386,7 +385,7 @@ def update_db(self, struc: Structure, forces: List, structure_list = [] # Populate with all environments of the struc for atom in range(noa): env_curr = \ - AtomicEnvironment(struc, atom, self.cutoffs, sweep=sweep) + AtomicEnvironment(struc, atom, self.cutoffs) structure_list.append(env_curr) self.energy_labels.append(energy) diff --git a/flare/struc.py b/flare/struc.py index dc9d235a3..2c27a189c 100644 --- a/flare/struc.py +++ b/flare/struc.py @@ -8,7 +8,8 @@ used to train ML models. """ import numpy as np -from flare.util import element_to_Z, Z_to_element, NumpyEncoder +from flare.util import element_to_Z, Z_to_element, NumpyEncoder, \ + get_max_cutoff from json import dumps, loads from typing import List, Union, Any @@ -60,6 +61,10 @@ def __init__(self, cell: 'ndarray', species: Union[List[str], List[int]], self.vec2 = self.cell[1, :] self.vec3 = self.cell[2, :] + # Compute the max cutoff compatible with a 3x3x3 supercell of the + # structure. + self.max_cutoff = get_max_cutoff(self.cell) + # get cell matrices for wrapping coordinates self.cell_transpose = self.cell.transpose() self.cell_transpose_inverse = np.linalg.inv(self.cell_transpose) @@ -129,7 +134,7 @@ def get_cell_dot(self): @staticmethod def raw_to_relative(positions: 'ndarray', cell_transpose: 'ndarray', - cell_dot_inverse: 'ndarray')-> 'ndarray': + cell_dot_inverse: 'ndarray') -> 'ndarray': """Convert Cartesian coordinates to relative (fractional) coordinates, expressed in terms of the cell vectors set in self.cell. @@ -153,7 +158,7 @@ def raw_to_relative(positions: 'ndarray', cell_transpose: 'ndarray', @staticmethod def relative_to_raw(relative_positions: 'ndarray', cell_transpose_inverse: 'ndarray', - cell_dot: 'ndarray')-> 'ndarray': + cell_dot: 'ndarray') -> 'ndarray': """Convert fractional coordinates to raw (Cartesian) coordinates. :param relative_positions: fractional coordinates. @@ -169,16 +174,16 @@ def relative_to_raw(relative_positions: 'ndarray', return np.matmul(np.matmul(relative_positions, cell_dot), cell_transpose_inverse) - def wrap_positions(self, in_place: bool = True)-> 'ndarray': + def wrap_positions(self, in_place: bool = True) -> 'ndarray': """ Convenience function which folds atoms outside of the unit cell back into the unit cell. in_place flag controls if the wrapped positions are set in the class. - :param in_place: If true, set the current structure - positions to be the wrapped positions. + :param in_place: If true, set the current structure positions to be + the wrapped positions. :return: Cartesian coordinates of positions all in unit cell - :rtype: np.ndarray + :rtype: np.ndarray """ rel_pos = \ self.raw_to_relative(self.positions, self.cell_transpose, @@ -262,7 +267,7 @@ def as_str(self) -> str: return dumps(self.as_dict(), cls=NumpyEncoder) @staticmethod - def from_dict(dictionary: dict)-> 'flare.struc.Structure': + def from_dict(dictionary: dict) -> 'flare.struc.Structure': """ Assembles a Structure object from a dictionary parameterizing one. @@ -283,7 +288,7 @@ def from_dict(dictionary: dict)-> 'flare.struc.Structure': return struc @staticmethod - def from_ase_atoms(atoms: 'ase.Atoms')-> 'flare.struc.Structure': + def from_ase_atoms(atoms: 'ase.Atoms') -> 'flare.struc.Structure': """ From an ASE Atoms object, return a FLARE structure @@ -296,12 +301,10 @@ def from_ase_atoms(atoms: 'ase.Atoms')-> 'flare.struc.Structure': species=atoms.get_chemical_symbols()) return struc - def to_ase_atoms(self)-> 'ase.Atoms': + def to_ase_atoms(self) -> 'ase.Atoms': from ase import Atoms - return Atoms(self.species_labels, - positions=self.positions, - cell=self.cell, - pbc=True) + return Atoms(self.species_labels, positions=self.positions, + cell=self.cell, pbc=True) def to_pmg_structure(self): """ @@ -328,7 +331,7 @@ def to_pmg_structure(self): ) @staticmethod - def from_pmg_structure(structure: 'pymatgen Structure')-> \ + def from_pmg_structure(structure: 'pymatgen Structure') -> \ 'flare Structure': """ Returns Pymatgen structure as FLARE structure. @@ -357,11 +360,9 @@ def from_pmg_structure(structure: 'pymatgen Structure')-> \ return new_struc - def to_xyz(self, extended_xyz: bool = True, - print_stds: bool = False, - print_forces : bool = False, - print_max_stds: bool = False, - write_file: str = '')->str: + def to_xyz(self, extended_xyz: bool = True, print_stds: bool = False, + print_forces: bool = False, print_max_stds: bool = False, + write_file: str = '') -> str: """ Convenience function which turns a structure into an extended .xyz file; useful for further input into visualization programs like VESTA @@ -422,19 +423,19 @@ def to_xyz(self, extended_xyz: bool = True, @staticmethod def from_file(file_name, format='') -> Union['flare.struc.Structure', - List['flare.struc.Structure']]: + List['flare.struc.Structure'] + ]: """ Load a FLARE structure from a file or a series of FLARE structures :param file_name: :param format: :return: """ - - try: - with open(file_name, 'r') as _: - pass - except: - raise FileNotFoundError + + # Ensure the file specified exists. + with open(file_name, 'r') as _: + pass + if 'xyz' in file_name or 'xyz' in format.lower(): raise NotImplementedError @@ -456,16 +457,16 @@ def from_file(file_name, format='') -> Union['flare.struc.Structure', else: return structures - is_poscar = 'POSCAR' in file_name or 'CONTCAR' in file_name or 'vasp' in format.lower() + is_poscar = 'POSCAR' in file_name or 'CONTCAR' in file_name \ + or 'vasp' in format.lower() if is_poscar and _pmg_present: pmg_structure = pmgvaspio.Poscar.from_file(file_name).structure return Structure.from_pmg_structure(pmg_structure) elif is_poscar and not _pmg_present: - raise ImportError("Pymatgen not imported; " \ + raise ImportError("Pymatgen not imported; " "functionality requires pymatgen.") - def get_unique_species(species: List[Any]) -> (List, List[int]): """ Returns a list of the unique species passed in, and a list of @@ -485,5 +486,3 @@ def get_unique_species(species: List[Any]) -> (List, List[int]): coded_species = np.array(coded_species) return unique_species, coded_species - - diff --git a/flare/util.py b/flare/util.py index f43b277e7..ce87bcaa5 100644 --- a/flare/util.py +++ b/flare/util.py @@ -19,15 +19,16 @@ def get_random_velocities(noa: int, temperature: float, mass: float): mass (float): Mass of each particle in amu. Returns: - np.ndarray: Particle velocities, corrected to give zero center of mass motion. + np.ndarray: Particle velocities, corrected to give zero center of mass + motion. """ - + # Use FLARE mass units (time = ps, length = A, energy = eV) mass_md = mass * 0.000103642695727 kb = 0.0000861733034 std = np.sqrt(kb * temperature / mass_md) velocities = np.random.normal(scale=std, size=(noa, 3)) - + # Remove center-of-mass motion vel_sum = np.sum(velocities, axis=0) corrected_velocities = velocities - vel_sum / noa @@ -44,7 +45,8 @@ def multicomponent_velocities(temperature: float, masses: List[float]): masses (List[float]): Particle masses in amu. Returns: - np.ndarray: Particle velocities, corrected to give zero center of mass motion. + np.ndarray: Particle velocities, corrected to give zero center of mass + motion. """ noa = len(masses) @@ -294,7 +296,8 @@ class NumpyEncoder(JSONEncoder): json.dumps(... cls = NumpyEncoder) - Thanks to StackOverflow users karlB and fnunnari, who contributed this from: + Thanks to StackOverflow users karlB and fnunnari, who contributed this + from: `https://stackoverflow.com/a/47626762` """ @@ -333,7 +336,7 @@ def Z_to_element(Z: int) -> str: def is_std_in_bound(std_tolerance: float, noise: float, structure: 'flare.struc.Structure', - max_atoms_added: int = inf)-> (bool, List[int]): + max_atoms_added: int = inf) -> (bool, List[int]): """ Given an uncertainty tolerance and a structure decorated with atoms, species, and associated uncertainties, return those which are above a @@ -383,7 +386,8 @@ def is_std_in_bound_per_species(rel_std_tolerance: float, abs_std_tolerance: float, noise: float, structure: 'flare.struc.Structure', max_atoms_added: int = inf, - max_by_species: dict = {})-> (bool, List[int]): + max_by_species: dict = {}) -> (bool, + List[int]): """ Checks the stds of GP prediction assigned to the structure, returns a list of atoms which either meet an absolute threshold or a relative @@ -473,9 +477,9 @@ def is_force_in_bound_per_species(abs_force_tolerance: float, label_forces: 'ndarray', structure, max_atoms_added: int = inf, - max_by_species: dict ={}, + max_by_species: dict = {}, max_force_error: float - = inf)-> (bool, List[int]): + = inf) -> (bool, List[int]): """ Checks the forces of GP prediction assigned to the structure against a DFT calculation, and return a list of atoms which meet an absolute @@ -532,7 +536,7 @@ def is_force_in_bound_per_species(abs_force_tolerance: float, # conclude if len(target_atoms) == max_atoms_added or \ (max_error_components[i] < abs_force_tolerance and - max_error_components[i] != np.nan): + max_error_components[i] != np.nan): break cur_spec = structure.species_labels[i] @@ -553,7 +557,7 @@ def is_force_in_bound_per_species(abs_force_tolerance: float, def subset_of_frame_by_element(frame: 'flare.Structure', - predict_atoms_per_element: dict)->List[int]: + predict_atoms_per_element: dict) -> List[int]: """ Given a structure and a dictionary formatted as {"Symbol":int, ..} describing a number of atoms per element, return a sorted list of @@ -584,7 +588,7 @@ def subset_of_frame_by_element(frame: 'flare.Structure', if len(matching_atoms) == 0: continue # Choose the atoms to add - to_add_atoms = np.random.choice(matching_atoms,replace=False, + to_add_atoms = np.random.choice(matching_atoms, replace=False, size=min(n, len(matching_atoms))) return_atoms += list(to_add_atoms) @@ -592,4 +596,53 @@ def subset_of_frame_by_element(frame: 'flare.Structure', return_atoms.sort() - return return_atoms \ No newline at end of file + return return_atoms + + +def get_max_cutoff(cell: np.ndarray) -> float: + """Compute the maximum cutoff compatible with a 3x3x3 supercell of a + structure. Called in the Structure constructor when + setting the max_cutoff attribute, which is used to create local + environments with arbitrarily large cutoff radii. + + Args: + cell (np.ndarray): Bravais lattice vectors of the structure stored as + rows of a 3x3 Numpy array. + + Returns: + float: Maximum cutoff compatible with a 3x3x3 supercell of the + structure. + """ + + # Retrieve the lattice vectors. + a_vec = cell[0] + b_vec = cell[1] + c_vec = cell[2] + + # Compute dot products and norms of lattice vectors. + a_dot_b = np.dot(a_vec, b_vec) + a_dot_c = np.dot(a_vec, c_vec) + b_dot_c = np.dot(b_vec, c_vec) + + a_norm = np.linalg.norm(a_vec) + b_norm = np.linalg.norm(b_vec) + c_norm = np.linalg.norm(c_vec) + + # Compute the six independent altitudes of the cell faces. + # The smallest is the maximum atomic environment cutoff that can be + # used with sweep=1. + max_candidates = np.zeros(6) + max_candidates[0] = \ + a_norm * np.sqrt(1 - (a_dot_b / (a_norm * b_norm))**2) + max_candidates[1] = \ + b_norm * np.sqrt(1 - (a_dot_b / (a_norm * b_norm))**2) + max_candidates[2] = \ + a_norm * np.sqrt(1 - (a_dot_c / (a_norm * c_norm))**2) + max_candidates[3] = \ + c_norm * np.sqrt(1 - (a_dot_c / (a_norm * c_norm))**2) + max_candidates[4] = \ + b_norm * np.sqrt(1 - (b_dot_c / (b_norm * c_norm))**2) + max_candidates[5] = \ + c_norm * np.sqrt(1 - (b_dot_c / (b_norm * c_norm))**2) + + return np.min(max_candidates) diff --git a/tests/test_env.py b/tests/test_env.py index 70c76b7c9..1f89182af 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -11,13 +11,13 @@ def test_species_count(cutoff): species = [1, 2, 3] positions = np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]]) struc_test = Structure(cell, species, positions) - env_test = AtomicEnvironment(structure=struc_test, - atom=0, + env_test = AtomicEnvironment(structure=struc_test, atom=0, cutoffs=np.array([1, 1])) assert (len(struc_test.positions) == len(struc_test.coded_species)) assert (len(env_test.bond_array_2) == len(env_test.etypes)) assert (isinstance(env_test.etypes[0], np.int8)) + @pytest.mark.parametrize('cutoff', cutoff_list) def test_env_methods(cutoff): cell = np.eye(3) @@ -39,3 +39,43 @@ def test_env_methods(cutoff): assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2) assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3) assert np.array_equal(remade_env.bond_array_mb, env_test.bond_array_mb) + + +def test_auto_sweep(): + """Test that the number of neighbors inside the local environment is + correctly computed.""" + + # Make an arbitrary non-cubic structure. + cell = np.array([[1.3, 0.5, 0.8], + [-1.2, 1, 0.73], + [-0.8, 0.1, 0.9]]) + positions = np.array([[1.2, 0.7, 2.3], + [3.1, 2.5, 8.9], + [-1.8, -5.8, 3.0], + [0.2, 1.1, 2.1], + [3.2, 1.1, 3.3]]) + species = np.array([1, 2, 3, 4, 5]) + arbitrary_structure = Structure(cell, species, positions) + + # Construct an environment. + cutoffs = np.array([4., 3.]) + arbitrary_environment = \ + AtomicEnvironment(arbitrary_structure, 0, cutoffs) + + # Count the neighbors. + n_neighbors_1 = len(arbitrary_environment.etypes) + + # Reduce the sweep value, and check that neighbors are missing. + sweep_val = arbitrary_environment.sweep_val + arbitrary_environment.sweep_array = \ + np.arange(-sweep_val + 1, sweep_val, 1) + arbitrary_environment.compute_env() + n_neighbors_2 = len(arbitrary_environment.etypes) + assert(n_neighbors_1 > n_neighbors_2) + + # Increase the sweep value, and check that the count is the same. + arbitrary_environment.sweep_array = \ + np.arange(-sweep_val - 1, sweep_val + 2, 1) + arbitrary_environment.compute_env() + n_neighbors_3 = len(arbitrary_environment.etypes) + assert(n_neighbors_1 == n_neighbors_3)