From fdbccabce784a1178d3f17e340d4bcc3cabd472f Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 17:14:36 +0800 Subject: [PATCH 01/10] Fix GPU UTs --- deepmd/pt/utils/dataloader.py | 4 +- deepmd/pt/utils/dataset.py | 26 +++++----- deepmd/pt/utils/preprocess.py | 30 +++++------ deepmd/pt/utils/stat.py | 4 +- source/tests/pt/test_descriptor.py | 11 ++-- source/tests/pt/test_descriptor_dpa1.py | 8 +-- source/tests/pt/test_descriptor_dpa2.py | 8 +-- source/tests/pt/test_mlp.py | 67 +++++++++++++------------ 8 files changed, 82 insertions(+), 76 deletions(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 7c95f66c9c..5408452703 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -276,13 +276,13 @@ def collate_batch(batch): result[key] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) else: result[key] = torch.zeros( (n_frames, natoms_extended), dtype=torch.long, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) for i in range(len(batch)): natoms_tmp = list[i].shape[0] diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index c104e64491..6b19755b59 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -480,7 +480,7 @@ def preprocess(self, batch): batch[kk] = torch.tensor( batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( @@ -490,7 +490,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk], dtype=torch.long, device=env.PREPROCESS_DEVICE + batch[kk], dtype=torch.long, device=env.DEVICE ) batch["atype"] = batch.pop("type") @@ -526,10 +526,10 @@ def preprocess(self, batch): batch["shift"] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long, device=env.PREPROCESS_DEVICE + (n_frames, natoms_extended), dtype=torch.long, device=env.DEVICE ) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] @@ -568,14 +568,14 @@ def single_preprocess(self, batch, sid): batch[kk] = torch.tensor( batch[kk][sid], dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long, device=env.PREPROCESS_DEVICE + batch[kk][sid], dtype=torch.long, device=env.DEVICE ) clean_coord = batch.pop("coord") clean_type = batch.pop("type") @@ -671,14 +671,14 @@ def single_preprocess(self, batch, sid): noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE + coord_mask, dtype=torch.bool, device=env.DEVICE ) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( np.zeros_like(coord_mask, dtype=bool), dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) # add mask for type @@ -686,14 +686,14 @@ def single_preprocess(self, batch, sid): masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx 
batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE + type_mask, dtype=torch.bool, device=env.DEVICE ) else: masked_type = clean_type batch["type_mask"] = torch.tensor( np.zeros_like(type_mask, dtype=bool), dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) @@ -803,7 +803,7 @@ def __len__(self): def __getitem__(self, index): """Get a frame from the selected system.""" b_data = self._data_system._get_item(index) - b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.PREPROCESS_DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.DEVICE) return b_data @@ -879,7 +879,7 @@ def __getitem__(self, index=None): index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE + self._natoms_vec[index], device=env.DEVICE ) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) @@ -892,7 +892,7 @@ def get_training_batch(self, index=None): index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch_for_train(self._batch_size) b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE + self._natoms_vec[index], device=env.DEVICE ) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 463ac112ad..be24a3a770 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -99,7 +99,7 @@ def build_inside_clist(coord, region: Region3D, ncell): cell_offset[cell_offset < 0] = 0 delta = cell_offset - ncell a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms - arange = torch.arange(0, loc_ncell, 1, device=env.PREPROCESS_DEVICE) + arange = torch.arange(0, loc_ncell, 1, device=env.DEVICE) cellid = a2c == arange.unsqueeze(-1) # one hot cellid c2a = cellid.nonzero() lst = [] @@ -131,17 +131,17 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): # add ghost atoms a2c, c2a = build_inside_clist(coord, region, ncell) - xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.PREPROCESS_DEVICE) - yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.PREPROCESS_DEVICE) - zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.PREPROCESS_DEVICE) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.DEVICE) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.DEVICE) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.DEVICE) xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE + [1, 0, 0], dtype=torch.long, device=env.DEVICE ) xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE + [0, 1, 0], dtype=torch.long, device=env.DEVICE ) xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long, device=env.PREPROCESS_DEVICE + [0, 0, 1], dtype=torch.long, device=env.DEVICE ) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) @@ -166,7 +166,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = 
torch.cat([atype, tmp_atype]) merged_mapping = torch.cat( - [torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE), aid] + [torch.arange(atype.numel(), device=env.DEVICE), aid] ) return merged_coord_shift, merged_atype, merged_mapping @@ -189,7 +189,7 @@ def build_neighbor_list( distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool, device=env.PREPROCESS_DEVICE) * DISTANCE_INF + torch.eye(nloc, dtype=torch.bool, device=env.DEVICE) * DISTANCE_INF ) if min_check: if distance.min().abs() < 1e-6: @@ -197,12 +197,12 @@ def build_neighbor_list( if not type_split: sec = sec[-1:] lst = [] - nlist = torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 nlist_loc = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 ) nlist_type = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 ) for i, nnei in enumerate(sec): if i > 0: @@ -216,9 +216,9 @@ def build_neighbor_list( _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False) else: # when nnei > nall - indices = torch.zeros((nloc, nnei), device=env.PREPROCESS_DEVICE).long() - 1 + indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1 _sorted = ( - torch.ones((nloc, nnei), device=env.PREPROCESS_DEVICE).long() + torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF ) _sorted_nnei, indices_nnei = torch.topk( @@ -284,7 +284,7 @@ def make_env_mat( else: merged_coord_shift = torch.zeros_like(coord) merged_atype = atype.clone() - merged_mapping = torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE) + merged_mapping = torch.arange(atype.numel(), device=env.DEVICE) merged_coord = coord.clone() # build nlist diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 18ee4d9abe..7fffd15ca1 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -62,13 +62,13 @@ def make_stat_input(datasets, dataloaders, nbatches): shape = torch.zeros( (n_frames, extend, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) else: shape = torch.zeros( (n_frames, extend), dtype=torch.long, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) for i in range(len(item)): natoms_tmp = l[i].shape[0] diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py index da38cf007f..a7696e7095 100644 --- a/source/tests/pt/test_descriptor.py +++ b/source/tests/pt/test_descriptor.py @@ -12,6 +12,9 @@ from pathlib import ( Path, ) +from deepmd.pt.utils import ( + env, +) from deepmd.pt.model.descriptor import ( prod_env_mat_se_a, @@ -112,18 +115,18 @@ def setUp(self): def test_consistency(self): avg_zero = torch.zeros( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ) std_ones = torch.ones( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ) base_d, base_force, nlist = base_se_a( rcut=self.rcut, rcut_smth=self.rcut_smth, sel=self.sel, batch=self.np_batch, - mean=avg_zero, - stddev=std_ones, + mean=avg_zero.detach().cpu(), + stddev=std_ones.detach().cpu(), ) pt_coord = 
self.pt_batch["coord"]
diff --git a/source/tests/pt/test_descriptor_dpa1.py b/source/tests/pt/test_descriptor_dpa1.py
index 689fa7e49c..725369d68d 100644
--- a/source/tests/pt/test_descriptor_dpa1.py
+++ b/source/tests/pt/test_descriptor_dpa1.py
@@ -243,7 +243,7 @@ def test_descriptor_block(self):
         dparams["ntypes"] = ntypes
         des = DescrptBlockSeAtten(
             **dparams,
-        )
+        ).to(env.DEVICE)
         des.load_state_dict(torch.load(self.file_model_param))
         rcut = dparams["rcut"]
         nsel = dparams["sel"]
@@ -260,7 +260,7 @@ def test_descriptor_block(self):
             extended_coord, extended_atype, nloc, rcut, nsel, distinguish_types=False
         )
         # handle type_embedding
-        type_embedding = TypeEmbedNet(ntypes, 8)
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
         type_embedding.load_state_dict(torch.load(self.file_type_embed))
 
         ## to save model parameters
@@ -293,7 +293,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = False
         des = DescrptDPA1(
             **dparams,
-        )
+        ).to(env.DEVICE)
         target_dict = des.state_dict()
         source_dict = torch.load(self.file_model_param)
         type_embd_dict = torch.load(self.file_type_embed)
@@ -337,7 +337,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = True
         des = DescrptDPA1(
             **dparams,
-        )
+        ).to(env.DEVICE)
         descriptor, env_mat, diff, rot_mat, sw = des(
             extended_coord,
             extended_atype,
diff --git a/source/tests/pt/test_descriptor_dpa2.py b/source/tests/pt/test_descriptor_dpa2.py
index 45c95961fe..aa6b16964e 100644
--- a/source/tests/pt/test_descriptor_dpa2.py
+++ b/source/tests/pt/test_descriptor_dpa2.py
@@ -124,7 +124,7 @@ def test_descriptor_hyb(self):
             dlist,
             ntypes,
             hybrid_mode=dparams["hybrid_mode"],
-        )
+        ).to(env.DEVICE)
         model_dict = torch.load(self.file_model_param)
         # type_embd of repformer is removed
         model_dict.pop("descriptor_list.1.type_embd.embedding.weight")
@@ -158,7 +158,7 @@ def test_descriptor_hyb(self):
         )
         nlist = torch.cat(nlist_list, -1)
         # handle type_embedding
-        type_embedding = TypeEmbedNet(ntypes, 8)
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
         type_embedding.load_state_dict(torch.load(self.file_type_embed))
 
         ## to save model parameters
@@ -186,7 +186,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = False
         des = DescrptDPA2(
             **dparams,
-        )
+        ).to(env.DEVICE)
         target_dict = des.state_dict()
         source_dict = torch.load(self.file_model_param)
         # type_embd of repformer is removed
@@ -232,7 +232,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = True
         des = DescrptDPA2(
             **dparams,
-        )
+        ).to(env.DEVICE)
         descriptor, env_mat, diff, rot_mat, sw = des(
             extended_coord,
             extended_atype,
diff --git a/source/tests/pt/test_mlp.py b/source/tests/pt/test_mlp.py
index c06047b2a5..658f472180 100644
--- a/source/tests/pt/test_mlp.py
+++ b/source/tests/pt/test_mlp.py
@@ -8,6 +8,9 @@
 from deepmd.pt.utils.env import (
     PRECISION_DICT,
 )
+from deepmd.pt.utils import (
+    env,
+)
 
 try:
     from deepmd.pt.model.network.mlp import (
@@ -104,23 +107,23 @@ def test_match_native_layer(
                         inp_shap = ashp + inp_shap
                     rtol, atol = get_tols(prec)
                     dtype = PRECISION_DICT[prec]
-                    xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap)
+                    xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap)
                     # def mlp layer
-                    ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec)
+                    ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to(env.DEVICE)
                     # check consistency
                     nl = NativeLayer.deserialize(ml.serialize())
                     np.testing.assert_allclose(
-                        ml.forward(xx).detach().numpy(),
-                        nl.call(xx.detach().numpy()),
+                        
ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLPLayer.deserialize(ml.serialize()) + ml1 = MLPLayer.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -157,7 +160,7 @@ def test_match_native_net( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) # def MLP layers = [] for ii in range(1, len(ndims)): @@ -166,21 +169,21 @@ def test_match_native_net( ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec ).serialize() ) - ml = MLP(layers) + ml = MLP(layers).to(env.DEVICE) # check consistency nl = NativeNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLP.deserialize(ml.serialize()) + ml1 = MLP.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -219,23 +222,23 @@ def test_match_embedding_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) # check consistency nl = DPEmbeddingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -246,8 +249,8 @@ def test_jit( ): for idim, nn, act, idt, prec in self.test_cases: # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) @@ -272,7 +275,7 @@ def test_match_fitting_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) 
# def MLP ml = FittingNet( idim, @@ -282,21 +285,21 @@ def test_match_fitting_net( resnet_dt=idt, precision=prec, bias_out=ob, - ) + ).to(env.DEVICE) # check consistency nl = DPFittingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = FittingNet.deserialize(ml.serialize()) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -315,7 +318,7 @@ def test_jit( resnet_dt=idt, precision=prec, bias_out=ob, - ) - ml1 = FittingNet.deserialize(ml.serialize()) + ).to(env.DEVICE) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) From 3f0f1f8de65dc6818395d1456c7464197c632f24 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 17:18:29 +0800 Subject: [PATCH 02/10] Update env.py --- deepmd/pt/utils/env.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py index 6fa72943c7..559dba0167 100644 --- a/deepmd/pt/utils/env.py +++ b/deepmd/pt/utils/env.py @@ -24,11 +24,6 @@ else: DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -if os.environ.get("PREPROCESS_DEVICE") == "gpu": - PREPROCESS_DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -else: - PREPROCESS_DEVICE = torch.device("cpu") - JIT = False CACHE_PER_SYS = 5 # keep at most so many sets per sys in memory ENERGY_BIAS_TRAINABLE = True From 3dd415b6630c487b4b0be33fd5a063055cb5d643 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 09:20:41 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/dataset.py | 12 +++--------- deepmd/pt/utils/preprocess.py | 17 ++++------------- source/tests/pt/test_descriptor.py | 12 +++++++----- source/tests/pt/test_mlp.py | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 6b19755b59..b886dbb786 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -489,9 +489,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk], dtype=torch.long, device=env.DEVICE - ) + batch[kk] = torch.tensor(batch[kk], dtype=torch.long, device=env.DEVICE) batch["atype"] = batch.pop("type") keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] @@ -878,9 +876,7 @@ def __getitem__(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -891,9 +887,7 @@ def get_training_batch(self, 
index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index be24a3a770..3ea26d0041 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -165,9 +165,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord = torch.cat([coord, tmp_coord]) merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = torch.cat([atype, tmp_atype]) - merged_mapping = torch.cat( - [torch.arange(atype.numel(), device=env.DEVICE), aid] - ) + merged_mapping = torch.cat([torch.arange(atype.numel(), device=env.DEVICE), aid]) return merged_coord_shift, merged_atype, merged_mapping @@ -198,12 +196,8 @@ def build_neighbor_list( sec = sec[-1:] lst = [] nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_loc = ( - torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - ) - nlist_type = ( - torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - ) + nlist_loc = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 + nlist_type = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 for i, nnei in enumerate(sec): if i > 0: nnei = nnei - sec[i - 1] @@ -217,10 +211,7 @@ def build_neighbor_list( else: # when nnei > nall indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1 - _sorted = ( - torch.ones((nloc, nnei), device=env.DEVICE).long() - * DISTANCE_INF - ) + _sorted = torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF _sorted_nnei, indices_nnei = torch.topk( tmp, tmp.shape[1], dim=1, largest=False ) diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py index a7696e7095..4f31bac7bf 100644 --- a/source/tests/pt/test_descriptor.py +++ b/source/tests/pt/test_descriptor.py @@ -12,15 +12,13 @@ from pathlib import ( Path, ) -from deepmd.pt.utils import ( - env, -) from deepmd.pt.model.descriptor import ( prod_env_mat_se_a, ) from deepmd.pt.utils import ( dp_random, + env, ) from deepmd.pt.utils.dataset import ( DeepmdDataSet, @@ -115,10 +113,14 @@ def setUp(self): def test_consistency(self): avg_zero = torch.zeros( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) std_ones = torch.ones( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) base_d, base_force, nlist = base_se_a( rcut=self.rcut, diff --git a/source/tests/pt/test_mlp.py b/source/tests/pt/test_mlp.py index 658f472180..26f0041bf9 100644 --- a/source/tests/pt/test_mlp.py +++ b/source/tests/pt/test_mlp.py @@ -5,12 +5,12 @@ import numpy as np import torch -from deepmd.pt.utils.env import ( - PRECISION_DICT, -) from deepmd.pt.utils import ( env, ) +from deepmd.pt.utils.env import ( + PRECISION_DICT, +) try: from deepmd.pt.model.network.mlp import ( @@ -107,9 +107,13 @@ def test_match_native_layer( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) 
dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def mlp layer - ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to(env.DEVICE) + ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to( + env.DEVICE + ) # check consistency nl = NativeLayer.deserialize(ml.serialize()) np.testing.assert_allclose( @@ -160,7 +164,9 @@ def test_match_native_net( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def MLP layers = [] for ii in range(1, len(ndims)): From cb4cc67c56e648ea04e462e649fad98f25fd85d4 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:25:03 +0800 Subject: [PATCH 04/10] Devel update (#30) * throw errors when PyTorch CXX11 ABI is different from TensorFlow (#3201) If so, throw the following error: ``` -- PyTorch CXX11 ABI: 0 CMake Error at CMakeLists.txt:162 (message): PyTorch CXX11 ABI mismatch TensorFlow: 0 != 1 ``` Signed-off-by: Jinzhe Zeng * allow disabling TensorFlow backend during Python installation (#3200) Fix #3120. One can disable building the TensorFlow backend during `pip install` by setting `DP_ENABLE_TENSORFLOW=0`. --------- Signed-off-by: Jinzhe Zeng * breaking: pt: add dp model format and refactor pt impl for the fitting net. (#3199) - add dp model format (backend independent definition) for the fitting - refactor torch support, compatible with dp model format - fix mlp issue: the idt should only be used when a skip connection is available. - add tools `to_numpy_array` and `to_torch_tensor`. --------- Co-authored-by: Han Wang * remove duplicated fitting output check. 
fix codeql (#3202) Co-authored-by: Han Wang --------- Signed-off-by: Jinzhe Zeng Co-authored-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Co-authored-by: Han Wang --- backend/find_tensorflow.py | 6 + backend/read_env.py | 24 +- deepmd/model_format/__init__.py | 4 + deepmd/model_format/fitting.py | 355 +++++++++++++++++ deepmd/model_format/network.py | 2 + deepmd/model_format/se_e2_a.py | 10 +- deepmd/pt/model/model/dp_atomic_model.py | 10 +- deepmd/pt/model/network/mlp.py | 7 +- deepmd/pt/model/task/ener.py | 373 +++++++++++++++--- deepmd/pt/model/task/fitting.py | 13 +- deepmd/pt/model/task/task.py | 18 +- deepmd/pt/utils/utils.py | 40 ++ deepmd/tf/env.py | 5 + doc/install/install-from-source.md | 15 +- source/CMakeLists.txt | 21 +- source/config/CMakeLists.txt | 14 + source/config/run_config.ini | 2 + source/lib/src/gpu/CMakeLists.txt | 6 +- .../tests/common/test_model_format_utils.py | 121 ++++++ source/tests/pt/test_ener_fitting.py | 181 +++++++++ source/tests/pt/test_fitting_net.py | 24 +- source/tests/pt/test_model.py | 25 +- source/tests/pt/test_se_e2_a.py | 33 +- source/tests/pt/test_utils.py | 31 ++ 24 files changed, 1197 insertions(+), 143 deletions(-) create mode 100644 deepmd/model_format/fitting.py create mode 100644 source/tests/pt/test_ener_fitting.py create mode 100644 source/tests/pt/test_utils.py diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 32ae62469c..083e2673f7 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -127,6 +127,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. """ + if tf_version is None: + return { + "cpu": [], + "gpu": [], + "mpi": [], + } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") diff --git a/backend/read_env.py b/backend/read_env.py index 2cf433181a..bee5d607e3 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -80,16 +80,26 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append("-DENABLE_IPI:BOOL=TRUE") extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): - find_libpython_requires = [] + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): + find_libpython_requires = [] + else: + find_libpython_requires = ["find_libpython"] + cmake_args.extend( + [ + "-DENABLE_TENSORFLOW=ON", + f"-DTENSORFLOW_VERSION={tf_version}", + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", + ] + ) else: - find_libpython_requires = ["find_libpython"] - cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") + find_libpython_requires = [] + cmake_args.append("-DENABLE_TENSORFLOW=OFF") + tf_version = None cmake_args = [ - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] diff --git a/deepmd/model_format/__init__.py b/deepmd/model_format/__init__.py index 253bca3507..e15f73758e 100644 --- a/deepmd/model_format/__init__.py +++ b/deepmd/model_format/__init__.py @@ -7,6 +7,9 @@ from .env_mat import ( EnvMat, ) +from .fitting import ( + InvarFitting, +) from .network import ( EmbeddingNet, FittingNet, @@ -34,6 +37,7 @@ ) __all__ = [ + "InvarFitting", "DescrptSeA", "EnvMat", "make_multilayer_network", diff --git 
a/deepmd/model_format/fitting.py b/deepmd/model_format/fitting.py
new file mode 100644
index 0000000000..904fb42b76
--- /dev/null
+++ b/deepmd/model_format/fitting.py
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Any,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from .common import (
+    DEFAULT_PRECISION,
+    NativeOP,
+)
+from .network import (
+    FittingNet,
+    NetworkCollection,
+)
+from .output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+
+
+@fitting_check_output
+class InvarFitting(NativeOP):
+    r"""Fitting the energy (or a property of `dim_out`) of the system. The force and the virial can also be trained.
+
+    Let's take the energy fitting task as an example.
+    The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`:
+
+    .. math::
+        E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)}
+        \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)}
+
+    The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b})
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}`
+    is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and
+    :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively,
+    both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}`
+    is the activation function.
+
+    The output layer :math:`\mathcal{L}^{(n)}` is given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \mathbf{x}^T\mathbf{w}+\mathbf{b}
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}`
+    is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and
+    :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively,
+    both of which are trainable if `trainable[n]` is `True`.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    dim_out
+            The dimension of the output fit property.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameters
+    numb_aparam
+            Number of atomic parameters
+    rcond
+            The condition number for the regression of atomic energy.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    atom_ener
+            Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descriptor should be set.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines whether the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    distinguish_types
+            Different atomic types use different fitting nets.
+
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        dim_out: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        atom_ener: Optional[List[float]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        distinguish_types: bool = False,
+    ):
+        # seed, uniform_seed are not included
+        if tot_ener_zero:
+            raise NotImplementedError("tot_ener_zero is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if layer_name is not None:
+            raise NotImplementedError("layer_name is not implemented")
+        if atom_ener is not None:
+            raise NotImplementedError("atom_ener is not implemented")
+
+        self.var_name = var_name
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
+        self.dim_out = dim_out
+        self.neuron = neuron
+        self.resnet_dt = resnet_dt
+        self.numb_fparam = numb_fparam
+        self.numb_aparam = numb_aparam
+        self.rcond = rcond
+        self.tot_ener_zero = tot_ener_zero
+        self.trainable = trainable
+        self.atom_ener = atom_ener
+        self.activation_function = activation_function
+        self.precision = precision
+        self.layer_name = layer_name
+        self.use_aparam_as_mask = use_aparam_as_mask
+        self.spin = spin
+        self.distinguish_types = distinguish_types
+        if self.spin is not None:
+            raise NotImplementedError("spin is not supported")
+
+        # init constants
+        self.bias_atom_e = np.zeros([self.ntypes, self.dim_out])
+        if self.numb_fparam > 0:
+            self.fparam_avg = np.zeros(self.numb_fparam)
+            self.fparam_inv_std = np.ones(self.numb_fparam)
+        else:
+            self.fparam_avg, self.fparam_inv_std = None, None
+        if self.numb_aparam > 0:
+            self.aparam_avg = np.zeros(self.numb_aparam)
+            self.aparam_inv_std = np.ones(self.numb_aparam)
+        else:
+            self.aparam_avg, self.aparam_inv_std = None, None
+        # init networks
+        in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam
+        out_dim = self.dim_out
+        self.nets = NetworkCollection(
+            1 if self.distinguish_types else 0,
+            self.ntypes,
+            network_type="fitting_network",
+            networks=[
+                FittingNet(
+                    in_dim,
+                    out_dim,
+                    self.neuron,
+                    self.activation_function,
+                    self.resnet_dt,
+                    self.precision,
+                    bias_out=True,
+                )
+                for ii in range(self.ntypes if self.distinguish_types else 1)
+            ],
+        )
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name, [self.dim_out], reduciable=True, differentiable=True
+                ),
+            ]
+        )
+
+    def __setitem__(self, key, value):
+        if key in ["bias_atom_e"]:
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in 
["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        else:
+            raise KeyError(key)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "dim_out": self.dim_out,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "rcond": self.rcond,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "distinguish_types": self.distinguish_types,
+            "nets": self.nets.serialize(),
+            "@variables": {
+                "bias_atom_e": self.bias_atom_e,
+                "fparam_avg": self.fparam_avg,
+                "fparam_inv_std": self.fparam_inv_std,
+                "aparam_avg": self.aparam_avg,
+                "aparam_inv_std": self.aparam_inv_std,
+            },
+            # not supported
+            "tot_ener_zero": self.tot_ener_zero,
+            "trainable": self.trainable,
+            "atom_ener": self.atom_ener,
+            "layer_name": self.layer_name,
+            "use_aparam_as_mask": self.use_aparam_as_mask,
+            "spin": self.spin,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "InvarFitting":
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = variables[kk]
+        obj.nets = NetworkCollection.deserialize(nets)
+        return obj
+
+    def call(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ):
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-particle representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-particle representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        nf, nloc, nd = descriptor.shape
+        # check input dim
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                f"get an input descriptor of dim {nd},"
+                f"which is not consistent with {self.dim_descrpt}."
+            )
+        xx = descriptor
+        # check fparam dim, concatenate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    f"get an input fparam of dim {fparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_fparam}."
+                )
+            fparam = (fparam - self.fparam_avg) * self.fparam_inv_std
+            fparam = np.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1])
+            xx = np.concatenate(
+                [xx, fparam],
+                axis=-1,
+            )
+        # check aparam dim, concatenate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    f"get an input aparam of dim {aparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_aparam}."
+                )
+            aparam = (aparam - self.aparam_avg) * self.aparam_inv_std
+            xx = np.concatenate(
+                [xx, aparam],
+                axis=-1,
+            )
+
+        # calculate the prediction
+        if self.distinguish_types:
+            outs = np.zeros([nf, nloc, self.dim_out])
+            for type_i in range(self.ntypes):
+                mask = np.tile(
+                    (atype == type_i).reshape([nf, nloc, 1]), [1, 1, self.dim_out]
+                )
+                atom_energy = self.nets[(type_i,)](xx)
+                atom_energy = atom_energy + self.bias_atom_e[type_i]
+                atom_energy = atom_energy * mask
+                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+        else:
+            outs = self.nets[()](xx) + self.bias_atom_e[atype]
+        return {self.var_name: outs}
diff --git a/deepmd/model_format/network.py b/deepmd/model_format/network.py
index a327d990c9..f2056c0b95 100644
--- a/deepmd/model_format/network.py
+++ b/deepmd/model_format/network.py
@@ -161,6 +161,8 @@ def __init__(
     ) -> None:
         prec = PRECISION_DICT[precision.lower()]
         self.precision = precision
+        # only use_timestep when skip connection is established.
+ use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng() self.w = rng.normal(size=(num_in, num_out)).astype(prec) self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None diff --git a/deepmd/model_format/se_e2_a.py b/deepmd/model_format/se_e2_a.py index 28751cad8d..f179b10ac3 100644 --- a/deepmd/model_format/se_e2_a.py +++ b/deepmd/model_format/se_e2_a.py @@ -171,9 +171,8 @@ def __init__( ) self.env_mat = EnvMat(self.rcut, self.rcut_smth) self.nnei = np.sum(self.sel) - self.nneix4 = self.nnei * 4 - self.davg = np.zeros([self.ntypes, self.nneix4]) - self.dstd = np.ones([self.ntypes, self.nneix4]) + self.davg = np.zeros([self.ntypes, self.nnei, 4]) + self.dstd = np.ones([self.ntypes, self.nnei, 4]) self.orig_sel = self.sel def __setitem__(self, key, value): @@ -192,6 +191,11 @@ def __getitem__(self, key): else: raise KeyError(key) + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.neuron[-1] * self.axis_neuron + def cal_g( self, ss, diff --git a/deepmd/pt/model/model/dp_atomic_model.py b/deepmd/pt/model/model/dp_atomic_model.py index 853eacb875..a222c8e6f6 100644 --- a/deepmd/pt/model/model/dp_atomic_model.py +++ b/deepmd/pt/model/model/dp_atomic_model.py @@ -93,11 +93,11 @@ def __init__( ) fitting_net["type"] = fitting_net.get("type", "ener") - if self.descriptor_type not in ["se_e2_a"]: - fitting_net["ntypes"] = 1 + fitting_net["ntypes"] = self.descriptor.get_ntype() + if self.descriptor_type in ["se_e2_a"]: + fitting_net["distinguish_types"] = True else: - fitting_net["ntypes"] = self.descriptor.get_ntype() - fitting_net["use_tebd"] = False + fitting_net["distinguish_types"] = False fitting_net["embedding_width"] = self.descriptor.dim_out self.grad_force = "direct" not in fitting_net["type"] @@ -165,5 +165,5 @@ def forward_atomic( ) assert descriptor is not None # energy, force - fit_ret = self.fitting_net(descriptor, atype, atype_tebd=None, rot_mat=rot_mat) + fit_ret = self.fitting_net(descriptor, atype, gr=rot_mat) return fit_ret diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index e3ac0e7bc2..d76abd82f9 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -56,7 +56,10 @@ def __init__( precision: str = DEFAULT_PRECISION, ): super().__init__() - self.use_timestep = use_timestep + # only use_timestep when skip connection is established. 
+ self.use_timestep = use_timestep and ( + num_out == num_in or num_out == num_in * 2 + ) self.activate_name = activation_function self.activate = ActivationFn(self.activate_name) self.precision = precision @@ -207,7 +210,7 @@ class NetworkCollection(DPNetworkCollection, nn.Module): NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { "network": MLP, "embedding_network": EmbeddingNet, - # "fitting_network": FittingNet, + "fitting_network": FittingNet, } def __init__(self, *args, **kwargs): diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 03043e2fcb..e40a6bda44 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -1,10 +1,13 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy import logging from typing import ( + List, Optional, Tuple, ) +import numpy as np import torch from deepmd.model_format import ( @@ -12,6 +15,10 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.pt.model.network.mlp import ( + FittingNet, + NetworkCollection, +) from deepmd.pt.model.network.network import ( ResidualDeep, ) @@ -21,19 +28,35 @@ from deepmd.pt.utils import ( env, ) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE -@Fitting.register("ener") @fitting_check_output -class EnergyFittingNet(Fitting): +class InvarFitting(Fitting): def __init__( self, - ntypes, - embedding_width, - neuron, - bias_atom_e, - resnet_dt=True, - use_tebd=True, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + distinguish_types: bool = False, **kwargs, ): """Construct a fitting net for energy. @@ -46,67 +69,322 @@ def __init__( - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() + self.var_name = var_name self.ntypes = ntypes - self.embedding_width = embedding_width - self.use_tebd = use_tebd - if not use_tebd: - assert self.ntypes == len(bias_atom_e), "Element count mismatches!" - bias_atom_e = torch.tensor(bias_atom_e) + self.dim_descrpt = dim_descrpt + self.dim_out = dim_out + self.neuron = neuron + self.distinguish_types = distinguish_types + self.use_tebd = not self.distinguish_types + self.resnet_dt = resnet_dt + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + if bias_atom_e is None: + bias_atom_e = np.zeros([self.ntypes, self.dim_out]) + bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device) + bias_atom_e = bias_atom_e.view([self.ntypes, self.dim_out]) + if not self.use_tebd: + assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" 
self.register_buffer("bias_atom_e", bias_atom_e) + # init constants + if self.numb_fparam > 0: + self.register_buffer( + "fparam_avg", + torch.zeros(self.numb_fparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "fparam_inv_std", + torch.ones(self.numb_fparam, dtype=self.prec, device=device), + ) + else: + self.fparam_avg, self.fparam_inv_std = None, None + if self.numb_aparam > 0: + self.register_buffer( + "aparam_avg", + torch.zeros(self.numb_aparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "aparam_inv_std", + torch.ones(self.numb_aparam, dtype=self.prec, device=device), + ) + else: + self.aparam_avg, self.aparam_inv_std = None, None - filter_layers = [] - for type_i in range(self.ntypes): - bias_type = 0.0 - one = ResidualDeep( - type_i, embedding_width, neuron, bias_type, resnet_dt=resnet_dt + in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + out_dim = 1 + + self.old_impl = kwargs.get("old_impl", False) + if self.old_impl: + filter_layers = [] + for type_i in range(self.ntypes): + bias_type = 0.0 + one = ResidualDeep( + type_i, + self.dim_descrpt, + self.neuron, + bias_type, + resnet_dt=self.resnet_dt, + ) + filter_layers.append(one) + self.filter_layers_old = torch.nn.ModuleList(filter_layers) + self.filter_layers = None + else: + self.filter_layers = NetworkCollection( + 1 if self.distinguish_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + out_dim, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + ) + for ii in range(self.ntypes if self.distinguish_types else 1) + ], ) - filter_layers.append(one) - self.filter_layers = torch.nn.ModuleList(filter_layers) + self.filter_layers_old = None + # very bad design... if "seed" in kwargs: logging.info("Set seed to %d in fitting net.", kwargs["seed"]) torch.manual_seed(kwargs["seed"]) - def output_def(self): + def output_def(self) -> FittingOutputDef: return FittingOutputDef( [ - OutputVariableDef("energy", [1], reduciable=True, differentiable=True), + OutputVariableDef( + self.var_name, [self.dim_out], reduciable=True, differentiable=True + ), ] ) + def __setitem__(self, key, value): + if key in ["bias_atom_e"]: + # correct bias_atom_e shape. 
user may provide a wrongly shaped value
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        else:
+            raise KeyError(key)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "dim_out": self.dim_out,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "distinguish_types": self.distinguish_types,
+            "nets": self.filter_layers.serialize(),
+            "@variables": {
+                "bias_atom_e": to_numpy_array(self.bias_atom_e),
+                "fparam_avg": to_numpy_array(self.fparam_avg),
+                "fparam_inv_std": to_numpy_array(self.fparam_inv_std),
+                "aparam_avg": to_numpy_array(self.aparam_avg),
+                "aparam_inv_std": to_numpy_array(self.aparam_inv_std),
+            },
+            # "rcond": self.rcond ,
+            # "tot_ener_zero": self.tot_ener_zero ,
+            # "trainable": self.trainable ,
+            # "atom_ener": self.atom_ener ,
+            # "layer_name": self.layer_name ,
+            # "use_aparam_as_mask": self.use_aparam_as_mask ,
+            # "spin": self.spin ,
+            ## NOTICE: not supported so far
+            "rcond": None,
+            "tot_ener_zero": False,
+            "trainable": True,
+            "atom_ener": None,
+            "layer_name": None,
+            "use_aparam_as_mask": False,
+            "spin": None,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "InvarFitting":
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = to_torch_tensor(variables[kk])
+        obj.filter_layers = NetworkCollection.deserialize(nets)
+        return obj
+
+    def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1])
+
+    def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1])
+
     def forward(
         self,
-        inputs: torch.Tensor,
+        descriptor: torch.Tensor,
         atype: torch.Tensor,
-        atype_tebd: Optional[torch.Tensor] = None,
-        rot_mat: Optional[torch.Tensor] = None,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
     ):
         """Based on embedding net output, calculate total energy.
 
         Args:
-        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width].
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
         - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
 
         Returns
         -------
         - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
         """
+        xx = descriptor
+        nf, nloc, nd = xx.shape
+        # NOTICE in tests/pt/test_model.py
+        # it happens that the user directly accesses the data member self.bias_atom_e
+        # and sets it to a wrong shape!
+        self.bias_atom_e = self.bias_atom_e.view([self.ntypes, self.dim_out])
+        # check input dim
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                f"get an input descriptor of dim {nd},"
+                f"which is not consistent with {self.dim_descrpt}."
+            )
+        # check fparam dim, concatenate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            assert self.fparam_avg is not None
+            assert self.fparam_inv_std is not None
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    f"get an input fparam of dim {fparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_fparam}."
+                )
+            nb, _ = fparam.shape
+            t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb)
+            t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb)
+            fparam = (fparam - t_fparam_avg) * t_fparam_inv_std
+            fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1])
+            xx = torch.cat(
+                [xx, fparam],
+                dim=-1,
+            )
+        # check aparam dim, concatenate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            assert self.aparam_avg is not None
+            assert self.aparam_inv_std is not None
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    f"get an input aparam of dim {aparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_aparam}."
+                )
+            nb, nloc, _ = aparam.shape
+            t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc)
+            t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc)
+            aparam = (aparam - t_aparam_avg) * t_aparam_inv_std
+            xx = torch.cat(
+                [xx, aparam],
+                dim=-1,
+            )
+
         outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
-        if self.use_tebd:
-            if atype_tebd is not None:
-                inputs = torch.concat([inputs, atype_tebd], dim=-1)
-            atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[
-                atype
-            ].unsqueeze(-1)
-            outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+        if self.old_impl:
+            outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
+            assert self.filter_layers_old is not None
+            if self.use_tebd:
+                atom_energy = self.filter_layers_old[0](xx) + self.bias_atom_e[
+                    atype
+                ].unsqueeze(-1)
+                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            else:
+                for type_i, filter_layer in enumerate(self.filter_layers_old):
+                    mask = atype == type_i
+                    atom_energy = filter_layer(xx)
+                    atom_energy = atom_energy + self.bias_atom_e[type_i]
+                    atom_energy = atom_energy * mask.unsqueeze(-1)
+                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
         else:
-            for type_i, filter_layer in enumerate(self.filter_layers):
-                mask = atype == type_i
-                atom_energy = filter_layer(inputs)
-                atom_energy = atom_energy + self.bias_atom_e[type_i]
-                atom_energy = atom_energy * mask.unsqueeze(-1)
+            if self.use_tebd:
+                atom_energy = (
+                    self.filter_layers.networks[0](xx) + self.bias_atom_e[atype]
+                )
                 outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
-        return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+            else:
+                for type_i, ll in enumerate(self.filter_layers.networks):
+                    mask = (atype == type_i).unsqueeze(-1)
+                    mask = torch.tile(mask, (1, 1, self.dim_out))
+                    atom_energy = ll(xx)
+                    atom_energy = atom_energy + self.bias_atom_e[type_i]
+                    atom_energy = atom_energy * mask
+                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+
+
+@Fitting.register("ener")
+class EnergyFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        
embedding_width: int,
+        neuron: List[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        use_tebd: bool = True,
+        **kwargs,
+    ):
+        super().__init__(
+            "energy",
+            ntypes,
+            embedding_width,
+            1,
+            neuron=neuron,
+            bias_atom_e=bias_atom_e,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            use_tebd=use_tebd,
+            **kwargs,
+        )
 
 
 @Fitting.register("direct_force")
@@ -136,7 +414,7 @@ def __init__(
         """
         super().__init__()
         self.ntypes = ntypes
-        self.embedding_width = embedding_width
+        self.dim_descrpt = embedding_width
         self.use_tebd = use_tebd
         self.out_dim = out_dim
         if not use_tebd:
@@ -186,13 +464,12 @@ def forward(
         self,
         inputs: torch.Tensor,
         atype: torch.Tensor,
-        atype_tebd: Optional[torch.Tensor] = None,
-        rot_mat: Optional[torch.Tensor] = None,
+        gr: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, None]:
         """Based on embedding net output, calculate total energy.
 
         Args:
-        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width].
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
         - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
 
         Returns
         -------
         - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
         """
         nframes, nloc, _ = inputs.size()
         if self.use_tebd:
-            if atype_tebd is not None:
-                inputs = torch.concat([inputs, atype_tebd], dim=-1)
+            # if atype_tebd is not None:
+            #     inputs = torch.concat([inputs, atype_tebd], dim=-1)
             vec_out = self.filter_layers_dipole[0](
                 inputs
             )  # Shape is [nframes, nloc, m1]
             assert list(vec_out.size()) == [nframes, nloc, self.out_dim]
             # (nf x nloc) x 1 x od
             vec_out = vec_out.view(-1, 1, self.out_dim)
-            assert rot_mat is not None
+            assert gr is not None
             # (nf x nloc) x od x 3
-            rot_mat = rot_mat.view(-1, self.out_dim, 3)
+            gr = gr.view(-1, self.out_dim, 3)
             vec_out = (
-                torch.bmm(vec_out, rot_mat).squeeze(-2).view(nframes, nloc, 3)
+                torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3)
             )  # Shape is [nframes, nloc, 3]
         else:
             vec_out = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 16e80f9c20..c6fb6b27e1 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -7,9 +7,6 @@
 import numpy as np
 import torch
 
-from deepmd.model_format import (
-    FittingOutputDef,
-)
 from deepmd.pt.model.task.task import (
     TaskBaseMethod,
 )
@@ -61,17 +58,9 @@ def __new__(cls, *args, **kwargs):
         if fitting_type in Fitting.__plugins.plugins:
             cls = Fitting.__plugins.plugins[fitting_type]
         else:
-            raise RuntimeError("Unknown descriptor type: " + fitting_type)
+            raise RuntimeError("Unknown fitting type: " + fitting_type)
         return super().__new__(cls)
 
-    def output_def(self) -> FittingOutputDef:
-        """Definition for the task Output."""
-        raise NotImplementedError
-
-    def forward(self, **kwargs):
-        """Task Output."""
-        raise NotImplementedError
-
     def share_params(self, base_class, shared_level, resume=False):
         assert (
             self.__class__ == base_class.__class__
diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py
index a9b2efeb9a..b2dc03e4bd 100644
--- a/deepmd/pt/model/task/task.py
+++ b/deepmd/pt/model/task/task.py
@@ -1,12 +1,18 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+
 import torch
 
+from 
deepmd.model_format import ( + FittingOutputDef, +) -class TaskBaseMethod(torch.nn.Module): - def __init__(self, **kwargs): - """Construct a basic head for different tasks.""" - super().__init__() - def forward(self, **kwargs): - """Task Output.""" +class TaskBaseMethod(torch.nn.Module, ABC): + @abstractmethod + def output_def(self) -> FittingOutputDef: + """Definition for the task Output.""" raise NotImplementedError diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 780dbf7e62..e83e12f608 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -4,9 +4,17 @@ Optional, ) +import numpy as np import torch import torch.nn.functional as F +from deepmd.model_format.common import PRECISION_DICT as NP_PRECISION_DICT + +from .env import ( + DEVICE, +) +from .env import PRECISION_DICT as PT_PRECISION_DICT + def get_activation_fn(activation: str) -> Callable: """Returns the activation function corresponding to `activation`.""" @@ -41,3 +49,35 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x else: raise RuntimeError(f"activation function {self.activation} not supported") + + +def to_numpy_array( + xx: torch.Tensor, +) -> np.ndarray: + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of PT_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(xx.dtype, None) + prec = NP_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + return xx.detach().cpu().numpy().astype(prec) + + +def to_torch_tensor( + xx: np.ndarray, +) -> torch.Tensor: + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of NP_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(type(xx.flat[0]), None) + prec = PT_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + return torch.tensor(xx, dtype=prec, device=DEVICE) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index da03631689..eada2774d3 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -472,6 +472,11 @@ def _get_package_constants( GLOBAL_CONFIG = _get_package_constants() +if GLOBAL_CONFIG["enable_tensorflow"] == "0": + raise RuntimeError( + "TensorFlow backend is not built. To enable it, " + "set the environmental variable DP_ENABLE_TENSORFLOW=1." + ) MODEL_VERSION = GLOBAL_CONFIG["model_version"] TF_VERSION = GLOBAL_CONFIG["tf_version"] TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index ae1509f2ca..389cc78c9f 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -90,7 +90,17 @@ Check the compiler version on your machine gcc --version ``` -The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`. 
+The compiler GCC 4.8 or later is supported in the DeePMD-kit.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
+
+:::
+
+::::
 
 Execute
 
 ```bash
@@ -105,7 +115,8 @@ One may set the following environment variables before executing `pip`:
 | DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
 | CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
 | ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
-| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
+| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend. |
+| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
 | DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
 | CMAKE_ARGS | str | - | Additional CMake arguments |
 | <LANG>FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). 
| diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index c273bc9263..d6ee3d0958 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -154,7 +154,22 @@ if(ENABLE_TENSORFLOW AND NOT DEEPMD_C_ROOT) endif() if(ENABLE_PYTORCH AND NOT DEEPMD_C_ROOT) find_package(Torch REQUIRED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + string(REGEX MATCH "_GLIBCXX_USE_CXX11_ABI=([0-9]+)" CXXABI_PT_MATCH + ${TORCH_CXX_FLAGS}) + if(CXXABI_PT_MATCH) + message(STATUS "PyTorch CXX11 ABI: ${CMAKE_MATCH_1}") + if(DEFINED OP_CXX_ABI) + if(NOT ${CMAKE_MATCH_1} EQUAL ${OP_CXX_ABI}) + message( + FATAL_ERROR + "PyTorch CXX11 ABI mismatch TensorFlow: ${CMAKE_MATCH_1} != ${OP_CXX_ABI}" + ) + endif() + else() + set(OP_CXX_ABI ${CMAKE_MATCH_1}) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}) + endif() + endif() endif() # log enabled backends if(NOT DEEPMD_C_ROOT) @@ -165,7 +180,9 @@ if(NOT DEEPMD_C_ROOT) if(ENABLE_PYTORCH) message(STATUS "- PyTorch") endif() - if(NOT ENABLE_TENSORFLOW AND NOT ENABLE_PYTORCH) + if(NOT ENABLE_TENSORFLOW + AND NOT ENABLE_PYTORCH + AND NOT BUILD_PY_IF) message(FATAL_ERROR "No backend is enabled.") endif() endif() diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt index 5473b91f29..b1ce17566f 100644 --- a/source/config/CMakeLists.txt +++ b/source/config/CMakeLists.txt @@ -1,5 +1,19 @@ # config +# cmake will treat true, false, on, off, 1, 0 as booleans we hope an easy way to +# check it +if(ENABLE_TENSORFLOW) + set(ENABLE_TENSORFLOW 1) +else() + set(ENABLE_TENSORFLOW 0) +endif() + +if(ENABLE_PYTORCH) + set(ENABLE_PYTORCH 1) +else() + set(ENABLE_PYTORCH 0) +endif() + configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini" @ONLY) diff --git a/source/config/run_config.ini b/source/config/run_config.ini index 3f0a7a33a8..11f4100e61 100644 --- a/source/config/run_config.ini +++ b/source/config/run_config.ini @@ -4,6 +4,8 @@ GIT_SUMM = @GIT_SUMM@ GIT_HASH = @GIT_HASH@ GIT_DATE = @GIT_DATE@ GIT_BRANCH = @GIT_BRANCH@ +ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@ +ENABLE_PYTORCH = @ENABLE_PYTORCH@ TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@ TF_LIBS = @TensorFlow_LIBRARY@ TF_VERSION = @TENSORFLOW_VERSION@ diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt index 3bd24cc620..804e1c0506 100644 --- a/source/lib/src/gpu/CMakeLists.txt +++ b/source/lib/src/gpu/CMakeLists.txt @@ -10,8 +10,10 @@ if(USE_CUDA_TOOLKIT) endif() enable_language(CUDA) set(CMAKE_CUDA_STANDARD 11) - add_compile_definitions( - "$<$:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>") + if(DEFINED OP_CXX_ABI) + add_compile_definitions( + "$<$:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>") + endif() find_package(CUDAToolkit REQUIRED) diff --git a/source/tests/common/test_model_format_utils.py b/source/tests/common/test_model_format_utils.py index da76c53ed9..cb85fd2bb2 100644 --- a/source/tests/common/test_model_format_utils.py +++ b/source/tests/common/test_model_format_utils.py @@ -13,6 +13,7 @@ EmbeddingNet, EnvMat, FittingNet, + InvarFitting, NativeLayer, NativeNet, NetworkCollection, @@ -369,3 +370,123 @@ def test_self_consistency( mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist) for ii in [0, 1, 4]: np.testing.assert_allclose(mm0[ii], mm1[ii]) + + +class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA(self.rcut, 
self.rcut_smth, self.sel) + dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) + atype = self.atype_ext[:, :nloc] + + for ( + distinguish_types, + od, + nfp, + nap, + ) in itertools.product( + [True, False], + [1, 2], + [0, 3], + [0, 4], + ): + ifn0 = InvarFitting( + "energy", + self.nt, + ds.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + distinguish_types=distinguish_types, + ) + ifn1 = InvarFitting.deserialize(ifn0.serialize()) + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp)) + else: + ifp = None + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap)) + else: + iap = None + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + ret1 = ifn1(dd[0], atype, fparam=ifp, aparam=iap) + np.testing.assert_allclose(ret0["energy"], ret1["energy"]) + + def test_self_exception( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel) + dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) + atype = self.atype_ext[:, :nloc] + + for ( + distinguish_types, + od, + nfp, + nap, + ) in itertools.product( + [True, False], + [1, 2], + [0, 3], + [0, 4], + ): + ifn0 = InvarFitting( + "energy", + self.nt, + ds.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + distinguish_types=distinguish_types, + ) + + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp)) + else: + ifp = None + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap)) + else: + iap = None + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0][:, :, :-2], atype, fparam=ifp, aparam=iap) + self.assertIn("input descriptor", context.exception) + + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp - 1)) + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + self.assertIn("input fparam", context.exception) + + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap - 1)) + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + self.assertIn("input aparam", context.exception) + + def test_get_set(self): + ifn0 = InvarFitting( + "energy", + self.nt, + 3, + 1, + ) + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ii in [ + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + ]: + ifn0[ii] = foo + np.testing.assert_allclose(foo, ifn0[ii]) diff --git a/source/tests/pt/test_ener_fitting.py b/source/tests/pt/test_ener_fitting.py new file mode 100644 index 0000000000..eece8447df --- /dev/null +++ b/source/tests/pt/test_ener_fitting.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import torch + +from deepmd.model_format import InvarFitting as DPInvarFitting +from deepmd.pt.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pt.model.task.ener import ( + EnergyFittingNet, + InvarFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION + + +class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + rd0, _, _, _, _ = dd0( + torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), + 
torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), + torch.tensor(self.nlist, dtype=int, device=env.DEVICE), + ) + atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) + + for od, distinguish_types, nfp, nap in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + ): + ft0 = InvarFitting( + "foo", + self.nt, + dd0.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + use_tebd=(not distinguish_types), + ).to(env.DEVICE) + ft1 = DPInvarFitting.deserialize(ft0.serialize()) + ft2 = InvarFitting.deserialize(ft0.serialize()) + + if nfp > 0: + ifp = torch.tensor( + rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = torch.tensor( + rng.normal(size=(self.nf, self.nloc, nap)), + dtype=dtype, + device=env.DEVICE, + ) + else: + iap = None + + ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap) + ret1 = ft1( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + ret1["foo"], + ) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + to_numpy_array(ret2["foo"]), + ) + + def test_new_old( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + rd0, _, _, _, _ = dd( + torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), + torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), + torch.tensor(self.nlist, dtype=int, device=env.DEVICE), + ) + atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) + + od = 1 + for distinguish_types in itertools.product( + [True, False], + ): + ft0 = EnergyFittingNet( + self.nt, + dd.dim_out, + distinguish_types=distinguish_types, + ).to(env.DEVICE) + ft1 = EnergyFittingNet( + self.nt, + dd.dim_out, + distinguish_types=distinguish_types, + old_impl=True, + ).to(env.DEVICE) + dd0 = ft0.state_dict() + dd1 = ft1.state_dict() + for kk, vv in dd1.items(): + new_kk = kk + new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks") + new_kk = new_kk.replace("deep_layers", "layers") + new_kk = new_kk.replace("final_layer", "layers.3") + dd1[kk] = dd0[new_kk] + if kk.split(".")[-1] in ["idt", "bias"]: + dd1[kk] = dd1[kk].unsqueeze(0) + dd1["bias_atom_e"] = dd0["bias_atom_e"] + ft1.load_state_dict(dd1) + ret0 = ft0(rd0, atype) + ret1 = ft1(rd0, atype) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + ) + + def test_jit( + self, + ): + for od, distinguish_types, nfp, nap in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + ): + ft0 = InvarFitting( + "foo", + self.nt, + 9, + od, + numb_fparam=nfp, + numb_aparam=nap, + use_tebd=(not distinguish_types), + ).to(env.DEVICE) + torch.jit.script(ft0) + + def test_get_set(self): + ifn0 = InvarFitting( + "energy", + self.nt, + 3, + 1, + ) + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ii in [ + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + ]: + ifn0[ii] = torch.tensor(foo, dtype=dtype, device=env.DEVICE) + np.testing.assert_allclose(foo, ifn0[ii].detach().cpu().numpy()) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index 3feb4f4739..ed2c428de5 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -102,25 +102,25 
@@ def test_consistency(self): my_fn = EnergyFittingNet( self.ntypes, self.embedding_width, - self.n_neuron, - self.dp_fn.bias_atom_e, - use_tebd=False, + neuron=self.n_neuron, + bias_atom_e=self.dp_fn.bias_atom_e, + distinguish_types=True, ) for name, param in my_fn.named_parameters(): - matched = re.match("filter_layers\.(\d).deep_layers\.(\d)\.([a-z]+)", name) + matched = re.match( + "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name + ) key = None if matched: + if int(matched.group(2)) == len(self.n_neuron): + layer_id = -1 + else: + layer_id = matched.group(2) key = gen_key( type_id=matched.group(1), - layer_id=matched.group(2), + layer_id=layer_id, w_or_b=matched.group(3), ) - else: - matched = re.match("filter_layers\.(\d).final_layer\.([a-z]+)", name) - if matched: - key = gen_key( - type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2) - ) assert key is not None var = values[key] with torch.no_grad(): @@ -132,7 +132,7 @@ def test_consistency(self): ret = my_fn(embedding, atype) my_energy = ret["energy"] my_energy = my_energy.detach() - self.assertTrue(np.allclose(dp_energy, my_energy.numpy().reshape([-1]))) + np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) if __name__ == "__main__": diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 5bbbc9e352..c6595e6471 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -53,23 +53,24 @@ VariableState = collections.namedtuple("VariableState", ["value", "gradient"]) -def torch2tf(torch_name): +def torch2tf(torch_name, last_layer_id=None): fields = torch_name.split(".") offset = int(fields[2] == "networks") element_id = int(fields[2 + offset]) if fields[0] == "descriptor": layer_id = int(fields[4 + offset]) + 1 weight_type = fields[5 + offset] - return "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) - elif fields[3] == "deep_layers": - layer_id = int(fields[4]) - weight_type = fields[5] - return "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type) - elif fields[3] == "final_layer": - weight_type = fields[4] - return "final_layer_type_%d/%s:0" % (element_id, weight_type) + ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) + elif fields[0] == "fitting_net": + layer_id = int(fields[4 + offset]) + weight_type = fields[5 + offset] + if layer_id != last_layer_id: + ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type) + else: + ret = "final_layer_type_%d/%s:0" % (element_id, weight_type) else: raise RuntimeError("Unexpected parameter name: %s" % torch_name) + return ret class DpTrainer: @@ -290,7 +291,7 @@ def test_consistency(self): "neuron": self.filter_neuron, "axis_neuron": self.axis_neuron, }, - "fitting_net": {"neuron": self.n_neuron}, + "fitting_net": {"neuron": self.n_neuron, "distinguish_types": True}, "data_stat_nbatch": self.data_stat_nbatch, "type_map": self.type_map, }, @@ -323,7 +324,7 @@ def test_consistency(self): # Keep parameter value consistency between 2 implentations for name, param in my_model.named_parameters(): name = name.replace("sea.", "") - var_name = torch2tf(name) + var_name = torch2tf(name, last_layer_id=len(self.n_neuron)) var = vs_dict[var_name].value with torch.no_grad(): src = torch.from_numpy(var) @@ -404,7 +405,7 @@ def step(step_id): for name, param in my_model.named_parameters(): name = name.replace("sea.", "") - var_name = torch2tf(name) + var_name = torch2tf(name, last_layer_id=len(self.n_neuron)) var_grad = vs_dict[var_name].gradient param_grad = 
param.grad.cpu() var_grad = torch.tensor(var_grad) diff --git a/source/tests/pt/test_se_e2_a.py b/source/tests/pt/test_se_e2_a.py index c0a106cb16..0da80ea1ea 100644 --- a/source/tests/pt/test_se_e2_a.py +++ b/source/tests/pt/test_se_e2_a.py @@ -25,6 +25,9 @@ PRECISION_DICT, ) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) from .test_mlp import ( get_tols, ) @@ -32,36 +35,6 @@ dtype = env.GLOBAL_PT_FLOAT_PRECISION -class TestCaseSingleFrameWithNlist: - def setUp(self): - # nloc == 3, nall == 4 - self.nloc = 3 - self.nall = 4 - self.nf, self.nt = 1, 2 - self.coord_ext = np.array( - [ - [0, 0, 0], - [0, 1, 0], - [0, 0, 1], - [0, -2, 0], - ], - dtype=np.float64, - ).reshape([1, self.nall * 3]) - self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall]) - # sel = [5, 2] - self.sel = [5, 2] - self.nlist = np.array( - [ - [1, 3, -1, -1, -1, 2, -1], - [0, -1, -1, -1, -1, 2, -1], - [0, 1, -1, -1, -1, 0, -1], - ], - dtype=int, - ).reshape([1, self.nloc, sum(self.sel)]) - self.rcut = 0.4 - self.rcut_smth = 2.2 - - # to be merged with the tf test case @unittest.skipIf(not support_se_e2_a, "EnvMat not supported") class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist): diff --git a/source/tests/pt/test_utils.py b/source/tests/pt/test_utils.py new file mode 100644 index 0000000000..9c9a9479ad --- /dev/null +++ b/source/tests/pt/test_utils.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import torch + +from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, +) + + +class TestCvt(unittest.TestCase): + def test_to_numpy(self): + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ptp, npp in zip( + [torch.float16, torch.float32, torch.float64], + [np.float16, np.float32, np.float64], + ): + foo = foo.astype(npp) + bar = to_torch_tensor(foo) + self.assertEqual(bar.dtype, ptp) + onk = to_numpy_array(bar) + self.assertEqual(onk.dtype, npp) + with self.assertRaises(ValueError) as ee: + foo = foo.astype(np.int32) + bar = to_torch_tensor(foo) + with self.assertRaises(ValueError) as ee: + bar = to_torch_tensor(foo) + bar = to_numpy_array(bar.int()) From 4cd82586fee0b47a26bfed7471d97f9ae87dea31 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:28:37 +0800 Subject: [PATCH 05/10] Revert "Devel update (#30)" This reverts commit cb4cc67c56e648ea04e462e649fad98f25fd85d4. 
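
For orientation while reading this revert: the `InvarFitting.forward` that the earlier patch introduced (and that this commit removes again) standardizes `fparam`/`aparam`, tiles them over the atoms, concatenates them to the descriptor, and applies per-type fitting networks under a 0/1 type mask. Below is a minimal NumPy sketch of that pipeline; the sizes, the zero means and unit inverse standard deviations, and the random linear stand-in networks are all illustrative assumptions, not code from this series.

```python
import numpy as np

# toy sizes: frames, local atoms, descriptor dim, fparam/aparam dims, types
nf, nloc, nd, nfp, nap, ntypes, dim_out = 2, 3, 8, 2, 2, 2, 1
rng = np.random.default_rng(0)

descriptor = rng.normal(size=(nf, nloc, nd))
atype = rng.integers(0, ntypes, size=(nf, nloc))
fparam = rng.normal(size=(nf, nfp))
aparam = rng.normal(size=(nf, nloc, nap))

# standardize the frame parameter, then broadcast it to every atom
fparam_avg, fparam_inv_std = np.zeros(nfp), np.ones(nfp)
ff = (fparam - fparam_avg) * fparam_inv_std
ff = np.tile(ff.reshape(nf, 1, nfp), (1, nloc, 1))

# standardize the atomic parameter (it already has a per-atom axis)
aparam_avg, aparam_inv_std = np.zeros(nap), np.ones(nap)
aa = (aparam - aparam_avg) * aparam_inv_std

# concatenate along the feature axis: nf x nloc x (nd + nfp + nap)
xx = np.concatenate([descriptor, ff, aa], axis=-1)

# "distinguish_types" branch: one stand-in net per type, merged by a 0/1 mask
nets = [rng.normal(size=(nd + nfp + nap, dim_out)) for _ in range(ntypes)]
bias_atom_e = np.zeros((ntypes, dim_out))
outs = np.zeros((nf, nloc, dim_out))
for type_i in range(ntypes):
    mask = (atype == type_i)[..., None]  # nf x nloc x 1
    outs = outs + (xx @ nets[type_i] + bias_atom_e[type_i]) * mask
print(outs.shape)  # (2, 3, 1)
```

In the `use_tebd` branch the per-type loop collapses to a single network call plus `bias_atom_e[atype]`.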
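This commit also removes the `to_numpy_array`/`to_torch_tensor` helpers, whose core trick is inverting a name-keyed precision dict to translate dtypes between frameworks. A self-contained sketch of the torch-to-NumPy direction follows; the two-entry dicts are hypothetical stand-ins for the real `PRECISION_DICT`s.

```python
import numpy as np
import torch

# hypothetical stand-ins for the PT/NP PRECISION_DICTs used in deepmd
PT_PRECISION_DICT = {"float32": torch.float32, "float64": torch.float64}
NP_PRECISION_DICT = {"float32": np.float32, "float64": np.float64}


def to_numpy(xx: torch.Tensor) -> np.ndarray:
    # reverse mapping: torch dtype -> precision name -> numpy dtype
    name = {v: k for k, v in PT_PRECISION_DICT.items()}.get(xx.dtype)
    prec = NP_PRECISION_DICT.get(name)
    if prec is None:
        raise ValueError(f"unknown precision {xx.dtype}")
    return xx.detach().cpu().numpy().astype(prec)


print(to_numpy(torch.ones(2, dtype=torch.float64)).dtype)  # float64
```

The `int32` case in `test_utils.py` above exercises exactly this `ValueError` path, since integer dtypes are absent from both dicts.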
--- backend/find_tensorflow.py | 6 - backend/read_env.py | 24 +- deepmd/model_format/__init__.py | 4 - deepmd/model_format/fitting.py | 355 ----------------- deepmd/model_format/network.py | 2 - deepmd/model_format/se_e2_a.py | 10 +- deepmd/pt/model/model/dp_atomic_model.py | 10 +- deepmd/pt/model/network/mlp.py | 7 +- deepmd/pt/model/task/ener.py | 373 +++--------------- deepmd/pt/model/task/fitting.py | 13 +- deepmd/pt/model/task/task.py | 18 +- deepmd/pt/utils/utils.py | 40 -- deepmd/tf/env.py | 5 - doc/install/install-from-source.md | 15 +- source/CMakeLists.txt | 21 +- source/config/CMakeLists.txt | 14 - source/config/run_config.ini | 2 - source/lib/src/gpu/CMakeLists.txt | 6 +- .../tests/common/test_model_format_utils.py | 121 ------ source/tests/pt/test_ener_fitting.py | 181 --------- source/tests/pt/test_fitting_net.py | 24 +- source/tests/pt/test_model.py | 25 +- source/tests/pt/test_se_e2_a.py | 33 +- source/tests/pt/test_utils.py | 31 -- 24 files changed, 143 insertions(+), 1197 deletions(-) delete mode 100644 deepmd/model_format/fitting.py delete mode 100644 source/tests/pt/test_ener_fitting.py delete mode 100644 source/tests/pt/test_utils.py diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 083e2673f7..32ae62469c 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -127,12 +127,6 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. """ - if tf_version is None: - return { - "cpu": [], - "gpu": [], - "mpi": [], - } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") diff --git a/backend/read_env.py b/backend/read_env.py index bee5d607e3..2cf433181a 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -80,26 +80,16 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append("-DENABLE_IPI:BOOL=TRUE") extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): - find_libpython_requires = [] - else: - find_libpython_requires = ["find_libpython"] - cmake_args.extend( - [ - "-DENABLE_TENSORFLOW=ON", - f"-DTENSORFLOW_VERSION={tf_version}", - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", - ] - ) - else: + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): find_libpython_requires = [] - cmake_args.append("-DENABLE_TENSORFLOW=OFF") - tf_version = None + else: + find_libpython_requires = ["find_libpython"] + cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") cmake_args = [ + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] diff --git a/deepmd/model_format/__init__.py b/deepmd/model_format/__init__.py index e15f73758e..253bca3507 100644 --- a/deepmd/model_format/__init__.py +++ b/deepmd/model_format/__init__.py @@ -7,9 +7,6 @@ from .env_mat import ( EnvMat, ) -from .fitting import ( - InvarFitting, -) from .network import ( EmbeddingNet, FittingNet, @@ -37,7 +34,6 @@ ) __all__ = [ - "InvarFitting", "DescrptSeA", "EnvMat", "make_multilayer_network", diff --git a/deepmd/model_format/fitting.py b/deepmd/model_format/fitting.py deleted file mode 100644 index 904fb42b76..0000000000 --- a/deepmd/model_format/fitting.py +++ /dev/null @@ -1,355 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later 
-import copy -from typing import ( - Any, - List, - Optional, -) - -import numpy as np - -from .common import ( - DEFAULT_PRECISION, - NativeOP, -) -from .network import ( - FittingNet, - NetworkCollection, -) -from .output_def import ( - FittingOutputDef, - OutputVariableDef, - fitting_check_output, -) - - -@fitting_check_output -class InvarFitting(NativeOP): - r"""Fitting the energy (or a porperty of `dim_out`) of the system. The force and the virial can also be trained. - - Lets take the energy fitting task as an example. - The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: - - .. math:: - E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)} - \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)} - - The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by - - .. math:: - \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= - \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) - - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` - is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, - both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` - is the activation function. - - The output layer :math:`\mathcal{L}^{(n)}` is given by - - .. math:: - \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= - \mathbf{x}^T\mathbf{w}+\mathbf{b} - - where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` - is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and - :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, - both of which are trainable if `trainable[n]` is `True`. - - Parameters - ---------- - var_name - The name of the output variable. - ntypes - The number of atom types. - dim_descrpt - The dimension of the input descriptor. - dim_out - The dimension of the output fit property. - neuron - Number of neurons :math:`N` in each hidden layer of the fitting net - resnet_dt - Time-step `dt` in the resnet construction: - :math:`y = x + dt * \phi (Wx + b)` - numb_fparam - Number of frame parameter - numb_aparam - Number of atomic parameter - rcond - The condition number for the regression of atomic energy. - tot_ener_zero - Force the total energy to zero. Useful for the charge fitting. - trainable - If the weights of fitting net are trainable. - Suppose that we have :math:`N_l` hidden layers in the fitting net, - this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. - atom_ener - Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - activation_function - The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| - precision - The precision of the embedding net parameters. Supported options are |PRECISION| - layer_name : list[Optional[str]], optional - The name of the each layer. If two layers, either in the same fitting or different fittings, - have the same name, they will share the same neural network parameters. - use_aparam_as_mask: bool, optional - If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. - And the aparam will not be used as the atomic parameters for embedding. 
- distinguish_types - Different atomic types uses different fitting net. - - """ - - def __init__( - self, - var_name: str, - ntypes: int, - dim_descrpt: int, - dim_out: int, - neuron: List[int] = [120, 120, 120], - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - rcond: Optional[float] = None, - tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, - atom_ener: Optional[List[float]] = None, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, - use_aparam_as_mask: bool = False, - spin: Any = None, - distinguish_types: bool = False, - ): - # seed, uniform_seed are not included - if tot_ener_zero: - raise NotImplementedError("tot_ener_zero is not implemented") - if spin is not None: - raise NotImplementedError("spin is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") - if layer_name is not None: - raise NotImplementedError("layer_name is not implemented") - if atom_ener is not None: - raise NotImplementedError("atom_ener is not implemented") - - self.var_name = var_name - self.ntypes = ntypes - self.dim_descrpt = dim_descrpt - self.dim_out = dim_out - self.neuron = neuron - self.resnet_dt = resnet_dt - self.numb_fparam = numb_fparam - self.numb_aparam = numb_aparam - self.rcond = rcond - self.tot_ener_zero = tot_ener_zero - self.trainable = trainable - self.atom_ener = atom_ener - self.activation_function = activation_function - self.precision = precision - self.layer_name = layer_name - self.use_aparam_as_mask = use_aparam_as_mask - self.spin = spin - self.distinguish_types = distinguish_types - if self.spin is not None: - raise NotImplementedError("spin is not supported") - - # init constants - self.bias_atom_e = np.zeros([self.ntypes, self.dim_out]) - if self.numb_fparam > 0: - self.fparam_avg = np.zeros(self.numb_fparam) - self.fparam_inv_std = np.ones(self.numb_fparam) - else: - self.fparam_avg, self.fparam_inv_std = None, None - if self.numb_aparam > 0: - self.aparam_avg = np.zeros(self.numb_aparam) - self.aparam_inv_std = np.ones(self.numb_aparam) - else: - self.aparam_avg, self.aparam_inv_std = None, None - # init networks - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam - out_dim = self.dim_out - self.nets = NetworkCollection( - 1 if self.distinguish_types else 0, - self.ntypes, - network_type="fitting_network", - networks=[ - FittingNet( - in_dim, - out_dim, - self.neuron, - self.activation_function, - self.resnet_dt, - self.precision, - bias_out=True, - ) - for ii in range(self.ntypes if self.distinguish_types else 1) - ], - ) - - def output_def(self): - return FittingOutputDef( - [ - OutputVariableDef( - self.var_name, [self.dim_out], reduciable=True, differentiable=True - ), - ] - ) - - def __setitem__(self, key, value): - if key in ["bias_atom_e"]: - self.bias_atom_e = value - elif key in ["fparam_avg"]: - self.fparam_avg = value - elif key in ["fparam_inv_std"]: - self.fparam_inv_std = value - elif key in ["aparam_avg"]: - self.aparam_avg = value - elif key in ["aparam_inv_std"]: - self.aparam_inv_std = value - else: - raise KeyError(key) - - def __getitem__(self, key): - if key in ["bias_atom_e"]: - return self.bias_atom_e - elif key in ["fparam_avg"]: - return self.fparam_avg - elif key in ["fparam_inv_std"]: - return self.fparam_inv_std - elif key in ["aparam_avg"]: - return 
self.aparam_avg - elif key in ["aparam_inv_std"]: - return self.aparam_inv_std - else: - raise KeyError(key) - - def serialize(self) -> dict: - """Serialize the fitting to dict.""" - return { - "var_name": self.var_name, - "ntypes": self.ntypes, - "dim_descrpt": self.dim_descrpt, - "dim_out": self.dim_out, - "neuron": self.neuron, - "resnet_dt": self.resnet_dt, - "numb_fparam": self.numb_fparam, - "numb_aparam": self.numb_aparam, - "rcond": self.rcond, - "activation_function": self.activation_function, - "precision": self.precision, - "distinguish_types": self.distinguish_types, - "nets": self.nets.serialize(), - "@variables": { - "bias_atom_e": self.bias_atom_e, - "fparam_avg": self.fparam_avg, - "fparam_inv_std": self.fparam_inv_std, - "aparam_avg": self.aparam_avg, - "aparam_inv_std": self.aparam_inv_std, - }, - # not supported - "tot_ener_zero": self.tot_ener_zero, - "trainable": self.trainable, - "atom_ener": self.atom_ener, - "layer_name": self.layer_name, - "use_aparam_as_mask": self.use_aparam_as_mask, - "spin": self.spin, - } - - @classmethod - def deserialize(cls, data: dict) -> "InvarFitting": - data = copy.deepcopy(data) - variables = data.pop("@variables") - nets = data.pop("nets") - obj = cls(**data) - for kk in variables.keys(): - obj[kk] = variables[kk] - obj.nets = NetworkCollection.deserialize(nets) - return obj - - def call( - self, - descriptor: np.array, - atype: np.array, - gr: Optional[np.array] = None, - g2: Optional[np.array] = None, - h2: Optional[np.array] = None, - fparam: Optional[np.array] = None, - aparam: Optional[np.array] = None, - ): - """Calculate the fitting. - - Parameters - ---------- - descriptor - input descriptor. shape: nf x nloc x nd - atype - the atom type. shape: nf x nloc - gr - The rotationally equivariant and permutationally invariant single particle - representation. shape: nf x nloc x ng x 3 - g2 - The rotationally invariant pair-partical representation. - shape: nf x nloc x nnei x ng - h2 - The rotationally equivariant pair-partical representation. - shape: nf x nloc x nnei x 3 - fparam - The frame parameter. shape: nf x nfp. nfp being `numb_fparam` - aparam - The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` - - """ - nf, nloc, nd = descriptor.shape - # check input dim - if nd != self.dim_descrpt: - raise ValueError( - "get an input descriptor of dim {nd}," - "which is not consistent with {self.dim_descrpt}." 
- ) - xx = descriptor - # check fparam dim, concate to input descriptor - if self.numb_fparam > 0: - assert fparam is not None, "fparam should not be None" - if fparam.shape[-1] != self.numb_fparam: - raise ValueError( - "get an input fparam of dim {fparam.shape[-1]}, ", - "which is not consistent with {self.numb_fparam}.", - ) - fparam = (fparam - self.fparam_avg) * self.fparam_inv_std - fparam = np.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) - xx = np.concatenate( - [xx, fparam], - axis=-1, - ) - # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: - assert aparam is not None, "aparam should not be None" - if aparam.shape[-1] != self.numb_aparam: - raise ValueError( - "get an input aparam of dim {aparam.shape[-1]}, ", - "which is not consistent with {self.numb_aparam}.", - ) - aparam = (aparam - self.aparam_avg) * self.aparam_inv_std - xx = np.concatenate( - [xx, aparam], - axis=-1, - ) - - # calcualte the prediction - if self.distinguish_types: - outs = np.zeros([nf, nloc, self.dim_out]) - for type_i in range(self.ntypes): - mask = np.tile( - (atype == type_i).reshape([nf, nloc, 1]), [1, 1, self.dim_out] - ) - atom_energy = self.nets[(type_i,)](xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - outs = self.nets[()](xx) + self.bias_atom_e[atype] - return {self.var_name: outs} diff --git a/deepmd/model_format/network.py b/deepmd/model_format/network.py index f2056c0b95..a327d990c9 100644 --- a/deepmd/model_format/network.py +++ b/deepmd/model_format/network.py @@ -161,8 +161,6 @@ def __init__( ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision - # only use_timestep when skip connection is established. 
- use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng() self.w = rng.normal(size=(num_in, num_out)).astype(prec) self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None diff --git a/deepmd/model_format/se_e2_a.py b/deepmd/model_format/se_e2_a.py index f179b10ac3..28751cad8d 100644 --- a/deepmd/model_format/se_e2_a.py +++ b/deepmd/model_format/se_e2_a.py @@ -171,8 +171,9 @@ def __init__( ) self.env_mat = EnvMat(self.rcut, self.rcut_smth) self.nnei = np.sum(self.sel) - self.davg = np.zeros([self.ntypes, self.nnei, 4]) - self.dstd = np.ones([self.ntypes, self.nnei, 4]) + self.nneix4 = self.nnei * 4 + self.davg = np.zeros([self.ntypes, self.nneix4]) + self.dstd = np.ones([self.ntypes, self.nneix4]) self.orig_sel = self.sel def __setitem__(self, key, value): @@ -191,11 +192,6 @@ def __getitem__(self, key): else: raise KeyError(key) - @property - def dim_out(self): - """Returns the output dimension of this descriptor.""" - return self.neuron[-1] * self.axis_neuron - def cal_g( self, ss, diff --git a/deepmd/pt/model/model/dp_atomic_model.py b/deepmd/pt/model/model/dp_atomic_model.py index a222c8e6f6..853eacb875 100644 --- a/deepmd/pt/model/model/dp_atomic_model.py +++ b/deepmd/pt/model/model/dp_atomic_model.py @@ -93,11 +93,11 @@ def __init__( ) fitting_net["type"] = fitting_net.get("type", "ener") - fitting_net["ntypes"] = self.descriptor.get_ntype() - if self.descriptor_type in ["se_e2_a"]: - fitting_net["distinguish_types"] = True + if self.descriptor_type not in ["se_e2_a"]: + fitting_net["ntypes"] = 1 else: - fitting_net["distinguish_types"] = False + fitting_net["ntypes"] = self.descriptor.get_ntype() + fitting_net["use_tebd"] = False fitting_net["embedding_width"] = self.descriptor.dim_out self.grad_force = "direct" not in fitting_net["type"] @@ -165,5 +165,5 @@ def forward_atomic( ) assert descriptor is not None # energy, force - fit_ret = self.fitting_net(descriptor, atype, gr=rot_mat) + fit_ret = self.fitting_net(descriptor, atype, atype_tebd=None, rot_mat=rot_mat) return fit_ret diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index d76abd82f9..e3ac0e7bc2 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -56,10 +56,7 @@ def __init__( precision: str = DEFAULT_PRECISION, ): super().__init__() - # only use_timestep when skip connection is established. 
- self.use_timestep = use_timestep and ( - num_out == num_in or num_out == num_in * 2 - ) + self.use_timestep = use_timestep self.activate_name = activation_function self.activate = ActivationFn(self.activate_name) self.precision = precision @@ -210,7 +207,7 @@ class NetworkCollection(DPNetworkCollection, nn.Module): NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { "network": MLP, "embedding_network": EmbeddingNet, - "fitting_network": FittingNet, + # "fitting_network": FittingNet, } def __init__(self, *args, **kwargs): diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index e40a6bda44..03043e2fcb 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -1,13 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import copy import logging from typing import ( - List, Optional, Tuple, ) -import numpy as np import torch from deepmd.model_format import ( @@ -15,10 +12,6 @@ OutputVariableDef, fitting_check_output, ) -from deepmd.pt.model.network.mlp import ( - FittingNet, - NetworkCollection, -) from deepmd.pt.model.network.network import ( ResidualDeep, ) @@ -28,35 +21,19 @@ from deepmd.pt.utils import ( env, ) -from deepmd.pt.utils.env import ( - DEFAULT_PRECISION, - PRECISION_DICT, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, - to_torch_tensor, -) - -dtype = env.GLOBAL_PT_FLOAT_PRECISION -device = env.DEVICE +@Fitting.register("ener") @fitting_check_output -class InvarFitting(Fitting): +class EnergyFittingNet(Fitting): def __init__( self, - var_name: str, - ntypes: int, - dim_descrpt: int, - dim_out: int, - neuron: List[int] = [128, 128, 128], - bias_atom_e: Optional[torch.Tensor] = None, - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - distinguish_types: bool = False, + ntypes, + embedding_width, + neuron, + bias_atom_e, + resnet_dt=True, + use_tebd=True, **kwargs, ): """Construct a fitting net for energy. @@ -69,322 +46,67 @@ def __init__( - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() - self.var_name = var_name self.ntypes = ntypes - self.dim_descrpt = dim_descrpt - self.dim_out = dim_out - self.neuron = neuron - self.distinguish_types = distinguish_types - self.use_tebd = not self.distinguish_types - self.resnet_dt = resnet_dt - self.numb_fparam = numb_fparam - self.numb_aparam = numb_aparam - self.activation_function = activation_function - self.precision = precision - self.prec = PRECISION_DICT[self.precision] - if bias_atom_e is None: - bias_atom_e = np.zeros([self.ntypes, self.dim_out]) - bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device) - bias_atom_e = bias_atom_e.view([self.ntypes, self.dim_out]) - if not self.use_tebd: - assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" + self.embedding_width = embedding_width + self.use_tebd = use_tebd + if not use_tebd: + assert self.ntypes == len(bias_atom_e), "Element count mismatches!" 
+ bias_atom_e = torch.tensor(bias_atom_e) self.register_buffer("bias_atom_e", bias_atom_e) - # init constants - if self.numb_fparam > 0: - self.register_buffer( - "fparam_avg", - torch.zeros(self.numb_fparam, dtype=self.prec, device=device), - ) - self.register_buffer( - "fparam_inv_std", - torch.ones(self.numb_fparam, dtype=self.prec, device=device), - ) - else: - self.fparam_avg, self.fparam_inv_std = None, None - if self.numb_aparam > 0: - self.register_buffer( - "aparam_avg", - torch.zeros(self.numb_aparam, dtype=self.prec, device=device), - ) - self.register_buffer( - "aparam_inv_std", - torch.ones(self.numb_aparam, dtype=self.prec, device=device), - ) - else: - self.aparam_avg, self.aparam_inv_std = None, None - - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam - out_dim = 1 - self.old_impl = kwargs.get("old_impl", False) - if self.old_impl: - filter_layers = [] - for type_i in range(self.ntypes): - bias_type = 0.0 - one = ResidualDeep( - type_i, - self.dim_descrpt, - self.neuron, - bias_type, - resnet_dt=self.resnet_dt, - ) - filter_layers.append(one) - self.filter_layers_old = torch.nn.ModuleList(filter_layers) - self.filter_layers = None - else: - self.filter_layers = NetworkCollection( - 1 if self.distinguish_types else 0, - self.ntypes, - network_type="fitting_network", - networks=[ - FittingNet( - in_dim, - out_dim, - self.neuron, - self.activation_function, - self.resnet_dt, - self.precision, - bias_out=True, - ) - for ii in range(self.ntypes if self.distinguish_types else 1) - ], + filter_layers = [] + for type_i in range(self.ntypes): + bias_type = 0.0 + one = ResidualDeep( + type_i, embedding_width, neuron, bias_type, resnet_dt=resnet_dt ) - self.filter_layers_old = None + filter_layers.append(one) + self.filter_layers = torch.nn.ModuleList(filter_layers) - # very bad design... if "seed" in kwargs: logging.info("Set seed to %d in fitting net.", kwargs["seed"]) torch.manual_seed(kwargs["seed"]) - def output_def(self) -> FittingOutputDef: + def output_def(self): return FittingOutputDef( [ - OutputVariableDef( - self.var_name, [self.dim_out], reduciable=True, differentiable=True - ), + OutputVariableDef("energy", [1], reduciable=True, differentiable=True), ] ) - def __setitem__(self, key, value): - if key in ["bias_atom_e"]: - # correct bias_atom_e shape. 
user may provide stupid shape - self.bias_atom_e = value - elif key in ["fparam_avg"]: - self.fparam_avg = value - elif key in ["fparam_inv_std"]: - self.fparam_inv_std = value - elif key in ["aparam_avg"]: - self.aparam_avg = value - elif key in ["aparam_inv_std"]: - self.aparam_inv_std = value - else: - raise KeyError(key) - - def __getitem__(self, key): - if key in ["bias_atom_e"]: - return self.bias_atom_e - elif key in ["fparam_avg"]: - return self.fparam_avg - elif key in ["fparam_inv_std"]: - return self.fparam_inv_std - elif key in ["aparam_avg"]: - return self.aparam_avg - elif key in ["aparam_inv_std"]: - return self.aparam_inv_std - else: - raise KeyError(key) - - def serialize(self) -> dict: - """Serialize the fitting to dict.""" - return { - "var_name": self.var_name, - "ntypes": self.ntypes, - "dim_descrpt": self.dim_descrpt, - "dim_out": self.dim_out, - "neuron": self.neuron, - "resnet_dt": self.resnet_dt, - "numb_fparam": self.numb_fparam, - "numb_aparam": self.numb_aparam, - "activation_function": self.activation_function, - "precision": self.precision, - "distinguish_types": self.distinguish_types, - "nets": self.filter_layers.serialize(), - "@variables": { - "bias_atom_e": to_numpy_array(self.bias_atom_e), - "fparam_avg": to_numpy_array(self.fparam_avg), - "fparam_inv_std": to_numpy_array(self.fparam_inv_std), - "aparam_avg": to_numpy_array(self.aparam_avg), - "aparam_inv_std": to_numpy_array(self.aparam_inv_std), - }, - # "rcond": self.rcond , - # "tot_ener_zero": self.tot_ener_zero , - # "trainable": self.trainable , - # "atom_ener": self.atom_ener , - # "layer_name": self.layer_name , - # "use_aparam_as_mask": self.use_aparam_as_mask , - # "spin": self.spin , - ## NOTICE: not supported by far - "rcond": None, - "tot_ener_zero": False, - "trainable": True, - "atom_ener": None, - "layer_name": None, - "use_aparam_as_mask": False, - "spin": None, - } - - @classmethod - def deserialize(cls, data: dict) -> "InvarFitting": - data = copy.deepcopy(data) - variables = data.pop("@variables") - nets = data.pop("nets") - obj = cls(**data) - for kk in variables.keys(): - obj[kk] = to_torch_tensor(variables[kk]) - obj.filter_layers = NetworkCollection.deserialize(nets) - return obj - - def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor: - return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1]) - - def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor: - return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1]) - def forward( self, - descriptor: torch.Tensor, + inputs: torch.Tensor, atype: torch.Tensor, - gr: Optional[torch.Tensor] = None, - g2: Optional[torch.Tensor] = None, - h2: Optional[torch.Tensor] = None, - fparam: Optional[torch.Tensor] = None, - aparam: Optional[torch.Tensor] = None, + atype_tebd: Optional[torch.Tensor] = None, + rot_mat: Optional[torch.Tensor] = None, ): """Based on embedding net output, alculate total energy. Args: - - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width]. - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. Returns ------- - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. """ - xx = descriptor - nf, nloc, nd = xx.shape - # NOTICE in tests/pt/test_model.py - # it happens that the user directly access the data memeber self.bias_atom_e - # and set it to a wrong shape! 
- self.bias_atom_e = self.bias_atom_e.view([self.ntypes, self.dim_out]) - # check input dim - if nd != self.dim_descrpt: - raise ValueError( - "get an input descriptor of dim {nd}," - "which is not consistent with {self.dim_descrpt}." - ) - # check fparam dim, concate to input descriptor - if self.numb_fparam > 0: - assert fparam is not None, "fparam should not be None" - assert self.fparam_avg is not None - assert self.fparam_inv_std is not None - if fparam.shape[-1] != self.numb_fparam: - raise ValueError( - "get an input fparam of dim {fparam.shape[-1]}, ", - "which is not consistent with {self.numb_fparam}.", - ) - nb, _ = fparam.shape - t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb) - t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb) - fparam = (fparam - t_fparam_avg) * t_fparam_inv_std - fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) - xx = torch.cat( - [xx, fparam], - dim=-1, - ) - # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: - assert aparam is not None, "aparam should not be None" - assert self.aparam_avg is not None - assert self.aparam_inv_std is not None - if aparam.shape[-1] != self.numb_aparam: - raise ValueError( - "get an input aparam of dim {aparam.shape[-1]}, ", - "which is not consistent with {self.numb_aparam}.", - ) - nb, nloc, _ = aparam.shape - t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc) - t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc) - aparam = (aparam - t_aparam_avg) * t_aparam_inv_std - xx = torch.cat( - [xx, aparam], - dim=-1, - ) - outs = torch.zeros_like(atype).unsqueeze(-1) # jit assertion - if self.old_impl: - outs = torch.zeros_like(atype).unsqueeze(-1) # jit assertion - assert self.filter_layers_old is not None - if self.use_tebd: - atom_energy = self.filter_layers_old[0](xx) + self.bias_atom_e[ - atype - ].unsqueeze(-1) - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - for type_i, filter_layer in enumerate(self.filter_layers_old): - mask = atype == type_i - atom_energy = filter_layer(xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask.unsqueeze(-1) - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} + if self.use_tebd: + if atype_tebd is not None: + inputs = torch.concat([inputs, atype_tebd], dim=-1) + atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[ + atype + ].unsqueeze(-1) + outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] else: - if self.use_tebd: - atom_energy = ( - self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] - ) + for type_i, filter_layer in enumerate(self.filter_layers): + mask = atype == type_i + atom_energy = filter_layer(inputs) + atom_energy = atom_energy + self.bias_atom_e[type_i] + atom_energy = atom_energy * mask.unsqueeze(-1) outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - for type_i, ll in enumerate(self.filter_layers.networks): - mask = (atype == type_i).unsqueeze(-1) - mask = torch.tile(mask, (1, 1, self.dim_out)) - atom_energy = ll(xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} - - -@Fitting.register("ener") -class EnergyFittingNet(InvarFitting): - def __init__( - self, - ntypes: int, - embedding_width: int, - neuron: List[int] = [128, 128, 128], 
- bias_atom_e: Optional[torch.Tensor] = None, - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - use_tebd: bool = True, - **kwargs, - ): - super().__init__( - "energy", - ntypes, - embedding_width, - 1, - neuron=neuron, - bias_atom_e=bias_atom_e, - resnet_dt=resnet_dt, - numb_fparam=numb_fparam, - numb_aparam=numb_aparam, - activation_function=activation_function, - precision=precision, - use_tebd=use_tebd, - **kwargs, - ) + return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} @Fitting.register("direct_force") @@ -414,7 +136,7 @@ def __init__( """ super().__init__() self.ntypes = ntypes - self.dim_descrpt = embedding_width + self.embedding_width = embedding_width self.use_tebd = use_tebd self.out_dim = out_dim if not use_tebd: @@ -464,12 +186,13 @@ def forward( self, inputs: torch.Tensor, atype: torch.Tensor, - gr: Optional[torch.Tensor] = None, + atype_tebd: Optional[torch.Tensor] = None, + rot_mat: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, None]: """Based on embedding net output, alculate total energy. Args: - - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width]. - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. Returns @@ -478,19 +201,19 @@ def forward( """ nframes, nloc, _ = inputs.size() if self.use_tebd: - # if atype_tebd is not None: - # inputs = torch.concat([inputs, atype_tebd], dim=-1) + if atype_tebd is not None: + inputs = torch.concat([inputs, atype_tebd], dim=-1) vec_out = self.filter_layers_dipole[0]( inputs ) # Shape is [nframes, nloc, m1] assert list(vec_out.size()) == [nframes, nloc, self.out_dim] # (nf x nloc) x 1 x od vec_out = vec_out.view(-1, 1, self.out_dim) - assert gr is not None + assert rot_mat is not None # (nf x nloc) x od x 3 - gr = gr.view(-1, self.out_dim, 3) + rot_mat = rot_mat.view(-1, self.out_dim, 3) vec_out = ( - torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3) + torch.bmm(vec_out, rot_mat).squeeze(-2).view(nframes, nloc, 3) ) # Shape is [nframes, nloc, 3] else: vec_out = torch.zeros_like(atype).unsqueeze(-1) # jit assertion diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index c6fb6b27e1..16e80f9c20 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -7,6 +7,9 @@ import numpy as np import torch +from deepmd.model_format import ( + FittingOutputDef, +) from deepmd.pt.model.task.task import ( TaskBaseMethod, ) @@ -58,9 +61,17 @@ def __new__(cls, *args, **kwargs): if fitting_type in Fitting.__plugins.plugins: cls = Fitting.__plugins.plugins[fitting_type] else: - raise RuntimeError("Unknown fitting type: " + fitting_type) + raise RuntimeError("Unknown descriptor type: " + fitting_type) return super().__new__(cls) + def output_def(self) -> FittingOutputDef: + """Definition for the task Output.""" + raise NotImplementedError + + def forward(self, **kwargs): + """Task Output.""" + raise NotImplementedError + def share_params(self, base_class, shared_level, resume=False): assert ( self.__class__ == base_class.__class__ diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py index b2dc03e4bd..a9b2efeb9a 100644 --- a/deepmd/pt/model/task/task.py +++ b/deepmd/pt/model/task/task.py @@ -1,18 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from abc import ( - ABC, - abstractmethod, -) - import torch -from 
deepmd.model_format import ( - FittingOutputDef, -) +class TaskBaseMethod(torch.nn.Module): + def __init__(self, **kwargs): + """Construct a basic head for different tasks.""" + super().__init__() -class TaskBaseMethod(torch.nn.Module, ABC): - @abstractmethod - def output_def(self) -> FittingOutputDef: - """Definition for the task Output.""" + def forward(self, **kwargs): + """Task Output.""" raise NotImplementedError diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index e83e12f608..780dbf7e62 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -4,17 +4,9 @@ Optional, ) -import numpy as np import torch import torch.nn.functional as F -from deepmd.model_format.common import PRECISION_DICT as NP_PRECISION_DICT - -from .env import ( - DEVICE, -) -from .env import PRECISION_DICT as PT_PRECISION_DICT - def get_activation_fn(activation: str) -> Callable: """Returns the activation function corresponding to `activation`.""" @@ -49,35 +41,3 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x else: raise RuntimeError(f"activation function {self.activation} not supported") - - -def to_numpy_array( - xx: torch.Tensor, -) -> np.ndarray: - if xx is None: - return None - assert xx is not None - # Create a reverse mapping of PT_PRECISION_DICT - reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()} - # Use the reverse mapping to find keys with the desired value - prec = reverse_precision_dict.get(xx.dtype, None) - prec = NP_PRECISION_DICT.get(prec, None) - if prec is None: - raise ValueError(f"unknown precision {xx.dtype}") - return xx.detach().cpu().numpy().astype(prec) - - -def to_torch_tensor( - xx: np.ndarray, -) -> torch.Tensor: - if xx is None: - return None - assert xx is not None - # Create a reverse mapping of NP_PRECISION_DICT - reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()} - # Use the reverse mapping to find keys with the desired value - prec = reverse_precision_dict.get(type(xx.flat[0]), None) - prec = PT_PRECISION_DICT.get(prec, None) - if prec is None: - raise ValueError(f"unknown precision {xx.dtype}") - return torch.tensor(xx, dtype=prec, device=DEVICE) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index eada2774d3..da03631689 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -472,11 +472,6 @@ def _get_package_constants( GLOBAL_CONFIG = _get_package_constants() -if GLOBAL_CONFIG["enable_tensorflow"] == "0": - raise RuntimeError( - "TensorFlow backend is not built. To enable it, " - "set the environmental variable DP_ENABLE_TENSORFLOW=1." - ) MODEL_VERSION = GLOBAL_CONFIG["model_version"] TF_VERSION = GLOBAL_CONFIG["tf_version"] TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 389cc78c9f..ae1509f2ca 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -90,17 +90,7 @@ Check the compiler version on your machine gcc --version ``` -The compiler GCC 4.8 or later is supported in the DeePMD-kit. - -::::{tab-set} - -:::{tab-item} TensorFlow {{ tensorflow_icon }} - -Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. 
It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
-
-:::
-
-::::
+The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.

 Execute

 ```bash
@@ -115,8 +105,7 @@ One may set the following environment variables before executing `pip`:
 | DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
 | CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
 | ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
-| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend.
-| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
+| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
 | DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
 | CMAKE_ARGS | str | - | Additional CMake arguments |
 | <LANG>FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). 
|
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index d6ee3d0958..c273bc9263 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -154,22 +154,7 @@ if(ENABLE_TENSORFLOW AND NOT DEEPMD_C_ROOT)
 endif()
 if(ENABLE_PYTORCH AND NOT DEEPMD_C_ROOT)
 find_package(Torch REQUIRED)
- string(REGEX MATCH "_GLIBCXX_USE_CXX11_ABI=([0-9]+)" CXXABI_PT_MATCH
- ${TORCH_CXX_FLAGS})
- if(CXXABI_PT_MATCH)
- message(STATUS "PyTorch CXX11 ABI: ${CMAKE_MATCH_1}")
- if(DEFINED OP_CXX_ABI)
- if(NOT ${CMAKE_MATCH_1} EQUAL ${OP_CXX_ABI})
- message(
- FATAL_ERROR
- "PyTorch CXX11 ABI mismatch TensorFlow: ${CMAKE_MATCH_1} != ${OP_CXX_ABI}"
- )
- endif()
- else()
- set(OP_CXX_ABI ${CMAKE_MATCH_1})
- add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
- endif()
- endif()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 endif()
 # log enabled backends
 if(NOT DEEPMD_C_ROOT)
@@ -180,9 +165,7 @@ if(NOT DEEPMD_C_ROOT)
 if(ENABLE_PYTORCH)
 message(STATUS "- PyTorch")
 endif()
- if(NOT ENABLE_TENSORFLOW
- AND NOT ENABLE_PYTORCH
- AND NOT BUILD_PY_IF)
+ if(NOT ENABLE_TENSORFLOW AND NOT ENABLE_PYTORCH)
 message(FATAL_ERROR "No backend is enabled.")
 endif()
 endif()
diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt
index b1ce17566f..5473b91f29 100644
--- a/source/config/CMakeLists.txt
+++ b/source/config/CMakeLists.txt
@@ -1,19 +1,5 @@
 # config
-# cmake will treat true, false, on, off, 1, 0 as booleans we hope an easy way to
-# check it
-if(ENABLE_TENSORFLOW)
- set(ENABLE_TENSORFLOW 1)
-else()
- set(ENABLE_TENSORFLOW 0)
-endif()
-
-if(ENABLE_PYTORCH)
- set(ENABLE_PYTORCH 1)
-else()
- set(ENABLE_PYTORCH 0)
-endif()
-
 configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini"
 @ONLY)
diff --git a/source/config/run_config.ini b/source/config/run_config.ini
index 11f4100e61..3f0a7a33a8 100644
--- a/source/config/run_config.ini
+++ b/source/config/run_config.ini
@@ -4,8 +4,6 @@ GIT_SUMM = @GIT_SUMM@
 GIT_HASH = @GIT_HASH@
 GIT_DATE = @GIT_DATE@
 GIT_BRANCH = @GIT_BRANCH@
-ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@
-ENABLE_PYTORCH = @ENABLE_PYTORCH@
 TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
 TF_LIBS = @TensorFlow_LIBRARY@
 TF_VERSION = @TENSORFLOW_VERSION@
diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index 804e1c0506..3bd24cc620 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -10,10 +10,8 @@ if(USE_CUDA_TOOLKIT)
 endif()
 enable_language(CUDA)
 set(CMAKE_CUDA_STANDARD 11)
- if(DEFINED OP_CXX_ABI)
- add_compile_definitions(
- "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
- endif()
+ add_compile_definitions(
+ "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
 find_package(CUDAToolkit REQUIRED)
diff --git a/source/tests/common/test_model_format_utils.py b/source/tests/common/test_model_format_utils.py
index cb85fd2bb2..da76c53ed9 100644
--- a/source/tests/common/test_model_format_utils.py
+++ b/source/tests/common/test_model_format_utils.py
@@ -13,7 +13,6 @@
 EmbeddingNet,
 EnvMat,
 FittingNet,
- InvarFitting,
 NativeLayer,
 NativeNet,
 NetworkCollection,
@@ -370,123 +369,3 @@ def test_self_consistency(
 mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist)
 for ii in [0, 1, 4]:
 np.testing.assert_allclose(mm0[ii], mm1[ii])
-
-
-class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
- def setUp(self):
- TestCaseSingleFrameWithNlist.setUp(self)
-
- def test_self_consistency(
- self,
- ):
- rng = np.random.default_rng()
- nf, nloc, nnei = self.nlist.shape
- ds = DescrptSeA(self.rcut, 
self.rcut_smth, self.sel) - dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) - atype = self.atype_ext[:, :nloc] - - for ( - distinguish_types, - od, - nfp, - nap, - ) in itertools.product( - [True, False], - [1, 2], - [0, 3], - [0, 4], - ): - ifn0 = InvarFitting( - "energy", - self.nt, - ds.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - distinguish_types=distinguish_types, - ) - ifn1 = InvarFitting.deserialize(ifn0.serialize()) - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp)) - else: - ifp = None - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap)) - else: - iap = None - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - ret1 = ifn1(dd[0], atype, fparam=ifp, aparam=iap) - np.testing.assert_allclose(ret0["energy"], ret1["energy"]) - - def test_self_exception( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel) - dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) - atype = self.atype_ext[:, :nloc] - - for ( - distinguish_types, - od, - nfp, - nap, - ) in itertools.product( - [True, False], - [1, 2], - [0, 3], - [0, 4], - ): - ifn0 = InvarFitting( - "energy", - self.nt, - ds.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - distinguish_types=distinguish_types, - ) - - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp)) - else: - ifp = None - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap)) - else: - iap = None - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0][:, :, :-2], atype, fparam=ifp, aparam=iap) - self.assertIn("input descriptor", context.exception) - - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp - 1)) - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - self.assertIn("input fparam", context.exception) - - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap - 1)) - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - self.assertIn("input aparam", context.exception) - - def test_get_set(self): - ifn0 = InvarFitting( - "energy", - self.nt, - 3, - 1, - ) - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ii in [ - "bias_atom_e", - "fparam_avg", - "fparam_inv_std", - "aparam_avg", - "aparam_inv_std", - ]: - ifn0[ii] = foo - np.testing.assert_allclose(foo, ifn0[ii]) diff --git a/source/tests/pt/test_ener_fitting.py b/source/tests/pt/test_ener_fitting.py deleted file mode 100644 index eece8447df..0000000000 --- a/source/tests/pt/test_ener_fitting.py +++ /dev/null @@ -1,181 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import itertools -import unittest - -import numpy as np -import torch - -from deepmd.model_format import InvarFitting as DPInvarFitting -from deepmd.pt.model.descriptor.se_a import ( - DescrptSeA, -) -from deepmd.pt.model.task.ener import ( - EnergyFittingNet, - InvarFitting, -) -from deepmd.pt.utils import ( - env, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, -) - -from .test_env_mat import ( - TestCaseSingleFrameWithNlist, -) - -dtype = env.GLOBAL_PT_FLOAT_PRECISION - - -class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): - def setUp(self): - TestCaseSingleFrameWithNlist.setUp(self) - - def test_consistency( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) - rd0, _, _, _, _ = dd0( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - 
torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - - for od, distinguish_types, nfp, nap in itertools.product( - [1, 3], - [True, False], - [0, 3], - [0, 4], - ): - ft0 = InvarFitting( - "foo", - self.nt, - dd0.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - use_tebd=(not distinguish_types), - ).to(env.DEVICE) - ft1 = DPInvarFitting.deserialize(ft0.serialize()) - ft2 = InvarFitting.deserialize(ft0.serialize()) - - if nfp > 0: - ifp = torch.tensor( - rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE - ) - else: - ifp = None - if nap > 0: - iap = torch.tensor( - rng.normal(size=(self.nf, self.nloc, nap)), - dtype=dtype, - device=env.DEVICE, - ) - else: - iap = None - - ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap) - ret1 = ft1( - rd0.detach().cpu().numpy(), - atype.detach().cpu().numpy(), - fparam=to_numpy_array(ifp), - aparam=to_numpy_array(iap), - ) - ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap) - np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - ret1["foo"], - ) - np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - to_numpy_array(ret2["foo"]), - ) - - def test_new_old( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) - rd0, _, _, _, _ = dd( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - - od = 1 - for distinguish_types in itertools.product( - [True, False], - ): - ft0 = EnergyFittingNet( - self.nt, - dd.dim_out, - distinguish_types=distinguish_types, - ).to(env.DEVICE) - ft1 = EnergyFittingNet( - self.nt, - dd.dim_out, - distinguish_types=distinguish_types, - old_impl=True, - ).to(env.DEVICE) - dd0 = ft0.state_dict() - dd1 = ft1.state_dict() - for kk, vv in dd1.items(): - new_kk = kk - new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks") - new_kk = new_kk.replace("deep_layers", "layers") - new_kk = new_kk.replace("final_layer", "layers.3") - dd1[kk] = dd0[new_kk] - if kk.split(".")[-1] in ["idt", "bias"]: - dd1[kk] = dd1[kk].unsqueeze(0) - dd1["bias_atom_e"] = dd0["bias_atom_e"] - ft1.load_state_dict(dd1) - ret0 = ft0(rd0, atype) - ret1 = ft1(rd0, atype) - np.testing.assert_allclose( - to_numpy_array(ret0["energy"]), - to_numpy_array(ret1["energy"]), - ) - - def test_jit( - self, - ): - for od, distinguish_types, nfp, nap in itertools.product( - [1, 3], - [True, False], - [0, 3], - [0, 4], - ): - ft0 = InvarFitting( - "foo", - self.nt, - 9, - od, - numb_fparam=nfp, - numb_aparam=nap, - use_tebd=(not distinguish_types), - ).to(env.DEVICE) - torch.jit.script(ft0) - - def test_get_set(self): - ifn0 = InvarFitting( - "energy", - self.nt, - 3, - 1, - ) - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ii in [ - "bias_atom_e", - "fparam_avg", - "fparam_inv_std", - "aparam_avg", - "aparam_inv_std", - ]: - ifn0[ii] = torch.tensor(foo, dtype=dtype, device=env.DEVICE) - np.testing.assert_allclose(foo, ifn0[ii].detach().cpu().numpy()) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index ed2c428de5..3feb4f4739 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -102,25 +102,25 
@@ def test_consistency(self):
 my_fn = EnergyFittingNet(
 self.ntypes,
 self.embedding_width,
- neuron=self.n_neuron,
- bias_atom_e=self.dp_fn.bias_atom_e,
- distinguish_types=True,
+ self.n_neuron,
+ self.dp_fn.bias_atom_e,
+ use_tebd=False,
 )
 for name, param in my_fn.named_parameters():
- matched = re.match(
- "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name
- )
+ matched = re.match("filter_layers\.(\d).deep_layers\.(\d)\.([a-z]+)", name)
 key = None
 if matched:
- if int(matched.group(2)) == len(self.n_neuron):
- layer_id = -1
- else:
- layer_id = matched.group(2)
 key = gen_key(
 type_id=matched.group(1),
- layer_id=layer_id,
+ layer_id=matched.group(2),
 w_or_b=matched.group(3),
 )
+ else:
+ matched = re.match("filter_layers\.(\d).final_layer\.([a-z]+)", name)
+ if matched:
+ key = gen_key(
+ type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2)
+ )
 assert key is not None
 var = values[key]
 with torch.no_grad():
@@ -132,7 +132,7 @@ def test_consistency(self):
 ret = my_fn(embedding, atype)
 my_energy = ret["energy"]
 my_energy = my_energy.detach()
- np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1]))
+ self.assertTrue(np.allclose(dp_energy, my_energy.numpy().reshape([-1])))


 if __name__ == "__main__":
diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py
index c6595e6471..5bbbc9e352 100644
--- a/source/tests/pt/test_model.py
+++ b/source/tests/pt/test_model.py
@@ -53,24 +53,23 @@
 VariableState = collections.namedtuple("VariableState", ["value", "gradient"])


-def torch2tf(torch_name, last_layer_id=None):
+def torch2tf(torch_name):
 fields = torch_name.split(".")
 offset = int(fields[2] == "networks")
 element_id = int(fields[2 + offset])
 if fields[0] == "descriptor":
 layer_id = int(fields[4 + offset]) + 1
 weight_type = fields[5 + offset]
- ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id)
- elif fields[0] == "fitting_net":
- layer_id = int(fields[4 + offset])
- weight_type = fields[5 + offset]
- if layer_id != last_layer_id:
- ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type)
- else:
- ret = "final_layer_type_%d/%s:0" % (element_id, weight_type)
+ return "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id)
+ elif fields[3] == "deep_layers":
+ layer_id = int(fields[4])
+ weight_type = fields[5]
+ return "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type)
+ elif fields[3] == "final_layer":
+ weight_type = fields[4]
+ return "final_layer_type_%d/%s:0" % (element_id, weight_type)
 else:
 raise RuntimeError("Unexpected parameter name: %s" % torch_name)
- return ret


 class DpTrainer:
@@ -291,7 +290,7 @@ def test_consistency(self):
 "neuron": self.filter_neuron,
 "axis_neuron": self.axis_neuron,
 },
- "fitting_net": {"neuron": self.n_neuron, "distinguish_types": True},
+ "fitting_net": {"neuron": self.n_neuron},
 "data_stat_nbatch": self.data_stat_nbatch,
 "type_map": self.type_map,
 },
@@ -324,7 +323,7 @@ def test_consistency(self):
 # Keep parameter value consistency between 2 implementations
 for name, param in my_model.named_parameters():
 name = name.replace("sea.", "")
- var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+ var_name = torch2tf(name)
 var = vs_dict[var_name].value
 with torch.no_grad():
 src = torch.from_numpy(var)
@@ -405,7 +404,7 @@ def step(step_id):
 for name, param in my_model.named_parameters():
 name = name.replace("sea.", "")
- var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+ var_name = torch2tf(name)
 var_grad = vs_dict[var_name].gradient
 param_grad = 
param.grad.cpu() var_grad = torch.tensor(var_grad) diff --git a/source/tests/pt/test_se_e2_a.py b/source/tests/pt/test_se_e2_a.py index 0da80ea1ea..c0a106cb16 100644 --- a/source/tests/pt/test_se_e2_a.py +++ b/source/tests/pt/test_se_e2_a.py @@ -25,9 +25,6 @@ PRECISION_DICT, ) -from .test_env_mat import ( - TestCaseSingleFrameWithNlist, -) from .test_mlp import ( get_tols, ) @@ -35,6 +32,36 @@ dtype = env.GLOBAL_PT_FLOAT_PRECISION +class TestCaseSingleFrameWithNlist: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 3 + self.nall = 4 + self.nf, self.nt = 1, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall * 3]) + self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall]) + # sel = [5, 2] + self.sel = [5, 2] + self.nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1], + [0, -1, -1, -1, -1, 2, -1], + [0, 1, -1, -1, -1, 0, -1], + ], + dtype=int, + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 0.4 + self.rcut_smth = 2.2 + + # to be merged with the tf test case @unittest.skipIf(not support_se_e2_a, "EnvMat not supported") class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist): diff --git a/source/tests/pt/test_utils.py b/source/tests/pt/test_utils.py deleted file mode 100644 index 9c9a9479ad..0000000000 --- a/source/tests/pt/test_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import unittest - -import numpy as np -import torch - -from deepmd.pt.utils.utils import ( - to_numpy_array, - to_torch_tensor, -) - - -class TestCvt(unittest.TestCase): - def test_to_numpy(self): - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ptp, npp in zip( - [torch.float16, torch.float32, torch.float64], - [np.float16, np.float32, np.float64], - ): - foo = foo.astype(npp) - bar = to_torch_tensor(foo) - self.assertEqual(bar.dtype, ptp) - onk = to_numpy_array(bar) - self.assertEqual(onk.dtype, npp) - with self.assertRaises(ValueError) as ee: - foo = foo.astype(np.int32) - bar = to_torch_tensor(foo) - with self.assertRaises(ValueError) as ee: - bar = to_torch_tensor(foo) - bar = to_numpy_array(bar.int()) From 07e0d967d944de8a798058314083232b89c1f31c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:39:49 +0800 Subject: [PATCH 06/10] Fix dataloader stuck on GPU --- deepmd/pt/utils/dataloader.py | 2 -- deepmd/pt/utils/dataset.py | 36 +++++++++--------------- deepmd/pt/utils/preprocess.py | 30 ++++++++++---------- deepmd/pt/utils/stat.py | 4 +-- source/tests/pt/test_descriptor.py | 10 +++---- source/tests/pt/test_embedding_net.py | 13 +++++---- source/tests/pt/test_model.py | 11 +++++--- source/tests/pt/test_saveload_dpa1.py | 4 +-- source/tests/pt/test_saveload_se_e2_a.py | 4 +-- 9 files changed, 54 insertions(+), 60 deletions(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 5408452703..7a6684e82e 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -276,13 +276,11 @@ def collate_batch(batch): result[key] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, ) else: result[key] = torch.zeros( (n_frames, natoms_extended), dtype=torch.long, - device=env.DEVICE, ) for i in range(len(batch)): natoms_tmp = list[i].shape[0] diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index b886dbb786..83c147ef8f 100644 --- a/deepmd/pt/utils/dataset.py +++ 
b/deepmd/pt/utils/dataset.py @@ -479,8 +479,7 @@ def preprocess(self, batch): else: batch[kk] = torch.tensor( batch[kk], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( @@ -489,7 +488,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor(batch[kk], dtype=torch.long, device=env.DEVICE) + batch[kk] = torch.tensor(batch[kk], dtype=torch.long) batch["atype"] = batch.pop("type") keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] @@ -523,11 +522,10 @@ def preprocess(self, batch): natoms_extended = max([item.shape[0] for item in shift]) batch["shift"] = torch.zeros( (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long, device=env.DEVICE + (n_frames, natoms_extended), dtype=torch.long ) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] @@ -565,15 +563,14 @@ def single_preprocess(self, batch, sid): else: batch[kk] = torch.tensor( batch[kk][sid], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long, device=env.DEVICE + batch[kk][sid], dtype=torch.long ) clean_coord = batch.pop("coord") clean_type = batch.pop("type") @@ -669,14 +666,13 @@ def single_preprocess(self, batch, sid): noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool, device=env.DEVICE + coord_mask, dtype=torch.bool ) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( np.zeros_like(coord_mask, dtype=bool), - dtype=torch.bool, - device=env.DEVICE, + dtype=torch.bool ) # add mask for type @@ -684,14 +680,13 @@ def single_preprocess(self, batch, sid): masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool, device=env.DEVICE + type_mask, dtype=torch.bool ) else: masked_type = clean_type batch["type_mask"] = torch.tensor( np.zeros_like(type_mask, dtype=bool), - dtype=torch.bool, - device=env.DEVICE, + dtype=torch.bool ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) @@ -801,7 +796,7 @@ def __len__(self): def __getitem__(self, index): """Get a frame from the selected system.""" b_data = self._data_system._get_item(index) - b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec) return b_data @@ -876,7 +871,7 @@ def __getitem__(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -887,7 +882,7 @@ def get_training_batch(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = 
self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -896,10 +891,7 @@ def get_batch(self, sys_idx=None): """TF-compatible batch for testing.""" pt_batch = self[sys_idx] np_batch = {} - for key in ["coord", "box", "force", "energy", "virial"]: - if key in pt_batch.keys(): - np_batch[key] = pt_batch[key].cpu().numpy() - for key in ["atype", "natoms"]: + for key in ["coord", "box", "force", "energy", "virial", "atype", "natoms"]: if key in pt_batch.keys(): np_batch[key] = pt_batch[key].cpu().numpy() batch_size = pt_batch["coord"].shape[0] diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 3ea26d0041..27acdb9209 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -99,7 +99,7 @@ def build_inside_clist(coord, region: Region3D, ncell): cell_offset[cell_offset < 0] = 0 delta = cell_offset - ncell a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms - arange = torch.arange(0, loc_ncell, 1, device=env.DEVICE) + arange = torch.arange(0, loc_ncell, 1) cellid = a2c == arange.unsqueeze(-1) # one hot cellid c2a = cellid.nonzero() lst = [] @@ -131,17 +131,17 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): # add ghost atoms a2c, c2a = build_inside_clist(coord, region, ncell) - xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.DEVICE) - yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.DEVICE) - zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.DEVICE) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long, device=env.DEVICE + [1, 0, 0], dtype=torch.long ) xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long, device=env.DEVICE + [0, 1, 0], dtype=torch.long ) xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long, device=env.DEVICE + [0, 0, 1], dtype=torch.long ) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) @@ -165,7 +165,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord = torch.cat([coord, tmp_coord]) merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = torch.cat([atype, tmp_atype]) - merged_mapping = torch.cat([torch.arange(atype.numel(), device=env.DEVICE), aid]) + merged_mapping = torch.cat([torch.arange(atype.numel()), aid]) return merged_coord_shift, merged_atype, merged_mapping @@ -187,7 +187,7 @@ def build_neighbor_list( distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool, device=env.DEVICE) * DISTANCE_INF + torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF ) if min_check: if distance.min().abs() < 1e-6: @@ -195,9 +195,9 @@ def build_neighbor_list( if not type_split: sec = sec[-1:] lst = [] - nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_loc = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_type = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 + nlist = torch.zeros((nloc, sec[-1].item())).long() 
- 1
+ nlist_loc = torch.zeros((nloc, sec[-1].item())).long() - 1
+ nlist_type = torch.zeros((nloc, sec[-1].item())).long() - 1
 for i, nnei in enumerate(sec):
 if i > 0:
 nnei = nnei - sec[i - 1]
@@ -210,8 +210,8 @@ def build_neighbor_list(
 _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False)
 else:
 # when nnei > nall
- indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1
- _sorted = torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF
+ indices = torch.zeros((nloc, nnei)).long() - 1
+ _sorted = torch.ones((nloc, nnei)).long() * DISTANCE_INF
 _sorted_nnei, indices_nnei = torch.topk(
 tmp, tmp.shape[1], dim=1, largest=False
 )
@@ -275,7 +275,7 @@ def make_env_mat(
 else:
 merged_coord_shift = torch.zeros_like(coord)
 merged_atype = atype.clone()
- merged_mapping = torch.arange(atype.numel(), device=env.DEVICE)
+ merged_mapping = torch.arange(atype.numel())
 merged_coord = coord.clone()

 # build nlist
diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
index 7fffd15ca1..4826d0fb88 100644
--- a/deepmd/pt/utils/stat.py
+++ b/deepmd/pt/utils/stat.py
@@ -62,13 +62,11 @@ def make_stat_input(datasets, dataloaders, nbatches):
 shape = torch.zeros(
 (n_frames, extend, 3),
 dtype=env.GLOBAL_PT_FLOAT_PRECISION,
- device=env.DEVICE,
 )
 else:
 shape = torch.zeros(
 (n_frames, extend),
- dtype=torch.long,
- device=env.DEVICE,
+ dtype=torch.long
 )
 for i in range(len(item)):
 natoms_tmp = l[i].shape[0]
 shape[i, :natoms_tmp] = l[i]
diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py
index 4f31bac7bf..2dd996349b 100644
--- a/source/tests/pt/test_descriptor.py
+++ b/source/tests/pt/test_descriptor.py
@@ -131,15 +131,15 @@ def test_consistency(self):
 stddev=std_ones.detach().cpu(),
 )

- pt_coord = self.pt_batch["coord"]
+ pt_coord = self.pt_batch["coord"].to(env.DEVICE)
 pt_coord.requires_grad_(True)
- index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3)
+ index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE)
 extended_coord = torch.gather(pt_coord, dim=1, index=index)
- extended_coord = extended_coord - self.pt_batch["shift"]
+ extended_coord = extended_coord - self.pt_batch["shift"].to(env.DEVICE)
 my_d, _, _ = prod_env_mat_se_a(
 extended_coord.to(DEVICE),
- self.pt_batch["nlist"],
- self.pt_batch["atype"],
+ self.pt_batch["nlist"].to(env.DEVICE),
+ self.pt_batch["atype"].to(env.DEVICE),
 avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE),
 std_ones.reshape([-1, self.nnei, 4]).to(DEVICE),
 self.rcut,
diff --git a/source/tests/pt/test_embedding_net.py b/source/tests/pt/test_embedding_net.py
index fc98ddc9f9..312af1a8c8 100644
--- a/source/tests/pt/test_embedding_net.py
+++ b/source/tests/pt/test_embedding_net.py
@@ -7,6 +7,9 @@
 import numpy as np
 import tensorflow.compat.v1 as tf
 import torch
+from deepmd.pt.utils import (
+ env,
+)

 tf.disable_eager_execution()

@@ -148,18 +151,18 @@ def test_consistency(self):
 # Keep parameter value consistency between 2 implementations
 param.data.copy_(torch.from_numpy(var))

- pt_coord = self.torch_batch["coord"]
+ pt_coord = self.torch_batch["coord"].to(env.DEVICE)
 pt_coord.requires_grad_(True)
- index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3)
+ index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE)
 extended_coord = torch.gather(pt_coord, dim=1, index=index)
 extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE)
 extended_atype = torch.gather(
- self.torch_batch["atype"], dim=1, 
index=self.torch_batch["mapping"] + self.torch_batch["atype"].to(env.DEVICE), dim=1, index=self.torch_batch["mapping"].to(env.DEVICE) ) descriptor_out, _, _, _, _ = descriptor( extended_coord, extended_atype, - self.torch_batch["nlist"], + self.torch_batch["nlist"].to(env.DEVICE), ) my_embedding = descriptor_out.cpu().detach().numpy() fake_energy = torch.sum(descriptor_out) diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 5bbbc9e352..f382ce4b4c 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -6,6 +6,9 @@ import numpy as np import tensorflow.compat.v1 as tf import torch +from deepmd.pt.utils import ( + env, +) tf.disable_eager_execution() @@ -339,10 +342,10 @@ def test_consistency(self): batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) model_predict = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=True + batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=True ) model_predict_1 = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=False + batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=False ) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], @@ -356,8 +359,8 @@ def test_consistency(self): "force": p_force, } label = { - "energy": batch["energy"], - "force": batch["force"], + "energy": batch["energy"].to(env.DEVICE), + "force": batch["force"].to(env.DEVICE), } loss, _ = my_loss(model_pred, label, int(batch["natoms"][0, 0]), cur_lr) np.testing.assert_allclose( diff --git a/source/tests/pt/test_saveload_dpa1.py b/source/tests/pt/test_saveload_dpa1.py index d1043f7029..1b4c41a204 100644 --- a/source/tests/pt/test_saveload_dpa1.py +++ b/source/tests/pt/test_saveload_dpa1.py @@ -129,13 +129,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self): diff --git a/source/tests/pt/test_saveload_se_e2_a.py b/source/tests/pt/test_saveload_se_e2_a.py index 95d7f97a88..7f8364a16f 100644 --- a/source/tests/pt/test_saveload_se_e2_a.py +++ b/source/tests/pt/test_saveload_se_e2_a.py @@ -123,13 +123,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self): From 913efa000f6ef95e7c55a262795ce28f74d81473 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:41:20 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/dataset.py | 33 ++++++++------------------- deepmd/pt/utils/preprocess.py | 16 ++++--------- deepmd/pt/utils/stat.py | 5 +--- source/tests/pt/test_embedding_net.py | 9 ++++++-- 
source/tests/pt/test_model.py | 11 +++++++-- 5 files changed, 30 insertions(+), 44 deletions(-) diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 83c147ef8f..68d4a09ce4 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -477,10 +477,7 @@ def preprocess(self, batch): if "find_" in kk: pass else: - batch[kk] = torch.tensor( - batch[kk], - dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) + batch[kk] = torch.tensor(batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( n_frames, -1, self._data_dict[kk]["ndof"] @@ -521,12 +518,9 @@ def preprocess(self, batch): batch["nlist_type"] = nlist_type natoms_extended = max([item.shape[0] for item in shift]) batch["shift"] = torch.zeros( - (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long + (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION ) + batch["mapping"] = torch.zeros((n_frames, natoms_extended), dtype=torch.long) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] batch["shift"][i, :natoms_tmp] = shift[i] @@ -562,16 +556,13 @@ def single_preprocess(self, batch, sid): pass else: batch[kk] = torch.tensor( - batch[kk][sid], - dtype=env.GLOBAL_PT_FLOAT_PRECISION + batch[kk][sid], dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long - ) + batch[kk] = torch.tensor(batch[kk][sid], dtype=torch.long) clean_coord = batch.pop("coord") clean_type = batch.pop("type") nloc = clean_type.shape[0] @@ -665,28 +656,22 @@ def single_preprocess(self, batch, sid): NotImplementedError(f"Unknown noise type {self.noise_type}!") noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord - batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool - ) + batch["coord_mask"] = torch.tensor(coord_mask, dtype=torch.bool) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( - np.zeros_like(coord_mask, dtype=bool), - dtype=torch.bool + np.zeros_like(coord_mask, dtype=bool), dtype=torch.bool ) # add mask for type if self.mask_type: masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx - batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool - ) + batch["type_mask"] = torch.tensor(type_mask, dtype=torch.bool) else: masked_type = clean_type batch["type_mask"] = torch.tensor( - np.zeros_like(type_mask, dtype=bool), - dtype=torch.bool + np.zeros_like(type_mask, dtype=bool), dtype=torch.bool ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 27acdb9209..18c798138e 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -134,15 +134,9 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) - xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long - ) - xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long - ) - xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long - ) + xyz = xi.view(-1, 1, 1, 1) * 
torch.tensor([1, 0, 0], dtype=torch.long) + xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor([0, 1, 0], dtype=torch.long) + xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor([0, 0, 1], dtype=torch.long) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) mask_b = (xyz < ncell).all(dim=-1) @@ -186,9 +180,7 @@ def build_neighbor_list( distance = coord_l - coord_r distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut - distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF - ) + distance[:nloc, :nloc] += torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF if min_check: if distance.min().abs() < 1e-6: RuntimeError("Atom dist too close!") diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 4826d0fb88..eec7179bcd 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -64,10 +64,7 @@ def make_stat_input(datasets, dataloaders, nbatches): dtype=env.GLOBAL_PT_FLOAT_PRECISION, ) else: - shape = torch.zeros( - (n_frames, extend), - dtype=torch.long - ) + shape = torch.zeros((n_frames, extend), dtype=torch.long) for i in range(len(item)): natoms_tmp = l[i].shape[0] shape[i, :natoms_tmp] = l[i] diff --git a/source/tests/pt/test_embedding_net.py b/source/tests/pt/test_embedding_net.py index 312af1a8c8..407f4949b5 100644 --- a/source/tests/pt/test_embedding_net.py +++ b/source/tests/pt/test_embedding_net.py @@ -7,6 +7,7 @@ import numpy as np import tensorflow.compat.v1 as tf import torch + from deepmd.pt.utils import ( env, ) @@ -153,11 +154,15 @@ def test_consistency(self): pt_coord = self.torch_batch["coord"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) + index = ( + self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) + ) extended_coord = torch.gather(pt_coord, dim=1, index=index) extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE) extended_atype = torch.gather( - self.torch_batch["atype"].to(env.DEVICE), dim=1, index=self.torch_batch["mapping"].to(env.DEVICE) + self.torch_batch["atype"].to(env.DEVICE), + dim=1, + index=self.torch_batch["mapping"].to(env.DEVICE), ) descriptor_out, _, _, _, _ = descriptor( extended_coord, diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 250ccb164d..e87a53969c 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -6,6 +6,7 @@ import numpy as np import tensorflow.compat.v1 as tf import torch + from deepmd.pt.utils import ( env, ) @@ -343,10 +344,16 @@ def test_consistency(self): batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) model_predict = my_model( - batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=True + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=True, ) model_predict_1 = my_model( - batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=False + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=False, ) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], From a4892b71e30430c77fe3ace6387f6b4a7a633442 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 20:03:41 +0800 Subject: [PATCH 08/10] Update test_fitting_net.py --- source/tests/pt/test_fitting_net.py | 9 
++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index ed2c428de5..0390043770 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -17,6 +17,9 @@ from deepmd.tf.fit.ener import ( EnerFitting, ) +from deepmd.pt.utils import ( + env, +) class FakeDescriptor: @@ -105,7 +108,7 @@ def test_consistency(self): neuron=self.n_neuron, bias_atom_e=self.dp_fn.bias_atom_e, distinguish_types=True, - ) + ).to(env.DEVICE) for name, param in my_fn.named_parameters(): matched = re.match( "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name @@ -129,9 +132,9 @@ def test_consistency(self): embedding = torch.from_numpy(self.embedding) embedding = embedding.view(4, -1, self.embedding_width) atype = torch.from_numpy(self.atype) - ret = my_fn(embedding, atype) + ret = my_fn(embedding.to(env.DEVICE), atype.to(env.DEVICE)) my_energy = ret["energy"] - my_energy = my_energy.detach() + my_energy = my_energy.detach().cpu() np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) From 7cad8a6569b94f170466d9383f2c7c4d4ffd030b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:04:20 +0000 Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/pt/test_fitting_net.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index 0390043770..e12a397347 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -11,15 +11,15 @@ from deepmd.pt.model.task import ( EnergyFittingNet, ) +from deepmd.pt.utils import ( + env, +) from deepmd.pt.utils.env import ( GLOBAL_NP_FLOAT_PRECISION, ) from deepmd.tf.fit.ener import ( EnerFitting, ) -from deepmd.pt.utils import ( - env, -) class FakeDescriptor: From 1c37f4450a3ef205d65106ac05da0825a5f7a727 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 30 Jan 2024 16:26:13 -0500 Subject: [PATCH 10/10] set NUM_WORKERS to 0 --- .github/workflows/test_cuda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index f164758304..45b689cb3e 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -42,6 +42,7 @@ jobs: DP_BUILD_TESTING: 1 DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-12.2 + NUM_WORKERS: 0 - run: dp --version - run: python -m pytest -s --cov=deepmd source/tests --durations=0 - run: source/install/test_cc_local.sh
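For readers unfamiliar with the final fix above: setting `NUM_WORKERS: 0` makes the PyTorch `DataLoader` load batches in the main process rather than in forked worker processes, which is the standard way to avoid CUDA re-initialization hangs when dataset code touches GPU state. Below is a minimal sketch of the pattern under that assumption; the helper name `num_workers_from_env` is hypothetical, and the actual DeePMD-kit dataloader may read the variable differently.

```python
import os

import torch
from torch.utils.data import DataLoader, TensorDataset


def num_workers_from_env(default: int = 0) -> int:
    """Hypothetical helper: read the DataLoader worker count from NUM_WORKERS.

    Defaulting to 0 keeps batch loading in the main process, avoiding
    CUDA-initialization deadlocks in forked workers on GPU runners.
    """
    return int(os.environ.get("NUM_WORKERS", default))


dataset = TensorDataset(torch.arange(8, dtype=torch.float32))
loader = DataLoader(dataset, batch_size=4, num_workers=num_workers_from_env())
for (batch,) in loader:
    # Batches are built on the CPU; callers move them to the GPU afterwards.
    print(batch)
```

This mirrors the convention adopted in the patches above: batch tensors are created on the CPU (no `device=` argument in the dataset and preprocess code), and only the consuming code calls `.to(env.DEVICE)`.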