From fdbccabce784a1178d3f17e340d4bcc3cabd472f Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 17:14:36 +0800 Subject: [PATCH 01/10] Fix GPU UTs --- deepmd/pt/utils/dataloader.py | 4 +- deepmd/pt/utils/dataset.py | 26 +++++----- deepmd/pt/utils/preprocess.py | 30 +++++------ deepmd/pt/utils/stat.py | 4 +- source/tests/pt/test_descriptor.py | 11 ++-- source/tests/pt/test_descriptor_dpa1.py | 8 +-- source/tests/pt/test_descriptor_dpa2.py | 8 +-- source/tests/pt/test_mlp.py | 67 +++++++++++++------------ 8 files changed, 82 insertions(+), 76 deletions(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 7c95f66c9c..5408452703 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -276,13 +276,13 @@ def collate_batch(batch): result[key] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) else: result[key] = torch.zeros( (n_frames, natoms_extended), dtype=torch.long, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) for i in range(len(batch)): natoms_tmp = list[i].shape[0] diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index c104e64491..6b19755b59 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -480,7 +480,7 @@ def preprocess(self, batch): batch[kk] = torch.tensor( batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( @@ -490,7 +490,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk], dtype=torch.long, device=env.PREPROCESS_DEVICE + batch[kk], dtype=torch.long, device=env.DEVICE ) batch["atype"] = batch.pop("type") @@ -526,10 +526,10 @@ def preprocess(self, batch): batch["shift"] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long, device=env.PREPROCESS_DEVICE + (n_frames, natoms_extended), dtype=torch.long, device=env.DEVICE ) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] @@ -568,14 +568,14 @@ def single_preprocess(self, batch, sid): batch[kk] = torch.tensor( batch[kk][sid], dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long, device=env.PREPROCESS_DEVICE + batch[kk][sid], dtype=torch.long, device=env.DEVICE ) clean_coord = batch.pop("coord") clean_type = batch.pop("type") @@ -671,14 +671,14 @@ def single_preprocess(self, batch, sid): noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE + coord_mask, dtype=torch.bool, device=env.DEVICE ) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( np.zeros_like(coord_mask, dtype=bool), dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) # add mask for type @@ -686,14 +686,14 @@ def single_preprocess(self, batch, sid): masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx 
batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE + type_mask, dtype=torch.bool, device=env.DEVICE ) else: masked_type = clean_type batch["type_mask"] = torch.tensor( np.zeros_like(type_mask, dtype=bool), dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) @@ -803,7 +803,7 @@ def __len__(self): def __getitem__(self, index): """Get a frame from the selected system.""" b_data = self._data_system._get_item(index) - b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.PREPROCESS_DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.DEVICE) return b_data @@ -879,7 +879,7 @@ def __getitem__(self, index=None): index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE + self._natoms_vec[index], device=env.DEVICE ) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) @@ -892,7 +892,7 @@ def get_training_batch(self, index=None): index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch_for_train(self._batch_size) b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE + self._natoms_vec[index], device=env.DEVICE ) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 463ac112ad..be24a3a770 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -99,7 +99,7 @@ def build_inside_clist(coord, region: Region3D, ncell): cell_offset[cell_offset < 0] = 0 delta = cell_offset - ncell a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms - arange = torch.arange(0, loc_ncell, 1, device=env.PREPROCESS_DEVICE) + arange = torch.arange(0, loc_ncell, 1, device=env.DEVICE) cellid = a2c == arange.unsqueeze(-1) # one hot cellid c2a = cellid.nonzero() lst = [] @@ -131,17 +131,17 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): # add ghost atoms a2c, c2a = build_inside_clist(coord, region, ncell) - xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.PREPROCESS_DEVICE) - yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.PREPROCESS_DEVICE) - zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.PREPROCESS_DEVICE) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.DEVICE) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.DEVICE) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.DEVICE) xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE + [1, 0, 0], dtype=torch.long, device=env.DEVICE ) xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE + [0, 1, 0], dtype=torch.long, device=env.DEVICE ) xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long, device=env.PREPROCESS_DEVICE + [0, 0, 1], dtype=torch.long, device=env.DEVICE ) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) @@ -166,7 +166,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = 
torch.cat([atype, tmp_atype]) merged_mapping = torch.cat( - [torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE), aid] + [torch.arange(atype.numel(), device=env.DEVICE), aid] ) return merged_coord_shift, merged_atype, merged_mapping @@ -189,7 +189,7 @@ def build_neighbor_list( distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool, device=env.PREPROCESS_DEVICE) * DISTANCE_INF + torch.eye(nloc, dtype=torch.bool, device=env.DEVICE) * DISTANCE_INF ) if min_check: if distance.min().abs() < 1e-6: @@ -197,12 +197,12 @@ def build_neighbor_list( if not type_split: sec = sec[-1:] lst = [] - nlist = torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 nlist_loc = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 ) nlist_type = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 + torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 ) for i, nnei in enumerate(sec): if i > 0: @@ -216,9 +216,9 @@ def build_neighbor_list( _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False) else: # when nnei > nall - indices = torch.zeros((nloc, nnei), device=env.PREPROCESS_DEVICE).long() - 1 + indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1 _sorted = ( - torch.ones((nloc, nnei), device=env.PREPROCESS_DEVICE).long() + torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF ) _sorted_nnei, indices_nnei = torch.topk( @@ -284,7 +284,7 @@ def make_env_mat( else: merged_coord_shift = torch.zeros_like(coord) merged_atype = atype.clone() - merged_mapping = torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE) + merged_mapping = torch.arange(atype.numel(), device=env.DEVICE) merged_coord = coord.clone() # build nlist diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 18ee4d9abe..7fffd15ca1 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -62,13 +62,13 @@ def make_stat_input(datasets, dataloaders, nbatches): shape = torch.zeros( (n_frames, extend, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) else: shape = torch.zeros( (n_frames, extend), dtype=torch.long, - device=env.PREPROCESS_DEVICE, + device=env.DEVICE, ) for i in range(len(item)): natoms_tmp = l[i].shape[0] diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py index da38cf007f..a7696e7095 100644 --- a/source/tests/pt/test_descriptor.py +++ b/source/tests/pt/test_descriptor.py @@ -12,6 +12,9 @@ from pathlib import ( Path, ) +from deepmd.pt.utils import ( + env, +) from deepmd.pt.model.descriptor import ( prod_env_mat_se_a, @@ -112,18 +115,18 @@ def setUp(self): def test_consistency(self): avg_zero = torch.zeros( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ) std_ones = torch.ones( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ) base_d, base_force, nlist = base_se_a( rcut=self.rcut, rcut_smth=self.rcut_smth, sel=self.sel, batch=self.np_batch, - mean=avg_zero, - stddev=std_ones, + mean=avg_zero.detach().cpu(), + stddev=std_ones.detach().cpu(), ) pt_coord = 
self.pt_batch["coord"]
diff --git a/source/tests/pt/test_descriptor_dpa1.py b/source/tests/pt/test_descriptor_dpa1.py
index 689fa7e49c..725369d68d 100644
--- a/source/tests/pt/test_descriptor_dpa1.py
+++ b/source/tests/pt/test_descriptor_dpa1.py
@@ -243,7 +243,7 @@ def test_descriptor_block(self):
         dparams["ntypes"] = ntypes
         des = DescrptBlockSeAtten(
             **dparams,
-        )
+        ).to(env.DEVICE)
         des.load_state_dict(torch.load(self.file_model_param))
         rcut = dparams["rcut"]
         nsel = dparams["sel"]
@@ -260,7 +260,7 @@ def test_descriptor_block(self):
             extended_coord, extended_atype, nloc, rcut, nsel, distinguish_types=False
         )
         # handle type_embedding
-        type_embedding = TypeEmbedNet(ntypes, 8)
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
         type_embedding.load_state_dict(torch.load(self.file_type_embed))
 
         ## to save model parameters
@@ -293,7 +293,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = False
         des = DescrptDPA1(
             **dparams,
-        )
+        ).to(env.DEVICE)
         target_dict = des.state_dict()
         source_dict = torch.load(self.file_model_param)
         type_embd_dict = torch.load(self.file_type_embed)
@@ -337,7 +337,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = True
         des = DescrptDPA1(
             **dparams,
-        )
+        ).to(env.DEVICE)
         descriptor, env_mat, diff, rot_mat, sw = des(
             extended_coord,
             extended_atype,
diff --git a/source/tests/pt/test_descriptor_dpa2.py b/source/tests/pt/test_descriptor_dpa2.py
index 45c95961fe..aa6b16964e 100644
--- a/source/tests/pt/test_descriptor_dpa2.py
+++ b/source/tests/pt/test_descriptor_dpa2.py
@@ -124,7 +124,7 @@ def test_descriptor_hyb(self):
             dlist,
             ntypes,
             hybrid_mode=dparams["hybrid_mode"],
-        )
+        ).to(env.DEVICE)
         model_dict = torch.load(self.file_model_param)
         # type_embd of repformer is removed
         model_dict.pop("descriptor_list.1.type_embd.embedding.weight")
@@ -158,7 +158,7 @@ def test_descriptor_hyb(self):
         )
         nlist = torch.cat(nlist_list, -1)
         # handle type_embedding
-        type_embedding = TypeEmbedNet(ntypes, 8)
+        type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
         type_embedding.load_state_dict(torch.load(self.file_type_embed))
 
         ## to save model parameters
@@ -186,7 +186,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = False
         des = DescrptDPA2(
             **dparams,
-        )
+        ).to(env.DEVICE)
         target_dict = des.state_dict()
         source_dict = torch.load(self.file_model_param)
         # type_embd of repformer is removed
@@ -232,7 +232,7 @@ def test_descriptor(self):
         dparams["concat_output_tebd"] = True
         des = DescrptDPA2(
             **dparams,
-        )
+        ).to(env.DEVICE)
         descriptor, env_mat, diff, rot_mat, sw = des(
             extended_coord,
             extended_atype,
diff --git a/source/tests/pt/test_mlp.py b/source/tests/pt/test_mlp.py
index c06047b2a5..658f472180 100644
--- a/source/tests/pt/test_mlp.py
+++ b/source/tests/pt/test_mlp.py
@@ -8,6 +8,9 @@
 from deepmd.pt.utils.env import (
     PRECISION_DICT,
 )
+from deepmd.pt.utils import (
+    env,
+)
 
 try:
     from deepmd.pt.model.network.mlp import (
@@ -104,23 +107,23 @@ def test_match_native_layer(
                         inp_shap = ashp + inp_shap
                     rtol, atol = get_tols(prec)
                     dtype = PRECISION_DICT[prec]
-                    xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap)
+                    xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap)
                     # def mlp layer
-                    ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec)
+                    ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to(env.DEVICE)
                     # check consistency
                     nl = NativeLayer.deserialize(ml.serialize())
                     np.testing.assert_allclose(
-                        ml.forward(xx).detach().numpy(),
-                        nl.call(xx.detach().numpy()),
+                        
ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLPLayer.deserialize(ml.serialize()) + ml1 = MLPLayer.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -157,7 +160,7 @@ def test_match_native_net( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) # def MLP layers = [] for ii in range(1, len(ndims)): @@ -166,21 +169,21 @@ def test_match_native_net( ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec ).serialize() ) - ml = MLP(layers) + ml = MLP(layers).to(env.DEVICE) # check consistency nl = NativeNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLP.deserialize(ml.serialize()) + ml1 = MLP.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -219,23 +222,23 @@ def test_match_embedding_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) # check consistency nl = DPEmbeddingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -246,8 +249,8 @@ def test_jit( ): for idim, nn, act, idt, prec in self.test_cases: # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) @@ -272,7 +275,7 @@ def test_match_fitting_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) 
# def MLP ml = FittingNet( idim, @@ -282,21 +285,21 @@ def test_match_fitting_net( resnet_dt=idt, precision=prec, bias_out=ob, - ) + ).to(env.DEVICE) # check consistency nl = DPFittingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = FittingNet.deserialize(ml.serialize()) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -315,7 +318,7 @@ def test_jit( resnet_dt=idt, precision=prec, bias_out=ob, - ) - ml1 = FittingNet.deserialize(ml.serialize()) + ).to(env.DEVICE) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) From 3f0f1f8de65dc6818395d1456c7464197c632f24 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 17:18:29 +0800 Subject: [PATCH 02/10] Update env.py --- deepmd/pt/utils/env.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py index 6fa72943c7..559dba0167 100644 --- a/deepmd/pt/utils/env.py +++ b/deepmd/pt/utils/env.py @@ -24,11 +24,6 @@ else: DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -if os.environ.get("PREPROCESS_DEVICE") == "gpu": - PREPROCESS_DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -else: - PREPROCESS_DEVICE = torch.device("cpu") - JIT = False CACHE_PER_SYS = 5 # keep at most so many sets per sys in memory ENERGY_BIAS_TRAINABLE = True From 3dd415b6630c487b4b0be33fd5a063055cb5d643 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 09:20:41 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/dataset.py | 12 +++--------- deepmd/pt/utils/preprocess.py | 17 ++++------------- source/tests/pt/test_descriptor.py | 12 +++++++----- source/tests/pt/test_mlp.py | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 6b19755b59..b886dbb786 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -489,9 +489,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk], dtype=torch.long, device=env.DEVICE - ) + batch[kk] = torch.tensor(batch[kk], dtype=torch.long, device=env.DEVICE) batch["atype"] = batch.pop("type") keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] @@ -878,9 +876,7 @@ def __getitem__(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -891,9 +887,7 @@ def get_training_batch(self, 
index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index be24a3a770..3ea26d0041 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -165,9 +165,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord = torch.cat([coord, tmp_coord]) merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = torch.cat([atype, tmp_atype]) - merged_mapping = torch.cat( - [torch.arange(atype.numel(), device=env.DEVICE), aid] - ) + merged_mapping = torch.cat([torch.arange(atype.numel(), device=env.DEVICE), aid]) return merged_coord_shift, merged_atype, merged_mapping @@ -198,12 +196,8 @@ def build_neighbor_list( sec = sec[-1:] lst = [] nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_loc = ( - torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - ) - nlist_type = ( - torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - ) + nlist_loc = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 + nlist_type = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 for i, nnei in enumerate(sec): if i > 0: nnei = nnei - sec[i - 1] @@ -217,10 +211,7 @@ def build_neighbor_list( else: # when nnei > nall indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1 - _sorted = ( - torch.ones((nloc, nnei), device=env.DEVICE).long() - * DISTANCE_INF - ) + _sorted = torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF _sorted_nnei, indices_nnei = torch.topk( tmp, tmp.shape[1], dim=1, largest=False ) diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py index a7696e7095..4f31bac7bf 100644 --- a/source/tests/pt/test_descriptor.py +++ b/source/tests/pt/test_descriptor.py @@ -12,15 +12,13 @@ from pathlib import ( Path, ) -from deepmd.pt.utils import ( - env, -) from deepmd.pt.model.descriptor import ( prod_env_mat_se_a, ) from deepmd.pt.utils import ( dp_random, + env, ) from deepmd.pt.utils.dataset import ( DeepmdDataSet, @@ -115,10 +113,14 @@ def setUp(self): def test_consistency(self): avg_zero = torch.zeros( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) std_ones = torch.ones( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) base_d, base_force, nlist = base_se_a( rcut=self.rcut, diff --git a/source/tests/pt/test_mlp.py b/source/tests/pt/test_mlp.py index 658f472180..26f0041bf9 100644 --- a/source/tests/pt/test_mlp.py +++ b/source/tests/pt/test_mlp.py @@ -5,12 +5,12 @@ import numpy as np import torch -from deepmd.pt.utils.env import ( - PRECISION_DICT, -) from deepmd.pt.utils import ( env, ) +from deepmd.pt.utils.env import ( + PRECISION_DICT, +) try: from deepmd.pt.model.network.mlp import ( @@ -107,9 +107,13 @@ def test_match_native_layer( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) 
dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def mlp layer - ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to(env.DEVICE) + ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to( + env.DEVICE + ) # check consistency nl = NativeLayer.deserialize(ml.serialize()) np.testing.assert_allclose( @@ -160,7 +164,9 @@ def test_match_native_net( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def MLP layers = [] for ii in range(1, len(ndims)): From cb4cc67c56e648ea04e462e649fad98f25fd85d4 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:25:03 +0800 Subject: [PATCH 04/10] Devel update (#30) * throw errors when PyTorch CXX11 ABI is different from TensorFlow (#3201) If so, throw the following error: ``` -- PyTorch CXX11 ABI: 0 CMake Error at CMakeLists.txt:162 (message): PyTorch CXX11 ABI mismatch TensorFlow: 0 != 1 ``` Signed-off-by: Jinzhe Zeng * allow disabling TensorFlow backend during Python installation (#3200) Fix #3120. One can disable building the TensorFlow backend during `pip install` by setting `DP_ENABLE_TENSORFLOW=0`. --------- Signed-off-by: Jinzhe Zeng * breaking: pt: add dp model format and refactor pt impl for the fitting net. (#3199) - add dp model format (backend independent definition) for the fitting - refactor torch support, compatible with dp model format - fix mlp issue: the idt should only be used when a skip connection is available. - add tools `to_numpy_array` and `to_torch_tensor`. --------- Co-authored-by: Han Wang * remove duplicated fitting output check. 
fix codeql (#3202) Co-authored-by: Han Wang --------- Signed-off-by: Jinzhe Zeng Co-authored-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Co-authored-by: Han Wang --- backend/find_tensorflow.py | 6 + backend/read_env.py | 24 +- deepmd/model_format/__init__.py | 4 + deepmd/model_format/fitting.py | 355 +++++++++++++++++ deepmd/model_format/network.py | 2 + deepmd/model_format/se_e2_a.py | 10 +- deepmd/pt/model/model/dp_atomic_model.py | 10 +- deepmd/pt/model/network/mlp.py | 7 +- deepmd/pt/model/task/ener.py | 373 +++++++++++++++--- deepmd/pt/model/task/fitting.py | 13 +- deepmd/pt/model/task/task.py | 18 +- deepmd/pt/utils/utils.py | 40 ++ deepmd/tf/env.py | 5 + doc/install/install-from-source.md | 15 +- source/CMakeLists.txt | 21 +- source/config/CMakeLists.txt | 14 + source/config/run_config.ini | 2 + source/lib/src/gpu/CMakeLists.txt | 6 +- .../tests/common/test_model_format_utils.py | 121 ++++++ source/tests/pt/test_ener_fitting.py | 181 +++++++++ source/tests/pt/test_fitting_net.py | 24 +- source/tests/pt/test_model.py | 25 +- source/tests/pt/test_se_e2_a.py | 33 +- source/tests/pt/test_utils.py | 31 ++ 24 files changed, 1197 insertions(+), 143 deletions(-) create mode 100644 deepmd/model_format/fitting.py create mode 100644 source/tests/pt/test_ener_fitting.py create mode 100644 source/tests/pt/test_utils.py diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 32ae62469c..083e2673f7 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -127,6 +127,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. """ + if tf_version is None: + return { + "cpu": [], + "gpu": [], + "mpi": [], + } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") diff --git a/backend/read_env.py b/backend/read_env.py index 2cf433181a..bee5d607e3 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -80,16 +80,26 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append("-DENABLE_IPI:BOOL=TRUE") extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): - find_libpython_requires = [] + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): + find_libpython_requires = [] + else: + find_libpython_requires = ["find_libpython"] + cmake_args.extend( + [ + "-DENABLE_TENSORFLOW=ON", + f"-DTENSORFLOW_VERSION={tf_version}", + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", + ] + ) else: - find_libpython_requires = ["find_libpython"] - cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") + find_libpython_requires = [] + cmake_args.append("-DENABLE_TENSORFLOW=OFF") + tf_version = None cmake_args = [ - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] diff --git a/deepmd/model_format/__init__.py b/deepmd/model_format/__init__.py index 253bca3507..e15f73758e 100644 --- a/deepmd/model_format/__init__.py +++ b/deepmd/model_format/__init__.py @@ -7,6 +7,9 @@ from .env_mat import ( EnvMat, ) +from .fitting import ( + InvarFitting, +) from .network import ( EmbeddingNet, FittingNet, @@ -34,6 +37,7 @@ ) __all__ = [ + "InvarFitting", "DescrptSeA", "EnvMat", "make_multilayer_network", diff --git 
a/deepmd/model_format/fitting.py b/deepmd/model_format/fitting.py
new file mode 100644
index 0000000000..904fb42b76
--- /dev/null
+++ b/deepmd/model_format/fitting.py
@@ -0,0 +1,353 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
+from typing import (
+    Any,
+    List,
+    Optional,
+)
+
+import numpy as np
+
+from .common import (
+    DEFAULT_PRECISION,
+    NativeOP,
+)
+from .network import (
+    FittingNet,
+    NetworkCollection,
+)
+from .output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+    fitting_check_output,
+)
+
+
+@fitting_check_output
+class InvarFitting(NativeOP):
+    r"""Fitting the energy (or a property of `dim_out`) of the system. The force and the virial can also be trained.
+
+    Let's take the energy fitting task as an example.
+    The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`:
+
+    .. math::
+        E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)}
+        \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)}
+
+    The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b})
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}`
+    is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and
+    :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively,
+    both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}`
+    is the activation function.
+
+    The output layer :math:`\mathcal{L}^{(n)}` is given by
+
+    .. math::
+        \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})=
+            \mathbf{x}^T\mathbf{w}+\mathbf{b}
+
+    where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}`
+    is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and
+    :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively,
+    both of which are trainable if `trainable[n]` is `True`.
+
+    Parameters
+    ----------
+    var_name
+            The name of the output variable.
+    ntypes
+            The number of atom types.
+    dim_descrpt
+            The dimension of the input descriptor.
+    dim_out
+            The dimension of the output fit property.
+    neuron
+            Number of neurons :math:`N` in each hidden layer of the fitting net
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            :math:`y = x + dt * \phi (Wx + b)`
+    numb_fparam
+            Number of frame parameters
+    numb_aparam
+            Number of atomic parameters
+    rcond
+            The condition number for the regression of atomic energy.
+    tot_ener_zero
+            Force the total energy to zero. Useful for the charge fitting.
+    trainable
+            If the weights of fitting net are trainable.
+            Suppose that we have :math:`N_l` hidden layers in the fitting net,
+            this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
+    atom_ener
+            Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descriptor should be set.
+    activation_function
+            The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    layer_name : list[Optional[str]], optional
+            The name of each layer. If two layers, either in the same fitting or different fittings,
+            have the same name, they will share the same neural network parameters.
+    use_aparam_as_mask: bool, optional
+            If True, the atomic parameters will be used as a mask that determines whether the atom is real/virtual.
+            And the aparam will not be used as the atomic parameters for embedding.
+    distinguish_types
+            Different atomic types use different fitting nets.
+
+    """
+
+    def __init__(
+        self,
+        var_name: str,
+        ntypes: int,
+        dim_descrpt: int,
+        dim_out: int,
+        neuron: List[int] = [120, 120, 120],
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        rcond: Optional[float] = None,
+        tot_ener_zero: bool = False,
+        trainable: Optional[List[bool]] = None,
+        atom_ener: Optional[List[float]] = None,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        layer_name: Optional[List[Optional[str]]] = None,
+        use_aparam_as_mask: bool = False,
+        spin: Any = None,
+        distinguish_types: bool = False,
+    ):
+        # seed, uniform_seed are not included
+        if tot_ener_zero:
+            raise NotImplementedError("tot_ener_zero is not implemented")
+        if spin is not None:
+            raise NotImplementedError("spin is not implemented")
+        if use_aparam_as_mask:
+            raise NotImplementedError("use_aparam_as_mask is not implemented")
+        if layer_name is not None:
+            raise NotImplementedError("layer_name is not implemented")
+        if atom_ener is not None:
+            raise NotImplementedError("atom_ener is not implemented")
+
+        self.var_name = var_name
+        self.ntypes = ntypes
+        self.dim_descrpt = dim_descrpt
+        self.dim_out = dim_out
+        self.neuron = neuron
+        self.resnet_dt = resnet_dt
+        self.numb_fparam = numb_fparam
+        self.numb_aparam = numb_aparam
+        self.rcond = rcond
+        self.tot_ener_zero = tot_ener_zero
+        self.trainable = trainable
+        self.atom_ener = atom_ener
+        self.activation_function = activation_function
+        self.precision = precision
+        self.layer_name = layer_name
+        self.use_aparam_as_mask = use_aparam_as_mask
+        self.spin = spin
+        self.distinguish_types = distinguish_types
+        if self.spin is not None:
+            raise NotImplementedError("spin is not supported")
+
+        # init constants
+        self.bias_atom_e = np.zeros([self.ntypes, self.dim_out])
+        if self.numb_fparam > 0:
+            self.fparam_avg = np.zeros(self.numb_fparam)
+            self.fparam_inv_std = np.ones(self.numb_fparam)
+        else:
+            self.fparam_avg, self.fparam_inv_std = None, None
+        if self.numb_aparam > 0:
+            self.aparam_avg = np.zeros(self.numb_aparam)
+            self.aparam_inv_std = np.ones(self.numb_aparam)
+        else:
+            self.aparam_avg, self.aparam_inv_std = None, None
+        # init networks
+        in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam
+        out_dim = self.dim_out
+        self.nets = NetworkCollection(
+            1 if self.distinguish_types else 0,
+            self.ntypes,
+            network_type="fitting_network",
+            networks=[
+                FittingNet(
+                    in_dim,
+                    out_dim,
+                    self.neuron,
+                    self.activation_function,
+                    self.resnet_dt,
+                    self.precision,
+                    bias_out=True,
+                )
+                for ii in range(self.ntypes if self.distinguish_types else 1)
+            ],
+        )
+
+    def output_def(self):
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name, [self.dim_out], reduciable=True, differentiable=True
+                ),
+            ]
+        )
+
+    def __setitem__(self, key, value):
+        if key in ["bias_atom_e"]:
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in 
["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        else:
+            raise KeyError(key)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "dim_out": self.dim_out,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "rcond": self.rcond,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "distinguish_types": self.distinguish_types,
+            "nets": self.nets.serialize(),
+            "@variables": {
+                "bias_atom_e": self.bias_atom_e,
+                "fparam_avg": self.fparam_avg,
+                "fparam_inv_std": self.fparam_inv_std,
+                "aparam_avg": self.aparam_avg,
+                "aparam_inv_std": self.aparam_inv_std,
+            },
+            # not supported
+            "tot_ener_zero": self.tot_ener_zero,
+            "trainable": self.trainable,
+            "atom_ener": self.atom_ener,
+            "layer_name": self.layer_name,
+            "use_aparam_as_mask": self.use_aparam_as_mask,
+            "spin": self.spin,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "InvarFitting":
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = variables[kk]
+        obj.nets = NetworkCollection.deserialize(nets)
+        return obj
+
+    def call(
+        self,
+        descriptor: np.ndarray,
+        atype: np.ndarray,
+        gr: Optional[np.ndarray] = None,
+        g2: Optional[np.ndarray] = None,
+        h2: Optional[np.ndarray] = None,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+    ):
+        """Calculate the fitting.
+
+        Parameters
+        ----------
+        descriptor
+            input descriptor. shape: nf x nloc x nd
+        atype
+            the atom type. shape: nf x nloc
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-particle representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-particle representation.
+            shape: nf x nloc x nnei x 3
+        fparam
+            The frame parameter. shape: nf x nfp. nfp being `numb_fparam`
+        aparam
+            The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam`
+
+        """
+        nf, nloc, nd = descriptor.shape
+        # check input dim
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                f"get an input descriptor of dim {nd},"
+                f"which is not consistent with {self.dim_descrpt}."
+            )
+        xx = descriptor
+        # check fparam dim, concatenate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    f"get an input fparam of dim {fparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_fparam}."
+                )
+            fparam = (fparam - self.fparam_avg) * self.fparam_inv_std
+            fparam = np.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1])
+            xx = np.concatenate(
+                [xx, fparam],
+                axis=-1,
+            )
+        # check aparam dim, concatenate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    f"get an input aparam of dim {aparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_aparam}."
+                )
+            aparam = (aparam - self.aparam_avg) * self.aparam_inv_std
+            xx = np.concatenate(
+                [xx, aparam],
+                axis=-1,
+            )
+
+        # calculate the prediction
+        if self.distinguish_types:
+            outs = np.zeros([nf, nloc, self.dim_out])
+            for type_i in range(self.ntypes):
+                mask = np.tile(
+                    (atype == type_i).reshape([nf, nloc, 1]), [1, 1, self.dim_out]
+                )
+                atom_energy = self.nets[(type_i,)](xx)
+                atom_energy = atom_energy + self.bias_atom_e[type_i]
+                atom_energy = atom_energy * mask
+                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+        else:
+            outs = self.nets[()](xx) + self.bias_atom_e[atype]
+        return {self.var_name: outs}
diff --git a/deepmd/model_format/network.py b/deepmd/model_format/network.py
index a327d990c9..f2056c0b95 100644
--- a/deepmd/model_format/network.py
+++ b/deepmd/model_format/network.py
@@ -161,6 +161,8 @@ def __init__(
     ) -> None:
         prec = PRECISION_DICT[precision.lower()]
         self.precision = precision
+        # only use_timestep when skip connection is established.
+ use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng() self.w = rng.normal(size=(num_in, num_out)).astype(prec) self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None diff --git a/deepmd/model_format/se_e2_a.py b/deepmd/model_format/se_e2_a.py index 28751cad8d..f179b10ac3 100644 --- a/deepmd/model_format/se_e2_a.py +++ b/deepmd/model_format/se_e2_a.py @@ -171,9 +171,8 @@ def __init__( ) self.env_mat = EnvMat(self.rcut, self.rcut_smth) self.nnei = np.sum(self.sel) - self.nneix4 = self.nnei * 4 - self.davg = np.zeros([self.ntypes, self.nneix4]) - self.dstd = np.ones([self.ntypes, self.nneix4]) + self.davg = np.zeros([self.ntypes, self.nnei, 4]) + self.dstd = np.ones([self.ntypes, self.nnei, 4]) self.orig_sel = self.sel def __setitem__(self, key, value): @@ -192,6 +191,11 @@ def __getitem__(self, key): else: raise KeyError(key) + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.neuron[-1] * self.axis_neuron + def cal_g( self, ss, diff --git a/deepmd/pt/model/model/dp_atomic_model.py b/deepmd/pt/model/model/dp_atomic_model.py index 853eacb875..a222c8e6f6 100644 --- a/deepmd/pt/model/model/dp_atomic_model.py +++ b/deepmd/pt/model/model/dp_atomic_model.py @@ -93,11 +93,11 @@ def __init__( ) fitting_net["type"] = fitting_net.get("type", "ener") - if self.descriptor_type not in ["se_e2_a"]: - fitting_net["ntypes"] = 1 + fitting_net["ntypes"] = self.descriptor.get_ntype() + if self.descriptor_type in ["se_e2_a"]: + fitting_net["distinguish_types"] = True else: - fitting_net["ntypes"] = self.descriptor.get_ntype() - fitting_net["use_tebd"] = False + fitting_net["distinguish_types"] = False fitting_net["embedding_width"] = self.descriptor.dim_out self.grad_force = "direct" not in fitting_net["type"] @@ -165,5 +165,5 @@ def forward_atomic( ) assert descriptor is not None # energy, force - fit_ret = self.fitting_net(descriptor, atype, atype_tebd=None, rot_mat=rot_mat) + fit_ret = self.fitting_net(descriptor, atype, gr=rot_mat) return fit_ret diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index e3ac0e7bc2..d76abd82f9 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -56,7 +56,10 @@ def __init__( precision: str = DEFAULT_PRECISION, ): super().__init__() - self.use_timestep = use_timestep + # only use_timestep when skip connection is established. 
+ self.use_timestep = use_timestep and ( + num_out == num_in or num_out == num_in * 2 + ) self.activate_name = activation_function self.activate = ActivationFn(self.activate_name) self.precision = precision @@ -207,7 +210,7 @@ class NetworkCollection(DPNetworkCollection, nn.Module): NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { "network": MLP, "embedding_network": EmbeddingNet, - # "fitting_network": FittingNet, + "fitting_network": FittingNet, } def __init__(self, *args, **kwargs): diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 03043e2fcb..e40a6bda44 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -1,10 +1,13 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import copy import logging from typing import ( + List, Optional, Tuple, ) +import numpy as np import torch from deepmd.model_format import ( @@ -12,6 +15,10 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.pt.model.network.mlp import ( + FittingNet, + NetworkCollection, +) from deepmd.pt.model.network.network import ( ResidualDeep, ) @@ -21,19 +28,35 @@ from deepmd.pt.utils import ( env, ) +from deepmd.pt.utils.env import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION +device = env.DEVICE -@Fitting.register("ener") @fitting_check_output -class EnergyFittingNet(Fitting): +class InvarFitting(Fitting): def __init__( self, - ntypes, - embedding_width, - neuron, - bias_atom_e, - resnet_dt=True, - use_tebd=True, + var_name: str, + ntypes: int, + dim_descrpt: int, + dim_out: int, + neuron: List[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + distinguish_types: bool = False, **kwargs, ): """Construct a fitting net for energy. @@ -46,67 +69,322 @@ def __init__( - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() + self.var_name = var_name self.ntypes = ntypes - self.embedding_width = embedding_width - self.use_tebd = use_tebd - if not use_tebd: - assert self.ntypes == len(bias_atom_e), "Element count mismatches!" - bias_atom_e = torch.tensor(bias_atom_e) + self.dim_descrpt = dim_descrpt + self.dim_out = dim_out + self.neuron = neuron + self.distinguish_types = distinguish_types + self.use_tebd = not self.distinguish_types + self.resnet_dt = resnet_dt + self.numb_fparam = numb_fparam + self.numb_aparam = numb_aparam + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + if bias_atom_e is None: + bias_atom_e = np.zeros([self.ntypes, self.dim_out]) + bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device) + bias_atom_e = bias_atom_e.view([self.ntypes, self.dim_out]) + if not self.use_tebd: + assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" 
self.register_buffer("bias_atom_e", bias_atom_e) + # init constants + if self.numb_fparam > 0: + self.register_buffer( + "fparam_avg", + torch.zeros(self.numb_fparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "fparam_inv_std", + torch.ones(self.numb_fparam, dtype=self.prec, device=device), + ) + else: + self.fparam_avg, self.fparam_inv_std = None, None + if self.numb_aparam > 0: + self.register_buffer( + "aparam_avg", + torch.zeros(self.numb_aparam, dtype=self.prec, device=device), + ) + self.register_buffer( + "aparam_inv_std", + torch.ones(self.numb_aparam, dtype=self.prec, device=device), + ) + else: + self.aparam_avg, self.aparam_inv_std = None, None - filter_layers = [] - for type_i in range(self.ntypes): - bias_type = 0.0 - one = ResidualDeep( - type_i, embedding_width, neuron, bias_type, resnet_dt=resnet_dt + in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam + out_dim = 1 + + self.old_impl = kwargs.get("old_impl", False) + if self.old_impl: + filter_layers = [] + for type_i in range(self.ntypes): + bias_type = 0.0 + one = ResidualDeep( + type_i, + self.dim_descrpt, + self.neuron, + bias_type, + resnet_dt=self.resnet_dt, + ) + filter_layers.append(one) + self.filter_layers_old = torch.nn.ModuleList(filter_layers) + self.filter_layers = None + else: + self.filter_layers = NetworkCollection( + 1 if self.distinguish_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + in_dim, + out_dim, + self.neuron, + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + ) + for ii in range(self.ntypes if self.distinguish_types else 1) + ], ) - filter_layers.append(one) - self.filter_layers = torch.nn.ModuleList(filter_layers) + self.filter_layers_old = None + # very bad design... if "seed" in kwargs: logging.info("Set seed to %d in fitting net.", kwargs["seed"]) torch.manual_seed(kwargs["seed"]) - def output_def(self): + def output_def(self) -> FittingOutputDef: return FittingOutputDef( [ - OutputVariableDef("energy", [1], reduciable=True, differentiable=True), + OutputVariableDef( + self.var_name, [self.dim_out], reduciable=True, differentiable=True + ), ] ) + def __setitem__(self, key, value): + if key in ["bias_atom_e"]: + # correct bias_atom_e shape. 
user may provide a wrongly shaped value
+            self.bias_atom_e = value
+        elif key in ["fparam_avg"]:
+            self.fparam_avg = value
+        elif key in ["fparam_inv_std"]:
+            self.fparam_inv_std = value
+        elif key in ["aparam_avg"]:
+            self.aparam_avg = value
+        elif key in ["aparam_inv_std"]:
+            self.aparam_inv_std = value
+        else:
+            raise KeyError(key)
+
+    def __getitem__(self, key):
+        if key in ["bias_atom_e"]:
+            return self.bias_atom_e
+        elif key in ["fparam_avg"]:
+            return self.fparam_avg
+        elif key in ["fparam_inv_std"]:
+            return self.fparam_inv_std
+        elif key in ["aparam_avg"]:
+            return self.aparam_avg
+        elif key in ["aparam_inv_std"]:
+            return self.aparam_inv_std
+        else:
+            raise KeyError(key)
+
+    def serialize(self) -> dict:
+        """Serialize the fitting to dict."""
+        return {
+            "var_name": self.var_name,
+            "ntypes": self.ntypes,
+            "dim_descrpt": self.dim_descrpt,
+            "dim_out": self.dim_out,
+            "neuron": self.neuron,
+            "resnet_dt": self.resnet_dt,
+            "numb_fparam": self.numb_fparam,
+            "numb_aparam": self.numb_aparam,
+            "activation_function": self.activation_function,
+            "precision": self.precision,
+            "distinguish_types": self.distinguish_types,
+            "nets": self.filter_layers.serialize(),
+            "@variables": {
+                "bias_atom_e": to_numpy_array(self.bias_atom_e),
+                "fparam_avg": to_numpy_array(self.fparam_avg),
+                "fparam_inv_std": to_numpy_array(self.fparam_inv_std),
+                "aparam_avg": to_numpy_array(self.aparam_avg),
+                "aparam_inv_std": to_numpy_array(self.aparam_inv_std),
+            },
+            # "rcond": self.rcond ,
+            # "tot_ener_zero": self.tot_ener_zero ,
+            # "trainable": self.trainable ,
+            # "atom_ener": self.atom_ener ,
+            # "layer_name": self.layer_name ,
+            # "use_aparam_as_mask": self.use_aparam_as_mask ,
+            # "spin": self.spin ,
+            ## NOTICE: not supported so far
+            "rcond": None,
+            "tot_ener_zero": False,
+            "trainable": True,
+            "atom_ener": None,
+            "layer_name": None,
+            "use_aparam_as_mask": False,
+            "spin": None,
+        }
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "InvarFitting":
+        data = copy.deepcopy(data)
+        variables = data.pop("@variables")
+        nets = data.pop("nets")
+        obj = cls(**data)
+        for kk in variables.keys():
+            obj[kk] = to_torch_tensor(variables[kk])
+        obj.filter_layers = NetworkCollection.deserialize(nets)
+        return obj
+
+    def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1])
+
+    def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor:
+        return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1])
+
     def forward(
         self,
-        inputs: torch.Tensor,
+        descriptor: torch.Tensor,
         atype: torch.Tensor,
-        atype_tebd: Optional[torch.Tensor] = None,
-        rot_mat: Optional[torch.Tensor] = None,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
     ):
         """Based on embedding net output, calculate total energy.
 
         Args:
-        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width].
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
         - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
 
         Returns
         -------
         - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
         """
+        xx = descriptor
+        nf, nloc, nd = xx.shape
+        # NOTICE in tests/pt/test_model.py
+        # it happens that the user directly accesses the data member self.bias_atom_e
+        # and sets it to a wrong shape!
+        self.bias_atom_e = self.bias_atom_e.view([self.ntypes, self.dim_out])
+        # check input dim
+        if nd != self.dim_descrpt:
+            raise ValueError(
+                f"get an input descriptor of dim {nd},"
+                f"which is not consistent with {self.dim_descrpt}."
+            )
+        # check fparam dim, concatenate to input descriptor
+        if self.numb_fparam > 0:
+            assert fparam is not None, "fparam should not be None"
+            assert self.fparam_avg is not None
+            assert self.fparam_inv_std is not None
+            if fparam.shape[-1] != self.numb_fparam:
+                raise ValueError(
+                    f"get an input fparam of dim {fparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_fparam}."
+                )
+            nb, _ = fparam.shape
+            t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb)
+            t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb)
+            fparam = (fparam - t_fparam_avg) * t_fparam_inv_std
+            fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1])
+            xx = torch.cat(
+                [xx, fparam],
+                dim=-1,
+            )
+        # check aparam dim, concatenate to input descriptor
+        if self.numb_aparam > 0:
+            assert aparam is not None, "aparam should not be None"
+            assert self.aparam_avg is not None
+            assert self.aparam_inv_std is not None
+            if aparam.shape[-1] != self.numb_aparam:
+                raise ValueError(
+                    f"get an input aparam of dim {aparam.shape[-1]}, "
+                    f"which is not consistent with {self.numb_aparam}."
+                )
+            nb, nloc, _ = aparam.shape
+            t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc)
+            t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc)
+            aparam = (aparam - t_aparam_avg) * t_aparam_inv_std
+            xx = torch.cat(
+                [xx, aparam],
+                dim=-1,
+            )
+
         outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
-        if self.use_tebd:
-            if atype_tebd is not None:
-                inputs = torch.concat([inputs, atype_tebd], dim=-1)
-            atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[
-                atype
-            ].unsqueeze(-1)
-            outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+        if self.old_impl:
+            outs = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
+            assert self.filter_layers_old is not None
+            if self.use_tebd:
+                atom_energy = self.filter_layers_old[0](xx) + self.bias_atom_e[
+                    atype
+                ].unsqueeze(-1)
+                outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            else:
+                for type_i, filter_layer in enumerate(self.filter_layers_old):
+                    mask = atype == type_i
+                    atom_energy = filter_layer(xx)
+                    atom_energy = atom_energy + self.bias_atom_e[type_i]
+                    atom_energy = atom_energy * mask.unsqueeze(-1)
+                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
         else:
-            for type_i, filter_layer in enumerate(self.filter_layers):
-                mask = atype == type_i
-                atom_energy = filter_layer(inputs)
-                atom_energy = atom_energy + self.bias_atom_e[type_i]
-                atom_energy = atom_energy * mask.unsqueeze(-1)
+            if self.use_tebd:
+                atom_energy = (
+                    self.filter_layers.networks[0](xx) + self.bias_atom_e[atype]
+                )
                 outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
-        return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+            else:
+                for type_i, ll in enumerate(self.filter_layers.networks):
+                    mask = (atype == type_i).unsqueeze(-1)
+                    mask = torch.tile(mask, (1, 1, self.dim_out))
+                    atom_energy = ll(xx)
+                    atom_energy = atom_energy + self.bias_atom_e[type_i]
+                    atom_energy = atom_energy * mask
+                    outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
+            return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)}
+
+
+@Fitting.register("ener")
+class EnergyFittingNet(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        
embedding_width: int,
+        neuron: List[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        use_tebd: bool = True,
+        **kwargs,
+    ):
+        super().__init__(
+            "energy",
+            ntypes,
+            embedding_width,
+            1,
+            neuron=neuron,
+            bias_atom_e=bias_atom_e,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            activation_function=activation_function,
+            precision=precision,
+            use_tebd=use_tebd,
+            **kwargs,
+        )
 
 
 @Fitting.register("direct_force")
@@ -136,7 +414,7 @@ def __init__(
         """
         super().__init__()
         self.ntypes = ntypes
-        self.embedding_width = embedding_width
+        self.dim_descrpt = embedding_width
         self.use_tebd = use_tebd
         self.out_dim = out_dim
         if not use_tebd:
@@ -186,13 +464,12 @@ def forward(
         self,
         inputs: torch.Tensor,
         atype: torch.Tensor,
-        atype_tebd: Optional[torch.Tensor] = None,
-        rot_mat: Optional[torch.Tensor] = None,
+        gr: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, None]:
         """Based on embedding net output, calculate total energy.
 
         Args:
-        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width].
+        - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt].
         - natoms: Tell atom count and element count. Its shape is [2+self.ntypes].
 
         Returns
         -------
         - `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
         """
         nframes, nloc, _ = inputs.size()
         if self.use_tebd:
-            if atype_tebd is not None:
-                inputs = torch.concat([inputs, atype_tebd], dim=-1)
+            # if atype_tebd is not None:
+            #     inputs = torch.concat([inputs, atype_tebd], dim=-1)
             vec_out = self.filter_layers_dipole[0](
                 inputs
             )  # Shape is [nframes, nloc, m1]
             assert list(vec_out.size()) == [nframes, nloc, self.out_dim]
             # (nf x nloc) x 1 x od
             vec_out = vec_out.view(-1, 1, self.out_dim)
-            assert rot_mat is not None
+            assert gr is not None
             # (nf x nloc) x od x 3
-            rot_mat = rot_mat.view(-1, self.out_dim, 3)
+            gr = gr.view(-1, self.out_dim, 3)
             vec_out = (
-                torch.bmm(vec_out, rot_mat).squeeze(-2).view(nframes, nloc, 3)
+                torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3)
             )  # Shape is [nframes, nloc, 3]
         else:
             vec_out = torch.zeros_like(atype).unsqueeze(-1)  # jit assertion
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 16e80f9c20..c6fb6b27e1 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -7,9 +7,6 @@
 import numpy as np
 import torch
 
-from deepmd.model_format import (
-    FittingOutputDef,
-)
 from deepmd.pt.model.task.task import (
     TaskBaseMethod,
 )
@@ -61,17 +58,9 @@ def __new__(cls, *args, **kwargs):
         if fitting_type in Fitting.__plugins.plugins:
             cls = Fitting.__plugins.plugins[fitting_type]
         else:
-            raise RuntimeError("Unknown descriptor type: " + fitting_type)
+            raise RuntimeError("Unknown fitting type: " + fitting_type)
         return super().__new__(cls)
 
-    def output_def(self) -> FittingOutputDef:
-        """Definition for the task Output."""
-        raise NotImplementedError
-
-    def forward(self, **kwargs):
-        """Task Output."""
-        raise NotImplementedError
-
     def share_params(self, base_class, shared_level, resume=False):
         assert (
             self.__class__ == base_class.__class__
diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py
index a9b2efeb9a..b2dc03e4bd 100644
--- a/deepmd/pt/model/task/task.py
+++ b/deepmd/pt/model/task/task.py
@@ -1,12 +1,18 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from abc import (
+    ABC,
+    abstractmethod,
+)
+
 import torch
 
+from 
deepmd.model_format import ( + FittingOutputDef, +) -class TaskBaseMethod(torch.nn.Module): - def __init__(self, **kwargs): - """Construct a basic head for different tasks.""" - super().__init__() - def forward(self, **kwargs): - """Task Output.""" +class TaskBaseMethod(torch.nn.Module, ABC): + @abstractmethod + def output_def(self) -> FittingOutputDef: + """Definition for the task Output.""" raise NotImplementedError diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 780dbf7e62..e83e12f608 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -4,9 +4,17 @@ Optional, ) +import numpy as np import torch import torch.nn.functional as F +from deepmd.model_format.common import PRECISION_DICT as NP_PRECISION_DICT + +from .env import ( + DEVICE, +) +from .env import PRECISION_DICT as PT_PRECISION_DICT + def get_activation_fn(activation: str) -> Callable: """Returns the activation function corresponding to `activation`.""" @@ -41,3 +49,35 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x else: raise RuntimeError(f"activation function {self.activation} not supported") + + +def to_numpy_array( + xx: torch.Tensor, +) -> np.ndarray: + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of PT_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(xx.dtype, None) + prec = NP_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + return xx.detach().cpu().numpy().astype(prec) + + +def to_torch_tensor( + xx: np.ndarray, +) -> torch.Tensor: + if xx is None: + return None + assert xx is not None + # Create a reverse mapping of NP_PRECISION_DICT + reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()} + # Use the reverse mapping to find keys with the desired value + prec = reverse_precision_dict.get(type(xx.flat[0]), None) + prec = PT_PRECISION_DICT.get(prec, None) + if prec is None: + raise ValueError(f"unknown precision {xx.dtype}") + return torch.tensor(xx, dtype=prec, device=DEVICE) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index da03631689..eada2774d3 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -472,6 +472,11 @@ def _get_package_constants( GLOBAL_CONFIG = _get_package_constants() +if GLOBAL_CONFIG["enable_tensorflow"] == "0": + raise RuntimeError( + "TensorFlow backend is not built. To enable it, " + "set the environmental variable DP_ENABLE_TENSORFLOW=1." + ) MODEL_VERSION = GLOBAL_CONFIG["model_version"] TF_VERSION = GLOBAL_CONFIG["tf_version"] TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index ae1509f2ca..389cc78c9f 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -90,7 +90,17 @@ Check the compiler version on your machine gcc --version ``` -The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`. 
+The compiler GCC 4.8 or later is supported in the DeePMD-kit.
+
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
+Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
+
+:::
+
+::::
 
 Execute
 
 ```bash
@@ -105,7 +115,8 @@ One may set the following environment variables before executing `pip`:
 | DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
 | CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
 | ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
-| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
+| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend. |
+| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
 | DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
 | CMAKE_ARGS | str | - | Additional CMake arguments |
 | <LANG>FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). 
| diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index c273bc9263..d6ee3d0958 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -154,7 +154,22 @@ if(ENABLE_TENSORFLOW AND NOT DEEPMD_C_ROOT) endif() if(ENABLE_PYTORCH AND NOT DEEPMD_C_ROOT) find_package(Torch REQUIRED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + string(REGEX MATCH "_GLIBCXX_USE_CXX11_ABI=([0-9]+)" CXXABI_PT_MATCH + ${TORCH_CXX_FLAGS}) + if(CXXABI_PT_MATCH) + message(STATUS "PyTorch CXX11 ABI: ${CMAKE_MATCH_1}") + if(DEFINED OP_CXX_ABI) + if(NOT ${CMAKE_MATCH_1} EQUAL ${OP_CXX_ABI}) + message( + FATAL_ERROR + "PyTorch CXX11 ABI mismatch TensorFlow: ${CMAKE_MATCH_1} != ${OP_CXX_ABI}" + ) + endif() + else() + set(OP_CXX_ABI ${CMAKE_MATCH_1}) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}) + endif() + endif() endif() # log enabled backends if(NOT DEEPMD_C_ROOT) @@ -165,7 +180,9 @@ if(NOT DEEPMD_C_ROOT) if(ENABLE_PYTORCH) message(STATUS "- PyTorch") endif() - if(NOT ENABLE_TENSORFLOW AND NOT ENABLE_PYTORCH) + if(NOT ENABLE_TENSORFLOW + AND NOT ENABLE_PYTORCH + AND NOT BUILD_PY_IF) message(FATAL_ERROR "No backend is enabled.") endif() endif() diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt index 5473b91f29..b1ce17566f 100644 --- a/source/config/CMakeLists.txt +++ b/source/config/CMakeLists.txt @@ -1,5 +1,19 @@ # config +# cmake will treat true, false, on, off, 1, 0 as booleans we hope an easy way to +# check it +if(ENABLE_TENSORFLOW) + set(ENABLE_TENSORFLOW 1) +else() + set(ENABLE_TENSORFLOW 0) +endif() + +if(ENABLE_PYTORCH) + set(ENABLE_PYTORCH 1) +else() + set(ENABLE_PYTORCH 0) +endif() + configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini" @ONLY) diff --git a/source/config/run_config.ini b/source/config/run_config.ini index 3f0a7a33a8..11f4100e61 100644 --- a/source/config/run_config.ini +++ b/source/config/run_config.ini @@ -4,6 +4,8 @@ GIT_SUMM = @GIT_SUMM@ GIT_HASH = @GIT_HASH@ GIT_DATE = @GIT_DATE@ GIT_BRANCH = @GIT_BRANCH@ +ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@ +ENABLE_PYTORCH = @ENABLE_PYTORCH@ TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@ TF_LIBS = @TensorFlow_LIBRARY@ TF_VERSION = @TENSORFLOW_VERSION@ diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt index 3bd24cc620..804e1c0506 100644 --- a/source/lib/src/gpu/CMakeLists.txt +++ b/source/lib/src/gpu/CMakeLists.txt @@ -10,8 +10,10 @@ if(USE_CUDA_TOOLKIT) endif() enable_language(CUDA) set(CMAKE_CUDA_STANDARD 11) - add_compile_definitions( - "$<$:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>") + if(DEFINED OP_CXX_ABI) + add_compile_definitions( + "$<$:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>") + endif() find_package(CUDAToolkit REQUIRED) diff --git a/source/tests/common/test_model_format_utils.py b/source/tests/common/test_model_format_utils.py index da76c53ed9..cb85fd2bb2 100644 --- a/source/tests/common/test_model_format_utils.py +++ b/source/tests/common/test_model_format_utils.py @@ -13,6 +13,7 @@ EmbeddingNet, EnvMat, FittingNet, + InvarFitting, NativeLayer, NativeNet, NetworkCollection, @@ -369,3 +370,123 @@ def test_self_consistency( mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist) for ii in [0, 1, 4]: np.testing.assert_allclose(mm0[ii], mm1[ii]) + + +class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA(self.rcut, 
self.rcut_smth, self.sel) + dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) + atype = self.atype_ext[:, :nloc] + + for ( + distinguish_types, + od, + nfp, + nap, + ) in itertools.product( + [True, False], + [1, 2], + [0, 3], + [0, 4], + ): + ifn0 = InvarFitting( + "energy", + self.nt, + ds.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + distinguish_types=distinguish_types, + ) + ifn1 = InvarFitting.deserialize(ifn0.serialize()) + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp)) + else: + ifp = None + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap)) + else: + iap = None + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + ret1 = ifn1(dd[0], atype, fparam=ifp, aparam=iap) + np.testing.assert_allclose(ret0["energy"], ret1["energy"]) + + def test_self_exception( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel) + dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) + atype = self.atype_ext[:, :nloc] + + for ( + distinguish_types, + od, + nfp, + nap, + ) in itertools.product( + [True, False], + [1, 2], + [0, 3], + [0, 4], + ): + ifn0 = InvarFitting( + "energy", + self.nt, + ds.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + distinguish_types=distinguish_types, + ) + + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp)) + else: + ifp = None + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap)) + else: + iap = None + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0][:, :, :-2], atype, fparam=ifp, aparam=iap) + self.assertIn("input descriptor", context.exception) + + if nfp > 0: + ifp = rng.normal(size=(self.nf, nfp - 1)) + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + self.assertIn("input fparam", context.exception) + + if nap > 0: + iap = rng.normal(size=(self.nf, self.nloc, nap - 1)) + with self.assertRaises(ValueError) as context: + ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) + self.assertIn("input aparam", context.exception) + + def test_get_set(self): + ifn0 = InvarFitting( + "energy", + self.nt, + 3, + 1, + ) + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ii in [ + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + ]: + ifn0[ii] = foo + np.testing.assert_allclose(foo, ifn0[ii]) diff --git a/source/tests/pt/test_ener_fitting.py b/source/tests/pt/test_ener_fitting.py new file mode 100644 index 0000000000..eece8447df --- /dev/null +++ b/source/tests/pt/test_ener_fitting.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import torch + +from deepmd.model_format import InvarFitting as DPInvarFitting +from deepmd.pt.model.descriptor.se_a import ( + DescrptSeA, +) +from deepmd.pt.model.task.ener import ( + EnergyFittingNet, + InvarFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION + + +class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + rd0, _, _, _, _ = dd0( + torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), + 
torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), + torch.tensor(self.nlist, dtype=int, device=env.DEVICE), + ) + atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) + + for od, distinguish_types, nfp, nap in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + ): + ft0 = InvarFitting( + "foo", + self.nt, + dd0.dim_out, + od, + numb_fparam=nfp, + numb_aparam=nap, + use_tebd=(not distinguish_types), + ).to(env.DEVICE) + ft1 = DPInvarFitting.deserialize(ft0.serialize()) + ft2 = InvarFitting.deserialize(ft0.serialize()) + + if nfp > 0: + ifp = torch.tensor( + rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE + ) + else: + ifp = None + if nap > 0: + iap = torch.tensor( + rng.normal(size=(self.nf, self.nloc, nap)), + dtype=dtype, + device=env.DEVICE, + ) + else: + iap = None + + ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap) + ret1 = ft1( + rd0.detach().cpu().numpy(), + atype.detach().cpu().numpy(), + fparam=to_numpy_array(ifp), + aparam=to_numpy_array(iap), + ) + ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + ret1["foo"], + ) + np.testing.assert_allclose( + to_numpy_array(ret0["foo"]), + to_numpy_array(ret2["foo"]), + ) + + def test_new_old( + self, + ): + rng = np.random.default_rng() + nf, nloc, nnei = self.nlist.shape + dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) + rd0, _, _, _, _ = dd( + torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), + torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), + torch.tensor(self.nlist, dtype=int, device=env.DEVICE), + ) + atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) + + od = 1 + for distinguish_types in itertools.product( + [True, False], + ): + ft0 = EnergyFittingNet( + self.nt, + dd.dim_out, + distinguish_types=distinguish_types, + ).to(env.DEVICE) + ft1 = EnergyFittingNet( + self.nt, + dd.dim_out, + distinguish_types=distinguish_types, + old_impl=True, + ).to(env.DEVICE) + dd0 = ft0.state_dict() + dd1 = ft1.state_dict() + for kk, vv in dd1.items(): + new_kk = kk + new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks") + new_kk = new_kk.replace("deep_layers", "layers") + new_kk = new_kk.replace("final_layer", "layers.3") + dd1[kk] = dd0[new_kk] + if kk.split(".")[-1] in ["idt", "bias"]: + dd1[kk] = dd1[kk].unsqueeze(0) + dd1["bias_atom_e"] = dd0["bias_atom_e"] + ft1.load_state_dict(dd1) + ret0 = ft0(rd0, atype) + ret1 = ft1(rd0, atype) + np.testing.assert_allclose( + to_numpy_array(ret0["energy"]), + to_numpy_array(ret1["energy"]), + ) + + def test_jit( + self, + ): + for od, distinguish_types, nfp, nap in itertools.product( + [1, 3], + [True, False], + [0, 3], + [0, 4], + ): + ft0 = InvarFitting( + "foo", + self.nt, + 9, + od, + numb_fparam=nfp, + numb_aparam=nap, + use_tebd=(not distinguish_types), + ).to(env.DEVICE) + torch.jit.script(ft0) + + def test_get_set(self): + ifn0 = InvarFitting( + "energy", + self.nt, + 3, + 1, + ) + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ii in [ + "bias_atom_e", + "fparam_avg", + "fparam_inv_std", + "aparam_avg", + "aparam_inv_std", + ]: + ifn0[ii] = torch.tensor(foo, dtype=dtype, device=env.DEVICE) + np.testing.assert_allclose(foo, ifn0[ii].detach().cpu().numpy()) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index 3feb4f4739..ed2c428de5 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -102,25 +102,25 
@@ def test_consistency(self): my_fn = EnergyFittingNet( self.ntypes, self.embedding_width, - self.n_neuron, - self.dp_fn.bias_atom_e, - use_tebd=False, + neuron=self.n_neuron, + bias_atom_e=self.dp_fn.bias_atom_e, + distinguish_types=True, ) for name, param in my_fn.named_parameters(): - matched = re.match("filter_layers\.(\d).deep_layers\.(\d)\.([a-z]+)", name) + matched = re.match( + "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name + ) key = None if matched: + if int(matched.group(2)) == len(self.n_neuron): + layer_id = -1 + else: + layer_id = matched.group(2) key = gen_key( type_id=matched.group(1), - layer_id=matched.group(2), + layer_id=layer_id, w_or_b=matched.group(3), ) - else: - matched = re.match("filter_layers\.(\d).final_layer\.([a-z]+)", name) - if matched: - key = gen_key( - type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2) - ) assert key is not None var = values[key] with torch.no_grad(): @@ -132,7 +132,7 @@ def test_consistency(self): ret = my_fn(embedding, atype) my_energy = ret["energy"] my_energy = my_energy.detach() - self.assertTrue(np.allclose(dp_energy, my_energy.numpy().reshape([-1]))) + np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) if __name__ == "__main__": diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 5bbbc9e352..c6595e6471 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -53,23 +53,24 @@ VariableState = collections.namedtuple("VariableState", ["value", "gradient"]) -def torch2tf(torch_name): +def torch2tf(torch_name, last_layer_id=None): fields = torch_name.split(".") offset = int(fields[2] == "networks") element_id = int(fields[2 + offset]) if fields[0] == "descriptor": layer_id = int(fields[4 + offset]) + 1 weight_type = fields[5 + offset] - return "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) - elif fields[3] == "deep_layers": - layer_id = int(fields[4]) - weight_type = fields[5] - return "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type) - elif fields[3] == "final_layer": - weight_type = fields[4] - return "final_layer_type_%d/%s:0" % (element_id, weight_type) + ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id) + elif fields[0] == "fitting_net": + layer_id = int(fields[4 + offset]) + weight_type = fields[5 + offset] + if layer_id != last_layer_id: + ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type) + else: + ret = "final_layer_type_%d/%s:0" % (element_id, weight_type) else: raise RuntimeError("Unexpected parameter name: %s" % torch_name) + return ret class DpTrainer: @@ -290,7 +291,7 @@ def test_consistency(self): "neuron": self.filter_neuron, "axis_neuron": self.axis_neuron, }, - "fitting_net": {"neuron": self.n_neuron}, + "fitting_net": {"neuron": self.n_neuron, "distinguish_types": True}, "data_stat_nbatch": self.data_stat_nbatch, "type_map": self.type_map, }, @@ -323,7 +324,7 @@ def test_consistency(self): # Keep parameter value consistency between 2 implentations for name, param in my_model.named_parameters(): name = name.replace("sea.", "") - var_name = torch2tf(name) + var_name = torch2tf(name, last_layer_id=len(self.n_neuron)) var = vs_dict[var_name].value with torch.no_grad(): src = torch.from_numpy(var) @@ -404,7 +405,7 @@ def step(step_id): for name, param in my_model.named_parameters(): name = name.replace("sea.", "") - var_name = torch2tf(name) + var_name = torch2tf(name, last_layer_id=len(self.n_neuron)) var_grad = vs_dict[var_name].gradient param_grad = 
param.grad.cpu() var_grad = torch.tensor(var_grad) diff --git a/source/tests/pt/test_se_e2_a.py b/source/tests/pt/test_se_e2_a.py index c0a106cb16..0da80ea1ea 100644 --- a/source/tests/pt/test_se_e2_a.py +++ b/source/tests/pt/test_se_e2_a.py @@ -25,6 +25,9 @@ PRECISION_DICT, ) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) from .test_mlp import ( get_tols, ) @@ -32,36 +35,6 @@ dtype = env.GLOBAL_PT_FLOAT_PRECISION -class TestCaseSingleFrameWithNlist: - def setUp(self): - # nloc == 3, nall == 4 - self.nloc = 3 - self.nall = 4 - self.nf, self.nt = 1, 2 - self.coord_ext = np.array( - [ - [0, 0, 0], - [0, 1, 0], - [0, 0, 1], - [0, -2, 0], - ], - dtype=np.float64, - ).reshape([1, self.nall * 3]) - self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall]) - # sel = [5, 2] - self.sel = [5, 2] - self.nlist = np.array( - [ - [1, 3, -1, -1, -1, 2, -1], - [0, -1, -1, -1, -1, 2, -1], - [0, 1, -1, -1, -1, 0, -1], - ], - dtype=int, - ).reshape([1, self.nloc, sum(self.sel)]) - self.rcut = 0.4 - self.rcut_smth = 2.2 - - # to be merged with the tf test case @unittest.skipIf(not support_se_e2_a, "EnvMat not supported") class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist): diff --git a/source/tests/pt/test_utils.py b/source/tests/pt/test_utils.py new file mode 100644 index 0000000000..9c9a9479ad --- /dev/null +++ b/source/tests/pt/test_utils.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np +import torch + +from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, +) + + +class TestCvt(unittest.TestCase): + def test_to_numpy(self): + rng = np.random.default_rng() + foo = rng.normal([3, 4]) + for ptp, npp in zip( + [torch.float16, torch.float32, torch.float64], + [np.float16, np.float32, np.float64], + ): + foo = foo.astype(npp) + bar = to_torch_tensor(foo) + self.assertEqual(bar.dtype, ptp) + onk = to_numpy_array(bar) + self.assertEqual(onk.dtype, npp) + with self.assertRaises(ValueError) as ee: + foo = foo.astype(np.int32) + bar = to_torch_tensor(foo) + with self.assertRaises(ValueError) as ee: + bar = to_torch_tensor(foo) + bar = to_numpy_array(bar.int()) From 4cd82586fee0b47a26bfed7471d97f9ae87dea31 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:28:37 +0800 Subject: [PATCH 05/10] Revert "Devel update (#30)" This reverts commit cb4cc67c56e648ea04e462e649fad98f25fd85d4. 
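
For orientation while reading this revert: the `InvarFitting.forward` that the earlier patch introduced (and that this commit removes again) standardizes `fparam`/`aparam`, tiles them over the atoms, concatenates them to the descriptor, and applies per-type fitting networks under a 0/1 type mask. Below is a minimal NumPy sketch of that pipeline; the sizes, the zero means and unit inverse standard deviations, and the random linear stand-in networks are all illustrative assumptions, not code from this series.

```python
import numpy as np

# toy sizes: frames, local atoms, descriptor dim, fparam/aparam dims, types
nf, nloc, nd, nfp, nap, ntypes, dim_out = 2, 3, 8, 2, 2, 2, 1
rng = np.random.default_rng(0)

descriptor = rng.normal(size=(nf, nloc, nd))
atype = rng.integers(0, ntypes, size=(nf, nloc))
fparam = rng.normal(size=(nf, nfp))
aparam = rng.normal(size=(nf, nloc, nap))

# standardize the frame parameter, then broadcast it to every atom
fparam_avg, fparam_inv_std = np.zeros(nfp), np.ones(nfp)
ff = (fparam - fparam_avg) * fparam_inv_std
ff = np.tile(ff.reshape(nf, 1, nfp), (1, nloc, 1))

# standardize the atomic parameter (it already has a per-atom axis)
aparam_avg, aparam_inv_std = np.zeros(nap), np.ones(nap)
aa = (aparam - aparam_avg) * aparam_inv_std

# concatenate along the feature axis: nf x nloc x (nd + nfp + nap)
xx = np.concatenate([descriptor, ff, aa], axis=-1)

# "distinguish_types" branch: one stand-in net per type, merged by a 0/1 mask
nets = [rng.normal(size=(nd + nfp + nap, dim_out)) for _ in range(ntypes)]
bias_atom_e = np.zeros((ntypes, dim_out))
outs = np.zeros((nf, nloc, dim_out))
for type_i in range(ntypes):
    mask = (atype == type_i)[..., None]  # nf x nloc x 1
    outs = outs + (xx @ nets[type_i] + bias_atom_e[type_i]) * mask
print(outs.shape)  # (2, 3, 1)
```

In the `use_tebd` branch the per-type loop collapses to a single network call plus `bias_atom_e[atype]`.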
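This commit also removes the `to_numpy_array`/`to_torch_tensor` helpers, whose core trick is inverting a name-keyed precision dict to translate dtypes between frameworks. A self-contained sketch of the torch-to-NumPy direction follows; the two-entry dicts are hypothetical stand-ins for the real `PRECISION_DICT`s.

```python
import numpy as np
import torch

# hypothetical stand-ins for the PT/NP PRECISION_DICTs used in deepmd
PT_PRECISION_DICT = {"float32": torch.float32, "float64": torch.float64}
NP_PRECISION_DICT = {"float32": np.float32, "float64": np.float64}


def to_numpy(xx: torch.Tensor) -> np.ndarray:
    # reverse mapping: torch dtype -> precision name -> numpy dtype
    name = {v: k for k, v in PT_PRECISION_DICT.items()}.get(xx.dtype)
    prec = NP_PRECISION_DICT.get(name)
    if prec is None:
        raise ValueError(f"unknown precision {xx.dtype}")
    return xx.detach().cpu().numpy().astype(prec)


print(to_numpy(torch.ones(2, dtype=torch.float64)).dtype)  # float64
```

The `int32` case in `test_utils.py` above exercises exactly this `ValueError` path, since integer dtypes are absent from both dicts.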
--- backend/find_tensorflow.py | 6 - backend/read_env.py | 24 +- deepmd/model_format/__init__.py | 4 - deepmd/model_format/fitting.py | 355 ----------------- deepmd/model_format/network.py | 2 - deepmd/model_format/se_e2_a.py | 10 +- deepmd/pt/model/model/dp_atomic_model.py | 10 +- deepmd/pt/model/network/mlp.py | 7 +- deepmd/pt/model/task/ener.py | 373 +++--------------- deepmd/pt/model/task/fitting.py | 13 +- deepmd/pt/model/task/task.py | 18 +- deepmd/pt/utils/utils.py | 40 -- deepmd/tf/env.py | 5 - doc/install/install-from-source.md | 15 +- source/CMakeLists.txt | 21 +- source/config/CMakeLists.txt | 14 - source/config/run_config.ini | 2 - source/lib/src/gpu/CMakeLists.txt | 6 +- .../tests/common/test_model_format_utils.py | 121 ------ source/tests/pt/test_ener_fitting.py | 181 --------- source/tests/pt/test_fitting_net.py | 24 +- source/tests/pt/test_model.py | 25 +- source/tests/pt/test_se_e2_a.py | 33 +- source/tests/pt/test_utils.py | 31 -- 24 files changed, 143 insertions(+), 1197 deletions(-) delete mode 100644 deepmd/model_format/fitting.py delete mode 100644 source/tests/pt/test_ener_fitting.py delete mode 100644 source/tests/pt/test_utils.py diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 083e2673f7..32ae62469c 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -127,12 +127,6 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. """ - if tf_version is None: - return { - "cpu": [], - "gpu": [], - "mpi": [], - } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") diff --git a/backend/read_env.py b/backend/read_env.py index bee5d607e3..2cf433181a 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -80,26 +80,16 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append("-DENABLE_IPI:BOOL=TRUE") extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): - find_libpython_requires = [] - else: - find_libpython_requires = ["find_libpython"] - cmake_args.extend( - [ - "-DENABLE_TENSORFLOW=ON", - f"-DTENSORFLOW_VERSION={tf_version}", - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", - ] - ) - else: + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): find_libpython_requires = [] - cmake_args.append("-DENABLE_TENSORFLOW=OFF") - tf_version = None + else: + find_libpython_requires = ["find_libpython"] + cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") cmake_args = [ + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] diff --git a/deepmd/model_format/__init__.py b/deepmd/model_format/__init__.py index e15f73758e..253bca3507 100644 --- a/deepmd/model_format/__init__.py +++ b/deepmd/model_format/__init__.py @@ -7,9 +7,6 @@ from .env_mat import ( EnvMat, ) -from .fitting import ( - InvarFitting, -) from .network import ( EmbeddingNet, FittingNet, @@ -37,7 +34,6 @@ ) __all__ = [ - "InvarFitting", "DescrptSeA", "EnvMat", "make_multilayer_network", diff --git a/deepmd/model_format/fitting.py b/deepmd/model_format/fitting.py deleted file mode 100644 index 904fb42b76..0000000000 --- a/deepmd/model_format/fitting.py +++ /dev/null @@ -1,355 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later 
-import copy -from typing import ( - Any, - List, - Optional, -) - -import numpy as np - -from .common import ( - DEFAULT_PRECISION, - NativeOP, -) -from .network import ( - FittingNet, - NetworkCollection, -) -from .output_def import ( - FittingOutputDef, - OutputVariableDef, - fitting_check_output, -) - - -@fitting_check_output -class InvarFitting(NativeOP): - r"""Fitting the energy (or a porperty of `dim_out`) of the system. The force and the virial can also be trained. - - Lets take the energy fitting task as an example. - The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: - - .. math:: - E(\mathcal{D}) = \mathcal{L}^{(n)} \circ \mathcal{L}^{(n-1)} - \circ \cdots \circ \mathcal{L}^{(1)} \circ \mathcal{L}^{(0)} - - The first :math:`n` hidden layers :math:`\mathcal{L}^{(0)}, \cdots, \mathcal{L}^{(n-1)}` are given by - - .. math:: - \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= - \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) - - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` - is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, - both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` - is the activation function. - - The output layer :math:`\mathcal{L}^{(n)}` is given by - - .. math:: - \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= - \mathbf{x}^T\mathbf{w}+\mathbf{b} - - where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` - is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and - :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, - both of which are trainable if `trainable[n]` is `True`. - - Parameters - ---------- - var_name - The name of the output variable. - ntypes - The number of atom types. - dim_descrpt - The dimension of the input descriptor. - dim_out - The dimension of the output fit property. - neuron - Number of neurons :math:`N` in each hidden layer of the fitting net - resnet_dt - Time-step `dt` in the resnet construction: - :math:`y = x + dt * \phi (Wx + b)` - numb_fparam - Number of frame parameter - numb_aparam - Number of atomic parameter - rcond - The condition number for the regression of atomic energy. - tot_ener_zero - Force the total energy to zero. Useful for the charge fitting. - trainable - If the weights of fitting net are trainable. - Suppose that we have :math:`N_l` hidden layers in the fitting net, - this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. - atom_ener - Specifying atomic energy contribution in vacuum. The `set_davg_zero` key in the descrptor should be set. - activation_function - The activation function :math:`\boldsymbol{\phi}` in the embedding net. Supported options are |ACTIVATION_FN| - precision - The precision of the embedding net parameters. Supported options are |PRECISION| - layer_name : list[Optional[str]], optional - The name of the each layer. If two layers, either in the same fitting or different fittings, - have the same name, they will share the same neural network parameters. - use_aparam_as_mask: bool, optional - If True, the atomic parameters will be used as a mask that determines the atom is real/virtual. - And the aparam will not be used as the atomic parameters for embedding. 
- distinguish_types - Different atomic types uses different fitting net. - - """ - - def __init__( - self, - var_name: str, - ntypes: int, - dim_descrpt: int, - dim_out: int, - neuron: List[int] = [120, 120, 120], - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - rcond: Optional[float] = None, - tot_ener_zero: bool = False, - trainable: Optional[List[bool]] = None, - atom_ener: Optional[List[float]] = None, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - layer_name: Optional[List[Optional[str]]] = None, - use_aparam_as_mask: bool = False, - spin: Any = None, - distinguish_types: bool = False, - ): - # seed, uniform_seed are not included - if tot_ener_zero: - raise NotImplementedError("tot_ener_zero is not implemented") - if spin is not None: - raise NotImplementedError("spin is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") - if use_aparam_as_mask: - raise NotImplementedError("use_aparam_as_mask is not implemented") - if layer_name is not None: - raise NotImplementedError("layer_name is not implemented") - if atom_ener is not None: - raise NotImplementedError("atom_ener is not implemented") - - self.var_name = var_name - self.ntypes = ntypes - self.dim_descrpt = dim_descrpt - self.dim_out = dim_out - self.neuron = neuron - self.resnet_dt = resnet_dt - self.numb_fparam = numb_fparam - self.numb_aparam = numb_aparam - self.rcond = rcond - self.tot_ener_zero = tot_ener_zero - self.trainable = trainable - self.atom_ener = atom_ener - self.activation_function = activation_function - self.precision = precision - self.layer_name = layer_name - self.use_aparam_as_mask = use_aparam_as_mask - self.spin = spin - self.distinguish_types = distinguish_types - if self.spin is not None: - raise NotImplementedError("spin is not supported") - - # init constants - self.bias_atom_e = np.zeros([self.ntypes, self.dim_out]) - if self.numb_fparam > 0: - self.fparam_avg = np.zeros(self.numb_fparam) - self.fparam_inv_std = np.ones(self.numb_fparam) - else: - self.fparam_avg, self.fparam_inv_std = None, None - if self.numb_aparam > 0: - self.aparam_avg = np.zeros(self.numb_aparam) - self.aparam_inv_std = np.ones(self.numb_aparam) - else: - self.aparam_avg, self.aparam_inv_std = None, None - # init networks - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam - out_dim = self.dim_out - self.nets = NetworkCollection( - 1 if self.distinguish_types else 0, - self.ntypes, - network_type="fitting_network", - networks=[ - FittingNet( - in_dim, - out_dim, - self.neuron, - self.activation_function, - self.resnet_dt, - self.precision, - bias_out=True, - ) - for ii in range(self.ntypes if self.distinguish_types else 1) - ], - ) - - def output_def(self): - return FittingOutputDef( - [ - OutputVariableDef( - self.var_name, [self.dim_out], reduciable=True, differentiable=True - ), - ] - ) - - def __setitem__(self, key, value): - if key in ["bias_atom_e"]: - self.bias_atom_e = value - elif key in ["fparam_avg"]: - self.fparam_avg = value - elif key in ["fparam_inv_std"]: - self.fparam_inv_std = value - elif key in ["aparam_avg"]: - self.aparam_avg = value - elif key in ["aparam_inv_std"]: - self.aparam_inv_std = value - else: - raise KeyError(key) - - def __getitem__(self, key): - if key in ["bias_atom_e"]: - return self.bias_atom_e - elif key in ["fparam_avg"]: - return self.fparam_avg - elif key in ["fparam_inv_std"]: - return self.fparam_inv_std - elif key in ["aparam_avg"]: - return 
self.aparam_avg - elif key in ["aparam_inv_std"]: - return self.aparam_inv_std - else: - raise KeyError(key) - - def serialize(self) -> dict: - """Serialize the fitting to dict.""" - return { - "var_name": self.var_name, - "ntypes": self.ntypes, - "dim_descrpt": self.dim_descrpt, - "dim_out": self.dim_out, - "neuron": self.neuron, - "resnet_dt": self.resnet_dt, - "numb_fparam": self.numb_fparam, - "numb_aparam": self.numb_aparam, - "rcond": self.rcond, - "activation_function": self.activation_function, - "precision": self.precision, - "distinguish_types": self.distinguish_types, - "nets": self.nets.serialize(), - "@variables": { - "bias_atom_e": self.bias_atom_e, - "fparam_avg": self.fparam_avg, - "fparam_inv_std": self.fparam_inv_std, - "aparam_avg": self.aparam_avg, - "aparam_inv_std": self.aparam_inv_std, - }, - # not supported - "tot_ener_zero": self.tot_ener_zero, - "trainable": self.trainable, - "atom_ener": self.atom_ener, - "layer_name": self.layer_name, - "use_aparam_as_mask": self.use_aparam_as_mask, - "spin": self.spin, - } - - @classmethod - def deserialize(cls, data: dict) -> "InvarFitting": - data = copy.deepcopy(data) - variables = data.pop("@variables") - nets = data.pop("nets") - obj = cls(**data) - for kk in variables.keys(): - obj[kk] = variables[kk] - obj.nets = NetworkCollection.deserialize(nets) - return obj - - def call( - self, - descriptor: np.array, - atype: np.array, - gr: Optional[np.array] = None, - g2: Optional[np.array] = None, - h2: Optional[np.array] = None, - fparam: Optional[np.array] = None, - aparam: Optional[np.array] = None, - ): - """Calculate the fitting. - - Parameters - ---------- - descriptor - input descriptor. shape: nf x nloc x nd - atype - the atom type. shape: nf x nloc - gr - The rotationally equivariant and permutationally invariant single particle - representation. shape: nf x nloc x ng x 3 - g2 - The rotationally invariant pair-partical representation. - shape: nf x nloc x nnei x ng - h2 - The rotationally equivariant pair-partical representation. - shape: nf x nloc x nnei x 3 - fparam - The frame parameter. shape: nf x nfp. nfp being `numb_fparam` - aparam - The atomic parameter. shape: nf x nloc x nap. nap being `numb_aparam` - - """ - nf, nloc, nd = descriptor.shape - # check input dim - if nd != self.dim_descrpt: - raise ValueError( - "get an input descriptor of dim {nd}," - "which is not consistent with {self.dim_descrpt}." 
- ) - xx = descriptor - # check fparam dim, concate to input descriptor - if self.numb_fparam > 0: - assert fparam is not None, "fparam should not be None" - if fparam.shape[-1] != self.numb_fparam: - raise ValueError( - "get an input fparam of dim {fparam.shape[-1]}, ", - "which is not consistent with {self.numb_fparam}.", - ) - fparam = (fparam - self.fparam_avg) * self.fparam_inv_std - fparam = np.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) - xx = np.concatenate( - [xx, fparam], - axis=-1, - ) - # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: - assert aparam is not None, "aparam should not be None" - if aparam.shape[-1] != self.numb_aparam: - raise ValueError( - "get an input aparam of dim {aparam.shape[-1]}, ", - "which is not consistent with {self.numb_aparam}.", - ) - aparam = (aparam - self.aparam_avg) * self.aparam_inv_std - xx = np.concatenate( - [xx, aparam], - axis=-1, - ) - - # calcualte the prediction - if self.distinguish_types: - outs = np.zeros([nf, nloc, self.dim_out]) - for type_i in range(self.ntypes): - mask = np.tile( - (atype == type_i).reshape([nf, nloc, 1]), [1, 1, self.dim_out] - ) - atom_energy = self.nets[(type_i,)](xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - outs = self.nets[()](xx) + self.bias_atom_e[atype] - return {self.var_name: outs} diff --git a/deepmd/model_format/network.py b/deepmd/model_format/network.py index f2056c0b95..a327d990c9 100644 --- a/deepmd/model_format/network.py +++ b/deepmd/model_format/network.py @@ -161,8 +161,6 @@ def __init__( ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision - # only use_timestep when skip connection is established. 
- use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng() self.w = rng.normal(size=(num_in, num_out)).astype(prec) self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None diff --git a/deepmd/model_format/se_e2_a.py b/deepmd/model_format/se_e2_a.py index f179b10ac3..28751cad8d 100644 --- a/deepmd/model_format/se_e2_a.py +++ b/deepmd/model_format/se_e2_a.py @@ -171,8 +171,9 @@ def __init__( ) self.env_mat = EnvMat(self.rcut, self.rcut_smth) self.nnei = np.sum(self.sel) - self.davg = np.zeros([self.ntypes, self.nnei, 4]) - self.dstd = np.ones([self.ntypes, self.nnei, 4]) + self.nneix4 = self.nnei * 4 + self.davg = np.zeros([self.ntypes, self.nneix4]) + self.dstd = np.ones([self.ntypes, self.nneix4]) self.orig_sel = self.sel def __setitem__(self, key, value): @@ -191,11 +192,6 @@ def __getitem__(self, key): else: raise KeyError(key) - @property - def dim_out(self): - """Returns the output dimension of this descriptor.""" - return self.neuron[-1] * self.axis_neuron - def cal_g( self, ss, diff --git a/deepmd/pt/model/model/dp_atomic_model.py b/deepmd/pt/model/model/dp_atomic_model.py index a222c8e6f6..853eacb875 100644 --- a/deepmd/pt/model/model/dp_atomic_model.py +++ b/deepmd/pt/model/model/dp_atomic_model.py @@ -93,11 +93,11 @@ def __init__( ) fitting_net["type"] = fitting_net.get("type", "ener") - fitting_net["ntypes"] = self.descriptor.get_ntype() - if self.descriptor_type in ["se_e2_a"]: - fitting_net["distinguish_types"] = True + if self.descriptor_type not in ["se_e2_a"]: + fitting_net["ntypes"] = 1 else: - fitting_net["distinguish_types"] = False + fitting_net["ntypes"] = self.descriptor.get_ntype() + fitting_net["use_tebd"] = False fitting_net["embedding_width"] = self.descriptor.dim_out self.grad_force = "direct" not in fitting_net["type"] @@ -165,5 +165,5 @@ def forward_atomic( ) assert descriptor is not None # energy, force - fit_ret = self.fitting_net(descriptor, atype, gr=rot_mat) + fit_ret = self.fitting_net(descriptor, atype, atype_tebd=None, rot_mat=rot_mat) return fit_ret diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index d76abd82f9..e3ac0e7bc2 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -56,10 +56,7 @@ def __init__( precision: str = DEFAULT_PRECISION, ): super().__init__() - # only use_timestep when skip connection is established. 
- self.use_timestep = use_timestep and ( - num_out == num_in or num_out == num_in * 2 - ) + self.use_timestep = use_timestep self.activate_name = activation_function self.activate = ActivationFn(self.activate_name) self.precision = precision @@ -210,7 +207,7 @@ class NetworkCollection(DPNetworkCollection, nn.Module): NETWORK_TYPE_MAP: ClassVar[Dict[str, type]] = { "network": MLP, "embedding_network": EmbeddingNet, - "fitting_network": FittingNet, + # "fitting_network": FittingNet, } def __init__(self, *args, **kwargs): diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index e40a6bda44..03043e2fcb 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -1,13 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import copy import logging from typing import ( - List, Optional, Tuple, ) -import numpy as np import torch from deepmd.model_format import ( @@ -15,10 +12,6 @@ OutputVariableDef, fitting_check_output, ) -from deepmd.pt.model.network.mlp import ( - FittingNet, - NetworkCollection, -) from deepmd.pt.model.network.network import ( ResidualDeep, ) @@ -28,35 +21,19 @@ from deepmd.pt.utils import ( env, ) -from deepmd.pt.utils.env import ( - DEFAULT_PRECISION, - PRECISION_DICT, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, - to_torch_tensor, -) - -dtype = env.GLOBAL_PT_FLOAT_PRECISION -device = env.DEVICE +@Fitting.register("ener") @fitting_check_output -class InvarFitting(Fitting): +class EnergyFittingNet(Fitting): def __init__( self, - var_name: str, - ntypes: int, - dim_descrpt: int, - dim_out: int, - neuron: List[int] = [128, 128, 128], - bias_atom_e: Optional[torch.Tensor] = None, - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - distinguish_types: bool = False, + ntypes, + embedding_width, + neuron, + bias_atom_e, + resnet_dt=True, + use_tebd=True, **kwargs, ): """Construct a fitting net for energy. @@ -69,322 +46,67 @@ def __init__( - resnet_dt: Using time-step in the ResNet construction. """ super().__init__() - self.var_name = var_name self.ntypes = ntypes - self.dim_descrpt = dim_descrpt - self.dim_out = dim_out - self.neuron = neuron - self.distinguish_types = distinguish_types - self.use_tebd = not self.distinguish_types - self.resnet_dt = resnet_dt - self.numb_fparam = numb_fparam - self.numb_aparam = numb_aparam - self.activation_function = activation_function - self.precision = precision - self.prec = PRECISION_DICT[self.precision] - if bias_atom_e is None: - bias_atom_e = np.zeros([self.ntypes, self.dim_out]) - bias_atom_e = torch.tensor(bias_atom_e, dtype=self.prec, device=device) - bias_atom_e = bias_atom_e.view([self.ntypes, self.dim_out]) - if not self.use_tebd: - assert self.ntypes == bias_atom_e.shape[0], "Element count mismatches!" + self.embedding_width = embedding_width + self.use_tebd = use_tebd + if not use_tebd: + assert self.ntypes == len(bias_atom_e), "Element count mismatches!" 
+ bias_atom_e = torch.tensor(bias_atom_e) self.register_buffer("bias_atom_e", bias_atom_e) - # init constants - if self.numb_fparam > 0: - self.register_buffer( - "fparam_avg", - torch.zeros(self.numb_fparam, dtype=self.prec, device=device), - ) - self.register_buffer( - "fparam_inv_std", - torch.ones(self.numb_fparam, dtype=self.prec, device=device), - ) - else: - self.fparam_avg, self.fparam_inv_std = None, None - if self.numb_aparam > 0: - self.register_buffer( - "aparam_avg", - torch.zeros(self.numb_aparam, dtype=self.prec, device=device), - ) - self.register_buffer( - "aparam_inv_std", - torch.ones(self.numb_aparam, dtype=self.prec, device=device), - ) - else: - self.aparam_avg, self.aparam_inv_std = None, None - - in_dim = self.dim_descrpt + self.numb_fparam + self.numb_aparam - out_dim = 1 - self.old_impl = kwargs.get("old_impl", False) - if self.old_impl: - filter_layers = [] - for type_i in range(self.ntypes): - bias_type = 0.0 - one = ResidualDeep( - type_i, - self.dim_descrpt, - self.neuron, - bias_type, - resnet_dt=self.resnet_dt, - ) - filter_layers.append(one) - self.filter_layers_old = torch.nn.ModuleList(filter_layers) - self.filter_layers = None - else: - self.filter_layers = NetworkCollection( - 1 if self.distinguish_types else 0, - self.ntypes, - network_type="fitting_network", - networks=[ - FittingNet( - in_dim, - out_dim, - self.neuron, - self.activation_function, - self.resnet_dt, - self.precision, - bias_out=True, - ) - for ii in range(self.ntypes if self.distinguish_types else 1) - ], + filter_layers = [] + for type_i in range(self.ntypes): + bias_type = 0.0 + one = ResidualDeep( + type_i, embedding_width, neuron, bias_type, resnet_dt=resnet_dt ) - self.filter_layers_old = None + filter_layers.append(one) + self.filter_layers = torch.nn.ModuleList(filter_layers) - # very bad design... if "seed" in kwargs: logging.info("Set seed to %d in fitting net.", kwargs["seed"]) torch.manual_seed(kwargs["seed"]) - def output_def(self) -> FittingOutputDef: + def output_def(self): return FittingOutputDef( [ - OutputVariableDef( - self.var_name, [self.dim_out], reduciable=True, differentiable=True - ), + OutputVariableDef("energy", [1], reduciable=True, differentiable=True), ] ) - def __setitem__(self, key, value): - if key in ["bias_atom_e"]: - # correct bias_atom_e shape. 
user may provide stupid shape - self.bias_atom_e = value - elif key in ["fparam_avg"]: - self.fparam_avg = value - elif key in ["fparam_inv_std"]: - self.fparam_inv_std = value - elif key in ["aparam_avg"]: - self.aparam_avg = value - elif key in ["aparam_inv_std"]: - self.aparam_inv_std = value - else: - raise KeyError(key) - - def __getitem__(self, key): - if key in ["bias_atom_e"]: - return self.bias_atom_e - elif key in ["fparam_avg"]: - return self.fparam_avg - elif key in ["fparam_inv_std"]: - return self.fparam_inv_std - elif key in ["aparam_avg"]: - return self.aparam_avg - elif key in ["aparam_inv_std"]: - return self.aparam_inv_std - else: - raise KeyError(key) - - def serialize(self) -> dict: - """Serialize the fitting to dict.""" - return { - "var_name": self.var_name, - "ntypes": self.ntypes, - "dim_descrpt": self.dim_descrpt, - "dim_out": self.dim_out, - "neuron": self.neuron, - "resnet_dt": self.resnet_dt, - "numb_fparam": self.numb_fparam, - "numb_aparam": self.numb_aparam, - "activation_function": self.activation_function, - "precision": self.precision, - "distinguish_types": self.distinguish_types, - "nets": self.filter_layers.serialize(), - "@variables": { - "bias_atom_e": to_numpy_array(self.bias_atom_e), - "fparam_avg": to_numpy_array(self.fparam_avg), - "fparam_inv_std": to_numpy_array(self.fparam_inv_std), - "aparam_avg": to_numpy_array(self.aparam_avg), - "aparam_inv_std": to_numpy_array(self.aparam_inv_std), - }, - # "rcond": self.rcond , - # "tot_ener_zero": self.tot_ener_zero , - # "trainable": self.trainable , - # "atom_ener": self.atom_ener , - # "layer_name": self.layer_name , - # "use_aparam_as_mask": self.use_aparam_as_mask , - # "spin": self.spin , - ## NOTICE: not supported by far - "rcond": None, - "tot_ener_zero": False, - "trainable": True, - "atom_ener": None, - "layer_name": None, - "use_aparam_as_mask": False, - "spin": None, - } - - @classmethod - def deserialize(cls, data: dict) -> "InvarFitting": - data = copy.deepcopy(data) - variables = data.pop("@variables") - nets = data.pop("nets") - obj = cls(**data) - for kk in variables.keys(): - obj[kk] = to_torch_tensor(variables[kk]) - obj.filter_layers = NetworkCollection.deserialize(nets) - return obj - - def _extend_f_avg_std(self, xx: torch.Tensor, nb: int) -> torch.Tensor: - return torch.tile(xx.view([1, self.numb_fparam]), [nb, 1]) - - def _extend_a_avg_std(self, xx: torch.Tensor, nb: int, nloc: int) -> torch.Tensor: - return torch.tile(xx.view([1, 1, self.numb_aparam]), [nb, nloc, 1]) - def forward( self, - descriptor: torch.Tensor, + inputs: torch.Tensor, atype: torch.Tensor, - gr: Optional[torch.Tensor] = None, - g2: Optional[torch.Tensor] = None, - h2: Optional[torch.Tensor] = None, - fparam: Optional[torch.Tensor] = None, - aparam: Optional[torch.Tensor] = None, + atype_tebd: Optional[torch.Tensor] = None, + rot_mat: Optional[torch.Tensor] = None, ): """Based on embedding net output, alculate total energy. Args: - - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width]. - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. Returns ------- - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. """ - xx = descriptor - nf, nloc, nd = xx.shape - # NOTICE in tests/pt/test_model.py - # it happens that the user directly access the data memeber self.bias_atom_e - # and set it to a wrong shape! 
- self.bias_atom_e = self.bias_atom_e.view([self.ntypes, self.dim_out]) - # check input dim - if nd != self.dim_descrpt: - raise ValueError( - "get an input descriptor of dim {nd}," - "which is not consistent with {self.dim_descrpt}." - ) - # check fparam dim, concate to input descriptor - if self.numb_fparam > 0: - assert fparam is not None, "fparam should not be None" - assert self.fparam_avg is not None - assert self.fparam_inv_std is not None - if fparam.shape[-1] != self.numb_fparam: - raise ValueError( - "get an input fparam of dim {fparam.shape[-1]}, ", - "which is not consistent with {self.numb_fparam}.", - ) - nb, _ = fparam.shape - t_fparam_avg = self._extend_f_avg_std(self.fparam_avg, nb) - t_fparam_inv_std = self._extend_f_avg_std(self.fparam_inv_std, nb) - fparam = (fparam - t_fparam_avg) * t_fparam_inv_std - fparam = torch.tile(fparam.reshape([nf, 1, -1]), [1, nloc, 1]) - xx = torch.cat( - [xx, fparam], - dim=-1, - ) - # check aparam dim, concate to input descriptor - if self.numb_aparam > 0: - assert aparam is not None, "aparam should not be None" - assert self.aparam_avg is not None - assert self.aparam_inv_std is not None - if aparam.shape[-1] != self.numb_aparam: - raise ValueError( - "get an input aparam of dim {aparam.shape[-1]}, ", - "which is not consistent with {self.numb_aparam}.", - ) - nb, nloc, _ = aparam.shape - t_aparam_avg = self._extend_a_avg_std(self.aparam_avg, nb, nloc) - t_aparam_inv_std = self._extend_a_avg_std(self.aparam_inv_std, nb, nloc) - aparam = (aparam - t_aparam_avg) * t_aparam_inv_std - xx = torch.cat( - [xx, aparam], - dim=-1, - ) - outs = torch.zeros_like(atype).unsqueeze(-1) # jit assertion - if self.old_impl: - outs = torch.zeros_like(atype).unsqueeze(-1) # jit assertion - assert self.filter_layers_old is not None - if self.use_tebd: - atom_energy = self.filter_layers_old[0](xx) + self.bias_atom_e[ - atype - ].unsqueeze(-1) - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - for type_i, filter_layer in enumerate(self.filter_layers_old): - mask = atype == type_i - atom_energy = filter_layer(xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask.unsqueeze(-1) - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} + if self.use_tebd: + if atype_tebd is not None: + inputs = torch.concat([inputs, atype_tebd], dim=-1) + atom_energy = self.filter_layers[0](inputs) + self.bias_atom_e[ + atype + ].unsqueeze(-1) + outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] else: - if self.use_tebd: - atom_energy = ( - self.filter_layers.networks[0](xx) + self.bias_atom_e[atype] - ) + for type_i, filter_layer in enumerate(self.filter_layers): + mask = atype == type_i + atom_energy = filter_layer(inputs) + atom_energy = atom_energy + self.bias_atom_e[type_i] + atom_energy = atom_energy * mask.unsqueeze(-1) outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - else: - for type_i, ll in enumerate(self.filter_layers.networks): - mask = (atype == type_i).unsqueeze(-1) - mask = torch.tile(mask, (1, 1, self.dim_out)) - atom_energy = ll(xx) - atom_energy = atom_energy + self.bias_atom_e[type_i] - atom_energy = atom_energy * mask - outs = outs + atom_energy # Shape is [nframes, natoms[0], 1] - return {self.var_name: outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} - - -@Fitting.register("ener") -class EnergyFittingNet(InvarFitting): - def __init__( - self, - ntypes: int, - embedding_width: int, - neuron: List[int] = [128, 128, 128], 
- bias_atom_e: Optional[torch.Tensor] = None, - resnet_dt: bool = True, - numb_fparam: int = 0, - numb_aparam: int = 0, - activation_function: str = "tanh", - precision: str = DEFAULT_PRECISION, - use_tebd: bool = True, - **kwargs, - ): - super().__init__( - "energy", - ntypes, - embedding_width, - 1, - neuron=neuron, - bias_atom_e=bias_atom_e, - resnet_dt=resnet_dt, - numb_fparam=numb_fparam, - numb_aparam=numb_aparam, - activation_function=activation_function, - precision=precision, - use_tebd=use_tebd, - **kwargs, - ) + return {"energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION)} @Fitting.register("direct_force") @@ -414,7 +136,7 @@ def __init__( """ super().__init__() self.ntypes = ntypes - self.dim_descrpt = embedding_width + self.embedding_width = embedding_width self.use_tebd = use_tebd self.out_dim = out_dim if not use_tebd: @@ -464,12 +186,13 @@ def forward( self, inputs: torch.Tensor, atype: torch.Tensor, - gr: Optional[torch.Tensor] = None, + atype_tebd: Optional[torch.Tensor] = None, + rot_mat: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, None]: """Based on embedding net output, alculate total energy. Args: - - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.embedding_width]. - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. Returns @@ -478,19 +201,19 @@ def forward( """ nframes, nloc, _ = inputs.size() if self.use_tebd: - # if atype_tebd is not None: - # inputs = torch.concat([inputs, atype_tebd], dim=-1) + if atype_tebd is not None: + inputs = torch.concat([inputs, atype_tebd], dim=-1) vec_out = self.filter_layers_dipole[0]( inputs ) # Shape is [nframes, nloc, m1] assert list(vec_out.size()) == [nframes, nloc, self.out_dim] # (nf x nloc) x 1 x od vec_out = vec_out.view(-1, 1, self.out_dim) - assert gr is not None + assert rot_mat is not None # (nf x nloc) x od x 3 - gr = gr.view(-1, self.out_dim, 3) + rot_mat = rot_mat.view(-1, self.out_dim, 3) vec_out = ( - torch.bmm(vec_out, gr).squeeze(-2).view(nframes, nloc, 3) + torch.bmm(vec_out, rot_mat).squeeze(-2).view(nframes, nloc, 3) ) # Shape is [nframes, nloc, 3] else: vec_out = torch.zeros_like(atype).unsqueeze(-1) # jit assertion diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index c6fb6b27e1..16e80f9c20 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -7,6 +7,9 @@ import numpy as np import torch +from deepmd.model_format import ( + FittingOutputDef, +) from deepmd.pt.model.task.task import ( TaskBaseMethod, ) @@ -58,9 +61,17 @@ def __new__(cls, *args, **kwargs): if fitting_type in Fitting.__plugins.plugins: cls = Fitting.__plugins.plugins[fitting_type] else: - raise RuntimeError("Unknown fitting type: " + fitting_type) + raise RuntimeError("Unknown descriptor type: " + fitting_type) return super().__new__(cls) + def output_def(self) -> FittingOutputDef: + """Definition for the task Output.""" + raise NotImplementedError + + def forward(self, **kwargs): + """Task Output.""" + raise NotImplementedError + def share_params(self, base_class, shared_level, resume=False): assert ( self.__class__ == base_class.__class__ diff --git a/deepmd/pt/model/task/task.py b/deepmd/pt/model/task/task.py index b2dc03e4bd..a9b2efeb9a 100644 --- a/deepmd/pt/model/task/task.py +++ b/deepmd/pt/model/task/task.py @@ -1,18 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from abc import ( - ABC, - abstractmethod, -) - import torch -from 
deepmd.model_format import ( - FittingOutputDef, -) +class TaskBaseMethod(torch.nn.Module): + def __init__(self, **kwargs): + """Construct a basic head for different tasks.""" + super().__init__() -class TaskBaseMethod(torch.nn.Module, ABC): - @abstractmethod - def output_def(self) -> FittingOutputDef: - """Definition for the task Output.""" + def forward(self, **kwargs): + """Task Output.""" raise NotImplementedError diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index e83e12f608..780dbf7e62 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -4,17 +4,9 @@ Optional, ) -import numpy as np import torch import torch.nn.functional as F -from deepmd.model_format.common import PRECISION_DICT as NP_PRECISION_DICT - -from .env import ( - DEVICE, -) -from .env import PRECISION_DICT as PT_PRECISION_DICT - def get_activation_fn(activation: str) -> Callable: """Returns the activation function corresponding to `activation`.""" @@ -49,35 +41,3 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x else: raise RuntimeError(f"activation function {self.activation} not supported") - - -def to_numpy_array( - xx: torch.Tensor, -) -> np.ndarray: - if xx is None: - return None - assert xx is not None - # Create a reverse mapping of PT_PRECISION_DICT - reverse_precision_dict = {v: k for k, v in PT_PRECISION_DICT.items()} - # Use the reverse mapping to find keys with the desired value - prec = reverse_precision_dict.get(xx.dtype, None) - prec = NP_PRECISION_DICT.get(prec, None) - if prec is None: - raise ValueError(f"unknown precision {xx.dtype}") - return xx.detach().cpu().numpy().astype(prec) - - -def to_torch_tensor( - xx: np.ndarray, -) -> torch.Tensor: - if xx is None: - return None - assert xx is not None - # Create a reverse mapping of NP_PRECISION_DICT - reverse_precision_dict = {v: k for k, v in NP_PRECISION_DICT.items()} - # Use the reverse mapping to find keys with the desired value - prec = reverse_precision_dict.get(type(xx.flat[0]), None) - prec = PT_PRECISION_DICT.get(prec, None) - if prec is None: - raise ValueError(f"unknown precision {xx.dtype}") - return torch.tensor(xx, dtype=prec, device=DEVICE) diff --git a/deepmd/tf/env.py b/deepmd/tf/env.py index eada2774d3..da03631689 100644 --- a/deepmd/tf/env.py +++ b/deepmd/tf/env.py @@ -472,11 +472,6 @@ def _get_package_constants( GLOBAL_CONFIG = _get_package_constants() -if GLOBAL_CONFIG["enable_tensorflow"] == "0": - raise RuntimeError( - "TensorFlow backend is not built. To enable it, " - "set the environmental variable DP_ENABLE_TENSORFLOW=1." - ) MODEL_VERSION = GLOBAL_CONFIG["model_version"] TF_VERSION = GLOBAL_CONFIG["tf_version"] TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index 389cc78c9f..ae1509f2ca 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -90,17 +90,7 @@ Check the compiler version on your machine gcc --version ``` -The compiler GCC 4.8 or later is supported in the DeePMD-kit. - -::::{tab-set} - -:::{tab-item} TensorFlow {{ tensorflow_icon }} - -Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. 
It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.
-
-:::
-
-::::
+The compiler GCC 4.8 or later is supported in the DeePMD-kit. Note that TensorFlow may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by TensorFlow. It is recommended to use [the same compiler version as TensorFlow](https://www.tensorflow.org/install/source#tested_build_configurations), which can be printed by `python -c "import tensorflow;print(tensorflow.version.COMPILER_VERSION)"`.

 Execute

 ```bash
@@ -115,8 +105,7 @@ One may set the following environment variables before executing `pip`:
 | DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
 | CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
 | ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
-| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend.
-| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
+| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
 | DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
 | CMAKE_ARGS | str | - | Additional CMake arguments |
 | <LANG>FLAGS (`<LANG>`=`CXX`, `CUDA` or `HIP`) | str | - | Default compilation flags to be used when compiling `<LANG>` files. See [CMake documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html). 
|
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index d6ee3d0958..c273bc9263 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -154,22 +154,7 @@ if(ENABLE_TENSORFLOW AND NOT DEEPMD_C_ROOT)
 endif()
 if(ENABLE_PYTORCH AND NOT DEEPMD_C_ROOT)
 find_package(Torch REQUIRED)
- string(REGEX MATCH "_GLIBCXX_USE_CXX11_ABI=([0-9]+)" CXXABI_PT_MATCH
- ${TORCH_CXX_FLAGS})
- if(CXXABI_PT_MATCH)
- message(STATUS "PyTorch CXX11 ABI: ${CMAKE_MATCH_1}")
- if(DEFINED OP_CXX_ABI)
- if(NOT ${CMAKE_MATCH_1} EQUAL ${OP_CXX_ABI})
- message(
- FATAL_ERROR
- "PyTorch CXX11 ABI mismatch TensorFlow: ${CMAKE_MATCH_1} != ${OP_CXX_ABI}"
- )
- endif()
- else()
- set(OP_CXX_ABI ${CMAKE_MATCH_1})
- add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
- endif()
- endif()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 endif()
 # log enabled backends
 if(NOT DEEPMD_C_ROOT)
@@ -180,9 +165,7 @@ if(NOT DEEPMD_C_ROOT)
 if(ENABLE_PYTORCH)
 message(STATUS "- PyTorch")
 endif()
- if(NOT ENABLE_TENSORFLOW
- AND NOT ENABLE_PYTORCH
- AND NOT BUILD_PY_IF)
+ if(NOT ENABLE_TENSORFLOW AND NOT ENABLE_PYTORCH)
 message(FATAL_ERROR "No backend is enabled.")
 endif()
 endif()
diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt
index b1ce17566f..5473b91f29 100644
--- a/source/config/CMakeLists.txt
+++ b/source/config/CMakeLists.txt
@@ -1,19 +1,5 @@
 # config
-# cmake will treat true, false, on, off, 1, 0 as booleans we hope an easy way to
-# check it
-if(ENABLE_TENSORFLOW)
- set(ENABLE_TENSORFLOW 1)
-else()
- set(ENABLE_TENSORFLOW 0)
-endif()
-
-if(ENABLE_PYTORCH)
- set(ENABLE_PYTORCH 1)
-else()
- set(ENABLE_PYTORCH 0)
-endif()
-
 configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini"
 @ONLY)
diff --git a/source/config/run_config.ini b/source/config/run_config.ini
index 11f4100e61..3f0a7a33a8 100644
--- a/source/config/run_config.ini
+++ b/source/config/run_config.ini
@@ -4,8 +4,6 @@ GIT_SUMM = @GIT_SUMM@
 GIT_HASH = @GIT_HASH@
 GIT_DATE = @GIT_DATE@
 GIT_BRANCH = @GIT_BRANCH@
-ENABLE_TENSORFLOW = @ENABLE_TENSORFLOW@
-ENABLE_PYTORCH = @ENABLE_PYTORCH@
 TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
 TF_LIBS = @TensorFlow_LIBRARY@
 TF_VERSION = @TENSORFLOW_VERSION@
diff --git a/source/lib/src/gpu/CMakeLists.txt b/source/lib/src/gpu/CMakeLists.txt
index 804e1c0506..3bd24cc620 100644
--- a/source/lib/src/gpu/CMakeLists.txt
+++ b/source/lib/src/gpu/CMakeLists.txt
@@ -10,10 +10,8 @@ if(USE_CUDA_TOOLKIT)
 endif()
 enable_language(CUDA)
 set(CMAKE_CUDA_STANDARD 11)
- if(DEFINED OP_CXX_ABI)
- add_compile_definitions(
- "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
- endif()
+ add_compile_definitions(
+ "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
 find_package(CUDAToolkit REQUIRED)
diff --git a/source/tests/common/test_model_format_utils.py b/source/tests/common/test_model_format_utils.py
index cb85fd2bb2..da76c53ed9 100644
--- a/source/tests/common/test_model_format_utils.py
+++ b/source/tests/common/test_model_format_utils.py
@@ -13,7 +13,6 @@
 EmbeddingNet,
 EnvMat,
 FittingNet,
- InvarFitting,
 NativeLayer,
 NativeNet,
 NetworkCollection,
@@ -370,123 +369,3 @@ def test_self_consistency(
 mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist)
 for ii in [0, 1, 4]:
 np.testing.assert_allclose(mm0[ii], mm1[ii])
-
-
-class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist):
- def setUp(self):
- TestCaseSingleFrameWithNlist.setUp(self)
-
- def test_self_consistency(
- self,
- ):
- rng = np.random.default_rng()
- nf, nloc, nnei = self.nlist.shape
- ds = DescrptSeA(self.rcut, 
self.rcut_smth, self.sel) - dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) - atype = self.atype_ext[:, :nloc] - - for ( - distinguish_types, - od, - nfp, - nap, - ) in itertools.product( - [True, False], - [1, 2], - [0, 3], - [0, 4], - ): - ifn0 = InvarFitting( - "energy", - self.nt, - ds.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - distinguish_types=distinguish_types, - ) - ifn1 = InvarFitting.deserialize(ifn0.serialize()) - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp)) - else: - ifp = None - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap)) - else: - iap = None - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - ret1 = ifn1(dd[0], atype, fparam=ifp, aparam=iap) - np.testing.assert_allclose(ret0["energy"], ret1["energy"]) - - def test_self_exception( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - ds = DescrptSeA(self.rcut, self.rcut_smth, self.sel) - dd = ds.call(self.coord_ext, self.atype_ext, self.nlist) - atype = self.atype_ext[:, :nloc] - - for ( - distinguish_types, - od, - nfp, - nap, - ) in itertools.product( - [True, False], - [1, 2], - [0, 3], - [0, 4], - ): - ifn0 = InvarFitting( - "energy", - self.nt, - ds.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - distinguish_types=distinguish_types, - ) - - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp)) - else: - ifp = None - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap)) - else: - iap = None - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0][:, :, :-2], atype, fparam=ifp, aparam=iap) - self.assertIn("input descriptor", context.exception) - - if nfp > 0: - ifp = rng.normal(size=(self.nf, nfp - 1)) - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - self.assertIn("input fparam", context.exception) - - if nap > 0: - iap = rng.normal(size=(self.nf, self.nloc, nap - 1)) - with self.assertRaises(ValueError) as context: - ret0 = ifn0(dd[0], atype, fparam=ifp, aparam=iap) - self.assertIn("input aparam", context.exception) - - def test_get_set(self): - ifn0 = InvarFitting( - "energy", - self.nt, - 3, - 1, - ) - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ii in [ - "bias_atom_e", - "fparam_avg", - "fparam_inv_std", - "aparam_avg", - "aparam_inv_std", - ]: - ifn0[ii] = foo - np.testing.assert_allclose(foo, ifn0[ii]) diff --git a/source/tests/pt/test_ener_fitting.py b/source/tests/pt/test_ener_fitting.py deleted file mode 100644 index eece8447df..0000000000 --- a/source/tests/pt/test_ener_fitting.py +++ /dev/null @@ -1,181 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import itertools -import unittest - -import numpy as np -import torch - -from deepmd.model_format import InvarFitting as DPInvarFitting -from deepmd.pt.model.descriptor.se_a import ( - DescrptSeA, -) -from deepmd.pt.model.task.ener import ( - EnergyFittingNet, - InvarFitting, -) -from deepmd.pt.utils import ( - env, -) -from deepmd.pt.utils.utils import ( - to_numpy_array, -) - -from .test_env_mat import ( - TestCaseSingleFrameWithNlist, -) - -dtype = env.GLOBAL_PT_FLOAT_PRECISION - - -class TestInvarFitting(unittest.TestCase, TestCaseSingleFrameWithNlist): - def setUp(self): - TestCaseSingleFrameWithNlist.setUp(self) - - def test_consistency( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - dd0 = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) - rd0, _, _, _, _ = dd0( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - 
torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - - for od, distinguish_types, nfp, nap in itertools.product( - [1, 3], - [True, False], - [0, 3], - [0, 4], - ): - ft0 = InvarFitting( - "foo", - self.nt, - dd0.dim_out, - od, - numb_fparam=nfp, - numb_aparam=nap, - use_tebd=(not distinguish_types), - ).to(env.DEVICE) - ft1 = DPInvarFitting.deserialize(ft0.serialize()) - ft2 = InvarFitting.deserialize(ft0.serialize()) - - if nfp > 0: - ifp = torch.tensor( - rng.normal(size=(self.nf, nfp)), dtype=dtype, device=env.DEVICE - ) - else: - ifp = None - if nap > 0: - iap = torch.tensor( - rng.normal(size=(self.nf, self.nloc, nap)), - dtype=dtype, - device=env.DEVICE, - ) - else: - iap = None - - ret0 = ft0(rd0, atype, fparam=ifp, aparam=iap) - ret1 = ft1( - rd0.detach().cpu().numpy(), - atype.detach().cpu().numpy(), - fparam=to_numpy_array(ifp), - aparam=to_numpy_array(iap), - ) - ret2 = ft2(rd0, atype, fparam=ifp, aparam=iap) - np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - ret1["foo"], - ) - np.testing.assert_allclose( - to_numpy_array(ret0["foo"]), - to_numpy_array(ret2["foo"]), - ) - - def test_new_old( - self, - ): - rng = np.random.default_rng() - nf, nloc, nnei = self.nlist.shape - dd = DescrptSeA(self.rcut, self.rcut_smth, self.sel).to(env.DEVICE) - rd0, _, _, _, _ = dd( - torch.tensor(self.coord_ext, dtype=dtype, device=env.DEVICE), - torch.tensor(self.atype_ext, dtype=int, device=env.DEVICE), - torch.tensor(self.nlist, dtype=int, device=env.DEVICE), - ) - atype = torch.tensor(self.atype_ext[:, :nloc], dtype=int, device=env.DEVICE) - - od = 1 - for distinguish_types in itertools.product( - [True, False], - ): - ft0 = EnergyFittingNet( - self.nt, - dd.dim_out, - distinguish_types=distinguish_types, - ).to(env.DEVICE) - ft1 = EnergyFittingNet( - self.nt, - dd.dim_out, - distinguish_types=distinguish_types, - old_impl=True, - ).to(env.DEVICE) - dd0 = ft0.state_dict() - dd1 = ft1.state_dict() - for kk, vv in dd1.items(): - new_kk = kk - new_kk = new_kk.replace("filter_layers_old", "filter_layers.networks") - new_kk = new_kk.replace("deep_layers", "layers") - new_kk = new_kk.replace("final_layer", "layers.3") - dd1[kk] = dd0[new_kk] - if kk.split(".")[-1] in ["idt", "bias"]: - dd1[kk] = dd1[kk].unsqueeze(0) - dd1["bias_atom_e"] = dd0["bias_atom_e"] - ft1.load_state_dict(dd1) - ret0 = ft0(rd0, atype) - ret1 = ft1(rd0, atype) - np.testing.assert_allclose( - to_numpy_array(ret0["energy"]), - to_numpy_array(ret1["energy"]), - ) - - def test_jit( - self, - ): - for od, distinguish_types, nfp, nap in itertools.product( - [1, 3], - [True, False], - [0, 3], - [0, 4], - ): - ft0 = InvarFitting( - "foo", - self.nt, - 9, - od, - numb_fparam=nfp, - numb_aparam=nap, - use_tebd=(not distinguish_types), - ).to(env.DEVICE) - torch.jit.script(ft0) - - def test_get_set(self): - ifn0 = InvarFitting( - "energy", - self.nt, - 3, - 1, - ) - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ii in [ - "bias_atom_e", - "fparam_avg", - "fparam_inv_std", - "aparam_avg", - "aparam_inv_std", - ]: - ifn0[ii] = torch.tensor(foo, dtype=dtype, device=env.DEVICE) - np.testing.assert_allclose(foo, ifn0[ii].detach().cpu().numpy()) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index ed2c428de5..3feb4f4739 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -102,25 +102,25 
@@ def test_consistency(self):
 my_fn = EnergyFittingNet(
 self.ntypes,
 self.embedding_width,
- neuron=self.n_neuron,
- bias_atom_e=self.dp_fn.bias_atom_e,
- distinguish_types=True,
+ self.n_neuron,
+ self.dp_fn.bias_atom_e,
+ use_tebd=False,
 )
 for name, param in my_fn.named_parameters():
- matched = re.match(
- "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name
- )
+ matched = re.match("filter_layers\.(\d).deep_layers\.(\d)\.([a-z]+)", name)
 key = None
 if matched:
- if int(matched.group(2)) == len(self.n_neuron):
- layer_id = -1
- else:
- layer_id = matched.group(2)
 key = gen_key(
 type_id=matched.group(1),
- layer_id=layer_id,
+ layer_id=matched.group(2),
 w_or_b=matched.group(3),
 )
+ else:
+ matched = re.match("filter_layers\.(\d).final_layer\.([a-z]+)", name)
+ if matched:
+ key = gen_key(
+ type_id=matched.group(1), layer_id=-1, w_or_b=matched.group(2)
+ )
 assert key is not None
 var = values[key]
 with torch.no_grad():
@@ -132,7 +132,7 @@ def test_consistency(self):
 ret = my_fn(embedding, atype)
 my_energy = ret["energy"]
 my_energy = my_energy.detach()
- np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1]))
+ self.assertTrue(np.allclose(dp_energy, my_energy.numpy().reshape([-1])))


 if __name__ == "__main__":
diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py
index c6595e6471..5bbbc9e352 100644
--- a/source/tests/pt/test_model.py
+++ b/source/tests/pt/test_model.py
@@ -53,24 +53,23 @@
 VariableState = collections.namedtuple("VariableState", ["value", "gradient"])


-def torch2tf(torch_name, last_layer_id=None):
+def torch2tf(torch_name):
 fields = torch_name.split(".")
 offset = int(fields[2] == "networks")
 element_id = int(fields[2 + offset])
 if fields[0] == "descriptor":
 layer_id = int(fields[4 + offset]) + 1
 weight_type = fields[5 + offset]
- ret = "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id)
- elif fields[0] == "fitting_net":
- layer_id = int(fields[4 + offset])
- weight_type = fields[5 + offset]
- if layer_id != last_layer_id:
- ret = "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type)
- else:
- ret = "final_layer_type_%d/%s:0" % (element_id, weight_type)
+ return "filter_type_all/%s_%d_%d:0" % (weight_type, layer_id, element_id)
+ elif fields[3] == "deep_layers":
+ layer_id = int(fields[4])
+ weight_type = fields[5]
+ return "layer_%d_type_%d/%s:0" % (layer_id, element_id, weight_type)
+ elif fields[3] == "final_layer":
+ weight_type = fields[4]
+ return "final_layer_type_%d/%s:0" % (element_id, weight_type)
 else:
 raise RuntimeError("Unexpected parameter name: %s" % torch_name)
- return ret


 class DpTrainer:
@@ -291,7 +290,7 @@ def test_consistency(self):
 "neuron": self.filter_neuron,
 "axis_neuron": self.axis_neuron,
 },
- "fitting_net": {"neuron": self.n_neuron, "distinguish_types": True},
+ "fitting_net": {"neuron": self.n_neuron},
 "data_stat_nbatch": self.data_stat_nbatch,
 "type_map": self.type_map,
 },
@@ -324,7 +323,7 @@ def test_consistency(self):
 # Keep parameter value consistency between 2 implementations
 for name, param in my_model.named_parameters():
 name = name.replace("sea.", "")
- var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+ var_name = torch2tf(name)
 var = vs_dict[var_name].value
 with torch.no_grad():
 src = torch.from_numpy(var)
@@ -405,7 +404,7 @@ def step(step_id):
 for name, param in my_model.named_parameters():
 name = name.replace("sea.", "")
- var_name = torch2tf(name, last_layer_id=len(self.n_neuron))
+ var_name = torch2tf(name)
 var_grad = vs_dict[var_name].gradient
 param_grad = 
param.grad.cpu() var_grad = torch.tensor(var_grad) diff --git a/source/tests/pt/test_se_e2_a.py b/source/tests/pt/test_se_e2_a.py index 0da80ea1ea..c0a106cb16 100644 --- a/source/tests/pt/test_se_e2_a.py +++ b/source/tests/pt/test_se_e2_a.py @@ -25,9 +25,6 @@ PRECISION_DICT, ) -from .test_env_mat import ( - TestCaseSingleFrameWithNlist, -) from .test_mlp import ( get_tols, ) @@ -35,6 +32,36 @@ dtype = env.GLOBAL_PT_FLOAT_PRECISION +class TestCaseSingleFrameWithNlist: + def setUp(self): + # nloc == 3, nall == 4 + self.nloc = 3 + self.nall = 4 + self.nf, self.nt = 1, 2 + self.coord_ext = np.array( + [ + [0, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, -2, 0], + ], + dtype=np.float64, + ).reshape([1, self.nall * 3]) + self.atype_ext = np.array([0, 0, 1, 0], dtype=int).reshape([1, self.nall]) + # sel = [5, 2] + self.sel = [5, 2] + self.nlist = np.array( + [ + [1, 3, -1, -1, -1, 2, -1], + [0, -1, -1, -1, -1, 2, -1], + [0, 1, -1, -1, -1, 0, -1], + ], + dtype=int, + ).reshape([1, self.nloc, sum(self.sel)]) + self.rcut = 0.4 + self.rcut_smth = 2.2 + + # to be merged with the tf test case @unittest.skipIf(not support_se_e2_a, "EnvMat not supported") class TestDescrptSeA(unittest.TestCase, TestCaseSingleFrameWithNlist): diff --git a/source/tests/pt/test_utils.py b/source/tests/pt/test_utils.py deleted file mode 100644 index 9c9a9479ad..0000000000 --- a/source/tests/pt/test_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import unittest - -import numpy as np -import torch - -from deepmd.pt.utils.utils import ( - to_numpy_array, - to_torch_tensor, -) - - -class TestCvt(unittest.TestCase): - def test_to_numpy(self): - rng = np.random.default_rng() - foo = rng.normal([3, 4]) - for ptp, npp in zip( - [torch.float16, torch.float32, torch.float64], - [np.float16, np.float32, np.float64], - ): - foo = foo.astype(npp) - bar = to_torch_tensor(foo) - self.assertEqual(bar.dtype, ptp) - onk = to_numpy_array(bar) - self.assertEqual(onk.dtype, npp) - with self.assertRaises(ValueError) as ee: - foo = foo.astype(np.int32) - bar = to_torch_tensor(foo) - with self.assertRaises(ValueError) as ee: - bar = to_torch_tensor(foo) - bar = to_numpy_array(bar.int()) From 07e0d967d944de8a798058314083232b89c1f31c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:39:49 +0800 Subject: [PATCH 06/10] Fix dataloader stuck on GPU --- deepmd/pt/utils/dataloader.py | 2 -- deepmd/pt/utils/dataset.py | 36 +++++++++--------------- deepmd/pt/utils/preprocess.py | 30 ++++++++++---------- deepmd/pt/utils/stat.py | 4 +-- source/tests/pt/test_descriptor.py | 10 +++---- source/tests/pt/test_embedding_net.py | 13 +++++---- source/tests/pt/test_model.py | 11 +++++--- source/tests/pt/test_saveload_dpa1.py | 4 +-- source/tests/pt/test_saveload_se_e2_a.py | 4 +-- 9 files changed, 54 insertions(+), 60 deletions(-) diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 5408452703..7a6684e82e 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -276,13 +276,11 @@ def collate_batch(batch): result[key] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, ) else: result[key] = torch.zeros( (n_frames, natoms_extended), dtype=torch.long, - device=env.DEVICE, ) for i in range(len(batch)): natoms_tmp = list[i].shape[0] diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index b886dbb786..83c147ef8f 100644 --- a/deepmd/pt/utils/dataset.py +++ 
b/deepmd/pt/utils/dataset.py @@ -479,8 +479,7 @@ def preprocess(self, batch): else: batch[kk] = torch.tensor( batch[kk], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( @@ -489,7 +488,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor(batch[kk], dtype=torch.long, device=env.DEVICE) + batch[kk] = torch.tensor(batch[kk], dtype=torch.long) batch["atype"] = batch.pop("type") keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] @@ -523,11 +522,10 @@ def preprocess(self, batch): natoms_extended = max([item.shape[0] for item in shift]) batch["shift"] = torch.zeros( (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long, device=env.DEVICE + (n_frames, natoms_extended), dtype=torch.long ) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] @@ -565,15 +563,14 @@ def single_preprocess(self, batch, sid): else: batch[kk] = torch.tensor( batch[kk][sid], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.DEVICE, + dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long, device=env.DEVICE + batch[kk][sid], dtype=torch.long ) clean_coord = batch.pop("coord") clean_type = batch.pop("type") @@ -669,14 +666,13 @@ def single_preprocess(self, batch, sid): noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool, device=env.DEVICE + coord_mask, dtype=torch.bool ) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( np.zeros_like(coord_mask, dtype=bool), - dtype=torch.bool, - device=env.DEVICE, + dtype=torch.bool ) # add mask for type @@ -684,14 +680,13 @@ def single_preprocess(self, batch, sid): masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool, device=env.DEVICE + type_mask, dtype=torch.bool ) else: masked_type = clean_type batch["type_mask"] = torch.tensor( np.zeros_like(type_mask, dtype=bool), - dtype=torch.bool, - device=env.DEVICE, + dtype=torch.bool ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) @@ -801,7 +796,7 @@ def __len__(self): def __getitem__(self, index): """Get a frame from the selected system.""" b_data = self._data_system._get_item(index) - b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec) return b_data @@ -876,7 +871,7 @@ def __getitem__(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -887,7 +882,7 @@ def get_training_batch(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = 
self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index], device=env.DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -896,10 +891,7 @@ def get_batch(self, sys_idx=None): """TF-compatible batch for testing.""" pt_batch = self[sys_idx] np_batch = {} - for key in ["coord", "box", "force", "energy", "virial"]: - if key in pt_batch.keys(): - np_batch[key] = pt_batch[key].cpu().numpy() - for key in ["atype", "natoms"]: + for key in ["coord", "box", "force", "energy", "virial", "atype", "natoms"]: if key in pt_batch.keys(): np_batch[key] = pt_batch[key].cpu().numpy() batch_size = pt_batch["coord"].shape[0] diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 3ea26d0041..27acdb9209 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -99,7 +99,7 @@ def build_inside_clist(coord, region: Region3D, ncell): cell_offset[cell_offset < 0] = 0 delta = cell_offset - ncell a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms - arange = torch.arange(0, loc_ncell, 1, device=env.DEVICE) + arange = torch.arange(0, loc_ncell, 1) cellid = a2c == arange.unsqueeze(-1) # one hot cellid c2a = cellid.nonzero() lst = [] @@ -131,17 +131,17 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): # add ghost atoms a2c, c2a = build_inside_clist(coord, region, ncell) - xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.DEVICE) - yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.DEVICE) - zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.DEVICE) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long, device=env.DEVICE + [1, 0, 0], dtype=torch.long ) xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long, device=env.DEVICE + [0, 1, 0], dtype=torch.long ) xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long, device=env.DEVICE + [0, 0, 1], dtype=torch.long ) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) @@ -165,7 +165,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord = torch.cat([coord, tmp_coord]) merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = torch.cat([atype, tmp_atype]) - merged_mapping = torch.cat([torch.arange(atype.numel(), device=env.DEVICE), aid]) + merged_mapping = torch.cat([torch.arange(atype.numel()), aid]) return merged_coord_shift, merged_atype, merged_mapping @@ -187,7 +187,7 @@ def build_neighbor_list( distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool, device=env.DEVICE) * DISTANCE_INF + torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF ) if min_check: if distance.min().abs() < 1e-6: @@ -195,9 +195,9 @@ def build_neighbor_list( if not type_split: sec = sec[-1:] lst = [] - nlist = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_loc = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 - nlist_type = torch.zeros((nloc, sec[-1].item()), device=env.DEVICE).long() - 1 + nlist = torch.zeros((nloc, sec[-1].item())).long() 
- 1
+ nlist_loc = torch.zeros((nloc, sec[-1].item())).long() - 1
+ nlist_type = torch.zeros((nloc, sec[-1].item())).long() - 1
 for i, nnei in enumerate(sec):
 if i > 0:
 nnei = nnei - sec[i - 1]
@@ -210,8 +210,8 @@ def build_neighbor_list(
 _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False)
 else:
 # when nnei > nall
- indices = torch.zeros((nloc, nnei), device=env.DEVICE).long() - 1
- _sorted = torch.ones((nloc, nnei), device=env.DEVICE).long() * DISTANCE_INF
+ indices = torch.zeros((nloc, nnei)).long() - 1
+ _sorted = torch.ones((nloc, nnei)).long() * DISTANCE_INF
 _sorted_nnei, indices_nnei = torch.topk(
 tmp, tmp.shape[1], dim=1, largest=False
 )
@@ -275,7 +275,7 @@ def make_env_mat(
 else:
 merged_coord_shift = torch.zeros_like(coord)
 merged_atype = atype.clone()
- merged_mapping = torch.arange(atype.numel(), device=env.DEVICE)
+ merged_mapping = torch.arange(atype.numel())
 merged_coord = coord.clone()

 # build nlist
diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
index 7fffd15ca1..4826d0fb88 100644
--- a/deepmd/pt/utils/stat.py
+++ b/deepmd/pt/utils/stat.py
@@ -62,13 +62,11 @@ def make_stat_input(datasets, dataloaders, nbatches):
 shape = torch.zeros(
 (n_frames, extend, 3),
 dtype=env.GLOBAL_PT_FLOAT_PRECISION,
- device=env.DEVICE,
 )
 else:
 shape = torch.zeros(
 (n_frames, extend),
- dtype=torch.long,
- device=env.DEVICE,
+ dtype=torch.long
 )
 for i in range(len(item)):
 natoms_tmp = l[i].shape[0]
 shape[i, :natoms_tmp] = l[i]
diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py
index 4f31bac7bf..2dd996349b 100644
--- a/source/tests/pt/test_descriptor.py
+++ b/source/tests/pt/test_descriptor.py
@@ -131,15 +131,15 @@ def test_consistency(self):
 stddev=std_ones.detach().cpu(),
 )

- pt_coord = self.pt_batch["coord"]
+ pt_coord = self.pt_batch["coord"].to(env.DEVICE)
 pt_coord.requires_grad_(True)
- index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3)
+ index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE)
 extended_coord = torch.gather(pt_coord, dim=1, index=index)
- extended_coord = extended_coord - self.pt_batch["shift"]
+ extended_coord = extended_coord - self.pt_batch["shift"].to(env.DEVICE)
 my_d, _, _ = prod_env_mat_se_a(
 extended_coord.to(DEVICE),
- self.pt_batch["nlist"],
- self.pt_batch["atype"],
+ self.pt_batch["nlist"].to(env.DEVICE),
+ self.pt_batch["atype"].to(env.DEVICE),
 avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE),
 std_ones.reshape([-1, self.nnei, 4]).to(DEVICE),
 self.rcut,
diff --git a/source/tests/pt/test_embedding_net.py b/source/tests/pt/test_embedding_net.py
index fc98ddc9f9..312af1a8c8 100644
--- a/source/tests/pt/test_embedding_net.py
+++ b/source/tests/pt/test_embedding_net.py
@@ -7,6 +7,9 @@
 import numpy as np
 import tensorflow.compat.v1 as tf
 import torch
+from deepmd.pt.utils import (
+ env,
+)

 tf.disable_eager_execution()

@@ -148,18 +151,18 @@ def test_consistency(self):
 # Keep parameter value consistency between 2 implementations
 param.data.copy_(torch.from_numpy(var))

- pt_coord = self.torch_batch["coord"]
+ pt_coord = self.torch_batch["coord"].to(env.DEVICE)
 pt_coord.requires_grad_(True)
- index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3)
+ index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE)
 extended_coord = torch.gather(pt_coord, dim=1, index=index)
 extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE)
 extended_atype = torch.gather(
- self.torch_batch["atype"], dim=1, 
index=self.torch_batch["mapping"] + self.torch_batch["atype"].to(env.DEVICE), dim=1, index=self.torch_batch["mapping"].to(env.DEVICE) ) descriptor_out, _, _, _, _ = descriptor( extended_coord, extended_atype, - self.torch_batch["nlist"], + self.torch_batch["nlist"].to(env.DEVICE), ) my_embedding = descriptor_out.cpu().detach().numpy() fake_energy = torch.sum(descriptor_out) diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 5bbbc9e352..f382ce4b4c 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -6,6 +6,9 @@ import numpy as np import tensorflow.compat.v1 as tf import torch +from deepmd.pt.utils import ( + env, +) tf.disable_eager_execution() @@ -339,10 +342,10 @@ def test_consistency(self): batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) model_predict = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=True + batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=True ) model_predict_1 = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=False + batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=False ) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], @@ -356,8 +359,8 @@ def test_consistency(self): "force": p_force, } label = { - "energy": batch["energy"], - "force": batch["force"], + "energy": batch["energy"].to(env.DEVICE), + "force": batch["force"].to(env.DEVICE), } loss, _ = my_loss(model_pred, label, int(batch["natoms"][0, 0]), cur_lr) np.testing.assert_allclose( diff --git a/source/tests/pt/test_saveload_dpa1.py b/source/tests/pt/test_saveload_dpa1.py index d1043f7029..1b4c41a204 100644 --- a/source/tests/pt/test_saveload_dpa1.py +++ b/source/tests/pt/test_saveload_dpa1.py @@ -129,13 +129,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self): diff --git a/source/tests/pt/test_saveload_se_e2_a.py b/source/tests/pt/test_saveload_se_e2_a.py index 95d7f97a88..7f8364a16f 100644 --- a/source/tests/pt/test_saveload_se_e2_a.py +++ b/source/tests/pt/test_saveload_se_e2_a.py @@ -123,13 +123,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self): From 913efa000f6ef95e7c55a262795ce28f74d81473 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:41:20 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/dataset.py | 33 ++++++++------------------- deepmd/pt/utils/preprocess.py | 16 ++++--------- deepmd/pt/utils/stat.py | 5 +--- source/tests/pt/test_embedding_net.py | 9 ++++++-- 
source/tests/pt/test_model.py | 11 +++++++-- 5 files changed, 30 insertions(+), 44 deletions(-) diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 83c147ef8f..68d4a09ce4 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -477,10 +477,7 @@ def preprocess(self, batch): if "find_" in kk: pass else: - batch[kk] = torch.tensor( - batch[kk], - dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) + batch[kk] = torch.tensor(batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( n_frames, -1, self._data_dict[kk]["ndof"] @@ -521,12 +518,9 @@ def preprocess(self, batch): batch["nlist_type"] = nlist_type natoms_extended = max([item.shape[0] for item in shift]) batch["shift"] = torch.zeros( - (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long + (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION ) + batch["mapping"] = torch.zeros((n_frames, natoms_extended), dtype=torch.long) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] batch["shift"][i, :natoms_tmp] = shift[i] @@ -562,16 +556,13 @@ def single_preprocess(self, batch, sid): pass else: batch[kk] = torch.tensor( - batch[kk][sid], - dtype=env.GLOBAL_PT_FLOAT_PRECISION + batch[kk][sid], dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long - ) + batch[kk] = torch.tensor(batch[kk][sid], dtype=torch.long) clean_coord = batch.pop("coord") clean_type = batch.pop("type") nloc = clean_type.shape[0] @@ -665,28 +656,22 @@ def single_preprocess(self, batch, sid): NotImplementedError(f"Unknown noise type {self.noise_type}!") noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord - batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool - ) + batch["coord_mask"] = torch.tensor(coord_mask, dtype=torch.bool) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( - np.zeros_like(coord_mask, dtype=bool), - dtype=torch.bool + np.zeros_like(coord_mask, dtype=bool), dtype=torch.bool ) # add mask for type if self.mask_type: masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx - batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool - ) + batch["type_mask"] = torch.tensor(type_mask, dtype=torch.bool) else: masked_type = clean_type batch["type_mask"] = torch.tensor( - np.zeros_like(type_mask, dtype=bool), - dtype=torch.bool + np.zeros_like(type_mask, dtype=bool), dtype=torch.bool ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 27acdb9209..18c798138e 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -134,15 +134,9 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) - xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long - ) - xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long - ) - xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long - ) + xyz = xi.view(-1, 1, 1, 1) * 
torch.tensor([1, 0, 0], dtype=torch.long) + xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor([0, 1, 0], dtype=torch.long) + xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor([0, 0, 1], dtype=torch.long) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) mask_b = (xyz < ncell).all(dim=-1) @@ -186,9 +180,7 @@ def build_neighbor_list( distance = coord_l - coord_r distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut - distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF - ) + distance[:nloc, :nloc] += torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF if min_check: if distance.min().abs() < 1e-6: RuntimeError("Atom dist too close!") diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 4826d0fb88..eec7179bcd 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -64,10 +64,7 @@ def make_stat_input(datasets, dataloaders, nbatches): dtype=env.GLOBAL_PT_FLOAT_PRECISION, ) else: - shape = torch.zeros( - (n_frames, extend), - dtype=torch.long - ) + shape = torch.zeros((n_frames, extend), dtype=torch.long) for i in range(len(item)): natoms_tmp = l[i].shape[0] shape[i, :natoms_tmp] = l[i] diff --git a/source/tests/pt/test_embedding_net.py b/source/tests/pt/test_embedding_net.py index 312af1a8c8..407f4949b5 100644 --- a/source/tests/pt/test_embedding_net.py +++ b/source/tests/pt/test_embedding_net.py @@ -7,6 +7,7 @@ import numpy as np import tensorflow.compat.v1 as tf import torch + from deepmd.pt.utils import ( env, ) @@ -153,11 +154,15 @@ def test_consistency(self): pt_coord = self.torch_batch["coord"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) + index = ( + self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) + ) extended_coord = torch.gather(pt_coord, dim=1, index=index) extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE) extended_atype = torch.gather( - self.torch_batch["atype"].to(env.DEVICE), dim=1, index=self.torch_batch["mapping"].to(env.DEVICE) + self.torch_batch["atype"].to(env.DEVICE), + dim=1, + index=self.torch_batch["mapping"].to(env.DEVICE), ) descriptor_out, _, _, _, _ = descriptor( extended_coord, diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index 250ccb164d..e87a53969c 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -6,6 +6,7 @@ import numpy as np import tensorflow.compat.v1 as tf import torch + from deepmd.pt.utils import ( env, ) @@ -343,10 +344,16 @@ def test_consistency(self): batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) model_predict = my_model( - batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=True + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=True, ) model_predict_1 = my_model( - batch["coord"].to(env.DEVICE), batch["atype"].to(env.DEVICE), batch["box"].to(env.DEVICE), do_atomic_virial=False + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=False, ) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], From a4892b71e30430c77fe3ace6387f6b4a7a633442 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 30 Jan 2024 20:03:41 +0800 Subject: [PATCH 08/10] Update test_fitting_net.py --- source/tests/pt/test_fitting_net.py | 9 
++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index ed2c428de5..0390043770 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -17,6 +17,9 @@ from deepmd.tf.fit.ener import ( EnerFitting, ) +from deepmd.pt.utils import ( + env, +) class FakeDescriptor: @@ -105,7 +108,7 @@ def test_consistency(self): neuron=self.n_neuron, bias_atom_e=self.dp_fn.bias_atom_e, distinguish_types=True, - ) + ).to(env.DEVICE) for name, param in my_fn.named_parameters(): matched = re.match( "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name @@ -129,9 +132,9 @@ def test_consistency(self): embedding = torch.from_numpy(self.embedding) embedding = embedding.view(4, -1, self.embedding_width) atype = torch.from_numpy(self.atype) - ret = my_fn(embedding, atype) + ret = my_fn(embedding.to(env.DEVICE), atype.to(env.DEVICE)) my_energy = ret["energy"] - my_energy = my_energy.detach() + my_energy = my_energy.detach().cpu() np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) From 7cad8a6569b94f170466d9383f2c7c4d4ffd030b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:04:20 +0000 Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/pt/test_fitting_net.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index 0390043770..e12a397347 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -11,15 +11,15 @@ from deepmd.pt.model.task import ( EnergyFittingNet, ) +from deepmd.pt.utils import ( + env, +) from deepmd.pt.utils.env import ( GLOBAL_NP_FLOAT_PRECISION, ) from deepmd.tf.fit.ener import ( EnerFitting, ) -from deepmd.pt.utils import ( - env, -) class FakeDescriptor: From 1c37f4450a3ef205d65106ac05da0825a5f7a727 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 30 Jan 2024 16:26:13 -0500 Subject: [PATCH 10/10] set NUM_WORKERS to 0 --- .github/workflows/test_cuda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index f164758304..45b689cb3e 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -42,6 +42,7 @@ jobs: DP_BUILD_TESTING: 1 DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-12.2 + NUM_WORKERS: 0 - run: dp --version - run: python -m pytest -s --cov=deepmd source/tests --durations=0 - run: source/install/test_cc_local.sh
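For readers unfamiliar with the final fix above: setting `NUM_WORKERS: 0` makes the PyTorch `DataLoader` load batches in the main process rather than in forked worker processes, which is the standard way to avoid CUDA re-initialization hangs when dataset code touches GPU state. Below is a minimal sketch of the pattern under that assumption; the helper name `num_workers_from_env` is hypothetical, and the actual DeePMD-kit dataloader may read the variable differently.

```python
import os

import torch
from torch.utils.data import DataLoader, TensorDataset


def num_workers_from_env(default: int = 0) -> int:
    """Hypothetical helper: read the DataLoader worker count from NUM_WORKERS.

    Defaulting to 0 keeps batch loading in the main process, avoiding
    CUDA-initialization deadlocks in forked workers on GPU runners.
    """
    return int(os.environ.get("NUM_WORKERS", default))


dataset = TensorDataset(torch.arange(8, dtype=torch.float32))
loader = DataLoader(dataset, batch_size=4, num_workers=num_workers_from_env())
for (batch,) in loader:
    # Batches are built on the CPU; callers move them to the GPU afterwards.
    print(batch)
```

This mirrors the convention adopted in the patches above: batch tensors are created on the CPU (no `device=` argument in the dataset and preprocess code), and only the consuming code calls `.to(env.DEVICE)`.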