PaddlePaddle · intelyoungway · Oct 21, 2022 · Dec 5, 2022 · Dec 8, 2022 · Dec 13, 2022
diff --git a/apps/drug_drug_synergy/RGCN/train.py b/apps/drug_drug_synergy/RGCN/train.py
@@ -87,7 +87,7 @@ def train(num_subgraph, graph, label_idx, epochs, sub_neighbours=[10, 10], init=
                 fpr, tpr, _ = roc_curve(y_true=ground_truth, y_score=pred_prob)
                 auc_v = auc(fpr, tpr)
                 print("sub_graph index : {} | epoch: {} | training loss: {:.4f} | AUC: {:.3f}".format(
-                sub_g, epoch, train_loss.numpy()[0], auc_v))
+                sub_g, epoch, float(train_loss), auc_v))
 
     return model
 

diff --git a/apps/drug_target_interaction/batchdta/pairwise/DeepDTA/utils.py b/apps/drug_target_interaction/batchdta/pairwise/DeepDTA/utils.py
@@ -312,7 +312,7 @@ def model_eval(model,val_dataloader):
 
         for i_target_score in range(batch_smiles.shape[0]):
 
-            i_target_len = int(batch_len[i_target_score].numpy()[0])
+            i_target_len = int(batch_len[i_target_score])
             smiles = batch_smiles[i_target_score][0:i_target_len]
             target = batch_protein[i_target_score][0:i_target_len]
             y_label = batch_y[i_target_score][0:i_target_len].numpy()

diff --git a/apps/drug_target_interaction/batchdta/pairwise/GraphDTA/run_pairwise_GraphDTA_CV.py b/apps/drug_target_interaction/batchdta/pairwise/GraphDTA/run_pairwise_GraphDTA_CV.py
@@ -195,9 +195,9 @@ def model_eval(model,val_dataloader,device):
                 i_data = i_data.to(device)                    
                 pred_scores = model.forward_single(i_data)
                 # get the predicted labels
-                i_target_pred_scores.append(pred_scores.cpu().numpy()[0])              
+                i_target_pred_scores.append(float(pred_scores))              
                 # get the true labels
-                i_target_y_label.append(i_data.y.cpu().numpy()[0])
+                i_target_y_label.append(float(i_data.y.cpu()))
 
             i_target_pred_scores = np.array(i_target_pred_scores)
             i_target_y_label = np.array(i_target_y_label)

diff --git a/apps/drug_target_interaction/batchdta/pairwise/Moltrans/helper/utils/paddle_tensor.py b/apps/drug_target_interaction/batchdta/pairwise/Moltrans/helper/utils/paddle_tensor.py
@@ -32,7 +32,7 @@ def item(self):
     """
     Item function
     """
-    return self.numpy()[0]
+    return float(self)
 
 
 @add_tensor_function

diff --git a/apps/drug_target_interaction/batchdta/pairwise/Moltrans/run_pairwise_Moltrans_CV.py b/apps/drug_target_interaction/batchdta/pairwise/Moltrans/run_pairwise_Moltrans_CV.py
@@ -297,7 +297,7 @@ def model_eval(model,val_dataloader,len_SMILES,len_target):
 
         for i_target_score in range(batch_x.shape[0]):
 
-            i_target_len = int(batch_len[i_target_score].numpy()[0])
+            i_target_len = int(batch_len[i_target_score])
             smiles = batch_x_smiles[i_target_score][0:i_target_len]
             target = batch_x_protein[i_target_score][0:i_target_len]
             smiles_mask = batch_x_smiles_mask[i_target_score][0:i_target_len]

diff --git a/apps/drug_target_interaction/batchdta/pairwise/Moltrans/run_pairwise_Moltrans_bindingDB.py b/apps/drug_target_interaction/batchdta/pairwise/Moltrans/run_pairwise_Moltrans_bindingDB.py
@@ -282,7 +282,7 @@ def model_eval(model,val_dataloader,len_SMILES,len_target):
 
         for i_target_score in range(batch_x.shape[0]):
 
-            i_target_len = int(batch_len[i_target_score].numpy()[0])
+            i_target_len = int(batch_len[i_target_score])
             smiles = batch_x_smiles[i_target_score][0:i_target_len]
             target = batch_x_protein[i_target_score][0:i_target_len]
             smiles_mask = batch_x_smiles_mask[i_target_score][0:i_target_len]

diff --git a/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_bindingdb.py b/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_bindingdb.py
@@ -60,7 +60,7 @@ def training(model, training_loader, optim):
         optim.clear_grad()
         loss.backward()
         optim.step()
-        res_loss = loss.numpy()[0]
+        res_loss = float(loss)
     return res_loss
 
 

diff --git a/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_davis.py b/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_davis.py
@@ -60,7 +60,7 @@ def training(model, training_loader, optim):
         optim.clear_grad()
         loss.backward()
         optim.step()
-        res_loss = loss.numpy()[0]
+        res_loss = float(loss)
     return res_loss
 
 

diff --git a/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_kiba.py b/apps/drug_target_interaction/batchdta/pointwise/DeepDTA/train_kiba.py
@@ -63,7 +63,7 @@ def training(model, training_loader, optim):
         optim.clear_grad()
         loss.backward()
         optim.step()
-        res_loss = loss.numpy()[0]
+        res_loss = float(loss.numpy())
     return res_loss
 
 

diff --git a/apps/drug_target_interaction/batchdta/pointwise/Moltrans/helper/utils/paddle_tensor.py b/apps/drug_target_interaction/batchdta/pointwise/Moltrans/helper/utils/paddle_tensor.py
@@ -32,7 +32,7 @@ def item(self):
     """
     Item function
     """
-    return self.numpy()[0]
+    return float(self)
 
 
 @add_tensor_function

diff --git a/apps/drug_target_interaction/moltrans_dti/helper/utils/paddle_tensor.py b/apps/drug_target_interaction/moltrans_dti/helper/utils/paddle_tensor.py
@@ -32,7 +32,7 @@ def item(self):
     """
     Item function
     """
-    return self.numpy()[0]
+    return float(self.numpy())
 
 
 @add_tensor_function

diff --git a/apps/fewshot_molecular_property/chem_lib/models/trainer.py b/apps/fewshot_molecular_property/chem_lib/models/trainer.py
@@ -294,7 +294,7 @@ def train_step(self):
             losses_eval.backward()
             self.optimizer.step()
 
-            print('Train Epoch:',self.train_epoch,', train update step:', k, ', loss_eval:', losses_eval.numpy()[0])
+            print('Train Epoch:',self.train_epoch,', train update step:', k, ', loss_eval:', float(losses_eval))
 
         return self.model.layers
 

diff --git a/apps/molecular_generation/SD_VAE/train_zinc.py b/apps/molecular_generation/SD_VAE/train_zinc.py
@@ -122,9 +122,9 @@ def _train_epoch(model, data_loader, epoch, kl_weight, optimizer=None):
             optimizer.clear_grad()
 
         # Log
-        kl_loss_values.append(kl_loss.numpy()[0])
-        perplexity_loss_values.append(perplexity.numpy()[0])
-        loss_values.append(loss.numpy()[0])
+        kl_loss_values.append(float(kl_loss))
+        perplexity_loss_values.append(float(perplexity))
+        loss_values.append(float(loss))
         lr = (optimizer.get_lr()
                   if optimizer is not None
                   else 0)

diff --git a/apps/pretrained_compound/ChemRL/GEM-2/src/paddle_utils.py b/apps/pretrained_compound/ChemRL/GEM-2/src/paddle_utils.py
@@ -37,8 +37,8 @@ def dist_mean(array, distributed=False):
     n = len(array)
     x_sum = 0 if n == 0 else np.sum(array)
     if distributed:
-        n = dist_all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
-        x_sum = dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        n = int(dist_all_reduce(paddle.to_tensor(n, dtype='int64')))
+        x_sum = float(dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     x_mean = 0 if n == 0 else x_sum / n
     return x_mean
 
@@ -47,14 +47,14 @@ def dist_sum(array, distributed=False):
     n = len(array)
     x_sum = 0 if n == 0 else np.sum(array)
     if distributed:
-        x_sum = dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        x_sum = float(dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     return x_sum
 
 
 def dist_length(array, distributed=False):
     n = len(array)
     if distributed:
-        n = dist_all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
+        n = int(dist_all_reduce(paddle.to_tensor(n, dtype='int64')))  # int() replaces the old .numpy()[0] indexing and yields a plain Python int
     return n
 
 

diff --git a/apps/pretrained_compound/ChemRL/GEM-2/train_gem2.py b/apps/pretrained_compound/ChemRL/GEM-2/train_gem2.py
@@ -80,7 +80,7 @@ def get_train_steps_per_epoch(dataset_len, args):
         min_data_len = paddle.to_tensor(dataset_len)
         from paddle.distributed import ReduceOp
         dist.all_reduce(min_data_len, ReduceOp.MIN)
-        dataset_len = min_data_len.numpy()[0]
+        dataset_len = int(min_data_len)
         logging.info(f'min dataset len: {dataset_len}')
     return int(dataset_len / args.batch_size) - 5
 

diff --git a/apps/protein_folding/helixfold-single/helixfold_single_inference.py b/apps/protein_folding/helixfold-single/helixfold_single_inference.py
@@ -113,7 +113,7 @@ def main(args):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("--init_model", type=str, help='tape + af2 stacked model')
+    parser.add_argument("--init_model", type=str, help='path to pretrained model')
     parser.add_argument("--fasta_file", type=str, help='path to fasta file to be predicted')
     parser.add_argument("--output_dir", type=str, help='path to prediction outputs')
     args = parser.parse_args()

diff --git a/apps/protein_folding/helixfold-single/tape/others/utils.py b/apps/protein_folding/helixfold-single/tape/others/utils.py
@@ -48,8 +48,8 @@ def dist_all_reduce(x, return_num=False, distributed=False):
     n = len(x)
     x_sum = 0 if n == 0 else np.sum(x)
     if distributed:
-        n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
-        x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))
+        x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     x_mean = 0 if n == 0 else x_sum / n
     if return_num:
         return x_mean, n
@@ -62,8 +62,8 @@ def dist_mean(x, distributed=False):
     n = len(x)
     x_sum = 0 if n == 0 else np.sum(x)
     if distributed:
-        n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
-        x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))
+        x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     x_mean = 0 if n == 0 else x_sum / n
     return x_mean
 
@@ -73,15 +73,15 @@ def dist_sum(x, distributed=False):
     n = len(x)
     x_sum = 0 if n == 0 else np.sum(x)
     if distributed:
-        x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     return x_sum
 
 
 def dist_length(x, distributed=False):
     """tbd"""
     n = len(x)
     if distributed:
-        n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
+        n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))  # NOTE(review): relies on dist.all_reduce returning the reduced tensor; if `dist` is paddle.distributed the reduction is in place — confirm
     return n
 
 

diff --git a/apps/protein_folding/helixfold/README_inference.md b/apps/protein_folding/helixfold/README_inference.md
@@ -6,7 +6,7 @@ Python dependencies available through `pip` is provided in `requirements.txt`. H
 
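-We provide a script `setup_env` that setup a `conda` environment and installs all dependencies. You can change the name of the environment and CUDA version in `setup_env`. Run:
+We provide a script `setup_env` that sets up a `conda` environment and installs all dependencies. You can change the name of the environment and the CUDA version in `setup_env`. Run: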
 ```bash
-wget https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.2-cudnn8-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
+wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/HelixFold/paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
 sh setup_env
 conda activate helixfold # activate the conda environment
 ```

diff --git a/apps/protein_folding/helixfold/README_train.md b/apps/protein_folding/helixfold/README_train.md
@@ -14,7 +14,7 @@ To reproduce the results reported in our paper, specific environment settings ar
 ## Installation
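-PaddlePaddle `dev` package is required to run HelixFold. Script `setup_env` is used to setup the `conda` environment, installing all dependencies. Locate to the directory of `helixfold` and run:
+PaddlePaddle 2.4.1 is required to run HelixFold. The script `setup_env` sets up the `conda` environment and installs all dependencies. Change to the `helixfold` directory and run: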
 ```bash
-wget https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.2-cudnn8-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
+wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/HelixFold/paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
 sh setup_env
 conda activate helixfold # activate the conda environment
 ```

diff --git a/apps/protein_folding/helixfold/alphafold_paddle/data/pipeline.py b/apps/protein_folding/helixfold/alphafold_paddle/data/pipeline.py
@@ -157,6 +157,8 @@ def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict:
     hhsearch_hits = parsers.parse_hhr(hhsearch_result)
     mgnify_msa = mgnify_msa[:self.mgnify_max_hits]
     mgnify_deletion_matrix = mgnify_deletion_matrix[:self.mgnify_max_hits]
+    uniref90_msa = uniref90_msa[:self.uniref_max_hits]
+    uniref90_deletion_matrix = uniref90_deletion_matrix[:self.uniref_max_hits]
 
     if self._use_small_bfd:
       jackhmmer_small_bfd_result = self.jackhmmer_small_bfd_runner.query(

diff --git a/apps/protein_folding/helixfold/alphafold_paddle/data/utils.py b/apps/protein_folding/helixfold/alphafold_paddle/data/utils.py
@@ -114,13 +114,16 @@ def load_labels(cif_path: str, pdb_id: str, chain_id: str = 'A') -> FeatureDict:
 
 # keys that should be ignored when conducting crop & pad
 def is_ignored_key(k):
+    """Return True if key ``k`` is listed in ``ignored_keys`` (entries left untouched by crop & pad)."""
     return k in ignored_keys
 
 # keys that have batch dim, e.g. msa features which have shape [N_msa, N_res, ...]
 def is_batched_key(k):
+    """Return True if key ``k`` is listed in ``batched_keys`` (features that carry an extra batch dim, e.g. MSA features)."""
     return k in batched_keys
 
 def align_feat(feat, size):
+    """Align feature."""
     # get num res from aatype
     assert 'aatype' in feat.keys(), \
         "'aatype' missing from batch, which is not expected."
@@ -148,7 +151,32 @@ def pad(key, array, start_axis, align_size, num_res):
     return feat
 
 
+def align_label(label, size):
+    """Zero-pad the entries of ``label`` so that ``num_res`` (``all_atom_mask.shape[1]``) becomes a multiple of ``size``; ignored keys are returned as-is."""
+    num_res = label['all_atom_mask'].shape[1]  # axis 1 of all_atom_mask is taken as the length to align
+
+    if num_res % size != 0:  # already-aligned input falls through and is returned unchanged
+        align_size = (num_res // size + 1) * size  # next multiple of size above num_res
+
+        def pad(key, array, start_axis, align_size, num_res):
+            if is_ignored_key(key):
+                return array
+            d_seq = start_axis      # choose the dim to crop / pad
+            if is_batched_key(key):
+                d_seq += 1  # batched keys are padded one axis further in
+            pad_shape = list(array.shape)
+            pad_shape[d_seq] = align_size - num_res  # NOTE(review): assumes array has length num_res on this axis — confirm
+            pad_array = paddle.zeros(pad_shape, dtype=array.dtype)
+            array = paddle.concat([array, pad_array], axis=d_seq)  # zeros are appended at the end of that axis
+            return array
+
+        label = {k: pad(k, v, 1, align_size, num_res) for k, v in label.items()}  # builds a new dict; start_axis is fixed to 1
+
+    return label
+
+
 def unpad_prediction(feat, pred):
+    """Unpad prediction."""
     unpad_pred = deepcopy(pred)
     n = feat['aatype'].shape[0]
 

diff --git a/apps/protein_folding/helixfold/gpu_infer.sh b/apps/protein_folding/helixfold/gpu_infer.sh
@@ -58,6 +58,7 @@ else
           --model_names=${MODELS} \
           --output_dir=${OUTPUT_DIR} \
           --disable_amber_relax \
+          --seed 2022 \
           --preset='reduced_dbs' \
           --random_seed=0 \
           ${@:2}

diff --git a/apps/protein_folding/helixfold/requirements.txt b/apps/protein_folding/helixfold/requirements.txt
@@ -13,4 +13,4 @@ scipy==1.7.0
 tensorflow-cpu==2.5.0
 tensorboardX==2.5
 etcd3
-./paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
+./paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
diff --git a/apps/protein_folding/helixfold/train.py b/apps/protein_folding/helixfold/train.py
@@ -39,7 +39,7 @@
 from utils.init_env import init_seed, init_distributed_env
 from utils.misc import TrainLogger, set_logging_level
 from alphafold_paddle.model import config
-from alphafold_paddle.data.utils import align_feat
+from alphafold_paddle.data.utils import align_feat, align_label
 from ppfleetx.distributed.protein_folding import dap, bp, dp
 from ppfleetx.distributed.protein_folding.scg import scg
 
@@ -164,6 +164,7 @@ def eval(args, model, eval_dataset, compute_loss, cache_dir=None):
         s1 = time_me()
         if args.dap_degree > 1:
             batch['feat'] = align_feat(batch['feat'], args.dap_degree)
+            batch['label'] = align_label(batch['label'], args.dap_degree)
 
         res = model(batch, compute_loss=compute_loss)
         if compute_loss:

diff --git a/apps/protein_folding/helixfold/utils/metric.py b/apps/protein_folding/helixfold/utils/metric.py
@@ -30,8 +30,8 @@ def dist_all_reduce(x, return_num=False, distributed=False):
     x_num = len(x)
     x_sum = 0 if x_num == 0 else np.sum(x)
     if distributed:
-        x_num = dp.all_reduce(paddle.to_tensor(x_num, dtype='int64')).numpy()[0]
-        x_sum = dp.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
+        x_num = int(dp.all_reduce(paddle.to_tensor(x_num, dtype='int64')))
+        x_sum = float(dp.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
     x_mean = 0 if x_num == 0 else x_sum / x_num
     if return_num:
         return x_mean, x_num

diff --git a/apps/protein_folding/helixfold_cpu/.github/BP_DAP_DP.png b/apps/protein_folding/helixfold_cpu/.github/BP_DAP_DP.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/HelixFold_accuracy.png b/apps/protein_folding/helixfold_cpu/.github/HelixFold_accuracy.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/HelixFold_computational_performance.png b/apps/protein_folding/helixfold_cpu/.github/HelixFold_computational_performance.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/HelixFold_perf.png b/apps/protein_folding/helixfold_cpu/.github/HelixFold_perf.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/HelixFold_perf_compare.png b/apps/protein_folding/helixfold_cpu/.github/HelixFold_perf_compare.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/LIT-PCBA_result.png b/apps/protein_folding/helixfold_cpu/.github/LIT-PCBA_result.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/LinearRNA.jpg b/apps/protein_folding/helixfold_cpu/.github/LinearRNA.jpg
diff --git a/apps/protein_folding/helixfold_cpu/.github/PaddleHelix_Structure.png b/apps/protein_folding/helixfold_cpu/.github/PaddleHelix_Structure.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/helixfold_pipeline.png b/apps/protein_folding/helixfold_cpu/.github/helixfold_pipeline.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/memory_optimize.png b/apps/protein_folding/helixfold_cpu/.github/memory_optimize.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/op_fuse.png b/apps/protein_folding/helixfold_cpu/.github/op_fuse.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/optimus_framework3.png b/apps/protein_folding/helixfold_cpu/.github/optimus_framework3.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/paddlehelix_features.jpg b/apps/protein_folding/helixfold_cpu/.github/paddlehelix_features.jpg
diff --git a/apps/protein_folding/helixfold_cpu/.github/paddlehelix_logo.png b/apps/protein_folding/helixfold_cpu/.github/paddlehelix_logo.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/pcqm4mv2_result.png b/apps/protein_folding/helixfold_cpu/.github/pcqm4mv2_result.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/tensor_fuse.png b/apps/protein_folding/helixfold_cpu/.github/tensor_fuse.png
diff --git a/apps/protein_folding/helixfold_cpu/.github/飞桨-螺旋桨_logo.png b/apps/protein_folding/helixfold_cpu/.github/飞桨-螺旋桨_logo.png
diff --git a/apps/protein_folding/helixfold_cpu/.gitignore b/apps/protein_folding/helixfold_cpu/.gitignore
@@ -0,0 +1,10 @@
+**/*pyc
+**/__pycache__
+*/*/__pycache__
+*/*/*/__pycache__
+*/*/*/scripts
+paddlecloud*
+internal*
+*/internal*
+.DS_Store
+*/.DS_Store