Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update the first version of Helixfold cpu onto helixfold_cpu branch #246

Open
wants to merge 9 commits into
base: helixfold_cpu
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/drug_drug_synergy/RGCN/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def train(num_subgraph, graph, label_idx, epochs, sub_neighbours=[10, 10], init=
fpr, tpr, _ = roc_curve(y_true=ground_truth, y_score=pred_prob)
auc_v = auc(fpr, tpr)
print("sub_graph index : {} | epoch: {} | training loss: {:.4f} | AUC: {:.3f}".format(
sub_g, epoch, train_loss.numpy()[0], auc_v))
sub_g, epoch, float(train_loss), auc_v))

return model

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def model_eval(model,val_dataloader):

for i_target_score in range(batch_smiles.shape[0]):

i_target_len = int(batch_len[i_target_score].numpy()[0])
i_target_len = int(batch_len[i_target_score])
smiles = batch_smiles[i_target_score][0:i_target_len]
target = batch_protein[i_target_score][0:i_target_len]
y_label = batch_y[i_target_score][0:i_target_len].numpy()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ def model_eval(model,val_dataloader,device):
i_data = i_data.to(device)
pred_scores = model.forward_single(i_data)
# get the predicted labels
i_target_pred_scores.append(pred_scores.cpu().numpy()[0])
i_target_pred_scores.append(float(pred_scores))
# get the true labels
i_target_y_label.append(i_data.y.cpu().numpy()[0])
i_target_y_label.append(float(i_data.y.cpu()))

i_target_pred_scores = np.array(i_target_pred_scores)
i_target_y_label = np.array(i_target_y_label)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def item(self):
"""
Item function
"""
return self.numpy()[0]
return float(self)


@add_tensor_function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def model_eval(model,val_dataloader,len_SMILES,len_target):

for i_target_score in range(batch_x.shape[0]):

i_target_len = int(batch_len[i_target_score].numpy()[0])
i_target_len = int(batch_len[i_target_score])
smiles = batch_x_smiles[i_target_score][0:i_target_len]
target = batch_x_protein[i_target_score][0:i_target_len]
smiles_mask = batch_x_smiles_mask[i_target_score][0:i_target_len]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def model_eval(model,val_dataloader,len_SMILES,len_target):

for i_target_score in range(batch_x.shape[0]):

i_target_len = int(batch_len[i_target_score].numpy()[0])
i_target_len = int(batch_len[i_target_score])
smiles = batch_x_smiles[i_target_score][0:i_target_len]
target = batch_x_protein[i_target_score][0:i_target_len]
smiles_mask = batch_x_smiles_mask[i_target_score][0:i_target_len]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def training(model, training_loader, optim):
optim.clear_grad()
loss.backward()
optim.step()
res_loss = loss.numpy()[0]
res_loss = float(loss)
return res_loss


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def training(model, training_loader, optim):
optim.clear_grad()
loss.backward()
optim.step()
res_loss = loss.numpy()[0]
res_loss = float(loss)
return res_loss


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def training(model, training_loader, optim):
optim.clear_grad()
loss.backward()
optim.step()
res_loss = loss.numpy()[0]
res_loss = float(loss.numpy())
return res_loss


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def item(self):
"""
Item function
"""
return self.numpy()[0]
return float(self)


@add_tensor_function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def item(self):
"""
Item function
"""
return self.numpy()[0]
return float(self.numpy())


@add_tensor_function
Expand Down
2 changes: 1 addition & 1 deletion apps/fewshot_molecular_property/chem_lib/models/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def train_step(self):
losses_eval.backward()
self.optimizer.step()

print('Train Epoch:',self.train_epoch,', train update step:', k, ', loss_eval:', losses_eval.numpy()[0])
print('Train Epoch:',self.train_epoch,', train update step:', k, ', loss_eval:', float(losses_eval))

return self.model.layers

Expand Down
6 changes: 3 additions & 3 deletions apps/molecular_generation/SD_VAE/train_zinc.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ def _train_epoch(model, data_loader, epoch, kl_weight, optimizer=None):
optimizer.clear_grad()

# Log
kl_loss_values.append(kl_loss.numpy()[0])
perplexity_loss_values.append(perplexity.numpy()[0])
loss_values.append(loss.numpy()[0])
kl_loss_values.append(float(kl_loss))
perplexity_loss_values.append(float(perplexity))
loss_values.append(float(loss))
lr = (optimizer.get_lr()
if optimizer is not None
else 0)
Expand Down
8 changes: 4 additions & 4 deletions apps/pretrained_compound/ChemRL/GEM-2/src/paddle_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def dist_mean(array, distributed=False):
n = len(array)
x_sum = 0 if n == 0 else np.sum(array)
if distributed:
n = dist_all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
x_sum = dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
n = int(dist_all_reduce(paddle.to_tensor(n, dtype='int64')))
x_sum = float(dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
x_mean = 0 if n == 0 else x_sum / n
return x_mean

Expand All @@ -47,14 +47,14 @@ def dist_sum(array, distributed=False):
n = len(array)
x_sum = 0 if n == 0 else np.sum(array)
if distributed:
x_sum = dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
x_sum = float(dist_all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
return x_sum


def dist_length(array, distributed=False):
n = len(array)
if distributed:
n = dist_all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
n = int(dist_all_reduce(paddle.to_tensor(n, dtype='int64')))
return n


Expand Down
2 changes: 1 addition & 1 deletion apps/pretrained_compound/ChemRL/GEM-2/train_gem2.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def get_train_steps_per_epoch(dataset_len, args):
min_data_len = paddle.to_tensor(dataset_len)
from paddle.distributed import ReduceOp
dist.all_reduce(min_data_len, ReduceOp.MIN)
dataset_len = min_data_len.numpy()[0]
dataset_len = int(min_data_len)
logging.info(f'min dataset len: {dataset_len}')
return int(dataset_len / args.batch_size) - 5

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def main(args):

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--init_model", type=str, help='tape + af2 stacked model')
parser.add_argument("--init_model", type=str, help='path to pretrained model')
parser.add_argument("--fasta_file", type=str, help='path to fasta file to be predicted')
parser.add_argument("--output_dir", type=str, help='path to prediction outputs')
args = parser.parse_args()
Expand Down
12 changes: 6 additions & 6 deletions apps/protein_folding/helixfold-single/tape/others/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def dist_all_reduce(x, return_num=False, distributed=False):
n = len(x)
x_sum = 0 if n == 0 else np.sum(x)
if distributed:
n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))
x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
x_mean = 0 if n == 0 else x_sum / n
if return_num:
return x_mean, n
Expand All @@ -62,8 +62,8 @@ def dist_mean(x, distributed=False):
n = len(x)
x_sum = 0 if n == 0 else np.sum(x)
if distributed:
n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))
x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
x_mean = 0 if n == 0 else x_sum / n
return x_mean

Expand All @@ -73,15 +73,15 @@ def dist_sum(x, distributed=False):
n = len(x)
x_sum = 0 if n == 0 else np.sum(x)
if distributed:
x_sum = dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
x_sum = float(dist.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
return x_sum


def dist_length(x, distributed=False):
"""tbd"""
n = len(x)
if distributed:
n = dist.all_reduce(paddle.to_tensor(n, dtype='int64')).numpy()[0]
n = int(dist.all_reduce(paddle.to_tensor(n, dtype='int64')))
return n


Expand Down
2 changes: 1 addition & 1 deletion apps/protein_folding/helixfold/README_inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Python dependencies available through `pip` is provided in `requirements.txt`. H

We provide a script `setup_env` that sets up a `conda` environment and installs all dependencies. You can change the name of the environment and the CUDA version in `setup_env`. Run:
```bash
wget https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.2-cudnn8-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/HelixFold/paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
sh setup_env
conda activate helixfold # activate the conda environment
```
Expand Down
2 changes: 1 addition & 1 deletion apps/protein_folding/helixfold/README_train.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ To reproduce the results reported in our paper, specific environment settings ar
## Installation
The PaddlePaddle `dev` package is required to run HelixFold. The script `setup_env` sets up the `conda` environment and installs all dependencies. Change to the `helixfold` directory and run:
```bash
wget https://paddle-wheel.bj.bcebos.com/develop/linux/linux-gpu-cuda11.2-cudnn8-mkl-gcc8.2-avx/paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/HelixFold/paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
sh setup_env
conda activate helixfold # activate the conda environment
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict:
hhsearch_hits = parsers.parse_hhr(hhsearch_result)
mgnify_msa = mgnify_msa[:self.mgnify_max_hits]
mgnify_deletion_matrix = mgnify_deletion_matrix[:self.mgnify_max_hits]
uniref90_msa = uniref90_msa[:self.uniref_max_hits]
uniref90_deletion_matrix = uniref90_deletion_matrix[:self.uniref_max_hits]

if self._use_small_bfd:
jackhmmer_small_bfd_result = self.jackhmmer_small_bfd_runner.query(
Expand Down
28 changes: 28 additions & 0 deletions apps/protein_folding/helixfold/alphafold_paddle/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,16 @@ def load_labels(cif_path: str, pdb_id: str, chain_id: str = 'A') -> FeatureDict:

# keys that should be ignored when conducting crop & pad
def is_ignored_key(k):
"""tbd."""
return k in ignored_keys

# keys that have batch dim, e.g. msa features which have shape [N_msa, N_res, ...]
def is_batched_key(k):
"""tbd."""
return k in batched_keys

def align_feat(feat, size):
"""Align feature."""
# get num res from aatype
assert 'aatype' in feat.keys(), \
"'aatype' missing from batch, which is not expected."
Expand Down Expand Up @@ -148,7 +151,32 @@ def pad(key, array, start_axis, align_size, num_res):
return feat


def align_label(label, size):
"""Align label."""
num_res = label['all_atom_mask'].shape[1]

if num_res % size != 0:
align_size = (num_res // size + 1) * size

def pad(key, array, start_axis, align_size, num_res):
if is_ignored_key(key):
return array
d_seq = start_axis # choose the dim to crop / pad
if is_batched_key(key):
d_seq += 1
pad_shape = list(array.shape)
pad_shape[d_seq] = align_size - num_res
pad_array = paddle.zeros(pad_shape, dtype=array.dtype)
array = paddle.concat([array, pad_array], axis=d_seq)
return array

label = {k: pad(k, v, 1, align_size, num_res) for k, v in label.items()}

return label


def unpad_prediction(feat, pred):
"""Unpad prediction."""
unpad_pred = deepcopy(pred)
n = feat['aatype'].shape[0]

Expand Down
1 change: 1 addition & 0 deletions apps/protein_folding/helixfold/gpu_infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ else
--model_names=${MODELS} \
--output_dir=${OUTPUT_DIR} \
--disable_amber_relax \
--seed 2022 \
--preset='reduced_dbs' \
--random_seed=0 \
${@:2}
Expand Down
2 changes: 1 addition & 1 deletion apps/protein_folding/helixfold/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ scipy==1.7.0
tensorflow-cpu==2.5.0
tensorboardX==2.5
etcd3
./paddlepaddle_gpu-0.0.0.post112-cp37-cp37m-linux_x86_64.whl
./paddlepaddle_gpu-2.4.1-cp37-cp37m-linux_x86_64.whl
3 changes: 2 additions & 1 deletion apps/protein_folding/helixfold/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from utils.init_env import init_seed, init_distributed_env
from utils.misc import TrainLogger, set_logging_level
from alphafold_paddle.model import config
from alphafold_paddle.data.utils import align_feat
from alphafold_paddle.data.utils import align_feat, align_label
from ppfleetx.distributed.protein_folding import dap, bp, dp
from ppfleetx.distributed.protein_folding.scg import scg

Expand Down Expand Up @@ -164,6 +164,7 @@ def eval(args, model, eval_dataset, compute_loss, cache_dir=None):
s1 = time_me()
if args.dap_degree > 1:
batch['feat'] = align_feat(batch['feat'], args.dap_degree)
batch['label'] = align_label(batch['label'], args.dap_degree)

res = model(batch, compute_loss=compute_loss)
if compute_loss:
Expand Down
4 changes: 2 additions & 2 deletions apps/protein_folding/helixfold/utils/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def dist_all_reduce(x, return_num=False, distributed=False):
x_num = len(x)
x_sum = 0 if x_num == 0 else np.sum(x)
if distributed:
x_num = dp.all_reduce(paddle.to_tensor(x_num, dtype='int64')).numpy()[0]
x_sum = dp.all_reduce(paddle.to_tensor(x_sum, dtype='float32')).numpy()[0]
x_num = int(dp.all_reduce(paddle.to_tensor(x_num, dtype='int64')))
x_sum = float(dp.all_reduce(paddle.to_tensor(x_sum, dtype='float32')))
x_mean = 0 if x_num == 0 else x_sum / x_num
if return_num:
return x_mean, x_num
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions apps/protein_folding/helixfold_cpu/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
**/*pyc
**/__pycache__
*/*/__pycache__
*/*/*/__pycache__
*/*/*/scripts
paddlecloud*
internal*
*/internal*
.DS_Store
*/.DS_Store
Loading