Skip to content

Commit

Permalink
Made SMILES parser keep hydrogen information if available.
Browse files Browse the repository at this point in the history
When parsing SMILES into RDKit `Mol`s, KPM now retains information about hydrogen atoms, allowing them to remain atom-mapped.
  • Loading branch information
joegilkes committed Jan 19, 2024
1 parent 87d4f66 commit 03368c5
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions KPM/utils/data_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from numpy.typing import ArrayLike
from typing import Tuple
from rdkit import Chem
from rdkit.Chem import rdmolfiles
from sklearn.model_selection import RepeatedKFold, train_test_split, ShuffleSplit
from sklearn.utils import shuffle

Expand Down Expand Up @@ -57,31 +58,34 @@ def extract_data(ea: list, dh: list, rs: list, ps: list, num_reacs: int, train_d
rmol = []
pmol = []

smiles_params = rdmolfiles.SmilesParserParams()
smiles_params.removeHs = False

# Rework arrays based on train_direction
if train_direction == 'forward':
for i in range(num_reacs):
Eact[i] = ea[i]
dH[i] = dh[i]
rmol.append(Chem.MolFromSmiles(rs[i]))
pmol.append(Chem.MolFromSmiles(ps[i]))
rmol.append(Chem.MolFromSmiles(rs[i], smiles_params))
pmol.append(Chem.MolFromSmiles(ps[i], smiles_params))
elif train_direction == 'backward':
for i in range(num_reacs):
Eact[i] = ea[i] - dh[i]
dH[i] = -dh[i]
rmol.append(Chem.MolFromSmiles(ps[i]))
pmol.append(Chem.MolFromSmiles(rs[i]))
rmol.append(Chem.MolFromSmiles(ps[i], smiles_params))
pmol.append(Chem.MolFromSmiles(rs[i], smiles_params))
elif train_direction == 'both':
half_reacs = int(num_reacs/2)
for i in range(half_reacs):
Eact[i] = ea[i]
dH[i] = dh[i]
rmol.append(Chem.MolFromSmiles(rs[i]))
pmol.append(Chem.MolFromSmiles(ps[i]))
rmol.append(Chem.MolFromSmiles(rs[i], smiles_params))
pmol.append(Chem.MolFromSmiles(ps[i], smiles_params))
for i in range(half_reacs):
Eact[i+half_reacs] = ea[i] - dh[i]
dH[i+half_reacs] = -dh[i]
rmol.append(Chem.MolFromSmiles(ps[i]))
pmol.append(Chem.MolFromSmiles(rs[i]))
rmol.append(Chem.MolFromSmiles(ps[i], smiles_params))
pmol.append(Chem.MolFromSmiles(rs[i], smiles_params))

return Eact, dH, rmol, pmol

Expand Down

0 comments on commit 03368c5

Please sign in to comment.