Skip to content

Commit

Permalink
breaking: remove implicit Hs from RDKit SMILES
Browse files Browse the repository at this point in the history
Fix #2087.

Signed-off-by: Jinzhe Zeng <[email protected]>
  • Loading branch information
njzjz committed Jan 6, 2024
1 parent d250806 commit 0c3846f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 44 deletions.
35 changes: 34 additions & 1 deletion reacnetgenerator/_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"""

import itertools
import re
from abc import ABCMeta, abstractmethod
from collections import Counter, defaultdict

Expand Down Expand Up @@ -215,6 +216,38 @@ def _printatomroute(self, atomeach, timeaxis=None):
allmoleculeroute = np.unique(allmoleculeroute, axis=0)
return allmoleculeroute

def _re(self, smi):
"""If you use RDkit to convert a methyl radical to SMILES, you will get something
like [H]C([H])[H]. However, OpenBabel will consider it as a methane molecule. So,
you have to use [H][C]([H])[H], if you need to process some radicals.
Examples
--------
>>> self._re('C')
[C]
>>> self._re('[C]')
[C]
>>> self._re('[CH]')
[CH]
>>> self._re('Na')
[Na]
>>> self._re('[H]c(Cl)C([H])Cl')
[H][c]([Cl])[C]([H])[Cl]
"""
if "_unknownSMILES" in smi:
# not SMILES
return smi

Check warning on line 239 in reacnetgenerator/_path.py

View check run for this annotation

Codecov / codecov/patch

reacnetgenerator/_path.py#L239

Added line #L239 was not covered by tests
Satom = sorted(self.atomname, key=len, reverse=True)
elements = "|".join(
[
((an.upper() + "|" + an.lower()) if len(an) == 1 else an)
for an in Satom
if an != "H"
]
)
smi = re.sub(r"(?<!\[)(" + elements + r")(?!H)", r"[\1]", smi)
return smi.replace("[HH]", "[H]")

def convertSMILES(self, atoms, bonds):
"""Convert atoms and bonds information to SMILES.
Expand All @@ -230,7 +263,7 @@ def convertSMILES(self, atoms, bonds):
for atom1, atom2, level in bonds:
m.AddBond(d[atom1], d[atom2], Chem.BondType(level)) # type: ignore
name = Chem.MolToSmiles(m) # type: ignore
return name
return self._re(name)

def _getatomsandbonds(self, line):
atoms = np.array(bytestolist(line[0]), dtype=int)
Expand Down
36 changes: 2 additions & 34 deletions reacnetgenerator/_reachtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,43 +66,11 @@ def report(self):
f"Report is generated. Please see {self.resultfilename} for more details."
)

def _re(self, smi):
"""If you use RDkit to convert a methyl radical to SMILES, you will get something
like [H]C([H])[H]. However, OpenBabel will consider it as a methane molecule. So,
you have to use [H][C]([H])[H], if you need to process some radicals.
Examples
--------
>>> self._re('C')
[C]
>>> self._re('[C]')
[C]
>>> self._re('[CH]')
[CH]
>>> self._re('Na')
[Na]
>>> self._re('[H]c(Cl)C([H])Cl')
[H][c]([Cl])[C]([H])[Cl]
"""
if "_unknownSMILES" in smi:
# not SMILES
return smi
Satom = sorted(self.atomname, key=lambda i: len(i), reverse=True)
elements = "|".join(
[
((an.upper() + "|" + an.lower()) if len(an) == 1 else an)
for an in Satom
if an != "H"
]
)
smi = re.sub(r"(?<!\[)(" + elements + r")(?!H)", r"[\1]", smi)
return smi.replace("[HH]", "[H]")

def _handlereaction(self, line):
sx = line.split()
left, right = sx[1].split("->")
left = [self._re(spec) for spec in left.split("+")]
right = [self._re(spec) for spec in right.split("+")]
left = left.split("+")
right = right.split("+")
num = int(sx[0])
return left, right, num

Expand Down
11 changes: 7 additions & 4 deletions tests/test.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
]
]
},
"reaction_sha256": "5eb802a5a4f5c537ea556528a7db9402ab401f9cbd650e75b2a03e809ab9c8ad"
"reaction_sha256": "12a2c53d81db733405d24dfc67dd390c85195c4011cef53305629e8083a5d2f4"
},
{
"rngparams": {
Expand All @@ -64,7 +64,8 @@
"speciescenter": "[H][H]"
},
"reaction_sha256": [
"3ad36fbbbe2f52f4533b42e19e9efa393129d505860f00efb544aac7820caac7"
"7b26f8b66b9134a726e1aadc4adf200f42f5921692d30a1c7867fcf437b1fb88",
"1f3a88d3281b7627bc9bed1a533d7b08f1edd4d7811291d4d07cc5bbf7ac9bd4"
]
},
{
Expand All @@ -88,8 +89,10 @@
"miso": 1
},
"reaction_sha256": [
"ec826635846cbe4d70a2454056ae973aaaf2c7bb760514017ebd6c804a85b6c2",
"2b1f9276faeabd4dbbd3349a9ddd5ffbcb64ebc11dcf3f16d661c34372f36dca"
"c7d2d7b905d247aaeba36b0c272506ab287c557c5365b7090bd9a04a45e1fc48",
"3d99317c870758bd9d33e29d474d9496643b74bd7b4b07fa1e31afff33dbcd07",
"88e88f1fd4e83f37f560b6786cee0feadf1139bfb7de8551b2946a11e4b767f9",
"e1f445eab53f7208ac9b176a09ac79360999957edbf2e5a9d9e8990bc20b91d7"
]
},
{
Expand Down
10 changes: 5 additions & 5 deletions tests/test_reacnetgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from reacnetgenerator import ReacNetGenerator
from reacnetgenerator._detect import _Detect
from reacnetgenerator._hmmfilter import _HMMFilter
from reacnetgenerator._reachtml import _HTMLResult
from reacnetgenerator._path import _CollectSMILESPaths
from reacnetgenerator.commandline import parm2cmd
from reacnetgenerator.gui import GUI
from reacnetgenerator.utils import checksha256, download_multifiles, listtobytes
Expand Down Expand Up @@ -103,17 +103,17 @@ def test_gui_openandrun(self, reacnetgengui, mocker, reacnetgen_param):

def test_commandline_help(self, script_runner):
    """Test that ``reacnetgenerator -h`` exits successfully."""
    # Run the command once, passed as a single argument list; a second
    # invocation would only overwrite the first result.
    ret = script_runner.run(["reacnetgenerator", "-h"])
    assert ret.success

def test_commandline_version(self, script_runner):
    """Test that ``reacnetgenerator --version`` exits successfully."""
    # Run the command once, passed as a single argument list; a second
    # invocation would only overwrite the first result.
    ret = script_runner.run(["reacnetgenerator", "--version"])
    assert ret.success

def test_commandline_run(self, script_runner, reacnetgen_param):
    """Test that the command line built from ``rngparams`` exits successfully."""
    # parm2cmd's result is handed to the runner as one command argument
    # (not unpacked), and the command is run a single time.
    ret = script_runner.run(parm2cmd(reacnetgen_param["rngparams"]))
    assert ret.success

def test_benchmark_detect(self, benchmark, reacnetgen_param):
Expand Down Expand Up @@ -146,7 +146,7 @@ def test_benchmark_hmm(self, benchmark, reacnetgen_param):
def test_re(self, reacnetgen_param):
"""Test regular expression of _HTMLResult."""
reacnetgen = ReacNetGenerator(**reacnetgen_param["rngparams"])
r = _HTMLResult(reacnetgen)
r = _CollectSMILESPaths(reacnetgen)
r.atomname = ["C", "H", "O", "Na", "Cl"]
assert r._re("C"), "[C]"
assert r._re("[C]"), "[C]"
Expand Down

0 comments on commit 0c3846f

Please sign in to comment.