From 0c3846f496aeb5aae405fda8b91174ceb8c8e4ed Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 6 Jan 2024 01:53:30 -0500 Subject: [PATCH] breaking: remove implicit Hs from RDKit SMILES Fix #2087. Signed-off-by: Jinzhe Zeng --- reacnetgenerator/_path.py | 35 +++++++++++++++++++++++++++++++++- reacnetgenerator/_reachtml.py | 36 ++--------------------------------- tests/test.json | 11 +++++++---- tests/test_reacnetgen.py | 10 +++++----- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/reacnetgenerator/_path.py b/reacnetgenerator/_path.py index 4fa330549..146117511 100644 --- a/reacnetgenerator/_path.py +++ b/reacnetgenerator/_path.py @@ -19,6 +19,7 @@ """ import itertools +import re from abc import ABCMeta, abstractmethod from collections import Counter, defaultdict @@ -215,6 +216,38 @@ def _printatomroute(self, atomeach, timeaxis=None): allmoleculeroute = np.unique(allmoleculeroute, axis=0) return allmoleculeroute + def _re(self, smi): + """If you use RDkit to convert a methyl radical to SMILES, you will get something + like [H]C([H])[H]. However, OpenBabel will consider it as a methane molecule. So, + you have to use [H][C]([H])[H], if you need to process some radicals. + + Examples + -------- + >>> self._re('C') + [C] + >>> self._re('[C]') + [C] + >>> self._re('[CH]') + [CH] + >>> self._re('Na') + [Na] + >>> self._re('[H]c(Cl)C([H])Cl') + [H][c]([Cl])[C]([H])[Cl] + """ + if "_unknownSMILES" in smi: + # not SMILES + return smi + Satom = sorted(self.atomname, key=len, reverse=True) + elements = "|".join( + [ + ((an.upper() + "|" + an.lower()) if len(an) == 1 else an) + for an in Satom + if an != "H" + ] + ) + smi = re.sub(r"(?>> self._re('C') - [C] - >>> self._re('[C]') - [C] - >>> self._re('[CH]') - [CH] - >>> self._re('Na') - [Na] - >>> self._re('[H]c(Cl)C([H])Cl') - [H][c]([Cl])[C]([H])[Cl] - """ - if "_unknownSMILES" in smi: - # not SMILES - return smi - Satom = sorted(self.atomname, key=lambda i: len(i), reverse=True) - elements = "|".join( - [ - ((an.upper() + "|" + an.lower()) if len(an) == 1 else an) - for an in Satom - if an != "H" - ] - ) - smi = re.sub(r"(?") - left = [self._re(spec) for spec in left.split("+")] - right = [self._re(spec) for spec in right.split("+")] + left = left.split("+") + right = right.split("+") num = int(sx[0]) return left, right, num diff --git a/tests/test.json b/tests/test.json index 52836a9e9..17c65fa0d 100644 --- a/tests/test.json +++ b/tests/test.json @@ -40,7 +40,7 @@ ] ] }, - "reaction_sha256": "5eb802a5a4f5c537ea556528a7db9402ab401f9cbd650e75b2a03e809ab9c8ad" + "reaction_sha256": "12a2c53d81db733405d24dfc67dd390c85195c4011cef53305629e8083a5d2f4" }, { "rngparams": { @@ -64,7 +64,8 @@ "speciescenter": "[H][H]" }, "reaction_sha256": [ - "3ad36fbbbe2f52f4533b42e19e9efa393129d505860f00efb544aac7820caac7" + "7b26f8b66b9134a726e1aadc4adf200f42f5921692d30a1c7867fcf437b1fb88", + "1f3a88d3281b7627bc9bed1a533d7b08f1edd4d7811291d4d07cc5bbf7ac9bd4" ] }, { @@ -88,8 +89,10 @@ "miso": 1 }, "reaction_sha256": [ - "ec826635846cbe4d70a2454056ae973aaaf2c7bb760514017ebd6c804a85b6c2", - "2b1f9276faeabd4dbbd3349a9ddd5ffbcb64ebc11dcf3f16d661c34372f36dca" + "c7d2d7b905d247aaeba36b0c272506ab287c557c5365b7090bd9a04a45e1fc48", + "3d99317c870758bd9d33e29d474d9496643b74bd7b4b07fa1e31afff33dbcd07", + "88e88f1fd4e83f37f560b6786cee0feadf1139bfb7de8551b2946a11e4b767f9", + "e1f445eab53f7208ac9b176a09ac79360999957edbf2e5a9d9e8990bc20b91d7" ] }, { diff --git a/tests/test_reacnetgen.py b/tests/test_reacnetgen.py index fc6f38828..8a50d6ab9 100644 --- a/tests/test_reacnetgen.py +++ b/tests/test_reacnetgen.py @@ -15,7 +15,7 @@ from reacnetgenerator import ReacNetGenerator from reacnetgenerator._detect import _Detect from reacnetgenerator._hmmfilter import _HMMFilter -from reacnetgenerator._reachtml import _HTMLResult +from reacnetgenerator._path import _CollectSMILESPaths from reacnetgenerator.commandline import parm2cmd from reacnetgenerator.gui import GUI from reacnetgenerator.utils import checksha256, download_multifiles, listtobytes @@ -103,17 +103,17 @@ def test_gui_openandrun(self, reacnetgengui, mocker, reacnetgen_param): def test_commandline_help(self, script_runner): """Test commandline of ReacNetGenerator.""" - ret = script_runner.run("reacnetgenerator", "-h") + ret = script_runner.run(["reacnetgenerator", "-h"]) assert ret.success def test_commandline_version(self, script_runner): """Test commandline of ReacNetGenerator.""" - ret = script_runner.run("reacnetgenerator", "--version") + ret = script_runner.run(["reacnetgenerator", "--version"]) assert ret.success def test_commandline_run(self, script_runner, reacnetgen_param): """Test commandline of ReacNetGenerator.""" - ret = script_runner.run(*parm2cmd(reacnetgen_param["rngparams"])) + ret = script_runner.run(parm2cmd(reacnetgen_param["rngparams"])) assert ret.success def test_benchmark_detect(self, benchmark, reacnetgen_param): @@ -146,7 +146,7 @@ def test_benchmark_hmm(self, benchmark, reacnetgen_param): def test_re(self, reacnetgen_param): """Test regular expression of _HTMLResult.""" reacnetgen = ReacNetGenerator(**reacnetgen_param["rngparams"]) - r = _HTMLResult(reacnetgen) + r = _CollectSMILESPaths(reacnetgen) r.atomname = ["C", "H", "O", "Na", "Cl"] assert r._re("C"), "[C]" assert r._re("[C]"), "[C]"