From f0ec66abfabaa14301976546188ce720e6534175 Mon Sep 17 00:00:00 2001 From: "Federico E. Benelli" Date: Tue, 23 Apr 2024 14:30:30 -0300 Subject: [PATCH 1/3] feat(cache): cache the call to RDKit, gives a better interactive experience --- ugropy/core/get_rdkit_object.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ugropy/core/get_rdkit_object.py b/ugropy/core/get_rdkit_object.py index 6880d21..dabb4c6 100644 --- a/ugropy/core/get_rdkit_object.py +++ b/ugropy/core/get_rdkit_object.py @@ -1,5 +1,6 @@ """get_rdkit_object module.""" +from functools import cache from typing import Union import pubchempy as pcp @@ -7,6 +8,7 @@ from rdkit import Chem +@cache def instantiate_mol_object( identifier: Union[str, Chem.rdchem.Mol], identifier_type: str = "name" ) -> Chem.rdchem.Mol: From 64a339869350faa69a2908336d3893b8815906f7 Mon Sep 17 00:00:00 2001 From: "Federico E. Benelli" Date: Tue, 23 Apr 2024 14:31:09 -0300 Subject: [PATCH 2/3] moved to `pathlib` for the file writers The use of `pathlib` gives a more portable usage of paths to directories and files in comparison to just using plain strings. 
--- tests/writers/test_to_clapeyron.py | 5 ----- ugropy/groups.py | 1 + ugropy/writers/clapeyron.py | 18 ++++++++++++++---- ugropy/writers/clapeyron_writers/critical.py | 8 +++++--- ugropy/writers/clapeyron_writers/molar_mass.py | 11 ++++++++--- .../writers/clapeyron_writers/psrk_groups.py | 16 +++++++++------- .../writers/clapeyron_writers/unifac_groups.py | 16 +++++++++------- 7 files changed, 46 insertions(+), 29 deletions(-) diff --git a/tests/writers/test_to_clapeyron.py b/tests/writers/test_to_clapeyron.py index 82fc162..0b578a9 100644 --- a/tests/writers/test_to_clapeyron.py +++ b/tests/writers/test_to_clapeyron.py @@ -24,8 +24,6 @@ def test_to_clapeyron(): with open(f"{path_db}/PSRK_groups.csv", mode="r") as f: df_psrk = pd.read_csv(f, sep="|", index_col=None) - os.mkdir(f"{here}/database") - limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles") ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15) @@ -93,8 +91,6 @@ def test_to_clapeyron_batch_name(): with open(f"{path_db}/PSRK_groups.csv", mode="r") as f: df_psrk = pd.read_csv(f, sep="|", index_col=None) - os.mkdir(f"{here}/database") - limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles") ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15) @@ -156,7 +152,6 @@ def test_to_clapeyron_batch_name(): def test_molar_mass_csv(): - os.mkdir(f"{here}/database") limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles") ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15) diff --git a/ugropy/groups.py b/ugropy/groups.py index cfe89cf..3810fd6 100644 --- a/ugropy/groups.py +++ b/ugropy/groups.py @@ -53,6 +53,7 @@ def __init__( identifier_type: str = "name", normal_boiling_temperature: float = None, ) -> None: + self.identifier_type = identifier_type.lower() self.identifier = identifier self.mol_object = instantiate_mol_object(identifier, identifier_type) diff --git a/ugropy/writers/clapeyron.py b/ugropy/writers/clapeyron.py index 1445c21..2fd65c4 100644 --- 
a/ugropy/writers/clapeyron.py +++ b/ugropy/writers/clapeyron.py @@ -1,5 +1,6 @@ """to_clapeyron module.""" +import pathlib from typing import List from ugropy.properties.joback_properties import JobackProperties @@ -43,10 +44,17 @@ def to_clapeyron( "batch1_ogUNIFAC_groups.csv". With the default value will be "ogUNIFAC_groups.csv", by default "". """ + + # Use pathlib's Path internally + path_pathlib = pathlib.Path(path) + # Check if all list have correct data: if len(molecules_names) == 0: raise ValueError("No names provided for the molecules.") + if not path_pathlib.is_dir(): + path_pathlib.mkdir(parents=True) + if unifac_groups and len(unifac_groups) != len(molecules_names): raise ValueError( "UNIFAC groups list must have the same amount of elements than" @@ -67,7 +75,7 @@ def to_clapeyron( # Molar mass write_molar_mass( - path, + path_pathlib, batch_name, molecules_names, unifac_groups, @@ -77,12 +85,14 @@ def to_clapeyron( # LV-UNIFAC if unifac_groups: - write_unifac(path, batch_name, molecules_names, unifac_groups) + write_unifac(path_pathlib, batch_name, molecules_names, unifac_groups) # PSRK if psrk_groups: - write_psrk(path, batch_name, molecules_names, psrk_groups) + write_psrk(path_pathlib, batch_name, molecules_names, psrk_groups) # Critical if joback_objects: - write_critical(path, batch_name, molecules_names, joback_objects) + write_critical( + path_pathlib, batch_name, molecules_names, joback_objects + ) diff --git a/ugropy/writers/clapeyron_writers/critical.py b/ugropy/writers/clapeyron_writers/critical.py index 93b33f1..2eabc46 100644 --- a/ugropy/writers/clapeyron_writers/critical.py +++ b/ugropy/writers/clapeyron_writers/critical.py @@ -1,5 +1,6 @@ """Joback critical properties writer module.""" +import pathlib from io import StringIO from typing import List @@ -9,7 +10,7 @@ def write_critical( - path: str, + path: pathlib.Path, batch_name: str, molecules_names: List[str], joback_objects: List[JobackProperties] = [], @@ -42,6 +43,7 @@ def 
write_critical( "Critical Single Parameters,,,,,\n" "species,CAS,Tc,Pc,Vc,acentricfactor\n" ) + path_critical = pathlib.Path(path) # ========================================================================= # Build dataframe # ========================================================================= @@ -62,13 +64,13 @@ def write_critical( if batch_name == "": with open( - f"{path}/critical.csv", "w", newline="", encoding="utf-8" + path_critical / "critical.csv", "w", newline="", encoding="utf-8" ) as file: df.to_csv(file, index=False) else: with open( - f"{path}/{batch_name}_critical.csv", + path_critical / f"{batch_name}_critical.csv", "w", newline="", encoding="utf-8", diff --git a/ugropy/writers/clapeyron_writers/molar_mass.py b/ugropy/writers/clapeyron_writers/molar_mass.py index dc42e5b..44ef8ae 100644 --- a/ugropy/writers/clapeyron_writers/molar_mass.py +++ b/ugropy/writers/clapeyron_writers/molar_mass.py @@ -1,5 +1,6 @@ """Molar mass writer module.""" +import pathlib from io import StringIO from typing import List @@ -12,7 +13,7 @@ def write_molar_mass( - path: str, + path: pathlib.Path, batch_name: str, molecules_names: List[str], unifac_groups: List[dict] = [], @@ -49,6 +50,7 @@ def write_molar_mass( "Molar Mases Single Params,,\n" "species,CAS,Mw\n" ) + path_molar_mass = pathlib.Path(path) # ========================================================================= # Get molecular weights # ========================================================================= @@ -90,13 +92,16 @@ def write_molar_mass( if batch_name == "": with open( - f"{path}/molarmass.csv", "w", newline="", encoding="utf-8" + path_molar_mass / "molarmass.csv", + "w", + newline="", + encoding="utf-8", ) as file: df.to_csv(file, index=False) else: with open( - f"{path}/{batch_name}_molarmass.csv", + path_molar_mass / f"{batch_name}_molarmass.csv", "w", newline="", encoding="utf-8", diff --git a/ugropy/writers/clapeyron_writers/psrk_groups.py 
b/ugropy/writers/clapeyron_writers/psrk_groups.py index a04ebcb..bbcfc44 100644 --- a/ugropy/writers/clapeyron_writers/psrk_groups.py +++ b/ugropy/writers/clapeyron_writers/psrk_groups.py @@ -1,11 +1,11 @@ """PSRK groups writer module.""" -import os +import pathlib from typing import List def write_psrk( - path: str, + path: pathlib.Path, batch_name: str, molecules_names: List[str], psrk_groups: List[dict], @@ -37,6 +37,8 @@ def write_psrk( "species,groups\n" ] + path_psrk = path / "PSRK" + for name, groups in zip(molecules_names, psrk_groups): groups_str = '"[' @@ -51,14 +53,14 @@ def write_psrk( lines.extend(new_line) # Create folder for PSRK groups - if not os.path.exists(f"{path}/PSRK"): - os.makedirs(f"{path}/PSRK") + if not path_psrk.is_dir(): + path_psrk.mkdir(parents=True) # Write .csv if batch_name == "": - write_path = f"{path}/PSRK/PSRK_groups.csv" + write_path = path_psrk / "PSRK_groups.csv" else: - write_path = f"{path}/PSRK/{batch_name}_PSRK_groups.csv" + write_path = path_psrk / f"{batch_name}_PSRK_groups.csv" - with open(f"{write_path}", "w", encoding="utf-8", newline="\n") as file: + with open(write_path, "w", encoding="utf-8", newline="\n") as file: file.writelines(lines) diff --git a/ugropy/writers/clapeyron_writers/unifac_groups.py b/ugropy/writers/clapeyron_writers/unifac_groups.py index 241fe6e..7706fc9 100644 --- a/ugropy/writers/clapeyron_writers/unifac_groups.py +++ b/ugropy/writers/clapeyron_writers/unifac_groups.py @@ -1,11 +1,11 @@ """UNIFAC groups writer module.""" -import os +import pathlib from typing import List def write_unifac( - path: str, + path: pathlib.Path, batch_name: str, molecules_names: List[str], unifac_groups: List[dict], @@ -32,6 +32,8 @@ def write_unifac( "species,groups\n" ] + path_ogunifac = path / "ogUNIFAC" + for name, groups in zip(molecules_names, unifac_groups): groups_str = '"[' @@ -46,14 +48,14 @@ def write_unifac( lines.extend(new_line) # Create folder for ogUNIFAC groups - if not 
os.path.exists(f"{path}/ogUNIFAC"): - os.makedirs(f"{path}/ogUNIFAC") + if not path_ogunifac.is_dir(): + path_ogunifac.mkdir(parents=True) # Write .csv if batch_name == "": - write_path = f"{path}/ogUNIFAC/ogUNIFAC_groups.csv" + write_path = path_ogunifac / "ogUNIFAC_groups.csv" else: - write_path = f"{path}/ogUNIFAC/{batch_name}_ogUNIFAC_groups.csv" + write_path = path_ogunifac / f"{batch_name}_ogUNIFAC_groups.csv" - with open(f"{write_path}", "w", encoding="utf-8", newline="\n") as file: + with open(write_path, "w", encoding="utf-8", newline="\n") as file: file.writelines(lines) From 58381aefea3f25f886f328227983853bec3ca812 Mon Sep 17 00:00:00 2001 From: "Federico E. Benelli" Date: Tue, 23 Apr 2024 14:53:31 -0300 Subject: [PATCH 3/3] docs --- ugropy/writers/clapeyron.py | 1 - ugropy/writers/clapeyron_writers/critical.py | 2 +- ugropy/writers/clapeyron_writers/molar_mass.py | 2 +- ugropy/writers/clapeyron_writers/psrk_groups.py | 2 +- ugropy/writers/clapeyron_writers/unifac_groups.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ugropy/writers/clapeyron.py b/ugropy/writers/clapeyron.py index 2fd65c4..f01f71a 100644 --- a/ugropy/writers/clapeyron.py +++ b/ugropy/writers/clapeyron.py @@ -44,7 +44,6 @@ def to_clapeyron( "batch1_ogUNIFAC_groups.csv". With the default value will be "ogUNIFAC_groups.csv", by default "". """ - # Use pathlib's Path internally path_pathlib = pathlib.Path(path) diff --git a/ugropy/writers/clapeyron_writers/critical.py b/ugropy/writers/clapeyron_writers/critical.py index 2eabc46..3d7ffd1 100644 --- a/ugropy/writers/clapeyron_writers/critical.py +++ b/ugropy/writers/clapeyron_writers/critical.py @@ -21,7 +21,7 @@ def write_critical( Parameters ---------- - path : str, optional + path : pathlib.Path Path to the directory to store de .csv files, by default "./database". batch_name : str, optional Name of the writing batch.
For example, if you name the batch with diff --git a/ugropy/writers/clapeyron_writers/molar_mass.py b/ugropy/writers/clapeyron_writers/molar_mass.py index 44ef8ae..2792206 100644 --- a/ugropy/writers/clapeyron_writers/molar_mass.py +++ b/ugropy/writers/clapeyron_writers/molar_mass.py @@ -24,7 +24,7 @@ def write_molar_mass( Parameters ---------- - path : str, optional + path : pathlib.Path Path to the directory to store de .csv files, by default "./database". batch_name : str, optional Name of the writing batch. For example, if you name the batch with diff --git a/ugropy/writers/clapeyron_writers/psrk_groups.py b/ugropy/writers/clapeyron_writers/psrk_groups.py index bbcfc44..c7e087a 100644 --- a/ugropy/writers/clapeyron_writers/psrk_groups.py +++ b/ugropy/writers/clapeyron_writers/psrk_groups.py @@ -14,7 +14,7 @@ def write_psrk( Parameters ---------- - path : str, optional + path : pathlib.Path Path to the directory to store de .csv files, by default "./database". batch_name : str, optional Name of the writing batch. For example, if you name the batch with diff --git a/ugropy/writers/clapeyron_writers/unifac_groups.py b/ugropy/writers/clapeyron_writers/unifac_groups.py index 7706fc9..16e8812 100644 --- a/ugropy/writers/clapeyron_writers/unifac_groups.py +++ b/ugropy/writers/clapeyron_writers/unifac_groups.py @@ -14,7 +14,7 @@ def write_unifac( Parameters ---------- - path : str, optional + path : pathlib.Path Path to the directory to store de .csv files, by default "./database". batch_name : str, optional Name of the writing batch. For example, if you name the batch with