Skip to content

Commit

Permalink
Merge pull request #23 from fedebenelli/main
Browse files Browse the repository at this point in the history
Migration to `pathlib` and cache groups for better end-user experience
  • Loading branch information
SalvadorBrandolin authored Apr 23, 2024
2 parents 4c26e12 + 58381ae commit 0686e89
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 33 deletions.
5 changes: 0 additions & 5 deletions tests/writers/test_to_clapeyron.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ def test_to_clapeyron():
with open(f"{path_db}/PSRK_groups.csv", mode="r") as f:
df_psrk = pd.read_csv(f, sep="|", index_col=None)

os.mkdir(f"{here}/database")

limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles")
ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15)

Expand Down Expand Up @@ -93,8 +91,6 @@ def test_to_clapeyron_batch_name():
with open(f"{path_db}/PSRK_groups.csv", mode="r") as f:
df_psrk = pd.read_csv(f, sep="|", index_col=None)

os.mkdir(f"{here}/database")

limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles")
ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15)

Expand Down Expand Up @@ -156,7 +152,6 @@ def test_to_clapeyron_batch_name():


def test_molar_mass_csv():
os.mkdir(f"{here}/database")

limonene = Groups("CC1=CCC(CC1)C(=C)C", "smiles")
ethanol = Groups("CCO", "smiles", normal_boiling_temperature=78 + 273.15)
Expand Down
2 changes: 2 additions & 0 deletions ugropy/core/get_rdkit_object.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""get_rdkit_object module."""

from functools import cache
from typing import Union

import pubchempy as pcp

from rdkit import Chem


@cache
def instantiate_mol_object(
identifier: Union[str, Chem.rdchem.Mol], identifier_type: str = "name"
) -> Chem.rdchem.Mol:
Expand Down
1 change: 1 addition & 0 deletions ugropy/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(
identifier_type: str = "name",
normal_boiling_temperature: float = None,
) -> None:

self.identifier_type = identifier_type.lower()
self.identifier = identifier
self.mol_object = instantiate_mol_object(identifier, identifier_type)
Expand Down
17 changes: 13 additions & 4 deletions ugropy/writers/clapeyron.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""to_clapeyron module."""

import pathlib
from typing import List

from ugropy.properties.joback_properties import JobackProperties
Expand Down Expand Up @@ -43,10 +44,16 @@ def to_clapeyron(
"batch1_ogUNIFAC_groups.csv". With the default value will be
"ogUNIFAC_groups.csv", by default "".
"""
# Use pathlib's Path internally
path_pathlib = pathlib.Path(path)

# Check if all lists have correct data:
if len(molecules_names) == 0:
raise ValueError("No names provided for the molecules.")

if not path_pathlib.is_dir():
path_pathlib.mkdir(parents=True)

if unifac_groups and len(unifac_groups) != len(molecules_names):
raise ValueError(
"UNIFAC groups list must have the same amount of elements than"
Expand All @@ -67,7 +74,7 @@ def to_clapeyron(

# Molar mass
write_molar_mass(
path,
path_pathlib,
batch_name,
molecules_names,
unifac_groups,
Expand All @@ -77,12 +84,14 @@ def to_clapeyron(

# LV-UNIFAC
if unifac_groups:
write_unifac(path, batch_name, molecules_names, unifac_groups)
write_unifac(path_pathlib, batch_name, molecules_names, unifac_groups)

# PSRK
if psrk_groups:
write_psrk(path, batch_name, molecules_names, psrk_groups)
write_psrk(path_pathlib, batch_name, molecules_names, psrk_groups)

# Critical
if joback_objects:
write_critical(path, batch_name, molecules_names, joback_objects)
write_critical(
path_pathlib, batch_name, molecules_names, joback_objects
)
10 changes: 6 additions & 4 deletions ugropy/writers/clapeyron_writers/critical.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Joback critical properties writer module."""

import pathlib
from io import StringIO
from typing import List

Expand All @@ -9,7 +10,7 @@


def write_critical(
path: str,
path: pathlib.Path,
batch_name: str,
molecules_names: List[str],
joback_objects: List[JobackProperties] = [],
Expand All @@ -20,7 +21,7 @@ def write_critical(
Parameters
----------
path : str, optional
path : pathlib.Path, optional
Path to the directory to store the .csv files, by default "./database".
batch_name : str, optional
Name of the writing batch. For example, if you name the batch with
Expand All @@ -42,6 +43,7 @@ def write_critical(
"Critical Single Parameters,,,,,\n"
"species,CAS,Tc,Pc,Vc,acentricfactor\n"
)
path_critical = pathlib.Path(path)
# =========================================================================
# Build dataframe
# =========================================================================
Expand All @@ -62,13 +64,13 @@ def write_critical(

if batch_name == "":
with open(
f"{path}/critical.csv", "w", newline="", encoding="utf-8"
path_critical / "critical.csv", "w", newline="", encoding="utf-8"
) as file:
df.to_csv(file, index=False)

else:
with open(
f"{path}/{batch_name}_critical.csv",
path_critical / f"{batch_name}_critical.csv",
"w",
newline="",
encoding="utf-8",
Expand Down
13 changes: 9 additions & 4 deletions ugropy/writers/clapeyron_writers/molar_mass.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Molar mass writer module."""

import pathlib
from io import StringIO
from typing import List

Expand All @@ -12,7 +13,7 @@


def write_molar_mass(
path: str,
path: pathlib.Path,
batch_name: str,
molecules_names: List[str],
unifac_groups: List[dict] = [],
Expand All @@ -23,7 +24,7 @@ def write_molar_mass(
Parameters
----------
path : str, optional
path : pathlib.Path
Path to the directory to store the .csv files, by default "./database".
batch_name : str, optional
Name of the writing batch. For example, if you name the batch with
Expand All @@ -49,6 +50,7 @@ def write_molar_mass(
"Molar Mases Single Params,,\n"
"species,CAS,Mw\n"
)
path_molar_mass = pathlib.Path(path)
# =========================================================================
# Get molecular weights
# =========================================================================
Expand Down Expand Up @@ -90,13 +92,16 @@ def write_molar_mass(

if batch_name == "":
with open(
f"{path}/molarmass.csv", "w", newline="", encoding="utf-8"
path_molar_mass / "molarmass.csv",
"w",
newline="",
encoding="utf-8",
) as file:
df.to_csv(file, index=False)

else:
with open(
f"{path}/{batch_name}_molarmass.csv",
path_molar_mass / f"{batch_name}_molarmass.csv",
"w",
newline="",
encoding="utf-8",
Expand Down
18 changes: 10 additions & 8 deletions ugropy/writers/clapeyron_writers/psrk_groups.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""PSRK groups writer module."""

import os
import pathlib
from typing import List


def write_psrk(
path: str,
path: pathlib.Path,
batch_name: str,
molecules_names: List[str],
psrk_groups: List[dict],
Expand All @@ -14,7 +14,7 @@ def write_psrk(
Parameters
----------
path : str, optional
path : pathlib.Path
Path to the directory to store the .csv files, by default "./database".
batch_name : str, optional
Name of the writing batch. For example, if you name the batch with
Expand All @@ -37,6 +37,8 @@ def write_psrk(
"species,groups\n"
]

path_psrk = path / "PSRK"

for name, groups in zip(molecules_names, psrk_groups):
groups_str = '"['

Expand All @@ -51,14 +53,14 @@ def write_psrk(
lines.extend(new_line)

# Create folder for PSRK groups
if not os.path.exists(f"{path}/PSRK"):
os.makedirs(f"{path}/PSRK")
if not path_psrk.is_dir():
path_psrk.mkdir(parents=True)

# Write .csv
if batch_name == "":
write_path = f"{path}/PSRK/PSRK_groups.csv"
write_path = path_psrk / "PSRK_groups.csv"
else:
write_path = f"{path}/PSRK/{batch_name}_PSRK_groups.csv"
write_path = path_psrk / f"{batch_name}_PSRK_groups.csv"

with open(f"{write_path}", "w", encoding="utf-8", newline="\n") as file:
with open(write_path, "w", encoding="utf-8", newline="\n") as file:
file.writelines(lines)
18 changes: 10 additions & 8 deletions ugropy/writers/clapeyron_writers/unifac_groups.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""UNIFAC groups writer module."""

import os
import pathlib
from typing import List


def write_unifac(
path: str,
path: pathlib.Path,
batch_name: str,
molecules_names: List[str],
unifac_groups: List[dict],
Expand All @@ -14,7 +14,7 @@ def write_unifac(
Parameters
----------
path : str, optional
path : pathlib.Path
Path to the directory to store the .csv files, by default "./database".
batch_name : str, optional
Name of the writing batch. For example, if you name the batch with
Expand All @@ -32,6 +32,8 @@ def write_unifac(
"species,groups\n"
]

path_ogunifac = path / "ogUNIFAC"

for name, groups in zip(molecules_names, unifac_groups):
groups_str = '"['

Expand All @@ -46,14 +48,14 @@ def write_unifac(
lines.extend(new_line)

# Create folder for ogUNIFAC groups
if not os.path.exists(f"{path}/ogUNIFAC"):
os.makedirs(f"{path}/ogUNIFAC")
if not path_ogunifac.is_dir():
path_ogunifac.mkdir(parents=True)

# Write .csv
if batch_name == "":
write_path = f"{path}/ogUNIFAC/ogUNIFAC_groups.csv"
write_path = path_ogunifac / "ogUNIFAC_groups.csv"
else:
write_path = f"{path}/ogUNIFAC/{batch_name}_ogUNIFAC_groups.csv"
write_path = path_ogunifac / f"{batch_name}_ogUNIFAC_groups.csv"

with open(f"{write_path}", "w", encoding="utf-8", newline="\n") as file:
with open(write_path, "w", encoding="utf-8", newline="\n") as file:
file.writelines(lines)

0 comments on commit 0686e89

Please sign in to comment.