Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hdxms-datasets #327

Merged
merged 21 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions pyhdx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
Coverage,
HDXMeasurementSet,
)
from pyhdx.fileIO import read_dynamx
from pyhdx.datasets import read_dynamx
from pyhdx.fitting_torch import TorchFitResult, TorchFitResultSet
from pyhdx.batch_processing import StateParser
from pyhdx.__version__ import __version__

VERSION_STRING = f"PyHDX {__version__}"
Expand Down
8 changes: 8 additions & 0 deletions pyhdx/batch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from io import StringIO
from pathlib import Path
from typing import Union, Literal, Optional
import warnings

import pandas as pd

Expand All @@ -27,6 +28,9 @@ class DataFile(object):

filepath_or_buffer: Union[Path, StringIO]

def __post_init__(self):
    """Emit a deprecation notice each time this hook runs."""
    message = "Will be removed in favour of the `hdxms-datasets` package "
    warnings.warn(message, category=DeprecationWarning)

@cached_property
def data(self) -> pd.DataFrame:
if self.format == "DynamX":
Expand Down Expand Up @@ -58,6 +62,8 @@ def __init__(
# filter_kwargs: Optional[dict[str, Any]] = None,
# correction_kwargs: Optional[dict[str, Any]] = None,
) -> None:

warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)
self.hdx_spec = hdx_spec
self.data_files: dict[str, DataFile] = {}

Expand Down Expand Up @@ -177,6 +183,7 @@ def batch_filter_peptides(
Returns:
Filtered dataframe.
"""
warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)

if state is not None:
df = df[df["state"] == state]
Expand Down Expand Up @@ -212,6 +219,7 @@ def batch_convert_time(
Converted time value(s).
"""

warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)
src_unit = time_dict["unit"]

time_factor = time_factors[src_unit] / time_factors[target_unit]
Expand Down
23 changes: 22 additions & 1 deletion pyhdx/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

from contextlib import contextmanager
from os import PathLike
from pathlib import Path
from typing import Union, Dict, Any, Optional
from typing import Union, Dict, Any, Optional, Generator

import torch
from omegaconf import OmegaConf, DictConfig, DictKeyType
Expand Down Expand Up @@ -107,6 +108,14 @@ def log_dir(self) -> Path:

return log_dir

@property
def database_dir(self) -> Path:
    """HDXMS-datasets database directory.

    Returns:
        The path configured under ``server.database_dir`` with a leading
        ``~`` expanded to the current user's home directory.
    """
    spec_path = self.conf.server.database_dir

    # Expand only a leading "~". A blanket str.replace("~", home) would also
    # rewrite tildes elsewhere in the path (e.g. "run~1" or Windows 8.3
    # short names such as "JOHNDO~1") and corrupt it.
    return Path(spec_path).expanduser()

@property
def TORCH_DTYPE(self) -> Union[torch.float64, torch.float32]:
"""PyTorch dtype used for ΔG calculations"""
Expand All @@ -124,6 +133,18 @@ def TORCH_DEVICE(self) -> torch.device:
device = self.conf.fitting.device
return torch.device(device)

@contextmanager
def context(self, settings: dict) -> Generator[PyHDXConfig, None, None]:
    """Temporarily override configuration values inside a ``with`` block.

    Args:
        settings: Mapping of (optionally dotted) attribute paths to the
            values they should take while the context is active.

    Yields:
        This config object with the overrides applied.

    The configuration as it was on entry is restored on exit, also when the
    body of the ``with`` block raises.
    """
    # Function-scope import, kept as in the original.
    # NOTE(review): presumably avoids a circular import with pyhdx.support - confirm.
    from pyhdx.support import rsetattr

    original_config = self.conf.copy()

    try:
        for attr, value in settings.items():
            # Operate on this instance rather than the module-level `cfg`,
            # so the method is correct for whichever object it is called on.
            rsetattr(self, attr, value)
        yield self
    finally:
        self.conf = original_config

def valid_config() -> bool:
"""Checks if the current config file in the user home directory is a valid config
Expand Down
1 change: 1 addition & 0 deletions pyhdx/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ cluster:
server:
assets_dir: ~/.pyhdx/assets
log_dir: ~/.pyhdx/logs
database_dir : ~/.hdxms_datasets/datasets

fitting:
dtype: float64
Expand Down
1 change: 1 addition & 0 deletions pyhdx/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from hdxms_datasets import *
13 changes: 8 additions & 5 deletions pyhdx/fileIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
import re
import shutil
from datetime import datetime
from io import StringIO
from io import StringIO, BytesIO
from pathlib import Path
from typing import Union, Literal, Tuple, List, TextIO, Optional, TYPE_CHECKING, Any
from typing import Union, Literal, Tuple, List, TextIO, Optional, TYPE_CHECKING, Any, BinaryIO
from importlib import import_module
import torch.nn as nn
import torch as t
import pandas as pd
import yaml
import warnings

import pyhdx

Expand Down Expand Up @@ -43,6 +44,8 @@ def read_dynamx(
Peptide table as a pandas DataFrame.
"""

warnings.warn("Will be removed in favour of the `hdxms-datasets` package ", DeprecationWarning)

if isinstance(filepath_or_buffer, StringIO):
hdr = filepath_or_buffer.readline().strip("# \n\t")
filepath_or_buffer.seek(0)
Expand All @@ -64,7 +67,7 @@ def read_dynamx(
return df


def read_header(file_obj: TextIO, comment: str = "#") -> List[str]:
def read_header(file_obj: Union[TextIO, BinaryIO], comment: str = "#") -> List[str]:
header = []

while True:
Expand All @@ -77,7 +80,7 @@ def read_header(file_obj: TextIO, comment: str = "#") -> List[str]:
return header


def parse_header(filepath_or_buffer: Union[Path[str], str, StringIO], comment: str = "#") -> dict:
def parse_header(filepath_or_buffer: Union[Path[str], str, StringIO, BytesIO], comment: str = "#") -> dict:
"""
Reads the header from a file and returns JSON metadata from header lines marked as comment.

Expand All @@ -89,7 +92,7 @@ def parse_header(filepath_or_buffer: Union[Path[str], str, StringIO], comment: s
Dictionary of read metadata.
"""

if isinstance(filepath_or_buffer, StringIO):
if isinstance(filepath_or_buffer, (StringIO, BytesIO)):
header = read_header(filepath_or_buffer, comment=comment)
filepath_or_buffer.seek(0)
else:
Expand Down
2 changes: 1 addition & 1 deletion pyhdx/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def fit_gibbs_global(
optimizer="SGD",
callbacks=None,
**optimizer_kwargs,
):
) -> TorchFitResult:
"""
Fit Gibbs free energies globally to all D-uptake data in the supplied hdxm

Expand Down
57 changes: 52 additions & 5 deletions pyhdx/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import warnings
from functools import partial
from numbers import Number
from typing import Optional, Any, Union
from typing import Optional, Any, Union, TYPE_CHECKING

import numpy as np
import numpy.typing as npt
Expand All @@ -18,10 +18,13 @@

from pyhdx.alignment import align_dataframes
from pyhdx.fileIO import dataframe_to_file
from pyhdx.process import verify_sequence, parse_temperature
from pyhdx.process import verify_sequence, parse_temperature, correct_d_uptake, apply_control
from pyhdx.support import reduce_inter, dataframe_intersection, array_intersection
from pyhdx.config import cfg

if TYPE_CHECKING:
from hdxms_datasets import HDXDataSet


class Coverage:
"""
Expand Down Expand Up @@ -283,6 +286,46 @@ def __init__(self, data: pd.DataFrame, **metadata: Any):
.sort_index(axis=1, level=0, sort_remaining=False)
)

@classmethod
def from_dataset(cls, dataset: HDXDataSet, state: str | int, **metadata) -> HDXMeasurement:
    """Create an HDXMeasurement object from a HDXDataSet object.

    Args:
        dataset: HDXDataSet object.
        state: State label or index for measurement in the dataset.
        **metadata: Additional metadata. Takes precedence over metadata found
            in the dataset specification.

    Returns:
        HDXMeasurement object.

    """

    state = dataset.states[state] if isinstance(state, int) else state
    state_spec = dataset.hdx_spec["states"][state]
    peptide_spec = state_spec["peptides"]

    peptides = dataset.load_peptides(state, "experiment")
    fd_peptides = (
        dataset.load_peptides(state, "FD_control") if "FD_control" in peptide_spec else None
    )
    nd_peptides = (
        dataset.load_peptides(state, "ND_control") if "ND_control" in peptide_spec else None
    )

    # Precedence (lowest to highest): globally defined metadata, state
    # specific metadata, keyword arguments. A new dict is built on purpose:
    # updating the dict stored in `dataset.hdx_spec` in place would leak this
    # state's metadata into every state loaded afterwards.
    metadata = {
        **dataset.hdx_spec.get("metadata", {}),
        **state_spec.get("metadata", {}),
        **metadata,
    }

    peptides = apply_control(peptides, fd_peptides, nd_peptides)
    peptides = correct_d_uptake(
        peptides,
        drop_first=cfg.analysis.drop_first,
        d_percentage=metadata.get("d_percentage", 100.0),
    )

    # Use `cls` so subclasses get instances of their own type.
    return cls(peptides, name=state, **metadata)

def __str__(self) -> str:
"""String representation of this HDX measurement object.

Expand Down Expand Up @@ -329,9 +372,7 @@ def temperature(self) -> Optional[float]:
return temperature
elif isinstance(temperature, dict):
return parse_temperature(**temperature)

return self.metadata.get("temperature", None)


@property
def pH(self) -> Optional[float]:
"""pH of the H/D exchange reaction."""
Expand Down Expand Up @@ -765,6 +806,12 @@ def __iter__(self):
def __getitem__(self, item: int) -> HDXMeasurement:
return self.hdxm_list.__getitem__(item)

@classmethod
def from_dataset(cls, dataset: HDXDataSet, **metadata) -> HDXMeasurementSet:
    """Create an HDXMeasurementSet from all states in a HDXDataSet object.

    Args:
        dataset: HDXDataSet object.
        **metadata: Additional metadata, forwarded unchanged to
            `HDXMeasurement.from_dataset` for every state.

    Returns:
        Measurement set holding one HDXMeasurement per entry in
        ``dataset.states``, in that order.
    """
    hdxm_list = [
        HDXMeasurement.from_dataset(dataset, state, **metadata) for state in dataset.states
    ]

    # Use `cls` so subclasses get instances of their own type.
    return cls(hdxm_list)

def get(self, name: str) -> HDXMeasurement:
"""
Get HDXMeasurement object by name.
Expand Down
6 changes: 6 additions & 0 deletions pyhdx/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,13 +615,19 @@ def linear_bars(
sort=False,
**figure_kwargs,
):

# input data should always be 3 levels
# grouping is done by the first level
# second level gives each bar
# third level should have columns with the specified 'field'
if data.columns.nlevels == 2:
data = data.copy()
columns = pd.MultiIndex.from_tuples(
[("", *tup) for tup in data.columns], names=["group"] + data.columns.names
)
data.columns = columns

# todo this should be done by the 'user'
data = data.xs(level=-1, key=field, drop_level=False, axis=1)

groupby = groupby or data.columns.names[0]
Expand Down
2 changes: 2 additions & 0 deletions pyhdx/process.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from functools import reduce
import warnings
from typing import Optional, Literal, Union

import pandas as pd
Expand Down Expand Up @@ -254,6 +255,7 @@ def filter_peptides(

"""

warnings.warn("`filter_peptides` will be moved to the `hdxms-datasets` package", DeprecationWarning)
if state:
df = df[df["state"] == state]

Expand Down
12 changes: 12 additions & 0 deletions pyhdx/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,3 +847,15 @@ def array_intersection(arrays: Iterable[np.ndarray], fields: Iterable[str]) -> l
selected = [elem[np.isin(fields_view(elem, fields), intersection)] for elem in arrays]

return selected

# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties
def rsetattr(obj, attr, val):
    """Set a (possibly nested) attribute given a dotted path.

    Args:
        obj: Object to start the attribute lookup from.
        attr: Attribute name; dots separate nested attributes, e.g. ``"a.b.c"``.
        val: Value to assign to the final attribute in the path.

    Returns:
        The result of `setattr`, i.e. None.
    """
    parent_path, _, leaf = attr.rpartition(".")
    # Without a dot in the path the attribute lives directly on `obj`.
    target = rgetattr(obj, parent_path) if parent_path else obj
    return setattr(target, leaf, val)


def rgetattr(obj, attr, *args):
    """Get a (possibly nested) attribute given a dotted path.

    Args:
        obj: Object to start the attribute lookup from.
        attr: Attribute name; dots separate nested attributes, e.g. ``"a.b.c"``.
        *args: Optional default, passed to `getattr` at every step of the path.

    Returns:
        The value found at the end of the attribute path.
    """
    current = obj
    for name in attr.split("."):
        current = getattr(current, name, *args)
    return current
Loading
Loading