Skip to content

Commit

Permalink
docs(io): update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
kmnhan committed Sep 9, 2024
1 parent 79597bf commit ac37328
Showing 1 changed file with 88 additions and 28 deletions.
116 changes: 88 additions & 28 deletions src/erlab/io/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import itertools
import os
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar, Self, cast

import numpy as np
Expand All @@ -34,7 +35,6 @@
from collections.abc import (
Callable,
ItemsView,
Iterable,
Iterator,
KeysView,
Mapping,
Expand Down Expand Up @@ -160,12 +160,16 @@ class LoaderBase(metaclass=_Loader):
"""

skip_validate: bool = False
"""If `True`, validation checks will be skipped."""
"""
If `True`, validation checks will be skipped. If `False`, data will be checked with
:meth:`validate <erlab.io.dataloader.LoaderBase.validate>` every time it is loaded.
"""

strict_validation: bool = False
"""
If `True`, validation check will raise a `ValidationError` on the first failure
instead of warning. Useful for debugging data loaders.
If `True`, validation checks will raise a `ValidationError` on the first failure
instead of warning. Useful for debugging data loaders. This has no effect if
`skip_validate` is `True`.
"""

@property
Expand Down Expand Up @@ -237,11 +241,11 @@ def __init_subclass__(cls, **kwargs) -> None:
LoaderRegistry.instance().register(cls)

@classmethod
def formatter(cls, val: object) -> str:
def value_to_string(cls, val: object) -> str:
"""Format the given value based on its type.
The default behavior formats the given value with :func:`format_value
<erlab.utils.formatting.format_value>`. Override this method to change the
<erlab.utils.formatting.format_value>`. Override this classmethod to change the
printed format of each cell.
"""
Expand All @@ -251,8 +255,10 @@ def formatter(cls, val: object) -> str:
def get_styler(cls, df: pandas.DataFrame) -> pandas.io.formats.style.Styler:
"""Return a styled version of the given dataframe.
This method, along with `formatter`, determines the display formatting of the
summary dataframe. Override this method to change the display style.
This method, along with :meth:`value_to_string
<erlab.io.dataloader.LoaderBase.value_to_string>`, determines the display
formatting of the summary dataframe. Override this classmethod to change the
display style.
Parameters
----------
Expand All @@ -265,7 +271,7 @@ def get_styler(cls, df: pandas.DataFrame) -> pandas.io.formats.style.Styler:
The styler to be displayed.
"""
style = df.style.format(cls.formatter)
style = df.style.format(cls.value_to_string)

hidden = [c for c in ("Time", "Path") if c in df.columns]
if len(hidden) > 0:
Expand All @@ -275,22 +281,30 @@ def get_styler(cls, df: pandas.DataFrame) -> pandas.io.formats.style.Styler:

def load(
self,
identifier: str | int,
data_dir: str | None = None,
identifier: str | os.PathLike | int,
data_dir: str | os.PathLike | None = None,
**kwargs,
) -> xr.DataArray | xr.Dataset | list[xr.DataArray]:
"""Load ARPES data.
Parameters
----------
identifier
Value that identifies a scan uniquely. If a string or path-like object is
given, it is assumed to be the path to the data file. If an integer is
given, it is assumed to be a number that specifies the scan number, and is
used to automatically determine the path to the data file(s).
Value that identifies a scan uniquely.
- If a string or path-like object is given, it is assumed to be the path to
the data file relative to `data_dir`. If `data_dir` is not specified, it
is assumed to be the full path to the data file.
- If an integer is given, it is assumed to be a number that specifies the
scan number, and is used to automatically determine the path to the data
file(s). In this case, the `data_dir` argument must be specified.
data_dir
Where to look for the data. If `None`, the default data directory will be
used.
Where to look for the data. Must be a path to a valid directory. This
argument is required when `identifier` is an integer.
When called as :func:`erlab.io.load`, this argument defaults to the value
set by :func:`erlab.io.set_data_dir` or :func:`erlab.io.loader_context`.
single
For some setups, data for a single scan is saved over multiple files. This
argument is only used for such setups. When `identifier` is resolved to a
Expand All @@ -311,6 +325,44 @@ def load(
xarray.DataArray or xarray.Dataset or list of xarray.DataArray
The loaded data.
Notes
-----
- The `data_dir` set by :func:`erlab.io.set_data_dir` or
:func:`erlab.io.loader_context` is only used when called as
:func:`erlab.io.load`. When called directly on a loader instance, the
`data_dir` argument must be specified.
- The default data directory set by :func:`erlab.io.set_data_dir` or
:func:`erlab.io.loader_context` is silently ignored when *all* of the
following are satisfied:
- `identifier`, taken on its own, is a path to an existing file (e.g., a path
relative to the current working directory).
- `data_dir` is not provided.
- The path created by joining the default data directory and `identifier` does
not point to an existing file.
For instance, consider the following directory structure.
.. code-block:: none
cwd/
├── data/
└── example.txt
The following code will load ``./example.txt`` instead of raising an error
that ``./data/example.txt`` is missing:
.. code-block:: python
import erlab.io
erlab.io.set_data_dir("data")
erlab.io.load("example.txt")
However, if ``./data/example.txt`` also exists, the same code will load that
one instead. This behavior may lead to unexpected results when the directory
structure is not well organized. Keep this in mind and try to keep all data
files at the same level.
"""
single = kwargs.pop("single", False)
parallel = kwargs.pop("parallel", None)
Expand All @@ -323,7 +375,10 @@ def load(
raise ValueError(
"data_dir must be specified when identifier is an integer"
)
file_paths, coord_dict = self.identify(identifier, data_dir, **kwargs)
file_paths, coord_dict = cast(
tuple[list[str], dict[str, Iterable]],
self.identify(identifier, data_dir, **kwargs),
) # Return type enforced by metaclass, cast to avoid mypy error

if len(file_paths) == 0:
raise ValueError(
Expand Down Expand Up @@ -540,7 +595,7 @@ def _format_data_info(series: pandas.Series) -> str:
continue
table += "<tr>"
table += f"<td style='text-align:left;'><b>{k}</b></td>"
table += f"<td style='text-align:left;'>{self.formatter(v)}</td>"
table += f"<td style='text-align:left;'>{self.value_to_string(v)}</td>"
table += "</tr>"

table += "</tbody></table>"
Expand Down Expand Up @@ -1197,8 +1252,8 @@ def loader_context(
def set_data_dir(self, data_dir: str | os.PathLike | None) -> None:
"""Set the default data directory for the data loader.
All subsequent calls to `load` will use the `data_dir` set here unless
specified.
All subsequent calls to :func:`erlab.io.load` will use the `data_dir` set here
unless a `data_dir` is explicitly passed to the call.
Parameters
----------
Expand All @@ -1207,8 +1262,8 @@ def set_data_dir(self, data_dir: str | os.PathLike | None) -> None:
Note
----
This will only affect `load`. If the loader's ``load`` method is called
directly, it will not use the default data directory.
This will only affect :func:`erlab.io.load`. If the loader's ``load`` method is
called directly, it will not use the default data directory.
"""
if data_dir is not None and not os.path.isdir(data_dir):
Expand All @@ -1217,19 +1272,24 @@ def set_data_dir(self, data_dir: str | os.PathLike | None) -> None:

def load(
self,
identifier: str | os.PathLike | int | None,
identifier: str | os.PathLike | int,
data_dir: str | os.PathLike | None = None,
**kwargs,
) -> xr.DataArray | xr.Dataset | list[xr.DataArray]:
loader, default_dir = self._get_current_defaults()

if (
default_dir is not None
and not isinstance(identifier, int)
and os.path.isfile(identifier)
and not os.path.isfile(os.path.join(default_dir, identifier))
):
# If the identifier is a path to a file, ignore default_dir
default_dir = None

Check warning on line 1288 in src/erlab/io/dataloader.py

View check run for this annotation

Codecov / codecov/patch

src/erlab/io/dataloader.py#L1288

Added line #L1288 was not covered by tests

if data_dir is None:
data_dir = default_dir

if not isinstance(identifier, int) and os.path.isfile(identifier):
# If the identifier is a path to a file, ignore data_dir
data_dir = None

return loader.load(identifier, data_dir=data_dir, **kwargs)

def summarize(
Expand Down

0 comments on commit ac37328

Please sign in to comment.