Refactor the data_kind and the virtualfile_to_data functions
seisman committed Oct 14, 2023
1 parent b7b11c5 commit 70fc9e4
Showing 4 changed files with 99 additions and 105 deletions.
35 changes: 16 additions & 19 deletions pygmt/clib/session.py
@@ -32,6 +32,7 @@
fmt_docstring,
tempfile_from_geojson,
tempfile_from_image,
validate_data_input,
)

FAMILIES = [
@@ -1474,11 +1475,8 @@ def virtualfile_from_data(
self,
check_kind=None,
data=None,
x=None,
y=None,
z=None,
extra_arrays=None,
required_z=False,
vectors=None,
ncols=1,
required_data=True,
):
"""
@@ -1497,13 +1495,11 @@
Any raster or vector data format. This could be a file name or
path, a raster grid, a vector matrix/arrays, or other supported
data input.
x/y/z : 1-D arrays or None
x, y, and z columns as numpy arrays.
extra_arrays : list of 1-D arrays
Optional. A list of numpy arrays in addition to x, y, and z.
All of these arrays must be of the same size as the x/y/z arrays.
required_z : bool
State whether the 'z' column is required.
vectors : list of 1-D arrays or None
A list of 1-D arrays. Each array will be a column in the table.
All of these arrays must be of the same size.
ncols : int
The minimum number of columns required for the data.
required_data : bool
Set to True when 'data' is required, or False when dealing with
optional virtual files. [Default is True].
@@ -1537,8 +1533,13 @@
...
<vector memory>: N = 3 <7/9> <4/6> <1/3>
"""
kind = data_kind(
data, x, y, z, required_z=required_z, required_data=required_data
kind = data_kind(data, required=required_data)
validate_data_input(
data=data,
vectors=vectors,
ncols=ncols,
required_data=required_data,
kind=kind,
)

if check_kind:
@@ -1579,11 +1580,7 @@
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
elif kind == "vectors":
_data = [np.atleast_1d(x), np.atleast_1d(y)]
if z is not None:
_data.append(np.atleast_1d(z))
if extra_arrays:
_data.extend(extra_arrays)
_data = [np.atleast_1d(v) for v in vectors]
elif kind == "matrix": # turn 2-D arrays into list of vectors
try:
# pandas.Series will be handled below like a 1-D numpy.ndarray
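
With this refactoring, callers no longer pass x/y/z and extra_arrays separately; they hand virtualfile_from_data a list of 1-D arrays plus the minimum column count. Below is a minimal sketch of the new calling convention for a two-column case; the arrays and the "info" module call are made up purely for illustration.

import numpy as np

from pygmt.clib import Session

# Hypothetical 1-D columns; any equal-length arrays would do.
x = np.array([1.0, 2.0, 3.0])
y = np.array([4.0, 5.0, 6.0])

with Session() as lib:
    # Each array in `vectors` becomes one column of the virtual table;
    # ncols=2 tells the validator that at least two columns are required.
    with lib.virtualfile_from_data(
        check_kind="vector", vectors=[x, y], ncols=2
    ) as vfile:
        lib.call_module("info", vfile)
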
1 change: 1 addition & 0 deletions pygmt/helpers/__init__.py
@@ -20,4 +20,5 @@
is_nonstr_iter,
launch_external_viewer,
non_ascii_to_octal,
validate_data_input,
)
166 changes: 81 additions & 85 deletions pygmt/helpers/utils.py
@@ -15,127 +15,133 @@
from pygmt.exceptions import GMTInvalidInput


def _validate_data_input(
data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None
def validate_data_input(
data=None, vectors=None, ncols=2, required_data=True, kind=None
):
"""
Check if the combination of data/x/y/z is valid.
Check if the data input is valid.
Examples
--------
>>> _validate_data_input(data="infile")
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6])
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9])
>>> _validate_data_input(data=None, required_data=False)
>>> _validate_data_input()
>>> validate_data_input(data="infile")
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6]], ncols=2)
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ncols=3)
>>> validate_data_input(data=None, required_data=False)
>>> validate_data_input()
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: No input data provided.
>>> _validate_data_input(x=[1, 2, 3])
>>> validate_data_input(vectors=[[1, 2, 3], None], ncols=2)
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
>>> _validate_data_input(y=[4, 5, 6])
pygmt.exceptions.GMTInvalidInput: The 'y' column can't be None.
>>> validate_data_input(vectors=[None, [4, 5, 6]], ncols=2)
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
>>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True)
pygmt.exceptions.GMTInvalidInput: The 'x' column can't be None.
>>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], None], ncols=3)
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z.
pygmt.exceptions.GMTInvalidInput: The 'z' column can't be None.
>>> import numpy as np
>>> import pandas as pd
>>> import xarray as xr
>>> data = np.arange(8).reshape((4, 2))
>>> _validate_data_input(data=data, required_z=True, kind="matrix")
>>> validate_data_input(data=data, ncols=3, kind="matrix")
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
>>> _validate_data_input(
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
>>> validate_data_input(
... data=pd.DataFrame(data, columns=["x", "y"]),
... required_z=True,
... ncols=3,
... kind="matrix",
... )
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
>>> _validate_data_input(
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
>>> validate_data_input(
... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
... required_z=True,
... ncols=3,
... kind="matrix",
... )
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
>>> _validate_data_input(data="infile", x=[1, 2, 3])
pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
>>> validate_data_input(data="infile", vectors=[[1, 2, 3], None])
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
>>> _validate_data_input(data="infile", y=[4, 5, 6])
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
>>> validate_data_input(data="infile", vectors=[None, [4, 5, 6]])
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
>>> _validate_data_input(data="infile", z=[7, 8, 9])
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
>>> validate_data_input(data="infile", vectors=[None, None, [7, 8, 9]])
Traceback (most recent call last):
...
pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505
Raises
------
GMTInvalidInput
If the data input is not valid.
"""
if data is None: # data is None
if x is None and y is None: # both x and y are None
if required_data: # data is not optional
raise GMTInvalidInput("No input data provided.")
elif x is None or y is None: # either x or y is None
raise GMTInvalidInput("Must provide both x and y.")
if required_z and z is None: # both x and y are not None, now check z
raise GMTInvalidInput("Must provide x, y, and z.")
else: # data is not None
if x is not None or y is not None or z is not None:
raise GMTInvalidInput("Too much data. Use either data or x/y/z.")
# For 'matrix' kind, check if data has the required z column
if kind == "matrix" and required_z:
if kind is None:
kind = data_kind(data=data, required=required_data)

if kind == "vectors": # From data_kind, we know that data is None
if vectors is None:
raise GMTInvalidInput("No input data provided.")
if len(vectors) < ncols:
raise GMTInvalidInput(
f"Requires {ncols} 1-D arrays but got {len(vectors)}."
)
for i, v in enumerate(vectors[:ncols]):
if v is None:
if i < 3:
msg = f"The '{'xyz'[i]}' column can't be None."
else:
msg = "Column {i} can't be None."
raise GMTInvalidInput(msg)
else:
if vectors is not None and any(v is not None for v in vectors):
raise GMTInvalidInput("Too much data. Pass in either 'data' or 1-D arrays.")
if kind == "matrix": # check number of columns for matrix-like data
if hasattr(data, "shape"): # np.ndarray or pd.DataFrame
if len(data.shape) == 1 and data.shape[0] < 3:
raise GMTInvalidInput("data must provide x, y, and z columns.")
if len(data.shape) > 1 and data.shape[1] < 3:
raise GMTInvalidInput("data must provide x, y, and z columns.")
if hasattr(data, "data_vars") and len(data.data_vars) < 3: # xr.Dataset
raise GMTInvalidInput("data must provide x, y, and z columns.")
if len(data.shape) == 1 and data.shape[0] < ncols:
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
if len(data.shape) > 1 and data.shape[1] < ncols:
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
if hasattr(data, "data_vars") and len(data.data_vars) < ncols: # xr.Dataset
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
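
Because validate_data_input is now exported from pygmt.helpers, the column check can also be exercised directly. A minimal sketch, assuming a hypothetical two-column DataFrame fed to a module that requires three columns:

import pandas as pd

from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import validate_data_input

# Hypothetical two-column table.
df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

try:
    validate_data_input(data=df, ncols=3, kind="matrix")
except GMTInvalidInput as err:
    print(err)  # data must have at least 3 columns.
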


def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True):
def data_kind(data=None, required=True):
"""
Check what kind of data is provided to a module.
Determine the kind of data that will be passed to a module.
Possible types:
It checks the type of the ``data`` argument and determines the kind of
data. Falls back to ``"vectors"`` if ``data`` is None but required.
* a file name provided as 'data'
* a pathlib.PurePath object provided as 'data'
* an xarray.DataArray object provided as 'data'
* a 2-D matrix provided as 'data'
* 1-D arrays x and y (and z, optionally)
* an optional argument (None, bool, int or float) provided as 'data'
Possible data kinds:
Arguments should be ``None`` if not used. If doesn't fit any of these
categories (or fits more than one), will raise an exception.
- ``'file'``: a file name or a pathlib.PurePath object provided as 'data'
- ``'arg'``: an optional argument (None, bool, int or float) provided
as 'data'
- ``'grid'``: an xarray.DataArray with 2 dimensions provided as 'data'
- ``'image'``: an xarray.DataArray with 3 dimensions provided as 'data'
- ``'geojson'``: a geo-like Python object that implements
``__geo_interface__`` (geopandas.GeoDataFrame or shapely.geometry)
provided as 'data'
- ``'matrix'``: a 2-D array provided as 'data'
- ``'vectors'``: a list of 1-D arrays provided as 'vectors'
Parameters
----------
data : str, pathlib.PurePath, None, bool, xarray.DataArray or {table-like}
Pass in either a file name or :class:`pathlib.Path` to an ASCII data
table, an :class:`xarray.DataArray`, a 1-D/2-D
{table-classes} or an option argument.
x/y : 1-D arrays or None
x and y columns as numpy arrays.
z : 1-D array or None
z column as numpy array. To be used optionally when x and y are given.
required_z : bool
State whether the 'z' column is required.
required_data : bool
required : bool
Set to True when 'data' is required, or False when dealing with
optional virtual files. [Default is True].
@@ -151,49 +157,39 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
>>> import numpy as np
>>> import xarray as xr
>>> import pathlib
>>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6]))
>>> data_kind(data=None)
'vectors'
>>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None)
>>> data_kind(data=np.arange(10).reshape((5, 2)))
'matrix'
>>> data_kind(data="my-data-file.txt", x=None, y=None)
>>> data_kind(data="my-data-file.txt")
'file'
>>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None)
>>> data_kind(data=pathlib.Path("my-data-file.txt"))
'file'
>>> data_kind(data=None, x=None, y=None, required_data=False)
>>> data_kind(data=None, required=False)
'arg'
>>> data_kind(data=2.0, x=None, y=None, required_data=False)
>>> data_kind(data=2.0, required=False)
'arg'
>>> data_kind(data=True, x=None, y=None, required_data=False)
>>> data_kind(data=True, required=False)
'arg'
>>> data_kind(data=xr.DataArray(np.random.rand(4, 3)))
'grid'
>>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
'image'
"""
# determine the data kind
if isinstance(data, (str, pathlib.PurePath)):
kind = "file"
elif isinstance(data, (bool, int, float)) or (data is None and not required_data):
elif isinstance(data, (bool, int, float)) or (data is None and not required):
kind = "arg"
elif isinstance(data, xr.DataArray):
kind = "image" if len(data.dims) == 3 else "grid"
elif hasattr(data, "__geo_interface__"):
# geo-like Python object that implements ``__geo_interface__``
# (geopandas.GeoDataFrame or shapely.geometry)
kind = "geojson"
elif data is not None:
elif data is not None: # anything but None is taken as a matrix
kind = "matrix"
else:
else: # fallback to vectors if data is None but required
kind = "vectors"
_validate_data_input(
data=data,
x=x,
y=y,
z=z,
required_z=required_z,
required_data=required_data,
kind=kind,
)
return kind


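The net effect of the refactoring is a cleaner split of responsibilities: data_kind only classifies the input, while validate_data_input performs the checks that data_kind used to run internally. A minimal sketch of the two-step flow that virtualfile_from_data now follows, with hypothetical arrays:

import numpy as np

from pygmt.helpers.utils import data_kind, validate_data_input

x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

# Step 1: classify the input. With data=None but required, the kind
# falls back to "vectors".
kind = data_kind(data=None, required=True)

# Step 2: validate it. Passes silently because both required columns
# are present; raises GMTInvalidInput otherwise.
validate_data_input(data=None, vectors=[x, y], ncols=2, kind=kind)
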
2 changes: 1 addition & 1 deletion pygmt/src/contour.py
@@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs):

with Session() as lib:
file_context = lib.virtualfile_from_data(
check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
check_kind="vector", data=data, vectors=[x, y, z], ncols=3
)
with file_context as fname:
lib.call_module(
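
At the user level nothing changes: Figure.contour still takes x, y, and z, which the wrapper now forwards as vectors=[x, y, z] with ncols=3. A small sketch with made-up points; the coordinates, contour interval, and pen are arbitrary values chosen for illustration.

import pygmt

fig = pygmt.Figure()
fig.contour(
    x=[1, 2, 3, 2],
    y=[1, 2, 1, 3],
    z=[10, 20, 15, 12],
    region=[0, 4, 0, 4],
    projection="X10c",
    frame=True,
    levels=5,
    pen="0.5p,black",
)
fig.savefig("contour.png")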
