Skip to content

Commit

Permalink
Refactor the data_kind and the virtualfile_to_data functions
Browse files Browse the repository at this point in the history
  • Loading branch information
seisman committed Oct 14, 2023
1 parent b7b11c5 commit 5512d2f
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 134 deletions.
27 changes: 9 additions & 18 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,11 +1474,8 @@ def virtualfile_from_data(
self,
check_kind=None,
data=None,
x=None,
y=None,
z=None,
extra_arrays=None,
required_z=False,
vectors=None,
ncols=1,
required_data=True,
):
"""
Expand All @@ -1497,13 +1494,11 @@ def virtualfile_from_data(
Any raster or vector data format. This could be a file name or
path, a raster grid, a vector matrix/arrays, or other supported
data input.
x/y/z : 1-D arrays or None
x, y, and z columns as numpy arrays.
extra_arrays : list of 1-D arrays
Optional. A list of numpy arrays in addition to x, y, and z.
All of these arrays must be of the same size as the x/y/z arrays.
required_z : bool
State whether the 'z' column is required.
vectors : list of 1-D arrays or None
A list of 1-D arrays. Each array will be a column in the table.
All of these arrays must be of the same size.
ncols : int
The minimum number of columns required for the data.
required_data : bool
Set to True when 'data' is required, or False when dealing with
optional virtual files. [Default is True].
Expand Down Expand Up @@ -1538,7 +1533,7 @@ def virtualfile_from_data(
<vector memory>: N = 3 <7/9> <4/6> <1/3>
"""
kind = data_kind(
data, x, y, z, required_z=required_z, required_data=required_data
data, vectors=vectors, ncols=ncols, required_data=required_data
)

if check_kind:
Expand Down Expand Up @@ -1579,11 +1574,7 @@ def virtualfile_from_data(
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
elif kind == "vectors":
_data = [np.atleast_1d(x), np.atleast_1d(y)]
if z is not None:
_data.append(np.atleast_1d(z))
if extra_arrays:
_data.extend(extra_arrays)
_data = [np.atleast_1d(v) for v in vectors]
elif kind == "matrix": # turn 2-D arrays into list of vectors
try:
# pandas.Series will be handled below like a 1-D numpy.ndarray
Expand Down
156 changes: 41 additions & 115 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,99 +15,7 @@
from pygmt.exceptions import GMTInvalidInput


def _validate_data_input(
    data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None
):
    """
    Check if the combination of data/x/y/z is valid.

    Parameters
    ----------
    data
        The ``data`` argument passed by the caller, or None.
    x/y/z
        The x, y, and z columns passed by the caller, or None.
    required_z : bool
        State whether the 'z' column is required.
    required_data : bool
        Set to True when input data is required, or False when the input is
        optional. When False and none of data/x/y is given, the input is
        accepted and no further checks (including ``required_z``) are made.
    kind : str or None
        The data kind. Only ``"matrix"`` triggers the check on the number
        of columns in ``data``.

    Examples
    --------
    >>> _validate_data_input(data="infile")
    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6])
    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9])
    >>> _validate_data_input(data=None, required_data=False)
    >>> _validate_data_input(data=None, required_z=True, required_data=False)
    >>> _validate_data_input()
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: No input data provided.
    >>> _validate_data_input(x=[1, 2, 3])
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
    >>> _validate_data_input(y=[4, 5, 6])
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True)
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z.
    >>> import numpy as np
    >>> import pandas as pd
    >>> import xarray as xr
    >>> data = np.arange(8).reshape((4, 2))
    >>> _validate_data_input(data=data, required_z=True, kind="matrix")
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
    >>> _validate_data_input(
    ...     data=pd.DataFrame(data, columns=["x", "y"]),
    ...     required_z=True,
    ...     kind="matrix",
    ... )
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
    >>> _validate_data_input(
    ...     data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
    ...     required_z=True,
    ...     kind="matrix",
    ... )
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
    >>> _validate_data_input(data="infile", x=[1, 2, 3])
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
    >>> _validate_data_input(data="infile", y=[4, 5, 6])
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
    >>> _validate_data_input(data="infile", z=[7, 8, 9])
    Traceback (most recent call last):
        ...
    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.

    Raises
    ------
    GMTInvalidInput
        If the data input is not valid.
    """
    if data is None:
        if x is None and y is None:
            if required_data:  # data is not optional
                raise GMTInvalidInput("No input data provided.")
            # Optional input that was simply not given. Stop here so the
            # z requirement below is not applied to an absent x/y pair.
            return
        if x is None or y is None:  # exactly one of x and y is missing
            raise GMTInvalidInput("Must provide both x and y.")
        # Both x and y are given, so the z requirement is now meaningful.
        if required_z and z is None:
            raise GMTInvalidInput("Must provide x, y, and z.")
        return

    # data is given: it must not be combined with any of x/y/z.
    if x is not None or y is not None or z is not None:
        raise GMTInvalidInput("Too much data. Use either data or x/y/z.")

    # For 'matrix' kind, check if data has the required z column.
    if kind == "matrix" and required_z:
        if hasattr(data, "shape"):  # np.ndarray or pd.DataFrame
            shape = data.shape
            if len(shape) == 1 and shape[0] < 3:
                raise GMTInvalidInput("data must provide x, y, and z columns.")
            if len(shape) > 1 and shape[1] < 3:
                raise GMTInvalidInput("data must provide x, y, and z columns.")
        if hasattr(data, "data_vars") and len(data.data_vars) < 3:  # xr.Dataset
            raise GMTInvalidInput("data must provide x, y, and z columns.")


def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True):
def data_kind(data=None, vectors=None, ncols=1, required_data=True):
"""
Check what kind of data is provided to a module.
Expand All @@ -129,12 +37,10 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
Pass in either a file name or :class:`pathlib.Path` to an ASCII data
table, an :class:`xarray.DataArray`, a 1-D/2-D
{table-classes} or an option argument.
x/y : 1-D arrays or None
x and y columns as numpy arrays.
z : 1-D array or None
z column as numpy array. To be used optionally when x and y are given.
required_z : bool
State whether the 'z' column is required.
vectors : list of 1-D arrays or None
1-D arrays.
ncols : int
The minimum number of columns required for the data.
required_data : bool
Set to True when 'data' is required, or False when dealing with
optional virtual files. [Default is True].
Expand All @@ -151,25 +57,38 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
>>> import numpy as np
>>> import xarray as xr
>>> import pathlib
>>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6]))
>>> data_kind(
... data=None, vectors=[np.array([1, 2, 3]), np.array([4, 5, 6])]
... )
'vectors'
>>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None)
>>> data_kind(data=np.arange(10).reshape((5, 2)), vectors=[None, None])
'matrix'
>>> data_kind(data="my-data-file.txt", x=None, y=None)
>>> data_kind(data="my-data-file.txt", vectors=[None, None])
'file'
>>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None)
>>> data_kind(data=pathlib.Path("my-data-file.txt"), vectors=[None, None])
'file'
>>> data_kind(data=None, x=None, y=None, required_data=False)
>>> data_kind(data=None, vectors=[None, None], required_data=False)
'arg'
>>> data_kind(data=2.0, x=None, y=None, required_data=False)
>>> data_kind(data=2.0, vectors=[None, None], required_data=False)
'arg'
>>> data_kind(data=True, x=None, y=None, required_data=False)
>>> data_kind(data=True, vectors=[None, None], required_data=False)
'arg'
>>> data_kind(data=xr.DataArray(np.random.rand(4, 3)))
'grid'
>>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
'image'
"""
# Check the combination of data and vectors
if data is None and (vectors is None or all(v is None for v in vectors[:ncols])):
if required_data:
raise GMTInvalidInput("No input data provided.")
if (
data is not None
and vectors is not None
and any(v is not None for v in vectors[:ncols])
):
raise GMTInvalidInput("Too much data. Pass in either 'data' or 1-D arrays.")

# determine the data kind
if isinstance(data, (str, pathlib.PurePath)):
kind = "file"
Expand All @@ -185,15 +104,22 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
kind = "matrix"
else:
kind = "vectors"
_validate_data_input(
data=data,
x=x,
y=y,
z=z,
required_z=required_z,
required_data=required_data,
kind=kind,
)

# Check if the data/vector input is valid
if kind == "vectors":
if len(vectors) < ncols:
raise GMTInvalidInput(f"Must provide at least {ncols} 1-D arrays.")
if any(v is None for v in vectors[:ncols]):
raise GMTInvalidInput("Must provide both x and y.")
elif kind == "matrix":
if hasattr(data, "shape"): # np.ndarray or pd.DataFrame
if len(data.shape) == 1 and data.shape[0] < ncols:
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
if len(data.shape) > 1 and data.shape[1] < ncols:
raise GMTInvalidInput(f"data must have at least {ncols} columns.")
if hasattr(data, "data_vars") and len(data.data_vars) < ncols: # xr.Dataset
raise GMTInvalidInput(f"data must have at least {ncols} columns.")

return kind


Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/contour.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs):

with Session() as lib:
file_context = lib.virtualfile_from_data(
check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
check_kind="vector", data=data, vectors=[x, y, z], ncols=3
)
with file_context as fname:
lib.call_module(
Expand Down

0 comments on commit 5512d2f

Please sign in to comment.