From 49b1b3ff75d5b43de4c85b80b226901cacfc8b9d Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 13 Oct 2023 22:08:43 +0800 Subject: [PATCH] Refactor the data_kind and the virtualfile_from_data functions --- pygmt/clib/session.py | 27 +++---- pygmt/helpers/utils.py | 156 +++++++++++------------------------------ pygmt/src/contour.py | 2 +- 3 files changed, 51 insertions(+), 134 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8db686812c1..de50f593662 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1474,11 +1474,8 @@ def virtualfile_from_data( self, check_kind=None, data=None, - x=None, - y=None, - z=None, - extra_arrays=None, - required_z=False, + vectors=None, + ncols=1, required_data=True, ): """ @@ -1497,13 +1494,11 @@ def virtualfile_from_data( Any raster or vector data format. This could be a file name or path, a raster grid, a vector matrix/arrays, or other supported data input. - x/y/z : 1-D arrays or None - x, y, and z columns as numpy arrays. - extra_arrays : list of 1-D arrays - Optional. A list of numpy arrays in addition to x, y, and z. - All of these arrays must be of the same size as the x/y/z arrays. - required_z : bool - State whether the 'z' column is required. + vectors : list of 1-D arrays + A list of 1-D arrays. Each array will be a column in the table. + All of these arrays must be of the same size. + ncols : int + The minimum number of columns required for the data. required_data : bool Set to True when 'data' is required, or False when dealing with optional virtual files. [Default is True]. 
@@ -1538,7 +1533,7 @@ def virtualfile_from_data( : N = 3 <7/9> <4/6> <1/3> """ kind = data_kind( - data, x, y, z, required_z=required_z, required_data=required_data + data, vectors=vectors, ncols=ncols, required_data=required_data ) if check_kind: @@ -1579,11 +1574,7 @@ def virtualfile_from_data( warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),) elif kind == "vectors": - _data = [np.atleast_1d(x), np.atleast_1d(y)] - if z is not None: - _data.append(np.atleast_1d(z)) - if extra_arrays: - _data.extend(extra_arrays) + _data = [np.atleast_1d(v) for v in vectors] elif kind == "matrix": # turn 2-D arrays into list of vectors try: # pandas.Series will be handled below like a 1-D numpy.ndarray diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 31629a6ea52..241ce7582ef 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -15,99 +15,7 @@ from pygmt.exceptions import GMTInvalidInput -def _validate_data_input( - data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None -): - """ - Check if the combination of data/x/y/z is valid. - - Examples - -------- - >>> _validate_data_input(data="infile") - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6]) - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9]) - >>> _validate_data_input(data=None, required_data=False) - >>> _validate_data_input() - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: No input data provided. - >>> _validate_data_input(x=[1, 2, 3]) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: Must provide both x and y. - >>> _validate_data_input(y=[4, 5, 6]) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: Must provide both x and y. - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True) - Traceback (most recent call last): - ... 
- pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z. - >>> import numpy as np - >>> import pandas as pd - >>> import xarray as xr - >>> data = np.arange(8).reshape((4, 2)) - >>> _validate_data_input(data=data, required_z=True, kind="matrix") - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input( - ... data=pd.DataFrame(data, columns=["x", "y"]), - ... required_z=True, - ... kind="matrix", - ... ) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input( - ... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])), - ... required_z=True, - ... kind="matrix", - ... ) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input(data="infile", x=[1, 2, 3]) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. - >>> _validate_data_input(data="infile", y=[4, 5, 6]) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. - >>> _validate_data_input(data="infile", z=[7, 8, 9]) - Traceback (most recent call last): - ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. - - Raises - ------ - GMTInvalidInput - If the data input is not valid. 
- """ - if data is None: # data is None - if x is None and y is None: # both x and y are None - if required_data: # data is not optional - raise GMTInvalidInput("No input data provided.") - elif x is None or y is None: # either x or y is None - raise GMTInvalidInput("Must provide both x and y.") - if required_z and z is None: # both x and y are not None, now check z - raise GMTInvalidInput("Must provide x, y, and z.") - else: # data is not None - if x is not None or y is not None or z is not None: - raise GMTInvalidInput("Too much data. Use either data or x/y/z.") - # For 'matrix' kind, check if data has the required z column - if kind == "matrix" and required_z: - if hasattr(data, "shape"): # np.ndarray or pd.DataFrame - if len(data.shape) == 1 and data.shape[0] < 3: - raise GMTInvalidInput("data must provide x, y, and z columns.") - if len(data.shape) > 1 and data.shape[1] < 3: - raise GMTInvalidInput("data must provide x, y, and z columns.") - if hasattr(data, "data_vars") and len(data.data_vars) < 3: # xr.Dataset - raise GMTInvalidInput("data must provide x, y, and z columns.") - - -def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True): +def data_kind(data=None, vectors=None, ncols=1, required_data=True): """ Check what kind of data is provided to a module. @@ -129,12 +37,10 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data Pass in either a file name or :class:`pathlib.Path` to an ASCII data table, an :class:`xarray.DataArray`, a 1-D/2-D {table-classes} or an option argument. - x/y : 1-D arrays or None - x and y columns as numpy arrays. - z : 1-D array or None - z column as numpy array. To be used optionally when x and y are given. - required_z : bool - State whether the 'z' column is required. + vectors : list of 1-D arrays or None + 1-D arrays. + ncols : int + The minimum number of columns required for the data. 
required_data : bool Set to True when 'data' is required, or False when dealing with optional virtual files. [Default is True]. @@ -151,25 +57,38 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data >>> import numpy as np >>> import xarray as xr >>> import pathlib - >>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6])) + >>> data_kind( + ... data=None, vectors=[np.array([1, 2, 3]), np.array([4, 5, 6])] + ... ) 'vectors' - >>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None) + >>> data_kind(data=np.arange(10).reshape((5, 2)), vectors=[None, None]) 'matrix' - >>> data_kind(data="my-data-file.txt", x=None, y=None) + >>> data_kind(data="my-data-file.txt", vectors=[None, None]) 'file' - >>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None) + >>> data_kind(data=pathlib.Path("my-data-file.txt"), vectors=[None, None]) 'file' - >>> data_kind(data=None, x=None, y=None, required_data=False) + >>> data_kind(data=None, vectors=[None, None], required_data=False) 'arg' - >>> data_kind(data=2.0, x=None, y=None, required_data=False) + >>> data_kind(data=2.0, vectors=[None, None], required_data=False) 'arg' - >>> data_kind(data=True, x=None, y=None, required_data=False) + >>> data_kind(data=True, vectors=[None, None], required_data=False) 'arg' >>> data_kind(data=xr.DataArray(np.random.rand(4, 3))) 'grid' >>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5))) 'image' """ + # Check the combination of data and vectors + if data is None and (vectors is None or all(v is None for v in vectors[:ncols])): + if required_data: + raise GMTInvalidInput("No input data provided.") + if ( + data is not None + and vectors is not None + and any(v is not None for v in vectors[:ncols]) + ): + raise GMTInvalidInput("Too much data. 
Pass in either 'data' or 1-D arrays.") + # determine the data kind if isinstance(data, (str, pathlib.PurePath)): kind = "file" @@ -185,15 +104,22 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data kind = "matrix" else: kind = "vectors" - _validate_data_input( - data=data, - x=x, - y=y, - z=z, - required_z=required_z, - required_data=required_data, - kind=kind, - ) + + # Check if the data/vector input is valid + if kind == "vectors": + if len(vectors) < ncols: + raise GMTInvalidInput(f"Must provide at least {ncols} 1-D arrays.") + if any(v is None for v in vectors[:ncols]): + raise GMTInvalidInput("Must provide both x and y.") + elif kind == "matrix": + if hasattr(data, "shape"): # np.ndarray or pd.DataFrame + if len(data.shape) == 1 and data.shape[0] < ncols: + raise GMTInvalidInput(f"data must have at least {ncols} columns.") + if len(data.shape) > 1 and data.shape[1] < ncols: + raise GMTInvalidInput(f"data must have at least {ncols} columns.") + if hasattr(data, "data_vars") and len(data.data_vars) < ncols: # xr.Dataset + raise GMTInvalidInput(f"data must have at least {ncols} columns.") + return kind diff --git a/pygmt/src/contour.py b/pygmt/src/contour.py index 6aaf22b7cd6..ac34dcb5d95 100644 --- a/pygmt/src/contour.py +++ b/pygmt/src/contour.py @@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs): with Session() as lib: file_context = lib.virtualfile_from_data( - check_kind="vector", data=data, x=x, y=y, z=z, required_z=True + check_kind="vector", data=data, vectors=[x, y, z], ncols=3 ) with file_context as fname: lib.call_module(