GenericMappingTools · seisman · Oct 13, 2023 · Oct 14, 2023 · Oct 15, 2023 · Oct 15, 2023
diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py
@@ -32,6 +32,7 @@
     fmt_docstring,
     tempfile_from_geojson,
     tempfile_from_image,
+    validate_data_input,
 )
 
 FAMILIES = [
@@ -1474,11 +1475,8 @@ def virtualfile_from_data(
         self,
         check_kind=None,
         data=None,
-        x=None,
-        y=None,
-        z=None,
-        extra_arrays=None,
-        required_z=False,
+        vectors=None,
+        ncols=2,
         required_data=True,
     ):
         """
@@ -1497,13 +1495,11 @@ def virtualfile_from_data(
             Any raster or vector data format. This could be a file name or
             path, a raster grid, a vector matrix/arrays, or other supported
             data input.
-        x/y/z : 1-D arrays or None
-            x, y, and z columns as numpy arrays.
-        extra_arrays : list of 1-D arrays
-            Optional. A list of numpy arrays in addition to x, y, and z.
-            All of these arrays must be of the same size as the x/y/z arrays.
-        required_z : bool
-            State whether the 'z' column is required.
+        vectors : list of 1-D arrays or None
+            A list of 1-D arrays. Each array will be a column in the table.
+            All of these arrays must be of the same size.
+        ncols : int
+            The minimum number of columns required for the data.
         required_data : bool
             Set to True when 'data' is required, or False when dealing with
             optional virtual files. [Default is True].
@@ -1537,8 +1533,13 @@ def virtualfile_from_data(
         ...
         <vector memory>: N = 3 <7/9> <4/6> <1/3>
         """
-        kind = data_kind(
-            data, x, y, z, required_z=required_z, required_data=required_data
+        kind = data_kind(data, required=required_data)
+        validate_data_input(
+            data=data,
+            vectors=vectors,
+            ncols=ncols,
+            required_data=required_data,
+            kind=kind,
         )
 
         if check_kind:
@@ -1579,11 +1580,7 @@ def virtualfile_from_data(
                 warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
             _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
         elif kind == "vectors":
-            _data = [np.atleast_1d(x), np.atleast_1d(y)]
-            if z is not None:
-                _data.append(np.atleast_1d(z))
-            if extra_arrays:
-                _data.extend(extra_arrays)
+            _data = [np.atleast_1d(v) for v in vectors]
         elif kind == "matrix":  # turn 2-D arrays into list of vectors
             try:
                 # pandas.Series will be handled below like a 1-D numpy.ndarray

diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py
@@ -20,4 +20,5 @@
     is_nonstr_iter,
     launch_external_viewer,
     non_ascii_to_octal,
+    validate_data_input,
 )
diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py
@@ -15,127 +15,133 @@
 from pygmt.exceptions import GMTInvalidInput
 
 
-def _validate_data_input(
-    data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None
+def validate_data_input(
+    data=None, vectors=None, ncols=2, required_data=True, kind=None
 ):
     """
-    Check if the combination of data/x/y/z is valid.
+    Check if the data input is valid.
 
     Examples
     --------
-    >>> _validate_data_input(data="infile")
-    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6])
-    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9])
-    >>> _validate_data_input(data=None, required_data=False)
-    >>> _validate_data_input()
+    >>> validate_data_input(data="infile")
+    >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6]], ncols=2)
+    >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ncols=3)
+    >>> validate_data_input(data=None, required_data=False)
+    >>> validate_data_input()
     Traceback (most recent call last):
         ...
     pygmt.exceptions.GMTInvalidInput: No input data provided.
-    >>> _validate_data_input(x=[1, 2, 3])
+    >>> validate_data_input(vectors=[[1, 2, 3], None], ncols=2)
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
-    >>> _validate_data_input(y=[4, 5, 6])
+    pygmt.exceptions.GMTInvalidInput: The 'y' column can't be None.
+    >>> validate_data_input(vectors=[None, [4, 5, 6]], ncols=2)
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
-    >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True)
+    pygmt.exceptions.GMTInvalidInput: The 'x' column can't be None.
+    >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], None], ncols=3)
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z.
+    pygmt.exceptions.GMTInvalidInput: The 'z' column can't be None.
     >>> import numpy as np
     >>> import pandas as pd
     >>> import xarray as xr
     >>> data = np.arange(8).reshape((4, 2))
-    >>> _validate_data_input(data=data, required_z=True, kind="matrix")
+    >>> validate_data_input(data=data, ncols=3, kind="matrix")
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
-    >>> _validate_data_input(
+    pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
+    >>> validate_data_input(
     ...     data=pd.DataFrame(data, columns=["x", "y"]),
-    ...     required_z=True,
+    ...     ncols=3,
     ...     kind="matrix",
     ... )
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
-    >>> _validate_data_input(
+    pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
+    >>> validate_data_input(
     ...     data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
-    ...     required_z=True,
+    ...     ncols=3,
     ...     kind="matrix",
     ... )
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns.
-    >>> _validate_data_input(data="infile", x=[1, 2, 3])
+    pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns.
+    >>> validate_data_input(data="infile", vectors=[[1, 2, 3], None])
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
-    >>> _validate_data_input(data="infile", y=[4, 5, 6])
+    pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays.  # noqa: W505
+    >>> validate_data_input(data="infile", vectors=[None, [4, 5, 6]])
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
-    >>> _validate_data_input(data="infile", z=[7, 8, 9])
+    pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays.  # noqa: W505
+    >>> validate_data_input(data="infile", vectors=[None, None, [7, 8, 9]])
     Traceback (most recent call last):
         ...
-    pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z.
+    pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays.  # noqa: W505
 
     Raises
     ------
     GMTInvalidInput
         If the data input is not valid.
     """
-    if data is None:  # data is None
-        if x is None and y is None:  # both x and y are None
-            if required_data:  # data is not optional
-                raise GMTInvalidInput("No input data provided.")
-        elif x is None or y is None:  # either x or y is None
-            raise GMTInvalidInput("Must provide both x and y.")
-        if required_z and z is None:  # both x and y are not None, now check z
-            raise GMTInvalidInput("Must provide x, y, and z.")
-    else:  # data is not None
-        if x is not None or y is not None or z is not None:
-            raise GMTInvalidInput("Too much data. Use either data or x/y/z.")
-        # For 'matrix' kind, check if data has the required z column
-        if kind == "matrix" and required_z:
+    if kind is None:
+        kind = data_kind(data=data, required=required_data)
+
+    if kind == "vectors":  # From data_kind, we know that data is None
+        if vectors is None or all(v is None for v in vectors):
+            raise GMTInvalidInput("No input data provided.")
+        if len(vectors) < ncols:
+            raise GMTInvalidInput(
+                f"Requires {ncols} 1-D arrays but got {len(vectors)}."
+            )
+        for i, v in enumerate(vectors[:ncols]):
+            if v is None:
+                if i < 3:
+                    msg = f"The '{'xyz'[i]}' column can't be None."
+                else:
+                    msg = f"Column {i} can't be None."
+                raise GMTInvalidInput(msg)
+    else:
+        if vectors is not None and any(v is not None for v in vectors):
+            raise GMTInvalidInput("Too much data. Pass in either 'data' or 1-D arrays.")
+        if kind == "matrix":  # check number of columns for matrix-like data
             if hasattr(data, "shape"):  # np.ndarray or pd.DataFrame
-                if len(data.shape) == 1 and data.shape[0] < 3:
-                    raise GMTInvalidInput("data must provide x, y, and z columns.")
-                if len(data.shape) > 1 and data.shape[1] < 3:
-                    raise GMTInvalidInput("data must provide x, y, and z columns.")
-            if hasattr(data, "data_vars") and len(data.data_vars) < 3:  # xr.Dataset
-                raise GMTInvalidInput("data must provide x, y, and z columns.")
+                if len(data.shape) == 1 and data.shape[0] < ncols:
+                    raise GMTInvalidInput(f"data must have at least {ncols} columns.")
+                if len(data.shape) > 1 and data.shape[1] < ncols:
+                    raise GMTInvalidInput(f"data must have at least {ncols} columns.")
+            if hasattr(data, "data_vars") and len(data.data_vars) < ncols:  # xr.Dataset
+                raise GMTInvalidInput(f"data must have at least {ncols} columns.")
 
 
-def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True):
+def data_kind(data=None, required=True):
     """
-    Check what kind of data is provided to a module.
+    Determine the kind of data that will be passed to a module.
 
-    Possible types:
+    It checks the type of the ``data`` argument and determines the kind of
+    data. Falls back to ``"vectors"`` if ``data`` is None but required.
 
-    * a file name provided as 'data'
-    * a pathlib.PurePath object provided as 'data'
-    * an xarray.DataArray object provided as 'data'
-    * a 2-D matrix provided as 'data'
-    * 1-D arrays x and y (and z, optionally)
-    * an optional argument (None, bool, int or float) provided as 'data'
+    Possible data kinds:
 
-    Arguments should be ``None`` if not used. If doesn't fit any of these
-    categories (or fits more than one), will raise an exception.
+    - ``'file'``: a file name or a pathlib.PurePath object provided as 'data'
+    - ``'arg'``: an optional argument (None, bool, int or float) provided
+      as 'data'
+    - ``'grid'``: an xarray.DataArray with 2 dimensions provided as 'data'
+    - ``'image'``: an xarray.DataArray with 3 dimensions provided as 'data'
+    - ``'geojson'``: a geo-like Python object that implements
+      ``__geo_interface__`` (geopandas.GeoDataFrame or shapely.geometry)
+      provided as 'data'
+    - ``'matrix'``: a 2-D array provided as 'data'
+    - ``'vectors'``: a list of 1-D arrays provided as 'vectors'
 
     Parameters
     ----------
     data : str, pathlib.PurePath, None, bool, xarray.DataArray or {table-like}
         Pass in either a file name or :class:`pathlib.Path` to an ASCII data
         table, an :class:`xarray.DataArray`, a 1-D/2-D
         {table-classes} or an option argument.
-    x/y : 1-D arrays or None
-        x and y columns as numpy arrays.
-    z : 1-D array or None
-        z column as numpy array. To be used optionally when x and y are given.
-    required_z : bool
-        State whether the 'z' column is required.
-    required_data : bool
+    required : bool
         Set to True when 'data' is required, or False when dealing with
         optional virtual files. [Default is True].
 
@@ -151,49 +157,39 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data
     >>> import numpy as np
     >>> import xarray as xr
     >>> import pathlib
-    >>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6]))
+    >>> data_kind(data=None)
     'vectors'
-    >>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None)
+    >>> data_kind(data=np.arange(10).reshape((5, 2)))
     'matrix'
-    >>> data_kind(data="my-data-file.txt", x=None, y=None)
+    >>> data_kind(data="my-data-file.txt")
     'file'
-    >>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None)
+    >>> data_kind(data=pathlib.Path("my-data-file.txt"))
     'file'
-    >>> data_kind(data=None, x=None, y=None, required_data=False)
+    >>> data_kind(data=None, required=False)
     'arg'
-    >>> data_kind(data=2.0, x=None, y=None, required_data=False)
+    >>> data_kind(data=2.0, required=False)
     'arg'
-    >>> data_kind(data=True, x=None, y=None, required_data=False)
+    >>> data_kind(data=True, required=False)
     'arg'
     >>> data_kind(data=xr.DataArray(np.random.rand(4, 3)))
     'grid'
     >>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
     'image'
     """
-    # determine the data kind
     if isinstance(data, (str, pathlib.PurePath)):
         kind = "file"
-    elif isinstance(data, (bool, int, float)) or (data is None and not required_data):
+    elif isinstance(data, (bool, int, float)) or (data is None and not required):
         kind = "arg"
     elif isinstance(data, xr.DataArray):
         kind = "image" if len(data.dims) == 3 else "grid"
     elif hasattr(data, "__geo_interface__"):
         # geo-like Python object that implements ``__geo_interface__``
         # (geopandas.GeoDataFrame or shapely.geometry)
         kind = "geojson"
-    elif data is not None:
+    elif data is not None:  # anything but None is taken as a matrix
         kind = "matrix"
-    else:
+    else:  # fallback to vectors if data is None but required
         kind = "vectors"
-    _validate_data_input(
-        data=data,
-        x=x,
-        y=y,
-        z=z,
-        required_z=required_z,
-        required_data=required_data,
-        kind=kind,
-    )
     return kind
 
 

diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py
@@ -44,7 +44,7 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs):
     with GMTTempFile(suffix=".csv") as tmpfile:
         with Session() as lib:
             table_context = lib.virtualfile_from_data(
-                check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
+                check_kind="vector", data=data, vectors=[x, y, z], ncols=3
             )
             # Run blockm* on data table
             with table_context as infile:

diff --git a/pygmt/src/contour.py b/pygmt/src/contour.py
@@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs):
 
     with Session() as lib:
         file_context = lib.virtualfile_from_data(
-            check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
+            check_kind="vector", data=data, vectors=[x, y, z], ncols=3
         )
         with file_context as fname:
             lib.call_module(

diff --git a/pygmt/src/nearneighbor.py b/pygmt/src/nearneighbor.py
@@ -150,7 +150,7 @@ def nearneighbor(data=None, x=None, y=None, z=None, **kwargs):
     with GMTTempFile(suffix=".nc") as tmpfile:
         with Session() as lib:
             table_context = lib.virtualfile_from_data(
-                check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
+                check_kind="vector", data=data, vectors=[x, y, z], ncols=3
             )
             with table_context as infile:
                 if (outgrid := kwargs.get("G")) is None: