From 713ad0ab613a7a80d1ad89aa5713ac72572b21f3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 15 Mar 2023 23:37:44 +0800 Subject: [PATCH 01/85] Add data types GMT_DATASEGMENT, GMT_DATATABLE and GMT_DATASET --- pygmt/datatypes.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 pygmt/datatypes.py diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py new file mode 100644 index 00000000000..6c83a35bbd0 --- /dev/null +++ b/pygmt/datatypes.py @@ -0,0 +1,82 @@ +import ctypes as ctp + + +class GMT_DATASEGMENT(ctp.Structure): + """ + For holding segment lines in memory. + """ + + _fields_ = [ + ("n_rows", ctp.c_uint64), # Number of points in this segment + ("n_columns", ctp.c_uint64), # Number of fields in each record (>= 2) + ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column + # Data x, y, and possibly other columns + ("data", ctp.POINTER(ctp.POINTER(ctp.c_double))), + ("label", ctp.c_char_p), # Label string (if applicable) + ("header", ctp.c_char_p), # Segment header (if applicable) + ("text", ctp.POINTER(ctp.c_char_p)), # text beyond the data + ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API + ] + + +class GMT_DATATABLE(ctp.Structure): + """ + To hold an array of line segment structures and header information in one + container. + """ + + _fields_ = [ + ("n_headers", ctp.c_uint), # Number of file header records (0 if no header) + ("n_columns", ctp.c_uint64), # Number of columns (fields) in each record + ("n_segments", ctp.c_uint64), # Number of segments in the array + ("n_records", ctp.c_uint64), # Total number of data records across all segments + ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column + ( + "header", + ctp.POINTER(ctp.c_char_p), + ), # Array with all file header records, if any + ( + "segment", + ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT)), + ), # Pointer to array of segments + ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API + ] + + +class GMT_DATASET(ctp.Structure): + """ + Single container for an array of GMT tables (files) + """ + + _fields = [ + ("n_tables", ctp.c_uint64), # The total number of tables (files) contained + ("n_columns", ctp.c_uint64), # The number of data columns + ("n_segments", ctp.c_uint64), # The total number of segments across all tables + ( + "n_records", + ctp.c_uint64, + ), # The total number of data records across all tables + ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column + ( + "table", + ctp.POINTER(ctp.POINTER(GMT_DATATABLE)), + ), # Pointer to array of tables + ( + "type", + ctp.c_uint, + ), # The datatype (numerical, text, or mixed) of this dataset + ("geometry", ctp.c_unit), # The geometry of this dataset + ( + "ProjRefPROJ4", + ctp.c_char_p, + ), # To store a referencing system string in PROJ.4 format + ( + "ProjRefWKT", + ctp.c_char_p, + ), # To store a referencing system string in WKT format + ("ProjRefEPSG", ctp.c_int), # To store a referencing system EPSG code + ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API + ] From 9c580f94cc7dfd07734d49a2eea9ed25246e6212 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 16 Mar 2023 00:18:04 +0800 Subject: [PATCH 02/85] Updates --- pygmt/clib/session.py | 23 +++++++++++++++++++++++ pygmt/datatypes.py | 31 ++++++++++++------------------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8db686812c1..91e2c9bb61a 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1680,3 +1680,26 @@ def extract_region(self): if status != 0: raise GMTCLibError("Failed to extract region from current figure.") return wesn + + def read_virtualfile(self, vfname): + """ + Read data from a virtual file. + + Parameters + ---------- + vfname : str + Name of the virtual file to read. + + Returns + ------- + Pointer to the data, which can be casted into GMT data types. + """ + c_read_virtualfile = self.get_libgmt_func( + "GMT_Read_VirtualFile", + argtypes=[ + ctp.c_void_p, + ctp.c_char_p, + ], + restype=ctp.c_void_p, + ) + return c_read_virtualfile(self.session_pointer, vfname.encode()) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 6c83a35bbd0..4f64e7e2324 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -50,33 +50,26 @@ class GMT_DATASET(ctp.Structure): Single container for an array of GMT tables (files) """ - _fields = [ + _fields_ = [ ("n_tables", ctp.c_uint64), # The total number of tables (files) contained ("n_columns", ctp.c_uint64), # The number of data columns ("n_segments", ctp.c_uint64), # The total number of segments across all tables - ( - "n_records", - ctp.c_uint64, - ), # The total number of data records across all tables + # The total number of data records across all tables + ("n_records", ctp.c_uint64), ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column - ( - "table", - ctp.POINTER(ctp.POINTER(GMT_DATATABLE)), - ), # Pointer to array of tables - ( - "type", - ctp.c_uint, - ), # The datatype (numerical, text, or mixed) of this dataset - ("geometry", ctp.c_unit), # The geometry of this dataset - ( - "ProjRefPROJ4", - ctp.c_char_p, - ), # To store a referencing system string in PROJ.4 format + # Pointer to array of tables + ("table", ctp.POINTER(ctp.POINTER(GMT_DATATABLE))), + # The datatype (numerical, text, or mixed) of this dataset + ("type", ctp.c_int32), + ("geometry", ctp.c_int32), # The geometry of this dataset + # To store a referencing system string in PROJ.4 format + ("ProjRefPROJ4", ctp.c_char_p), + # To store a referencing system string in WKT format ( "ProjRefWKT", ctp.c_char_p, - ), # To store a referencing system string in WKT format + ), ("ProjRefEPSG", ctp.c_int), # To store a referencing system EPSG code ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API ] From 3ce3341e14deb1699ab2185f3f6cdfc2a4c319fd Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 16 Mar 2023 19:52:20 +0800 Subject: [PATCH 03/85] Fix formatting --- pygmt/datatypes.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 4f64e7e2324..1d9d4329db8 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -33,14 +33,10 @@ class GMT_DATATABLE(ctp.Structure): ("n_records", ctp.c_uint64), # Total number of data records across all segments ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column - ( - "header", - ctp.POINTER(ctp.c_char_p), - ), # Array with all file header records, if any - ( - "segment", - ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT)), - ), # Pointer to array of segments + # Array with all file header records, if any + ("header", ctp.POINTER(ctp.c_char_p)), + # Pointer to array of segments + ("segment", ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT))), ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API ] @@ -66,10 +62,7 @@ class GMT_DATASET(ctp.Structure): # To store a referencing system string in PROJ.4 format ("ProjRefPROJ4", ctp.c_char_p), # To store a referencing system string in WKT format - ( - "ProjRefWKT", - ctp.c_char_p, - ), + ("ProjRefWKT", ctp.c_char_p), ("ProjRefEPSG", ctp.c_int), # To store a referencing system EPSG code ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API ] From b655f8868f67614b59c2f00f56f6cd4925ace15a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 18:45:23 +0800 Subject: [PATCH 04/85] Finally, a working version --- pygmt/clib/session.py | 31 +++++++++++++++++++++++ pygmt/src/grd2xyz.py | 59 ++++++++++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 20 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 91e2c9bb61a..407cc3442c7 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -21,6 +21,7 @@ vectors_to_arrays, ) from pygmt.clib.loading import load_libgmt +from pygmt.datatypes import GMT_DATASET from pygmt.exceptions import ( GMTCLibError, GMTCLibNoSessionError, @@ -1703,3 +1704,33 @@ def read_virtualfile(self, vfname): restype=ctp.c_void_p, ) return c_read_virtualfile(self.session_pointer, vfname.encode()) + + @contextmanager + def virtualfile_to_gmtdataset(self): + """ + Create a virtual file for writing a GMT_GRID object. + + Yields + ------ + vfile : str + Name of the virtual file. + """ + family = "GMT_IS_DATASET" + geometry = "GMT_IS_PLP" + with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: + yield vfile + + def gmtdataset_to_vectors(self, vfile): + data = ctp.cast(self.read_virtualfile(f"{vfile}"), ctp.POINTER(GMT_DATASET)) + ds = data.contents + + vectors = [] + for itble in range(ds.n_tables): + dtbl = ds.table[itble].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + for icol in range(dseg.n_columns): + vectors.append( + np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) + ) + return vectors diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 0b4338a7aa2..20103857003 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -3,12 +3,12 @@ """ import warnings +import numpy as np import pandas as pd import xarray as xr from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, @@ -172,25 +172,44 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): # Reverse the dims because it is rows, columns ordered. dataframe_header = [grid.dims[1], grid.dims[0], grid.name] - with GMTTempFile() as tmpfile: - with Session() as lib: - file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) - with file_context as infile: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module="grd2xyz", - args=build_arg_string(kwargs, infile=infile, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - result = pd.read_csv( - tmpfile.name, sep="\t", names=dataframe_header, comment=">" + # Two different options to output the data + # Option 1 + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as invfile, lib.virtualfile_to_gmtdataset() as outvfile: + lib.call_module( + module="grd2xyz", + args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), ) - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None + vectors = lib.gmtdataset_to_vectors(outvfile) + if output_type == "file": + lib.call_module("write", f"{outvfile} {outfile} -Td") + return None + + if output_type == "numpy": + return np.array(vectors).T + return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + + """ + # Option 2 + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as invfile, lib.virtualfile_to_gmtdataset() as outvfile: + if output_type == "file": + outvfile = outfile + lib.call_module( + module="grd2xyz", + args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), + ) + + if output_type == "file": + return None + + vectors = lib.gmtdataset_to_vectors(outvfile) if output_type == "numpy": - result = result.to_numpy() - return result + return np.array(vectors).T + return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + """ From 221efec42cef6dd9c3976cdc2325efcfb9999fcf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 19:16:02 +0800 Subject: [PATCH 05/85] Fix the code structure --- pygmt/src/grd2xyz.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 20103857003..68ad69c5c8b 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -182,15 +182,14 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): module="grd2xyz", args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), ) - vectors = lib.gmtdataset_to_vectors(outvfile) - if output_type == "file": lib.call_module("write", f"{outvfile} {outfile} -Td") return None - if output_type == "numpy": - return np.array(vectors).T - return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + vectors = lib.gmtdataset_to_vectors(outvfile) + if output_type == "numpy": + return np.array(vectors).T + return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) """ # Option 2 @@ -205,11 +204,11 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), ) - if output_type == "file": - return None + if output_type == "file": + return None - vectors = lib.gmtdataset_to_vectors(outvfile) - if output_type == "numpy": - return np.array(vectors).T - return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) - """ + vectors = lib.gmtdataset_to_vectors(outvfile) + if output_type == "numpy": + return np.array(vectors).T + return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + """ \ No newline at end of file From 6f4a6513e55de53a33fb7d6cc0c226d7d8bc2d7c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 19:19:05 +0800 Subject: [PATCH 06/85] Simplify the two options for grd2xyz --- pygmt/src/grd2xyz.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 68ad69c5c8b..9b2db1734cf 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -8,12 +8,7 @@ import xarray as xr from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import ( - build_arg_string, - fmt_docstring, - kwargs_to_strings, - use_alias, -) +from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias __doctest_skip__ = ["grd2xyz"] @@ -178,25 +173,22 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): with lib.virtualfile_from_data( check_kind="raster", data=grid ) as invfile, lib.virtualfile_to_gmtdataset() as outvfile: + # Option 1 lib.call_module( module="grd2xyz", args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), ) + if output_type == "file": lib.call_module("write", f"{outvfile} {outfile} -Td") return None - vectors = lib.gmtdataset_to_vectors(outvfile) if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) - """ - # Option 2 - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as invfile, lib.virtualfile_to_gmtdataset() as outvfile: + """ + # Option 2 if output_type == "file": outvfile = outfile lib.call_module( @@ -206,9 +198,8 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None - vectors = lib.gmtdataset_to_vectors(outvfile) if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) - """ \ No newline at end of file + """ From 9a1dc0cb5ff07e88b80ef0a6fc61f87e6e827667 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 19:22:48 +0800 Subject: [PATCH 07/85] fix --- pygmt/src/grd2xyz.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 9b2db1734cf..1c820aee567 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -167,8 +167,6 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): # Reverse the dims because it is rows, columns ordered. dataframe_header = [grid.dims[1], grid.dims[0], grid.name] - # Two different options to output the data - # Option 1 with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid From 1843b35abf9666a0a9858e274b5eefbf7352ec20 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 20:24:17 +0800 Subject: [PATCH 08/85] Add docstrings to pygmt/datatypes.py --- pygmt/datatypes.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 1d9d4329db8..d544a5f2f40 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -1,3 +1,8 @@ +""" +GMT data types for ctypes. + +See the GMT source code gmt_resources.h for the original C struct definitions. +""" import ctypes as ctp @@ -43,7 +48,7 @@ class GMT_DATATABLE(ctp.Structure): class GMT_DATASET(ctp.Structure): """ - Single container for an array of GMT tables (files) + Single container for an array of GMT tables (files). """ _fields_ = [ From bd52024bf7ddae0e24905165ae433ab12beef116 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 20:34:41 +0800 Subject: [PATCH 09/85] Improve the docstrings --- pygmt/clib/session.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 407cc3442c7..c9dbda7cb45 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1708,7 +1708,7 @@ def read_virtualfile(self, vfname): @contextmanager def virtualfile_to_gmtdataset(self): """ - Create a virtual file for writing a GMT_GRID object. + Create a virtual file for writing a GMT_DATASET object. Yields ------ @@ -1721,12 +1721,26 @@ def virtualfile_to_gmtdataset(self): yield vfile def gmtdataset_to_vectors(self, vfile): - data = ctp.cast(self.read_virtualfile(f"{vfile}"), ctp.POINTER(GMT_DATASET)) - ds = data.contents + """ + Read GMT_DATASET object from a virtual file and convert to vectors. + + Parameters + ---------- + vfile : str + Name of the virtual file. + + Returns + ------- + vectors : list of 1-D arrays + List of vectors containing the data from the GMT_DATASET object. + """ + # Read the virtual file and cast it to a pointer to a GMT_DATASET + ds = ctp.cast(self.read_virtualfile(vfile), ctp.POINTER(GMT_DATASET)).contents + # Loop over the tables, segments, and columns to get the data as vectors vectors = [] - for itble in range(ds.n_tables): - dtbl = ds.table[itble].contents + for itbl in range(ds.n_tables): + dtbl = ds.table[itbl].contents for iseg in range(dtbl.n_segments): dseg = dtbl.segment[iseg].contents for icol in range(dseg.n_columns): From 19eb3aab7d0b04e7f1dbd60c90a51cbd457cb4a7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 21:52:07 +0800 Subject: [PATCH 10/85] Get rid of temporary files from grdtrack --- pygmt/src/grdtrack.py | 62 +++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index d9b005883b9..088553fc543 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -1,16 +1,11 @@ """ grdtrack - Sample grids at specified (x,y) locations. """ +import numpy as np import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import ( - GMTTempFile, - build_arg_string, - fmt_docstring, - kwargs_to_strings, - use_alias, -) +from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias __doctest_skip__ = ["grdtrack"] @@ -43,7 +38,9 @@ w="wrap", ) @kwargs_to_strings(R="sequence", S="sequence", i="sequence_comma", o="sequence_comma") -def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): +def grdtrack( + grid, points=None, newcolname=None, output_type="pandas", outfile=None, **kwargs +): r""" Sample grids at specified (x,y) locations. @@ -292,29 +289,30 @@ def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): if hasattr(points, "columns") and newcolname is None: raise GMTInvalidInput("Please pass in a str to 'newcolname'") - with GMTTempFile(suffix=".csv") as tmpfile: - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as grdfile, lib.virtualfile_from_data( - check_kind="vector", data=points, required_data=False - ) as csvfile: - kwargs["G"] = grdfile - if outfile is None: # Output to tmpfile if outfile is not set - outfile = tmpfile.name - lib.call_module( - module="grdtrack", - args=build_arg_string(kwargs, infile=csvfile, outfile=outfile), - ) + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as grdfile, lib.virtualfile_from_data( + check_kind="vector", data=points, required_data=False + ) as csvfile, lib.virtualfile_to_gmtdataset() as outvfile: + kwargs["G"] = grdfile + lib.call_module( + module="grdtrack", + args=build_arg_string(kwargs, infile=csvfile, outfile=outvfile), + ) + if outfile is not None: + # if output_type == "file": + lib.call_module("write", f"{outvfile} {outfile} -Td") + return None + + vectors = lib.gmtdataset_to_vectors(outvfile) + + if output_type == "numpy": + return np.array(vectors).T - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - try: - column_names = points.columns.to_list() + [newcolname] - result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) - except AttributeError: # 'str' object has no attribute 'columns' - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None + if isinstance(points, pd.DataFrame): + column_names = points.columns.to_list() + [newcolname] + else: + column_names = None - return result + return pd.DataFrame(np.array(vectors).T, columns=column_names) From 85460486da3dfa8f51fc4fa2c5ba13ecee80e38e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 10 Oct 2023 14:02:59 +0800 Subject: [PATCH 11/85] Revert "Get rid of temporary files from grdtrack" This reverts commit 19eb3aab7d0b04e7f1dbd60c90a51cbd457cb4a7. --- pygmt/src/grdtrack.py | 62 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 088553fc543..d9b005883b9 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -1,11 +1,16 @@ """ grdtrack - Sample grids at specified (x,y) locations. """ -import numpy as np import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias +from pygmt.helpers import ( + GMTTempFile, + build_arg_string, + fmt_docstring, + kwargs_to_strings, + use_alias, +) __doctest_skip__ = ["grdtrack"] @@ -38,9 +43,7 @@ w="wrap", ) @kwargs_to_strings(R="sequence", S="sequence", i="sequence_comma", o="sequence_comma") -def grdtrack( - grid, points=None, newcolname=None, output_type="pandas", outfile=None, **kwargs -): +def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): r""" Sample grids at specified (x,y) locations. @@ -289,30 +292,29 @@ def grdtrack( if hasattr(points, "columns") and newcolname is None: raise GMTInvalidInput("Please pass in a str to 'newcolname'") - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as grdfile, lib.virtualfile_from_data( - check_kind="vector", data=points, required_data=False - ) as csvfile, lib.virtualfile_to_gmtdataset() as outvfile: - kwargs["G"] = grdfile - lib.call_module( - module="grdtrack", - args=build_arg_string(kwargs, infile=csvfile, outfile=outvfile), - ) - if outfile is not None: - # if output_type == "file": - lib.call_module("write", f"{outvfile} {outfile} -Td") - return None - - vectors = lib.gmtdataset_to_vectors(outvfile) - - if output_type == "numpy": - return np.array(vectors).T + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as grdfile, lib.virtualfile_from_data( + check_kind="vector", data=points, required_data=False + ) as csvfile: + kwargs["G"] = grdfile + if outfile is None: # Output to tmpfile if outfile is not set + outfile = tmpfile.name + lib.call_module( + module="grdtrack", + args=build_arg_string(kwargs, infile=csvfile, outfile=outfile), + ) - if isinstance(points, pd.DataFrame): - column_names = points.columns.to_list() + [newcolname] - else: - column_names = None + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + try: + column_names = points.columns.to_list() + [newcolname] + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + except AttributeError: # 'str' object has no attribute 'columns' + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None - return pd.DataFrame(np.array(vectors).T, columns=column_names) + return result From 76944a86123512334a9d967ee1bf93b29e258b00 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 9 Oct 2023 21:52:07 +0800 Subject: [PATCH 12/85] pygmt.grdtrack: Support consistent table-like outputs --- pygmt/src/grdtrack.py | 79 +++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index d9b005883b9..4825ea9eff9 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -1,16 +1,13 @@ """ grdtrack - Sample grids at specified (x,y) locations. """ +import warnings + +import numpy as np import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import ( - GMTTempFile, - build_arg_string, - fmt_docstring, - kwargs_to_strings, - use_alias, -) +from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias __doctest_skip__ = ["grdtrack"] @@ -43,7 +40,9 @@ w="wrap", ) @kwargs_to_strings(R="sequence", S="sequence", i="sequence_comma", o="sequence_comma") -def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): +def grdtrack( + grid, points=None, output_type="pandas", outfile=None, newcolname=None, **kwargs +): r""" Sample grids at specified (x,y) locations. @@ -292,29 +291,45 @@ def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): if hasattr(points, "columns") and newcolname is None: raise GMTInvalidInput("Please pass in a str to 'newcolname'") - with GMTTempFile(suffix=".csv") as tmpfile: - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as grdfile, lib.virtualfile_from_data( - check_kind="vector", data=points, required_data=False - ) as csvfile: - kwargs["G"] = grdfile - if outfile is None: # Output to tmpfile if outfile is not set - outfile = tmpfile.name - lib.call_module( - module="grdtrack", - args=build_arg_string(kwargs, infile=csvfile, outfile=outfile), - ) + if output_type not in ["numpy", "pandas", "file"]: + raise GMTInvalidInput( + "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." + ) + + if outfile is not None and output_type != "file": + msg = ( + f"Changing 'output_type' from '{output_type}' to 'file' " + "since 'outfile' parameter is set. Please use output_type='file' " + "to silence this warning." + ) + warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) + output_type = "file" + elif outfile is None and output_type == "file": + raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as ingrid, lib.virtualfile_from_data( + check_kind="vector", data=points, required_data=False + ) as infile, lib.virtualfile_to_gmtdataset() as outvfile: + kwargs["G"] = ingrid + lib.call_module( + module="grdtrack", + args=build_arg_string(kwargs, infile=infile, outfile=outvfile), + ) + + if output_type == "file": + lib.call_module("write", f"{outvfile} {outfile} -Td") + return None + + vectors = lib.gmtdataset_to_vectors(outvfile) + if output_type == "numpy": + return np.array(vectors).T - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - try: - column_names = points.columns.to_list() + [newcolname] - result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) - except AttributeError: # 'str' object has no attribute 'columns' - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None + if isinstance(points, pd.DataFrame): + column_names = points.columns.to_list() + [newcolname] + else: + column_names = None - return result + return pd.DataFrame(np.array(vectors).T, columns=column_names) From bac13bd874c28dbe8d254991e7a8270cf70576fe Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 10 Oct 2023 15:37:53 +0800 Subject: [PATCH 13/85] Update to virtualfile_to_data which can also be used for grids --- pygmt/clib/session.py | 16 ++++++++++++---- pygmt/src/grd2xyz.py | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index c9dbda7cb45..2917ccce4e4 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1706,17 +1706,25 @@ def read_virtualfile(self, vfname): return c_read_virtualfile(self.session_pointer, vfname.encode()) @contextmanager - def virtualfile_to_gmtdataset(self): + def virtualfile_to_data(self, kind): """ - Create a virtual file for writing a GMT_DATASET object. + Create a virtual file for writing a GMT data container. + + Parameters + ---------- + kind : str + The kind of data container to create. Choose from "grid" or + "dataset". Yields ------ vfile : str Name of the virtual file. """ - family = "GMT_IS_DATASET" - geometry = "GMT_IS_PLP" + family, geometry = { + "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), + "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), + }[kind] with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: yield vfile diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 1c820aee567..3b14e926c71 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -170,7 +170,7 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid - ) as invfile, lib.virtualfile_to_gmtdataset() as outvfile: + ) as invfile, lib.virtualfile_to_data(kind="dataset") as outvfile: # Option 1 lib.call_module( module="grd2xyz", From ba6d94eb1c813ebc6be7a6c51dd34f275810c80f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 10 Oct 2023 16:36:26 +0800 Subject: [PATCH 14/85] Add read_virtualfile_to_data to simplify the logic --- pygmt/clib/session.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 2917ccce4e4..719bad34154 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1705,6 +1705,28 @@ def read_virtualfile(self, vfname): ) return c_read_virtualfile(self.session_pointer, vfname.encode()) + def read_virtualfile_to_data(self, vfname, kind): + """ + Read a virtual file and convert to a GMT data container. + + Parameters + ---------- + vfname : str + Name of the virtual file to read. + kind : str + The kind of data container to create. Choose from "grid" or + "dataset". + + Returns + ------- + Pointer to the GMT_GRID or GMT_DATASET data container. + """ + type = { + # "grid": GMT_GRID, # implemented in PR #2398 + "dataset": GMT_DATASET, + }[kind] + return ctp.cast(self.read_virtualfile(vfname), ctp.POINTER(type)) + @contextmanager def virtualfile_to_data(self, kind): """ @@ -1743,7 +1765,7 @@ def gmtdataset_to_vectors(self, vfile): List of vectors containing the data from the GMT_DATASET object. """ # Read the virtual file and cast it to a pointer to a GMT_DATASET - ds = ctp.cast(self.read_virtualfile(vfile), ctp.POINTER(GMT_DATASET)).contents + ds = self.read_virtualfile_to_data(vfile, kind="dataset").contents # Loop over the tables, segments, and columns to get the data as vectors vectors = [] From 589a9dd8f73b0ea29c8d6fd5d789e10ebf4ec73a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 10 Oct 2023 17:02:57 +0800 Subject: [PATCH 15/85] Fix a typo --- pygmt/src/grdtrack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 4825ea9eff9..aba4ddd3a08 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -312,7 +312,7 @@ def grdtrack( check_kind="raster", data=grid ) as ingrid, lib.virtualfile_from_data( check_kind="vector", data=points, required_data=False - ) as infile, lib.virtualfile_to_gmtdataset() as outvfile: + ) as infile, lib.virtualfile_to_data(kind="dataset") as outvfile: kwargs["G"] = ingrid lib.call_module( module="grdtrack", From a06a5b43bf454613c60c46b89838ec892f05259b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 11:03:40 +0800 Subject: [PATCH 16/85] Merge read_virtualfile and read_virtualfile_to_data into a single function --- pygmt/clib/session.py | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 719bad34154..06e7c8262e4 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1682,18 +1682,23 @@ def extract_region(self): raise GMTCLibError("Failed to extract region from current figure.") return wesn - def read_virtualfile(self, vfname): + def read_virtualfile(self, vfname, kind=None): """ - Read data from a virtual file. + Read data from a virtual file and cast it into a GMT data container if requested. Parameters ---------- vfname : str Name of the virtual file to read. + kind : str + Cast the data into a GMT data container. Choose from "grid" or + "dataset". If None, will return a ctypes void pointer. + Returns ------- - Pointer to the data, which can be casted into GMT data types. + Pointer to the GMT data container. If ``kind`` is None, returns a + ctypes void pointer instead. """ c_read_virtualfile = self.get_libgmt_func( "GMT_Read_VirtualFile", @@ -1703,29 +1708,18 @@ def read_virtualfile(self, vfname): ], restype=ctp.c_void_p, ) - return c_read_virtualfile(self.session_pointer, vfname.encode()) - - def read_virtualfile_to_data(self, vfname, kind): - """ - Read a virtual file and convert to a GMT data container. + pointer = c_read_virtualfile(self.session_pointer, vfname.encode()) + if kind is None: # Return the ctypes void pointer + return pointer - Parameters - ---------- - vfname : str - Name of the virtual file to read. - kind : str - The kind of data container to create. Choose from "grid" or - "dataset". - - Returns - ------- - Pointer to the GMT_GRID or GMT_DATASET data container. - """ + # The GMT C API function GMT_Read_VirtualFile returns a void pointer. + # It usually needs to be cast to a pointer to GMT data container (e.g., + # GMT_GRID or GMT_DATASET). type = { # "grid": GMT_GRID, # implemented in PR #2398 "dataset": GMT_DATASET, }[kind] - return ctp.cast(self.read_virtualfile(vfname), ctp.POINTER(type)) + return ctp.cast(pointer, ctp.POINTER(type)) @contextmanager def virtualfile_to_data(self, kind): @@ -1765,7 +1759,7 @@ def gmtdataset_to_vectors(self, vfile): List of vectors containing the data from the GMT_DATASET object. """ # Read the virtual file and cast it to a pointer to a GMT_DATASET - ds = self.read_virtualfile_to_data(vfile, kind="dataset").contents + ds = self.read_virtualfile(vfile, kind="dataset").contents # Loop over the tables, segments, and columns to get the data as vectors vectors = [] From f5a9d447d1b003f93207949ecf940b0ab8bceef4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 13:10:40 +0800 Subject: [PATCH 17/85] Refactor the virtualfile_to_data function to support writing to a real file --- pygmt/clib/session.py | 33 ++++++++++++++++++++++----------- pygmt/src/grd2xyz.py | 23 +++-------------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 06e7c8262e4..f7f1d30c734 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1684,7 +1684,8 @@ def extract_region(self): def read_virtualfile(self, vfname, kind=None): """ - Read data from a virtual file and cast it into a GMT data container if requested. + Read data from a virtual file and cast it into a GMT data container if + requested. Parameters ---------- @@ -1722,27 +1723,37 @@ def read_virtualfile(self, vfname, kind=None): return ctp.cast(pointer, ctp.POINTER(type)) @contextmanager - def virtualfile_to_data(self, kind): + def virtualfile_to_data(self, kind, fname=None): """ - Create a virtual file for writing a GMT data container. + Create a virtual file for writing a GMT data container or yield the + output file name. Parameters ---------- kind : str The kind of data container to create. Choose from "grid" or - "dataset". + "dataset". It has no effect if ``fname`` is given. + + fname : str or None + If given, yield the output file name instead of the virtual file. Yields ------ vfile : str - Name of the virtual file. + Name of the virtual file or the output file name. """ - family, geometry = { - "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), - "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), - }[kind] - with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: - yield vfile + # If fname is given, yield the output file name. + if fname is not None: + yield fname + # Otherwise, create a virtual file for writing a GMT data container. + else: + # Determine the family and geometry of the data container based on 'kind'. + family, geometry = { + "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), + "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), + }[kind] + with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: + yield vfile def gmtdataset_to_vectors(self, vfile): """ diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 3b14e926c71..f057f402f48 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -170,34 +170,17 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid - ) as invfile, lib.virtualfile_to_data(kind="dataset") as outvfile: - # Option 1 + ) as invfile, lib.virtualfile_to_data( + kind="dataset", fname=outfile + ) as outvfile: lib.call_module( module="grd2xyz", args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), ) if output_type == "file": - lib.call_module("write", f"{outvfile} {outfile} -Td") return None vectors = lib.gmtdataset_to_vectors(outvfile) if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) - - """ - # Option 2 - if output_type == "file": - outvfile = outfile - lib.call_module( - module="grd2xyz", - args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), - ) - - if output_type == "file": - return None - vectors = lib.gmtdataset_to_vectors(outvfile) - if output_type == "numpy": - return np.array(vectors).T - return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) - """ From 547049534cbda6b9a424b8ea861f56f7976166a0 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 13:14:58 +0800 Subject: [PATCH 18/85] Simplify the codes following the gmtdataset changes --- pygmt/src/grdtrack.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index aba4ddd3a08..54ff309e0fa 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -307,12 +307,19 @@ def grdtrack( elif outfile is None and output_type == "file": raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + if isinstance(points, pd.DataFrame): + column_names = points.columns.to_list() + [newcolname] + else: + column_names = None + with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid ) as ingrid, lib.virtualfile_from_data( check_kind="vector", data=points, required_data=False - ) as infile, lib.virtualfile_to_data(kind="dataset") as outvfile: + ) as infile, lib.virtualfile_to_data( + kind="dataset", fname=outfile + ) as outvfile: kwargs["G"] = ingrid lib.call_module( module="grdtrack", @@ -320,16 +327,8 @@ def grdtrack( ) if output_type == "file": - lib.call_module("write", f"{outvfile} {outfile} -Td") return None - vectors = lib.gmtdataset_to_vectors(outvfile) if output_type == "numpy": return np.array(vectors).T - - if isinstance(points, pd.DataFrame): - column_names = points.columns.to_list() + [newcolname] - else: - column_names = None - return pd.DataFrame(np.array(vectors).T, columns=column_names) From 555bfe36ce6958bc7dfb36656ce911baaa09a8aa Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 13:35:52 +0800 Subject: [PATCH 19/85] Move gmtdataset_to_vectors as a method of the GMT_DATASET class --- pygmt/clib/session.py | 29 ----------------------------- pygmt/datatypes.py | 23 +++++++++++++++++++++++ pygmt/src/grd2xyz.py | 4 +++- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index f7f1d30c734..dd86ff8a87b 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1754,32 +1754,3 @@ def virtualfile_to_data(self, kind, fname=None): }[kind] with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: yield vfile - - def gmtdataset_to_vectors(self, vfile): - """ - Read GMT_DATASET object from a virtual file and convert to vectors. - - Parameters - ---------- - vfile : str - Name of the virtual file. - - Returns - ------- - vectors : list of 1-D arrays - List of vectors containing the data from the GMT_DATASET object. - """ - # Read the virtual file and cast it to a pointer to a GMT_DATASET - ds = self.read_virtualfile(vfile, kind="dataset").contents - - # Loop over the tables, segments, and columns to get the data as vectors - vectors = [] - for itbl in range(ds.n_tables): - dtbl = ds.table[itbl].contents - for iseg in range(dtbl.n_segments): - dseg = dtbl.segment[iseg].contents - for icol in range(dseg.n_columns): - vectors.append( - np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) - ) - return vectors diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index d544a5f2f40..6d2e64c0f7c 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -5,6 +5,8 @@ """ import ctypes as ctp +import numpy as np + class GMT_DATASEGMENT(ctp.Structure): """ @@ -71,3 +73,24 @@ class GMT_DATASET(ctp.Structure): ("ProjRefEPSG", ctp.c_int), # To store a referencing system EPSG code ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API ] + + def to_vectors(self): + """ + Convert the GMT_DATASET object to a list of vectors. + + Returns + ------- + vectors : list of 1-D arrays + List of vectors containing the data from the GMT_DATASET object. + """ + # Loop over the tables, segments, and columns to get the data as vectors + vectors = [] + for itbl in range(self.n_tables): + dtbl = self.table[itbl].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + for icol in range(dseg.n_columns): + vectors.append( + np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) + ) + return vectors diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index f057f402f48..6e6a03dbd6c 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -180,7 +180,9 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None - vectors = lib.gmtdataset_to_vectors(outvfile) + vectors = lib.read_virtualfile( + outvfile, kind="dataset" + ).contents.to_vectors() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) From 6d26c10f32bc777ec5f4dfa17e0684918589aacb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 13:37:37 +0800 Subject: [PATCH 20/85] Update after gmtdataset changes --- pygmt/src/grdtrack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 54ff309e0fa..066d7464ebe 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -328,7 +328,7 @@ def grdtrack( if output_type == "file": return None - vectors = lib.gmtdataset_to_vectors(outvfile) + vectors = lib.read_virtualfile(outvfile, kind="dataset").contents.to_vectors() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(np.array(vectors).T, columns=column_names) From 2205cbb1567fd7e00cb6f8c8aa525a6d45fb6494 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 20:58:04 +0800 Subject: [PATCH 21/85] Add Pythonic objects --- pygmt/datatypes.py | 90 ++++++++++++++++++++++++++++++++++++++++++++ pygmt/src/grd2xyz.py | 3 +- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 6d2e64c0f7c..3b1c3295ce4 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -8,6 +8,23 @@ import numpy as np +# Python representation of the GMT data types. +# See the comments in the `GMT_DATASET.to_pydata` method for more details. +class PyGMT_DATASET: + def __init__(self, table): + self.table = table + + +class PyGMT_DATATABLE: + def __init__(self, segment): + self.segment = segment + + +class PyGMT_DATASEGMENT: + def __init__(self, data): + self.data = data + + class GMT_DATASEGMENT(ctp.Structure): """ For holding segment lines in memory. @@ -94,3 +111,76 @@ def to_vectors(self): np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) ) return vectors + + def to_pydata(self): + """ + Convert the ctypes GMT_DATASET object to the Python PyGMT_DATASET + object. + + The ctypes GMT_DATASET/GMT_DATATABLE/GMT_DATASEGMENT objects are difficult to use, + because most of attributes are pointers to other objects or ctypes arrays. + For example, let's say `dataset` is a GMT_DATASET object, and you want to access + the data of the first segment of the first table, + you have to use the following code (note the `contents` attribute): + + >>> data = dataset.table[0].contents.segment[0].contents.data + + Now `data` is a `POINTER(POINTER(c_double))` object. + The first column is `data[0]`, but you can't use `print(data[0])` to print the + data, because it will print the memory address of the data. You have to use + + >>> print(np.ctypeslib.as_array(data[0], shape=(n_rows,))) + + to print the data. It's difficult to use for us developers (see the `to_vectors` + above for example). It will be even more difficult to understand for users. + So, exposing the ctypes objects to users is a bad idea. + + This method converts the ctypes object to a Python object, which is easier to + use. For example, the following code converts the `dataset` to a Python object: + + >>> pydata = dataset.to_pydata() + + Now `pydata` is a PyGMT_DATASET object. + + To get the number of tables, you can use the following code: + + >>> len( + ... pydata.table + ... ) # table is a list. That's why we don't need the `n_tables` attribute. + + To get the first column of the first segment of the first table:: + + >>> pydata.table[0].segment[0].data[0] + + The PyGMT_DATASET object is more Pythonic and can be exposed to users. + The most big benefit is that now it's possible to support multiple-segment files + with headers (e.g., a segment with header `> -Z1.0`). + + However, the arrays in the Python object are still pointers to the original + memory allocated by GMT, so the data will be destroyed when the Session ends. + We may need to copy the data to a new memory location if we want to use the + data after the Session ends. + """ + table = [] + for itbl in range(self.n_tables): + segment = [] + for iseg in range(self.table[itbl].contents.n_segments): + seg = self.table[itbl].contents.segment[iseg].contents + n_columns, n_rows = seg.n_columns, seg.n_rows + data = [ + np.ctypeslib.as_array(seg.data[icol], shape=(n_rows,)) + for icol in range(n_columns) + ] + segment.append(PyGMT_DATASEGMENT(data=data)) + table.append(PyGMT_DATATABLE(segment=segment)) + pydata = PyGMT_DATASET(table=table) + pydata.n_columns = self.n_columns + return pydata + + def to_vectors_v2(self): + pydata = self.to_pydata() + vectors = [ + np.concatenate([seg.data[i] for tbl in pydata.table for seg in tbl.segment]) + for i in range(pydata.n_columns) + ] + return vectors diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 6e6a03dbd6c..e5d11bb67c9 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -180,9 +180,10 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None + # vectors = lib.read_virtualfile(outvfile, kind="dataset").contents.to_vectors() vectors = lib.read_virtualfile( outvfile, kind="dataset" - ).contents.to_vectors() + ).contents.to_vectors_v2() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) From 4e67ebf15cfd0363c492ae12b649ddf2480a20f0 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 11 Oct 2023 21:30:13 +0800 Subject: [PATCH 22/85] Add more notes about GMT.jl implementation --- pygmt/datatypes.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 3b1c3295ce4..c2e33e9ffdb 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -160,6 +160,16 @@ def to_pydata(self): memory allocated by GMT, so the data will be destroyed when the Session ends. We may need to copy the data to a new memory location if we want to use the data after the Session ends. + + Notes + ----- + In GMT.jl, the GMT_DATASET is defined in + https://github.com/GenericMappingTools/GMT.jl/blob/master/src/libgmt_h.jl#L119. + It also provides the more friendly data type GMTdataset. + See https://www.generic-mapping-tools.org/GMT.jl/dev/types/#Dataset-type. + + A `get_dataset` function is provided to convert GMT's GMT_DATASET + to GMT.jl's GMTdataset. """ table = [] for itbl in range(self.n_tables): From 4707b872451f0848ee90c183ac6c5346f10c415a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 12 Oct 2023 12:44:33 +0800 Subject: [PATCH 23/85] Refactor the codes using nested classes --- pygmt/datatypes.py | 123 +++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 50 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index c2e33e9ffdb..c5f695c636e 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -1,7 +1,5 @@ """ -GMT data types for ctypes. - -See the GMT source code gmt_resources.h for the original C struct definitions. +GMT data types. """ import ctypes as ctp @@ -25,70 +23,95 @@ def __init__(self, data): self.data = data -class GMT_DATASEGMENT(ctp.Structure): - """ - For holding segment lines in memory. +class GMT_DATASET(ctp.Structure): """ + GMT dataset structure for holding multiple tables (files). - _fields_ = [ - ("n_rows", ctp.c_uint64), # Number of points in this segment - ("n_columns", ctp.c_uint64), # Number of fields in each record (>= 2) - ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column - # Data x, y, and possibly other columns - ("data", ctp.POINTER(ctp.POINTER(ctp.c_double))), - ("label", ctp.c_char_p), # Label string (if applicable) - ("header", ctp.c_char_p), # Segment header (if applicable) - ("text", ctp.POINTER(ctp.c_char_p)), # text beyond the data - ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API - ] - - -class GMT_DATATABLE(ctp.Structure): - """ - To hold an array of line segment structures and header information in one - container. + See the GMT source code gmt_resources.h for the original C struct + definitions. """ - _fields_ = [ - ("n_headers", ctp.c_uint), # Number of file header records (0 if no header) - ("n_columns", ctp.c_uint64), # Number of columns (fields) in each record - ("n_segments", ctp.c_uint64), # Number of segments in the array - ("n_records", ctp.c_uint64), # Total number of data records across all segments - ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column - # Array with all file header records, if any - ("header", ctp.POINTER(ctp.c_char_p)), - # Pointer to array of segments - ("segment", ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT))), - ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API - ] - + class GMT_DATATABLE(ctp.Structure): + """ + GMT datatable structure for holding a single table with multiple + segments. + """ -class GMT_DATASET(ctp.Structure): - """ - Single container for an array of GMT tables (files). - """ + class GMT_DATASEGMENT(ctp.Structure): + """ + GMT datasegment structure for holding a single segment with + multiple columns. + """ + + _fields_ = [ + # Number of rows/records in this segment + ("n_rows", ctp.c_uint64), + # Number of fields in each record + ("n_columns", ctp.c_uint64), + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), + # Data x, y, and possibly other columns + ("data", ctp.POINTER(ctp.POINTER(ctp.c_double))), + # Label string (if applicable) + ("label", ctp.c_char_p), + # Segment header (if applicable) + ("header", ctp.c_char_p), + # text beyond the data + ("text", ctp.POINTER(ctp.c_char_p)), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), + ] + + _fields_ = [ + # Number of file header records (0 if no header) + ("n_headers", ctp.c_uint), + # Number of columns (fields) in each record + ("n_columns", ctp.c_uint64), + # Number of segments in the array + ("n_segments", ctp.c_uint64), + # Total number of data records across all segments + ("n_records", ctp.c_uint64), + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), + # Array with all file header records, if any + ("header", ctp.POINTER(ctp.c_char_p)), + # Pointer to array of segments + ("segment", ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT))), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), + ] _fields_ = [ - ("n_tables", ctp.c_uint64), # The total number of tables (files) contained - ("n_columns", ctp.c_uint64), # The number of data columns - ("n_segments", ctp.c_uint64), # The total number of segments across all tables + # The total number of tables (files) contained + ("n_tables", ctp.c_uint64), + # The number of data columns + ("n_columns", ctp.c_uint64), + # The total number of segments across all tables + ("n_segments", ctp.c_uint64), # The total number of data records across all tables ("n_records", ctp.c_uint64), - ("min", ctp.POINTER(ctp.c_double)), # Minimum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), # Maximum coordinate for each column + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), # Pointer to array of tables ("table", ctp.POINTER(ctp.POINTER(GMT_DATATABLE))), # The datatype (numerical, text, or mixed) of this dataset ("type", ctp.c_int32), - ("geometry", ctp.c_int32), # The geometry of this dataset + # The geometry of this dataset + ("geometry", ctp.c_int32), # To store a referencing system string in PROJ.4 format ("ProjRefPROJ4", ctp.c_char_p), # To store a referencing system string in WKT format ("ProjRefWKT", ctp.c_char_p), - ("ProjRefEPSG", ctp.c_int), # To store a referencing system EPSG code - ("hidden", ctp.c_void_p), # Book-keeping variables "hidden" from the API + # To store a referencing system EPSG code + ("ProjRefEPSG", ctp.c_int), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), ] def to_vectors(self): From 013f4bbec1eca5b95c22c113c2c89847e7796d92 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 12 Oct 2023 13:34:32 +0800 Subject: [PATCH 24/85] Add more examples --- pygmt/datatypes.py | 53 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index c5f695c636e..9aca748397b 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -29,6 +29,24 @@ class GMT_DATASET(ctp.Structure): See the GMT source code gmt_resources.h for the original C struct definitions. + + Examples + -------- + >>> from pygmt.clib import Session + >>> with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vfout: + ... lib.call_module("read", f"@App_O_cross.txt {vfout} -Td") + ... ds = lib.read_virtualfile(vfout, kind="dataset").contents + ... print(ds.n_tables, ds.n_columns, ds.n_segments, ds.n_records) + ... print(ds.min[0], ds.max[0], ds.min[1], ds.max[1]) + ... seg = ds.table[0].contents.segment[0].contents + ... print(seg.data[0][: seg.n_rows]) + ... print(seg.data[1][: seg.n_rows]) + ... + 1 2 3 14 + 59.0 158.0 -12.0 13.0 + [59.0, 62.0, 66.0, 71.0, 77.0] + [-12.0, -7.0, -3.0, -1.0, 3.0] """ class GMT_DATATABLE(ctp.Structure): @@ -118,21 +136,42 @@ def to_vectors(self): """ Convert the GMT_DATASET object to a list of vectors. + Examples + -------- + + >>> from pygmt.clib import Session + >>> with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vfout: + ... lib.call_module("read", f"@App_O_cross.txt {vfout} -Td") + ... ds = lib.read_virtualfile(vfout, kind="dataset") + ... vectors = ds.contents.to_vectors() + ... + >>> len(vectors) # 2 columns + 2 + >>> vectors[0] + array([ 59., 62., 66., 71., 77., 94., 100., 105., 109., 114., 119., + 126., 148., 158.]) + >>> vectors[1] + array([-12. , -7. , -3. , -1. , 3. , -11. , -10.5, -9.5, -8.6, + -6.5, -4. , 2. , 3. , 13. ]) + Returns ------- vectors : list of 1-D arrays List of vectors containing the data from the GMT_DATASET object. """ - # Loop over the tables, segments, and columns to get the data as vectors + # Currently, the same column in all segments of all tables are concatenated. vectors = [] - for itbl in range(self.n_tables): - dtbl = self.table[itbl].contents - for iseg in range(dtbl.n_segments): - dseg = dtbl.segment[iseg].contents - for icol in range(dseg.n_columns): - vectors.append( + for icol in range(self.n_columns): # all have the same number of columns? + colvector = [] + for itbl in range(self.n_tables): + dtbl = self.table[itbl].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + colvector.append( np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) ) + vectors.append(np.concatenate(colvector)) return vectors def to_pydata(self): From 19fd16437c530d7dd00562de0a29ee91493293b7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 12 Oct 2023 22:25:20 +0800 Subject: [PATCH 25/85] Deal with text column --- pygmt/datatypes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 9aca748397b..961b7b78901 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -172,6 +172,18 @@ def to_vectors(self): np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) ) vectors.append(np.concatenate(colvector)) + + # deal with trailing text column + textvector = [] + for itbl in range(self.n_tables): + dtbl = self.table[itbl].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + if dseg.text: + textvector.extend(dseg.text[: dseg.n_rows]) + if textvector: + vectors.append(np.char.decode(textvector)) + return vectors def to_pydata(self): From 3cff015faaf47bf5e5f857c3a4ff16015c55519b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 10:43:57 +0800 Subject: [PATCH 26/85] Fix linting issues --- pygmt/datatypes.py | 61 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 961b7b78901..d6e5db54e8c 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -191,49 +191,52 @@ def to_pydata(self): Convert the ctypes GMT_DATASET object to the Python PyGMT_DATASET object. - The ctypes GMT_DATASET/GMT_DATATABLE/GMT_DATASEGMENT objects are difficult to use, - because most of attributes are pointers to other objects or ctypes arrays. - For example, let's say `dataset` is a GMT_DATASET object, and you want to access - the data of the first segment of the first table, - you have to use the following code (note the `contents` attribute): + The ctypes GMT_DATASET/GMT_DATATABLE/GMT_DATASEGMENT objects are + difficult to use, because most of attributes are pointers to other + objects or ctypes arrays. For example, let's say ``dataset`` is a + GMT_DATASET object, and you want to access the data of the first + segment of the first table, you have to use the following code + (note the ``contents`` attribute):: - >>> data = dataset.table[0].contents.segment[0].contents.data + data = dataset.table[0].contents.segment[0].contents.data - Now `data` is a `POINTER(POINTER(c_double))` object. - The first column is `data[0]`, but you can't use `print(data[0])` to print the - data, because it will print the memory address of the data. You have to use + Now ``data`` is a ``POINTER(POINTER(c_double))`` object. The first + column is ``data[0]``, but you can't use ``print(data[0])`` to print + the data, because it will print the memory address of the data. + You have to use:: - >>> print(np.ctypeslib.as_array(data[0], shape=(n_rows,))) + print(np.ctypeslib.as_array(data[0], shape=(n_rows,))) - to print the data. It's difficult to use for us developers (see the `to_vectors` - above for example). It will be even more difficult to understand for users. - So, exposing the ctypes objects to users is a bad idea. + to print the data. It's difficult to use for us developers (see the + ``to_vectors`` above for example). It will be even more difficult to + understand for users. So, exposing the ctypes objects to users is a + bad idea. - This method converts the ctypes object to a Python object, which is easier to - use. For example, the following code converts the `dataset` to a Python object: + This method converts the ctypes object to a Python object, which is + easier to use. For example, the following code converts the ``dataset`` + to a Python object:: - >>> pydata = dataset.to_pydata() + pydata = dataset.to_pydata() - Now `pydata` is a PyGMT_DATASET object. + Now ``pydata`` is a PyGMT_DATASET object. - To get the number of tables, you can use the following code: + To get the number of tables, you can use the following code:: - >>> len( - ... pydata.table - ... ) # table is a list. That's why we don't need the `n_tables` attribute. + len(pydata.table) To get the first column of the first segment of the first table:: - >>> pydata.table[0].segment[0].data[0] + pydata.table[0].segment[0].data[0] The PyGMT_DATASET object is more Pythonic and can be exposed to users. - The most big benefit is that now it's possible to support multiple-segment files - with headers (e.g., a segment with header `> -Z1.0`). - - However, the arrays in the Python object are still pointers to the original - memory allocated by GMT, so the data will be destroyed when the Session ends. - We may need to copy the data to a new memory location if we want to use the - data after the Session ends. + The most big benefit is that now it's possible to support + multiple-segment files with headers (e.g., a segment with header + ``> -Z1.0``). + + However, the arrays in the Python object are still pointers to the + original memory allocated by GMT, so the data will be destroyed when + the Session ends. We may need to copy the data to a new memory location + if we want to use the data after the Session ends. Notes ----- From 9843eca64e05ac0218531b44f2e048ccf052951f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 13:56:17 +0800 Subject: [PATCH 27/85] Standarize virtual file names --- pygmt/src/grd2xyz.py | 10 ++++------ pygmt/src/grdtrack.py | 12 ++++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index ec3fc1149c1..2d610471be4 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -170,19 +170,17 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid - ) as invfile, lib.virtualfile_to_data( - kind="dataset", fname=outfile - ) as outvfile: + ) as vingrd, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: lib.call_module( module="grd2xyz", - args=build_arg_string(kwargs, infile=invfile, outfile=outvfile), + args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl), ) if output_type == "file": return None - # vectors = lib.read_virtualfile(outvfile, kind="dataset").contents.to_vectors() + # vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() vectors = lib.read_virtualfile( - outvfile, kind="dataset" + vouttbl, kind="dataset" ).contents.to_vectors_v2() if output_type == "numpy": return np.array(vectors).T diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 89b394429c2..68a44172a70 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -315,20 +315,20 @@ def grdtrack( with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid - ) as ingrid, lib.virtualfile_from_data( + ) as vingrd, lib.virtualfile_from_data( check_kind="vector", data=points, required_data=False - ) as infile, lib.virtualfile_to_data( + ) as vintbl, lib.virtualfile_to_data( kind="dataset", fname=outfile - ) as outvfile: - kwargs["G"] = ingrid + ) as vouttbl: + kwargs["G"] = vingrd lib.call_module( module="grdtrack", - args=build_arg_string(kwargs, infile=infile, outfile=outvfile), + args=build_arg_string(kwargs, vintbl=vintbl, outfile=vouttbl), ) if output_type == "file": return None - vectors = lib.read_virtualfile(outvfile, kind="dataset").contents.to_vectors() + vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(np.array(vectors).T, columns=column_names) From 9af418ac57152451f2ffa8814e409469157ba946 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 16:35:35 +0800 Subject: [PATCH 28/85] Improve the GMT_DATASET doctest --- pygmt/datatypes.py | 58 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index d6e5db54e8c..500583b1637 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -1,5 +1,5 @@ """ -GMT data types. +Wrappers for GMT data types. """ import ctypes as ctp @@ -27,26 +27,56 @@ class GMT_DATASET(ctp.Structure): """ GMT dataset structure for holding multiple tables (files). + This class is only meant for internal use by PyGMT. It is not exposed to + users. + See the GMT source code gmt_resources.h for the original C struct definitions. Examples -------- + >>> from pygmt.helpers import GMTTempFile >>> from pygmt.clib import Session - >>> with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vfout: - ... lib.call_module("read", f"@App_O_cross.txt {vfout} -Td") - ... ds = lib.read_virtualfile(vfout, kind="dataset").contents - ... print(ds.n_tables, ds.n_columns, ds.n_segments, ds.n_records) - ... print(ds.min[0], ds.max[0], ds.min[1], ds.max[1]) - ... seg = ds.table[0].contents.segment[0].contents - ... print(seg.data[0][: seg.n_rows]) - ... print(seg.data[1][: seg.n_rows]) + >>> + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... # prepare the sample data file + ... with open(tmpfile.name, mode="w") as fp: + ... print(">", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT2", file=fp) + ... print("4.0 5.0 6.0 TEXT3 TEXT4", file=fp) + ... print(">", file=fp) + ... print("7.0 8.0 9.0 TEXT5 TEXT6", file=fp) + ... print("10.0 11.0 12.0 TEXT7 TEXT8", file=fp) + ... # read the data file + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... # the dataset + ... ds = lib.read_virtualfile(vouttbl, kind="dataset").contents + ... print(ds.n_tables, ds.n_columns, ds.n_segments) + ... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns]) + ... # the table + ... tbl = ds.table[0].contents + ... print(tbl.n_columns, tbl.n_segments, tbl.n_records) + ... print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns]) + ... for i in range(tbl.n_segments): + ... seg = tbl.segment[i].contents + ... for j in range(seg.n_columns): + ... print(seg.data[j][:seg.n_rows]) + ... print(seg.text[: seg.n_rows]) ... - 1 2 3 14 - 59.0 158.0 -12.0 13.0 - [59.0, 62.0, 66.0, 71.0, 77.0] - [-12.0, -7.0, -3.0, -1.0, 3.0] + 1 3 2 + [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] + 3 2 4 + [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] + [1.0, 4.0] + [2.0, 5.0] + [3.0, 6.0] + [b'TEXT1 TEXT2', b'TEXT3 TEXT4'] + [7.0, 10.0] + [8.0, 11.0] + [9.0, 12.0] + [b'TEXT5 TEXT6', b'TEXT7 TEXT8'] """ class GMT_DATATABLE(ctp.Structure): From 7160a4f5c12eba32e3146d7a3bfe76df1f0bfb0b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 19:11:57 +0800 Subject: [PATCH 29/85] Fix a typo --- pygmt/src/grdtrack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 68a44172a70..519163de3f4 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -323,7 +323,7 @@ def grdtrack( kwargs["G"] = vingrd lib.call_module( module="grdtrack", - args=build_arg_string(kwargs, vintbl=vintbl, outfile=vouttbl), + args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) if output_type == "file": From 6e07b9533cc84408758acbce819a2a159605ad5e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 20:19:58 +0800 Subject: [PATCH 30/85] Improve the doctest for GMT_DATASET.to_vectors() --- pygmt/datatypes.py | 55 +++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 500583b1637..6fcd063af14 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -42,11 +42,11 @@ class GMT_DATASET(ctp.Structure): ... # prepare the sample data file ... with open(tmpfile.name, mode="w") as fp: ... print(">", file=fp) - ... print("1.0 2.0 3.0 TEXT1 TEXT2", file=fp) - ... print("4.0 5.0 6.0 TEXT3 TEXT4", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) ... print(">", file=fp) - ... print("7.0 8.0 9.0 TEXT5 TEXT6", file=fp) - ... print("10.0 11.0 12.0 TEXT7 TEXT8", file=fp) + ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) ... # read the data file ... with Session() as lib: ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: @@ -62,7 +62,7 @@ class GMT_DATASET(ctp.Structure): ... for i in range(tbl.n_segments): ... seg = tbl.segment[i].contents ... for j in range(seg.n_columns): - ... print(seg.data[j][:seg.n_rows]) + ... print(seg.data[j][: seg.n_rows]) ... print(seg.text[: seg.n_rows]) ... 1 3 2 @@ -72,11 +72,11 @@ class GMT_DATASET(ctp.Structure): [1.0, 4.0] [2.0, 5.0] [3.0, 6.0] - [b'TEXT1 TEXT2', b'TEXT3 TEXT4'] + [b'TEXT1 TEXT23', b'TEXT4 TEXT567'] [7.0, 10.0] [8.0, 11.0] [9.0, 12.0] - [b'TEXT5 TEXT6', b'TEXT7 TEXT8'] + [b'TEXT8 TEXT90', b'TEXT123 TEXT456789'] """ class GMT_DATATABLE(ctp.Structure): @@ -164,26 +164,41 @@ class GMT_DATASEGMENT(ctp.Structure): def to_vectors(self): """ - Convert the GMT_DATASET object to a list of vectors. + Convert a GMT_DATASET object to a list of vectors. Examples -------- - + >>> from pygmt.helpers import GMTTempFile >>> from pygmt.clib import Session - >>> with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vfout: - ... lib.call_module("read", f"@App_O_cross.txt {vfout} -Td") - ... ds = lib.read_virtualfile(vfout, kind="dataset") - ... vectors = ds.contents.to_vectors() + >>> + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... # prepare the sample data file + ... with open(tmpfile.name, mode="w") as fp: + ... print(">", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) + ... print(">", file=fp) + ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module( + ... "read", f"{tmpfile.name} {vouttbl} -Td" + ... ) + ... ds = lib.read_virtualfile(vouttbl, kind="dataset") + ... vectors = ds.contents.to_vectors() ... - >>> len(vectors) # 2 columns - 2 + >>> len(vectors) # 4 columns + 4 >>> vectors[0] - array([ 59., 62., 66., 71., 77., 94., 100., 105., 109., 114., 119., - 126., 148., 158.]) + array([ 1., 4., 7., 10.]) >>> vectors[1] - array([-12. , -7. , -3. , -1. , 3. , -11. , -10.5, -9.5, -8.6, - -6.5, -4. , 2. , 3. , 13. ]) + array([ 2., 5., 8., 11.]) + >>> vectors[2] + array([ 3., 6., 9., 12.]) + >>> vectors[3] + array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90', + 'TEXT123 TEXT456789'], dtype=' Date: Wed, 25 Oct 2023 22:37:00 +0800 Subject: [PATCH 31/85] Add more comments to the GMT_DATASET.to_vectors function --- pygmt/datatypes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 6fcd063af14..4433f91b401 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -166,6 +166,11 @@ def to_vectors(self): """ Convert a GMT_DATASET object to a list of vectors. + Currently, the number of columns in all segments of all tables are + assumed to be the same. The same column in all segments of all tables + are concatenated. The trailing text column is also concatenated as a + string vector. + Examples -------- >>> from pygmt.helpers import GMTTempFile @@ -205,9 +210,8 @@ def to_vectors(self): vectors : list of 1-D arrays List of vectors containing the data from the GMT_DATASET object. """ - # Currently, the same column in all segments of all tables are concatenated. vectors = [] - for icol in range(self.n_columns): # all have the same number of columns? + for icol in range(self.n_columns): colvector = [] for itbl in range(self.n_tables): dtbl = self.table[itbl].contents From afecfc04aebbb91388147cb5bd74fe9cc742005a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 22:37:53 +0800 Subject: [PATCH 32/85] Remove the GMT_DATASET.to_vectors_v2 method --- pygmt/datatypes.py | 10 +--------- pygmt/src/grd2xyz.py | 5 +---- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 4433f91b401..570038ce29e 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -311,12 +311,4 @@ def to_pydata(self): table.append(PyGMT_DATATABLE(segment=segment)) pydata = PyGMT_DATASET(table=table) pydata.n_columns = self.n_columns - return pydata - - def to_vectors_v2(self): - pydata = self.to_pydata() - vectors = [ - np.concatenate([seg.data[i] for tbl in pydata.table for seg in tbl.segment]) - for i in range(pydata.n_columns) - ] - return vectors + return pydata \ No newline at end of file diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 2d610471be4..886ca10007a 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -178,10 +178,7 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None - # vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - vectors = lib.read_virtualfile( - vouttbl, kind="dataset" - ).contents.to_vectors_v2() + vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) From 89efc18ced8226f95d7cf4fc968f33a4093450a4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 22:46:47 +0800 Subject: [PATCH 33/85] Remove the GMT_DATASET.to_pydata method to focus on the GMT dataset structure --- pygmt/datatypes.py | 95 ---------------------------------------------- 1 file changed, 95 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 570038ce29e..5fd2f50f045 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -6,23 +6,6 @@ import numpy as np -# Python representation of the GMT data types. -# See the comments in the `GMT_DATASET.to_pydata` method for more details. -class PyGMT_DATASET: - def __init__(self, table): - self.table = table - - -class PyGMT_DATATABLE: - def __init__(self, segment): - self.segment = segment - - -class PyGMT_DATASEGMENT: - def __init__(self, data): - self.data = data - - class GMT_DATASET(ctp.Structure): """ GMT dataset structure for holding multiple tables (files). @@ -234,81 +217,3 @@ def to_vectors(self): vectors.append(np.char.decode(textvector)) return vectors - - def to_pydata(self): - """ - Convert the ctypes GMT_DATASET object to the Python PyGMT_DATASET - object. - - The ctypes GMT_DATASET/GMT_DATATABLE/GMT_DATASEGMENT objects are - difficult to use, because most of attributes are pointers to other - objects or ctypes arrays. For example, let's say ``dataset`` is a - GMT_DATASET object, and you want to access the data of the first - segment of the first table, you have to use the following code - (note the ``contents`` attribute):: - - data = dataset.table[0].contents.segment[0].contents.data - - Now ``data`` is a ``POINTER(POINTER(c_double))`` object. The first - column is ``data[0]``, but you can't use ``print(data[0])`` to print - the data, because it will print the memory address of the data. - You have to use:: - - print(np.ctypeslib.as_array(data[0], shape=(n_rows,))) - - to print the data. It's difficult to use for us developers (see the - ``to_vectors`` above for example). It will be even more difficult to - understand for users. So, exposing the ctypes objects to users is a - bad idea. - - This method converts the ctypes object to a Python object, which is - easier to use. For example, the following code converts the ``dataset`` - to a Python object:: - - pydata = dataset.to_pydata() - - Now ``pydata`` is a PyGMT_DATASET object. - - To get the number of tables, you can use the following code:: - - len(pydata.table) - - To get the first column of the first segment of the first table:: - - pydata.table[0].segment[0].data[0] - - The PyGMT_DATASET object is more Pythonic and can be exposed to users. - The most big benefit is that now it's possible to support - multiple-segment files with headers (e.g., a segment with header - ``> -Z1.0``). - - However, the arrays in the Python object are still pointers to the - original memory allocated by GMT, so the data will be destroyed when - the Session ends. We may need to copy the data to a new memory location - if we want to use the data after the Session ends. - - Notes - ----- - In GMT.jl, the GMT_DATASET is defined in - https://github.com/GenericMappingTools/GMT.jl/blob/master/src/libgmt_h.jl#L119. - It also provides the more friendly data type GMTdataset. - See https://www.generic-mapping-tools.org/GMT.jl/dev/types/#Dataset-type. - - A `get_dataset` function is provided to convert GMT's GMT_DATASET - to GMT.jl's GMTdataset. - """ - table = [] - for itbl in range(self.n_tables): - segment = [] - for iseg in range(self.table[itbl].contents.n_segments): - seg = self.table[itbl].contents.segment[iseg].contents - n_columns, n_rows = seg.n_columns, seg.n_rows - data = [ - np.ctypeslib.as_array(seg.data[icol], shape=(n_rows,)) - for icol in range(n_columns) - ] - segment.append(PyGMT_DATASEGMENT(data=data)) - table.append(PyGMT_DATATABLE(segment=segment)) - pydata = PyGMT_DATASET(table=table) - pydata.n_columns = self.n_columns - return pydata \ No newline at end of file From c7a0982f21183d65b9111e0a6ad5105b58581232 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 22:47:39 +0800 Subject: [PATCH 34/85] Fix linting issues --- pygmt/clib/session.py | 2 +- pygmt/src/grd2xyz.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index ca0cde5bca7..abc3d5334b4 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1745,7 +1745,7 @@ def virtualfile_to_data(self, kind, fname=None): yield fname # Otherwise, create a virtual file for writing a GMT data container. else: - # Determine the family and geometry of the data container based on 'kind'. + # Determine the family and geometry of the data container from kind family, geometry = { "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 886ca10007a..1a8288bd40b 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -178,7 +178,9 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None - vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() + vectors = lib.read_virtualfile( + vouttbl, kind="dataset" + ).contents.to_vectors() if output_type == "numpy": return np.array(vectors).T return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) From 3c46aca5feab13550d6eb041f04fe13782573c2e Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 22:51:32 +0800 Subject: [PATCH 35/85] Remove two blank lines --- pygmt/clib/session.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index abc3d5334b4..28f9784dae7 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1689,7 +1689,6 @@ def read_virtualfile(self, vfname, kind=None): ---------- vfname : str Name of the virtual file to read. - kind : str Cast the data into a GMT data container. Choose from "grid" or "dataset". If None, will return a ctypes void pointer. @@ -1731,7 +1730,6 @@ def virtualfile_to_data(self, kind, fname=None): kind : str The kind of data container to create. Choose from "grid" or "dataset". It has no effect if ``fname`` is given. - fname : str or None If given, yield the output file name instead of the virtual file. From 78c395919b521f309d421f120b9f4858b84501c1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 25 Oct 2023 23:14:17 +0800 Subject: [PATCH 36/85] Let pandas deal with the conversion to numpy --- pygmt/src/grd2xyz.py | 17 ++++++++--------- pygmt/src/grdtrack.py | 7 ++++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 1a8288bd40b..50cccd9d0a0 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -163,7 +163,7 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): # Set the default column names for the pandas dataframe header dataframe_header = ["x", "y", "z"] # Let output pandas column names match input DataArray dimension names - if isinstance(grid, xr.DataArray) and output_type == "pandas": + if isinstance(grid, xr.DataArray): # Reverse the dims because it is rows, columns ordered. dataframe_header = [grid.dims[1], grid.dims[0], grid.name] @@ -176,11 +176,10 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl), ) - if output_type == "file": - return None - vectors = lib.read_virtualfile( - vouttbl, kind="dataset" - ).contents.to_vectors() - if output_type == "numpy": - return np.array(vectors).T - return pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + if output_type == "file": + return None + vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() + result = pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + if output_type == "pandas": + return result + return result.to_numpy() diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 519163de3f4..56b62e9772d 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -329,6 +329,7 @@ def grdtrack( if output_type == "file": return None vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - if output_type == "numpy": - return np.array(vectors).T - return pd.DataFrame(np.array(vectors).T, columns=column_names) + result = pd.DataFrame(data=np.array(vectors).T, columns=column_names) + if output_type == "pandas": + return result + return result.to_numpy() From c86208c3dc42b236b3ffa2db20eae5a123f758df Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 26 Oct 2023 15:42:15 +0800 Subject: [PATCH 37/85] Disable some pylint warnings --- pygmt/datatypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 5fd2f50f045..f240015678c 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -6,7 +6,7 @@ import numpy as np -class GMT_DATASET(ctp.Structure): +class GMT_DATASET(ctp.Structure): # pylint: disable=invalid-name,too-few-public-methods """ GMT dataset structure for holding multiple tables (files). @@ -63,12 +63,14 @@ class GMT_DATASET(ctp.Structure): """ class GMT_DATATABLE(ctp.Structure): + # pylint: disable=invalid-name,too-few-public-methods """ GMT datatable structure for holding a single table with multiple segments. """ class GMT_DATASEGMENT(ctp.Structure): + # pylint: disable=invalid-name,too-few-public-methods """ GMT datasegment structure for holding a single segment with multiple columns. From caa9d107243c8d680a0f4c1ed06ac75e1caee200 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 26 Oct 2023 15:50:26 +0800 Subject: [PATCH 38/85] Let pandas deal with 1-D arrays directly --- pygmt/src/grd2xyz.py | 3 +-- pygmt/src/grdtrack.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 50cccd9d0a0..78d29baac71 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -3,7 +3,6 @@ """ import warnings -import numpy as np import pandas as pd import xarray as xr from pygmt.clib import Session @@ -179,7 +178,7 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file": return None vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - result = pd.DataFrame(data=np.array(vectors).T, columns=dataframe_header) + result = pd.DataFrame(data=vectors, index=dataframe_header).T if output_type == "pandas": return result return result.to_numpy() diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 56b62e9772d..4eaabf9bff9 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -3,7 +3,6 @@ """ import warnings -import numpy as np import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput @@ -329,7 +328,7 @@ def grdtrack( if output_type == "file": return None vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - result = pd.DataFrame(data=np.array(vectors).T, columns=column_names) + result = pd.DataFrame(data=vectors, index=column_names).T if output_type == "pandas": return result return result.to_numpy() From d37efcfa1033a221e826b8342edca83b7dafaaf8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 27 Oct 2023 10:11:21 +0800 Subject: [PATCH 39/85] Fix linting issues --- pygmt/clib/session.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 28f9784dae7..d307314c902 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -90,6 +90,7 @@ class Session: + # pylint: disable=too-many-public-methods """ A GMT API session where most operations involving the C API happen. @@ -1713,11 +1714,11 @@ def read_virtualfile(self, vfname, kind=None): # The GMT C API function GMT_Read_VirtualFile returns a void pointer. # It usually needs to be cast to a pointer to GMT data container (e.g., # GMT_GRID or GMT_DATASET). - type = { + dtype = { # "grid": GMT_GRID, # implemented in PR #2398 "dataset": GMT_DATASET, }[kind] - return ctp.cast(pointer, ctp.POINTER(type)) + return ctp.cast(pointer, ctp.POINTER(dtype)) @contextmanager def virtualfile_to_data(self, kind, fname=None): From 5c3f1b166b4b6f959e5086f4ec5ad5a8d83151d0 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 27 Oct 2023 10:39:36 +0800 Subject: [PATCH 40/85] Add a return_table function --- pygmt/helpers/__init__.py | 1 + pygmt/helpers/utils.py | 33 +++++++++++++++++++++++++++++++++ pygmt/src/grd2xyz.py | 22 +++++++++++++--------- pygmt/src/grdtrack.py | 21 +++++++++++++-------- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py index 5eb265e8002..de666aacad6 100644 --- a/pygmt/helpers/__init__.py +++ b/pygmt/helpers/__init__.py @@ -20,4 +20,5 @@ is_nonstr_iter, launch_external_viewer, non_ascii_to_octal, + return_table, ) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index dc6fdf1e7e3..1a7cc28b778 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -11,6 +11,7 @@ import webbrowser from collections.abc import Iterable +import pandas as pd import xarray as xr from pygmt.exceptions import GMTInvalidInput @@ -550,3 +551,35 @@ def args_in_kwargs(args, kwargs): return any( kwargs.get(arg) is not None and kwargs.get(arg) is not False for arg in args ) + + +def return_table(session, output_type, vfile, colnames): + """ + Return an output table from a virtual file based on the output type. + + Parameters + ---------- + session : :class:`pygmt.clib.Session` + The current session. + output_type : str + The output type. Can be ``"pandas"``, ``"numpy"``, or ``"file"``. + vfile : str + The virtual file name. + colnames : list of str + The column names for the :class:`pandas.DataFrame` output. + + Returns + ------- + :class:`pandas.DataFrame` or :class:`numpy.ndarray` or None + The output table. If ``output_type`` is ``"file"``, returns ``None``. + """ + if output_type == "file": # Already written to file, so return None + return None + # Read the virtual file as a GMT dataset and convert to vectors + vectors = session.read_virtualfile(vfile, kind="dataset").contents.to_vectors() + # pandas.DataFrame output + result = pd.DataFrame(data=vectors, index=colnames).T + if output_type == "pandas": + return result + # NumPy.ndarray output + return result.to_numpy() diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 78d29baac71..cae1da75e80 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -3,11 +3,16 @@ """ import warnings -import pandas as pd import xarray as xr from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias +from pygmt.helpers import ( + build_arg_string, + fmt_docstring, + kwargs_to_strings, + return_table, + use_alias, +) __doctest_skip__ = ["grd2xyz"] @@ -175,10 +180,9 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl), ) - if output_type == "file": - return None - vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - result = pd.DataFrame(data=vectors, index=dataframe_header).T - if output_type == "pandas": - return result - return result.to_numpy() + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=dataframe_header, + ) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 4eaabf9bff9..cbc1f9d7faa 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -6,7 +6,13 @@ import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import build_arg_string, fmt_docstring, kwargs_to_strings, use_alias +from pygmt.helpers import ( + build_arg_string, + fmt_docstring, + kwargs_to_strings, + return_table, + use_alias, +) __doctest_skip__ = ["grdtrack"] @@ -325,10 +331,9 @@ def grdtrack( args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - if output_type == "file": - return None - vectors = lib.read_virtualfile(vouttbl, kind="dataset").contents.to_vectors() - result = pd.DataFrame(data=vectors, index=column_names).T - if output_type == "pandas": - return result - return result.to_numpy() + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=column_names, + ) From cd11f985b43316ff8ca89a011a180b4a9022e619 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 27 Oct 2023 11:11:48 +0800 Subject: [PATCH 41/85] Add the validate_output_type function to check the output_type parameter --- pygmt/helpers/__init__.py | 1 + pygmt/helpers/validators.py | 41 +++++++++++++++++++++++++++++++++++++ pygmt/src/filter1d.py | 23 ++++++++------------- pygmt/src/grd2xyz.py | 18 ++-------------- pygmt/src/grdhisteq.py | 16 ++------------- pygmt/src/grdvolume.py | 9 ++------ pygmt/src/triangulate.py | 17 ++------------- 7 files changed, 59 insertions(+), 66 deletions(-) create mode 100644 pygmt/helpers/validators.py diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py index 5eb265e8002..bd3b9680257 100644 --- a/pygmt/helpers/__init__.py +++ b/pygmt/helpers/__init__.py @@ -21,3 +21,4 @@ launch_external_viewer, non_ascii_to_octal, ) +from pygmt.helpers.validators import validate_output_type diff --git a/pygmt/helpers/validators.py b/pygmt/helpers/validators.py new file mode 100644 index 00000000000..5d11b39f16e --- /dev/null +++ b/pygmt/helpers/validators.py @@ -0,0 +1,41 @@ +""" +Functions to check if given arguments are valid. +""" +import warnings + +from pygmt.exceptions import GMTInvalidInput + + +def validate_output_type(output_type, outfile=None): + """ + Check if the 'output_type' and 'outfile' parameters are valid. + + Parameters + ---------- + output_type : str + The type for a table output. Valid values are "file", "numpy", and + "pandas". + outfile : str + The file name for the output table file. Required if + ``output_type="file"``. + + Returns + ------- + str + The original or corrected output type. + """ + if output_type not in ["file", "numpy", "pandas"]: + raise GMTInvalidInput( + "Must specify 'output_type' either as 'file', 'numpy', or 'pandas'." + ) + if output_type == "file" and outfile is None: + raise GMTInvalidInput("Must specify 'outfile' for output_type='file'.") + if output_type != "file" and outfile is not None: + msg = ( + f"Changing 'output_type' from '{output_type}' to 'file' " + "since 'outfile' parameter is set. Please use output_type='file' " + "to silence this warning." + ) + warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) + output_type = "file" + return output_type diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index cc189a53053..1a82a88a694 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -1,12 +1,17 @@ """ filter1d - Time domain filtering of 1-D data tables """ -import warnings import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias +from pygmt.helpers import ( + GMTTempFile, + build_arg_string, + fmt_docstring, + use_alias, + validate_output_type, +) @fmt_docstring @@ -109,18 +114,8 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): """ if kwargs.get("F") is None: raise GMTInvalidInput("Pass a required argument to 'filter_type'.") - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput("Must specify format as either numpy, pandas, or file.") - if outfile is not None and output_type != "file": - msg = ( - f"Changing `output_type` of filter1d from '{output_type}' to 'file' " - "since `outfile` parameter is set. Please use `output_type='file'` " - "to silence this warning." - ) - warnings.warn(msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" - elif output_type == "file" and outfile is None: - raise GMTInvalidInput("Must specify outfile for ASCII output.") + + output_type = validate_output_type(output_type, outfile=outfile) with GMTTempFile() as tmpfile: with Session() as lib: diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index bd3af2962b8..236290c698f 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -1,7 +1,6 @@ """ grd2xyz - Convert grid to data table """ -import warnings import pandas as pd import xarray as xr @@ -13,6 +12,7 @@ fmt_docstring, kwargs_to_strings, use_alias, + validate_output_type, ) __doctest_skip__ = ["grd2xyz"] @@ -143,21 +143,7 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): 0 10.0 25.0 863.0 1 10.5 25.0 985.5 """ - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." - ) - - if outfile is not None and output_type != "file": - msg = ( - f"Changing 'output_type' of grd2xyz from '{output_type}' to 'file' " - "since 'outfile' parameter is set. Please use output_type='file' " - "to silence this warning." - ) - warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" - elif outfile is None and output_type == "file": - raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + output_type = validate_output_type(output_type, outfile=outfile) if kwargs.get("o") is not None and output_type == "pandas": raise GMTInvalidInput( diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index 0d795a7dcbd..e66ebe79daa 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -1,7 +1,6 @@ """ grdhisteq - Perform histogram equalization for a grid. """ -import warnings import numpy as np import pandas as pd @@ -13,6 +12,7 @@ fmt_docstring, kwargs_to_strings, use_alias, + validate_output_type, ) from pygmt.io import load_dataarray @@ -321,23 +321,11 @@ def compute_bins( This method does a weighted histogram equalization for geographic grids to account for node area varying with latitude. """ - # Return a pandas.DataFrame if ``outfile`` is not set - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." - ) + output_type = validate_output_type(output_type, outfile=outfile) if header is not None and output_type != "file": raise GMTInvalidInput("'header' is only allowed with output_type='file'.") - if isinstance(outfile, str) and output_type != "file": - msg = ( - f"Changing 'output_type' from '{output_type}' to 'file' " - "since 'outfile' parameter is set. Please use output_type='file' " - "to silence this warning." - ) - warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" with GMTTempFile(suffix=".txt") as tmpfile: if output_type != "file": outfile = tmpfile.name diff --git a/pygmt/src/grdvolume.py b/pygmt/src/grdvolume.py index ba4a2d871bc..77f7881b8ff 100644 --- a/pygmt/src/grdvolume.py +++ b/pygmt/src/grdvolume.py @@ -3,13 +3,13 @@ """ import pandas as pd from pygmt.clib import Session -from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, use_alias, + validate_output_type, ) __doctest_skip__ = ["grdvolume"] @@ -101,12 +101,7 @@ def grdvolume(grid, output_type="pandas", outfile=None, **kwargs): 3 350 2.018302e+12 5.222640e+14 258.764032 4 400 1.857370e+12 4.252699e+14 228.963499 """ - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - """Must specify format as either numpy, pandas, or file.""" - ) - if output_type == "file" and outfile is None: - raise GMTInvalidInput("""Must specify outfile for ASCII output.""") + output_type = validate_output_type(output_type, outfile=outfile) with GMTTempFile() as tmpfile: with Session() as lib: diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index b3578941dcd..60661b1242a 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -2,7 +2,6 @@ triangulate - Delaunay triangulation or Voronoi partitioning and gridding of Cartesian data. """ -import warnings import pandas as pd from pygmt.clib import Session @@ -13,6 +12,7 @@ fmt_docstring, kwargs_to_strings, use_alias, + validate_output_type, ) from pygmt.io import load_dataarray @@ -357,20 +357,7 @@ def delaunay_triples( # pylint: disable=too-many-arguments,too-many-locals ``triangulate`` is a Cartesian or small-geographic area operator and is unaware of periodic or polar boundary conditions. """ - # Return a pandas.DataFrame if ``outfile`` is not set - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." - ) - - if isinstance(outfile, str) and output_type != "file": - msg = ( - f"Changing 'output_type' from '{output_type}' to 'file' " - "since 'outfile' parameter is set. Please use output_type='file' " - "to silence this warning." - ) - warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" + output_type = validate_output_type(output_type, outfile) # Return a pandas.DataFrame if ``outfile`` is not set with GMTTempFile(suffix=".txt") as tmpfile: From 4a42c4db81556813117c83b467d2344ce7c0d84f Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 16:27:35 +0800 Subject: [PATCH 42/85] Use pd.DataFrame.from_dict to construct the DataFrame object --- pygmt/helpers/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 1a7cc28b778..2b2232c3379 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -578,7 +578,9 @@ def return_table(session, output_type, vfile, colnames): # Read the virtual file as a GMT dataset and convert to vectors vectors = session.read_virtualfile(vfile, kind="dataset").contents.to_vectors() # pandas.DataFrame output - result = pd.DataFrame(data=vectors, index=colnames).T + if colnames is None: + colnames = pd.RangeIndex(0, len(vectors)) + result = pd.DataFrame.from_dict(dict(zip(colnames, vectors))) if output_type == "pandas": return result # NumPy.ndarray output From de31384da5aa444938361345a1720a38065af84b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 16:31:50 +0800 Subject: [PATCH 43/85] Refactor grdvolume.py --- pygmt/src/grdvolume.py | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/pygmt/src/grdvolume.py b/pygmt/src/grdvolume.py index ba4a2d871bc..a5287c6e55f 100644 --- a/pygmt/src/grdvolume.py +++ b/pygmt/src/grdvolume.py @@ -1,14 +1,13 @@ """ grdvolume - Calculate grid volume and area constrained by a contour. """ -import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, ) @@ -108,23 +107,17 @@ def grdvolume(grid, output_type="pandas", outfile=None, **kwargs): if output_type == "file" and outfile is None: raise GMTInvalidInput("""Must specify outfile for ASCII output.""") - with GMTTempFile() as tmpfile: - with Session() as lib: - file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) - with file_context as infile: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module="grdvolume", - args=build_arg_string(kwargs, infile=infile, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - if output_type == "numpy": - result = result.to_numpy() - return result + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as vingrid, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: + lib.call_module( + module="grdvolume", + args=build_arg_string(kwargs, infile=vingrid, outfile=vouttbl), + ) + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=None, + ) From 333c5eb3b015d322c6ad5492e63469a35ea81072 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 16:41:23 +0800 Subject: [PATCH 44/85] Refactor select.py --- pygmt/src/select.py | 64 ++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/pygmt/src/select.py b/pygmt/src/select.py index 294d4e282d1..fd2d267b704 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -1,13 +1,16 @@ """ select - Select data table subsets based on multiple spatial criteria. """ +import warnings + import pandas as pd from pygmt.clib import Session +from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, ) @@ -40,7 +43,7 @@ w="wrap", ) @kwargs_to_strings(M="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def select(data=None, outfile=None, **kwargs): +def select(data=None, output_type="pandas", outfile=None, **kwargs): r""" Select data table subsets based on multiple spatial criteria. @@ -196,25 +199,38 @@ def select(data=None, outfile=None, **kwargs): >>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21]) """ - with GMTTempFile(suffix=".csv") as tmpfile: - with Session() as lib: - table_context = lib.virtualfile_from_data(check_kind="vector", data=data) - with table_context as infile: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module="select", - args=build_arg_string(kwargs, infile=infile, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - try: - column_names = data.columns.to_list() - result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) - except AttributeError: # 'str' object has no attribute 'columns' - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - return result + if output_type not in ["numpy", "pandas", "file"]: + raise GMTInvalidInput( + "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." + ) + + if outfile is not None and output_type != "file": + msg = ( + f"Changing 'output_type' from '{output_type}' to 'file' " + "since 'outfile' parameter is set. Please use output_type='file' " + "to silence this warning." + ) + warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) + output_type = "file" + elif outfile is None and output_type == "file": + raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="vector", data=data + ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: + lib.call_module( + module="select", + args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), + ) + + column_names = ( + data.columns.to_list() if isinstance(data, pd.DataFrame) else None + ) + + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=column_names, + ) From 061355f34feb25cd92a7aa7826ebba4adc284b5b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 17:58:01 +0800 Subject: [PATCH 45/85] Use validate_output_type --- pygmt/src/select.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/pygmt/src/select.py b/pygmt/src/select.py index fd2d267b704..4d3f1ad05c0 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -1,17 +1,16 @@ """ select - Select data table subsets based on multiple spatial criteria. """ -import warnings import pandas as pd from pygmt.clib import Session -from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( build_arg_string, fmt_docstring, kwargs_to_strings, return_table, use_alias, + validate_output_type, ) __doctest_skip__ = ["select"] @@ -198,22 +197,7 @@ def select(data=None, output_type="pandas", outfile=None, **kwargs): >>> # longitudes 246 and 247 and latitudes 20 and 21 >>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21]) """ - - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." - ) - - if outfile is not None and output_type != "file": - msg = ( - f"Changing 'output_type' from '{output_type}' to 'file' " - "since 'outfile' parameter is set. Please use output_type='file' " - "to silence this warning." - ) - warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" - elif outfile is None and output_type == "file": - raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + output_type = validate_output_type(output_type, outfile=outfile) with Session() as lib: with lib.virtualfile_from_data( From f36448f324d86874461eee05cf9c305361b1a658 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 18:02:47 +0800 Subject: [PATCH 46/85] Refactor blockm* --- pygmt/src/blockm.py | 88 ++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py index 09e37638f57..351e13021ff 100644 --- a/pygmt/src/blockm.py +++ b/pygmt/src/blockm.py @@ -5,17 +5,18 @@ import pandas as pd from pygmt.clib import Session from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, + validate_output_type, ) __doctest_skip__ = ["blockmean", "blockmedian", "blockmode"] -def _blockm(block_method, data, x, y, z, outfile, **kwargs): +def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): r""" Block average (x, y, z) data tables by mean, median, or mode estimation. @@ -41,31 +42,27 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs): - None if ``outfile`` is set (filtered output will be stored in file set by ``outfile``) """ - with GMTTempFile(suffix=".csv") as tmpfile: - with Session() as lib: - table_context = lib.virtualfile_from_data( - check_kind="vector", data=data, x=x, y=y, z=z, required_z=True + output_type = validate_output_type(output_type, outfile=outfile) + + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="vector", data=data, x=x, y=y, z=z, required_z=True + ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: + lib.call_module( + module=block_method, + args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - # Run blockm* on data table - with table_context as infile: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module=block_method, - args=build_arg_string(kwargs, infile=infile, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - try: - column_names = data.columns.to_list() - result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) - except AttributeError: # 'str' object has no attribute 'columns' - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - return result + + column_names = ( + data.columns.to_list() if isinstance(data, pd.DataFrame) else None + ) + + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=column_names, + ) @fmt_docstring @@ -86,7 +83,9 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs): w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs): +def blockmean( + data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs +): r""" Block average (x, y, z) data tables by mean estimation. @@ -161,7 +160,14 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs): ... ) """ return _blockm( - block_method="blockmean", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs + block_method="blockmean", + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + **kwargs, ) @@ -182,7 +188,9 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs): w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs): +def blockmedian( + data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs +): r""" Block average (x, y, z) data tables by median estimation. @@ -248,7 +256,14 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs): ... ) """ return _blockm( - block_method="blockmedian", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs + block_method="blockmedian", + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + **kwargs, ) @@ -269,7 +284,9 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs): w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs): +def blockmode( + data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs +): r""" Block average (x, y, z) data tables by mode estimation. @@ -335,5 +352,12 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs): ... ) """ return _blockm( - block_method="blockmode", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs + block_method="blockmode", + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + **kwargs, ) From 1ca2b7658574ce6ceea323953f6438504498bcc3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 18:06:13 +0800 Subject: [PATCH 47/85] Refactor filter1d --- pygmt/src/filter1d.py | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index 1a82a88a694..1d9be34f068 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -2,13 +2,12 @@ filter1d - Time domain filtering of 1-D data tables """ -import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, + return_table, use_alias, validate_output_type, ) @@ -117,23 +116,18 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): output_type = validate_output_type(output_type, outfile=outfile) - with GMTTempFile() as tmpfile: - with Session() as lib: - file_context = lib.virtualfile_from_data(check_kind="vector", data=data) - with file_context as infile: - if outfile is None: - outfile = tmpfile.name - lib.call_module( - module="filter1d", - args=build_arg_string(kwargs, infile=infile, outfile=outfile), - ) - - # Read temporary csv output to a pandas table - if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame - result = pd.read_csv(tmpfile.name, sep="\t", comment=">") - elif outfile != tmpfile.name: # return None if outfile set, output in outfile - result = None - - if output_type == "numpy": - result = result.to_numpy() - return result + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="vector", data=data + ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: + lib.call_module( + module="filter1d", + args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), + ) + + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=None, + ) From 2760a2165c364c69c70ad78dde3723740afa5559 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 21:12:33 +0800 Subject: [PATCH 48/85] Fix a bug in filter1d test --- pygmt/tests/test_filter1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/tests/test_filter1d.py b/pygmt/tests/test_filter1d.py index c03b3e4b8a5..51c9042a5a5 100644 --- a/pygmt/tests/test_filter1d.py +++ b/pygmt/tests/test_filter1d.py @@ -25,7 +25,7 @@ def test_filter1d_no_outfile(data): Test filter1d with no set outgrid. """ result = filter1d(data=data, filter_type="g5") - assert result.shape == (670, 2) + assert result.shape == (671, 2) def test_filter1d_file_output(data): From e99cd1eea5c99694a9258886318550947737bb21 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 28 Oct 2023 21:52:50 +0800 Subject: [PATCH 49/85] Refactor project.py --- pygmt/src/project.py | 64 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/pygmt/src/project.py b/pygmt/src/project.py index 6ce7da4b521..ca9ddc839d0 100644 --- a/pygmt/src/project.py +++ b/pygmt/src/project.py @@ -1,15 +1,15 @@ """ project - Project data onto lines or great circles, or generate tracks. """ -import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( - GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, + validate_output_type, ) @@ -31,7 +31,9 @@ f="coltypes", ) @kwargs_to_strings(E="sequence", L="sequence", T="sequence", W="sequence", C="sequence") -def project(data=None, x=None, y=None, z=None, outfile=None, **kwargs): +def project( + data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs +): r""" Project data onto lines or great circles, or generate tracks. @@ -222,31 +224,31 @@ def project(data=None, x=None, y=None, z=None, outfile=None, **kwargs): "The `convention` parameter is not allowed with `generate`." ) - with GMTTempFile(suffix=".csv") as tmpfile: - if outfile is None: # Output to tmpfile if outfile is not set - outfile = tmpfile.name - with Session() as lib: - if kwargs.get("G") is None: - table_context = lib.virtualfile_from_data( - check_kind="vector", data=data, x=x, y=y, z=z, required_z=False - ) - - # Run project on the temporary (csv) data table - with table_context as infile: - arg_str = build_arg_string(kwargs, infile=infile, outfile=outfile) - else: - arg_str = build_arg_string(kwargs, outfile=outfile) - lib.call_module(module="project", args=arg_str) - - # if user did not set outfile, return pd.DataFrame - if outfile == tmpfile.name: - if kwargs.get("G") is not None: - column_names = list("rsp") - result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) - else: - result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") - # return None if outfile set, output in outfile - elif outfile != tmpfile.name: - result = None - - return result + output_type = validate_output_type(output_type, outfile=outfile) + + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="vector", + data=data, + x=x, + y=y, + z=z, + required_z=False, + required_data=False, + ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: + lib.call_module( + module="project", + args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), + ) + + if kwargs.get("G") is not None: + column_names = list("rsp") + else: + column_names = None + + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=column_names, + ) From 4ff0a15510e093a2818e7c0892ad8b8cd143a342 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 29 Oct 2023 00:30:41 +0800 Subject: [PATCH 50/85] Refactor the table part of grdhisteq --- pygmt/src/grdhisteq.py | 68 ++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index e66ebe79daa..a205e78bac9 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -3,7 +3,6 @@ """ import numpy as np -import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( @@ -11,6 +10,7 @@ build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, validate_output_type, ) @@ -110,32 +110,31 @@ def _grdhisteq(grid, output_type, **kwargs): """ with Session() as lib: - file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) - with file_context as infile: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as vingrid, lib.virtualfile_to_data( + kind="dataset", fname=kwargs.get("D") + ) as vouttbl: + kwargs["D"] = vouttbl lib.call_module( - module="grdhisteq", args=build_arg_string(kwargs, infile=infile) + module="grdhisteq", args=build_arg_string(kwargs, infile=vingrid) ) - if output_type == "file": - return None - if output_type == "xarray": - return load_dataarray(kwargs["G"]) - - result = pd.read_csv( - filepath_or_buffer=kwargs["D"], - sep="\t", - header=None, - names=["start", "stop", "bin_id"], - dtype={ - "start": np.float32, - "stop": np.float32, - "bin_id": np.uint32, - }, - ) - if output_type == "numpy": - return result.to_numpy() + if output_type == "xarray": + return load_dataarray(kwargs["G"]) - return result.set_index("bin_id") + result = return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=["start", "stop", "bin_id"], + ) + if output_type == "pandas": + result = result.astype( + {"start": np.float32, "stop": np.float32, "bin_id": np.uint32} + ) + return result.set_index("bin_id") + return result @staticmethod @fmt_docstring @@ -326,16 +325,13 @@ def compute_bins( if header is not None and output_type != "file": raise GMTInvalidInput("'header' is only allowed with output_type='file'.") - with GMTTempFile(suffix=".txt") as tmpfile: - if output_type != "file": - outfile = tmpfile.name - return grdhisteq._grdhisteq( - grid, - output_type=output_type, - outfile=outfile, - divisions=divisions, - quadratic=quadratic, - verbose=verbose, - region=region, - header=header, - ) + return grdhisteq._grdhisteq( + grid, + output_type=output_type, + outfile=outfile, + divisions=divisions, + quadratic=quadratic, + verbose=verbose, + region=region, + header=header, + ) From 1bd46ad0a2480839845bb5d8a76c9f364b548992 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 29 Oct 2023 00:42:14 +0800 Subject: [PATCH 51/85] Refactor the table part of triangulate --- pygmt/src/triangulate.py | 71 ++++++++++++++++----------------- pygmt/tests/test_triangulate.py | 7 +++- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index 60661b1242a..f83f893e513 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -3,7 +3,6 @@ Cartesian data. """ -import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( @@ -11,6 +10,7 @@ build_arg_string, fmt_docstring, kwargs_to_strings, + return_table, use_alias, validate_output_type, ) @@ -126,26 +126,27 @@ def _triangulate( ``outgrid`` or ``outfile``) """ with Session() as lib: - table_context = lib.virtualfile_from_data( + with lib.virtualfile_from_data( check_kind="vector", data=data, x=x, y=y, z=z, required_z=False - ) - with table_context as infile: + ) as vintbl, lib.virtualfile_to_data( + kind="dataset", fname=outfile + ) as vouttbl: # table output if outgrid is unset, else output to outgrid if (outgrid := kwargs.get("G")) is None: - kwargs.update({">": outfile}) + kwargs.update({">": vouttbl}) lib.call_module( - module="triangulate", args=build_arg_string(kwargs, infile=infile) + module="triangulate", args=build_arg_string(kwargs, infile=vintbl) ) - if output_type == "file": - return None - if output_type == "xarray": - return load_dataarray(outgrid) + if output_type == "xarray": + return load_dataarray(outgrid) - result = pd.read_csv(outfile, sep="\t", header=None) - if output_type == "numpy": - return result.to_numpy() - return result + return return_table( + session=lib, + output_type=output_type, + vfile=vouttbl, + colnames=None, + ) @staticmethod @fmt_docstring @@ -359,26 +360,22 @@ def delaunay_triples( # pylint: disable=too-many-arguments,too-many-locals """ output_type = validate_output_type(output_type, outfile) - # Return a pandas.DataFrame if ``outfile`` is not set - with GMTTempFile(suffix=".txt") as tmpfile: - if output_type != "file": - outfile = tmpfile.name - return triangulate._triangulate( - data=data, - x=x, - y=y, - z=z, - output_type=output_type, - outfile=outfile, - projection=projection, - verbose=verbose, - binary=binary, - nodata=nodata, - find=find, - coltypes=coltypes, - header=header, - incols=incols, - skiprows=skiprows, - wrap=wrap, - **kwargs, - ) + return triangulate._triangulate( + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + projection=projection, + verbose=verbose, + binary=binary, + nodata=nodata, + find=find, + coltypes=coltypes, + header=header, + incols=incols, + skiprows=skiprows, + wrap=wrap, + **kwargs, + ) diff --git a/pygmt/tests/test_triangulate.py b/pygmt/tests/test_triangulate.py index 5ba5de998c9..466ddea4f78 100644 --- a/pygmt/tests/test_triangulate.py +++ b/pygmt/tests/test_triangulate.py @@ -43,7 +43,8 @@ def fixture_expected_dataframe(): [4, 6, 1], [3, 4, 2], [9, 3, 8], - ] + ], + dtype=float, ) @@ -117,7 +118,9 @@ def test_delaunay_triples_outfile(dataframe, expected_dataframe): assert len(record) == 1 # check that only one warning was raised assert result is None # return value is None assert Path(tmpfile.name).stat().st_size > 0 - temp_df = pd.read_csv(filepath_or_buffer=tmpfile.name, sep="\t", header=None) + temp_df = pd.read_csv( + filepath_or_buffer=tmpfile.name, sep="\t", header=None, dtype=float + ) pd.testing.assert_frame_equal(left=temp_df, right=expected_dataframe) From 48f94cf5199d8d488566c2a79778f3d0be73d2fd Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 29 Oct 2023 11:05:36 +0800 Subject: [PATCH 52/85] Fix a bug in grdhisteq --- pygmt/src/grdhisteq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index a205e78bac9..e0662a2e3c4 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -108,14 +108,14 @@ def _grdhisteq(grid, output_type, **kwargs): ------- :func:`pygmt.grd2cpt` """ - with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid ) as vingrid, lib.virtualfile_to_data( kind="dataset", fname=kwargs.get("D") ) as vouttbl: - kwargs["D"] = vouttbl + if kwargs.get("D"): + kwargs["D"] = vouttbl lib.call_module( module="grdhisteq", args=build_arg_string(kwargs, infile=vingrid) ) From 49625dff2d275020ee5299309f8916f2acb88ed1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 29 Oct 2023 11:58:09 +0800 Subject: [PATCH 53/85] Fix grdhisteq --- pygmt/src/grdhisteq.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index e0662a2e3c4..0f0b87636d8 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -63,7 +63,7 @@ class grdhisteq: # pylint: disable=invalid-name h="header", ) @kwargs_to_strings(R="sequence") - def _grdhisteq(grid, output_type, **kwargs): + def _grdhisteq(caller, grid, output_type, **kwargs): r""" Perform histogram equalization for a grid. @@ -108,13 +108,16 @@ def _grdhisteq(grid, output_type, **kwargs): ------- :func:`pygmt.grd2cpt` """ + if caller not in ["compute_bins", "equalize_grid"]: + raise GMTInvalidInput(f"Unrecognized caller: {caller}.") + with Session() as lib: with lib.virtualfile_from_data( check_kind="raster", data=grid ) as vingrid, lib.virtualfile_to_data( kind="dataset", fname=kwargs.get("D") ) as vouttbl: - if kwargs.get("D"): + if caller == "compute_bins": kwargs["D"] = vouttbl lib.call_module( module="grdhisteq", args=build_arg_string(kwargs, infile=vingrid) @@ -217,6 +220,7 @@ def equalize_grid( else: raise GMTInvalidInput("Must specify 'outgrid' as a string or None.") return grdhisteq._grdhisteq( + caller="equalize_grid", grid=grid, output_type=output_type, outgrid=outgrid, @@ -326,7 +330,8 @@ def compute_bins( raise GMTInvalidInput("'header' is only allowed with output_type='file'.") return grdhisteq._grdhisteq( - grid, + caller="compute_bins", + grid=grid, output_type=output_type, outfile=outfile, divisions=divisions, From 473dc7b3c4a91de5594bb22c9a0536acb4784b1a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 30 Oct 2023 23:11:15 +0800 Subject: [PATCH 54/85] Fix merge errors --- pygmt/src/blockm.py | 4 ++-- pygmt/src/project.py | 4 ++-- pygmt/src/select.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py index 351e13021ff..63776ccaf6f 100644 --- a/pygmt/src/blockm.py +++ b/pygmt/src/blockm.py @@ -10,7 +10,7 @@ kwargs_to_strings, return_table, use_alias, - validate_output_type, + validate_output_table_type, ) __doctest_skip__ = ["blockmean", "blockmedian", "blockmode"] @@ -42,7 +42,7 @@ def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): - None if ``outfile`` is set (filtered output will be stored in file set by ``outfile``) """ - output_type = validate_output_type(output_type, outfile=outfile) + output_type = validate_output_table_type(output_type, outfile=outfile) with Session() as lib: with lib.virtualfile_from_data( diff --git a/pygmt/src/project.py b/pygmt/src/project.py index ca9ddc839d0..3214b61c1a9 100644 --- a/pygmt/src/project.py +++ b/pygmt/src/project.py @@ -9,7 +9,7 @@ kwargs_to_strings, return_table, use_alias, - validate_output_type, + validate_output_table_type, ) @@ -224,7 +224,7 @@ def project( "The `convention` parameter is not allowed with `generate`." ) - output_type = validate_output_type(output_type, outfile=outfile) + output_type = validate_output_table_type(output_type, outfile=outfile) with Session() as lib: with lib.virtualfile_from_data( diff --git a/pygmt/src/select.py b/pygmt/src/select.py index 4d3f1ad05c0..1d96cac99b8 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -10,7 +10,7 @@ kwargs_to_strings, return_table, use_alias, - validate_output_type, + validate_output_table_type, ) __doctest_skip__ = ["select"] @@ -197,7 +197,7 @@ def select(data=None, output_type="pandas", outfile=None, **kwargs): >>> # longitudes 246 and 247 and latitudes 20 and 21 >>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21]) """ - output_type = validate_output_type(output_type, outfile=outfile) + output_type = validate_output_table_type(output_type, outfile=outfile) with Session() as lib: with lib.virtualfile_from_data( From df48ada2f17a15c10cd39af1fb0c38dae83fa131 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 30 Oct 2023 23:26:59 +0800 Subject: [PATCH 55/85] grdtrack: Use validate_output_table_type --- pygmt/src/grdtrack.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index cbc1f9d7faa..4d186363b8d 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -1,7 +1,6 @@ """ grdtrack - Sample grids at specified (x,y) locations. """ -import warnings import pandas as pd from pygmt.clib import Session @@ -12,6 +11,7 @@ kwargs_to_strings, return_table, use_alias, + validate_output_table_type, ) __doctest_skip__ = ["grdtrack"] @@ -296,21 +296,7 @@ def grdtrack( if hasattr(points, "columns") and newcolname is None: raise GMTInvalidInput("Please pass in a str to 'newcolname'") - if output_type not in ["numpy", "pandas", "file"]: - raise GMTInvalidInput( - "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." - ) - - if outfile is not None and output_type != "file": - msg = ( - f"Changing 'output_type' from '{output_type}' to 'file' " - "since 'outfile' parameter is set. Please use output_type='file' " - "to silence this warning." - ) - warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) - output_type = "file" - elif outfile is None and output_type == "file": - raise GMTInvalidInput("Must specify 'outfile' for ASCII output.") + output_type = validate_output_table_type(output_type, outfile=outfile) if isinstance(points, pd.DataFrame): column_names = points.columns.to_list() + [newcolname] From 3b5e4f8580814654b8d903e0db4085f769f6b2ff Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 1 Nov 2023 12:10:49 +0800 Subject: [PATCH 56/85] Formatting --- pygmt/src/filter1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index afc74afe7c3..223e8522496 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -130,4 +130,4 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): output_type=output_type, vfile=vouttbl, colnames=None, - ) \ No newline at end of file + ) From 3d5147d422763ef24b3a9b415933e00d535f8e92 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 13 Nov 2023 17:51:47 +0800 Subject: [PATCH 57/85] Consistently use column_names --- pygmt/helpers/utils.py | 10 +++++----- pygmt/src/blockm.py | 2 +- pygmt/src/filter1d.py | 2 +- pygmt/src/grd2xyz.py | 6 +++--- pygmt/src/grdhisteq.py | 2 +- pygmt/src/grdtrack.py | 2 +- pygmt/src/grdvolume.py | 2 +- pygmt/src/project.py | 2 +- pygmt/src/select.py | 2 +- pygmt/src/triangulate.py | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 2b2232c3379..513cea177d4 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -553,7 +553,7 @@ def args_in_kwargs(args, kwargs): ) -def return_table(session, output_type, vfile, colnames): +def return_table(session, output_type, vfile, column_names): """ Return an output table from a virtual file based on the output type. @@ -565,7 +565,7 @@ def return_table(session, output_type, vfile, colnames): The output type. Can be ``"pandas"``, ``"numpy"``, or ``"file"``. vfile : str The virtual file name. - colnames : list of str + column_names : list of str The column names for the :class:`pandas.DataFrame` output. Returns @@ -578,9 +578,9 @@ def return_table(session, output_type, vfile, colnames): # Read the virtual file as a GMT dataset and convert to vectors vectors = session.read_virtualfile(vfile, kind="dataset").contents.to_vectors() # pandas.DataFrame output - if colnames is None: - colnames = pd.RangeIndex(0, len(vectors)) - result = pd.DataFrame.from_dict(dict(zip(colnames, vectors))) + if column_names is None: + column_names = pd.RangeIndex(0, len(vectors)) + result = pd.DataFrame.from_dict(dict(zip(column_names, vectors))) if output_type == "pandas": return result # NumPy.ndarray output diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py index 63776ccaf6f..b766781e86d 100644 --- a/pygmt/src/blockm.py +++ b/pygmt/src/blockm.py @@ -61,7 +61,7 @@ def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=column_names, + column_names=column_names, ) diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index 223e8522496..c612b9bcf30 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -129,5 +129,5 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=None, + column_names=None, ) diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 1172c496fb9..f4d2476cb3f 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -151,11 +151,11 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): ) # Set the default column names for the pandas dataframe header - dataframe_header = ["x", "y", "z"] + column_names = ["x", "y", "z"] # Let output pandas column names match input DataArray dimension names if isinstance(grid, xr.DataArray): # Reverse the dims because it is rows, columns ordered. - dataframe_header = [grid.dims[1], grid.dims[0], grid.name] + column_names = [grid.dims[1], grid.dims[0], grid.name] with Session() as lib: with lib.virtualfile_from_data( @@ -170,5 +170,5 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=dataframe_header, + column_names=column_names, ) diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index 3da5c6b46a4..34428a5de91 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -130,7 +130,7 @@ def _grdhisteq(caller, grid, output_type, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=["start", "stop", "bin_id"], + column_names=["start", "stop", "bin_id"], ) if output_type == "pandas": result = result.astype( diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index 4d186363b8d..c1791d4b852 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -321,5 +321,5 @@ def grdtrack( session=lib, output_type=output_type, vfile=vouttbl, - colnames=column_names, + column_names=column_names, ) diff --git a/pygmt/src/grdvolume.py b/pygmt/src/grdvolume.py index af9932f9a37..8ce5de0a1c5 100644 --- a/pygmt/src/grdvolume.py +++ b/pygmt/src/grdvolume.py @@ -114,5 +114,5 @@ def grdvolume(grid, output_type="pandas", outfile=None, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=None, + column_names=None, ) diff --git a/pygmt/src/project.py b/pygmt/src/project.py index 3214b61c1a9..442b2dde081 100644 --- a/pygmt/src/project.py +++ b/pygmt/src/project.py @@ -250,5 +250,5 @@ def project( session=lib, output_type=output_type, vfile=vouttbl, - colnames=column_names, + column_names=column_names, ) diff --git a/pygmt/src/select.py b/pygmt/src/select.py index 1d96cac99b8..2589aff9896 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -216,5 +216,5 @@ def select(data=None, output_type="pandas", outfile=None, **kwargs): session=lib, output_type=output_type, vfile=vouttbl, - colnames=column_names, + column_names=column_names, ) diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index ffaa93467de..02b04ab0d22 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -145,7 +145,7 @@ def _triangulate( session=lib, output_type=output_type, vfile=vouttbl, - colnames=None, + column_names=None, ) @staticmethod From b9454ce0c29e83b194650906704fa1aa26f398f8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 14 Nov 2023 19:15:17 +0800 Subject: [PATCH 58/85] Always convert text data to string dtype --- pygmt/helpers/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 513cea177d4..014fa70e69f 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -581,6 +581,13 @@ def return_table(session, output_type, vfile, column_names): if column_names is None: column_names = pd.RangeIndex(0, len(vectors)) result = pd.DataFrame.from_dict(dict(zip(column_names, vectors))) + # convert text data from object dtype to string dtype + result = result.convert_dtypes( + convert_string=True, + convert_integer=False, + convert_floating=False, + convert_boolean=False, + ) if output_type == "pandas": return result # NumPy.ndarray output From 87424677f37158be872c42c344ed02c0aa68a261 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 14 Nov 2023 20:11:27 +0800 Subject: [PATCH 59/85] Change the to_vectors method to to_dataframe which returns a pd.DataFrame --- pygmt/datatypes.py | 38 +++++++++++++++++--------------------- pygmt/helpers/utils.py | 12 +++++------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index f240015678c..4bf540400bf 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -4,6 +4,7 @@ import ctypes as ctp import numpy as np +import pandas as pd class GMT_DATASET(ctp.Structure): # pylint: disable=invalid-name,too-few-public-methods @@ -147,14 +148,19 @@ class GMT_DATASEGMENT(ctp.Structure): ("hidden", ctp.c_void_p), ] - def to_vectors(self): + def to_dataframe(self): """ - Convert a GMT_DATASET object to a list of vectors. + Convert a GMT_DATASET object to a :class:`pandas.DataFrame` object. Currently, the number of columns in all segments of all tables are assumed to be the same. The same column in all segments of all tables are concatenated. The trailing text column is also concatenated as a - string vector. + single string column. + + Returns + ------- + :class:`pandas.DataFrame` + A :class:`pandas.DataFrame` object. Examples -------- @@ -176,24 +182,14 @@ def to_vectors(self): ... "read", f"{tmpfile.name} {vouttbl} -Td" ... ) ... ds = lib.read_virtualfile(vouttbl, kind="dataset") - ... vectors = ds.contents.to_vectors() + ... df = ds.contents.to_dataframe() ... - >>> len(vectors) # 4 columns - 4 - >>> vectors[0] - array([ 1., 4., 7., 10.]) - >>> vectors[1] - array([ 2., 5., 8., 11.]) - >>> vectors[2] - array([ 3., 6., 9., 12.]) - >>> vectors[3] - array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90', - 'TEXT123 TEXT456789'], dtype='>> df + 0 1 2 3 + 0 1.0 2.0 3.0 TEXT1 TEXT23 + 1 4.0 5.0 6.0 TEXT4 TEXT567 + 2 7.0 8.0 9.0 TEXT8 TEXT90 + 3 10.0 11.0 12.0 TEXT123 TEXT456789 """ vectors = [] for icol in range(self.n_columns): @@ -218,4 +214,4 @@ def to_vectors(self): if textvector: vectors.append(np.char.decode(textvector)) - return vectors + return pd.concat([pd.Series(v) for v in vectors], axis=1) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 014fa70e69f..be26ea34e23 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -11,7 +11,6 @@ import webbrowser from collections.abc import Iterable -import pandas as pd import xarray as xr from pygmt.exceptions import GMTInvalidInput @@ -575,12 +574,11 @@ def return_table(session, output_type, vfile, column_names): """ if output_type == "file": # Already written to file, so return None return None - # Read the virtual file as a GMT dataset and convert to vectors - vectors = session.read_virtualfile(vfile, kind="dataset").contents.to_vectors() - # pandas.DataFrame output - if column_names is None: - column_names = pd.RangeIndex(0, len(vectors)) - result = pd.DataFrame.from_dict(dict(zip(column_names, vectors))) + # Read the virtual file as a GMT dataset and convert to pandas.DataFrame + result = session.read_virtualfile(vfile, kind="dataset").contents.to_dataframe() + # assign column names + if column_names is not None: + result.columns = column_names # convert text data from object dtype to string dtype result = result.convert_dtypes( convert_string=True, From b7e082347de9a009ef314596a199207681c6b154 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 18 Nov 2023 22:18:49 +0800 Subject: [PATCH 60/85] Requires pandas>=1.2.0 --- environment.yml | 2 +- pygmt/helpers/utils.py | 2 +- pyproject.toml | 2 +- requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 73b45304436..7cd09ea2a40 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,7 @@ dependencies: # Required dependencies - gmt=6.4.0 - numpy>=1.22 - - pandas + - pandas>=1.2.0 - xarray - netCDF4 - packaging diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index be26ea34e23..5ec8a8b0a31 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -583,7 +583,7 @@ def return_table(session, output_type, vfile, column_names): result = result.convert_dtypes( convert_string=True, convert_integer=False, - convert_floating=False, + convert_floating=False, # requires pandas>=1.2.0 convert_boolean=False, ) if output_type == "pandas": diff --git a/pyproject.toml b/pyproject.toml index 850664d0db9..d9251c5e6a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] dependencies = [ "numpy>=1.22", - "pandas", + "pandas>=1.2.0", "xarray", "netCDF4", "packaging", diff --git a/requirements.txt b/requirements.txt index 37777a99d81..d9afdee98bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # Required packages numpy>=1.22 -pandas +pandas>=1.2.0 xarray netCDF4 packaging From e9de4bbd53125548a98c67576a0ea47073356fb8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 18 Nov 2023 22:19:08 +0800 Subject: [PATCH 61/85] Fix formatting --- pygmt/src/grdtrack.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index c1791d4b852..78d75e61a3b 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -308,9 +308,7 @@ def grdtrack( check_kind="raster", data=grid ) as vingrd, lib.virtualfile_from_data( check_kind="vector", data=points, required_data=False - ) as vintbl, lib.virtualfile_to_data( - kind="dataset", fname=outfile - ) as vouttbl: + ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: kwargs["G"] = vingrd lib.call_module( module="grdtrack", From 2cba0c7f15d390c94a49eb9f0b0d777e4207ba9b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 21 Nov 2023 21:22:43 +0800 Subject: [PATCH 62/85] Remove pylint directives --- pygmt/clib/session.py | 1 - pygmt/datatypes.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 68f61c9f507..d1ca8566b80 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -90,7 +90,6 @@ class Session: - # pylint: disable=too-many-public-methods """ A GMT API session where most operations involving the C API happen. diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 4bf540400bf..47af175c1de 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -7,7 +7,7 @@ import pandas as pd -class GMT_DATASET(ctp.Structure): # pylint: disable=invalid-name,too-few-public-methods +class GMT_DATASET(ctp.Structure): """ GMT dataset structure for holding multiple tables (files). @@ -64,14 +64,12 @@ class GMT_DATASET(ctp.Structure): # pylint: disable=invalid-name,too-few-public """ class GMT_DATATABLE(ctp.Structure): - # pylint: disable=invalid-name,too-few-public-methods """ GMT datatable structure for holding a single table with multiple segments. """ class GMT_DATASEGMENT(ctp.Structure): - # pylint: disable=invalid-name,too-few-public-methods """ GMT datasegment structure for holding a single segment with multiple columns. From 1754e9a282c5d7d9415cfc3120252cb48a5d1bc0 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 4 Dec 2023 18:44:53 +0800 Subject: [PATCH 63/85] Minor fix --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 854ba9e15ff..c9bdd7cd84e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1712,7 +1712,7 @@ def read_virtualfile(self, vfname, kind=None): }[kind] return ctp.cast(pointer, ctp.POINTER(dtype)) - @contextmanager + @contextlib.contextmanager def virtualfile_to_data(self, kind, fname=None): """ Create a virtual file for writing a GMT data container or yield the From d70a7c474d00217743ef8c0c55fe8e8d20f51516 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 7 Jan 2024 12:04:20 +0800 Subject: [PATCH 64/85] Rewrap and add type hints --- pygmt/clib/session.py | 39 ++++++++++++++++++++------------------- pygmt/datatypes.py | 31 +++++++++++++------------------ pygmt/src/project.py | 6 +----- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 3d09875ebe2..c780989c1a0 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -4,11 +4,14 @@ Uses ctypes to wrap most of the core functions from the C API. """ +from __future__ import annotations + import contextlib import ctypes as ctp import pathlib import sys import warnings +from typing import Literal import numpy as np import pandas as pd @@ -1654,23 +1657,22 @@ def extract_region(self): raise GMTCLibError("Failed to extract region from current figure.") return wesn - def read_virtualfile(self, vfname, kind=None): + def read_virtualfile(self, vfname : str, kind : Literal["dataset", "grid", None] = None): """ - Read data from a virtual file and cast it into a GMT data container if - requested. + Read data from a virtual file and cast into a GMT data container if requested. Parameters ---------- - vfname : str + vfname Name of the virtual file to read. - kind : str - Cast the data into a GMT data container. Choose from "grid" or - "dataset". If None, will return a ctypes void pointer. + kind + Cast the data into a GMT data container. Valid values are ``"dataset"``, + ``"grid"`` and ``None``. If ``None``, will return a ctypes void pointer. Returns ------- - Pointer to the GMT data container. If ``kind`` is None, returns a - ctypes void pointer instead. + Pointer to the GMT data container. If ``kind`` is None, returns a ctypes void + pointer instead. """ c_read_virtualfile = self.get_libgmt_func( "GMT_Read_VirtualFile", @@ -1684,9 +1686,9 @@ def read_virtualfile(self, vfname, kind=None): if kind is None: # Return the ctypes void pointer return pointer - # The GMT C API function GMT_Read_VirtualFile returns a void pointer. - # It usually needs to be cast to a pointer to GMT data container (e.g., - # GMT_GRID or GMT_DATASET). + # The GMT C API function GMT_Read_VirtualFile returns a void pointer. It usually + # needs to be cast to a pointer to GMT data container (e.g., GMT_GRID or + # GMT_DATASET). dtype = { # "grid": GMT_GRID, # implemented in PR #2398 "dataset": GMT_DATASET, @@ -1694,17 +1696,16 @@ def read_virtualfile(self, vfname, kind=None): return ctp.cast(pointer, ctp.POINTER(dtype)) @contextlib.contextmanager - def virtualfile_to_data(self, kind, fname=None): + def virtualfile_to_data(self, kind: Literal["dataset", "grid"], fname: str | None = None): """ - Create a virtual file for writing a GMT data container or yield the - output file name. + Create a virtual file for writing a GMT data container or yield the file name. Parameters ---------- - kind : str - The kind of data container to create. Choose from "grid" or - "dataset". It has no effect if ``fname`` is given. - fname : str or None + kind + The kind of data container to create. Valid values are ``"dataset"`` and + ``"grid"``. It's ignored if ``fname`` is given. + fname If given, yield the output file name instead of the virtual file. Yields diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py index 47af175c1de..3041d9559b6 100644 --- a/pygmt/datatypes.py +++ b/pygmt/datatypes.py @@ -2,6 +2,7 @@ Wrappers for GMT data types. """ import ctypes as ctp +from typing import ClassVar import numpy as np import pandas as pd @@ -11,11 +12,9 @@ class GMT_DATASET(ctp.Structure): """ GMT dataset structure for holding multiple tables (files). - This class is only meant for internal use by PyGMT. It is not exposed to - users. + This class is only meant for internal use by PyGMT. It is not exposed to users. - See the GMT source code gmt_resources.h for the original C struct - definitions. + See the GMT source code gmt_resources.h for the original C struct definitions. Examples -------- @@ -65,17 +64,16 @@ class GMT_DATASET(ctp.Structure): class GMT_DATATABLE(ctp.Structure): """ - GMT datatable structure for holding a single table with multiple - segments. + GMT datatable structure for holding a single table with multiple segments. """ class GMT_DATASEGMENT(ctp.Structure): """ - GMT datasegment structure for holding a single segment with - multiple columns. + GMT datasegment structure for holding a single segment with multiple + columns. """ - _fields_ = [ + _fields_ :ClassVar = [ # Number of rows/records in this segment ("n_rows", ctp.c_uint64), # Number of fields in each record @@ -96,7 +94,7 @@ class GMT_DATASEGMENT(ctp.Structure): ("hidden", ctp.c_void_p), ] - _fields_ = [ + _fields_ :ClassVar = [ # Number of file header records (0 if no header) ("n_headers", ctp.c_uint), # Number of columns (fields) in each record @@ -117,7 +115,7 @@ class GMT_DATASEGMENT(ctp.Structure): ("hidden", ctp.c_void_p), ] - _fields_ = [ + _fields_ : ClassVar = [ # The total number of tables (files) contained ("n_tables", ctp.c_uint64), # The number of data columns @@ -150,10 +148,9 @@ def to_dataframe(self): """ Convert a GMT_DATASET object to a :class:`pandas.DataFrame` object. - Currently, the number of columns in all segments of all tables are - assumed to be the same. The same column in all segments of all tables - are concatenated. The trailing text column is also concatenated as a - single string column. + Currently, the number of columns in all segments of all tables are assumed to be + the same. The same column in all segments of all tables are concatenated. The + trailing text column is also concatenated as a single string column. Returns ------- @@ -176,9 +173,7 @@ def to_dataframe(self): ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) ... with Session() as lib: ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: - ... lib.call_module( - ... "read", f"{tmpfile.name} {vouttbl} -Td" - ... ) + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") ... ds = lib.read_virtualfile(vouttbl, kind="dataset") ... df = ds.contents.to_dataframe() ... diff --git a/pygmt/src/project.py b/pygmt/src/project.py index 442b2dde081..38da5ddcb61 100644 --- a/pygmt/src/project.py +++ b/pygmt/src/project.py @@ -240,11 +240,7 @@ def project( module="project", args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - - if kwargs.get("G") is not None: - column_names = list("rsp") - else: - column_names = None + column_names = list("rsp") if kwargs.get("G") is not None else None return return_table( session=lib, From a73927e37c7ca9a98d59f2d65ac353dbc71588e4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 7 Jan 2024 14:04:45 +0800 Subject: [PATCH 65/85] Temporarily enable benchmarks --- .github/workflows/benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 358f0105f63..1d0a0f2a8b1 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -13,7 +13,7 @@ on: - 'pygmt/**/*.py' - '.github/workflows/benchmarks.yml' # Uncomment the 'pull_request' line below to trigger the workflow in PR - # pull_request: + pull_request: # `workflow_dispatch` allows CodSpeed to trigger backtest # performance analysis in order to generate initial data. workflow_dispatch: From 79c499d94f549513da4a5e0bbf3dc11318d342e2 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 19 Feb 2024 18:58:34 +0800 Subject: [PATCH 66/85] Move dataset definition into pygmt/datatypes/dataset.py --- pygmt/clib/session.py | 5 +- pygmt/datatypes.py | 210 ------------------------------------- pygmt/datatypes/dataset.py | 202 ++++++++++++++++++++++++++++++++++- 3 files changed, 204 insertions(+), 213 deletions(-) delete mode 100644 pygmt/datatypes.py diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 7e95fab0d95..dd45699bcae 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -12,7 +12,6 @@ import sys import warnings from typing import Literal -from typing import Literal import numpy as np import pandas as pd @@ -1745,7 +1744,9 @@ def extract_region(self): return wesn @contextlib.contextmanager - def virtualfile_to_data(self, kind: Literal["dataset", "grid"], fname: str | None = None): + def virtualfile_to_data( + self, kind: Literal["dataset", "grid"], fname: str | None = None + ): """ Create a virtual file for writing a GMT data container or yield the file name. diff --git a/pygmt/datatypes.py b/pygmt/datatypes.py deleted file mode 100644 index 3041d9559b6..00000000000 --- a/pygmt/datatypes.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -Wrappers for GMT data types. -""" -import ctypes as ctp -from typing import ClassVar - -import numpy as np -import pandas as pd - - -class GMT_DATASET(ctp.Structure): - """ - GMT dataset structure for holding multiple tables (files). - - This class is only meant for internal use by PyGMT. It is not exposed to users. - - See the GMT source code gmt_resources.h for the original C struct definitions. - - Examples - -------- - >>> from pygmt.helpers import GMTTempFile - >>> from pygmt.clib import Session - >>> - >>> with GMTTempFile(suffix=".txt") as tmpfile: - ... # prepare the sample data file - ... with open(tmpfile.name, mode="w") as fp: - ... print(">", file=fp) - ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) - ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) - ... print(">", file=fp) - ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) - ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) - ... # read the data file - ... with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: - ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") - ... # the dataset - ... ds = lib.read_virtualfile(vouttbl, kind="dataset").contents - ... print(ds.n_tables, ds.n_columns, ds.n_segments) - ... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns]) - ... # the table - ... tbl = ds.table[0].contents - ... print(tbl.n_columns, tbl.n_segments, tbl.n_records) - ... print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns]) - ... for i in range(tbl.n_segments): - ... seg = tbl.segment[i].contents - ... for j in range(seg.n_columns): - ... print(seg.data[j][: seg.n_rows]) - ... print(seg.text[: seg.n_rows]) - ... - 1 3 2 - [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] - 3 2 4 - [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] - [1.0, 4.0] - [2.0, 5.0] - [3.0, 6.0] - [b'TEXT1 TEXT23', b'TEXT4 TEXT567'] - [7.0, 10.0] - [8.0, 11.0] - [9.0, 12.0] - [b'TEXT8 TEXT90', b'TEXT123 TEXT456789'] - """ - - class GMT_DATATABLE(ctp.Structure): - """ - GMT datatable structure for holding a single table with multiple segments. - """ - - class GMT_DATASEGMENT(ctp.Structure): - """ - GMT datasegment structure for holding a single segment with multiple - columns. - """ - - _fields_ :ClassVar = [ - # Number of rows/records in this segment - ("n_rows", ctp.c_uint64), - # Number of fields in each record - ("n_columns", ctp.c_uint64), - # Minimum coordinate for each column - ("min", ctp.POINTER(ctp.c_double)), - # Maximum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), - # Data x, y, and possibly other columns - ("data", ctp.POINTER(ctp.POINTER(ctp.c_double))), - # Label string (if applicable) - ("label", ctp.c_char_p), - # Segment header (if applicable) - ("header", ctp.c_char_p), - # text beyond the data - ("text", ctp.POINTER(ctp.c_char_p)), - # Book-keeping variables "hidden" from the API - ("hidden", ctp.c_void_p), - ] - - _fields_ :ClassVar = [ - # Number of file header records (0 if no header) - ("n_headers", ctp.c_uint), - # Number of columns (fields) in each record - ("n_columns", ctp.c_uint64), - # Number of segments in the array - ("n_segments", ctp.c_uint64), - # Total number of data records across all segments - ("n_records", ctp.c_uint64), - # Minimum coordinate for each column - ("min", ctp.POINTER(ctp.c_double)), - # Maximum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), - # Array with all file header records, if any - ("header", ctp.POINTER(ctp.c_char_p)), - # Pointer to array of segments - ("segment", ctp.POINTER(ctp.POINTER(GMT_DATASEGMENT))), - # Book-keeping variables "hidden" from the API - ("hidden", ctp.c_void_p), - ] - - _fields_ : ClassVar = [ - # The total number of tables (files) contained - ("n_tables", ctp.c_uint64), - # The number of data columns - ("n_columns", ctp.c_uint64), - # The total number of segments across all tables - ("n_segments", ctp.c_uint64), - # The total number of data records across all tables - ("n_records", ctp.c_uint64), - # Minimum coordinate for each column - ("min", ctp.POINTER(ctp.c_double)), - # Maximum coordinate for each column - ("max", ctp.POINTER(ctp.c_double)), - # Pointer to array of tables - ("table", ctp.POINTER(ctp.POINTER(GMT_DATATABLE))), - # The datatype (numerical, text, or mixed) of this dataset - ("type", ctp.c_int32), - # The geometry of this dataset - ("geometry", ctp.c_int32), - # To store a referencing system string in PROJ.4 format - ("ProjRefPROJ4", ctp.c_char_p), - # To store a referencing system string in WKT format - ("ProjRefWKT", ctp.c_char_p), - # To store a referencing system EPSG code - ("ProjRefEPSG", ctp.c_int), - # Book-keeping variables "hidden" from the API - ("hidden", ctp.c_void_p), - ] - - def to_dataframe(self): - """ - Convert a GMT_DATASET object to a :class:`pandas.DataFrame` object. - - Currently, the number of columns in all segments of all tables are assumed to be - the same. The same column in all segments of all tables are concatenated. The - trailing text column is also concatenated as a single string column. - - Returns - ------- - :class:`pandas.DataFrame` - A :class:`pandas.DataFrame` object. - - Examples - -------- - >>> from pygmt.helpers import GMTTempFile - >>> from pygmt.clib import Session - >>> - >>> with GMTTempFile(suffix=".txt") as tmpfile: - ... # prepare the sample data file - ... with open(tmpfile.name, mode="w") as fp: - ... print(">", file=fp) - ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) - ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) - ... print(">", file=fp) - ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) - ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) - ... with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: - ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") - ... ds = lib.read_virtualfile(vouttbl, kind="dataset") - ... df = ds.contents.to_dataframe() - ... - >>> df - 0 1 2 3 - 0 1.0 2.0 3.0 TEXT1 TEXT23 - 1 4.0 5.0 6.0 TEXT4 TEXT567 - 2 7.0 8.0 9.0 TEXT8 TEXT90 - 3 10.0 11.0 12.0 TEXT123 TEXT456789 - """ - vectors = [] - for icol in range(self.n_columns): - colvector = [] - for itbl in range(self.n_tables): - dtbl = self.table[itbl].contents - for iseg in range(dtbl.n_segments): - dseg = dtbl.segment[iseg].contents - colvector.append( - np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) - ) - vectors.append(np.concatenate(colvector)) - - # deal with trailing text column - textvector = [] - for itbl in range(self.n_tables): - dtbl = self.table[itbl].contents - for iseg in range(dtbl.n_segments): - dseg = dtbl.segment[iseg].contents - if dseg.text: - textvector.extend(dseg.text[: dseg.n_rows]) - if textvector: - vectors.append(np.char.decode(textvector)) - - return pd.concat([pd.Series(v) for v in vectors], axis=1) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index cc1f29041ee..19a845b1114 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -2,7 +2,207 @@ Wrapper for the GMT_DATASET data type. """ import ctypes as ctp +from typing import ClassVar + +import numpy as np +import pandas as pd class _GMT_DATASET(ctp.Structure): # noqa: N801 - pass + """ + GMT dataset structure for holding multiple tables (files). + + This class is only meant for internal use by PyGMT. It is not exposed to users. + + See the GMT source code gmt_resources.h for the original C struct definitions. + + Examples + -------- + >>> from pygmt.helpers import GMTTempFile + >>> from pygmt.clib import Session + >>> + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... # prepare the sample data file + ... with open(tmpfile.name, mode="w") as fp: + ... print(">", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) + ... print(">", file=fp) + ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) + ... # read the data file + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... # the dataset + ... ds = lib.read_virtualfile(vouttbl, kind="dataset").contents + ... print(ds.n_tables, ds.n_columns, ds.n_segments) + ... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns]) + ... # the table + ... tbl = ds.table[0].contents + ... print(tbl.n_columns, tbl.n_segments, tbl.n_records) + ... print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns]) + ... for i in range(tbl.n_segments): + ... seg = tbl.segment[i].contents + ... for j in range(seg.n_columns): + ... print(seg.data[j][: seg.n_rows]) + ... print(seg.text[: seg.n_rows]) + 1 3 2 + [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] + 3 2 4 + [1.0, 2.0, 3.0] [10.0, 11.0, 12.0] + [1.0, 4.0] + [2.0, 5.0] + [3.0, 6.0] + [b'TEXT1 TEXT23', b'TEXT4 TEXT567'] + [7.0, 10.0] + [8.0, 11.0] + [9.0, 12.0] + [b'TEXT8 TEXT90', b'TEXT123 TEXT456789'] + """ + + class _GMT_DATATABLE(ctp.Structure): # noqa: N801 + """ + GMT datatable structure for holding a single table with multiple segments. + """ + + class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801 + """ + GMT datasegment structure for holding a single segment with multiple + columns. + """ + + _fields_: ClassVar = [ + # Number of rows/records in this segment + ("n_rows", ctp.c_uint64), + # Number of fields in each record + ("n_columns", ctp.c_uint64), + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), + # Data x, y, and possibly other columns + ("data", ctp.POINTER(ctp.POINTER(ctp.c_double))), + # Label string (if applicable) + ("label", ctp.c_char_p), + # Segment header (if applicable) + ("header", ctp.c_char_p), + # text beyond the data + ("text", ctp.POINTER(ctp.c_char_p)), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), + ] + + _fields_: ClassVar = [ + # Number of file header records (0 if no header) + ("n_headers", ctp.c_uint), + # Number of columns (fields) in each record + ("n_columns", ctp.c_uint64), + # Number of segments in the array + ("n_segments", ctp.c_uint64), + # Total number of data records across all segments + ("n_records", ctp.c_uint64), + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), + # Array with all file header records, if any + ("header", ctp.POINTER(ctp.c_char_p)), + # Pointer to array of segments + ("segment", ctp.POINTER(ctp.POINTER(_GMT_DATASEGMENT))), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), + ] + + _fields_: ClassVar = [ + # The total number of tables (files) contained + ("n_tables", ctp.c_uint64), + # The number of data columns + ("n_columns", ctp.c_uint64), + # The total number of segments across all tables + ("n_segments", ctp.c_uint64), + # The total number of data records across all tables + ("n_records", ctp.c_uint64), + # Minimum coordinate for each column + ("min", ctp.POINTER(ctp.c_double)), + # Maximum coordinate for each column + ("max", ctp.POINTER(ctp.c_double)), + # Pointer to array of tables + ("table", ctp.POINTER(ctp.POINTER(_GMT_DATATABLE))), + # The datatype (numerical, text, or mixed) of this dataset + ("type", ctp.c_int32), + # The geometry of this dataset + ("geometry", ctp.c_int32), + # To store a referencing system string in PROJ.4 format + ("ProjRefPROJ4", ctp.c_char_p), + # To store a referencing system string in WKT format + ("ProjRefWKT", ctp.c_char_p), + # To store a referencing system EPSG code + ("ProjRefEPSG", ctp.c_int), + # Book-keeping variables "hidden" from the API + ("hidden", ctp.c_void_p), + ] + + def to_dataframe(self): + """ + Convert a GMT_DATASET object to a :class:`pandas.DataFrame` object. + + Currently, the number of columns in all segments of all tables are assumed to be + the same. The same column in all segments of all tables are concatenated. The + trailing text column is also concatenated as a single string column. + + Returns + ------- + :class:`pandas.DataFrame` + A :class:`pandas.DataFrame` object. + + Examples + -------- + >>> from pygmt.helpers import GMTTempFile + >>> from pygmt.clib import Session + >>> + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... # prepare the sample data file + ... with open(tmpfile.name, mode="w") as fp: + ... print(">", file=fp) + ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) + ... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp) + ... print(">", file=fp) + ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) + ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... ds = lib.read_virtualfile(vouttbl, kind="dataset") + ... df = ds.contents.to_dataframe() + >>> df + 0 1 2 3 + 0 1.0 2.0 3.0 TEXT1 TEXT23 + 1 4.0 5.0 6.0 TEXT4 TEXT567 + 2 7.0 8.0 9.0 TEXT8 TEXT90 + 3 10.0 11.0 12.0 TEXT123 TEXT456789 + """ + vectors = [] + for icol in range(self.n_columns): + colvector = [] + for itbl in range(self.n_tables): + dtbl = self.table[itbl].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + colvector.append( + np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) + ) + vectors.append(np.concatenate(colvector)) + + # deal with trailing text column + textvector = [] + for itbl in range(self.n_tables): + dtbl = self.table[itbl].contents + for iseg in range(dtbl.n_segments): + dseg = dtbl.segment[iseg].contents + if dseg.text: + textvector.extend(dseg.text[: dseg.n_rows]) + if textvector: + vectors.append(np.char.decode(textvector)) + + return pd.concat([pd.Series(v) for v in vectors], axis=1) From c4d47db68446c3e53dc070122f696bf50d79e24b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 19 Feb 2024 20:47:46 +0800 Subject: [PATCH 67/85] Remove unused imports --- pygmt/clib/session.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index dd45699bcae..70c989e064e 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -4,8 +4,6 @@ Uses ctypes to wrap most of the core functions from the C API. """ -from __future__ import annotations - import contextlib import ctypes as ctp import pathlib From 76a09f08d2132e767dcb4110f812527359a7e876 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 10:57:08 +0800 Subject: [PATCH 68/85] Fix open_virtual_file to open_virtualfile --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 70c989e064e..a5cca564c7f 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1771,5 +1771,5 @@ def virtualfile_to_data( "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), }[kind] - with self.open_virtual_file(family, geometry, "GMT_OUT", None) as vfile: + with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile: yield vfile From 9a035ef93acf5a596a6da722262591750893baee Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 12:30:17 +0800 Subject: [PATCH 69/85] Improve docstrings --- pygmt/clib/session.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index a5cca564c7f..13f1b3a035b 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1746,15 +1746,16 @@ def virtualfile_to_data( self, kind: Literal["dataset", "grid"], fname: str | None = None ): """ - Create a virtual file for writing a GMT data container or yield the file name. + Create a virtual file for storing output data in a data container or yield the + actual file name. Parameters ---------- kind The kind of data container to create. Valid values are ``"dataset"`` and - ``"grid"``. It's ignored if ``fname`` is given. + ``"grid"``. Ignored if ``fname`` is given. fname - If given, yield the output file name instead of the virtual file. + If given, yield the actual file name instead of the virtual file name. Yields ------ @@ -1764,9 +1765,9 @@ def virtualfile_to_data( # If fname is given, yield the output file name. if fname is not None: yield fname - # Otherwise, create a virtual file for writing a GMT data container. + # Otherwise, create a virtual file for storing the output data. else: - # Determine the family and geometry of the data container from kind + # Determine the family and geometry from kind family, geometry = { "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), From 828c9c14a78fff401bc4f8b96fee2110224677cb Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 15:02:59 +0800 Subject: [PATCH 70/85] Add doctests for virtualfile_to_data --- pygmt/clib/session.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 13f1b3a035b..66874a4a9b4 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1743,7 +1743,7 @@ def extract_region(self): @contextlib.contextmanager def virtualfile_to_data( - self, kind: Literal["dataset", "grid"], fname: str | None = None + self, kind: Literal["dataset", "grid", None] = None, fname: str | None = None ): """ Create a virtual file for storing output data in a data container or yield the @@ -1753,7 +1753,7 @@ def virtualfile_to_data( ---------- kind The kind of data container to create. Valid values are ``"dataset"`` and - ``"grid"``. Ignored if ``fname`` is given. + ``"grid"`` or ``None``. Ignored if ``fname`` is specified. fname If given, yield the actual file name instead of the virtual file name. @@ -1761,6 +1761,40 @@ def virtualfile_to_data( ------ vfile : str Name of the virtual file or the output file name. + + Examples + -------- + >>> from pathlib import Path + >>> from pygmt.helpers import GMTTempFile + >>> from pygmt.clib import Session + >>> from pygmt.datatypes import _GMT_DATASET, _GMT_GRID + >>> + >>> # Create a virtual file for storing the output table. + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... with open(tmpfile.name, mode="w") as fp: + ... print("1.0 2.0 3.0 TEXT", file=fp) + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... ds = lib.read_virtualfile(vouttbl, kind="dataset") + >>> isinstance(ds.contents, _GMT_DATASET) + True + >>> + >>> # Create a virtual file for storing the output grid. + >>> with Session() as lib: + ... with lib.virtualfile_to_data(kind="grid") as voutgrd: + ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") + ... outgrd = lib.read_virtualfile(voutgrd, kind="grid") + >>> isinstance(outgrd.contents, _GMT_GRID) + True + >>> + >>> # Write data to file without creating a virtual file + >>> with GMTTempFile(suffix=".nc") as tmpfile: + ... with Session() as lib: + ... with lib.virtualfile_to_data(fname=tmpfile.name) as voutgrd: + ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") + ... assert voutgrd == tmpfile.name + ... assert Path(voutgrd).stat().st_size > 0 """ # If fname is given, yield the output file name. if fname is not None: From 4a5eea37cefe3e48b5e370c3a136e228a8755f69 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 15:04:30 +0800 Subject: [PATCH 71/85] isort --- pygmt/clib/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 66874a4a9b4..0204787d8e0 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1765,9 +1765,9 @@ def virtualfile_to_data( Examples -------- >>> from pathlib import Path - >>> from pygmt.helpers import GMTTempFile >>> from pygmt.clib import Session >>> from pygmt.datatypes import _GMT_DATASET, _GMT_GRID + >>> from pygmt.helpers import GMTTempFile >>> >>> # Create a virtual file for storing the output table. >>> with GMTTempFile(suffix=".txt") as tmpfile: From a878635361f3ae71f5025be791be08d499c8f610 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 15:10:33 +0800 Subject: [PATCH 72/85] clib.Session: Add the virtualfile_to_data method for creating virtual files for output --- doc/api/index.rst | 1 + pygmt/clib/session.py | 70 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/doc/api/index.rst b/doc/api/index.rst index 60a1240150e..c460beb4aaf 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -293,6 +293,7 @@ conversion of Python variables to GMT virtual files: clib.Session.virtualfile_from_matrix clib.Session.virtualfile_from_vectors clib.Session.virtualfile_from_grid + clib.Session.virtualfile_to_data Low level access (these are mostly used by the :mod:`pygmt.clib` package): diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 3aa42c885df..e30b3fd703b 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1609,6 +1609,76 @@ def virtualfile_from_data( # noqa: PLR0912 return file_context + @contextlib.contextmanager + def virtualfile_to_data( + self, kind: Literal["dataset", "grid"] = "dataset", fname: str | None = None + ): + """ + Create a virtual file for storing output data in a data container or yield the + actual file name. + + Parameters + ---------- + kind + The kind of data container to create. Valid values are ``"dataset"`` and + ``"grid"``. Ignored if ``fname`` is specified. + fname + If given, yield the actual file name instead of the virtual file name. + + Yields + ------ + vfile : str + Name of the virtual file or the output file name. + + Examples + -------- + >>> from pathlib import Path + >>> from pygmt.clib import Session + >>> from pygmt.datatypes import _GMT_DATASET, _GMT_GRID + >>> from pygmt.helpers import GMTTempFile + >>> + >>> # Create a virtual file for storing the output table. + >>> with GMTTempFile(suffix=".txt") as tmpfile: + ... with open(tmpfile.name, mode="w") as fp: + ... print("1.0 2.0 3.0 TEXT", file=fp) + ... with Session() as lib: + ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") + ... ds = lib.read_virtualfile(vouttbl, kind="dataset") + >>> isinstance(ds.contents, _GMT_DATASET) + True + >>> + >>> # Create a virtual file for storing the output grid. + >>> with Session() as lib: + ... with lib.virtualfile_to_data(kind="grid") as voutgrd: + ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") + ... outgrd = lib.read_virtualfile(voutgrd, kind="grid") + >>> isinstance(outgrd.contents, _GMT_GRID) + True + >>> + >>> # Write data to file without creating a virtual file + >>> with GMTTempFile(suffix=".nc") as tmpfile: + ... with Session() as lib: + ... with lib.virtualfile_to_data( + ... kind="grid", fname=tmpfile.name + ... ) as voutgrd: + ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") + ... assert voutgrd == tmpfile.name + ... assert Path(voutgrd).stat().st_size > 0 + """ + # If fname is given, yield the output file name. + if fname is not None: + yield fname + # Otherwise, create a virtual file for storing the output data. + else: + # Determine the family and geometry from kind + family, geometry = { + "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), + "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), + }[kind] + with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile: + yield vfile + def read_virtualfile( self, vfname: str, kind: Literal["dataset", "grid", None] = None ): From c72701e43e96c8134346639f3d406ebb7dda243d Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 20:52:29 +0800 Subject: [PATCH 73/85] Improve docstrings --- pygmt/datatypes/dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index 19a845b1114..1338de71f3d 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -63,13 +63,12 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801 class _GMT_DATATABLE(ctp.Structure): # noqa: N801 """ - GMT datatable structure for holding a single table with multiple segments. + GMT datatable structure for holding a table with multiple segments. """ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801 """ - GMT datasegment structure for holding a single segment with multiple - columns. + GMT datasegment structure for holding a segment with multiple columns. """ _fields_: ClassVar = [ @@ -145,7 +144,7 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801 def to_dataframe(self): """ - Convert a GMT_DATASET object to a :class:`pandas.DataFrame` object. + Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object. Currently, the number of columns in all segments of all tables are assumed to be the same. The same column in all segments of all tables are concatenated. The From 761aff487ad988c52f7b7cb8baf70235a94c80bf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Tue, 20 Feb 2024 20:58:16 +0800 Subject: [PATCH 74/85] Improve the return_table function --- pygmt/helpers/utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 0fe43e2363f..94a09834152 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -11,6 +11,7 @@ import time import webbrowser from collections.abc import Iterable +from typing import Literal import xarray as xr from pygmt.exceptions import GMTInvalidInput @@ -557,7 +558,12 @@ def args_in_kwargs(args, kwargs): ) -def return_table(session, output_type, vfile, column_names): +def return_table( + session, + output_type: Literal["pandas", "numpy", "file"], + vfile: str, + column_names: list[str], +): """ Return an output table from a virtual file based on the output type. @@ -565,11 +571,11 @@ def return_table(session, output_type, vfile, column_names): ---------- session : :class:`pygmt.clib.Session` The current session. - output_type : str - The output type. Can be ``"pandas"``, ``"numpy"``, or ``"file"``. - vfile : str + output_type + The output type. Valid values are ``"pandas"``, ``"numpy"``, or ``"file"``. + vfile The virtual file name. - column_names : list of str + column_names The column names for the :class:`pandas.DataFrame` output. Returns @@ -588,7 +594,7 @@ def return_table(session, output_type, vfile, column_names): result = result.convert_dtypes( convert_string=True, convert_integer=False, - convert_floating=False, # requires pandas>=1.2.0 + convert_floating=False, convert_boolean=False, ) if output_type == "pandas": From 279eeb4affde6310ac3fcbf7f24dfea52dbd4133 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 21 Feb 2024 11:57:36 +0800 Subject: [PATCH 75/85] column_names default to None --- pygmt/helpers/utils.py | 2 +- pygmt/src/filter1d.py | 7 +------ pygmt/src/grdvolume.py | 7 +------ pygmt/src/triangulate.py | 7 +------ 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 94a09834152..f81bfdec16b 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -562,7 +562,7 @@ def return_table( session, output_type: Literal["pandas", "numpy", "file"], vfile: str, - column_names: list[str], + column_names: list[str] | None = None, ): """ Return an output table from a virtual file based on the output type. diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index c612b9bcf30..3b94fb40870 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -125,9 +125,4 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=None, - ) + return return_table(session=lib, output_type=output_type, vfile=vouttbl) diff --git a/pygmt/src/grdvolume.py b/pygmt/src/grdvolume.py index e9d6b339c0a..3073888e082 100644 --- a/pygmt/src/grdvolume.py +++ b/pygmt/src/grdvolume.py @@ -109,9 +109,4 @@ def grdvolume(grid, output_type="pandas", outfile=None, **kwargs): module="grdvolume", args=build_arg_string(kwargs, infile=vingrid, outfile=vouttbl), ) - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=None, - ) + return return_table(session=lib, output_type=output_type, vfile=vouttbl) diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index 5aa91fd5c4a..c74b8eece73 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -138,12 +138,7 @@ def _triangulate( if output_type == "xarray": return load_dataarray(outgrid) - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=None, - ) + return return_table(session=lib, output_type=output_type, vfile=vouttbl) @staticmethod @fmt_docstring From e933497900784bbb7c25afd1d1c26b5d997f27ee Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 21 Feb 2024 12:03:06 +0800 Subject: [PATCH 76/85] Update select --- pygmt/src/select.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pygmt/src/select.py b/pygmt/src/select.py index 92a97f052a5..fed75646f33 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -1,7 +1,6 @@ """ select - Select data table subsets based on multiple spatial criteria. """ - import pandas as pd from pygmt.clib import Session from pygmt.helpers import ( @@ -208,9 +207,9 @@ def select(data=None, output_type="pandas", outfile=None, **kwargs): args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - column_names = ( - data.columns.to_list() if isinstance(data, pd.DataFrame) else None - ) + column_names = None + if isinstance(data, pd.DataFrame): + column_names = data.columns.to_list() return return_table( session=lib, From d1f31500382466c769e4177087a91c2eceaf7d9b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 21 Feb 2024 12:47:51 +0800 Subject: [PATCH 77/85] Update blockm --- pygmt/src/blockm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py index 446ad9d4f92..b92550eb9ad 100644 --- a/pygmt/src/blockm.py +++ b/pygmt/src/blockm.py @@ -52,10 +52,9 @@ def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): module=block_method, args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), ) - - column_names = ( - data.columns.to_list() if isinstance(data, pd.DataFrame) else None - ) + column_names = None + if isinstance(data, pd.DataFrame): + column_names = data.columns.to_list() return return_table( session=lib, From 77739f8484dc7e01d22a49f13c2a885bab59b7b5 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 21 Feb 2024 12:52:18 +0800 Subject: [PATCH 78/85] Update grdtrack --- pygmt/src/grdtrack.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index c1941e161b0..af1ea1fd913 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -1,7 +1,6 @@ """ grdtrack - Sample grids at specified (x,y) locations. """ - import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput @@ -296,10 +295,9 @@ def grdtrack( output_type = validate_output_table_type(output_type, outfile=outfile) + column_names = None if isinstance(points, pd.DataFrame): column_names = [*points.columns.to_list(), newcolname] - else: - column_names = None with Session() as lib: with lib.virtualfile_from_data( From 2b1d565d2207c375889ce14c75f2f80ba6813bdf Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 21 Feb 2024 12:55:15 +0800 Subject: [PATCH 79/85] Remove the old codes --- pygmt/clib/session.py | 68 ------------------------------------------- 1 file changed, 68 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 49d7a689e04..e30b3fd703b 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1810,71 +1810,3 @@ def extract_region(self): if status != 0: raise GMTCLibError("Failed to extract region from current figure.") return wesn - - @contextlib.contextmanager - def virtualfile_to_data( - self, kind: Literal["dataset", "grid", None] = None, fname: str | None = None - ): - """ - Create a virtual file for storing output data in a data container or yield the - actual file name. - - Parameters - ---------- - kind - The kind of data container to create. Valid values are ``"dataset"`` and - ``"grid"`` or ``None``. Ignored if ``fname`` is specified. - fname - If given, yield the actual file name instead of the virtual file name. - - Yields - ------ - vfile : str - Name of the virtual file or the output file name. - - Examples - -------- - >>> from pathlib import Path - >>> from pygmt.clib import Session - >>> from pygmt.datatypes import _GMT_DATASET, _GMT_GRID - >>> from pygmt.helpers import GMTTempFile - >>> - >>> # Create a virtual file for storing the output table. - >>> with GMTTempFile(suffix=".txt") as tmpfile: - ... with open(tmpfile.name, mode="w") as fp: - ... print("1.0 2.0 3.0 TEXT", file=fp) - ... with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: - ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") - ... ds = lib.read_virtualfile(vouttbl, kind="dataset") - >>> isinstance(ds.contents, _GMT_DATASET) - True - >>> - >>> # Create a virtual file for storing the output grid. - >>> with Session() as lib: - ... with lib.virtualfile_to_data(kind="grid") as voutgrd: - ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") - ... outgrd = lib.read_virtualfile(voutgrd, kind="grid") - >>> isinstance(outgrd.contents, _GMT_GRID) - True - >>> - >>> # Write data to file without creating a virtual file - >>> with GMTTempFile(suffix=".nc") as tmpfile: - ... with Session() as lib: - ... with lib.virtualfile_to_data(fname=tmpfile.name) as voutgrd: - ... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg") - ... assert voutgrd == tmpfile.name - ... assert Path(voutgrd).stat().st_size > 0 - """ - # If fname is given, yield the output file name. - if fname is not None: - yield fname - # Otherwise, create a virtual file for storing the output data. - else: - # Determine the family and geometry from kind - family, geometry = { - "grid": ("GMT_IS_GRID", "GMT_IS_SURFACE"), - "dataset": ("GMT_IS_DATASET", "GMT_IS_PLP"), - }[kind] - with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile: - yield vfile From a21b2df8d149ee4af172e8bd6d14dbebc017179a Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 28 Feb 2024 12:48:39 +0800 Subject: [PATCH 80/85] Revert non-related changes and focus on the _GMT_DATASET class --- pygmt/helpers/__init__.py | 1 - pygmt/helpers/utils.py | 46 ----------------- pygmt/src/blockm.py | 87 ++++++++++++--------------------- pygmt/src/filter1d.py | 33 ++++++++----- pygmt/src/grd2xyz.py | 44 ++++++++++------- pygmt/src/grdhisteq.py | 75 ++++++++++++++-------------- pygmt/src/grdtrack.py | 52 ++++++++++---------- pygmt/src/grdvolume.py | 32 ++++++++---- pygmt/src/project.py | 60 ++++++++++++----------- pygmt/src/select.py | 49 ++++++++++--------- pygmt/src/triangulate.py | 66 ++++++++++++++----------- pygmt/tests/test_triangulate.py | 7 +-- 12 files changed, 260 insertions(+), 292 deletions(-) diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py index 4de92648959..8b8891cb63f 100644 --- a/pygmt/helpers/__init__.py +++ b/pygmt/helpers/__init__.py @@ -20,6 +20,5 @@ is_nonstr_iter, launch_external_viewer, non_ascii_to_octal, - return_table, ) from pygmt.helpers.validators import validate_output_table_type diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index f81bfdec16b..f1649a68a74 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -11,7 +11,6 @@ import time import webbrowser from collections.abc import Iterable -from typing import Literal import xarray as xr from pygmt.exceptions import GMTInvalidInput @@ -556,48 +555,3 @@ def args_in_kwargs(args, kwargs): return any( kwargs.get(arg) is not None and kwargs.get(arg) is not False for arg in args ) - - -def return_table( - session, - output_type: Literal["pandas", "numpy", "file"], - vfile: str, - column_names: list[str] | None = None, -): - """ - Return an output table from a virtual file based on the output type. - - Parameters - ---------- - session : :class:`pygmt.clib.Session` - The current session. - output_type - The output type. Valid values are ``"pandas"``, ``"numpy"``, or ``"file"``. - vfile - The virtual file name. - column_names - The column names for the :class:`pandas.DataFrame` output. - - Returns - ------- - :class:`pandas.DataFrame` or :class:`numpy.ndarray` or None - The output table. If ``output_type`` is ``"file"``, returns ``None``. - """ - if output_type == "file": # Already written to file, so return None - return None - # Read the virtual file as a GMT dataset and convert to pandas.DataFrame - result = session.read_virtualfile(vfile, kind="dataset").contents.to_dataframe() - # assign column names - if column_names is not None: - result.columns = column_names - # convert text data from object dtype to string dtype - result = result.convert_dtypes( - convert_string=True, - convert_integer=False, - convert_floating=False, - convert_boolean=False, - ) - if output_type == "pandas": - return result - # NumPy.ndarray output - return result.to_numpy() diff --git a/pygmt/src/blockm.py b/pygmt/src/blockm.py index b92550eb9ad..9dff491ed93 100644 --- a/pygmt/src/blockm.py +++ b/pygmt/src/blockm.py @@ -5,18 +5,17 @@ import pandas as pd from pygmt.clib import Session from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, - validate_output_table_type, ) __doctest_skip__ = ["blockmean", "blockmedian", "blockmode"] -def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): +def _blockm(block_method, data, x, y, z, outfile, **kwargs): r""" Block average (x, y, z) data tables by mean, median, or mode estimation. @@ -42,26 +41,31 @@ def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): - None if ``outfile`` is set (filtered output will be stored in file set by ``outfile``) """ - output_type = validate_output_table_type(output_type, outfile=outfile) - - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="vector", data=data, x=x, y=y, z=z, required_z=True - ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module=block_method, - args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + table_context = lib.virtualfile_from_data( + check_kind="vector", data=data, x=x, y=y, z=z, required_z=True ) - column_names = None - if isinstance(data, pd.DataFrame): - column_names = data.columns.to_list() - - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=column_names, - ) + # Run blockm* on data table + with table_context as infile: + if outfile is None: + outfile = tmpfile.name + lib.call_module( + module=block_method, + args=build_arg_string(kwargs, infile=infile, outfile=outfile), + ) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + try: + column_names = data.columns.to_list() + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + except AttributeError: # 'str' object has no attribute 'columns' + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + return result @fmt_docstring @@ -82,9 +86,7 @@ def _blockm(block_method, data, x, y, z, output_type, outfile, **kwargs): w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmean( - data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs -): +def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs): r""" Block average (x, y, z) data tables by mean estimation. @@ -157,14 +159,7 @@ def blockmean( >>> data_bmean = pygmt.blockmean(data=data, region=[245, 255, 20, 30], spacing="5m") """ return _blockm( - block_method="blockmean", - data=data, - x=x, - y=y, - z=z, - output_type=output_type, - outfile=outfile, - **kwargs, + block_method="blockmean", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs ) @@ -185,9 +180,7 @@ def blockmean( w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmedian( - data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs -): +def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs): r""" Block average (x, y, z) data tables by median estimation. @@ -253,14 +246,7 @@ def blockmedian( ... ) """ return _blockm( - block_method="blockmedian", - data=data, - x=x, - y=y, - z=z, - output_type=output_type, - outfile=outfile, - **kwargs, + block_method="blockmedian", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs ) @@ -281,9 +267,7 @@ def blockmedian( w="wrap", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def blockmode( - data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs -): +def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs): r""" Block average (x, y, z) data tables by mode estimation. @@ -347,12 +331,5 @@ def blockmode( >>> data_bmode = pygmt.blockmode(data=data, region=[245, 255, 20, 30], spacing="5m") """ return _blockm( - block_method="blockmode", - data=data, - x=x, - y=y, - z=z, - output_type=output_type, - outfile=outfile, - **kwargs, + block_method="blockmode", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs ) diff --git a/pygmt/src/filter1d.py b/pygmt/src/filter1d.py index 3b94fb40870..bc07e6e1314 100644 --- a/pygmt/src/filter1d.py +++ b/pygmt/src/filter1d.py @@ -2,12 +2,13 @@ filter1d - Time domain filtering of 1-D data tables """ +import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, - return_table, use_alias, validate_output_table_type, ) @@ -116,13 +117,23 @@ def filter1d(data, output_type="pandas", outfile=None, **kwargs): output_type = validate_output_table_type(output_type, outfile=outfile) - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="vector", data=data - ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module="filter1d", - args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), - ) - - return return_table(session=lib, output_type=output_type, vfile=vouttbl) + with GMTTempFile() as tmpfile: + with Session() as lib: + file_context = lib.virtualfile_from_data(check_kind="vector", data=data) + with file_context as infile: + if outfile is None: + outfile = tmpfile.name + lib.call_module( + module="filter1d", + args=build_arg_string(kwargs, infile=infile, outfile=outfile), + ) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + if output_type == "numpy": + result = result.to_numpy() + return result diff --git a/pygmt/src/grd2xyz.py b/pygmt/src/grd2xyz.py index 96fbdda4f56..f7b70ea8c58 100644 --- a/pygmt/src/grd2xyz.py +++ b/pygmt/src/grd2xyz.py @@ -2,14 +2,15 @@ grd2xyz - Convert grid to data table """ +import pandas as pd import xarray as xr from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, validate_output_table_type, ) @@ -149,24 +150,31 @@ def grd2xyz(grid, output_type="pandas", outfile=None, **kwargs): ) # Set the default column names for the pandas dataframe header - column_names = ["x", "y", "z"] + dataframe_header = ["x", "y", "z"] # Let output pandas column names match input DataArray dimension names - if isinstance(grid, xr.DataArray): + if isinstance(grid, xr.DataArray) and output_type == "pandas": # Reverse the dims because it is rows, columns ordered. - column_names = [grid.dims[1], grid.dims[0], grid.name] - - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as vingrd, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module="grd2xyz", - args=build_arg_string(kwargs, infile=vingrd, outfile=vouttbl), + dataframe_header = [grid.dims[1], grid.dims[0], grid.name] + + with GMTTempFile() as tmpfile: + with Session() as lib: + file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) + with file_context as infile: + if outfile is None: + outfile = tmpfile.name + lib.call_module( + module="grd2xyz", + args=build_arg_string(kwargs, infile=infile, outfile=outfile), + ) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + result = pd.read_csv( + tmpfile.name, sep="\t", names=dataframe_header, comment=">" ) + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=column_names, - ) + if output_type == "numpy": + result = result.to_numpy() + return result diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py index 1aadf7c4b90..c9524aa1ab4 100644 --- a/pygmt/src/grdhisteq.py +++ b/pygmt/src/grdhisteq.py @@ -3,6 +3,7 @@ """ import numpy as np +import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( @@ -10,7 +11,6 @@ build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, validate_output_table_type, ) @@ -63,7 +63,7 @@ class grdhisteq: # noqa: N801 h="header", ) @kwargs_to_strings(R="sequence") - def _grdhisteq(caller, grid, output_type, **kwargs): + def _grdhisteq(grid, output_type, **kwargs): r""" Perform histogram equalization for a grid. @@ -105,36 +105,34 @@ def _grdhisteq(caller, grid, output_type, **kwargs): -------- :func:`pygmt.grd2cpt` """ - if caller not in ["compute_bins", "equalize_grid"]: - raise GMTInvalidInput(f"Unrecognized caller: {caller}.") with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as vingrid, lib.virtualfile_to_data( - kind="dataset", fname=kwargs.get("D") - ) as vouttbl: - if caller == "compute_bins": - kwargs["D"] = vouttbl + file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) + with file_context as infile: lib.call_module( - module="grdhisteq", args=build_arg_string(kwargs, infile=vingrid) + module="grdhisteq", args=build_arg_string(kwargs, infile=infile) ) - if output_type == "xarray": - return load_dataarray(kwargs["G"]) + if output_type == "file": + return None + if output_type == "xarray": + return load_dataarray(kwargs["G"]) + + result = pd.read_csv( + filepath_or_buffer=kwargs["D"], + sep="\t", + header=None, + names=["start", "stop", "bin_id"], + dtype={ + "start": np.float32, + "stop": np.float32, + "bin_id": np.uint32, + }, + ) + if output_type == "numpy": + return result.to_numpy() - result = return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=["start", "stop", "bin_id"], - ) - if output_type == "pandas": - result = result.astype( - {"start": np.float32, "stop": np.float32, "bin_id": np.uint32} - ) - return result.set_index("bin_id") - return result + return result.set_index("bin_id") @staticmethod @fmt_docstring @@ -214,7 +212,6 @@ def equalize_grid( else: raise GMTInvalidInput("Must specify 'outgrid' as a string or None.") return grdhisteq._grdhisteq( - caller="equalize_grid", grid=grid, output_type=output_type, outgrid=outgrid, @@ -322,14 +319,16 @@ def compute_bins( if header is not None and output_type != "file": raise GMTInvalidInput("'header' is only allowed with output_type='file'.") - return grdhisteq._grdhisteq( - caller="compute_bins", - grid=grid, - output_type=output_type, - outfile=outfile, - divisions=divisions, - quadratic=quadratic, - verbose=verbose, - region=region, - header=header, - ) + with GMTTempFile(suffix=".txt") as tmpfile: + if output_type != "file": + outfile = tmpfile.name + return grdhisteq._grdhisteq( + grid, + output_type=output_type, + outfile=outfile, + divisions=divisions, + quadratic=quadratic, + verbose=verbose, + region=region, + header=header, + ) diff --git a/pygmt/src/grdtrack.py b/pygmt/src/grdtrack.py index af1ea1fd913..1d0daa1b800 100644 --- a/pygmt/src/grdtrack.py +++ b/pygmt/src/grdtrack.py @@ -5,12 +5,11 @@ from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, - validate_output_table_type, ) __doctest_skip__ = ["grdtrack"] @@ -44,9 +43,7 @@ w="wrap", ) @kwargs_to_strings(R="sequence", S="sequence", i="sequence_comma", o="sequence_comma") -def grdtrack( - grid, points=None, output_type="pandas", outfile=None, newcolname=None, **kwargs -): +def grdtrack(grid, points=None, newcolname=None, outfile=None, **kwargs): r""" Sample grids at specified (x,y) locations. @@ -293,26 +290,29 @@ def grdtrack( if hasattr(points, "columns") and newcolname is None: raise GMTInvalidInput("Please pass in a str to 'newcolname'") - output_type = validate_output_table_type(output_type, outfile=outfile) + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + with lib.virtualfile_from_data( + check_kind="raster", data=grid + ) as grdfile, lib.virtualfile_from_data( + check_kind="vector", data=points, required_data=False + ) as csvfile: + kwargs["G"] = grdfile + if outfile is None: # Output to tmpfile if outfile is not set + outfile = tmpfile.name + lib.call_module( + module="grdtrack", + args=build_arg_string(kwargs, infile=csvfile, outfile=outfile), + ) - column_names = None - if isinstance(points, pd.DataFrame): - column_names = [*points.columns.to_list(), newcolname] + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + try: + column_names = [*points.columns.to_list(), newcolname] + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + except AttributeError: # 'str' object has no attribute 'columns' + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as vingrd, lib.virtualfile_from_data( - check_kind="vector", data=points, required_data=False - ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - kwargs["G"] = vingrd - lib.call_module( - module="grdtrack", - args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), - ) - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=column_names, - ) + return result diff --git a/pygmt/src/grdvolume.py b/pygmt/src/grdvolume.py index 3073888e082..70d99e19b82 100644 --- a/pygmt/src/grdvolume.py +++ b/pygmt/src/grdvolume.py @@ -1,12 +1,13 @@ """ grdvolume - Calculate grid volume and area constrained by a contour. """ +import pandas as pd from pygmt.clib import Session from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, validate_output_table_type, ) @@ -101,12 +102,23 @@ def grdvolume(grid, output_type="pandas", outfile=None, **kwargs): """ output_type = validate_output_table_type(output_type, outfile=outfile) - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="raster", data=grid - ) as vingrid, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module="grdvolume", - args=build_arg_string(kwargs, infile=vingrid, outfile=vouttbl), - ) - return return_table(session=lib, output_type=output_type, vfile=vouttbl) + with GMTTempFile() as tmpfile: + with Session() as lib: + file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) + with file_context as infile: + if outfile is None: + outfile = tmpfile.name + lib.call_module( + module="grdvolume", + args=build_arg_string(kwargs, infile=infile, outfile=outfile), + ) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + if output_type == "numpy": + result = result.to_numpy() + return result diff --git a/pygmt/src/project.py b/pygmt/src/project.py index 38da5ddcb61..6ce7da4b521 100644 --- a/pygmt/src/project.py +++ b/pygmt/src/project.py @@ -1,15 +1,15 @@ """ project - Project data onto lines or great circles, or generate tracks. """ +import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, - validate_output_table_type, ) @@ -31,9 +31,7 @@ f="coltypes", ) @kwargs_to_strings(E="sequence", L="sequence", T="sequence", W="sequence", C="sequence") -def project( - data=None, x=None, y=None, z=None, output_type="pandas", outfile=None, **kwargs -): +def project(data=None, x=None, y=None, z=None, outfile=None, **kwargs): r""" Project data onto lines or great circles, or generate tracks. @@ -224,27 +222,31 @@ def project( "The `convention` parameter is not allowed with `generate`." ) - output_type = validate_output_table_type(output_type, outfile=outfile) - - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="vector", - data=data, - x=x, - y=y, - z=z, - required_z=False, - required_data=False, - ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module="project", - args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), - ) - column_names = list("rsp") if kwargs.get("G") is not None else None - - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=column_names, - ) + with GMTTempFile(suffix=".csv") as tmpfile: + if outfile is None: # Output to tmpfile if outfile is not set + outfile = tmpfile.name + with Session() as lib: + if kwargs.get("G") is None: + table_context = lib.virtualfile_from_data( + check_kind="vector", data=data, x=x, y=y, z=z, required_z=False + ) + + # Run project on the temporary (csv) data table + with table_context as infile: + arg_str = build_arg_string(kwargs, infile=infile, outfile=outfile) + else: + arg_str = build_arg_string(kwargs, outfile=outfile) + lib.call_module(module="project", args=arg_str) + + # if user did not set outfile, return pd.DataFrame + if outfile == tmpfile.name: + if kwargs.get("G") is not None: + column_names = list("rsp") + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + else: + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + # return None if outfile set, output in outfile + elif outfile != tmpfile.name: + result = None + + return result diff --git a/pygmt/src/select.py b/pygmt/src/select.py index fed75646f33..4c057f83063 100644 --- a/pygmt/src/select.py +++ b/pygmt/src/select.py @@ -4,12 +4,11 @@ import pandas as pd from pygmt.clib import Session from pygmt.helpers import ( + GMTTempFile, build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, - validate_output_table_type, ) __doctest_skip__ = ["select"] @@ -41,7 +40,7 @@ w="wrap", ) @kwargs_to_strings(M="sequence", R="sequence", i="sequence_comma", o="sequence_comma") -def select(data=None, output_type="pandas", outfile=None, **kwargs): +def select(data=None, outfile=None, **kwargs): r""" Select data table subsets based on multiple spatial criteria. @@ -196,24 +195,26 @@ def select(data=None, output_type="pandas", outfile=None, **kwargs): >>> # longitudes 246 and 247 and latitudes 20 and 21 >>> out = pygmt.select(data=ship_data, region=[246, 247, 20, 21]) """ - output_type = validate_output_table_type(output_type, outfile=outfile) - - with Session() as lib: - with lib.virtualfile_from_data( - check_kind="vector", data=data - ) as vintbl, lib.virtualfile_to_data(kind="dataset", fname=outfile) as vouttbl: - lib.call_module( - module="select", - args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl), - ) - - column_names = None - if isinstance(data, pd.DataFrame): - column_names = data.columns.to_list() - - return return_table( - session=lib, - output_type=output_type, - vfile=vouttbl, - column_names=column_names, - ) + + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + table_context = lib.virtualfile_from_data(check_kind="vector", data=data) + with table_context as infile: + if outfile is None: + outfile = tmpfile.name + lib.call_module( + module="select", + args=build_arg_string(kwargs, infile=infile, outfile=outfile), + ) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + try: + column_names = data.columns.to_list() + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + except AttributeError: # 'str' object has no attribute 'columns' + result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + return result diff --git a/pygmt/src/triangulate.py b/pygmt/src/triangulate.py index c74b8eece73..8fb19e9f63e 100644 --- a/pygmt/src/triangulate.py +++ b/pygmt/src/triangulate.py @@ -3,6 +3,7 @@ Cartesian data. """ +import pandas as pd from pygmt.clib import Session from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( @@ -10,7 +11,6 @@ build_arg_string, fmt_docstring, kwargs_to_strings, - return_table, use_alias, validate_output_table_type, ) @@ -123,22 +123,26 @@ def _triangulate( ``outgrid`` or ``outfile``) """ with Session() as lib: - with lib.virtualfile_from_data( + table_context = lib.virtualfile_from_data( check_kind="vector", data=data, x=x, y=y, z=z, required_z=False - ) as vintbl, lib.virtualfile_to_data( - kind="dataset", fname=outfile - ) as vouttbl: + ) + with table_context as infile: # table output if outgrid is unset, else output to outgrid if (outgrid := kwargs.get("G")) is None: - kwargs.update({">": vouttbl}) + kwargs.update({">": outfile}) lib.call_module( - module="triangulate", args=build_arg_string(kwargs, infile=vintbl) + module="triangulate", args=build_arg_string(kwargs, infile=infile) ) - if output_type == "xarray": - return load_dataarray(outgrid) + if output_type == "file": + return None + if output_type == "xarray": + return load_dataarray(outgrid) - return return_table(session=lib, output_type=output_type, vfile=vouttbl) + result = pd.read_csv(outfile, sep="\t", header=None) + if output_type == "numpy": + return result.to_numpy() + return result @staticmethod @fmt_docstring @@ -351,22 +355,26 @@ def delaunay_triples( # noqa: PLR0913 """ output_type = validate_output_table_type(output_type, outfile) - return triangulate._triangulate( - data=data, - x=x, - y=y, - z=z, - output_type=output_type, - outfile=outfile, - projection=projection, - verbose=verbose, - binary=binary, - nodata=nodata, - find=find, - coltypes=coltypes, - header=header, - incols=incols, - skiprows=skiprows, - wrap=wrap, - **kwargs, - ) + # Return a pandas.DataFrame if ``outfile`` is not set + with GMTTempFile(suffix=".txt") as tmpfile: + if output_type != "file": + outfile = tmpfile.name + return triangulate._triangulate( + data=data, + x=x, + y=y, + z=z, + output_type=output_type, + outfile=outfile, + projection=projection, + verbose=verbose, + binary=binary, + nodata=nodata, + find=find, + coltypes=coltypes, + header=header, + incols=incols, + skiprows=skiprows, + wrap=wrap, + **kwargs, + ) diff --git a/pygmt/tests/test_triangulate.py b/pygmt/tests/test_triangulate.py index 191e43af305..97d00a42a56 100644 --- a/pygmt/tests/test_triangulate.py +++ b/pygmt/tests/test_triangulate.py @@ -43,8 +43,7 @@ def fixture_expected_dataframe(): [4, 6, 1], [3, 4, 2], [9, 3, 8], - ], - dtype=float, + ] ) @@ -116,9 +115,7 @@ def test_delaunay_triples_outfile(dataframe, expected_dataframe): assert len(record) == 1 # check that only one warning was raised assert result is None # return value is None assert Path(tmpfile.name).stat().st_size > 0 - temp_df = pd.read_csv( - filepath_or_buffer=tmpfile.name, sep="\t", header=None, dtype=float - ) + temp_df = pd.read_csv(filepath_or_buffer=tmpfile.name, sep="\t", header=None) pd.testing.assert_frame_equal(left=temp_df, right=expected_dataframe) From 180f3ec9233f7f7e30378ab0d00b46c9f77aeab3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 28 Feb 2024 12:56:06 +0800 Subject: [PATCH 81/85] Minor fixes and improvements --- pygmt/datatypes/dataset.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index 1338de71f3d..eb110e3cb12 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -12,8 +12,7 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801 """ GMT dataset structure for holding multiple tables (files). - This class is only meant for internal use by PyGMT. It is not exposed to users. - + This class is only meant for internal use by PyGMT and is not exposed to users. See the GMT source code gmt_resources.h for the original C struct definitions. Examples @@ -22,7 +21,7 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801 >>> from pygmt.clib import Session >>> >>> with GMTTempFile(suffix=".txt") as tmpfile: - ... # prepare the sample data file + ... # Prepare the sample data file ... with open(tmpfile.name, mode="w") as fp: ... print(">", file=fp) ... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp) @@ -30,15 +29,15 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801 ... print(">", file=fp) ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) - ... # read the data file + ... # Read the data file ... with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... with lib.virtualfile_out(kind="dataset") as vouttbl: ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") - ... # the dataset + ... # The dataset ... ds = lib.read_virtualfile(vouttbl, kind="dataset").contents ... print(ds.n_tables, ds.n_columns, ds.n_segments) ... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns]) - ... # the table + ... # The table ... tbl = ds.table[0].contents ... print(tbl.n_columns, tbl.n_segments, tbl.n_records) ... print(tbl.min[: tbl.n_columns], ds.max[: tbl.n_columns]) @@ -142,7 +141,7 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801 ("hidden", ctp.c_void_p), ] - def to_dataframe(self): + def to_dataframe(self) -> pd.DataFrame: """ Convert a _GMT_DATASET object to a :class:`pandas.DataFrame` object. @@ -152,7 +151,7 @@ def to_dataframe(self): Returns ------- - :class:`pandas.DataFrame` + df A :class:`pandas.DataFrame` object. Examples @@ -170,7 +169,7 @@ def to_dataframe(self): ... print("7.0 8.0 9.0 TEXT8 TEXT90", file=fp) ... print("10.0 11.0 12.0 TEXT123 TEXT456789", file=fp) ... with Session() as lib: - ... with lib.virtualfile_to_data(kind="dataset") as vouttbl: + ... with lib.virtualfile_out(kind="dataset") as vouttbl: ... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td") ... ds = lib.read_virtualfile(vouttbl, kind="dataset") ... df = ds.contents.to_dataframe() @@ -181,6 +180,7 @@ def to_dataframe(self): 2 7.0 8.0 9.0 TEXT8 TEXT90 3 10.0 11.0 12.0 TEXT123 TEXT456789 """ + # Deal with numeric columns vectors = [] for icol in range(self.n_columns): colvector = [] @@ -193,7 +193,7 @@ def to_dataframe(self): ) vectors.append(np.concatenate(colvector)) - # deal with trailing text column + # Deal with trailing text column textvector = [] for itbl in range(self.n_tables): dtbl = self.table[itbl].contents From cc5c4ec69229b4c780af7c97fe4d7363b0475806 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 2 Mar 2024 16:45:14 +0800 Subject: [PATCH 82/85] Move the object->str conversion of text column to the to_dataframe method --- pygmt/datatypes/dataset.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index 416e7caef91..cc45533c7b8 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -205,4 +205,14 @@ def to_dataframe(self) -> pd.DataFrame: if textvector: vectors.append(np.char.decode(textvector)) - return pd.concat([pd.Series(v) for v in vectors], axis=1) + df = pd.concat([pd.Series(v) for v in vectors], axis=1) + + # convert text data from object dtype to string dtype + if textvector: + df = df.convert_dtypes( + convert_string=True, + convert_integer=False, + convert_floating=False, + convert_boolean=False, + ) + return df From 661af0b00574c9d22e043a0527bbc3a16c9a4784 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Mar 2024 06:00:08 +0800 Subject: [PATCH 83/85] Apply suggestions from code review Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/datatypes/dataset.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index cc45533c7b8..5c9e7efa9c8 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -192,7 +192,7 @@ def to_dataframe(self) -> pd.DataFrame: colvector.append( np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,)) ) - vectors.append(np.concatenate(colvector)) + vectors.append(pd.Series(data=np.concatenate(colvector))) # Deal with trailing text column textvector = [] @@ -203,16 +203,10 @@ def to_dataframe(self) -> pd.DataFrame: if dseg.text: textvector.extend(dseg.text[: dseg.n_rows]) if textvector: - vectors.append(np.char.decode(textvector)) + vectors.append( + pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype()) + ) - df = pd.concat([pd.Series(v) for v in vectors], axis=1) + df = pd.concat(objs=vectors, axis=1) - # convert text data from object dtype to string dtype - if textvector: - df = df.convert_dtypes( - convert_string=True, - convert_integer=False, - convert_floating=False, - convert_boolean=False, - ) return df From 394a054ac1f3f22b7da55114d24192090192129b Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Mar 2024 06:00:29 +0800 Subject: [PATCH 84/85] Update pygmt/datatypes/dataset.py Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com> --- pygmt/datatypes/dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index 5c9e7efa9c8..e69d229f653 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -180,6 +180,8 @@ def to_dataframe(self) -> pd.DataFrame: 1 4.0 5.0 6.0 TEXT4 TEXT567 2 7.0 8.0 9.0 TEXT8 TEXT90 3 10.0 11.0 12.0 TEXT123 TEXT456789 + >>> df.dtypes.to_list() + [dtype('float64'), dtype('float64'), dtype('float64'), string[python]] """ # Deal with numeric columns vectors = [] From 17d2b4c767cb3f663aeb055f09280f2b2cc1d440 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 7 Mar 2024 10:46:04 +0800 Subject: [PATCH 85/85] Remove a blank line --- pygmt/datatypes/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pygmt/datatypes/dataset.py b/pygmt/datatypes/dataset.py index e69d229f653..21953ee9051 100644 --- a/pygmt/datatypes/dataset.py +++ b/pygmt/datatypes/dataset.py @@ -210,5 +210,4 @@ def to_dataframe(self) -> pd.DataFrame: ) df = pd.concat(objs=vectors, axis=1) - return df