Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the Session.virtualfile_from_stringio method to allow StringIO input for certain functions/methods #3326

Merged
merged 38 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
5e22d55
Add the Session.virtualfile_from_stringio function to support StringI…
seisman Jul 11, 2024
f75844e
Let virtualfile_in support the stringio kind
seisman Jul 11, 2024
50467b5
Make data_kind support stringio
seisman Jul 11, 2024
14a14e9
Refactor Figure.legend to support stringio
seisman Jul 11, 2024
37c3c2f
Add a legend test for StringIO input
seisman Jul 11, 2024
4e2e545
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 17, 2024
8e1a609
Some updates
seisman Jul 17, 2024
55d6e81
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 20, 2024
b3b2cb9
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 21, 2024
89757ec
Fix styling issue
seisman Jul 21, 2024
0489783
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 22, 2024
791e4f6
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 24, 2024
5f4d21f
Updates
seisman Jul 24, 2024
1a2d336
Merge branch 'main' into api/virtualfile-from-stringio
seisman Jul 26, 2024
3682098
Improve Figure.legend
seisman Jul 26, 2024
21b2496
Merge branch 'main' into api/virtualfile-from-stringio
seisman Sep 11, 2024
146e430
Fix legend
seisman Sep 11, 2024
c589a40
Updates
seisman Sep 11, 2024
ff90b2e
Figure.legend: Refactor to simplify the logic of checking legend spec…
seisman Sep 11, 2024
90455c1
Merge remote-tracking branch 'origin/refactor/legend' into api/virtua…
seisman Sep 12, 2024
4e4bd2d
Fix
seisman Sep 12, 2024
deb917d
Revert changes in legend
seisman Sep 12, 2024
49fa805
Merge branch 'main' into api/virtualfile-from-stringio
seisman Sep 12, 2024
97bbe05
Merge branch 'main' into api/virtualfile-from-stringio
seisman Sep 13, 2024
dece315
Improve docstrings
seisman Sep 13, 2024
486fce7
Support multi-segment stringio input
seisman Sep 13, 2024
021a97a
Revert "Support multi-segment stringio input"
seisman Sep 13, 2024
f6da405
Remove the leading '>' from header
seisman Sep 13, 2024
824d861
Also need to set the header pointer to None
seisman Sep 13, 2024
640e9a9
Support multi-segment stringio input
seisman Sep 13, 2024
3161963
Update docstrings
seisman Sep 13, 2024
026f6e4
Fix a bug in n_rows
seisman Sep 13, 2024
850337e
Add some tests
seisman Sep 13, 2024
ed20118
Fix static type checking
seisman Sep 13, 2024
f1b5f08
Improve the tests
seisman Sep 13, 2024
e10cef6
Simplify the checking of segment header
seisman Sep 13, 2024
22d4d55
Merge branch 'main' into api/virtualfile-from-stringio
seisman Sep 16, 2024
5ef08e3
Merge branch 'main' into api/virtualfile-from-stringio
seisman Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -317,5 +317,6 @@ Low level access (these are mostly used by the :mod:`pygmt.clib` package):
clib.Session.get_libgmt_func
clib.Session.virtualfile_from_data
clib.Session.virtualfile_from_grid
clib.Session.virtualfile_from_stringio
clib.Session.virtualfile_from_matrix
clib.Session.virtualfile_from_vectors
89 changes: 87 additions & 2 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import contextlib
import ctypes as ctp
import io
import pathlib
import sys
import warnings
Expand Down Expand Up @@ -60,6 +61,7 @@
"GMT_IS_PLP", # items could be any one of POINT, LINE, or POLY
"GMT_IS_SURFACE", # items are 2-D grid
"GMT_IS_VOLUME", # items are 3-D grid
"GMT_IS_TEXT", # Text strings, which trigger ASCII text reading
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is only ASCII supported for now? What about other encodings?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GMT only accepts ASCII text strings. Any non-ASCII characters should be written in the form of octal codes.

]

METHODS = [
Expand All @@ -70,6 +72,11 @@
DIRECTIONS = ["GMT_IN", "GMT_OUT"]

MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"]
MODE_MODIFIERS = [
"GMT_GRID_IS_CARTESIAN",
"GMT_GRID_IS_GEO",
"GMT_WITH_STRINGS",
]

REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"]

Expand Down Expand Up @@ -728,7 +735,7 @@
mode_int = self._parse_constant(
mode,
valid=MODES,
valid_modifiers=["GMT_GRID_IS_CARTESIAN", "GMT_GRID_IS_GEO"],
valid_modifiers=MODE_MODIFIERS,
)
geometry_int = self._parse_constant(geometry, valid=GEOMETRIES)
registration_int = self._parse_constant(registration, valid=REGISTRATIONS)
Expand Down Expand Up @@ -1603,6 +1610,83 @@
with self.open_virtualfile(*args) as vfile:
yield vfile

@contextlib.contextmanager
def virtualfile_from_stringio(self, stringio: io.StringIO):
r"""
Store a :class:`io.StringIO` object in a virtual file.

Store the contents of a :class:`io.StringIO` object in a GMT_DATASET container
and create a virtual file to pass to a GMT module.

Parameters
----------
stringio
The :class:`io.StringIO` object containing the data to be stored in the
virtual file.

Yields
------
fname
The name of the virtual file.

Examples
--------
>>> import io
>>> from pygmt.clib import Session
>>> stringio = io.StringIO(
... "# Comment\n"
... "H 24p Legend\n"
... "N 2\n"
... "S 0.1i c 0.15i p300/12 0.25p 0.3i My circle\n"
... )
>>> with Session() as lib:
... with lib.virtualfile_from_stringio(stringio) as fin:
... lib.virtualfile_to_dataset(vfname=fin, output_type="pandas")
0
0 H 24p Legend
1 N 2
2 S 0.1i c 0.15i p300/12 0.25p 0.3i My circle
"""
# Parse the strings in the io.StringIO object.
# For simplicity, we make a few assumptions.
# - "#" indicates a comment line
# - ">" indicates a segment header
# - Only one table and one segment
header = None
string_arrays = []
for line in stringio.getvalue().splitlines():
if line.startswith("#"): # Skip comments
continue
if line.startswith(">"): # Segment header
if header is not None: # Only one segment is allowed now.
raise GMTInvalidInput("Only one segment is allowed.")
header = line
continue

Check warning on line 1664 in pygmt/clib/session.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/session.py#L1661-L1664

Added lines #L1661 - L1664 were not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need a test to check that multi-segment inputs fail.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's likely we have to allow multiple segments in StringIO.

Here is a CLI version for typesetting a paragraph of text. It seems the first line must be a line not starting with # and is ignored. Not sure why it was designed like this. I'll open another POC PR for Figure.text and see how it works.

gmt text -R0/3/0/5 -JX3i -h1 -M -N -F+f12,Times-Roman+jLT -pdf figure << EOF
This is an unmarked header record not starting with #
> 0 -0.5 13p 3i j
@%5%Figure 1.@%% This illustration shows nothing useful, but it still needs
a figure caption. Highlighted in @;255/0/0;red@;; you can see the locations
of cities where it is @\_impossible@\_ to get any good Thai food; these are to be avoided.
EOF

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correction: I didn't see the -h1 option in the CLI version, which skips the first line. Without -h1, the CLI version still works:

gmt text -R0/3/0/5 -JX3i -M -N -F+f12,Times-Roman+jLT -pdf figure << EOF
> 0 -0.5 13p 3i j
@%5%Figure 1.@%% This illustration shows nothing useful, but it still needs
a figure caption. Highlighted in @;255/0/0;red@;; you can see the locations
of cities where it is @\_impossible@\_ to get any good Thai food; these are to be avoided.
EOF

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is a POC example to show how to support paragraph mode via StringIO:

In [1]: import io

In [2]: stringio = io.StringIO(
   ...:     "> 0 -0.5 13p 3i j\n"
   ...:     "@%5%Figure 1.@%% This illustration shows nothing useful, but it still needs\n"
   ...:     "a figure caption. Highlighted in @;255/0/0;red@;; you can see the locations\n"
   ...:     "of cities where it is @_impossible@_ to get any good Thai food; these are to be avoided.\n"
   ...: )

In [3]: import pygmt

In [4]: fig = pygmt.Figure()

In [5]: from pygmt.clib import Session

In [6]: with Session() as lib:
   ...:     with lib.virtualfile_in(data=stringio) as vintbl:
   ...:         lib.call_module(module="text", args=f"{vintbl} -R0/3/0/5 -JX3i -M -N -F+f12,Times-Roman+jLT")
   ...:

In [7]: fig.show()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringIO objects containing multiple segments are supported in the latest version.

string_arrays.append(line)
# Only one table and one segment. No numeric data, so n_columns is 0.
n_tables, n_segments, n_rows, n_columns = 1, 1, len(string_arrays), 0

family, geometry = "GMT_IS_DATASET", "GMT_IS_TEXT"
dataset = self.create_data(
family,
geometry,
mode="GMT_CONTAINER_ONLY|GMT_WITH_STRINGS",
dim=[n_tables, n_segments, n_rows, n_columns],
)
dataset = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET))
# Assign the strings to the segment
seg = dataset.contents.table[0].contents.segment[0].contents
if header is not None:
seg.header = header.encode()

Check warning on line 1680 in pygmt/clib/session.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/session.py#L1680

Added line #L1680 was not covered by tests
seg.text = strings_to_ctypes_array(string_arrays)

with self.open_virtualfile(family, geometry, "GMT_IN", dataset) as vfile:
try:
yield vfile
finally:
# Must set the text to None to avoid double freeing the memory
seg.text = None

def virtualfile_in( # noqa: PLR0912
self,
check_kind=None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then the question is, how can we add StringIO support to specific functions only (e.g., Figure.legend)?

Maybe legend should have a special check_kind? E.g. if check_kind("legend"): valid_kinds += ("stringio", ...)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #3438 for the POC PR for Figure.legend. Since we already checked the data kind before entering the session, we can have check_kind=False in Figure.legend.

Expand Down Expand Up @@ -1696,6 +1780,7 @@
"geojson": tempfile_from_geojson,
"grid": self.virtualfile_from_grid,
"image": tempfile_from_image,
"stringio": self.virtualfile_from_stringio,
# Note: virtualfile_from_matrix is not used because a matrix can be
# converted to vectors instead, and using vectors allows for better
# handling of string type inputs (e.g. for datetime data types)
Expand All @@ -1704,7 +1789,7 @@
}[kind]

# Ensure the data is an iterable (Python list or tuple)
if kind in {"geojson", "grid", "image", "file", "arg"}:
if kind in {"geojson", "grid", "image", "file", "arg", "stringio"}:
if kind == "image" and data.dtype != "uint8":
msg = (
f"Input image has dtype: {data.dtype} which is unsupported, "
Expand Down
16 changes: 13 additions & 3 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Utilities and common tasks for wrapping the GMT modules.
"""

import io
import os
import pathlib
import shutil
Expand Down Expand Up @@ -188,8 +189,10 @@ def _check_encoding(

def data_kind(
data: Any = None, required: bool = True
) -> Literal["arg", "file", "geojson", "grid", "image", "matrix", "vectors"]:
"""
) -> Literal[
"arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors"
]:
r"""
Check the kind of data that is provided to a module.

The ``data`` argument can be in any type, but only the following types are supported:
Expand Down Expand Up @@ -222,6 +225,7 @@ def data_kind(
>>> import numpy as np
>>> import xarray as xr
>>> import pathlib
>>> import io
>>> data_kind(data=None)
'vectors'
>>> data_kind(data=np.arange(10).reshape((5, 2)))
Expand All @@ -240,8 +244,12 @@ def data_kind(
'grid'
>>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5)))
'image'
>>> data_kind(data=io.StringIO("TEXT1\nTEXT23\n"))
'stringio'
"""
kind: Literal["arg", "file", "geojson", "grid", "image", "matrix", "vectors"]
kind: Literal[
"arg", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors"
]
if isinstance(data, str | pathlib.PurePath) or (
isinstance(data, list | tuple)
and all(isinstance(_file, str | pathlib.PurePath) for _file in data)
Expand All @@ -250,6 +258,8 @@ def data_kind(
kind = "file"
elif isinstance(data, bool | int | float) or (data is None and not required):
kind = "arg"
elif isinstance(data, io.StringIO):
kind = "stringio"
elif isinstance(data, xr.DataArray):
kind = "image" if len(data.dims) == 3 else "grid"
elif hasattr(data, "__geo_interface__"):
Expand Down