-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add the Session.virtualfile_from_stringio method to allow StringIO input for certain functions/methods #3326
Changes from all commits
5e22d55
f75844e
50467b5
14a14e9
37c3c2f
4e2e545
8e1a609
55d6e81
b3b2cb9
89757ec
0489783
791e4f6
5f4d21f
1a2d336
3682098
21b2496
146e430
c589a40
ff90b2e
90455c1
4e4bd2d
deb917d
49fa805
97bbe05
dece315
486fce7
021a97a
f6da405
824d861
640e9a9
3161963
026f6e4
850337e
ed20118
f1b5f08
e10cef6
22d4d55
5ef08e3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
|
||
import contextlib | ||
import ctypes as ctp | ||
import io | ||
import pathlib | ||
import sys | ||
import warnings | ||
|
@@ -60,6 +61,7 @@ | |
"GMT_IS_PLP", # items could be any one of POINT, LINE, or POLY | ||
"GMT_IS_SURFACE", # items are 2-D grid | ||
"GMT_IS_VOLUME", # items are 3-D grid | ||
"GMT_IS_TEXT", # Text strings which triggers ASCII text reading | ||
] | ||
|
||
METHODS = [ | ||
|
@@ -70,6 +72,11 @@ | |
DIRECTIONS = ["GMT_IN", "GMT_OUT"] | ||
|
||
MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"] | ||
MODE_MODIFIERS = [ | ||
"GMT_GRID_IS_CARTESIAN", | ||
"GMT_GRID_IS_GEO", | ||
"GMT_WITH_STRINGS", | ||
] | ||
|
||
REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"] | ||
|
||
|
@@ -728,7 +735,7 @@ def create_data( | |
mode_int = self._parse_constant( | ||
mode, | ||
valid=MODES, | ||
valid_modifiers=["GMT_GRID_IS_CARTESIAN", "GMT_GRID_IS_GEO"], | ||
valid_modifiers=MODE_MODIFIERS, | ||
) | ||
geometry_int = self._parse_constant(geometry, valid=GEOMETRIES) | ||
registration_int = self._parse_constant(registration, valid=REGISTRATIONS) | ||
|
@@ -1603,6 +1610,100 @@ def virtualfile_from_grid(self, grid): | |
with self.open_virtualfile(*args) as vfile: | ||
yield vfile | ||
|
||
@contextlib.contextmanager
def virtualfile_from_stringio(self, stringio: io.StringIO):
    r"""
    Store a :class:`io.StringIO` object in a virtual file.

    Store the contents of a :class:`io.StringIO` object in a GMT_DATASET container
    and create a virtual file to pass to a GMT module.

    For simplicity, we currently make the following assumptions about the contents
    of the StringIO object:

    - A line starting with ``"#"`` is a comment line and is skipped.
    - A line starting with ``">"`` is a segment header.
    - Any other line is a data record of the current segment.

    Parameters
    ----------
    stringio
        The :class:`io.StringIO` object containing the data to be stored in the
        virtual file.

    Yields
    ------
    fname
        The name of the virtual file.

    Raises
    ------
    ValueError
        If the StringIO object contains no data records (i.e., it is empty or
        holds only comment lines and/or segment headers).

    Examples
    --------
    >>> import io
    >>> from pygmt.clib import Session
    >>> # A StringIO object containing legend specifications
    >>> stringio = io.StringIO(
    ...     "# Comment\n"
    ...     "H 24p Legend\n"
    ...     "N 2\n"
    ...     "S 0.1i c 0.15i p300/12 0.25p 0.3i My circle\n"
    ... )
    >>> with Session() as lib:
    ...     with lib.virtualfile_from_stringio(stringio) as fin:
    ...         lib.virtualfile_to_dataset(vfname=fin, output_type="pandas")
                                                 0
    0                                 H 24p Legend
    1                                          N 2
    2  S 0.1i c 0.15i p300/12 0.25p 0.3i My circle
    """
    # Parse the io.StringIO object into a list of segments, each one holding an
    # optional header string and a list of text records.
    segments = []
    current_segment = {"header": "", "data": []}
    for line in stringio.getvalue().splitlines():
        if line.startswith("#"):  # Skip comments
            continue
        if line.startswith(">"):  # Segment header
            if current_segment["data"]:  # If we have data, start a new segment
                segments.append(current_segment)
                current_segment = {"header": "", "data": []}
            # Remove only the leading ">" marker and the whitespace after it.
            # Using str.strip(">") here would also eat a ">" that legitimately
            # ends the header text.
            current_segment["header"] = line.lstrip(">").lstrip()
        else:
            current_segment["data"].append(line)  # type: ignore[attr-defined]
    if current_segment["data"]:  # Add the last segment if it has data
        segments.append(current_segment)

    # Segments without data records are never stored, so an empty list means
    # there is nothing to pass to GMT. Fail early with a clear message instead
    # of letting max() complain about an empty sequence below.
    if not segments:
        msg = "No data records found in the io.StringIO object."
        raise ValueError(msg)

    # One table with one or more segments.
    # n_rows is the maximum number of rows/records for all segments.
    # n_columns is the number of numeric data columns, so it's 0 here.
    n_tables = 1
    n_segments = len(segments)
    n_rows = max(len(segment["data"]) for segment in segments)
    n_columns = 0

    # Create the GMT_DATASET container
    family, geometry = "GMT_IS_DATASET", "GMT_IS_TEXT"
    dataset = self.create_data(
        family,
        geometry,
        mode="GMT_CONTAINER_ONLY|GMT_WITH_STRINGS",
        dim=[n_tables, n_segments, n_rows, n_columns],
    )
    dataset = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET))
    table = dataset.contents.table[0].contents
    # Attach the header and the text records to each segment of the table.
    for i, segment in enumerate(segments):
        seg = table.segment[i].contents
        if segment["header"]:
            seg.header = segment["header"].encode()  # type: ignore[attr-defined]
        seg.text = strings_to_ctypes_array(segment["data"])

    with self.open_virtualfile(family, geometry, "GMT_IN", dataset) as vfile:
        try:
            yield vfile
        finally:
            # Must set the pointers to None to avoid double freeing the memory.
            # Maybe upstream bug.
            # The reset happens inside the "with" block, i.e., before the
            # virtual file is closed; keep that order.
            for i in range(n_segments):
                seg = table.segment[i].contents
                seg.header = None
                seg.text = None
|
||
def virtualfile_in( # noqa: PLR0912 | ||
self, | ||
check_kind=None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #3438 for the POC PR for |
||
|
@@ -1696,6 +1797,7 @@ def virtualfile_in( # noqa: PLR0912 | |
"geojson": tempfile_from_geojson, | ||
"grid": self.virtualfile_from_grid, | ||
"image": tempfile_from_image, | ||
"stringio": self.virtualfile_from_stringio, | ||
# Note: virtualfile_from_matrix is not used because a matrix can be | ||
# converted to vectors instead, and using vectors allows for better | ||
# handling of string type inputs (e.g. for datetime data types) | ||
|
@@ -1704,7 +1806,7 @@ def virtualfile_in( # noqa: PLR0912 | |
}[kind] | ||
|
||
# Ensure the data is an iterable (Python list or tuple) | ||
if kind in {"geojson", "grid", "image", "file", "arg"}: | ||
if kind in {"geojson", "grid", "image", "file", "arg", "stringio"}: | ||
if kind == "image" and data.dtype != "uint8": | ||
msg = ( | ||
f"Input image has dtype: {data.dtype} which is unsupported, " | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is only ASCII supported for now? What about other encodings?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GMT only accepts ASCII text strings. Any non-ASCII characters should be written in the form of octal codes.