diff --git a/.github/workflows/run_tests_osx.yml b/.github/workflows/run_tests_osx.yml index 7919887794..cf87b35bb4 100644 --- a/.github/workflows/run_tests_osx.yml +++ b/.github/workflows/run_tests_osx.yml @@ -7,7 +7,7 @@ name: Run macOS-based netCDF Tests -on: [pull_request, workflow_dispatch] +on: [push,pull_request, workflow_dispatch] jobs: diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 5f18aa1e0d..a6d25dec0e 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [pull_request, workflow_dispatch] +on: [push,pull_request, workflow_dispatch] jobs: @@ -42,7 +42,7 @@ jobs: wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2 tar -jxf hdf-4.2.15.tar.bz2 pushd hdf-4.2.15 - ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib + ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib make -j make install -j popd @@ -164,7 +164,7 @@ jobs: - name: Configure shell: bash -l {0} - run: CFLAGS=${CFLAGS} LDFLAGS=${LDFLAGS} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ./configure --enable-hdf4 --enable-hdf5 --enable-dap --disable-dap-remote-tests --enable-doxygen + run: CFLAGS=${CFLAGS} LDFLAGS=${LDFLAGS} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ./configure --enable-hdf4 --enable-hdf5 --enable-dap --disable-dap-remote-tests --enable-doxygen --enable-external-server-tests if: ${{ success() }} - name: Look at config.log if error diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index 46771aae07..773819c025 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -7,7 +7,7 @@ name: Run MSYS2, MinGW64-based Tests -on: [pull_request, 
workflow_dispatch] +on: [push,pull_request, workflow_dispatch] jobs: diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dac91dd55..2db8dea02e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1092,9 +1092,12 @@ ENDIF() # Option to Enable DAP long tests, remote tests. OPTION(ENABLE_DAP_REMOTE_TESTS "Enable DAP remote tests." ON) +OPTION(ENABLE_EXTERNAL_SERVER_TESTS "Enable external Server remote tests." OFF) OPTION(ENABLE_DAP_LONG_TESTS "Enable DAP long tests." OFF) SET(REMOTETESTSERVERS "remotetest.unidata.ucar.edu" CACHE STRING "test servers to use for remote test") +SET(REMOTETESTSERVERS "remotetest.unidata.ucar.edu" CACHE STRING "test servers to use for remote test") + # See if we have zlib FIND_PACKAGE(ZLIB) @@ -1731,7 +1734,7 @@ ENDIF() # Set some of the options as advanced. MARK_AS_ADVANCED(ENABLE_INTERNAL_DOCS VALGRIND_TESTS ENABLE_COVERAGE_TESTS ) -MARK_AS_ADVANCED(ENABLE_DAP_REMOTE_TESTS ENABLE_DAP_LONG_TESTS USE_REMOTE_CDASH) +MARK_AS_ADVANCED(ENABLE_DAP_REMOTE_TESTS ENABLE_DAP_LONG_TESTS USE_REMOTE_CDASH ENABLE_EXTERNAL_SERVER_TESTS) MARK_AS_ADVANCED(ENABLE_DOXYGEN_BUILD_RELEASE_DOCS DOXYGEN_ENABLE_TASKS ENABLE_DOXYGEN_SERVER_SIDE_SEARCH) MARK_AS_ADVANCED(ENABLE_SHARED_LIBRARY_VERSION) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index cdac66eadf..2c7c8efe51 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,7 +7,9 @@ This file contains a high-level description of this package's evolution. Release ## 4.9.1 - T.B.D. -* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????). +* [Enhancement] Add support for Zarr (fixed length) string type in nczarr. See [Github #2492](https://github.com/Unidata/netcdf-c/pull/2492). +* [Bug Fix] Split the remote tests into two parts: one for the remotetest server and one for all other external servers. Also add a configure option to enable the latter set. See [Github #2491](https://github.com/Unidata/netcdf-c/pull/2491). 
+* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466). * [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462). * [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448). * [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445). @@ -27,7 +29,7 @@ This file contains a high-level description of this package's evolution. Release * [Enhancement] Allow the read/write of JSON-valued Zarr attributes to allow for domain specific info such as used by GDAL/Zarr. See [Github #2278](https://github.com/Unidata/netcdf-c/pull/2278). * [Enhancement] Turn on the XArray convention for NCZarr files by default. WARNING, this means that the mode should explicitly specify "nczarr" or "zarr" even if "xarray" or "noxarray" is specified. See [Github #2257](https://github.com/Unidata/netcdf-c/pull/2257). - +* [Enhancement] Update the documentation to match the current filter capabilities See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249). * [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249). * [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318). * [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245).
diff --git a/configure.ac b/configure.ac index 1a1d7615e3..f83ea3dd8d 100644 --- a/configure.ac +++ b/configure.ac @@ -604,31 +604,52 @@ AM_CONDITIONAL(ENABLE_QUANTIZE, [test x$enable_quantize = xyes]) # --enable-dap => enable-dap4 enable_dap4=$enable_dap -AC_MSG_CHECKING([whether dap remote testing should be enabled]) +AC_MSG_CHECKING([whether dap use of remotetest server should be enabled]) AC_ARG_ENABLE([dap-remote-tests], - [AS_HELP_STRING([--enable-dap-remote-tests], - [enable dap remote tests])]) -test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes + [AS_HELP_STRING([--disable-dap-remote-tests], + [disable dap remote tests])]) +# Default off +test "x$enable_dap_remote_tests" = xyes || enable_dap_remote_tests=no if test "x$enable_dap" = "xno" ; then enable_dap_remote_tests=no fi AC_MSG_RESULT($enable_dap_remote_tests) +AC_MSG_CHECKING([whether dap use of remotetest server should be enabled]) +AC_ARG_ENABLE([dap-remote-tests], + [AS_HELP_STRING([--disable-dap-remote-tests], + [disable dap remote tests])]) +test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes +AC_MSG_RESULT($enable_dap_remote_tests) + +AC_MSG_CHECKING([whether use of external servers should be enabled]) +AC_ARG_ENABLE([external-server-tests], + [AS_HELP_STRING([--enable-external-server-tests (default off)], + [enable external server tests])]) +test "x$enable_external_server_tests" = xyes || enable_external_server_tests=no +AC_MSG_RESULT($enable_external_server_tests) + +if test "x$enable_dap_remote_tests" = "xno" ; then + AC_MSG_NOTICE([--disable-dap_remote_tests => --disable-external-server-tests]) + enable_external_server_tests=no +fi + # Default is not to do the remote authorization tests. 
-AC_MSG_CHECKING([whether dap remote authorization testing should be enabled (default off)]) +AC_MSG_CHECKING([whether dap authorization testing should be enabled (default off)]) AC_ARG_ENABLE([dap-auth-tests], [AS_HELP_STRING([--enable-dap-auth-tests], [enable dap remote authorization tests])]) test "x$enable_dap_auth_tests" = xyes || enable_dap_auth_tests=no +AC_MSG_RESULT($enable_dap_auth_tests) + # dap must be enabled + if test "x$enable_dap" = "xno" ; then - enable_dap_auth_tests=no -fi -# if remote tests are disabled, then so is this -if test "x$enable_dap_remote_tests" = "xno" ; then + AC_MSG_NOTICE([--disable-dap => --disable-dap-remote-tests --disable-auth-tests --disable-external-server-tests]) enable_dap_remote_tests=no + enable_dap_auth_tests=no + enable_external_server_tests=no fi -AC_MSG_RESULT($enable_dap_auth_tests) # Did the user specify a list of test servers to try for remote tests? AC_MSG_CHECKING([which remote test server(s) to use]) @@ -653,16 +674,20 @@ fi if test "x$enable_dap_remote_tests" = xyes; then AC_DEFINE([ENABLE_DAP_REMOTE_TESTS], [1], [if true, do remote tests]) fi +if test "x$enable_external_server_tests" = xyes; then + AC_DEFINE([ENABLE_EXTERNAL_SERVER_TESTS], [1], [if true, do remote external tests]) +fi AC_MSG_CHECKING([whether the time-consuming dap tests should be enabled (default off)]) AC_ARG_ENABLE([dap-long-tests], [AS_HELP_STRING([--enable-dap-long-tests], [enable dap long tests])]) test "x$enable_dap_long_tests" = xyes || enable_dap_long_tests=no -if test "x$enable_dap_remote_tests" = "xno" ; then +AC_MSG_RESULT([$enable_dap_long_tests]) +if test "x$enable_dap_remote_tests" = "xno" || test "x$enable_external_server_tests" = "xno" ; then + AC_MSG_NOTICE([--disable-dap-remote|external-server-tests => --disable_dap_long_tests]) enable_dap_long_tests=no fi -AC_MSG_RESULT([$enable_dap_long_tests]) # Control zarr storage if test "x$enable_nczarr" = xyes ; then @@ -1758,6 +1783,7 @@ AM_CONDITIONAL(ENABLE_DAP4, [test 
"x$enable_dap4" = xyes]) AM_CONDITIONAL(USE_STRICT_NULL_BYTE_HEADER_PADDING, [test x$enable_strict_null_byte_header_padding = xyes]) AM_CONDITIONAL(ENABLE_CDF5, [test "x$enable_cdf5" = xyes]) AM_CONDITIONAL(ENABLE_DAP_REMOTE_TESTS, [test "x$enable_dap_remote_tests" = xyes]) +AM_CONDITIONAL(ENABLE_EXTERNAL_SERVER_TESTS, [test "x$enable_external_server_tests" = xyes]) AM_CONDITIONAL(ENABLE_DAP_AUTH_TESTS, [test "x$enable_dap_auth_tests" = xyes]) AM_CONDITIONAL(ENABLE_DAP_LONG_TESTS, [test "x$enable_dap_long_tests" = xyes]) AM_CONDITIONAL(USE_PNETCDF_DIR, [test ! "x$PNETCDFDIR" = x]) diff --git a/dap4_test/test_thredds.sh b/dap4_test/test_thredds.sh index 99cbcccc32..eae63d63ea 100755 --- a/dap4_test/test_thredds.sh +++ b/dap4_test/test_thredds.sh @@ -28,6 +28,7 @@ failure() { setresultdir results_test_thredds if test "x${RESET}" = x1 ; then rm -fr ${BASELINEH}/*.thredds ; fi +if test "x$FEATURE_THREDDSTEST" = x1 ; then for f in $F ; do makeurl "dap4://thredds-test.unidata.ucar.edu/thredds/dap4/casestudies" "$f" echo "testing: $URL" @@ -43,6 +44,7 @@ for f in $F ; do cp ./results_test_thredds/${base}.thredds ${BASELINETH}/${base}.thredds fi done +fi # FEATURE_THREDDSTEST echo "*** Pass" exit 0 diff --git a/docs/nczarr.md b/docs/nczarr.md index 95e6c55895..eb7cd46b26 100644 --- a/docs/nczarr.md +++ b/docs/nczarr.md @@ -13,12 +13,10 @@ This extension provides a mapping from a subset of the full netCDF Enhanced (aka The NetCDF version of this storage format is called NCZarr [4]. A note on terminology in this document. - 1. The term "dataset" is used to refer to all of the Zarr objects constituting the meta-data and data. There are some important "caveats" of which to be aware when using this software. - 1. NCZarr currently is not thread-safe. So any attempt to use it with parallelism, including MPIO, is likely to fail. # The NCZarr Data Model {#nczarr_data_model} @@ -35,28 +33,29 @@ Specifically the XArray ''\_ARRAY\_DIMENSIONS'' attribute is one such. 
There are two other, secondary assumption: 1. The actual storage format in which the dataset is stored -- a zip file, for example -- can be read by the _Zarr_ implementation. -2. The filters used by the dataset can be encoded/decoded by the implementation. +2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See [the companion document on +filters](./md_filters.html "filters") for details. -Briefly, the data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. -As with netcdf-4 chunking is supported. -Filters and compression are supported, but -[the companion document on filters](./md_filters.html "filters") -should be consulted for the details. +Briefly, the data model supported by NCZarr is netcdf-4 minus +the user-defined types. However, a restricted form of String type +is supported (see Appendix H). +As with netcdf-4 chunking is supported. Filters and compression +are also [supported](./md_filters.html "filters"). Specifically, the model supports the following. -- "Atomic" types: char, byte, ubyte, short, ushort, int, uint, int64, uint64. +- "Atomic" types: char, byte, ubyte, short, ushort, int, uint, int64, uint64, string. - Shared (named) dimensions - Attributes with specified types -- both global and per-variable - Chunking - Fill values - Groups - N-Dimensional variables +- Scalar variables - Per-variable endianness (big or little) - Filters (including compression) With respect to full netCDF-4, the following concepts are currently unsupported. -- String type - User-defined types (enum, opaque, VLEN, and Compound) - Unlimited dimensions - Contiguous or compact storage @@ -66,6 +65,15 @@ because they are HDF5 specific. When specified, they are treated as chunked where the file consists of only one chunk. 
This means that testing for contiguous or compact is not possible; the _nc_inq_var_chunking_ function will always return NC_CHUNKED and the chunksizes will be the same as the dimension sizes of the variable's dimensions. +Additionally, it should be noted that NCZarr supports scalar variables, +but Zarr does not; Zarr only supports dimensioned variables. +In order to support interoperability, NCZarr does the following. +1. A scalar variable is recorded in the Zarr metadata as if it has a shape of **[1]**. +2. A note is stored in the NCZarr metadata that this is actually a netCDF scalar variable. + +These actions allow NCZarr to properly show scalars in its API while still +maintaining compatibility with Zarr. + # Enabling NCZarr Support {#nczarr_enable} NCZarr support is enabled by default. @@ -322,7 +330,6 @@ aws_secret_access_key=YYYY... ``` See Appendix E for additional information. - ## Addressing Style The notion of "addressing style" may need some expansion. @@ -378,14 +385,14 @@ of NCZarr specific information. These keys are as follows: -_\_NCZARR_SUPERBLOCK\__ -- this is in the top level group -- key _/.zarr_. +_\_nczarr_superblock\__ -- this is in the top level group -- key _/.zarr_. It is in effect the "superblock" for the dataset and contains any netcdf specific dataset level information. It is also used to verify that a given key is the root of a dataset. Currently it contains the following key(s): * "version" -- the NCZarr version defining the format of the dataset. -_\_NCZARR_GROUP\__ -- this key appears in every _.zgroup_ object. +_\_nczarr_group\__ -- this key appears in every _.zgroup_ object. It contains any netcdf specific group information. Specifically it contains the following keys: * "dims" -- the name and size of shared dimensions defined in this group. @@ -393,13 +400,13 @@ Specifically it contains the following keys: * "groups" -- the name of sub-groups defined in this group. 
These lists allow walking the NCZarr dataset without having to use the potentially costly search operation. -_\_NCZARR_ARRAY\__ -- this key appears in every _.zarray_ object. +_\_nczarr_array\__ -- this key appears in every _.zarray_ object. It contains netcdf specific array information. Specifically it contains the following keys: * dimrefs -- the names of the shared dimensions referenced by the variable. * storage -- indicates if the variable is chunked vs contiguous in the netcdf sense. -_\_NCZARR_ATTR\__ -- this key appears in every _.zattr_ object. +_\_nczarr_attr\__ -- this key appears in every _.zattr_ object. This means that technically, it is attribute, but one for which access is normally surpressed . Specifically it contains the following keys: @@ -412,17 +419,17 @@ The latter case, zarr reading nczarr is possible if the zarr library is willing The former case, nczarr reading zarr is also possible if the nczarr can simulate or infer the contents of the missing _\_NCZARR\_XXX_ objects. As a rule this can be done as follows. -1. _\_NCZARR_GROUP\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. +1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. The search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables). Constructing the set of "shared dimensions" is carried out by walking all the variables in the whole dataset and collecting the set of unique integer shapes for the variables. For each such dimension length, a top level dimension is created named ".zdim_" where len is the integer length. -2. _\_NCZARR_ARRAY\__ -- The dimrefs are inferred by using the shape +2. 
_\_nczarr_array\__ -- The dimrefs are inferred by using the shape in _.zarray_ and creating references to the simulated shared dimension. netcdf specific information. -3. _\_NCZARR_ATTR\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. +3. _\_nczarr_attr\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. # Compatibility {#nczarr_compatibility} @@ -434,7 +441,7 @@ The Xarray [7] Zarr implementation uses its own mechan It uses a special attribute named ''_ARRAY_DIMENSIONS''. The value of this attribute is a list of dimension names (strings). An example might be ````["time", "lon", "lat"]````. -It is essentially equivalent to the ````_NCZARR_ARRAY "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. +It is essentially equivalent to the ````_nczarr_array "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. As of _netcdf-c_ version 4.8.2, The Xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr. If possible, this attribute will be read/written by default, @@ -778,34 +785,169 @@ The version 1 format defines three specific objects: _.nczgroup_, _.nczarray_,_. These are stored in parallel with the corresponding Zarr objects. So if there is a key of the form "/x/y/.zarray", then there is also a key "/x/y/.nczarray". The content of these objects is the same as the contents of the corresponding keys. So the value of the ''_NCZARR_ARRAY'' key is the same as the content of the ''.nczarray'' object. 
The list of connections is as follows: -* ''.nczarr'' <=> ''_NCZARR_SUPERBLOCK_'' -* ''.nczgroup <=> ''_NCZARR_GROUP_'' -* ''.nczarray <=> ''_NCZARR_ARRAY_'' -* ''.nczattr <=> ''_NCZARR_ATTR_'' +* ''.nczarr'' <=> ''_nczarr_superblock_'' +* ''.nczgroup <=> ''_nczarr_group_'' +* ''.nczarray <=> ''_nczarr_array_'' +* ''.nczattr <=> ''_nczarr_attr_'' # Appendix G. JSON Attribute Convention. {#nczarr_json} -An attribute may be encountered on read whose value when parsed -by JSON is a dictionary. As a special conventions, the value -converted to a string and stored as the value of the attribute -and the type of the attribute is treated as char. - -When writing a character valued attribute, it's value is examined -to see if it looks like a JSON dictionary (i.e. "{...}") -and is parseable as JSON. -If so, then the attribute value is treated as one long string, -parsed as JSON, and stored in the .zattr file in JSON form. +The Zarr V2 specification is somewhat vague on what is a legal +value for an attribute. The examples all show one of two cases: +1. A simple JSON scalar atomic values (e.g. int, float, char, etc), or +2. A JSON array of such values. -These conventions are intended to help support various +However, the Zarr specification can be read to infer that the value +can in fact be any legal JSON expression. +This "convention" is currently used routinely to help support various attributes created by other packages where the attribute is a -complex JSON dictionary. An example is the GDAL Driver -convention [12]. The value is a complex -JSON dictionary and it is desirable to both read and write that kind of -information through the netcdf API. +complex JSON expression. An example is the GDAL Driver +convention [12], where the value is a complex +JSON dictionary. + +In order for NCZarr to be as consistent as possible with Zarr Version 2, +it is desirable to support this convention for attribute values. 
+This means that there must be some way to handle an attribute +whose value is not either of the two cases above. That is, its value +is some more complex JSON expression. Ideally both reading and writing +of such attributes should be supported. + +One more point. NCZarr attempts to record the associated netcdf +attribute type (encoded in the form of a NumPy "dtype") for each +attribute. This information is stored as NCZarr-specific +metadata. Note that pure Zarr makes no attempt to record such +type information. + +The current algorithm to support JSON valued attributes +operates as follows. + +## Writing an attribute: +There are multiple cases to consider. + +1. The netcdf attribute **is not** of type NC_CHAR and its value is a single atomic value. + * Convert to an equivalent JSON atomic value and write that JSON expression. + * Compute the Zarr equivalent dtype and store in the NCZarr metadata. + +2. The netcdf attribute **is not** of type NC_CHAR and its value is a vector of atomic values. + * Convert to an equivalent JSON array of atomic values and write that JSON expression. + * Compute the Zarr equivalent dtype and store in the NCZarr metadata. + +3. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – +**is** parseable as a legal JSON expression. + * Parse to produce a JSON expression and write that expression. + * Use "|S1" as the dtype and store in the NCZarr metadata. + +4. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – +**is not** parseable as a legal JSON expression. + * Convert to a JSON string and write that expression. + * Use "|S1" as the dtype and store in the NCZarr metadata. + +## Reading an attribute: + +The process of reading and interpreting an attribute value requires two +pieces of information.
+* The value of the attribute as a JSON expression, and +* The optional associated dtype of the attribute; note that this may not exist +if, for example, the file is pure zarr. + +Given these two pieces of information, the read process is as follows. + +1. The JSON expression is a simple JSON atomic value. + * If the dtype is defined, then convert the JSON to that type of data, +and then store it as the equivalent netcdf vector of size one. + * If the dtype is not defined, then infer the dtype based on the JSON value, +and then store it as the equivalent netcdf vector of size one. + +2. The JSON expression is an array of simple JSON atomic values. + * If the dtype is defined, then convert each JSON value in the array to that type of data, +and then store it as the equivalent netcdf vector. + * If the dtype is not defined, then infer the dtype based on the first JSON value in the array, +and then store it as the equivalent netcdf vector. + +3. The JSON expression is an array some of whose values are dictionaries or (sub-)arrays. + * Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR. + +4. The JSON expression is a dictionary. + * Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR. + +## Notes + +1. If a character valued attribute's value can be parsed as a legal JSON expression, then it will be stored as such. +2. Reading and writing are *almost* idempotent in that the sequence of +actions "read-write-read" is equivalent to a single "read" and "write-read-write" is equivalent to a single "write". +The "almost" caveat is necessary because (1) whitespace may be added or lost during the sequence of operations, +and (2) numeric precision may change. + +# Appendix H. Support for string types + +Zarr supports a string type, but it is restricted to +fixed size strings.
NCZarr also supports such strings, +but there are some differences in order to interoperate +with the netcdf-4/HDF5 variable length strings. + +The primary issue to be addressed is to provide a way for the user +to specify the maximum size of the fixed length strings. This is +handled by providing the following new attributes: +1. **_nczarr_default_maxstrlen** — +This is an attribute of the root group. It specifies the default +maximum string length for string types. If not specified, then +it has the value of 64 characters. +2. **_nczarr_maxstrlen** — +This is a per-variable attribute. It specifies the maximum +string length for the string type associated with the variable. +If not specified, then it is assigned the value of +**_nczarr_default_maxstrlen**. + +Note that when accessing a string through the netCDF API, the +fixed length strings appear as variable length strings. This +means that they are stored as pointers to the string +(i.e. **char\***) and with a trailing nul character. +One consequence is that if the user writes a variable length +string through the netCDF API, and the length of that string +is greater than the maximum string length for a variable, +then the string is silently truncated. +Another consequence is that the user must reclaim the string storage. + +Adding strings also requires some hacking to handle the existing +netcdf-c NC_CHAR type, which does not exist in Zarr. The goal +was to choose NumPy types for both the netcdf-c NC_STRING type +and the netcdf-c NC_CHAR type such that if a pure zarr +implementation reads them, it will still work. + +For writing variables and NCZarr attributes, the type mapping is as follows: +* "|S1" for NC_CHAR. +* ">S1" for NC_STRING && MAXSTRLEN==1 +* ">Sn" for NC_STRING && MAXSTRLEN==n + +Note that it is a bit of a hack to use endianness, but it should be ok since for +string/char, the endianness has no meaning.
+ +So when reading data with a pure zarr implementation +the above types should always appear as strings, +and the type that signals NC_CHAR (in NCZarr) +would be handled by Zarr as a string of length 1. + +# Change Log {#nczarr_changelog} + +Note, this log was only started as of 8/11/2022 and is not +intended to be a detailed chronology. Rather, it provides highlights +that will be of interest to NCZarr users. In order to see exact changes, +it is necessary to use the 'git diff' command. + +## 8/29/2022 +1. Zarr fixed-size string types are now supported. + +## 8/11/2022 +1. The NCZarr specific keys have been converted to lower-case +(e.g. "_nczarr_attr" instead of "_NCZARR_ATTR"). Upper case is +accepted for back compatibility. + +2. The legal values of an attribute have been extended to +include arbitrary JSON expressions; see Appendix G for more details. # Point of Contact {#nczarr_poc} __Author__: Dennis Heimbigner
__Email__: dmh at ucar dot edu
__Initial Version__: 4/10/2020
-__Last Revised__: 7/16/2021 +__Last Revised__: 8/27/2022 diff --git a/include/nc4internal.h b/include/nc4internal.h index 9b76c135a8..2833e0c5a1 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -106,9 +106,6 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT; /** Subset of readonly flags; readable by name only thru the API. */ #define NAMEONLYFLAG 4 -/** Subset of readonly flags; Value is actually in file. */ -#define MATERIALIZEDFLAG 8 - /** Per-variable attribute, as opposed to global */ #define VARFLAG 16 @@ -492,9 +489,13 @@ extern void NC_freeglobalstate(void); #define NC_ATT_COORDINATES "_Netcdf4Coordinates" /*see hdf5internal.h:COORDINATES*/ #define NC_ATT_FORMAT "_Format" #define NC_ATT_DIMID_NAME "_Netcdf4Dimid" +#define NC_ATT_FILLVALUE "_FillValue" #define NC_ATT_NC3_STRICT_NAME "_nc3_strict" #define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS" #define NC_ATT_CODECS "_Codecs" -#define NC_NCZARR_ATTR "_NCZARR_ATTR" +#define NC_NCZARR_ATTR "_nczarr_attr" +#define NC_NCZARR_ATTR_UC "_NCZARR_ATTR" +#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen" +#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen" #endif /* _NC4INTERNAL_ */ diff --git a/include/ncconfigure.h b/include/ncconfigure.h index c0679bc304..77d34ac4c1 100644 --- a/include/ncconfigure.h +++ b/include/ncconfigure.h @@ -50,9 +50,10 @@ extern "C" { #endif /* WARNING: in some systems, these functions may be defined as macros, so check */ -#ifndef HAVE_STRDUP #ifndef strdup +#ifndef HAVE_STRDUP char* strdup(const char*); +#define HAVE_STRDUP #endif #endif @@ -120,11 +121,7 @@ unsigned long long int strtoull(const char*, char**, int); /* handle null arguments */ #ifndef nulldup -#ifdef HAVE_STRDUP #define nulldup(s) ((s)==NULL?NULL:strdup(s)) -#else -extern char *nulldup(const char* s); -#endif #endif #ifndef nulllen diff --git a/include/ncjson.h b/include/ncjson.h index c4974cfb58..32b050f06c 100644 --- a/include/ncjson.h +++ b/include/ncjson.h 
@@ -64,6 +64,7 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; +#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ @@ -116,6 +117,8 @@ OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep); #ifndef NETCDF_JSON_H /* dump NCjson* object to output file */ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); +/* convert NCjson* object to output string */ +OPTEXPORT const char* NCJtotext(const NCjson* json); #endif #if defined(__cplusplus) diff --git a/include/netcdf_json.h b/include/netcdf_json.h index d53944a8a5..33eabe4606 100644 --- a/include/netcdf_json.h +++ b/include/netcdf_json.h @@ -2,9 +2,19 @@ See the COPYRIGHT file for more information. */ + #ifndef NETCDF_JSON_H #define NETCDF_JSON_H 1 +/* +WARNING: +If you modify this file, +then you need to got to +the include/ directory +and do the command: + make makepluginjson +*/ + /* Inside libnetcdf and for plugins, export the json symbols */ #ifndef DLLEXPORT #ifdef _WIN32 @@ -54,6 +64,7 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; +#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ @@ -106,6 +117,8 @@ OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep); #ifndef NETCDF_JSON_H /* dump NCjson* object to output file */ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); +/* convert NCjson* object to output string */ +OPTEXPORT const char* NCJtotext(const NCjson* json); #endif #if defined(__cplusplus) @@ -140,6 +153,18 @@ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); TODO: make utf8 safe */ +/* +WARNING: +If you modify this file, +then you need to got to +the include/ directory +and do the command: + make makenetcdfjson +*/ + +#ifdef 
HAVE_CONFIG_H +#include "config.h" +#endif #include #include #include @@ -235,7 +260,7 @@ static int bytesappendquoted(NCJbuf* buf, const char* s); static int bytesappend(NCJbuf* buf, const char* s); static int bytesappendc(NCJbuf* bufp, const char c); -/* Hide these for plugins */ +/* Hide everything for plugins */ #ifdef NETCDF_JSON_H #define OPTSTATIC static static int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); @@ -266,10 +291,6 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) NCJparser* parser = NULL; NCjson* json = NULL; - /* Need at least 1 character of input */ - if(len == 0 || text == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} @@ -277,6 +298,16 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} memcpy(parser->text,text,len); + /* trim trailing whitespace */ + if(len > 0) { + char* p; + for(p=parser->text+(len-1);p >= parser->text;p--) { + if(*p > ' ') break; + } + len = (size_t)((p - parser->text) + 1); + } + if(len == 0) + {stat = NCJTHROW(NCJ_ERR); goto done;} parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -285,6 +316,8 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) fprintf(stderr,"json: |%s|\n",parser->text); #endif if((stat=NCJparseR(parser,&json))==NCJ_ERR) goto done; + /* Must consume all of the input */ + if(parser->pos != (parser->text+len)) {stat = NCJ_ERR; goto done;} *jsonp = json; json = NULL; @@ -1186,16 +1219,32 @@ NCJdump(const NCjson* json, unsigned flags, FILE* out) nullfree(text); } +OPTSTATIC const char* +NCJtotext(const NCjson* json) +{ + static char outtext[4096]; + char* text = NULL; + if(json == NULL) {strcpy(outtext,""); goto done;} + (void)NCJunparse(json,0,&text); + outtext[0] = '\0'; + 
strlcat(outtext,text,sizeof(outtext)); + nullfree(text); +done: + return outtext; +} + /* Hack to avoid static unused warning */ -void +static void netcdf_supresswarnings(void) { void* ignore; - ignore = (void*)NCJdump; + ignore = (void*)netcdf_supresswarnings; ignore = (void*)NCJinsert; ignore = (void*)NCJaddstring; ignore = (void*)NCJcvt; ignore = (void*)NCJdictget; ignore = (void*)NCJparse; + ignore = (void*)NCJdump; + ignore = (void*)NCJtotext; ignore = ignore; } diff --git a/libdap4/ncd4dispatch.c b/libdap4/ncd4dispatch.c index f875c641e0..616f9c9051 100644 --- a/libdap4/ncd4dispatch.c +++ b/libdap4/ncd4dispatch.c @@ -43,7 +43,7 @@ static const NC_reservedatt NCD4_reserved[] = { {D4CHECKSUMATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Checksum_CRC32*/ {D4LEATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Little_Endian*/ /* Also need to include the provenance attributes */ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ + {NCPROPS, READONLYFLAG|NAMEONLYFLAG}, /*_NCProperties*/ {NULL, 0} }; diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c index d991b9e0f5..74fd55a4fc 100644 --- a/libdispatch/dinfermodel.c +++ b/libdispatch/dinfermodel.c @@ -907,7 +907,7 @@ NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void if(!modelcomplete(model)) { const char** p = ncurifragmentparams(uri); /* envv format */ if(p != NULL) { - for(;*p;p++) { + for(;*p;p+=2) { const char* key = p[0]; const char* value = p[1];; if((stat=processfragmentkeys(key,value,model))) goto done; diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index b716fbdec0..0ebd515aa3 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -15,6 +15,9 @@ and do the command: make makenetcdfjson */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include #include #include @@ -111,7 +114,7 @@ static int bytesappendquoted(NCJbuf* buf, const char* s); static int bytesappend(NCJbuf* buf, const char* s); static int bytesappendc(NCJbuf* bufp, 
const char c); -/* Hide these for plugins */ +/* Hide everything for plugins */ #ifdef NETCDF_JSON_H #define OPTSTATIC static static int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); @@ -142,10 +145,6 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) NCJparser* parser = NULL; NCjson* json = NULL; - /* Need at least 1 character of input */ - if(len == 0 || text == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} @@ -153,6 +152,16 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} memcpy(parser->text,text,len); + /* trim trailing whitespace */ + if(len > 0) { + char* p; + for(p=parser->text+(len-1);p >= parser->text;p--) { + if(*p > ' ') break; + } + len = (size_t)((p - parser->text) + 1); + } + if(len == 0) + {stat = NCJTHROW(NCJ_ERR); goto done;} parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -161,6 +170,8 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) fprintf(stderr,"json: |%s|\n",parser->text); #endif if((stat=NCJparseR(parser,&json))==NCJ_ERR) goto done; + /* Must consume all of the input */ + if(parser->pos != (parser->text+len)) {stat = NCJ_ERR; goto done;} *jsonp = json; json = NULL; @@ -1062,17 +1073,32 @@ NCJdump(const NCjson* json, unsigned flags, FILE* out) nullfree(text); } +OPTSTATIC const char* +NCJtotext(const NCjson* json) +{ + static char outtext[4096]; + char* text = NULL; + if(json == NULL) {strcpy(outtext,""); goto done;} + (void)NCJunparse(json,0,&text); + outtext[0] = '\0'; + strlcat(outtext,text,sizeof(outtext)); + nullfree(text); +done: + return outtext; +} + /* Hack to avoid static unused warning */ static void netcdf_supresswarnings(void) { void* ignore; ignore = (void*)netcdf_supresswarnings; - ignore = 
(void*)NCJdump; ignore = (void*)NCJinsert; ignore = (void*)NCJaddstring; ignore = (void*)NCJcvt; ignore = (void*)NCJdictget; ignore = (void*)NCJparse; + ignore = (void*)NCJdump; + ignore = (void*)NCJtotext; ignore = ignore; } diff --git a/libdispatch/nclog.c b/libdispatch/nclog.c index 823c83228b..b474b8c4b9 100644 --- a/libdispatch/nclog.c +++ b/libdispatch/nclog.c @@ -32,7 +32,7 @@ #define MAXTAGS 256 #define NCTAGDFALT "Log"; -#define NC_MAX_FRAMES 256 +#define NC_MAX_FRAMES 1024 static int nclogginginitialized = 0; diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index 671b0c2ace..05961c7e0f 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -62,6 +62,8 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr &zinfo->zarr.nczarr_version.minor, &zinfo->zarr.nczarr_version.release); + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + /* Apply client controls */ if((stat = applycontrols(zinfo))) goto done; @@ -126,6 +128,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/ {stat = NC_ENOMEM; goto done;} + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; /* Add struct to hold NCZ-specific group info. 
*/ if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T)))) @@ -288,6 +291,64 @@ ncz_open_rootgroup(NC_FILE_INFO_T* dataset) } #endif + +static const char* +controllookup(const char** envv_controls, const char* key) +{ + const char** p; + for(p=envv_controls;*p;p+=2) { + if(strcasecmp(key,*p)==0) { + return p[1]; + } + } + return NULL; +} + + +static int +applycontrols(NCZ_FILE_INFO_T* zinfo) +{ + int i,stat = NC_NOERR; + const char* value = NULL; + NClist* modelist = nclistnew(); + int noflags = 0; /* track non-default negative flags */ + + if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) { + if((stat = NCZ_comma_parse(value,modelist))) goto done; + } + /* Process the modelist first */ + zinfo->controls.mapimpl = NCZM_DEFAULT; + zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ + for(i=0;icontrols.flags |= (FLAG_PUREZARR); + else if(strcasecmp(p,XARRAYCONTROL)==0) + zinfo->controls.flags |= FLAG_PUREZARR; + else if(strcasecmp(p,NOXARRAYCONTROL)==0) + noflags |= FLAG_XARRAYDIMS; + else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP; + else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE; + else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3; + } + /* Apply negative controls by turning off negative flags */ + /* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */ + zinfo->controls.flags &= (~noflags); + + /* Process other controls */ + if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) { + zinfo->controls.flags |= FLAG_LOGGING; + ncsetlogging(1); + } + if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) { + if(strcasecmp(value,"fetch")==0) + zinfo->controls.flags |= FLAG_SHOWFETCH; + } +done: + nclistfreeall(modelist); + return stat; +} + #if 0 /** @internal Rewrite attributes into a group or var @@ -332,7 +393,7 @@ 
ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs NCjson* k = NULL; NCjson* v = NULL; /* remove any previous version */ - if(!NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v)) { + if(!NCJremove(jattrs,NCZ_V2_ATTRS,1,&k,&v)) { NCJreclaim(k); NCJreclaim(v); } } @@ -358,60 +419,5 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs } #endif -static const char* -controllookup(const char** envv_controls, const char* key) -{ - const char** p; - for(p=envv_controls;*p;p+=2) { - if(strcasecmp(key,*p)==0) { - return p[1]; - } - } - return NULL; -} - -static int -applycontrols(NCZ_FILE_INFO_T* zinfo) -{ - int i,stat = NC_NOERR; - const char* value = NULL; - NClist* modelist = nclistnew(); - int noflags = 0; /* track non-default negative flags */ - - if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) { - if((stat = NCZ_comma_parse(value,modelist))) goto done; - } - /* Process the modelist first */ - zinfo->controls.mapimpl = NCZM_DEFAULT; - zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ - for(i=0;icontrols.flags |= (FLAG_PUREZARR); - else if(strcasecmp(p,XARRAYCONTROL)==0) - zinfo->controls.flags |= FLAG_PUREZARR; - else if(strcasecmp(p,NOXARRAYCONTROL)==0) - noflags |= FLAG_XARRAYDIMS; - else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP; - else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE; - else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3; - } - /* Apply negative controls by turning off negative flags */ - /* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */ - zinfo->controls.flags &= (~noflags); - - /* Process other controls */ - if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) { - zinfo->controls.flags |= FLAG_LOGGING; - ncsetlogging(1); - } - if((value = controllookup((const 
char**)zinfo->envv_controls,"show")) != NULL) { - if(strcasecmp(value,"fetch")==0) - zinfo->controls.flags |= FLAG_SHOWFETCH; - } -done: - nclistfreeall(modelist); - return stat; -} diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 9f78e19297..6957bdd144 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -15,6 +15,16 @@ struct ChunkKey; struct S3credentials; +/* Intermediate results */ +struct ZCVT { + signed long long int64v; + unsigned long long uint64v; + double float64v; + char* strv; /* null terminated utf-8 */ +}; + +#define zcvt_empty {0,0,0.0,NULL} + /* zarr.c */ EXTERNL int ncz_create_dataset(NC_FILE_INFO_T*, NC_GRP_INFO_T*, const char** controls); EXTERNL int ncz_open_dataset(NC_FILE_INFO_T*, const char** controls); @@ -31,8 +41,9 @@ EXTERNL int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs EXTERNL int ncz_close_file(NC_FILE_INFO_T* file, int abort); /* zcvt.c */ -EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0); -EXTERNL int NCZ_stringconvert1(nc_type typid, size_t len, char* src, NCjson* jvalue); +EXTERNL int NCZ_json2cvt(NCjson* jsrc, struct ZCVT* zcvt, nc_type* typeidp); +EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, NCbytes*); +EXTERNL int NCZ_stringconvert1(nc_type typid, char* src, NCjson* jvalue); EXTERNL int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap); /* zsync.c */ @@ -53,9 +64,11 @@ EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp); EXTERNL int ncz_splitkey(const char* path, NClist* segments); EXTERNL int NCZ_readdict(NCZMAP* zmap, const char* key, NCjson** jsonp); EXTERNL int NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp); -EXTERNL int ncz_zarr_type_name(nc_type nctype, int little, const char** znamep); -EXTERNL int ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep); -EXTERNL int ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianness); +EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep); 
+EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep); +EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp); +EXTERNL int NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp); +EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); EXTERNL int ncz_fill_value_sort(nc_type nctype, int*); EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size); EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json); @@ -73,6 +86,11 @@ EXTERNL int NCZ_ischunkname(const char* name,char dimsep); EXTERNL char* NCZ_chunkpath(struct ChunkKey key); EXTERNL int NCZ_reclaim_fill_value(NC_VAR_INFO_T* var); EXTERNL int NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp); +EXTERNL int NCZ_get_maxstrlen(NC_OBJ* obj); +EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen); +EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen); +EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int nofill, void* copy); +EXTERNL int NCZ_iscomplexjson(NCjson* value, nc_type typehint); /* zwalk.c */ EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata); diff --git a/libnczarr/zattr.c b/libnczarr/zattr.c index 8cd4cfd7a4..c3f890d744 100644 --- a/libnczarr/zattr.c +++ b/libnczarr/zattr.c @@ -68,9 +68,11 @@ ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **at } /** - * @internal Get one of three special attributes, NCPROPS, - * ISNETCDF4ATT, and SUPERBLOCKATT. These atts are not all really in - * the file, they are constructed on the fly. + * @internal Get one of the special attributes: + * See the reserved attribute table in libsrc4/nc4internal.c. + * The special attributes are the ones marked with NAMEONLYFLAG. 
+ * For example: NCPROPS, ISNETCDF4ATT, and SUPERBLOCKATT, and CODECS. + * These atts are not all really in the file, they are constructed on the fly. * * @param h5 Pointer to ZARR file info struct. * @param var Pointer to var info struct; NULL signals global. @@ -323,8 +325,9 @@ NCZ_del_att(int ncid, int varid, const char *name) return NC_ENOTATT; /* Reclaim the content of the attribute */ - if(att->data) + if(att->data) { if((retval = nc_reclaim_data_all(ncid,att->nc_typeid,att->data,att->len))) return retval; + } att->data = NULL; att->len = 0; @@ -426,7 +429,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, size_t len, const void *data, nc_type mem_type, int force) { NC* nc; - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *h5 = NULL; NC_VAR_INFO_T *var = NULL; NCindex* attlist = NULL; NC_ATT_INFO_T* att; @@ -575,7 +578,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, } /* If this is the _FillValue attribute, then we will also have to - * copy the value to the fill_vlue pointer of the NC_VAR_INFO_T + * copy the value to the fill_value pointer of the NC_VAR_INFO_T * struct for this var. (But ignore a global _FillValue * attribute). 
Also kill the cache fillchunk as no longer valid */ if (!strcmp(att->hdr.name, _FillValue) && varid != NC_GLOBAL) @@ -670,6 +673,23 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, att->data = copy; copy = NULL; } } + + /* If this is a maxstrlen attribute, then we will also have to + * sync the value to NCZ_VAR_INFO_T or NCZ_FILE_INFO_T structure */ + { + if(strcmp(att->hdr.name,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0 && varid == NC_GLOBAL && len == 1) { + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)h5->format_file_info; + if((retval = nc4_convert_type(att->data, &zfile->default_maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + BAIL(retval); + } else if(strcmp(att->hdr.name,NC_NCZARR_MAXSTRLEN_ATTR)==0 && varid != NC_GLOBAL && len == 1) { + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if((retval = nc4_convert_type(att->data, &zvar->maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + BAIL(retval); + } + } + att->dirty = NC_TRUE; att->created = NC_FALSE; att->len = len; @@ -994,14 +1014,19 @@ ncz_makeattr(NC_OBJ* container, NCindex* attlist, const char* name, nc_type type NCZ_ATT_INFO_T* zatt = NULL; void* clone = NULL; size_t typesize, clonesize; + int ncid; + NC* nc = NULL; NC_GRP_INFO_T* grp = (container->sort == NCGRP ? 
(NC_GRP_INFO_T*)container : ((NC_VAR_INFO_T*)container)->container); + nc = grp->nc4_info->controller; + ncid = nc->ext_ncid | grp->hdr.id; + /* Duplicate the values */ if ((stat = nc4_get_typelen_mem(grp->nc4_info, typeid, &typesize))) goto done; clonesize = len*typesize; if((clone = malloc(clonesize))==NULL) {stat = NC_ENOMEM; goto done;} - memcpy(clone,values,clonesize); + if((stat = nc_copy_data(ncid, typeid, values, len, clone))) goto done; if((stat=nc4_att_list_add(attlist,name,&att))) goto done; diff --git a/libnczarr/zcache.h b/libnczarr/zcache.h index f5f7362dd2..2ef0fe8ad9 100644 --- a/libnczarr/zcache.h +++ b/libnczarr/zcache.h @@ -32,6 +32,7 @@ typedef struct NCZCacheEntry { } key; size64_t hashkey; int isfiltered; /* 1=>data contains filtered data else real data */ + int isfixedstring; /* 1 => data contains the fixed strings, 0 => data contains pointers to strings */ size64_t size; /* |data| */ void* data; /* contains either filtered or real data */ } NCZCacheEntry; diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index f1f3354b1d..cc8b4d0064 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -172,10 +172,10 @@ zclose_vars(NC_GRP_INFO_T* grp) var->filters = NULL; #endif /* Reclaim the type */ - (void)zclose_type(var->type_info); - NCZ_free_chunk_cache(zvar->cache); + if(var->type_info) (void)zclose_type(var->type_info); + if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); /* reclaim xarray */ - nclistfreeall(zvar->xarray); + if(zvar->xarray) nclistfreeall(zvar->xarray); nullfree(zvar); var->format_var_info = NULL; /* avoid memory errors */ } @@ -223,13 +223,9 @@ static int zclose_type(NC_TYPE_INFO_T* type) { int stat = NC_NOERR; - NCZ_TYPE_INFO_T* ztype; assert(type && type->format_type_info != NULL); - /* Get Zarr-specific type info. 
*/ - ztype = type->format_type_info; - nullfree(ztype); - type->format_type_info = NULL; /* avoid memory errors */ + nullfree(type->format_type_info); return stat; } diff --git a/libnczarr/zcvt.c b/libnczarr/zcvt.c index 4b59b881d0..6fd781f346 100644 --- a/libnczarr/zcvt.c +++ b/libnczarr/zcvt.c @@ -15,51 +15,121 @@ Code taken directly from libdap4/d4cvt.c */ -/* Intermediate results */ -struct ZCVT { - signed long long int64v; - unsigned long long uint64v; - double float64v; +static const int ncz_type_size[NC_MAX_ATOMIC_TYPE+1] = { +0, /*NC_NAT*/ +sizeof(char), /*NC_BYTE*/ +sizeof(char), /*NC_CHAR*/ +sizeof(short), /*NC_SHORT*/ +sizeof(int), /*NC_INT*/ +sizeof(float), /*NC_FLOAT*/ +sizeof(double), /*NC_DOUBLE*/ +sizeof(unsigned char), /*NC_UBYTE*/ +sizeof(unsigned short), /*NC_USHORT*/ +sizeof(unsigned int), /*NC_UINT*/ +sizeof(long long), /*NC_INT64*/ +sizeof(unsigned long long), /*NC_UINT64*/ +sizeof(char *), /*NC_STRING*/ }; /* Forward */ static int typeid2jtype(nc_type typeid); -/* Convert an NCJ_STRING to a memory equivalent value of specified dsttype */ +#if 0 +/* Convert a JSON value to a struct ZCVT value and also return the type */ int -NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) +NCZ_string2cvt(char* src, nc_type srctype, struct ZCVT* zcvt, nc_type* typeidp) { int stat = NC_NOERR; - nc_type srctype; - struct ZCVT zcvt; - int outofrange = 0; + nc_type dsttype = NC_NAT; - /* Convert the incoming jsrc string to a restricted set of values */ + assert(zcvt); + + /* Convert to a restricted set of values */ + switch (srctype) { + case NC_BYTE: { + zcvt->int64v = (signed long long)(*((signed char*)src)); + dsttype = NC_INT64; + } break; + case NC_UBYTE: { + zcvt->uint64v = (unsigned long long)(*((unsigned char*)src)); + dsttype = NC_UINT64; + } break; + case NC_SHORT: { + zcvt->int64v = (signed long long)(*((signed short*)src)); + dsttype = NC_INT64; + } break; + case NC_USHORT: { + zcvt->uint64v = (unsigned long long)(*((unsigned 
short*)src)); + dsttype = NC_UINT64; + } break; + case NC_INT: { + zcvt->int64v = (signed long long)(*((signed int*)src)); + dsttype = NC_INT64; + } break; + case NC_UINT: { + zcvt->uint64v = (unsigned long long)(*((unsigned int*)src)); + dsttype = NC_UINT64; + } break; + case NC_INT64: { + zcvt->int64v = (signed long long)(*((signed long long*)src)); + dsttype = NC_INT64; + } break; + case NC_UINT64: { + zcvt->uint64v = (unsigned long long)(*((unsigned long long*)src)); + dsttype = NC_UINT64; + } break; + case NC_FLOAT: { + zcvt->float64v = (double)(*((float*)src)); + dsttype = NC_DOUBLE; + } break; + case NC_DOUBLE: { + dsttype = NC_DOUBLE; + zcvt->float64v= (double)(*((double*)src)); + } break; + case NC_STRING: { + dsttype = NC_STRING; + zcvt->strv= *((char**)src); + } break; + default: stat = NC_EINTERNAL; goto done; + } + if(typeidp) *typeidp = dsttype; +done: + return stat; +} +#endif + +/* Warning: not free returned zcvt.strv; it may point into a string in jsrc */ +int +NCZ_json2cvt(NCjson* jsrc, struct ZCVT* zcvt, nc_type* typeidp) +{ + int stat = NC_NOERR; + nc_type srctype = NC_NAT; + + /* Convert the incoming jsrc to a restricted set of values */ switch (NCJsort(jsrc)) { case NCJ_INT: /* convert to (u)int64 */ if(NCJstring(jsrc)[0] == '-') { - if(sscanf(NCJstring(jsrc),"%lld",&zcvt.int64v) != 1) + if(sscanf(NCJstring(jsrc),"%lld",&zcvt->int64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_INT64; } else { - if(sscanf(NCJstring(jsrc),"%llu",&zcvt.uint64v) != 1) + if(sscanf(NCJstring(jsrc),"%llu",&zcvt->uint64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_UINT64; } break; - case NCJ_STRING: case NCJ_DOUBLE: /* Capture nan and infinity values */ if(strcasecmp(NCJstring(jsrc),"nan")==0) - zcvt.float64v = NAN; + zcvt->float64v = NAN; else if(strcasecmp(NCJstring(jsrc),"-nan")==0) - zcvt.float64v = - NAN; + zcvt->float64v = - NAN; else if(strcasecmp(NCJstring(jsrc),"infinity")==0) - zcvt.float64v = INFINITY; + zcvt->float64v = INFINITY; else 
if(strcasecmp(NCJstring(jsrc),"-infinity")==0) - zcvt.float64v = (- INFINITY); + zcvt->float64v = (- INFINITY); else { - if(sscanf(NCJstring(jsrc),"%lg",&zcvt.float64v) != 1) + if(sscanf(NCJstring(jsrc),"%lg",&zcvt->float64v) != 1) {stat = NC_EINVAL; goto done;} } srctype = NC_DOUBLE; @@ -67,169 +137,233 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) case NCJ_BOOLEAN: srctype = NC_UINT64; if(strcasecmp(NCJstring(jsrc),"false")==0) - zcvt.uint64v = 0; + zcvt->uint64v = 0; else - zcvt.uint64v = 1; + zcvt->uint64v = 1; + break; + case NCJ_STRING: + srctype = NC_STRING; + zcvt->strv = NCJstring(jsrc); break; default: stat = NC_EINTERNAL; goto done; } + if(typeidp) *typeidp = srctype; +done: + return stat; +} + +/* Convert a singleton NCjson value to a memory equivalent value of specified dsttype; */ +int +NCZ_convert1(NCjson* jsrc, nc_type dsttype, NCbytes* buf) +{ + int stat = NC_NOERR; + nc_type srctype; + struct ZCVT zcvt = zcvt_empty; + int outofrange = 0; + size_t len = 0; + + assert(dsttype != NC_NAT && dsttype <= NC_MAX_ATOMIC_TYPE && buf); + + switch (NCJsort(jsrc)) { + case NCJ_STRING: case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: + if((stat = NCZ_json2cvt(jsrc,&zcvt,&srctype))) goto done; + break; + default: stat = NC_EINVAL; goto done; /* Illegal JSON */ + } + + len = ncz_type_size[dsttype]; /* may change later */ + /* Now, do the down conversion */ switch (dsttype) { case NC_BYTE: { - signed char* p = (signed char*)memory; + signed char c = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_BYTE || zcvt.int64v > NC_MAX_BYTE) outofrange = 1; - *p = (signed char)zcvt.int64v; + c = (signed char)zcvt.int64v; + ncbytesappend(buf,(char)c); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_BYTE) outofrange = 1; - *p = (signed char)zcvt.uint64v; + c = (signed char)zcvt.uint64v; + ncbytesappend(buf,(char)c); break; } } break; case 
NC_UBYTE: { - unsigned char* p = (unsigned char*)memory; + unsigned char c = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_BYTE) outofrange = 1; - *p = (unsigned char)zcvt.int64v; + c = (unsigned char)zcvt.int64v; + ncbytesappend(buf,(char)c); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_UBYTE) outofrange = 1; - *p = (unsigned char)zcvt.uint64v; + c = (unsigned char)zcvt.uint64v; + ncbytesappend(buf,(char)c); break; } } break; case NC_SHORT: { - signed short* p = (signed short*)memory; + signed short s = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_SHORT || zcvt.int64v > NC_MAX_SHORT) outofrange = 1; - *p = (signed short)zcvt.int64v; + s = (signed short)zcvt.int64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_SHORT) outofrange = 1; - *p = (signed short)zcvt.uint64v; + s = (signed short)zcvt.uint64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; } } break; case NC_USHORT: { - unsigned short* p = (unsigned short*)memory; + unsigned short s = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_USHORT) outofrange = 1; - *p = (unsigned short)zcvt.int64v; + s = (unsigned short)zcvt.int64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_USHORT) outofrange = 1; - *p = (unsigned short)zcvt.uint64v; + s = (unsigned short)zcvt.uint64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; } } break; case NC_INT: { - signed int* p = (signed int*)memory; + signed int ii = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_INT 
|| zcvt.int64v > NC_MAX_INT) outofrange = 1; - *p = (signed int)zcvt.int64v; + ii = (signed int)zcvt.int64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_INT) outofrange = 1; - *p = (signed int)zcvt.uint64v; + ii = (signed int)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; } } break; case NC_UINT: { - unsigned int* p = (unsigned int*)memory; + unsigned int ii = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_UINT) outofrange = 1; - *p = (unsigned int)zcvt.int64v; + ii = (unsigned int)zcvt.int64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_UINT) outofrange = 1; - *p = (unsigned int)zcvt.uint64v; + ii = (unsigned int)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; } } break; case NC_INT64: { - signed long long* p = (signed long long*)memory; + signed long long ll = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: - *p = (signed long long)zcvt.int64v; + ll = (signed long long)zcvt.int64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_INT64) outofrange = 1; - *p = (signed long long)zcvt.uint64v; + ll = (signed long long)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; } } break; case NC_UINT64: { - unsigned long long* p = (unsigned long long*)memory; + unsigned long long ll = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (signed long long)zcvt.float64v; /* fall thru */ case NC_INT64: if(zcvt.int64v < 0) outofrange = 1; - *p = (unsigned long long)zcvt.int64v; + ll = (unsigned long long)zcvt.int64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; case NC_UINT64: - *p = (unsigned long long)zcvt.uint64v; + ll = (unsigned long long)zcvt.uint64v; + 
ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; } } break; case NC_FLOAT: { - float* p = (float*)memory; + float f = 0; switch (srctype) { case NC_DOUBLE: - *p = (float)zcvt.float64v; + f = (float)zcvt.float64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; case NC_INT64: - *p = (float)zcvt.int64v; + f = (float)zcvt.int64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; case NC_UINT64: - *p = (float)zcvt.uint64v; + f = (float)zcvt.uint64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; } } break; case NC_DOUBLE: { - double* p = (double*)memory; + double d = 0; switch (srctype) { case NC_DOUBLE: - *p = (double)zcvt.float64v; + d = (double)zcvt.float64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); break; case NC_INT64: - *p = (double)zcvt.int64v; - break; - case NC_UINT64: - *p = (double)zcvt.uint64v; + d = (double)zcvt.int64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); + case NC_UINT64: + d = (double)zcvt.uint64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); break; } } break; + case NC_STRING: { + char* scopy = NULL; + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + /* Need to append the pointer and not what it points to */ + scopy = nulldup(zcvt.strv); + ncbytesappendn(buf,(void*)&scopy,sizeof(scopy)); + scopy = NULL; + } break; + case NC_CHAR: { + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + len = strlen(zcvt.strv); + ncbytesappendn(buf,zcvt.strv,len); + } break; default: stat = NC_EINTERNAL; goto done; } @@ -240,14 +374,15 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) /* Convert a memory value to a JSON string value */ int -NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) +NCZ_stringconvert1(nc_type srctype, char* src, NCjson* jvalue) { int stat = NC_NOERR; struct ZCVT zcvt; nc_type dsttype = NC_NAT; char s[1024]; + char* p = NULL; - assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype < NC_STRING); + assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype <= NC_STRING); /* 
Convert to a restricted set of values */ switch (srctype) { case NC_BYTE: { @@ -290,6 +425,10 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) dsttype = NC_DOUBLE; zcvt.float64v= (double)(*((double*)src)); } break; + case NC_STRING: { + dsttype = NC_STRING; + zcvt.strv= *((char**)src); + } break; default: stat = NC_EINTERNAL; goto done; } @@ -326,13 +465,21 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) } #endif } break; + case NC_STRING: { + p = nulldup(zcvt.strv); + } break; default: stat = NC_EINTERNAL; goto done; } - NCJsetstring(jvalue,strdup(s)); + if(p == NULL) + p = strdup(s); + NCJsetstring(jvalue,p); + p = NULL; done: + nullfree(p); return stat; } +/* Convert arbitrary netcdf attribute vector to equivalent JSON */ int NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) { @@ -352,17 +499,19 @@ NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) /* Handle char type specially */ if(typeid == NC_CHAR) { - /* Create a string valued json object */ - if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata))) - goto done; + /* Apply the JSON write convention */ + if((stat = NCJparsen(len,src,0,&jdata))) { /* !parseable */ + /* Create a string valued json object */ + if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata))) goto done; + } } else if(len == 1) { /* create singleton */ if((stat = NCJnew(jtype,&jdata))) goto done; - if((stat = NCZ_stringconvert1(typeid, len, src, jdata))) goto done; + if((stat = NCZ_stringconvert1(typeid, src, jdata))) goto done; } else { /* len > 1 create array of values */ if((stat = NCJnew(NCJ_ARRAY,&jdata))) goto done; for(i=0;iwritten_to = NC_TRUE; /* mark it written */ var->created = 1; -#if 0 - /* set the fill value and _FillValue attribute */ - if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ - assert(var->no_fill || var->fill_value != NULL); - /* rebuild the fill chunk */ - if((stat = 
NCZ_adjust_var_cache(var))) goto done; -#ifdef ENABLE_NCZARR_FILTERS - /* Build the filter working parameters for any filters */ - if((stat = NCZ_filter_setup(var))) goto done; -#endif -#endif /*0|1*/ } } if((stat = ncz_enddef_netcdf4_file(h5))) goto done; diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 0e3cec55a4..3672c340f4 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -22,9 +22,6 @@ #define NCZ_CHUNKSIZE_FACTOR (10) #define NCZ_MIN_CHUNK_SIZE (2) -/* An attribute in the ZARR root group of this name means that the - * file must follow strict netCDF classic format rules. */ -#define NCZ_NC3_STRICT_ATT_NAME "_nc3_strict" /**************************************************/ /* Constants */ @@ -62,43 +59,54 @@ /* V2 Reserved Attributes */ /* Inserted into /.zgroup -_NCZARR_SUPERBLOCK: {"version": "2.0.0"} +_nczarr_superblock: {"version": "2.0.0"} Inserted into any .zgroup -"_NCZARR_GROUP": "{ +"_nczarr_group": "{ \"dimensions\": {\"d1\": \"1\", \"d2\": \"1\",...} \"variables\": [\"v1\", \"v2\", ...] \"groups\": [\"g1\", \"g2\", ...] }" Inserted into any .zarray -"_NCZARR_ARRAY": "{ +"_nczarr_array": "{ \"dimensions\": [\"/g1/g2/d1\", \"/d2\",...] \"storage\": \"scalar\"|\"contiguous\"|\"compact\"|\"chunked\" }" Inserted into any .zattrs ? or should it go into the container? -"_NCZARR_ATTRS": "{ +"_nczarr_attrs": "{ \"types\": {\"attr1\": \" NC_CHAR. 
++ */ -#define NCZ_V2_SUPERBLOCK "_NCZARR_SUPERBLOCK" -#define NCZ_V2_GROUP "_NCZARR_GROUP" -#define NCZ_V2_ARRAY "_NCZARR_ARRAY" +#define NCZ_V2_SUPERBLOCK "_nczarr_superblock" +#define NCZ_V2_GROUP "_nczarr_group" +#define NCZ_V2_ARRAY "_nczarr_array" #define NCZ_V2_ATTR NC_NCZARR_ATTR +#define NCZ_V2_SUPERBLOCK_UC "_NCZARR_SUPERBLOCK" +#define NCZ_V2_GROUP_UC "_NCZARR_GROUP" +#define NCZ_V2_ARRAY_UC "_NCZARR_ARRAY" +#define NCZ_V2_ATTR_UC NC_NCZARR_ATTR_UC + +#define NCZARRCONTROL "nczarr" #define PUREZARRCONTROL "zarr" #define XARRAYCONTROL "xarray" #define NOXARRAYCONTROL "noxarray" +#define XARRAYSCALAR "_scalar_" #define LEGAL_DIM_SEPARATORS "./" #define DFALT_DIM_SEPARATOR '.' -#define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) +/* Default max string length for fixed length strings */ +#define NCZ_MAXSTR_DEFAULT 64 -/* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ +#define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) /* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ +#define ZCLEAR 0 /* For NCZ_copy_data */ +#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ /* Useful macro */ #define ncidforx(file,grpid) ((file)->controller->ext_ncid | (grpid)) @@ -146,6 +154,7 @@ typedef struct NCZ_FILE_INFO { # define FLAG_NCZARR_V1 16 NCZM_IMPL mapimpl; } controls; + int default_maxstrlen; /* default max str size for variables of type string */ } NCZ_FILE_INFO_T; /* This is a struct to handle the dim metadata. */ @@ -186,6 +195,7 @@ typedef struct NCZ_VAR_INFO { struct NClist* xarray; /* names from _ARRAY_DIMENSIONS */ char dimension_separator; /* '.' | '/' */ NClist* incompletefilters; + int maxstrlen; /* max length of strings for this variable */ } NCZ_VAR_INFO_T; /* Struct to hold ZARR-specific info for a field. 
*/ diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index d5cbee966a..0b96ab0d21 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -20,10 +20,9 @@ static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp); static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); -static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp); static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes); -static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst); -static int computeattrinfo(const char* name, NClist* atypes, NCjson* values, +static int zconvert(NCjson* src, nc_type typeid, size_t typelen, int* countp, NCbytes* dst); +static int computeattrinfo(const char* name, NClist* atypes, nc_type typehint, int purezarr, NCjson* values, nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap); static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps); static int parse_group_content_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrps); @@ -37,12 +36,10 @@ static int locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_ static int createdim(NC_FILE_INFO_T* file, const char* name, size64_t dimlen, NC_DIM_INFO_T** dimp); static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create); static int decodeints(NCjson* jshape, size64_t* shapes); -static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); -static int inferattrtype(NCjson* values, nc_type* typeidp); -static int mininttype(unsigned long long u64, int negative); +static int computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* 
shapes, NC_DIM_INFO_T** dims); -static int read_dict(NCjson* jdict, NCjson** jtextp); -static int write_dict(size_t len, const void* data, NCjson** jsonp); +static int json_convention_read(NCjson* jdict, NCjson** jtextp); +static int jtypes2atypes(NCjson* jtypes, NClist* atypes); /**************************************************/ /**************************************************/ @@ -97,6 +94,7 @@ ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) NCjson* jdims = NULL; LOG((3, "%s: ", __func__)); + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); NCJnew(NCJ_DICT,&jdims); for(i=0; idim); i++) { @@ -109,7 +107,7 @@ ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) if(jdimsp) {*jdimsp = jdims; jdims = NULL;} done: NCJreclaim(jdims); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -140,6 +138,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NCjson* jtmp = NULL; LOG((3, "%s: dims: %s", __func__, key)); + ZTRACE(3,"file=%s grp=%s isclose=%d",file->controller->path,grp->hdr.name,isclose); zinfo = file->format_file_info; map = zinfo->map; @@ -169,7 +168,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; } - /* Create the "_NCZARR_GROUP" dict */ + /* Create the "_nczarr_group" dict */ if((stat = NCJnew(NCJ_DICT,&json))) goto done; /* Insert the various dicts and arrays */ @@ -241,7 +240,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NCJreclaim(jsubgrps); nullfree(fullpath); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -270,6 +269,8 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCjson* jdimrefs = NULL; NCjson* jtmp = NULL; NCjson* jfill = NULL; + char* dtypename = NULL; + int purezarr = 0; size64_t shape[NC_MAX_VAR_DIMS]; 
NCZ_VAR_INFO_T* zvar = var->format_var_info; #ifdef ENABLE_NCZARR_FILTERS @@ -277,10 +278,13 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCjson* jfilter = NULL; #endif + ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + zinfo = file->format_file_info; map = zinfo->map; -#if 1 + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + /* Make sure that everything is established */ /* ensure the fill value */ if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ @@ -293,7 +297,6 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Build the filter working parameters for any filters */ if((stat = NCZ_filter_setup(var))) goto done; #endif -#endif /*0|1*/ /* Construct var path */ if((stat = NCZ_varkey(var,&fullpath))) @@ -321,7 +324,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Integer list defining the length of each dimension of the array.*/ /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;indims+zvar->scalar;i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); + } else for(i=0;indims;i++) { snprintf(number,sizeof(number),"%llu",shape[i]); NCJaddstring(jtmp,NCJ_INT,number); } @@ -332,18 +337,12 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* A string or list defining a valid data type for the array. 
*/ if((stat = NCJaddstring(jvar,NCJ_STRING,"dtype"))) goto done; { /* Add the type name */ - const char* dtypename; int endianness = var->type_info->endianness; - int islittle; - switch (endianness) { - case NC_ENDIAN_LITTLE: islittle = 1; break; - case NC_ENDIAN_BIG: islittle = 0; break; - case NC_ENDIAN_NATIVE: abort(); /* should never happen */ - } int atomictype = var->type_info->hdr.id; - assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE && atomictype != NC_STRING); - if((stat = ncz_zarr_type_name(atomictype,islittle,&dtypename))) goto done; + assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); + if((stat = ncz_nctype2dtype(atomictype,endianness,purezarr,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename))) goto done; if((stat = NCJaddstring(jvar,NCJ_STRING,dtypename))) goto done; + nullfree(dtypename); dtypename = NULL; } /* chunks key */ @@ -356,7 +355,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCJaddstring(jvar,NCJ_STRING,"chunks"))) goto done; /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;i<(var->ndims+zvar->scalar);i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); /* one chunk of size 1 */ + } else for(i=0;indims;i++) { size64_t len = (var->storage == NC_CONTIGUOUS ? 
shape[i] : var->chunksizes[i]); snprintf(number,sizeof(number),"%lld",len); NCJaddstring(jtmp,NCJ_INT,number); @@ -511,12 +512,13 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) nclistfreeall(dimrefs); nullfree(fullpath); nullfree(key); + nullfree(dtypename); nullfree(dimpath); NCJreclaim(jvar); NCJreclaim(jncvar); NCJreclaim(jtmp); NCJreclaim(jfill); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -534,6 +536,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) { int stat = NC_NOERR; NCZ_VAR_INFO_T* zvar = var->format_var_info; + + ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + if(isclose) { if((stat = ncz_sync_var_meta(file,var,isclose))) goto done; } @@ -545,7 +550,7 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -559,6 +564,8 @@ ncz_write_var(NC_VAR_INFO_T* var) int stat = NC_NOERR; NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + ZTRACE(3,"var=%s",var->hdr.name); + /* Flush the cache */ if(zvar->cache) { if((stat = NCZ_flush_chunk_cache(zvar->cache))) goto done; @@ -590,6 +597,7 @@ ncz_write_var(NC_VAR_INFO_T* var) } } + { /* Iterate over all the chunks to create missing ones */ if((chunkodom = nczodom_new(var->ndims+zvar->scalar,start,stop,stride,stop))==NULL) {stat = NC_ENOMEM; goto done;} @@ -609,13 +617,14 @@ ncz_write_var(NC_VAR_INFO_T* var) nullfree(key); key = NULL; } + } nczodom_free(chunkodom); nullfree(key); } #endif /*FILLONCLOSE*/ done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -638,52 +647,72 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc NCjson* jdimrefs = NULL; NCjson* jdict = NULL; NCjson* jint = NULL; + NCjson* jdata = NULL; NCZMAP* map = NULL; char* fullpath = NULL; char* key = NULL; char* content = NULL; char* dimpath = NULL; int isxarray = 0; - int isrootgroup = 0; + int 
inrootgroup = 0; NC_VAR_INFO_T* var = NULL; NC_GRP_INFO_T* grp = NULL; - + char* tname = NULL; + int purezarr = 0; + int endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + LOG((3, "%s", __func__)); - - if(container->sort == NCVAR) + ZTRACE(3,"file=%s container=%s |attlist|=%u",file->controller->path,container->name,(unsigned)ncindexsize(attlist)); + + if(container->sort == NCVAR) { var = (NC_VAR_INFO_T*)container; - else if(container->sort == NCGRP) + if(var->container && var->container->parent == NULL) + inrootgroup = 1; + } else if(container->sort == NCGRP) { grp = (NC_GRP_INFO_T*)container; - + } + zinfo = file->format_file_info; map = zinfo->map; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; - if(container->sort == NCVAR) { - if(var->container && var->container->parent == NULL) - isrootgroup = 1; - } + /* Create the attribute dictionary */ + if((stat = NCJnew(NCJ_DICT,&jatts))) goto done; if(ncindexsize(attlist) > 0) { /* Create the jncattr.types object */ if((stat = NCJnew(NCJ_DICT,&jtypes))) goto done; - /* Walk all the attributes and collect the types by attribute name */ + /* Walk all the attributes convert to json and collect the dtype */ for(i=0;ihdr.name); /* If reserved and hidden, then ignore */ if(ra && (ra->flags & HIDDENATTRFLAG)) continue; - if(a->nc_typeid >= NC_STRING) - {stat = THROW(NC_ENCZARR); goto done;} - if((stat = ncz_zarr_type_name(a->nc_typeid,1,&tname))) goto done; - if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) - goto done; - if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */ - goto done; - jtype = NULL; +#endif + if(a->nc_typeid > NC_MAX_ATOMIC_TYPE) + {stat = (THROW(NC_ENCZARR)); goto done;} + if(a->nc_typeid == NC_STRING) + typesize = NCZ_get_maxstrlen(container); + else + {if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done;} + /* Convert to storable json */ + if((stat = 
NCZ_stringconvert(a->nc_typeid,a->len,a->data,&jdata))) goto done; + if((stat = NCJinsert(jatts,a->hdr.name,jdata))) goto done; + jdata = NULL; + + /* Collect the corresponding dtype */ + { + if((stat = ncz_nctype2dtype(a->nc_typeid,endianness,purezarr,typesize,&tname))) goto done; + if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) goto done; + nullfree(tname); tname = NULL; + if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) goto done; /* add {name: type} */ + jtype = NULL; + } } } @@ -695,17 +724,16 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc if(stat) goto done; - /* Jsonize the attribute list */ - if((stat = ncz_jsonize_atts(attlist,&jatts))) - goto done; - if(container->sort == NCVAR) { - if(isrootgroup && isxarray) { + if(inrootgroup && isxarray) { int dimsinroot = 1; /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ if((stat = NCJnew(NCJ_ARRAY,&jdimrefs))) goto done; - /* Walk the dimensions to check in root group */ + /* Fake the scalar case */ + if(var->ndims == 0) { + NCJaddstring(jdimrefs,NCJ_STRING,XARRAYSCALAR); + } else /* Walk the dimensions and capture the names */ for(i=0;indims;i++) { NC_DIM_INFO_T* dim = var->dim[i]; /* Verify that the dimension is in the root group */ @@ -780,69 +808,20 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc nullfree(key); nullfree(content); nullfree(dimpath); + nullfree(tname); NCJreclaim(jatts); NCJreclaim(jtypes); NCJreclaim(jtype); NCJreclaim(jdimrefs); NCJreclaim(jdict); NCJreclaim(jint); - return THROW(stat); + NCJreclaim(jdata); + return ZUNTRACE(THROW(stat)); } /**************************************************/ -/** -@internal Convert a list of attributes to corresponding json. -Note that this does not push to the file. -Also note that attributes of length 1 are stored as singletons, not arrays. -This is to be more consistent with pure zarr. -Also implements the JSON dictionary convention. 
-@param attlist - [in] the attributes to dictify -@param jattrsp - [out] the json'ized att list -@return NC_NOERR -@author Dennis Heimbigner -*/ -static int -ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) -{ - int stat = NC_NOERR; - int i, isdict; - NCjson* jattrs = NULL; - NCjson* akey = NULL; - NCjson* jdata = NULL; - - if((stat = NCJnew(NCJ_DICT,&jattrs))) goto done; - - /* Iterate over the attribute list */ - for(i=0;inc_typeid == NC_CHAR - && ((char*)att->data)[0] == DICTOPEN - && ((char*)att->data)[att->len-1] == DICTCLOSE) { - /* this is subject to the JSON dictionary convention? */ - if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1; - } - if(!isdict) { - if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata))) - goto done; - } - if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done; - jdata = NULL; - } - - if(jattrsp) {*jattrsp = jattrs; jattrs = NULL;} - -done: - NCJreclaim(akey); - NCJreclaim(jdata); - NCJreclaim(jattrs); - NCJreclaim(jdata); - return THROW(stat); -} - /** @internal Extract attributes from a group or var and return the corresponding NCjson dict. @@ -856,7 +835,7 @@ the corresponding NCjson dict. 
static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypesp) { - int i,stat = NC_NOERR; + int stat = NC_NOERR; char* fullpath = NULL; char* key = NULL; NCjson* jnczarr = NULL; @@ -864,6 +843,8 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis NCjson* jncattr = NULL; NClist* atypes = NULL; /* envv list */ + ZTRACE(3,"map=%p container=%s nczarrv1=%d",map,container->name,nczarrv1); + /* alway return (possibly empty) list of types */ atypes = nclistnew(); @@ -883,7 +864,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; - /* Download the .zattrs object: may not exist */ + /* Download the .zattrs object: may not exist if not NCZarr V1 */ switch ((stat=NCZ_downloadjson(map,key,&jattrs))) { case NC_NOERR: break; case NC_EEMPTY: stat = NC_NOERR; break; /* did not exist */ @@ -903,8 +884,10 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done; stat=NCZ_downloadjson(map,key,&jncattr); } - } else {/* Get _NCZARR_ATTRS from .zattrs */ - stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + } else {/* Get _nczarr_attrs from .zattrs */ + stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + if(!stat && jncattr == NULL) + {stat = NCJdictget(jattrs,NCZ_V2_ATTR_UC,&jncattr);} } nullfree(key); key = NULL; switch (stat) { @@ -915,20 +898,13 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if(jncattr != NULL) { NCjson* jtypes = NULL; /* jncattr attribute should be a dict */ - if(NCJsort(jncattr) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jncattr) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Extract "types; may not exist if only hidden attributes are defined */ if((stat = NCJdictget(jncattr,"types",&jtypes))) goto done; if(jtypes != NULL) { - if(NCJsort(jtypes) != 
NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jtypes) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Convert to an envv list */ - for(i=0;i= NC_STRING) + if(typeid > NC_MAX_ATOMIC_TYPE) {stat = NC_EINTERNAL; goto done;} - if((stat = computeattrdata(&typeid, values, &typelen, &len, &data))) goto done; + /* Use the hint if given one */ + if(typeid == NC_NAT) + typeid = typehint; + + if((stat = computeattrdata(typehint, &typeid, values, &typelen, &len, &data))) goto done; if(typeidp) *typeidp = typeid; if(lenp) *lenp = len; @@ -1021,159 +1036,60 @@ computeattrinfo(const char* name, NClist* atypes, NCjson* values, done: nullfree(data); - return THROW(stat); + return ZUNTRACEX(THROW(stat),"typeid=%d typelen=%d len=%u",*typeidp,*typelenp,*lenp); } /* Extract data for an attribute */ static int -computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap) +computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* countp, void** datap) { int stat = NC_NOERR; - size_t count; - void* data = NULL; + NCbytes* buf = ncbytesnew(); size_t typelen; nc_type typeid = NC_NAT; NCjson* jtext = NULL; int reclaimvalues = 0; + int isjson = 0; /* 1 => attribute value is neither scalar nor array of scalars */ + int count = 0; /* no. 
of attribute values */ + + ZTRACE(3,"typehint=%d typeid=%d values=|%s|",typehint,*typeidp,NCJtotext(values)); /* Get assumed type */ if(typeidp) typeid = *typeidp; - if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done; - if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} + if(typeid == NC_NAT && !isjson) { + if((stat = NCZ_inferattrtype(values,typehint, &typeid))) goto done; + } - if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) - goto done; + /* See if this is a simple vector (or scalar) of atomic types */ + isjson = NCZ_iscomplexjson(values,typeid); - /* Collect the length of the attribute; might be a singleton */ - switch (NCJsort(values)) { - case NCJ_ARRAY: - count = NCJlength(values); - break; - case NCJ_DICT: - /* Apply the JSON dictionary convention and convert to string */ - if((stat = read_dict(values,&jtext))) goto done; + if(isjson) { + /* Apply the JSON attribute convention and convert to JSON string */ + typeid = NC_CHAR; + if((stat = json_convention_read(values,&jtext))) goto done; values = jtext; jtext = NULL; reclaimvalues = 1; - /* fall thru */ - case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ - if(typeid == NC_CHAR) { - count = strlen(NCJstring(values)); - if(count == 0) count = 1; /* Actually a single nul char, probably default fill value ugh!*/ - } else - count = 1; - break; - default: - count = 1; /* singleton */ - break; - } + } + + if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) + goto done; + + /* Convert the JSON attribute values to the actual netcdf attribute bytes */ + if((stat = zconvert(values,typeid,typelen,&count,buf))) goto done; - if(count > 0 && data == NULL) { - /* Allocate data space */ - if(typeid == NC_CHAR) - data = malloc(typelen*(count+1)); - else - data = malloc(typelen*count); - if(data == NULL) - {stat = NC_ENOMEM; goto done;} - /* convert to target type */ - if((stat = zconvert(typeid, typelen, values, data))) - goto 
done; - } - if(lenp) *lenp = count; if(typelenp) *typelenp = typelen; - if(datap) {*datap = data; data = NULL;} if(typeidp) *typeidp = typeid; /* return possibly inferred type */ + if(countp) *countp = count; + if(datap) *datap = ncbytesextract(buf); done: + ncbytesfree(buf); if(reclaimvalues) NCJreclaim(values); /* we created it */ - nullfree(data); - return THROW(stat); -} - -static int -inferattrtype(NCjson* value, nc_type* typeidp) -{ - int stat = NC_NOERR; - nc_type typeid; - NCjson* j = NULL; - unsigned long long u64; - long long i64; - int negative = 0; - - if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) - {typeid = NC_NAT; goto done;} - - if(NCJsort(value) == NCJ_NULL) - {typeid = NC_NAT; goto done;} - - if(value->sort == NCJ_ARRAY) { - j=NCJith(value,0); - return inferattrtype(j,typeidp); - } - - switch (NCJsort(value)) { - case NCJ_NULL: - typeid = NC_CHAR; - return NC_NOERR; - case NCJ_DICT: - typeid = NC_CHAR; - goto done; - case NCJ_UNDEF: - return NC_EINVAL; - default: /* atomic */ - break; - } - if(NCJstring(value) != NULL) - negative = (NCJstring(value)[0] == '-'); - switch (value->sort) { - case NCJ_INT: - if(negative) { - sscanf(NCJstring(value),"%lld",&i64); - u64 = (unsigned long long)i64; - } else - sscanf(NCJstring(value),"%llu",&u64); - typeid = mininttype(u64,negative); - break; - case NCJ_DOUBLE: - typeid = NC_DOUBLE; - break; - case NCJ_BOOLEAN: - typeid = NC_UBYTE; - break; - case NCJ_STRING: /* requires special handling as an array of characters */ - typeid = NC_CHAR; - break; - default: - stat = NC_ENCZARR; - } -done: - if(typeidp) *typeidp = typeid; - return stat; -} - -static int -mininttype(unsigned long long u64, int negative) -{ - long long i64 = (long long)u64; /* keep bit pattern */ - if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; - if(i64 < 0) { - if(i64 >= NC_MIN_BYTE) return NC_BYTE; - if(i64 >= NC_MIN_SHORT) return NC_SHORT; - if(i64 >= NC_MIN_INT) return NC_INT; - return NC_INT64; - } - if(i64 <= 
NC_MAX_BYTE) return NC_BYTE; - if(i64 <= NC_MAX_UBYTE) return NC_UBYTE; - if(i64 <= NC_MAX_SHORT) return NC_SHORT; - if(i64 <= NC_MAX_USHORT) return NC_USHORT; - if(i64 <= NC_MAX_INT) return NC_INT; - if(i64 <= NC_MAX_UINT) return NC_UINT; - return NC_INT64; + return ZUNTRACEX(THROW(stat),"typelen=%d count=%u",(typelenp?*typelenp:0),(countp?*countp:-1)); } - /** * @internal Read file data from map to memory. * @@ -1189,7 +1105,8 @@ ncz_read_file(NC_FILE_INFO_T* file) NCjson* json = NULL; LOG((3, "%s: file: %s", __func__, file->controller->path)); - + ZTRACE(3,"file=%s",file->controller->path); + /* _nczarr should already have been read in ncz_open_dataset */ /* Now load the groups starting with root */ @@ -1198,7 +1115,7 @@ ncz_read_file(NC_FILE_INFO_T* file) done: NCJreclaim(json); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1228,7 +1145,8 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) int v1 = 0; LOG((3, "%s: dims: %s", __func__, key)); - + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); + zinfo = file->format_file_info; map = zinfo->map; @@ -1255,9 +1173,10 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) goto done; /* Read */ switch (stat=NCZ_downloadjson(map,key,&jgroup)) { - case NC_NOERR: /* we read it */ - /* Extract the NCZ_V2_GROUP dict */ + case NC_NOERR: /* Extract the NCZ_V2_GROUP dict */ if((stat = NCJdictget(jgroup,NCZ_V2_GROUP,&jdict))) goto done; + if(!stat && jdict == NULL) + {if((stat = NCJdictget(jgroup,NCZ_V2_GROUP_UC,&jdict))) goto done;} break; case NC_EEMPTY: /* does not exist, use search */ if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) @@ -1295,7 +1214,7 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) nclistfreeall(subgrps); nullfree(fullpath); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -1316,6 +1235,9 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) char* fullpath = NULL; char* key = NULL; NCZ_FILE_INFO_T* zinfo = NULL; 
+ NC_VAR_INFO_T* var = NULL; + NCZ_VAR_INFO_T* zvar = NULL; + NC_GRP_INFO_T* grp = NULL; NCZMAP* map = NULL; NC_ATT_INFO_T* att = NULL; NCindex* attlist = NULL; @@ -1325,14 +1247,24 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) size_t len, typelen; void* data = NULL; NC_ATT_INFO_T* fillvalueatt = NULL; + nc_type typehint = NC_NAT; + int purezarr; + + ZTRACE(3,"file=%s container=%s",file->controller->path,container->name); zinfo = file->format_file_info; map = zinfo->map; - if(container->sort == NCGRP) - attlist = ((NC_GRP_INFO_T*)container)->att; - else - attlist = ((NC_VAR_INFO_T*)container)->att; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + + if(container->sort == NCGRP) { + grp = ((NC_GRP_INFO_T*)container); + attlist = grp->att; + } else { + var = ((NC_VAR_INFO_T*)container); + zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); + attlist = var->att; + } switch ((stat = load_jatts(map, container, (zinfo->controls.flags & FLAG_NCZARR_V1), &jattrs, &atypes))) { case NC_NOERR: break; @@ -1344,31 +1276,36 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) if(jattrs != NULL) { /* Iterate over the attributes to create the in-memory attributes */ - /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */ + /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray), etc. 
*/ for(i=0;iparent == NULL && strcmp(aname,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0) + isdfaltmaxstrlen = 1; + if(var != NULL && strcmp(aname,NC_NCZARR_MAXSTRLEN_ATTR)==0) + ismaxstrlen = 1; /* See if this is reserved attribute */ - ra = NC_findreserved(NCJstring(key)); + ra = NC_findreserved(aname); if(ra != NULL) { - /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ - if(strcmp(NCJstring(key),NCPROPS)==0 - && container->sort == NCGRP - && file->root_grp == (NC_GRP_INFO_T*)container) { + /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ + if(strcmp(aname,NCPROPS)==0 && grp != NULL && file->root_grp == grp) { /* Setup provenance */ if(NCJsort(value) != NCJ_STRING) - {stat = THROW(NC_ENCZARR); goto done;} /*malformed*/ - if((stat = NCZ_read_provenance(file,NCJstring(key),NCJstring(value)))) + {stat = (THROW(NC_ENCZARR)); goto done;} /*malformed*/ + if((stat = NCZ_read_provenance(file,aname,NCJstring(value)))) goto done; } /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ - if(strcmp(NCJstring(key),NC_XARRAY_DIMS)==0 - && container->sort == NCVAR - && (ra->flags & HIDDENATTRFLAG)) { - /* store for later */ - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info; + if(strcmp(aname,NC_XARRAY_DIMS)==0 && var != NULL && (ra->flags & HIDDENATTRFLAG)) { + /* store for later */ int i; assert(NCJsort(value) == NCJ_ARRAY); if((zvar->xarray = nclistnew())==NULL) @@ -1379,19 +1316,28 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) nclistpush(zvar->xarray,strdup(NCJstring(k))); } } - /* else ignore */ - continue; + /* case other: if attribute is hidden */ + if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ } + typehint = NC_NAT; + if(isfillvalue) + typehint = var->type_info->hdr.id ; /* if unknown use the var's type for _FillValue */ /* Create the attribute */ /* Collect the attribute's type and value */ - if((stat = computeattrinfo(NCJstring(key),atypes,value, + 
if((stat = computeattrinfo(aname,atypes,typehint,purezarr,value, &typeid,&typelen,&len,&data))) goto done; - if((stat = ncz_makeattr(container,attlist,NCJstring(key),typeid,len,data,&att))) + if((stat = ncz_makeattr(container,attlist,aname,typeid,len,data,&att))) goto done; - nullfree(data); data = NULL; /* passed to the attribute */ - /* Is this _FillValue ? */ - if(strcmp(att->hdr.name,_FillValue)==0) fillvalueatt = att; + /* No longer need this copy of the data */ + if((stat = nc_reclaim_data_all(file->controller->ext_ncid,att->nc_typeid,data,len))) goto done; + data = NULL; + if(isfillvalue) + fillvalueatt = att; + if(ismaxstrlen && att->nc_typeid == NC_INT) + zvar->maxstrlen = ((int*)att->data)[0]; + if(isdfaltmaxstrlen && att->nc_typeid == NC_INT) + zinfo->default_maxstrlen = ((int*)att->data)[0]; } } /* If we have not read a _FillValue, then go ahead and create it */ @@ -1407,12 +1353,13 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) ((NC_GRP_INFO_T*)container)->atts_read = 1; done: + if(data != NULL) + stat = nc_reclaim_data(file->controller->ext_ncid,att->nc_typeid,data,len); NCJreclaim(jattrs); nclistfreeall(atypes); nullfree(fullpath); - nullfree(data); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1430,6 +1377,8 @@ define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo) { int i,stat = NC_NOERR; + ZTRACE(3,"file=%s grp=%s |diminfo|=%u",file->controller->path,grp->hdr.name,nclistlength(diminfo)); + /* Reify each dim in turn */ for(i = 0; i < nclistlength(diminfo); i+=2) { NC_DIM_INFO_T* dim = NULL; @@ -1449,7 +1398,7 @@ define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1480,15 +1429,19 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) int purezarr = 0; int xarray = 0; int formatv1 = 0; - nc_type typeid; + nc_type vtype; + int vtypelen; size64_t* shapes = NULL; int rank = 0; + int 
zarr_rank = 1; /* Need to watch out for scalars */ NClist* dimnames = nclistnew(); #ifdef ENABLE_NCZARR_FILTERS NCjson* jfilter = NULL; int chainindex; #endif + ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames)); + zinfo = file->format_file_info; map = zinfo->map; @@ -1500,8 +1453,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) for(i = 0; i < nclistlength(varnames); i++) { NC_VAR_INFO_T* var; const char* varname = nclistget(varnames,i); - - /* Create the NC_VAR_INFO_T object */ if((stat = nc4_var_list_add2(grp, varname, &var))) goto done; @@ -1542,41 +1493,110 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if((stat = NCJdictget(jvar,"zarr_format",&jvalue))) goto done; sscanf(NCJstring(jvalue),"%d",&version); if(version != zinfo->zarr.zarr_version) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} } /* Set the type and endianness of the variable */ { - nc_type vtype; int endianness; if((stat = NCJdictget(jvar,"dtype",&jvalue))) goto done; /* Convert dtype to nc_type + endianness */ - if((stat = ncz_dtype2typeinfo(NCJstring(jvalue),&vtype,&endianness))) + if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen))) goto done; - if(vtype > NC_NAT && vtype < NC_STRING) { + if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { /* Locate the NC_TYPE_INFO_T object */ if((stat = ncz_gettype(file,grp,vtype,&var->type_info))) goto done; } else {stat = NC_EBADTYPE; goto done;} if(endianness == NC_ENDIAN_NATIVE) endianness = zinfo->native_endianness; + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { var->endianness = endianness; } else {stat = NC_EBADTYPE; goto done;} var->type_info->endianness = var->endianness; /* Propagate */ + if(vtype == NC_STRING) { + zvar->maxstrlen = vtypelen; 
+ vtypelen = sizeof(char*); /* in-memory len */ + if(zvar->maxstrlen <= 0) zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var); + } } + + if(!purezarr) { + /* Extract the _NCZARR_ARRAY values */ + /* Do this first so we know about storage esp. scalar */ + if(formatv1) { + /* Construct the path to the zarray object */ + if((stat = nczm_concat(varpath,NCZARRAY,&key))) + goto done; + /* Download the nczarray object */ + if((stat=NCZ_readdict(map,key,&jncvar))) + goto done; + nullfree(key); key = NULL; + } else {/* format v2 */ + /* Extract the NCZ_V2_ARRAY dict */ + if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; + if(!stat && jncvar == NULL) + {if((stat = NCJdictget(jvar,NCZ_V2_ARRAY_UC,&jncvar))) goto done;} + } + if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} + assert((NCJsort(jncvar) == NCJ_DICT)); + /* Extract storage flag */ + if((stat = NCJdictget(jncvar,"storage",&jvalue))) + goto done; + if(jvalue != NULL) { + if(strcmp(NCJstring(jvalue),"chunked") == 0) { + var->storage = NC_CHUNKED; + } else if(strcmp(NCJstring(jvalue),"compact") == 0) { + var->storage = NC_COMPACT; + } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { + var->storage = NC_CONTIGUOUS; + zvar->scalar = 1; + } else { /*storage = NC_CONTIGUOUS;*/ + var->storage = NC_CONTIGUOUS; + } + } + /* Extract dimrefs list */ + switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { + case NC_NOERR: /* Extract the dimref names */ + assert((NCJsort(jdimrefs) == NCJ_ARRAY)); + if(zvar->scalar) { + assert(NCJlength(jdimrefs) == 0); + } else { + rank = NCJlength(jdimrefs); + for(j=0;jscalar) rank = 0; else rank = NCJlength(jvalue); - /* Set the rank of the variable */ + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = (THROW(NC_ENCZARR)); goto done;} + if(zvar->scalar) { + rank = 0; + zarr_rank = 1; /* Zarr does not support scalars */ + } else + rank = (zarr_rank = NCJlength(jvalue)); + /* Save the rank of the variable */ if((stat = nc4_var_set_ndims(var, rank))) goto done; /* extract the shapes */ 
- if((shapes = (size64_t*)malloc(sizeof(size64_t)*rank)) == NULL) - {stat = THROW(NC_ENOMEM); goto done;} + if((shapes = (size64_t*)malloc(sizeof(size64_t)*zarr_rank)) == NULL) + {stat = (THROW(NC_ENOMEM)); goto done;} if((stat = decodeints(jvalue, shapes))) goto done; } + /* Capture dimension_separator (must precede chunk cache creation) */ { NCglobalstate* ngs = NC_getglobalstate(); @@ -1593,6 +1613,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global value */ assert(islegaldimsep(zvar->dimension_separator)); /* we are hosed */ } + /* fill_value; must precede calls to adjust cache */ { if((stat = NCJdictget(jvar,"fill_value",&jvalue))) goto done; @@ -1600,37 +1621,38 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) var->no_fill = 1; else { size_t fvlen; - typeid = var->type_info->hdr.id; + nc_type atypeid = vtype; var->no_fill = 0; - if((stat = computeattrdata(&typeid, jvalue, NULL, &fvlen, &var->fill_value))) + if((stat = computeattrdata(var->type_info->hdr.id, &atypeid, jvalue, NULL, &fvlen, &var->fill_value))) goto done; - assert(typeid == var->type_info->hdr.id); + assert(atypeid == vtype); /* Note that we do not create the _FillValue attribute here to avoid having to read all the attributes and thus foiling lazy read.*/ } } + /* chunks */ { - int rank; size64_t chunks[NC_MAX_VAR_DIMS]; if((stat = NCJdictget(jvar,"chunks",&jvalue))) goto done; if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} /* Verify the rank */ - rank = NCJlength(jvalue); - if(rank > 0) { + assert (zarr_rank == NCJlength(jvalue)); + if(!zvar->scalar) { + if(zarr_rank == 0) {stat = NC_ENCZARR; goto done;} var->storage = NC_CHUNKED; - if(var->ndims+zvar->scalar != rank) - {stat = THROW(NC_ENCZARR); goto done;} - if((var->chunksizes = malloc(sizeof(size_t)*rank)) == NULL) + if(var->ndims 
!= rank) + {stat = (THROW(NC_ENCZARR)); goto done;} + if((var->chunksizes = malloc(sizeof(size_t)*zarr_rank)) == NULL) {stat = NC_ENOMEM; goto done;} if((stat = decodeints(jvalue, chunks))) goto done; /* validate the chunk sizes */ zvar->chunkproduct = 1; for(j=0;j shapes[j]) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} var->chunksizes[j] = (size_t)chunks[j]; zvar->chunkproduct *= chunks[j]; } @@ -1690,62 +1712,14 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) #endif } - if(!purezarr) { - if(formatv1) { - /* Construct the path to the zarray object */ - if((stat = nczm_concat(varpath,NCZARRAY,&key))) - goto done; - /* Download the nczarray object */ - if((stat=NCZ_readdict(map,key,&jncvar))) - goto done; - nullfree(key); key = NULL; - } else {/* format v2 */ - /* Extract the NCZ_V2_ARRAY dict */ - if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; - } - if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} - assert((NCJsort(jncvar) == NCJ_DICT)); - /* Extract storage flag */ - if((stat = NCJdictget(jncvar,"storage",&jvalue))) - goto done; - if(jvalue != NULL) { - if(strcmp(NCJstring(jvalue),"chunked") == 0) { - var->storage = NC_CHUNKED; - } else if(strcmp(NCJstring(jvalue),"compact") == 0) { - var->storage = NC_COMPACT; - } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { - var->storage = NC_CONTIGUOUS; - zvar->scalar = 1; - } else { /*storage = NC_CONTIGUOUS;*/ - var->storage = NC_CONTIGUOUS; - } - } - /* Extract dimnames list */ - switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { - case NC_NOERR: /* Extract the dimref names */ - assert((NCJsort(jdimrefs) == NCJ_ARRAY)); - assert(NCJlength(jdimrefs) == rank); - for(j=0;jdim))) goto done; - /* Extract the dimids */ - for(j=0;jdimids[j] = var->dim[j]->hdr.id; + if(!zvar->scalar) { + /* Extract the dimids */ + for(j=0;jdimids[j] = var->dim[j]->hdr.id; + } #ifdef ENABLE_NCZARR_FILTERS /* At this point, we can finalize the filters */ 
@@ -1766,7 +1740,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) nclistfreeall(dimnames); NCJreclaim(jvar); if(formatv1) NCJreclaim(jncvar); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1784,6 +1758,8 @@ define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames) { int i,stat = NC_NOERR; + ZTRACE(3,"file=%s grp=%s |subgrpnames|=%u",file->controller->path,grp->hdr.name,nclistlength(subgrpnames)); + /* Load each subgroup name in turn */ for(i = 0; i < nclistlength(subgrpnames); i++) { NC_GRP_INFO_T* g = NULL; @@ -1807,7 +1783,7 @@ define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } int @@ -1822,6 +1798,8 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) char* zarr_format = NULL; NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + ZTRACE(3,"file=%s",file->controller->path); + /* See if the V1 META-Root is being used */ switch(stat = NCZ_downloadjson(zinfo->map, NCZMETAROOT, &jnczgroup)) { case NC_EEMPTY: /* not there */ @@ -1846,7 +1824,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) if(jzgroup != NULL) { /* See if this NCZarr V2 */ if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done; - if(jsuper != NULL) { + if(!stat && jsuper == NULL) + {if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;} + if(jsuper != NULL) { /* Extract the equivalent attribute */ if(jsuper->sort != NCJ_DICT) {stat = NC_ENCZARR; goto done;} @@ -1865,7 +1845,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) /* Also means file is read only */ file->no_write = 1; } else if(jsuper != NULL) { - /* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */ + /* ! FLAG_NCZARR_V1 && ! 
FLAG_PUREZARR */ } if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;} if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;} @@ -1874,7 +1854,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) nullfree(nczarr_version); NCJreclaim(jzgroup); NCJreclaim(jnczgroup); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /**************************************************/ @@ -1886,9 +1866,11 @@ parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* int i,stat = NC_NOERR; NCjson* jvalue = NULL; + ZTRACE(3,"jcontent=|%s| |dimdefs|=%u |varnames|=%u |subgrps|=%u",NCJtotext(jcontent),(unsigned)nclistlength(dimdefs),(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); + if((stat=NCJdictget(jcontent,"dims",&jvalue))) goto done; if(jvalue != NULL) { - if(NCJsort(jvalue) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jvalue) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Extract the dimensions defined in this group */ for(i=0;icommon.file->controller->path,grp->hdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); + nclistclear(varnames); if((stat = searchvars(zinfo,grp,varnames))) goto done; nclistclear(subgrps); if((stat = searchsubgrps(zinfo,grp,subgrps))) goto done; done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -1981,7 +1965,7 @@ parse_var_dims_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NC_VAR_INFO_T* NCJreclaim(jvalue); nullfree(varkey); varkey = NULL; nullfree(zakey); zakey = NULL; - return THROW(stat); + return ZUNTRACE(THROW(stat)); } #endif @@ -2053,18 +2037,27 @@ searchsubgrps(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* subgrpnames) return stat; } -/* Convert a list of integer strings to 64 bit integers */ +/* Convert a list of integer strings to 64 bit dimension sizes (shapes) */ static int decodeints(NCjson* jshape, size64_t* shapes) { int i, stat = NC_NOERR; for(i=0;iformat_var_info); LOG((3, "%s: 
var %s", __func__, var->hdr.name)); - + ZTRACE(3,"file=%s var=%s",file->controller->path,var->hdr.name); + /* Have we already read the var metadata? */ if (var->meta_read) return NC_NOERR; @@ -2291,6 +2285,8 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); NCjson* jatts = NULL; + ZTRACE(3,"file=%s var=%s purezarr=%d xarray=%d ndims=%d shape=%s", + file->controller->path,var->hdr.name,purezarr,xarray,(int)ndims,nczprint_vector(ndims,shapes)); assert(zfile && zvar); if(purezarr && xarray) {/* Read in the attributes to get xarray dimdef attribute; Note that it might not exist */ @@ -2327,29 +2323,23 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra done: NCJreclaim(jatts); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** -Implement the JSON convention for dictionaries. - -Reading: If the value of the attribute is a dictionary, then stringify - it as the value and make the attribute be of type "char". - -Writing: if the attribute is of type char and looks like a JSON dictionary, - then parse it as JSON and use that as its value in .zattrs. +Implement the JSON convention: +Stringify it as the value and make the attribute be of type "char". 
*/ static int -read_dict(NCjson* jdict, NCjson** jtextp) +json_convention_read(NCjson* json, NCjson** jtextp) { int stat = NC_NOERR; NCjson* jtext = NULL; char* text = NULL; - if(jdict == NULL) {stat = NC_EINVAL; goto done;} - if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;} - if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;} + if(json == NULL) {stat = NC_EINVAL; goto done;} + if(NCJunparse(json,0,&text)) {stat = NC_EINVAL; goto done;} if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;} *jtextp = jtext; jtext = NULL; done: @@ -2358,20 +2348,45 @@ read_dict(NCjson* jdict, NCjson** jtextp) return stat; } +#if 0 +/** +Implement the JSON convention: +Parse it as JSON and use that as its value in .zattrs. +*/ static int -write_dict(size_t len, const void* data, NCjson** jsonp) +json_convention_write(size_t len, const void* data, NCjson** jsonp, int* isjsonp) { int stat = NC_NOERR; - NCjson* jdict = NULL; + NCjson* jexpr = NULL; + int isjson = 0; assert(jsonp != NULL); - if(NCJparsen(len,(char*)data,0,&jdict)) - {stat = NC_EINVAL; goto done;} - if(NCJsort(jdict) != NCJ_DICT) - {stat = NC_EINVAL; goto done;} - *jsonp = jdict; jdict = NULL; + if(NCJparsen(len,(char*)data,0,&jexpr)) { + /* Ok, just treat as sequence of chars */ + if((stat = NCJnewstringn(NCJ_STRING, len, data, &jexpr))) goto done; + } + isjson = 1; + *jsonp = jexpr; jexpr = NULL; + if(isjsonp) *isjsonp = isjson; done: - NCJreclaim(jdict); + NCJreclaim(jexpr); return stat; } +#endif +/* Convert an attribute "types list to an envv style list */ +static int +jtypes2atypes(NCjson* jtypes, NClist* atypes) +{ + int i, stat = NC_NOERR; + for(i=0;ii1", /*NC_BYTE*/ ">U1", /*NC_CHAR*/ ">i2", /*NC_SHORT*/ ">i4", /*NC_INT*/ -">f4", /*NC_FLOAT*/ ">f8", /*NC_DOUBLE*/ ">u1", /*NC_UBYTE*/ -">u2", /*NC_USHORT*/ ">u4", /*NC_UINT*/ ">i8", /*NC_INT64*/ ">u8", /*NC_UINT64*/ -NULL, /*NC_STRING*/ +/* Table of nc_type X {Zarr,NCZarr} X endianness +Issue: Need to distinquish NC_STRING && 
MAXSTRLEN==1 from NC_CHAR +in a way that allows other Zarr implementations to read the data. + +Available info: +Write: we have the netcdf type, so there is no ambiguity. +Read: we have the variable type and also any attribute dtype, +but those types are ambiguous. +We also have the attribute vs variable type problem. +For pure zarr, we have to infer the type of an attribute, +so if we have "var:strattr = \"abcdef\"", then we need +to decide how to infer the type: NC_STRING vs NC_CHAR. + +Solution: +For variables and for NCZarr type attributes, distinquish by using: +* "|S1" for NC_CHAR. +* ">S1" for NC_STRING && MAXSTRLEN==1 +It is a bit of a hack to use endianness, but it should be ok since for +string/char, the endianness has no meaning. +Note that we could use "|U1", but since this is utf-16 or utf-32 +in python, it may cause problems when reading what amounts to utf-8. + +For attributes, we infer: +* NC_CHAR if the hint is 0 + - e.g. var:strattr = 'abcdef'" => NC_CHAR +* NC_STRING if hint is NC_STRING. + - e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING + +Note also that if we read a pure zarr file we will probably always +see "|S1", so we will never see a variable of type NC_STRING with length == 1. +We might however see an attribute of type string. 
+*/ +static const struct ZTYPES { + char* zarr[3]; + char* nczarr[3]; +} znames[NUM_ATOMIC_TYPES] = { +/* nc_type Pure Zarr NCZarr + NE LE BE NE LE BE*/ +/*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}}, +/*NC_BYTE*/ {{"|i1","i1"},{"|i1","i1"}}, +/*NC_CHAR*/ {{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}}, +/*NC_SHORT*/ {{"|i2","i2"},{"|i2","i2"}}, +/*NC_INT*/ {{"|i4","i4"},{"|i4","i4"}}, +/*NC_FLOAT*/ {{"|f4","f4"},{"|f4","f4"}}, +/*NC_DOUBLE*/ {{"|f8","f8"},{"|f8","f8"}}, +/*NC_UBYTE*/ {{"|u1","u1"},{"|u1","u1"}}, +/*NC_USHORT*/ {{"|u2","u2"},{"|u2","u2"}}, +/*NC_UINT*/ {{"|u4","u4"},{"|u4","u4"}}, +/*NC_INT64*/ {{"|i8","i8"},{"|i8","i8"}}, +/*NC_UINT64*/ {{"|u8","u8"},{"|u8","u8"}}, +/*NC_STRING*/ {{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}}, }; #if 0 @@ -47,6 +84,7 @@ NULL, /*NC_NAT*/ "4294967295", /*NC_UINT*/ "-9223372036854775806", /*NC_INT64*/ "18446744073709551614", /*NC_UINT64*/ +"", /*NC_STRING*/ }; #endif @@ -64,6 +102,7 @@ NCJ_INT, /*NC_USHORT*/ NCJ_INT, /*NC_UINT*/ NCJ_INT, /*NC_INT64*/ NCJ_INT, /*NC_UINT64*/ +NCJ_STRING, /*NC_STRING*/ }; /* Forward */ @@ -204,8 +243,8 @@ NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) goto done; content[len] = '\0'; - if((stat = NCJparse(content,0,&json))) - goto done; + if((stat = NCJparse(content,0,&json)) < 0) + {stat = NC_ENCZARR; goto done;} if(jsonp) {*jsonp = json; json = NULL;} @@ -370,28 +409,6 @@ NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp) return stat; } -/** -@internal Given an nc_type+other, produce the corresponding -zarr type name. 
-@param nctype - [in] nc_type -@param little - [in] 1=>little, 0 => big -@param namep - [out] pointer to hold pointer to the name -@return NC_NOERR -@author Dennis Heimbigner -*/ - -int -ncz_zarr_type_name(nc_type nctype, int little, const char** znamep) -{ - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; - if(little) { - if(znamep) *znamep = znames_little[nctype]; - } else { - if(znamep) *znamep = znames_big[nctype]; - } - return NC_NOERR; -} - #if 0 /** @internal Given an nc_type, produce the corresponding @@ -405,7 +422,7 @@ default fill value as a string. int ncz_default_fill_value(nc_type nctype, const char** dfaltp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(dfaltp) *dfaltp = zfillvalue[nctype]; return NC_NOERR; } @@ -423,7 +440,7 @@ fill value JSON type int ncz_fill_value_sort(nc_type nctype, int* sortp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(sortp) *sortp = zjsonsort[nctype]; return NC_NOERR; } @@ -486,57 +503,123 @@ NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NC return stat; } +#if 0 +/* Convert a netcdf-4 type integer */ int -ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep) +ncz_nctypedecode(const char* snctype, nc_type* nctypep) { unsigned nctype = 0; if(sscanf(snctype,"%u",&nctype)!=1) return NC_EINVAL; if(nctypep) *nctypep = nctype; return NC_NOERR; } +#endif + +/** +@internal Given an nc_type+other, produce the corresponding dtype string. 
+@param nctype - [in] nc_type +@param endianness - [in] endianness +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param strlen - [in] max string length +@param namep - [out] pointer to hold pointer to the dtype; user frees +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + +int +ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int strlen, char** dnamep) +{ + char dname[64]; + char* format = NULL; + + if(nctype <= NC_NAT || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; + if(purezarr) + format = znames[nctype].zarr[endianness]; + else + format = znames[nctype].nczarr[endianness]; + snprintf(dname,sizeof(dname),format,strlen); + if(dnamep) *dnamep = strdup(dname); + return NC_NOERR; +} + +/* +@internal Convert a numcodecs dtype spec to a corresponding nc_type. +@param nctype - [in] dtype the dtype to convert +@param nctype - [in] typehint help disambiguate char vs string +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param nctypep - [out] hold corresponding type +@param endianp - [out] hold corresponding endianness +@param typelenp - [out] hold corresponding type size (for fixed length strings) +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ int -ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) +ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) { int stat = NC_NOERR; int typelen = 0; int count; char tchar; nc_type nctype = NC_NAT; - int endianness = 0; + int endianness = -1; + const char* p; + int n; - if(endianness) *endianp = NC_ENDIAN_NATIVE; + if(endianp) *endianp = NC_ENDIAN_NATIVE; if(nctypep) *nctypep = NC_NAT; if(dtype == NULL) goto zerr; - if(strlen(dtype) < 3) goto zerr; - switch (dtype[0]) { + p = dtype; + switch (*p++) { case '<': endianness = NC_ENDIAN_LITTLE; break; case '>': endianness = NC_ENDIAN_BIG; break; + case '=': endianness = NC_ENDIAN_NATIVE; break; case '|': endianness = NC_ENDIAN_NATIVE; break; 
- default: goto zerr; + default: p--; endianness = NC_ENDIAN_NATIVE; break; } + tchar = *p++; /* get the base type */ /* Decode the type length */ - count = sscanf(dtype+2,"%d",&typelen); - if(count != 1) goto zerr; - tchar = dtype[1]; - switch(typelen) { - case 1: - switch (tchar) { - case 'i': nctype = NC_BYTE; break; - case 'u': nctype = NC_UBYTE; break; - case 'U': nctype = NC_CHAR; break; - default: goto zerr; + count = sscanf(p,"%d%n",&typelen,&n); + if(count == 0) goto zerr; + p += n; + + /* Short circuit fixed length strings */ + if(tchar == 'S') { + /* Fixed length string */ + switch (typehint) { + case NC_CHAR: nctype = NC_CHAR; typelen = 1; break; + case NC_STRING: nctype = NC_STRING; break; + default: + if(typelen == 1) {/* so |S1 => NC_CHAR */ + if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR; + } else + nctype = NC_STRING; } - break; - case 2: +#if 0 + } else if(tchar == 'U') {/*back compatibility*/ + if(purezarr || typelen != 1) goto zerr; + nctype = NC_CHAR; +#endif + } else { + switch(typelen) { + case 1: + switch (tchar) { + case 'i': nctype = NC_BYTE; break; + case 'u': nctype = NC_UBYTE; break; + default: goto zerr; + } + break; + case 2: switch (tchar) { case 'i': nctype = NC_SHORT; break; case 'u': nctype = NC_USHORT; break; default: goto zerr; } break; - case 4: + case 4: switch (tchar) { case 'i': nctype = NC_INT; break; case 'u': nctype = NC_UINT; break; @@ -544,7 +627,7 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - case 8: + case 8: switch (tchar) { case 'i': nctype = NC_INT64; break; case 'u': nctype = NC_UINT64; break; @@ -552,10 +635,16 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - default: goto zerr; + default: goto zerr; + } } + /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + if(nctypep) *nctypep = 
nctype; + if(typelenp) *typelenp = typelen; if(endianp) *endianp = endianness; done: @@ -565,7 +654,104 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) goto done; } +/* Infer the attribute's type based +primarily on the first atomic value encountered +recursively. +*/ +int +NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp) +{ + int i,stat = NC_NOERR; + nc_type typeid; + NCjson* j = NULL; + unsigned long long u64; + long long i64; + int negative = 0; + + if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) + {typeid = NC_NAT; goto done;} /* Empty array is illegal */ + + if(NCJsort(value) == NCJ_NULL) + {typeid = NC_NAT; goto done;} /* NULL is also illegal */ + + if(NCJsort(value) == NCJ_DICT) /* Complex JSON expr -- a dictionary */ + {typeid = NC_NAT; goto done;} + + /* If an array, make sure all the elements are simple */ + if(value->sort == NCJ_ARRAY) { + for(i=0;isort == NCJ_ARRAY) { + j=NCJith(value,0); + return NCZ_inferattrtype(j,typehint,typeidp); + } + + /* At this point, value is a primitive JSON Value */ + + switch (NCJsort(value)) { + case NCJ_NULL: + typeid = NC_NAT; + return NC_NOERR; + case NCJ_DICT: + typeid = NC_CHAR; + goto done; + case NCJ_UNDEF: + return NC_EINVAL; + default: /* atomic */ + break; + } + + if(NCJstring(value) != NULL) + negative = (NCJstring(value)[0] == '-'); + switch (value->sort) { + case NCJ_INT: + if(negative) { + sscanf(NCJstring(value),"%lld",&i64); + u64 = (unsigned long long)i64; + } else + sscanf(NCJstring(value),"%llu",&u64); + typeid = NCZ_inferinttype(u64,negative); + break; + case NCJ_DOUBLE: + typeid = NC_DOUBLE; + break; + case NCJ_BOOLEAN: + typeid = NC_UBYTE; + break; + case NCJ_STRING: /* requires special handling as an array of characters */ + typeid = NC_CHAR; + break; + default: + stat = NC_ENCZARR; + } +done: + if(typeidp) *typeidp = typeid; + return stat; +} +/* Infer the int type from the value; + minimum type will be int. 
+*/ +int +NCZ_inferinttype(unsigned long long u64, int negative) +{ + long long i64 = (long long)u64; /* keep bit pattern */ + if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; + if(i64 < 0) { + if(i64 >= NC_MIN_INT) return NC_INT; + return NC_INT64; + } + if(i64 <= NC_MAX_INT) return NC_INT; + if(i64 <= NC_MAX_UINT) return NC_UINT; + return NC_INT64; +} + /** @internal Similar to NCZ_grppath, but using group ids. @param gid - [in] group id @@ -788,3 +974,139 @@ NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp) if(dst) (void)nc_reclaim_data_all(ncid,tid,dst,1); return stat; } + + +/* Get max str len for a variable or grp */ +/* Has side effect of setting values in the + internal data structures */ +int +NCZ_get_maxstrlen(NC_OBJ* obj) +{ + int maxstrlen = 0; + assert(obj->sort == NCGRP || obj->sort == NCVAR); + if(obj->sort == NCGRP) { + NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)obj; + NC_FILE_INFO_T* file = grp->nc4_info; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if(zfile->default_maxstrlen == 0) + zfile->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + maxstrlen = zfile->default_maxstrlen; + } else { /*(obj->sort == NCVAR)*/ + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)obj; + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if(zvar->maxstrlen == 0) + zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var->container); + maxstrlen = zvar->maxstrlen; + } + return maxstrlen; +} + +int +NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen) +{ + size_t i; + unsigned char* sp = NULL; + const unsigned char* p = fixed; + memset((void*)charp,0,sizeof(char*)*count); + for(i=0;i maxstrlen) len = maxstrlen; + memcpy(p,charp[i],len); + } else { + memset(p,'\0',maxstrlen); + } + } + return NC_NOERR; +} + +/* +Wrap NC_copy_data, but take string value into account when overwriting +*/ +int +NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int noclear, void* copy) +{ + if(xtype->hdr.id == 
NC_STRING && !noclear) { + size_t i; + char** scopy = (char**)copy; + /* Reclaim any string fill values in copy */ + for(i=0;icontroller->ext_ncid,xtype->hdr.id,memory,count,copy); +} + +#if 0 +/* Recursive helper */ +static int +checksimplejson(NCjson* json, int depth) +{ + int i; + + switch (NCJsort(json)) { + case NCJ_ARRAY: + if(depth > 0) return 0; /* e.g. [...,[...],...] or [...,{...},...] */ + for(i=0;i < NCJlength(json);i++) { + NCjson* j = NCJith(json,i); + if(!checksimplejson(j,depth+1)) return 0; + } + break; + case NCJ_DICT: + case NCJ_NULL: + case NCJ_UNDEF: + return 0; + default: break; + } + return 1; +} +#endif + +/* Return 1 if the attribute will be stored as a complex JSON valued attribute; return 0 otherwise */ +int +NCZ_iscomplexjson(NCjson* json, nc_type typehint) +{ + int i, stat = 0; + + switch (NCJsort(json)) { + case NCJ_ARRAY: + /* If the typehint is NC_CHAR, then always treat it as complex */ + if(typehint == NC_CHAR) {stat = 1; goto done;} + /* Otherwise see if it is a simple vector of atomic values */ + for(i=0;i < NCJlength(json);i++) { + NCjson* j = NCJith(json,i); + if(!NCJisatomic(j)) {stat = 1; goto done;} + } + break; + case NCJ_DICT: + case NCJ_NULL: + case NCJ_UNDEF: + stat = 1; goto done; + default: break; + } +done: + return stat; +} diff --git a/libnczarr/zvar.c b/libnczarr/zvar.c index da4ebba6f4..dd6311346b 100644 --- a/libnczarr/zvar.c +++ b/libnczarr/zvar.c @@ -12,6 +12,11 @@ #include "zincludes.h" #include /* For pow() used below. 
*/ +/* Mnemonics */ +#define CREATE 0 +#define NOCREATE 1 + + #ifdef LOGGING static void reportchunking(const char* title, NC_VAR_INFO_T* var) @@ -107,10 +112,7 @@ ncz_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) double total_chunk_size; #endif - if (var->type_info->nc_type_class == NC_STRING) - type_size = sizeof(char *); - else - type_size = var->type_info->size; + type_size = var->type_info->size; #ifdef LOGGING /* Later this will become the total number of bytes in the default @@ -406,7 +408,7 @@ var->type_info->rc++; /* Set variables no_fill to match the database default unless the * variable type is variable length (NC_STRING or NC_VLEN) or is * user-defined type. */ - if (var->type_info->nc_type_class < NC_STRING) + if (var->type_info->nc_type_class <= NC_STRING) var->no_fill = (h5->fill_mode == NC_NOFILL); /* Assign dimensions to the variable. At the same time, check to @@ -473,7 +475,6 @@ var->type_info->rc++; if (type) if ((retval = nc4_type_free(type))) BAILLOG(retval); - return ZUNTRACE(retval); } diff --git a/libnczarr/zwalk.c b/libnczarr/zwalk.c index 87f490c592..70d879a493 100644 --- a/libnczarr/zwalk.c +++ b/libnczarr/zwalk.c @@ -128,10 +128,16 @@ NCZ_transferslice(NC_VAR_INFO_T* var, int reading, common.swap = (zfile->native_endianness == var->endianness ? 
0 : 1); common.chunkcount = 1; - for(r=0;rchunksizes[0]; + slices[0].start = 0; + slices[0].stride = 1; + slices[0].stop = 0; + slices[0].len = 1; + common.chunkcount = 1; + memshape[0] = 1; + } else for(r=0;rdim[r]->len; chunklens[r] = var->chunksizes[r]; slices[r].start = start[r]; @@ -221,7 +227,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) if((stat=wholechunk_indices(common,slices,chunkindices))) goto done; if(wdebug >= 1) fprintf(stderr,"case: wholechunk: chunkindices: %s\n",nczprint_vector(common->rank,chunkindices)); - /* Read the chunk */ + /* Read the chunk; handles fixed vs char* strings*/ switch ((stat = common->reader.read(common->reader.source, chunkindices, &chunkdata))) { case NC_EEMPTY: /* cache created the chunk */ break; @@ -232,9 +238,9 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); if(common->reading) { - memcpy(memptr,slpptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; } else { - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; } // transfern(common,slpptr,memptr,common->chunkcount,1,chunkdata); if(zutest && zutest->tests & UTEST_WHOLECHUNK) @@ -410,15 +416,16 @@ NCZ_walk(NCZProjection** projv, NCZOdometer* chunkodom, NCZOdometer* slpodom, NC if(slpavail > 0) { if(wdebug > 0) wdebug2(common,slpptr0,memptr0,slpavail,laststride,chunkdata); if(common->reading) { - memcpy(memptr0,slpptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr0,slpavail,!ZCLEAR,memptr0))) goto done; } else { - memcpy(slpptr0,memptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr0,slpavail,ZCLEAR,slpptr0))) goto done; } } // if((stat = 
transfern(common,slpptr0,memptr0,avail,nczodom_laststride(slpodom),chunkdata)))goto done; nczodom_next(memodom); nczodom_next(slpodom); } +done: return stat; } @@ -734,10 +741,11 @@ NCZ_transferscalar(struct Common* common) /* Figure out memory address */ memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); - if(common->reading) - memcpy(memptr,slpptr,common->chunkcount*common->typesize); - else - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if(common->reading) { + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; + } else { + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; + } done: return stat; @@ -749,7 +757,7 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) { int stat = NC_NOERR; NC_VAR_INFO_T* var = NULL; - NCZ_VAR_INFO_T* zvar; + NCZ_VAR_INFO_T* zvar = NULL; struct NCZChunkCache* cache = NULL; void* cachedata = NULL; @@ -759,8 +767,9 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) cache = zvar->cache; if((stat = NCZ_read_cache_chunk(cache,zindices,&cachedata))) goto done; - if(chunkdata) - memcpy(chunkdata,cachedata,cache->chunksize); + if(chunkdata) { + if((stat = nc_copy_data(ncid,var->type_info->hdr.id,cachedata,cache->chunkcount,chunkdata))) goto done; + } done: return stat; diff --git a/libnczarr/zxcache.c b/libnczarr/zxcache.c index 52f8abf2ef..374890be68 100644 --- a/libnczarr/zxcache.c +++ b/libnczarr/zxcache.c @@ -202,9 +202,14 @@ NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, char dimsep, NCZC } static void -free_cache_entry(NCZCacheEntry* entry) +free_cache_entry(NCZChunkCache* cache, NCZCacheEntry* entry) { if(entry) { + int tid = cache->var->type_info->hdr.id; + if(tid == NC_STRING && !entry->isfixedstring) { + int ncid = cache->var->container->nc4_info->controller->ext_ncid; + 
nc_reclaim_data(ncid,tid,entry->data,cache->chunkcount); + } nullfree(entry->data); nullfree(entry->key.varkey); nullfree(entry->key.chunkkey); @@ -225,7 +230,7 @@ NCZ_free_chunk_cache(NCZChunkCache* cache) NCZCacheEntry* entry = nclistremove(cache->mru,0); (void)ncxcacheremove(cache->xcache,entry->hashkey,&ptr); assert(ptr == entry); - free_cache_entry(entry); + free_cache_entry(cache,entry); } #ifdef DEBUG fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->mru)); @@ -303,7 +308,7 @@ fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru)); done: if(created && stat == NC_NOERR) stat = NC_EEMPTY; /* tell upper layers */ - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } @@ -343,7 +348,7 @@ fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru)); if((stat=makeroom(cache))) goto done; done: - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } #endif @@ -449,6 +454,7 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) { int i, stat = NC_NOERR; NC_VAR_INFO_T* var = cache->var; + nc_type typeid = var->type_info->hdr.id; size_t typesize = var->type_info->size; if(cache->fillchunk) goto done; @@ -461,6 +467,11 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) goto done; } if((stat = NCZ_ensure_fill_value(var))) goto done; + if(typeid == NC_STRING) { + char* src = *((char**)(var->fill_value)); + char** dst = (char**)(cache->fillchunk); + for(i=0;ichunkcount;i++) dst[i] = strdup(src); + } else switch (typesize) { case 1: { unsigned char c = *((unsigned char*)var->fill_value); @@ -597,6 +608,9 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZ_FILE_INFO_T* zfile = NULL; NCZMAP* map = NULL; char* path = NULL; + nc_type tid = NC_NAT; + void* strchunk = NULL; + int ncid = 0; ZTRACE(5,"cache.var=%s entry.key=%s",cache->var->hdr.name,entry->key); LOG((3, "%s: var: %p", __func__, cache->var)); @@ -605,6 +619,26 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* 
entry) zfile = file->format_file_info; map = zfile->map; + /* Collect some info */ + ncid = file->controller->ext_ncid; + tid = cache->var->type_info->hdr.id; + + if(tid == NC_STRING && !entry->isfixedstring) { + /* Convert from char* to char[strlen] format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + if((strchunk = malloc(cache->chunkcount*maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;} + /* copy char* to char[] format */ + if((stat = NCZ_char2fixed((const char**)entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + if((stat = nc_reclaim_data_all(ncid,tid,entry->data,cache->chunkcount))) goto done; + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * maxstrlen; + entry->isfixedstring = 1; + } + + #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in filtered state */ if(!entry->isfiltered) { @@ -636,6 +670,7 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) default: goto done; } done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } @@ -657,9 +692,12 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZMAP* map = NULL; NC_FILE_INFO_T* file = NULL; NCZ_FILE_INFO_T* zfile = NULL; + NC_TYPE_INFO_T* xtype = NULL; + char** strchunk = NULL; size64_t size; int empty = 0; char* path = NULL; + int tid; ZTRACE(5,"cache.var=%s entry.key=%s sep=%d",cache->var->hdr.name,entry->key,cache->dimension_separator); @@ -670,22 +708,24 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) map = zfile->map; assert(map); + /* Collect some info */ + xtype = cache->var->type_info; + tid = xtype->hdr.id; + /* get size of the "raw" data on "disk" */ path = NCZ_chunkpath(entry->key); stat = nczmap_len(map,path,&size); nullfree(path); path = NULL; switch(stat) { - case NC_NOERR: break; + case NC_NOERR: entry->size = size; break; case NC_EEMPTY: empty = 1; stat = NC_NOERR; break; default: goto done; } if(!empty) { /* Make sure 
we have a place to read it */ - entry->size = size; - entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */ - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + if((entry->data = (void*)calloc(1,entry->size)) == NULL) + {stat = NC_ENOMEM; goto done;} /* Read the raw data */ path = NCZ_chunkpath(entry->key); stat = nczmap_read(map,path,0,entry->size,(char*)entry->data); @@ -695,27 +735,32 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) case NC_EEMPTY: empty = 1; stat = NC_NOERR;break; default: goto done; } + entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */ + if(tid == NC_STRING) + entry->isfixedstring = 1; /* fill cache is in char[maxstrlen] format */ } if(empty) { /* fake the chunk */ entry->modified = (file->no_write?0:1); entry->size = cache->chunksize; - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + entry->data = NULL; + entry->isfixedstring = 0; + entry->isfiltered = 0; /* apply fill value */ if(cache->fillchunk == NULL) {if((stat = NCZ_ensure_fill_chunk(cache))) goto done;} - memcpy(entry->data,cache->fillchunk,entry->size); - entry->isfiltered = 0; + if((entry->data = calloc(1,entry->size))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_copy_data(file,xtype,cache->fillchunk,cache->chunkcount,!ZCLEAR,entry->data))) goto done; stat = NC_NOERR; } #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in unfiltered state */ - if(entry->isfiltered) { + if(!empty && entry->isfiltered) { NC_VAR_INFO_T* var = cache->var; void* unfiltered = NULL; /* pointer to the unfiltered data */ void* filtered = NULL; /* pointer to the filtered data */ size_t unflen; /* length of unfiltered data */ + assert(tid != NC_STRING || entry->isfixedstring); /* Get the filter chain to apply */ NClist* filterchain = (NClist*)var->filters; if(nclistlength(filterchain) == 0) {stat = NC_EFILTER; goto done;} @@ -730,7 +775,24 @@ 
get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) } #endif + if(tid == NC_STRING && entry->isfixedstring) { + /* Convert from char[strlen] to char* format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + /* copy char[] to char* format */ + if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL) + {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_fixed2char(entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + nullfree(entry->data); + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * sizeof(char*); + entry->isfixedstring = 0; + } + done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index a4ead8f80f..bcb1d9af30 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -35,22 +35,24 @@ */ /** @internal List of reserved attributes. - WARNING: This list must be in sorted order for binary search. */ + WARNING: This list must be in (strcmp) sorted order for binary search. 
*/ static const NC_reservedatt NC_reserved[] = { {NC_ATT_CLASS, READONLYFLAG|HIDDENATTRFLAG}, /*CLASS*/ {NC_ATT_DIMENSION_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*DIMENSION_LIST*/ {NC_ATT_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*NAME*/ {NC_ATT_REFERENCE_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*REFERENCE_LIST*/ - {NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ - {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_Codecs*/ + {NC_XARRAY_DIMS, READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ + {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG}, /*_Codecs*/ {NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/ {ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ - {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR*/ - {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Coordinates*/ - {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Dimid*/ + {NCPROPS,READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_NCProperties*/ + {NC_NCZARR_ATTR_UC, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR */ + {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Coordinates*/ + {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Dimid*/ {SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG}, /*_SuperblockVersion*/ - {NC_ATT_NC3_STRICT_NAME, READONLYFLAG|MATERIALIZEDFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_attr */ }; #define NRESERVED (sizeof(NC_reserved) / sizeof(NC_reservedatt)) /*|NC_reservedatt|*/ @@ -1592,7 +1594,7 @@ nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp) LOG((3, "%s: grp->name %s", __func__, grp->hdr.name)); /* Recursively call this function for each child, if any, stopping - * if there is an error. */ + * if there is an error. 
*/ for (i = 0; i < ncindexsize(grp->children); i++) if ((retval = nc4_rec_grp_del_att_data((NC_GRP_INFO_T *)ncindexith(grp->children, i)))) return retval; diff --git a/nc_test/CMakeLists.txt b/nc_test/CMakeLists.txt index 96c7b6237a..61bff28074 100644 --- a/nc_test/CMakeLists.txt +++ b/nc_test/CMakeLists.txt @@ -107,8 +107,10 @@ IF(BUILD_UTILITIES) ENDIF() IF(ENABLE_BYTERANGE) + IF(ENABLE_EXTERNAL_SERVER_TESTS) build_bin_test_no_prefix(tst_byterange) add_sh_test(nc_test test_byterange) + ENDIF() ENDIF() IF(BUILD_MMAP) diff --git a/nc_test/Makefile.am b/nc_test/Makefile.am index c72bd23002..e7e7e5dfc0 100644 --- a/nc_test/Makefile.am +++ b/nc_test/Makefile.am @@ -75,26 +75,27 @@ endif TESTS = $(TESTPROGRAMS) -if ENABLE_BYTERANGE if BUILD_UTILITIES + +if ENABLE_BYTERANGE +if ENABLE_EXTERNAL_SERVER_TESTS tst_byterange_SOURCES = tst_byterange.c check_PROGRAMS += tst_byterange TESTS += test_byterange.sh endif endif -if BUILD_UTILITIES TESTS += run_diskless.sh run_diskless5.sh run_inmemory.sh if LARGE_FILE_TESTS if ! ENABLE_PARALLEL TESTS += run_diskless2.sh endif endif + if BUILD_MMAP TESTS += run_mmap.sh run_mmap.log: run_diskless.log endif -endif # If pnetcdf is enabled, these tests are run by a test # script. Otherwise, the are run by automake in the usual way. @@ -104,6 +105,8 @@ else TESTS += t_nc tst_atts3 tst_nofill nc_test tst_small endif +endif # BUILD_UTILITIES + # The .c files that are generated with m4 are already distributed, but # we also include the original m4 files, plus test scripts data. 
EXTRA_DIST = test_get.m4 test_put.m4 run_diskless.sh run_diskless2.sh \ diff --git a/nc_test/test_byterange.sh b/nc_test/test_byterange.sh index 28b5e6374d..8c13cd3480 100755 --- a/nc_test/test_byterange.sh +++ b/nc_test/test_byterange.sh @@ -5,11 +5,14 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi set -e +# Note: thredds-test is currently down and the URLs need to be replaced + # Test Urls +if test "x$FEATURE_THREDDSTEST" = x1 ; then URL3="https://thredds-test.unidata.ucar.edu/thredds/fileServer/pointData/cf_dsg/example/point.nc#mode=bytes&aws.profile=none" -#URL3a="https://remotetest.unidata.ucar.edu/thredds/fileServer/testdata/2004050300_eta_211.nc#bytes&aws.profile=none" -URL4a="https://s3.us-east-1.amazonaws.com/noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" URL4b="https://thredds-test.unidata.ucar.edu/thredds/fileServer/irma/metar/files/METAR_20170910_0000.nc#bytes&aws.profile=none" +fi +URL4a="https://s3.us-east-1.amazonaws.com/noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" URL4c="s3://noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" # Requires auth URL3b="s3://unidata-zarr-test-data/byterangefiles/upload3.nc#bytes&aws.profile=unidata" @@ -81,12 +84,16 @@ testsetup https://s3.us-east-1.amazonaws.com/unidata-zarr-test-data echo "*** Testing reading NetCDF-3 file with http" -echo "***Test remote classic file" -testbytes nc3 classic "$URL3" +if test "x$FEATURE_THREDDSTEST" = x1 ; then + echo "***Test remote classic file" + testbytes nc3 classic "$URL3" +fi if test "x$FEATURE_HDF5" = xyes ; then - echo "***Test remote netcdf-4 file: non-s3" - testbytes nc4b netCDF-4 "$URL4b" + if test "x$FEATURE_THREDDSTEST" = x1 ; then + echo "***Test remote netcdf-4 file: non-s3" + testbytes 
nc4b netCDF-4 "$URL4b" + fi fi if test "x$FEATURE_S3TESTS" = xyes && test "x$FEATURE_HDF5" = xyes ; then echo "***Test remote netdf-4 file: s3" diff --git a/nc_test4/tst_specific_filters.sh b/nc_test4/tst_specific_filters.sh index 64074b04c5..43235d2869 100755 --- a/nc_test4/tst_specific_filters.sh +++ b/nc_test4/tst_specific_filters.sh @@ -8,7 +8,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh -set -x set -e if test "x$TESTNCZARR" = x1 ; then diff --git a/ncdap_test/CMakeLists.txt b/ncdap_test/CMakeLists.txt index 8c4f1af31e..ed07697e78 100644 --- a/ncdap_test/CMakeLists.txt +++ b/ncdap_test/CMakeLists.txt @@ -52,14 +52,14 @@ IF(ENABLE_TESTS) # not yet add_sh_test(ncdap tst_hyrax) add_sh_test(ncdap tst_fillmismatch) IF(ENABLE_DAP_LONG_TESTS) - add_sh_test(ncdap tst_longremote3) - add_bin_test(ncdap test_manyurls) - SET_TESTS_PROPERTIES(ncdap_tst_longremote3 ncdap_test_manyurls PROPERTIES RUN_SERIAL TRUE) + add_sh_test(ncdap tst_longremote3) + SET_TESTS_PROPERTIES(ncdap_tst_longremote3 PROPERTIES RUN_SERIAL TRUE) ENDIF(ENABLE_DAP_LONG_TESTS) - - - ENDIF(BUILD_UTILITIES) + IF(ENABLE_EXTERNAL_SERVER_TESTS) + add_bin_test(ncdap test_manyurls) + SET_TESTS_PROPERTIES(ncdap_test_manyurls PROPERTIES RUN_SERIAL TRUE) + ENDIF() IF(ENABLE_DAP_AUTH_TESTS) ##obsolete add_bin_test(ncdap t_auth) diff --git a/ncdap_test/Makefile.am b/ncdap_test/Makefile.am index 8cd39e51a9..3d045b5acd 100644 --- a/ncdap_test/Makefile.am +++ b/ncdap_test/Makefile.am @@ -52,10 +52,12 @@ TESTS += test_partvar if ENABLE_DAP_LONG_TESTS TESTS += tst_longremote3.sh tst_longremote3.log: tst_remote3.log +if ENABLE_EXTERNAL_SERVER_TESTS test_manyurls_SOURCES = test_manyurls.c manyurls.h check_PROGRAMS += test_manyurls - TESTS += test_manyurls test_manyurls.log: tst_longremote3.log + TESTS += test_manyurls +endif endif test_partvar_SOURCES = test_partvar.c diff --git a/ncgen/Makefile.am b/ncgen/Makefile.am index 5beb613cb9..49a3d64976 100644 --- a/ncgen/Makefile.am +++ 
b/ncgen/Makefile.am @@ -61,7 +61,7 @@ CLEANFILES = c0.nc c0_64.nc c0_4.nc c0_4c.nc ref_camrun.c \ makeparser:: rm -f ncgenl.c lex.ncg.c - flex -L -Pncg -8 ncgen.l + flex -Pncg -8 ncgen.l sed -e s/lex.ncg.c/ncgenl.c/g <lex.ncg.c >ncgenl.c bison -pncg -t -d ncgen.y rm -f ncgeny.c ncgeny.h diff --git a/ncgen/bindata.c b/ncgen/bindata.c index 4ed143f3a5..49ead4dfca 100644 --- a/ncgen/bindata.c +++ b/ncgen/bindata.c @@ -422,6 +422,7 @@ bin_generate_data_r(NCConstant* instance, Symbol* tsym, Datalist* fillvalue, Byt return stat; } +#if 0 /** Internal equivalent of ncaux_reclaim_data. */ @@ -591,5 +592,8 @@ bin_reclaim_compound(Symbol* tsym, Reclaim* reclaimer) } #endif /*USE_NETCDF4*/ +#endif /*0*/ + + #endif /*ENABLE_BINARY*/ diff --git a/ncgen/ncgen.l b/ncgen/ncgen.l index 9eeba19dab..c46fb38634 100644 --- a/ncgen/ncgen.l +++ b/ncgen/ncgen.l @@ -528,7 +528,7 @@ done: return 0; } return lexdebug(FLOAT_CONST); } -\'[^\\]\' { +\'[^\\]\' { (void) sscanf((char*)&yytext[1],"%c",&byte_val); return lexdebug(BYTE_CONST); } @@ -560,8 +560,9 @@ done: return 0; case 'r': byte_val = '\r'; break; case 't': byte_val = '\t'; break; case 'v': byte_val = '\v'; break; + case '0' : byte_val = '\0'; break; case '\\': byte_val = '\\'; break; - case '?': byte_val = '\177'; break; + case '?': byte_val = '\177'; break; case '\'': byte_val = '\''; break; default: byte_val = (char)yytext[2]; } diff --git a/ncgen/ncgenl.c b/ncgen/ncgenl.c index 74fe134000..6ea3a2270e 100644 --- a/ncgen/ncgenl.c +++ b/ncgen/ncgenl.c @@ -1,4 +1,6 @@ +#line 3 "ncgenl.c" + #define YY_INT_ALIGNED short int /* A lexical scanner generated by flex */ @@ -1582,6 +1584,8 @@ int yy_flex_debug = 0; #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; +#line 1 "ncgen.l" +#line 2 "ncgen.l" /********************************************************************* * Copyright 1993, UCAR/Unidata * See netcdf/COPYRIGHT file for copying and redistribution conditions.
@@ -1727,6 +1731,8 @@ struct Specialtoken specials[] = { {NULL,0} /* null terminate */ }; +#line 1735 "ncgenl.c" + /* The most correct (validating) version of UTF8 character set (Taken from: http://www.w3.org/2005/03/23-lex-U) @@ -1768,6 +1774,7 @@ ID ([A-Za-z_]|{UTF8})([A-Z.@#\[\]a-z_0-9+-]|{UTF8})* /* Note: this definition of string will work for utf8 as well, although it is a very relaxed definition */ +#line 1778 "ncgenl.c" #define INITIAL 0 #define ST_C_COMMENT 1 @@ -1986,6 +1993,9 @@ YY_DECL } { +#line 226 "ncgen.l" + +#line 1999 "ncgenl.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -2044,12 +2054,14 @@ YY_DECL case 1: YY_RULE_SETUP +#line 227 "ncgen.l" { /* whitespace */ break; } YY_BREAK case 2: YY_RULE_SETUP +#line 231 "ncgen.l" { /* comment */ break; } @@ -2057,6 +2069,7 @@ YY_RULE_SETUP case 3: /* rule 3 can match eol */ YY_RULE_SETUP +#line 235 "ncgen.l" {int len; char* s = NULL; /* In netcdf4, this will be used in a variety of places, so only remove escapes */ @@ -2080,6 +2093,7 @@ yytext[MAXTRST-1] = '\0'; YY_BREAK case 4: YY_RULE_SETUP +#line 256 "ncgen.l" { /* drop leading 0x; pad to even number of chars */ char* p = yytext+2; int len = yyleng - 2; @@ -2094,115 +2108,143 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP +#line 268 "ncgen.l" {return lexdebug(COMPOUND);} YY_BREAK case 6: YY_RULE_SETUP +#line 269 "ncgen.l" {return lexdebug(ENUM);} YY_BREAK case 7: YY_RULE_SETUP +#line 270 "ncgen.l" {return lexdebug(OPAQUE_);} YY_BREAK case 8: YY_RULE_SETUP +#line 272 "ncgen.l" {return lexdebug(FLOAT_K);} YY_BREAK case 9: YY_RULE_SETUP +#line 273 "ncgen.l" {return lexdebug(DOUBLE_K);} YY_BREAK case 10: YY_RULE_SETUP +#line 274 "ncgen.l" {return lexdebug(CHAR_K);} YY_BREAK case 11: YY_RULE_SETUP +#line 275 "ncgen.l" {return lexdebug(BYTE_K);} YY_BREAK case 12: YY_RULE_SETUP +#line 276 "ncgen.l" {return lexdebug(SHORT_K);} YY_BREAK case 13: YY_RULE_SETUP +#line 277 "ncgen.l" {return lexdebug(INT_K);} YY_BREAK case 14: 
YY_RULE_SETUP +#line 278 "ncgen.l" {return lexdebug(identcheck(UBYTE_K));} YY_BREAK case 15: YY_RULE_SETUP +#line 279 "ncgen.l" {return lexdebug(identcheck(USHORT_K));} YY_BREAK case 16: YY_RULE_SETUP +#line 280 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 17: YY_RULE_SETUP +#line 281 "ncgen.l" {return lexdebug(identcheck(INT64_K));} YY_BREAK case 18: YY_RULE_SETUP +#line 282 "ncgen.l" {return lexdebug(identcheck(UINT64_K));} YY_BREAK case 19: YY_RULE_SETUP +#line 283 "ncgen.l" {return lexdebug(identcheck(STRING_K));} YY_BREAK case 20: YY_RULE_SETUP +#line 285 "ncgen.l" {return lexdebug(FLOAT_K);} YY_BREAK case 21: YY_RULE_SETUP +#line 286 "ncgen.l" {return lexdebug(INT_K);} YY_BREAK case 22: YY_RULE_SETUP +#line 287 "ncgen.l" {return lexdebug(INT_K);} YY_BREAK case 23: YY_RULE_SETUP +#line 288 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 24: YY_RULE_SETUP +#line 289 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 25: YY_RULE_SETUP +#line 292 "ncgen.l" {int32_val = -1; return lexdebug(NC_UNLIMITED_K);} YY_BREAK case 26: YY_RULE_SETUP +#line 295 "ncgen.l" {return lexdebug(TYPES);} YY_BREAK case 27: YY_RULE_SETUP +#line 296 "ncgen.l" {return lexdebug(DIMENSIONS);} YY_BREAK case 28: YY_RULE_SETUP +#line 297 "ncgen.l" {return lexdebug(VARIABLES);} YY_BREAK case 29: YY_RULE_SETUP +#line 298 "ncgen.l" {return lexdebug(DATA);} YY_BREAK case 30: YY_RULE_SETUP +#line 299 "ncgen.l" {return lexdebug(GROUP);} YY_BREAK case 31: YY_RULE_SETUP +#line 301 "ncgen.l" {BEGIN(TEXT);return lexdebug(NETCDF);} YY_BREAK case 32: YY_RULE_SETUP +#line 303 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ if (yytext[0] == '-') { double_val = -INFINITY; @@ -2215,6 +2257,7 @@ YY_RULE_SETUP YY_BREAK case 33: YY_RULE_SETUP +#line 312 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ double_val = NAN; specialconstants = 1; @@ -2223,6 +2266,7 @@ YY_RULE_SETUP YY_BREAK case 34: YY_RULE_SETUP +#line 318 "ncgen.l" {/* 
missing value (pre-2.4 backward compatibility)*/ if (yytext[0] == '-') { float_val = -INFINITYF; @@ -2235,6 +2279,7 @@ YY_RULE_SETUP YY_BREAK case 35: YY_RULE_SETUP +#line 327 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ float_val = NANF; specialconstants = 1; @@ -2243,6 +2288,7 @@ YY_RULE_SETUP YY_BREAK case 36: YY_RULE_SETUP +#line 333 "ncgen.l" { #ifdef USE_NETCDF4 if(l_flag == L_C || l_flag == L_BINARY) @@ -2255,6 +2301,7 @@ YY_RULE_SETUP YY_BREAK case 37: YY_RULE_SETUP +#line 343 "ncgen.l" { bbClear(lextext); bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */ @@ -2265,6 +2312,7 @@ YY_RULE_SETUP YY_BREAK case 38: YY_RULE_SETUP +#line 352 "ncgen.l" {struct Specialtoken* st; bbClear(lextext); bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */ @@ -2278,6 +2326,7 @@ YY_RULE_SETUP case 39: /* rule 39 can match eol */ YY_RULE_SETUP +#line 362 "ncgen.l" { int c; char* p; char* q; @@ -2297,6 +2346,7 @@ YY_RULE_SETUP YY_BREAK case 40: YY_RULE_SETUP +#line 379 "ncgen.l" { char* id = NULL; int len; len = strlen(yytext); len = unescape(yytext,len,ISIDENT,&id); @@ -2311,6 +2361,7 @@ YY_RULE_SETUP YY_BREAK case 41: YY_RULE_SETUP +#line 391 "ncgen.l" { /* We need to try to see what size of integer ((u)int). 
@@ -2391,6 +2442,7 @@ done: return 0; YY_BREAK case 42: YY_RULE_SETUP +#line 469 "ncgen.l" { int c; int token = 0; @@ -2442,6 +2494,7 @@ YY_RULE_SETUP YY_BREAK case 43: YY_RULE_SETUP +#line 517 "ncgen.l" { if (sscanf((char*)yytext, "%le", &double_val) != 1) { sprintf(errstr,"bad long or double constant: %s",(char*)yytext); @@ -2452,6 +2505,7 @@ YY_RULE_SETUP YY_BREAK case 44: YY_RULE_SETUP +#line 524 "ncgen.l" { if (sscanf((char*)yytext, "%e", &float_val) != 1) { sprintf(errstr,"bad float constant: %s",(char*)yytext); @@ -2463,6 +2517,7 @@ YY_RULE_SETUP case 45: /* rule 45 can match eol */ YY_RULE_SETUP +#line 531 "ncgen.l" { (void) sscanf((char*)&yytext[1],"%c",&byte_val); return lexdebug(BYTE_CONST); @@ -2470,6 +2525,7 @@ YY_RULE_SETUP YY_BREAK case 46: YY_RULE_SETUP +#line 535 "ncgen.l" { int oct = unescapeoct(&yytext[2]); if(oct < 0) { @@ -2482,6 +2538,7 @@ YY_RULE_SETUP YY_BREAK case 47: YY_RULE_SETUP +#line 544 "ncgen.l" { int hex = unescapehex(&yytext[3]); if(byte_val < 0) { @@ -2494,6 +2551,7 @@ YY_RULE_SETUP YY_BREAK case 48: YY_RULE_SETUP +#line 553 "ncgen.l" { switch ((char)yytext[2]) { case 'a': byte_val = '\007'; break; /* not everyone under- @@ -2504,8 +2562,9 @@ YY_RULE_SETUP case 'r': byte_val = '\r'; break; case 't': byte_val = '\t'; break; case 'v': byte_val = '\v'; break; + case '0' : byte_val = '\0'; break; case '\\': byte_val = '\\'; break; - case '?': byte_val = '\177'; break; + case '?': byte_val = '\177'; break; case '\'': byte_val = '\''; break; default: byte_val = (char)yytext[2]; } @@ -2515,6 +2574,7 @@ YY_RULE_SETUP case 49: /* rule 49 can match eol */ YY_RULE_SETUP +#line 572 "ncgen.l" { lineno++ ; break; @@ -2522,6 +2582,7 @@ YY_RULE_SETUP YY_BREAK case 50: YY_RULE_SETUP +#line 577 "ncgen.l" {/*initial*/ BEGIN(ST_C_COMMENT); break; @@ -2530,18 +2591,21 @@ YY_RULE_SETUP case 51: /* rule 51 can match eol */ YY_RULE_SETUP +#line 582 "ncgen.l" {/* continuation */ break; } YY_BREAK case 52: YY_RULE_SETUP +#line 586 "ncgen.l" {/* final */ 
BEGIN(INITIAL); break; } YY_BREAK case YY_STATE_EOF(ST_C_COMMENT): +#line 591 "ncgen.l" {/* final, error */ fprintf(stderr,"unterminated /**/ comment"); BEGIN(INITIAL); @@ -2550,14 +2614,17 @@ case YY_STATE_EOF(ST_C_COMMENT): YY_BREAK case 53: YY_RULE_SETUP +#line 597 "ncgen.l" {/* Note: this next rule will not work for UTF8 characters */ return lexdebug(yytext[0]) ; } YY_BREAK case 54: YY_RULE_SETUP +#line 600 "ncgen.l" ECHO; YY_BREAK +#line 2628 "ncgenl.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(TEXT): yyterminate(); @@ -3563,6 +3630,8 @@ void yyfree (void * ptr ) #define YYTABLES_NAME "yytables" +#line 600 "ncgen.l" + static int lexdebug(int token) { @@ -3661,6 +3730,7 @@ parseULL(int radix, char* text, int* failp) return uint64; } + /** Given the raw bits, the sign char, the tag, and hasU fill in the appropriate *_val field diff --git a/ncgen/ncgeny.c b/ncgen/ncgeny.c index b44fdc2242..5bf2c1ea68 100644 --- a/ncgen/ncgeny.c +++ b/ncgen/ncgeny.c @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 3.8.2. */ +/* A Bison parser, made by GNU Bison 3.7.5. */ /* Bison implementation for Yacc-like parsers in C @@ -16,7 +16,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* As a special exception, you may create a larger work that contains part or all of the Bison parser skeleton and distribute that work @@ -46,10 +46,10 @@ USER NAME SPACE" below. */ /* Identify Bison output, and Bison version. */ -#define YYBISON 30802 +#define YYBISON 30705 /* Bison version string. */ -#define YYBISON_VERSION "3.8.2" +#define YYBISON_VERSION "3.7.5" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -539,18 +539,12 @@ typedef int yy_state_fast_t; # define YY_USE(E) /* empty */ #endif +#if defined __GNUC__ && !
defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ /* Suppress an incorrect diagnostic about yylval being uninitialized. */ -#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ -# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 -# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ - _Pragma ("GCC diagnostic push") \ - _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") -# else -# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -# endif # define YY_IGNORE_MAYBE_UNINITIALIZED_END \ _Pragma ("GCC diagnostic pop") #else @@ -770,7 +764,7 @@ static const yytype_int8 yytranslate[] = }; #if YYDEBUG -/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_int16 yyrline[] = { 0, 243, 243, 249, 251, 258, 265, 265, 268, 277, @@ -838,6 +832,21 @@ yysymbol_name (yysymbol_kind_t yysymbol) } #endif +#ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). */ +static const yytype_int16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, + 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, + 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, + 123, 125, 59, 44, 61, 40, 41, 42, 58 +}; +#endif + #define YYPACT_NINF (-153) #define yypact_value_is_default(Yyn) \ @@ -848,8 +857,8 @@ yysymbol_name (yysymbol_kind_t yysymbol) #define yytable_value_is_error(Yyn) \ 0 -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. 
*/ + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ static const yytype_int16 yypact[] = { -10, -24, 30, -153, -18, -153, 233, -153, -153, -153, @@ -882,9 +891,9 @@ static const yytype_int16 yypact[] = 233, -3, -153, -153, -153, -153 }; -/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. - Performed when YYTABLE does not specify something else to do. Zero - means the default is an error. */ + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. */ static const yytype_uint8 yydefact[] = { 0, 0, 0, 3, 0, 1, 88, 2, 35, 36, @@ -917,7 +926,7 @@ static const yytype_uint8 yydefact[] = 88, 0, 79, 73, 10, 81 }; -/* YYPGOTO[NTERM-NUM]. */ + /* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { -153, -153, -153, -153, 22, -6, -153, -153, -153, -153, @@ -929,7 +938,7 @@ static const yytype_int16 yypgoto[] = -153, -152, -153, -37, -29, 2, -153, -22 }; -/* YYDEFGOTO[NTERM-NUM]. */ + /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { 0, 2, 4, 7, 23, 36, 49, 195, 260, 40, @@ -941,9 +950,9 @@ static const yytype_int16 yydefgoto[] = 193, 115, 163, 87, 88, 89, 217, 30 }; -/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule whose - number is the opposite. If YYTABLE_NINF, syntax error. */ + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ static const yytype_int16 yytable[] = { 35, 79, 107, 75, 37, 194, 76, 178, 90, 74, @@ -1038,8 +1047,8 @@ static const yytype_int16 yycheck[] = 39 }; -/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of - state STATE-NUM. */ + /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. 
*/ static const yytype_uint8 yystos[] = { 0, 31, 70, 59, 71, 0, 60, 72, 4, 5, @@ -1072,7 +1081,7 @@ static const yytype_uint8 yystos[] = 61, 63, 66, 107, 116, 112 }; -/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */ + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { 0, 69, 70, 71, 72, 73, 74, 74, 76, 77, @@ -1093,7 +1102,7 @@ static const yytype_uint8 yyr1[] = 132, 132, 132, 132, 133, 134, 134, 135, 135, 136 }; -/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ static const yytype_int8 yyr2[] = { 0, 2, 3, 1, 4, 5, 0, 2, 0, 0, @@ -1123,7 +1132,6 @@ enum { YYENOMEM = -2 }; #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab #define YYERROR goto yyerrorlab -#define YYNOMEM goto yyexhaustedlab #define YYRECOVERING() (!!yyerrstatus) @@ -1164,7 +1172,10 @@ do { \ YYFPRINTF Args; \ } while (0) - +/* This macro is provided for backward compatibility. */ +# ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif # define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ @@ -1191,6 +1202,10 @@ yy_symbol_value_print (FILE *yyo, YY_USE (yyoutput); if (!yyvaluep) return; +# ifdef YYPRINT + if (yykind < YYNTOKENS) + YYPRINT (yyo, yytoknum[yykind], *yyvaluep); +# endif YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN YY_USE (yykind); YY_IGNORE_MAYBE_UNINITIALIZED_END @@ -1645,7 +1660,6 @@ yyparse (void) YYDPRINTF ((stderr, "Starting parse\n")); yychar = YYEMPTY; /* Cause a token to be read. */ - goto yysetstate; @@ -1671,7 +1685,7 @@ yyparse (void) if (yyss + yystacksize - 1 <= yyssp) #if !defined yyoverflow && !defined YYSTACK_RELOCATE - YYNOMEM; + goto yyexhaustedlab; #else { /* Get the current used size of the three stacks, in elements. */ @@ -1699,7 +1713,7 @@ yyparse (void) # else /* defined YYSTACK_RELOCATE */ /* Extend the stack our own way. 
*/ if (YYMAXDEPTH <= yystacksize) - YYNOMEM; + goto yyexhaustedlab; yystacksize *= 2; if (YYMAXDEPTH < yystacksize) yystacksize = YYMAXDEPTH; @@ -1710,7 +1724,7 @@ yyparse (void) YY_CAST (union yyalloc *, YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); if (! yyptr) - YYNOMEM; + goto yyexhaustedlab; YYSTACK_RELOCATE (yyss_alloc, yyss); YYSTACK_RELOCATE (yyvs_alloc, yyvs); # undef YYSTACK_RELOCATE @@ -1732,7 +1746,6 @@ yyparse (void) } #endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ - if (yystate == YYFINAL) YYACCEPT; @@ -1847,13 +1860,13 @@ yyparse (void) case 2: /* ncdesc: NETCDF datasetid rootgroup */ #line 246 "ncgen.y" {if (error_count > 0) YYABORT;} -#line 1851 "ncgeny.c" +#line 1864 "ncgeny.c" break; case 3: /* datasetid: DATASETID */ #line 249 "ncgen.y" {createrootgroup(datasetname);} -#line 1857 "ncgeny.c" +#line 1870 "ncgeny.c" break; case 8: /* $@1: %empty */ @@ -1865,25 +1878,25 @@ yyparse (void) yyerror("duplicate group declaration within parent group for %s", id->name); } -#line 1869 "ncgeny.c" +#line 1882 "ncgeny.c" break; case 9: /* $@2: %empty */ #line 277 "ncgen.y" {listpop(groupstack);} -#line 1875 "ncgeny.c" +#line 1888 "ncgeny.c" break; case 12: /* typesection: TYPES */ #line 283 "ncgen.y" {} -#line 1881 "ncgeny.c" +#line 1894 "ncgeny.c" break; case 13: /* typesection: TYPES typedecls */ #line 285 "ncgen.y" {markcdf4("Type specification");} -#line 1887 "ncgeny.c" +#line 1900 "ncgeny.c" break; case 16: /* typename: ident */ @@ -1895,19 +1908,19 @@ yyparse (void) (yyvsp[0].sym)->name); listpush(typdefs,(void*)(yyvsp[0].sym)); } -#line 1899 "ncgeny.c" +#line 1912 "ncgeny.c" break; case 17: /* type_or_attr_decl: typedecl */ #line 300 "ncgen.y" {} -#line 1905 "ncgeny.c" +#line 1918 "ncgeny.c" break; case 18: /* type_or_attr_decl: attrdecl ';' */ #line 300 "ncgen.y" {} -#line 1911 "ncgeny.c" +#line 1924 "ncgeny.c" break; case 25: /* enumdecl: primtype ENUM typename '{' enumidlist '}' */ @@ -1938,13 +1951,13 @@ yyparse 
(void) } listsetlength(stack,stackbase);/* remove stack nodes*/ } -#line 1942 "ncgeny.c" +#line 1955 "ncgeny.c" break; case 26: /* enumidlist: enumid */ #line 343 "ncgen.y" {(yyval.mark)=listlength(stack); listpush(stack,(void*)(yyvsp[0].sym));} -#line 1948 "ncgeny.c" +#line 1961 "ncgeny.c" break; case 27: /* enumidlist: enumidlist ',' enumid */ @@ -1963,7 +1976,7 @@ yyparse (void) } listpush(stack,(void*)(yyvsp[0].sym)); } -#line 1967 "ncgeny.c" +#line 1980 "ncgeny.c" break; case 28: /* enumid: ident '=' constint */ @@ -1974,7 +1987,7 @@ yyparse (void) (yyvsp[-2].sym)->typ.econst=(yyvsp[0].constant); (yyval.sym)=(yyvsp[-2].sym); } -#line 1978 "ncgeny.c" +#line 1991 "ncgeny.c" break; case 29: /* opaquedecl: OPAQUE_ '(' INT_CONST ')' typename */ @@ -1988,7 +2001,7 @@ yyparse (void) (yyvsp[0].sym)->typ.size=int32_val; (void)ncaux_class_alignment(NC_OPAQUE,&(yyvsp[0].sym)->typ.alignment); } -#line 1992 "ncgeny.c" +#line 2005 "ncgeny.c" break; case 30: /* vlendecl: typeref '(' '*' ')' typename */ @@ -2004,7 +2017,7 @@ yyparse (void) (yyvsp[0].sym)->typ.size=VLENSIZE; (void)ncaux_class_alignment(NC_VLEN,&(yyvsp[0].sym)->typ.alignment); } -#line 2008 "ncgeny.c" +#line 2021 "ncgeny.c" break; case 31: /* compounddecl: COMPOUND typename '{' fields '}' */ @@ -2038,19 +2051,19 @@ yyparse (void) } listsetlength(stack,stackbase);/* remove stack nodes*/ } -#line 2042 "ncgeny.c" +#line 2055 "ncgeny.c" break; case 32: /* fields: field ';' */ #line 429 "ncgen.y" {(yyval.mark)=(yyvsp[-1].mark);} -#line 2048 "ncgeny.c" +#line 2061 "ncgeny.c" break; case 33: /* fields: fields field ';' */ #line 430 "ncgen.y" {(yyval.mark)=(yyvsp[-2].mark);} -#line 2054 "ncgeny.c" +#line 2067 "ncgeny.c" break; case 34: /* field: typeref fieldlist */ @@ -2066,103 +2079,103 @@ yyparse (void) f->typ.basetype = (yyvsp[-1].sym); } } -#line 2070 "ncgeny.c" +#line 2083 "ncgeny.c" break; case 35: /* primtype: CHAR_K */ #line 447 "ncgen.y" { (yyval.sym) = primsymbols[NC_CHAR]; } -#line 2076 "ncgeny.c" +#line 
2089 "ncgeny.c" break; case 36: /* primtype: BYTE_K */ #line 448 "ncgen.y" { (yyval.sym) = primsymbols[NC_BYTE]; } -#line 2082 "ncgeny.c" +#line 2095 "ncgeny.c" break; case 37: /* primtype: SHORT_K */ #line 449 "ncgen.y" { (yyval.sym) = primsymbols[NC_SHORT]; } -#line 2088 "ncgeny.c" +#line 2101 "ncgeny.c" break; case 38: /* primtype: INT_K */ #line 450 "ncgen.y" { (yyval.sym) = primsymbols[NC_INT]; } -#line 2094 "ncgeny.c" +#line 2107 "ncgeny.c" break; case 39: /* primtype: FLOAT_K */ #line 451 "ncgen.y" { (yyval.sym) = primsymbols[NC_FLOAT]; } -#line 2100 "ncgeny.c" +#line 2113 "ncgeny.c" break; case 40: /* primtype: DOUBLE_K */ #line 452 "ncgen.y" { (yyval.sym) = primsymbols[NC_DOUBLE]; } -#line 2106 "ncgeny.c" +#line 2119 "ncgeny.c" break; case 41: /* primtype: UBYTE_K */ #line 453 "ncgen.y" { vercheck(NC_UBYTE); (yyval.sym) = primsymbols[NC_UBYTE]; } -#line 2112 "ncgeny.c" +#line 2125 "ncgeny.c" break; case 42: /* primtype: USHORT_K */ #line 454 "ncgen.y" { vercheck(NC_USHORT); (yyval.sym) = primsymbols[NC_USHORT]; } -#line 2118 "ncgeny.c" +#line 2131 "ncgeny.c" break; case 43: /* primtype: UINT_K */ #line 455 "ncgen.y" { vercheck(NC_UINT); (yyval.sym) = primsymbols[NC_UINT]; } -#line 2124 "ncgeny.c" +#line 2137 "ncgeny.c" break; case 44: /* primtype: INT64_K */ #line 456 "ncgen.y" { vercheck(NC_INT64); (yyval.sym) = primsymbols[NC_INT64]; } -#line 2130 "ncgeny.c" +#line 2143 "ncgeny.c" break; case 45: /* primtype: UINT64_K */ #line 457 "ncgen.y" { vercheck(NC_UINT64); (yyval.sym) = primsymbols[NC_UINT64]; } -#line 2136 "ncgeny.c" +#line 2149 "ncgeny.c" break; case 46: /* primtype: STRING_K */ #line 458 "ncgen.y" { vercheck(NC_STRING); (yyval.sym) = primsymbols[NC_STRING]; } -#line 2142 "ncgeny.c" +#line 2155 "ncgeny.c" break; case 48: /* dimsection: DIMENSIONS */ #line 462 "ncgen.y" {} -#line 2148 "ncgeny.c" +#line 2161 "ncgeny.c" break; case 49: /* dimsection: DIMENSIONS dimdecls */ #line 463 "ncgen.y" {} -#line 2154 "ncgeny.c" +#line 2167 "ncgeny.c" break; 
case 52: /* dim_or_attr_decl: dimdeclist */ #line 470 "ncgen.y" {} -#line 2160 "ncgeny.c" +#line 2173 "ncgeny.c" break; case 53: /* dim_or_attr_decl: attrdecl */ #line 470 "ncgen.y" {} -#line 2166 "ncgeny.c" +#line 2179 "ncgeny.c" break; case 56: /* dimdecl: dimd '=' constint */ @@ -2174,7 +2187,7 @@ fprintf(stderr,"dimension: %s = %llu\n",(yyvsp[-2].sym)->name,(unsigned long lon #endif reclaimconstant((yyvsp[0].constant)); } -#line 2178 "ncgeny.c" +#line 2191 "ncgeny.c" break; case 57: /* dimdecl: dimd '=' NC_UNLIMITED_K */ @@ -2186,7 +2199,7 @@ fprintf(stderr,"dimension: %s = %llu\n",(yyvsp[-2].sym)->name,(unsigned long lon fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); #endif } -#line 2190 "ncgeny.c" +#line 2203 "ncgeny.c" break; case 58: /* dimd: ident */ @@ -2200,31 +2213,31 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); (yyval.sym)=(yyvsp[0].sym); listpush(dimdefs,(void*)(yyvsp[0].sym)); } -#line 2204 "ncgeny.c" +#line 2217 "ncgeny.c" break; case 60: /* vasection: VARIABLES */ #line 508 "ncgen.y" {} -#line 2210 "ncgeny.c" +#line 2223 "ncgeny.c" break; case 61: /* vasection: VARIABLES vadecls */ #line 509 "ncgen.y" {} -#line 2216 "ncgeny.c" +#line 2229 "ncgeny.c" break; case 64: /* vadecl_or_attr: vardecl */ #line 516 "ncgen.y" {} -#line 2222 "ncgeny.c" +#line 2235 "ncgeny.c" break; case 65: /* vadecl_or_attr: attrdecl */ #line 516 "ncgen.y" {} -#line 2228 "ncgeny.c" +#line 2241 "ncgeny.c" break; case 66: /* vardecl: typeref varlist */ @@ -2248,7 +2261,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } listsetlength(stack,stackbase);/* remove stack nodes*/ } -#line 2252 "ncgeny.c" +#line 2265 "ncgeny.c" break; case 67: /* varlist: varspec */ @@ -2256,13 +2269,13 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); {(yyval.mark)=listlength(stack); listpush(stack,(void*)(yyvsp[0].sym)); } -#line 2260 "ncgeny.c" +#line 2273 "ncgeny.c" break; case 68: /* varlist: varlist ',' 
varspec */ #line 545 "ncgen.y" {(yyval.mark)=(yyvsp[-2].mark); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2266 "ncgeny.c" +#line 2279 "ncgeny.c" break; case 69: /* varspec: varident dimspec */ @@ -2293,31 +2306,31 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); listsetlength(stack,stackbase);/* remove stack nodes*/ (yyval.sym) = var; } -#line 2297 "ncgeny.c" +#line 2310 "ncgeny.c" break; case 70: /* dimspec: %empty */ #line 577 "ncgen.y" {(yyval.mark)=listlength(stack);} -#line 2303 "ncgeny.c" +#line 2316 "ncgeny.c" break; case 71: /* dimspec: '(' dimlist ')' */ #line 578 "ncgen.y" {(yyval.mark)=(yyvsp[-1].mark);} -#line 2309 "ncgeny.c" +#line 2322 "ncgeny.c" break; case 72: /* dimlist: dimref */ #line 581 "ncgen.y" {(yyval.mark)=listlength(stack); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2315 "ncgeny.c" +#line 2328 "ncgeny.c" break; case 73: /* dimlist: dimlist ',' dimref */ #line 583 "ncgen.y" {(yyval.mark)=(yyvsp[-2].mark); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2321 "ncgeny.c" +#line 2334 "ncgeny.c" break; case 74: /* dimref: path */ @@ -2332,7 +2345,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } (yyval.sym)=dimsym; } -#line 2336 "ncgeny.c" +#line 2349 "ncgeny.c" break; case 75: /* fieldlist: fieldspec */ @@ -2340,13 +2353,13 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); {(yyval.mark)=listlength(stack); listpush(stack,(void*)(yyvsp[0].sym)); } -#line 2344 "ncgeny.c" +#line 2357 "ncgeny.c" break; case 76: /* fieldlist: fieldlist ',' fieldspec */ #line 605 "ncgen.y" {(yyval.mark)=(yyvsp[-2].mark); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2350 "ncgeny.c" +#line 2363 "ncgeny.c" break; case 77: /* fieldspec: ident fielddimspec */ @@ -2377,31 +2390,31 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); listsetlength(stack,stackbase);/* remove stack nodes*/ (yyval.sym) = (yyvsp[-1].sym); } -#line 2381 "ncgeny.c" +#line 2394 "ncgeny.c" break; case 
78: /* fielddimspec: %empty */ #line 638 "ncgen.y" {(yyval.mark)=listlength(stack);} -#line 2387 "ncgeny.c" +#line 2400 "ncgeny.c" break; case 79: /* fielddimspec: '(' fielddimlist ')' */ #line 639 "ncgen.y" {(yyval.mark)=(yyvsp[-1].mark);} -#line 2393 "ncgeny.c" +#line 2406 "ncgeny.c" break; case 80: /* fielddimlist: fielddim */ #line 643 "ncgen.y" {(yyval.mark)=listlength(stack); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2399 "ncgeny.c" +#line 2412 "ncgeny.c" break; case 81: /* fielddimlist: fielddimlist ',' fielddim */ #line 645 "ncgen.y" {(yyval.mark)=(yyvsp[-2].mark); listpush(stack,(void*)(yyvsp[0].sym));} -#line 2405 "ncgeny.c" +#line 2418 "ncgeny.c" break; case 82: /* fielddim: UINT_CONST */ @@ -2415,7 +2428,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); (yyval.sym)->dim.isconstant = 1; (yyval.sym)->dim.declsize = uint32_val; } -#line 2419 "ncgeny.c" +#line 2432 "ncgeny.c" break; case 83: /* fielddim: INT_CONST */ @@ -2433,7 +2446,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); (yyval.sym)->dim.isconstant = 1; (yyval.sym)->dim.declsize = int32_val; } -#line 2437 "ncgeny.c" +#line 2450 "ncgeny.c" break; case 84: /* varref: ambiguous_ref */ @@ -2445,7 +2458,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } (yyval.sym)=vsym; } -#line 2449 "ncgeny.c" +#line 2462 "ncgeny.c" break; case 85: /* typeref: ambiguous_ref */ @@ -2457,7 +2470,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } (yyval.sym)=tsym; } -#line 2461 "ncgeny.c" +#line 2474 "ncgeny.c" break; case 86: /* ambiguous_ref: path */ @@ -2480,49 +2493,49 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } (yyval.sym)=tvsym; } -#line 2484 "ncgeny.c" +#line 2497 "ncgeny.c" break; case 87: /* ambiguous_ref: primtype */ #line 720 "ncgen.y" {(yyval.sym)=(yyvsp[0].sym);} -#line 2490 "ncgeny.c" +#line 2503 "ncgeny.c" break; case 88: /* attrdecllist: %empty */ #line 727 "ncgen.y" {} 
-#line 2496 "ncgeny.c" +#line 2509 "ncgeny.c" break; case 89: /* attrdecllist: attrdecl ';' attrdecllist */ #line 727 "ncgen.y" {} -#line 2502 "ncgeny.c" +#line 2515 "ncgeny.c" break; case 90: /* attrdecl: ':' _NCPROPS '=' conststring */ #line 731 "ncgen.y" {(yyval.sym) = makespecial(_NCPROPS_FLAG,NULL,NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2508 "ncgeny.c" +#line 2521 "ncgeny.c" break; case 91: /* attrdecl: ':' _ISNETCDF4 '=' constbool */ #line 733 "ncgen.y" {(yyval.sym) = makespecial(_ISNETCDF4_FLAG,NULL,NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2514 "ncgeny.c" +#line 2527 "ncgeny.c" break; case 92: /* attrdecl: ':' _SUPERBLOCK '=' constint */ #line 735 "ncgen.y" {(yyval.sym) = makespecial(_SUPERBLOCK_FLAG,NULL,NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2520 "ncgeny.c" +#line 2533 "ncgeny.c" break; case 93: /* attrdecl: ':' ident '=' datalist */ #line 737 "ncgen.y" { (yyval.sym)=makeattribute((yyvsp[-2].sym),NULL,NULL,(yyvsp[0].datalist),ATTRGLOBAL);} -#line 2526 "ncgeny.c" +#line 2539 "ncgeny.c" break; case 94: /* attrdecl: typeref ambiguous_ref ':' ident '=' datalist */ @@ -2535,7 +2548,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); YYABORT; } } -#line 2539 "ncgeny.c" +#line 2552 "ncgeny.c" break; case 95: /* attrdecl: ambiguous_ref ':' ident '=' datalist */ @@ -2550,97 +2563,97 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); YYABORT; } } -#line 2554 "ncgeny.c" +#line 2567 "ncgeny.c" break; case 96: /* attrdecl: ambiguous_ref ':' _FILLVALUE '=' datalist */ #line 759 "ncgen.y" {(yyval.sym) = makespecial(_FILLVALUE_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].datalist),ISLIST);} -#line 2560 "ncgeny.c" +#line 2573 "ncgeny.c" break; case 97: /* attrdecl: typeref ambiguous_ref ':' _FILLVALUE '=' datalist */ #line 761 "ncgen.y" {(yyval.sym) = makespecial(_FILLVALUE_FLAG,(yyvsp[-4].sym),(yyvsp[-5].sym),(void*)(yyvsp[0].datalist),ISLIST);} -#line 2566 "ncgeny.c" +#line 2579 "ncgeny.c" break; case 
98: /* attrdecl: ambiguous_ref ':' _STORAGE '=' conststring */ #line 763 "ncgen.y" {(yyval.sym) = makespecial(_STORAGE_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2572 "ncgeny.c" +#line 2585 "ncgeny.c" break; case 99: /* attrdecl: ambiguous_ref ':' _CHUNKSIZES '=' intlist */ #line 765 "ncgen.y" {(yyval.sym) = makespecial(_CHUNKSIZES_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].datalist),ISLIST);} -#line 2578 "ncgeny.c" +#line 2591 "ncgeny.c" break; case 100: /* attrdecl: ambiguous_ref ':' _FLETCHER32 '=' constbool */ #line 767 "ncgen.y" {(yyval.sym) = makespecial(_FLETCHER32_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2584 "ncgeny.c" +#line 2597 "ncgeny.c" break; case 101: /* attrdecl: ambiguous_ref ':' _DEFLATELEVEL '=' constint */ #line 769 "ncgen.y" {(yyval.sym) = makespecial(_DEFLATE_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2590 "ncgeny.c" +#line 2603 "ncgeny.c" break; case 102: /* attrdecl: ambiguous_ref ':' _SHUFFLE '=' constbool */ #line 771 "ncgen.y" {(yyval.sym) = makespecial(_SHUFFLE_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2596 "ncgeny.c" +#line 2609 "ncgeny.c" break; case 103: /* attrdecl: ambiguous_ref ':' _ENDIANNESS '=' conststring */ #line 773 "ncgen.y" {(yyval.sym) = makespecial(_ENDIAN_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2602 "ncgeny.c" +#line 2615 "ncgeny.c" break; case 104: /* attrdecl: ambiguous_ref ':' _FILTER '=' conststring */ #line 775 "ncgen.y" {(yyval.sym) = makespecial(_FILTER_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2608 "ncgeny.c" +#line 2621 "ncgeny.c" break; case 105: /* attrdecl: ambiguous_ref ':' _CODECS '=' conststring */ #line 777 "ncgen.y" {(yyval.sym) = makespecial(_CODECS_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2614 "ncgeny.c" +#line 2627 "ncgeny.c" break; case 106: /* attrdecl: ambiguous_ref ':' _QUANTIZEBG '=' constint 
*/ #line 779 "ncgen.y" {(yyval.sym) = makespecial(_QUANTIZEBG_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2620 "ncgeny.c" +#line 2633 "ncgeny.c" break; case 107: /* attrdecl: ambiguous_ref ':' _QUANTIZEGBR '=' constint */ #line 781 "ncgen.y" {(yyval.sym) = makespecial(_QUANTIZEGBR_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2626 "ncgeny.c" +#line 2639 "ncgeny.c" break; case 108: /* attrdecl: ambiguous_ref ':' _QUANTIZEBR '=' constint */ #line 783 "ncgen.y" {(yyval.sym) = makespecial(_QUANTIZEBR_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2632 "ncgeny.c" +#line 2645 "ncgeny.c" break; case 109: /* attrdecl: ambiguous_ref ':' _NOFILL '=' constbool */ #line 785 "ncgen.y" {(yyval.sym) = makespecial(_NOFILL_FLAG,(yyvsp[-4].sym),NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2638 "ncgeny.c" +#line 2651 "ncgeny.c" break; case 110: /* attrdecl: ':' _FORMAT '=' conststring */ #line 787 "ncgen.y" {(yyval.sym) = makespecial(_FORMAT_FLAG,NULL,NULL,(void*)(yyvsp[0].constant),ISCONST);} -#line 2644 "ncgeny.c" +#line 2657 "ncgeny.c" break; case 111: /* path: ident */ @@ -2651,7 +2664,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); (yyvsp[0].sym)->is_prefixed=0; setpathcurrent((yyvsp[0].sym)); } -#line 2655 "ncgeny.c" +#line 2668 "ncgeny.c" break; case 112: /* path: PATH */ @@ -2662,269 +2675,269 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); (yyvsp[0].sym)->is_prefixed=1; /* path is set in ncgen.l*/ } -#line 2666 "ncgeny.c" +#line 2679 "ncgeny.c" break; case 114: /* datasection: DATA */ #line 808 "ncgen.y" {} -#line 2672 "ncgeny.c" +#line 2685 "ncgeny.c" break; case 115: /* datasection: DATA datadecls */ #line 809 "ncgen.y" {} -#line 2678 "ncgeny.c" +#line 2691 "ncgeny.c" break; case 118: /* datadecl: varref '=' datalist */ #line 817 "ncgen.y" {(yyvsp[-2].sym)->data = (yyvsp[0].datalist);} -#line 2684 "ncgeny.c" +#line 2697 "ncgeny.c" break; case 
119: /* datalist: datalist0 */ #line 820 "ncgen.y" {(yyval.datalist) = (yyvsp[0].datalist);} -#line 2690 "ncgeny.c" +#line 2703 "ncgeny.c" break; case 120: /* datalist: datalist1 */ #line 821 "ncgen.y" {(yyval.datalist) = (yyvsp[0].datalist);} -#line 2696 "ncgeny.c" +#line 2709 "ncgeny.c" break; case 121: /* datalist0: %empty */ #line 825 "ncgen.y" {(yyval.datalist) = builddatalist(0);} -#line 2702 "ncgeny.c" +#line 2715 "ncgeny.c" break; case 122: /* datalist1: dataitem */ #line 829 "ncgen.y" {(yyval.datalist) = const2list((yyvsp[0].constant));} -#line 2708 "ncgeny.c" +#line 2721 "ncgeny.c" break; case 123: /* datalist1: datalist ',' dataitem */ #line 831 "ncgen.y" {dlappend((yyvsp[-2].datalist),((yyvsp[0].constant))); (yyval.datalist)=(yyvsp[-2].datalist); } -#line 2714 "ncgeny.c" +#line 2727 "ncgeny.c" break; case 124: /* dataitem: constdata */ #line 835 "ncgen.y" {(yyval.constant)=(yyvsp[0].constant);} -#line 2720 "ncgeny.c" +#line 2733 "ncgeny.c" break; case 125: /* dataitem: '{' datalist '}' */ #line 836 "ncgen.y" {(yyval.constant)=builddatasublist((yyvsp[-1].datalist));} -#line 2726 "ncgeny.c" +#line 2739 "ncgeny.c" break; case 126: /* constdata: simpleconstant */ #line 840 "ncgen.y" {(yyval.constant)=(yyvsp[0].constant);} -#line 2732 "ncgeny.c" +#line 2745 "ncgeny.c" break; case 127: /* constdata: OPAQUESTRING */ #line 841 "ncgen.y" {(yyval.constant)=makeconstdata(NC_OPAQUE);} -#line 2738 "ncgeny.c" +#line 2751 "ncgeny.c" break; case 128: /* constdata: FILLMARKER */ #line 842 "ncgen.y" {(yyval.constant)=makeconstdata(NC_FILLVALUE);} -#line 2744 "ncgeny.c" +#line 2757 "ncgeny.c" break; case 129: /* constdata: NIL */ #line 843 "ncgen.y" {(yyval.constant)=makeconstdata(NC_NIL);} -#line 2750 "ncgeny.c" +#line 2763 "ncgeny.c" break; case 130: /* constdata: econstref */ #line 844 "ncgen.y" {(yyval.constant)=(yyvsp[0].constant);} -#line 2756 "ncgeny.c" +#line 2769 "ncgeny.c" break; case 132: /* econstref: path */ #line 849 "ncgen.y" {(yyval.constant) = 
makeenumconstref((yyvsp[0].sym));} -#line 2762 "ncgeny.c" +#line 2775 "ncgeny.c" break; case 133: /* function: ident '(' arglist ')' */ #line 853 "ncgen.y" {(yyval.constant)=evaluate((yyvsp[-3].sym),(yyvsp[-1].datalist));} -#line 2768 "ncgeny.c" +#line 2781 "ncgeny.c" break; case 134: /* arglist: simpleconstant */ #line 858 "ncgen.y" {(yyval.datalist) = const2list((yyvsp[0].constant));} -#line 2774 "ncgeny.c" +#line 2787 "ncgeny.c" break; case 135: /* arglist: arglist ',' simpleconstant */ #line 860 "ncgen.y" {dlappend((yyvsp[-2].datalist),((yyvsp[0].constant))); (yyval.datalist)=(yyvsp[-2].datalist);} -#line 2780 "ncgeny.c" +#line 2793 "ncgeny.c" break; case 136: /* simpleconstant: CHAR_CONST */ #line 864 "ncgen.y" {(yyval.constant)=makeconstdata(NC_CHAR);} -#line 2786 "ncgeny.c" +#line 2799 "ncgeny.c" break; case 137: /* simpleconstant: BYTE_CONST */ #line 865 "ncgen.y" {(yyval.constant)=makeconstdata(NC_BYTE);} -#line 2792 "ncgeny.c" +#line 2805 "ncgeny.c" break; case 138: /* simpleconstant: SHORT_CONST */ #line 866 "ncgen.y" {(yyval.constant)=makeconstdata(NC_SHORT);} -#line 2798 "ncgeny.c" +#line 2811 "ncgeny.c" break; case 139: /* simpleconstant: INT_CONST */ #line 867 "ncgen.y" {(yyval.constant)=makeconstdata(NC_INT);} -#line 2804 "ncgeny.c" +#line 2817 "ncgeny.c" break; case 140: /* simpleconstant: INT64_CONST */ #line 868 "ncgen.y" {(yyval.constant)=makeconstdata(NC_INT64);} -#line 2810 "ncgeny.c" +#line 2823 "ncgeny.c" break; case 141: /* simpleconstant: UBYTE_CONST */ #line 869 "ncgen.y" {(yyval.constant)=makeconstdata(NC_UBYTE);} -#line 2816 "ncgeny.c" +#line 2829 "ncgeny.c" break; case 142: /* simpleconstant: USHORT_CONST */ #line 870 "ncgen.y" {(yyval.constant)=makeconstdata(NC_USHORT);} -#line 2822 "ncgeny.c" +#line 2835 "ncgeny.c" break; case 143: /* simpleconstant: UINT_CONST */ #line 871 "ncgen.y" {(yyval.constant)=makeconstdata(NC_UINT);} -#line 2828 "ncgeny.c" +#line 2841 "ncgeny.c" break; case 144: /* simpleconstant: UINT64_CONST */ #line 872 
"ncgen.y" {(yyval.constant)=makeconstdata(NC_UINT64);} -#line 2834 "ncgeny.c" +#line 2847 "ncgeny.c" break; case 145: /* simpleconstant: FLOAT_CONST */ #line 873 "ncgen.y" {(yyval.constant)=makeconstdata(NC_FLOAT);} -#line 2840 "ncgeny.c" +#line 2853 "ncgeny.c" break; case 146: /* simpleconstant: DOUBLE_CONST */ #line 874 "ncgen.y" {(yyval.constant)=makeconstdata(NC_DOUBLE);} -#line 2846 "ncgeny.c" +#line 2859 "ncgeny.c" break; case 147: /* simpleconstant: TERMSTRING */ #line 875 "ncgen.y" {(yyval.constant)=makeconstdata(NC_STRING);} -#line 2852 "ncgeny.c" +#line 2865 "ncgeny.c" break; case 148: /* intlist: constint */ #line 879 "ncgen.y" {(yyval.datalist) = const2list((yyvsp[0].constant));} -#line 2858 "ncgeny.c" +#line 2871 "ncgeny.c" break; case 149: /* intlist: intlist ',' constint */ #line 880 "ncgen.y" {(yyval.datalist)=(yyvsp[-2].datalist); dlappend((yyvsp[-2].datalist),((yyvsp[0].constant)));} -#line 2864 "ncgeny.c" +#line 2877 "ncgeny.c" break; case 150: /* constint: INT_CONST */ #line 885 "ncgen.y" {(yyval.constant)=makeconstdata(NC_INT);} -#line 2870 "ncgeny.c" +#line 2883 "ncgeny.c" break; case 151: /* constint: UINT_CONST */ #line 887 "ncgen.y" {(yyval.constant)=makeconstdata(NC_UINT);} -#line 2876 "ncgeny.c" +#line 2889 "ncgeny.c" break; case 152: /* constint: INT64_CONST */ #line 889 "ncgen.y" {(yyval.constant)=makeconstdata(NC_INT64);} -#line 2882 "ncgeny.c" +#line 2895 "ncgeny.c" break; case 153: /* constint: UINT64_CONST */ #line 891 "ncgen.y" {(yyval.constant)=makeconstdata(NC_UINT64);} -#line 2888 "ncgeny.c" +#line 2901 "ncgeny.c" break; case 154: /* conststring: TERMSTRING */ #line 895 "ncgen.y" {(yyval.constant)=makeconstdata(NC_STRING);} -#line 2894 "ncgeny.c" +#line 2907 "ncgeny.c" break; case 155: /* constbool: conststring */ #line 899 "ncgen.y" {(yyval.constant)=(yyvsp[0].constant);} -#line 2900 "ncgeny.c" +#line 2913 "ncgeny.c" break; case 156: /* constbool: constint */ #line 900 "ncgen.y" {(yyval.constant)=(yyvsp[0].constant);} -#line 
2906 "ncgeny.c" +#line 2919 "ncgeny.c" break; case 157: /* varident: IDENT */ #line 908 "ncgen.y" {(yyval.sym)=(yyvsp[0].sym);} -#line 2912 "ncgeny.c" +#line 2925 "ncgeny.c" break; case 158: /* varident: DATA */ #line 909 "ncgen.y" {(yyval.sym)=identkeyword((yyvsp[0].sym));} -#line 2918 "ncgeny.c" +#line 2931 "ncgeny.c" break; case 159: /* ident: IDENT */ #line 913 "ncgen.y" {(yyval.sym)=(yyvsp[0].sym);} -#line 2924 "ncgeny.c" +#line 2937 "ncgeny.c" break; -#line 2928 "ncgeny.c" +#line 2941 "ncgeny.c" default: break; } @@ -3000,7 +3013,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); } yyerror (yymsgp); if (yysyntax_error_status == YYENOMEM) - YYNOMEM; + goto yyexhaustedlab; } } @@ -3036,7 +3049,6 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); label yyerrorlab therefore never appears in user code. */ if (0) YYERROR; - ++yynerrs; /* Do not reclaim the symbols of the rule whose action triggered this YYERROR. */ @@ -3097,7 +3109,7 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); `-------------------------------------*/ yyacceptlab: yyresult = 0; - goto yyreturnlab; + goto yyreturn; /*-----------------------------------. @@ -3105,22 +3117,24 @@ fprintf(stderr,"dimension: %s = UNLIMITED\n",(yyvsp[-2].sym)->name); `-----------------------------------*/ yyabortlab: yyresult = 1; - goto yyreturnlab; + goto yyreturn; -/*-----------------------------------------------------------. -| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. | -`-----------------------------------------------------------*/ +#if 1 +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ yyexhaustedlab: yyerror (YY_("memory exhausted")); yyresult = 2; - goto yyreturnlab; + goto yyreturn; +#endif -/*----------------------------------------------------------. -| yyreturnlab -- parsing is finished, clean up and return. 
| -`----------------------------------------------------------*/ -yyreturnlab: +/*-------------------------------------------------------. +| yyreturn -- parsing is finished, clean up and return. | +`-------------------------------------------------------*/ +yyreturn: if (yychar != YYEMPTY) { /* Make sure we have latest lookahead translation. See comments at diff --git a/ncgen/ncgeny.h b/ncgen/ncgeny.h index bc3b140333..798f45f1fc 100644 --- a/ncgen/ncgeny.h +++ b/ncgen/ncgeny.h @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 3.8.2. */ +/* A Bison parser, made by GNU Bison 3.7.5. */ /* Bison interface for Yacc-like parsers in C @@ -16,7 +16,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see . */ + along with this program. If not, see . */ /* As a special exception, you may create a larger work that contains part or all of the Bison parser skeleton and distribute that work @@ -139,8 +139,6 @@ typedef union YYSTYPE YYSTYPE; extern YYSTYPE ncglval; - int ncgparse (void); - #endif /* !YY_NCG_NCGEN_TAB_H_INCLUDED */ diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index ed41e4be3c..8e79c88735 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -116,6 +116,7 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_misc) add_sh_test(nczarr_test run_nczarr_fill) add_sh_test(nczarr_test run_jsonconvention) + add_sh_test(nczarr_test run_strings) BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC}) add_sh_test(nczarr_test run_quantize) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index ca43d28b35..81f684ea49 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -62,6 +62,7 @@ TESTS += run_interop.sh TESTS += run_misc.sh TESTS += run_nczarr_fill.sh TESTS += run_jsonconvention.sh +TESTS += run_strings.sh endif @@ -149,7 +150,9 @@ ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl \ 
ref_bzip2.cdl ref_filtered.cdl ref_multi.cdl \ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \ ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \ -ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl +ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl \ +ref_jsonconvention.cdl ref_jsonconvention.zmap \ +ref_string.cdl # Interoperability files EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl diff --git a/nczarr_test/ref_jsonconvention.cdl b/nczarr_test/ref_jsonconvention.cdl index c4a52b8104..187fffd994 100644 --- a/nczarr_test/ref_jsonconvention.cdl +++ b/nczarr_test/ref_jsonconvention.cdl @@ -3,10 +3,16 @@ dimensions: d1 = 1 ; variables: int v(d1) ; - v:varconvention = "{\n\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ; + v:varjson1 = "{\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ; + v:varjson2 = "[[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]]" ; + v:varvec1 = "1.0, 0.0, 0.0" ; + v:varvec2 = "[0.,0.,1.]" ; // global attributes: - :grpconvention = "{\"key1\": [1,2,3], \n\"key2\": {\"key3\": \"abc\"}}" ; + :globalfloat = 1. ; + :globalfloatvec = 1., 2. 
; + :globalchar = "abc" ; + :globalillegal = "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 " ; data: v = _ ; diff --git a/nczarr_test/ref_jsonconvention.zmap b/nczarr_test/ref_jsonconvention.zmap new file mode 100644 index 0000000000..fed6cfba1e --- /dev/null +++ b/nczarr_test/ref_jsonconvention.zmap @@ -0,0 +1,5 @@ +[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": " tmp_jsonconvention_${zext}.txt ${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl # remove '\n' from ref file before comparing rm -f tmp_jsonconvention.cdl sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl +diff -b ${srcdir}/ref_jsonconvention.zmap tmp_jsonconvention_${zext}.txt } testcase file diff --git a/nczarr_test/run_newformat.sh b/nczarr_test/run_newformat.sh index dbb93e99f4..d5bc2ce76a 100755 --- a/nczarr_test/run_newformat.sh +++ b/nczarr_test/run_newformat.sh @@ -3,7 +3,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh -set -x . "$srcdir/test_nczarr.sh" set -e diff --git a/nczarr_test/run_strings.sh b/nczarr_test/run_strings.sh new file mode 100755 index 0000000000..c0b13568ae --- /dev/null +++ b/nczarr_test/run_strings.sh @@ -0,0 +1,88 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. 
"$srcdir/test_nczarr.sh" + +# This shell script tests support for the NC_STRING type + +set -e + +# Cvt stringattr to single char string +stringfixsa() { +rm -f $2 +sed -e '/:stringattr/ s|string :|:|' -e '/:stringattr/ s|", "||g' < $1 > $2 +} + +# Cvt stringattr to JSON format string +stringfixjsa() { +rm -f $2 +sed -e '/:stringattr/ s|string :|:|' -e '/:stringattr/ s|"|\\"|g' -e '/:stringattr/ s|= \(.*\);|= "\[\1\]" ;|' < $1 > $2 +} + +# Cvt v var data to single char string +stringfixv() { +rm -f $2 +sed -e '/v = / s|", "||g' < $1 > $2 +} + +# Cvt charattr to single char string +stringfixca() { +rm -f $2 +sed -e '/:charattr/ s|", "||g' <$1 > $2 +} + +# Cvt c var data to single char string +stringfixc() { +rm -f $2 +sed -e '/c = / s|", "||g' < $1 > $2 +} + +testcase() { +zext=$1 + +echo "*** Test: nczarr string write then read; format=$zext" +# Get pure zarr args +fileargs tmp_string_zarr "mode=zarr,$zext" +zarrurl="$fileurl" +zarrfile="$file" +# Get nczarr args +fileargs tmp_string_nczarr "mode=nczarr,$zext" +nczarrurl="$fileurl" +nczarrfile="$file" + +# setup +deletemap $zext $zarrfile +deletemap $zext $nczarrfile + +# Create alternate ref files +echo "*** create pure zarr file" +${NCGEN} -4 -b -o "$zarrurl" $srcdir/ref_string.cdl +echo "*** create nczarr file" +${NCGEN} -4 -b -o "$nczarrurl" $srcdir/ref_string.cdl + +echo "*** read purezarr" +${NCDUMP} -n ref_string $zarrurl > tmp_string_zarr_${zext}.cdl +${ZMD} -h $zarrurl > tmp_string_zarr_${zext}.txt +echo "*** read nczarr" +${NCDUMP} -n ref_string $nczarrurl > tmp_string_nczarr_${zext}.cdl +${ZMD} -h $nczarrurl > tmp_string_nczarr_${zext}.txt + +echo "*** convert for nczarr comparison" +stringfixca ${srcdir}/ref_string.cdl tmp_ref_string_ca.cdl +stringfixc tmp_ref_string_ca.cdl tmp_ref_string_cac.cdl + +echo "*** convert for zarr comparison" +stringfixjsa tmp_ref_string_cac.cdl tmp_ref_string_cacsa.cdl + +echo "*** verify" +diff -bw tmp_ref_string_cac.cdl tmp_string_nczarr_${zext}.cdl +diff -bw 
tmp_ref_string_cacsa.cdl tmp_string_zarr_${zext}.cdl +} + +testcase file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0 diff --git a/nczarr_test/zmapio.c b/nczarr_test/zmapio.c index c9c982c881..3131793e38 100644 --- a/nczarr_test/zmapio.c +++ b/nczarr_test/zmapio.c @@ -55,18 +55,21 @@ static struct Type { const char* typename; nc_type nctype; int typesize; + const char format[16]; } types[] = { -{"ubyte",NC_UBYTE,1}, -{"byte",NC_BYTE,1}, -{"ushort",NC_USHORT,2}, -{"short",NC_SHORT,2}, -{"uint",NC_UINT,4}, -{"int",NC_INT,4}, -{"uint64",NC_UINT64,8}, -{"int64",NC_INT64,8}, -{"float",NC_FLOAT,4}, -{"double",NC_DOUBLE,8}, -{NULL,NC_NAT,0} +{"ubyte",NC_UBYTE,1,"%u"}, +{"byte",NC_BYTE,1,"%d"}, +{"ushort",NC_USHORT,2,"%u"}, +{"short",NC_SHORT,2,"%d"}, +{"uint",NC_UINT,4,"%u"}, +{"int",NC_INT,4,"%d"}, +{"uint64",NC_UINT64,8,"%llu"}, +{"int64",NC_INT64,8,"%lld"}, +{"float",NC_FLOAT,4,"%f"}, +{"double",NC_DOUBLE,8,"%lf"}, +{"char",NC_CHAR,1,"'%c'"}, +{"string",NC_STRING,sizeof(char*),"%*s"}, +{NULL,NC_NAT,0,""} }; /* Command line options */ @@ -78,8 +81,10 @@ struct Dumpptions { NCZM_IMPL impl; char* rootpath; const struct Type* nctype; + char format[16]; int xflags; # define XNOZMETADATA 1 + int strlen; } dumpoptions; /* Forward */ @@ -120,9 +125,15 @@ decodeop(const char* name) } static const struct Type* -decodetype(const char* name) +decodetype(const char* name, int* strlenp) { struct Type* p = types; + + if(strncmp(name,"string/",strlen("string/"))==0) { + *strlenp = atoi(name+strlen("string/")); + name = "string"; + } + for(;p->typename != NULL;p++) { if(strcasecmp(p->typename,name)==0) return p; } @@ -138,9 +149,10 @@ main(int argc, char** argv) nc_initialize(); + /* Init options */ memset((void*)&dumpoptions,0,sizeof(dumpoptions)); - while ((c = getopt(argc, argv, "dhvx:t:T:X:")) != EOF) { + while ((c = getopt(argc, argv, "dhvx:t:F:T:X:")) != EOF) { switch(c) { case 'd': 
dumpoptions.debug = 1; @@ -148,17 +160,20 @@ main(int argc, char** argv) case 'h': dumpoptions.meta_only = 1; break; - case 'v': - zmapusage(); - goto done; case 't': - dumpoptions.nctype = decodetype(optarg); + dumpoptions.nctype = decodetype(optarg,&dumpoptions.strlen); if(dumpoptions.nctype == NULL) zmapusage(); break; case 'x': dumpoptions.mop = decodeop(optarg); if(dumpoptions.mop == MOP_NONE) zmapusage(); break; + case 'v': + zmapusage(); + goto done; + case 'F': + strcpy(dumpoptions.format,optarg); + break; case 'T': nctracelevel(atoi(optarg)); break; @@ -344,7 +359,7 @@ objdump(void) len = ceildiv(len,dumpoptions.nctype->typesize); } printf("[%d] %s : (%llu)",depth,obj,len); - if(kind == OK_CHUNK) + if(kind == OK_CHUNK && dumpoptions.nctype->nctype != NC_STRING) printf(" (%s)",dumpoptions.nctype->typename); printf(" |"); switch(kind) { @@ -434,25 +449,40 @@ static char hex[16] = "0123456789abcdef"; static void printcontent(size64_t len, const char* content, OBJKIND kind) { - size64_t i; + size64_t i, count; unsigned int c0,c1; - for(i=0;iformat; + if(dumpoptions.format[0] != '\0') + format = dumpoptions.format; + + if(dumpoptions.strlen > 0) { + strlen = dumpoptions.strlen; + count = ((len+strlen)-1)/strlen; + } else + count = len; + + for(i=0;i 0) printf(", "); switch(dumpoptions.nctype->nctype) { - case NC_BYTE: printf("%d",((char*)content)[i]); break; - case NC_SHORT: printf("%d",((short*)content)[i]); break; - case NC_INT: printf("%d",((int*)content)[i]); break; - case NC_INT64: printf("%lld",((long long*)content)[i]); break; - case NC_UBYTE: printf("%u",((unsigned char*)content)[i]); break; - case NC_USHORT: printf("%u",((unsigned short*)content)[i]); break; - case NC_UINT: printf("%u",((unsigned int*)content)[i]); break; - case NC_UINT64: printf("%llu",((unsigned long long*)content)[i]); break; - case NC_FLOAT: printf("%f",((float*)content)[i]); break; - case NC_DOUBLE: printf("%lf",((double*)content)[i]); break; + case NC_BYTE: 
printf(format,((char*)content)[i]); break; + case NC_SHORT: printf(format,((short*)content)[i]); break; + case NC_INT: printf(format,((int*)content)[i]); break; + case NC_INT64: printf(format,((long long*)content)[i]); break; + case NC_UBYTE: printf(format,((unsigned char*)content)[i]); break; + case NC_USHORT: printf(format,((unsigned short*)content)[i]); break; + case NC_UINT: printf(format,((unsigned int*)content)[i]); break; + case NC_UINT64: printf(format,((unsigned long long*)content)[i]); break; + case NC_FLOAT: printf(format,((float*)content)[i]); break; + case NC_DOUBLE: printf(format,((double*)content)[i]); break; + case NC_CHAR: printf(format,((char*)content)[i]); break; + case NC_STRING: printf(format,(int)strlen,((char*)(&content[i*strlen]))); break; default: abort(); } break; diff --git a/test_common.in b/test_common.in index dc5be62e09..1df44654b2 100644 --- a/test_common.in +++ b/test_common.in @@ -5,6 +5,10 @@ # See netcdf-c/COPYRIGHT file for more info. +# Load only once +if test "x$TEST_COMMON_SH" = x ; then +export TEST_COMMON_SH=1 + # Define location of execution TOPSRCDIR='@abs_top_srcdir@' TOPBUILDDIR='@abs_top_builddir@' @@ -32,6 +36,9 @@ FEATURE_NCZARR_ZIP=@DO_NCZARR_ZIP_TESTS@ FEATURE_FILTERTESTS=@DO_FILTER_TESTS@ FEATURE_PLUGIN_INSTALL_DIR=@PLUGIN_INSTALL_DIR@ +# Thredds-test server is currently disabled +#FEATURE_THREDDSTEST=1 + set -e # Figure out various locations in the src/build tree. @@ -170,5 +177,10 @@ avail() { if test yes = `${execdir}/../ncdump/ncfilteravail $1` ; then return 0 ; else echo "filter $1 not available" ; return 1; fi } +# Thredds-test is no longer available +#FEATURE_THREDDSTEST=1 + # Make sure we are in builddir (not execdir) cd $builddir + +fi #TEST_COMMON_SH