Skip to content

Commit

Permalink
Fix for unexpected issue in HDF5 with ASCII reading (#428)
Browse files Browse the repository at this point in the history
* Forces the character set for ASCII files to be ASCII, instead of UTF-8.
* More info: HDFGroup/hdf5#544
  • Loading branch information
sergiorg-hpc authored Apr 14, 2021
1 parent b75a79f commit b8998c6
Showing 1 changed file with 26 additions and 8 deletions.
34 changes: 26 additions & 8 deletions include/highfive/bits/H5ReadWrite_misc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/
#pragma once

#include "H5Tpublic.h"
#include "H5Utils.hpp"

namespace HighFive {
Expand All @@ -32,27 +33,44 @@ struct BufferInfo {
// details implementation
// Primary template of string_type_checker, parameterized on the source string
// type SrcStrT. It only declares getDataType; the bodies visible in this file
// are provided by the specializations for void, char[FixedLen] and char*.
// Each of them returns the DataType to be used, derived from the first argument
// (named element_type in the specializations) and the second argument.
template <typename SrcStrT>
struct string_type_checker {
// NOTE(review): this text is a rendered diff without +/- markers. The
// bool-parameter declaration appears to be the removed line and the
// DataType-parameter declaration its replacement -- confirm against the
// actual header.
static DataType getDataType(const DataType&, bool);
// Second parameter is named dtype in the specializations, which query it for
// fixed-length-ness (isFixedLenStr) and character set (H5Tget_cset).
static DataType getDataType(const DataType&, const DataType&);
};

// Specialization for SrcStrT = void: returns element_type itself, after
// optionally switching its character set to ASCII to match dtype.
template <>
struct string_type_checker<void> {
// NOTE(review): this text is a rendered diff without +/- markers. The
// bool-parameter signature on the next line appears to be the removed version
// and the signature after it the replacement -- confirm against the actual
// header before editing.
inline static DataType getDataType(const DataType& element_type, bool) {
// element_type: the DataType that is returned.
// dtype: a second DataType; only its character set is queried here.
inline static DataType getDataType(const DataType& element_type, const DataType& dtype) {
// TEMP. CHANGE: Ensure that the character set is properly configured to prevent
// converter issues on HDF5 <=v1.12.0 when loading ASCII strings first.
// See https://github.com/HDFGroup/hdf5/issues/544 for further information.
// The character set is changed only when element_type is of class H5T_STRING
// and dtype reports H5T_CSET_ASCII. The change is applied through
// element_type's HDF5 id even though element_type is a const reference, and
// the return value of H5Tset_cset is not checked.
if (H5Tget_class(element_type.getId()) == H5T_STRING &&
H5Tget_cset(dtype.getId()) == H5T_CSET_ASCII) {
H5Tset_cset(element_type.getId(), H5T_CSET_ASCII);
}
return element_type;
}};

// Specialization for fixed-size char arrays (SrcStrT = char[FixedLen]).
template <std::size_t FixedLen>
struct string_type_checker<char[FixedLen]> {
// NOTE(review): this text is a rendered diff without +/- markers. The next two
// lines (bool ds_fixed_str signature and its one-line return) appear to be the
// removed version; the definition that follows is the replacement -- confirm
// against the actual header before editing.
inline static DataType getDataType(const DataType& element_type, bool ds_fixed_str) {
return ds_fixed_str ? AtomicType<char[FixedLen]>() : element_type;
// Returns AtomicType<char[FixedLen]> when dtype.isFixedLenStr() is true and
// element_type otherwise; in both cases the result's character set is set to
// ASCII when dtype reports H5T_CSET_ASCII.
inline static DataType getDataType(const DataType& element_type, const DataType& dtype) {
DataType return_type = (dtype.isFixedLenStr()) ? AtomicType<char[FixedLen]>() : element_type;
// TEMP. CHANGE: See string_type_checker<void> definition
// Unlike the <void> specialization, there is no H5T_STRING class check on the
// type being modified here, and the H5Tset_cset return value is not checked.
if (H5Tget_cset(dtype.getId()) == H5T_CSET_ASCII) {
H5Tset_cset(return_type.getId(), H5T_CSET_ASCII);
}
return return_type;
}};

// Specialization for SrcStrT = char*: the first argument is ignored and the
// result is always built from AtomicType<std::string>.
template <>
struct string_type_checker<char*> {
// NOTE(review): this text is a rendered diff without +/- markers. The
// bool ds_fixed_str signature, the `if (ds_fixed_str)` test and the bare
// `return AtomicType<std::string>();` appear to be removed lines; the
// dtype-based lines are their replacements -- confirm against the actual
// header before editing.
inline static DataType getDataType(const DataType&, bool ds_fixed_str) {
if (ds_fixed_str)
// Throws DataSetException when dtype.isFixedLenStr() is true; otherwise returns
// AtomicType<std::string>, with its character set switched to ASCII when dtype
// reports H5T_CSET_ASCII.
inline static DataType getDataType(const DataType&, const DataType& dtype) {
if (dtype.isFixedLenStr())
throw DataSetException("Can't output variable-length to fixed-length strings");
return AtomicType<std::string>();
// TEMP. CHANGE: See string_type_checker<void> definition
DataType return_type = AtomicType<std::string>();
// The H5Tset_cset return value is not checked.
if (H5Tget_cset(dtype.getId()) == H5T_CSET_ASCII) {
H5Tset_cset(return_type.getId(), H5T_CSET_ASCII);
}
return return_type;
}};

template <typename T>
Expand All @@ -62,7 +80,7 @@ BufferInfo<T>::BufferInfo(const DataType& dtype)
, n_dimensions(details::inspector<type_no_const>::recursive_ndim -
((is_fixed_len_string && is_char_array) ? 1 : 0))
, data_type(string_type_checker<char_array_t>::getDataType(
create_datatype<elem_type>(), is_fixed_len_string)) {
create_datatype<elem_type>(), dtype)) {
if (is_fixed_len_string && std::is_same<elem_type, std::string>::value) {
throw DataSetException("Can't output std::string as fixed-length. "
"Use raw arrays or FixedLenStringArray");
Expand Down

0 comments on commit b8998c6

Please sign in to comment.