Skip to content

Commit

Permalink
WIP #181. Change the interface a bit. Tests still fail but some becau…
Browse files Browse the repository at this point in the history
…se of stupid, not TDD.
  • Loading branch information
samcunliffe committed Mar 9, 2023
1 parent 93f7fdf commit b8199d0
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 193 deletions.
145 changes: 106 additions & 39 deletions tdms/include/hdf5_io.h
Original file line number Diff line number Diff line change
@@ -1,65 +1,132 @@
/**
* @file hdf5_io.h
* @brief File I/O using HDF5.
* @brief Helper classes for HDF5 file I/O.
* @details The main classes are `HDF5Reader` and `HDF5Writer` with the methods
* `HDF5Reader::read` and `HDF5Writer::write` respectively.
*/
#pragma once

// std
#include <memory>
#include <string>
#include <vector>

// libhdf5
#include <H5Cpp.h>

/**
* @brief Mode to write the file: R, RW, O.
* @brief The base class for HDF5 I/O.
* @details Common functionality and wraps handling the std::unique_ptr to hold
* the H5::File object.
*/
enum HDF5FileMode {
READONLY = 0,
READWRITE,
OVERWRITE
};
class HDF5Base {

/**
* @brief A simple class to wrap the HDF5 I/O.
*/
class HDF5File {
protected:
std::string filename_; /**< The name of the file. */
std::shared_ptr<H5::H5File> file_; /**< Pointer to the underlying H5::File. */

/**
* @brief Construct a new HDF5{Reader/Writer} for a named file.
* @param filename The name of the file.
* @param mode The H5 file access mode (RDONLY for a HDF5Reader, TRUNC for a
* HDF5Writer.)
* @throws H5::FileIException if the file doesn't exist or can't be created.
*/
HDF5Base(const std::string &filename, int mode = H5F_ACC_RDONLY)
: filename_(filename) {
file_ = std::make_unique<H5::H5File>(filename, mode);
}

/**
 * @brief Destructor closes the file.
 * @details Closes the file when the HDF5Reader (or HDF5Writer) goes out of
 * scope. Since the file pointer is a smart pointer it is deallocated
 * automatically. A failure to close is swallowed rather than propagated:
 * destructors are implicitly noexcept, so an exception escaping from here
 * would terminate the program.
 */
~HDF5Base() {
  try {
    if (file_) file_->close();
  } catch (const H5::Exception &) {
    // Nothing useful can be done about a failed close during destruction.
  }
}

public:
/** Default constructor: default filename and mode. */
HDF5File() : filename_("tdms.hdf5"), mode_(READONLY) { _open(); }
/**
* @brief Get the name of the file.
* @return std::string the filename.
*/
std::string get_filename() const { return filename_; }

/** Normal constructor. */
HDF5File(const std::string &filename, HDF5FileMode mode) : filename_(filename), mode_(mode) {
_open();
}
/**
* @brief Get the names of all datasets (data tables) currently in the file.
* @return std::vector<std::string> A vector of their names.
*/
std::vector<std::string> get_datanames() const;

/** Destructor, deletes pointers. */
~HDF5File();
/**
* @brief Print the names of all datasets to std::cout.
*/
void ls() const;

/** Writes array to the file. */
void write();
/**
* @brief Return shape/dimensionality information about the array data stored
* with `dataname`.
* @param dataname The name of the data table.
* @return std::vector<hsize_t> The dimensions of the data.
*/
std::vector<hsize_t> shape_of(const std::string &dataname) const;

/** reads data from the file. */
void read();
/**
* @brief Dumps the data to std::cout for debugging purposes.
* @param dataname The name of the data table.
*/
void data_dump(const std::string &dataname) const;

/**
* @brief Check file health.
* @brief Checks the file is a valid HDF5 file, and everything is OK.
* TODO: Can perhaps remove.
*
* @param print_debug Optionally this function can print to the debug log.
* @return true If the file is OK.
* @return false If the file is not OK.
* @return true If all is well.
* @return false Otherwise.
*/
bool isOK(bool print_debug=false);

private:
/** Common to both constructors: open/create the file and set file_ to point to it. */
void _open();
std::string filename_; /**< The file name. */
HDF5FileMode mode_; /**< The I/O mode: default is to create non-existing. */
H5::H5File *file_ = nullptr; /**< The H5 file itself. */
std::vector<H5::DataSet *> datasets_; /**< All datasets to be written to the file. */
bool is_ok() const;
};

/**
* @brief Example of HDF5 I/O... to be deleted.
* This function should not make it to a PR.
* @brief Class wrapper of the reading of HDF5 format files.
* @details Opens files in readonly and retrieves the datasets (in our case
* **double, but can be anything in general).
*/
void example_hdf5();
class HDF5Reader : public HDF5Base {

public:
  /**
   * @brief Construct a new HDF5Reader for a named file.
   * @details The file is opened with the H5F_ACC_RDONLY access mode.
   * @param filename The name of the file.
   * @throws H5::FileIException if the file doesn't exist (raised while the
   * HDF5Base constructor opens the file).
   */
  explicit HDF5Reader(const std::string &filename)
      : HDF5Base(filename, H5F_ACC_RDONLY) {}

  /**
   * @brief Reads a named dataset from the HDF5 file.
   * @tparam T The element type of the destination array.
   * @param dataname The name of the dataset to be read.
   * @param data A pointer to an array of correct size.
   *
   * NOTE(review): this template is only declared here; its definition lives in
   * hdf5_io.cpp, so other translation units can presumably only link against
   * instantiations provided there -- confirm.
   */
  template <typename T>
  void read(const std::string &dataname, T *data) const;
};

/**
 * @brief Class wrapper of the writing of HDF5 format files.
 * @details Opens the named file with the H5F_ACC_TRUNC access mode and writes
 * arrays of doubles into it as named datasets.
 */
class HDF5Writer : public HDF5Base {

public:
  /**
   * @brief Construct a new HDF5Writer, creates a file.
   * @param filename The name of the file to be created.
   * @throws H5::FileIException if the file can't be created (raised while the
   * HDF5Base constructor opens the file).
   */
  explicit HDF5Writer(const std::string &filename)
      : HDF5Base(filename, H5F_ACC_TRUNC) {}

  /**
   * @brief Write `data` to the file with `dataname`.
   *
   * @param dataname The name of the data table.
   * @param data The data itself.
   * @param size Currently ignored by the implementation in hdf5_io.cpp.
   * NOTE(review): unclear whether this is meant to be the element count or the
   * rank of the array -- confirm before relying on it.
   * @param dimensions Pointer to the extent of the array in each dimension.
   * The implementation currently builds a one-dimensional dataspace from it,
   * so it must point to at least one valid entry.
   */
  void write(const std::string &dataname, double *data, int size,
             hsize_t *dimensions);
};
176 changes: 77 additions & 99 deletions tdms/src/hdf5_io.cpp
Original file line number Diff line number Diff line change
@@ -1,122 +1,100 @@
// own include
#include "hdf5_io.h"

// std
#include <string>
#include <iostream>
#include <stdexcept>

// external
#include <H5Cpp.h>
#include <H5Fpublic.h>
#include <spdlog/spdlog.h>


/**
* @brief Convert a HDF5FileMode to the #defined HDF5 file creation property.
* @note Internal function, only used in this file by HDF5File::_open.
/******************************************************************************
* HDF5Reader
*/
unsigned int convert_to_h5f_global(HDF5FileMode mode) {
switch (mode) {
case READONLY: return H5F_ACC_RDONLY;
case READWRITE: return H5F_ACC_RDWR;
case OVERWRITE: return H5F_ACC_TRUNC;
default: return std::numeric_limits<int>::max();
}
}
template<typename T>
void HDF5Reader::read(const std::string &dataset_name, T *data) const {

// get the dataset and dataspace (contains dimensionality info)
H5::DataSet dataset = file_->openDataSet(dataset_name);
H5::DataSpace dataspace = dataset.getSpace();

// need to get the number of matrix dimensions (rank) so that we can
// dynamically allocate `dimensions`
int rank = dataspace.getSimpleExtentNdims();
hsize_t *dimensions = new hsize_t[rank];
dataspace.getSimpleExtentDims(dimensions);

void HDF5File::_open() {
spdlog::trace("Opening file: {}, in mode: {}", filename_, static_cast<int>(mode_));
file_ = new H5::H5File(filename_.c_str(), convert_to_h5f_global(mode_));
return;
//auto dimensions = shape_of(dataset_name);
// TODO why do we need `dimensions` at all here?

// now get the data type
H5::DataType datatype = dataset.getDataType();
dataset.read(data, datatype);

delete[] dimensions;
}

HDF5File::~HDF5File() {
file_->close();
for (auto ds: datasets_)
delete ds;
delete file_;
/******************************************************************************
* HDF5Writer
*/
void HDF5Writer::write(const std::string &dataset_name, double *data, int size,
hsize_t *dimensions) {
// 1D array
hsize_t rank = 1;
(void) size;// TODO what?

// declare a dataspace
H5::DataSpace dataspace(rank, dimensions);
H5::DataType datatype(H5::PredType::NATIVE_DOUBLE);

// write the data to the dataset object in the file
H5::DataSet dataset = file_->createDataSet(dataset_name, datatype, dataspace);
dataset.write(data, H5::PredType::NATIVE_DOUBLE);
}

void HDF5File::write() {
for (int i=0; i<3; i++) {
std::cout << convert_to_h5f_global(static_cast<HDF5FileMode>(i)) << std::endl;
/******************************************************************************
* HDF5Base
*
* Common HDF5 I/O methods abstracted to the base class.
*/
std::vector<std::string> HDF5Base::get_datanames() const {
std::vector<std::string> names;

// iterate over all objects in the file
for (unsigned int i = 0; i < file_->getNumObjs(); i++) {
H5G_obj_t object_type = file_->getObjTypeByIdx(i);

// if the current object is a H5::Dataset then grab its name
if (object_type == H5G_DATASET) {
H5std_string object_name = file_->getObjnameByIdx(i);
names.push_back(object_name);
}
return;
}
void HDF5File::read() {
return;
}
return names;
}

bool HDF5File::isOK(bool print_debug) {
void HDF5Base::ls() const {
std::vector<std::string> names = this->get_datanames();
for (auto name : names) std::cout << name << std::endl;
return;
}

if (print_debug) {
// debug information
spdlog::debug("File is named: {}", filename_);
spdlog::debug("File mode: {}", static_cast<int>(mode_));
spdlog::debug("Internal H5::H5File address: {:p}", (void*)file_);
}
/**
 * @brief Returns the extent of the named dataset along each of its dimensions.
 * @param dataname The name of the dataset to inspect.
 * @return std::vector<hsize_t> One entry per dimension, i, j(, k).
 */
std::vector<hsize_t> HDF5Base::shape_of(const std::string &dataname) const {

  // get the dataset and dataspace (contains dimensionality info)
  H5::DataSet dataset = file_->openDataSet(dataname);
  H5::DataSpace dataspace = dataset.getSpace();

  // need the rank in order to declare the vector size
  const int rank = dataspace.getSimpleExtentNdims();
  std::vector<hsize_t> dimensions(rank);
  // the second argument would receive the maximum extents, which are not needed
  dataspace.getSimpleExtentDims(dimensions.data(), nullptr);

  // vector is the size in each dimension i, j(, k)
  return dimensions;
}

// Intended to dump the named dataset to standard output for debugging.
// TODO: not implemented yet -- the body is empty, so calling this currently
// does nothing and `dataname` is unused.
void HDF5Base::data_dump(const std::string &dataname) const {
// (no implementation yet)
}

#define MAX_NAME_LENGTH 32
const std::string FileName("SimpleCompound.h5");
const std::string DatasetName("PersonalInformation");
const std::string member_age("Age");
const std::string member_sex("Sex");
const std::string member_name("Name");
const std::string member_height("Height");

typedef struct {
int age;
char sex;
char name[MAX_NAME_LENGTH];
float height;
} PersonalInformation;



void example_hdf5() {

// Data to write
PersonalInformation person_list[] = {
{ 18, 'M', "Mary", 152.0 },
{ 32, 'F', "Tom", 178.6 },
{ 29, 'M', "Tarou", 166.6 }
};
// the length of the data
//int length = sizeof(person_list) / sizeof(PersonalInformation);
// the array of each length of multidimentional data.
hsize_t dim[1];
dim[0] = sizeof(person_list) / sizeof(PersonalInformation);

// the length of dim
int rank = sizeof(dim) / sizeof(hsize_t);

// defining the datatype to pass HDF55
H5::CompType mtype(sizeof(PersonalInformation));
mtype.insertMember(member_age, HOFFSET(PersonalInformation, age), H5::PredType::NATIVE_INT);
mtype.insertMember(member_sex, HOFFSET(PersonalInformation, sex), H5::PredType::C_S1);
mtype.insertMember(member_name, HOFFSET(PersonalInformation, name), H5::StrType(H5::PredType::C_S1, MAX_NAME_LENGTH));
mtype.insertMember(member_height, HOFFSET(PersonalInformation, height), H5::PredType::NATIVE_FLOAT);

// preparation of a dataset and a file.
H5::DataSpace space(rank, dim);
H5::H5File *file = new H5::H5File(FileName, H5F_ACC_TRUNC);
H5::DataSet *dataset = new H5::DataSet(file->createDataSet(DatasetName, mtype, space));
// Write
dataset->write(person_list, mtype);

delete dataset;
delete file;
return;

// Health check for the underlying file.
// TODO: placeholder -- no checks are performed yet, so this always reports
// success regardless of the state of the file.
bool HDF5Base::is_ok() const {
return true;
// (no checks implemented yet)
}
Loading

0 comments on commit b8199d0

Please sign in to comment.