From ab6cf65dc0caeea0f969dc31ecd38a17bf6e8b46 Mon Sep 17 00:00:00 2001 From: Sam Cunliffe Date: Fri, 19 May 2023 16:53:12 +0100 Subject: [PATCH] Add dependence on `libhdf5` and basic implementation class structure. (#304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add libhdf5. Co-authored-by: Mosè Giordano Co-authored-by: Will Graham <32364977+willGraham01@users.noreply.github.com> * Make clang-format happy. * Skeleton HDF5 wrapper class and failing tests. Basic class structure and some failing tests for TDD. * WIP #181. Change the interface a bit. Tests still fail but some because of stupid, not TDD. * Passes first tests! #181 * Perhaps we need to install HDF5 explicitly? * Fix indentation in README. Delete commented out. * Don't need C. * Throw not implemented exception. * Apply suggestions from code review --------- Co-authored-by: willGraham01 <1willgraham@gmail.com> Co-authored-by: Mosè Giordano Co-authored-by: Will Graham <32364977+willGraham01@users.noreply.github.com> --- .github/workflows/ci.yml | 12 ++- README.md | 3 +- tdms/CMakeLists.txt | 3 + tdms/cmake/targets.cmake | 2 + tdms/include/hdf5_io.h | 151 +++++++++++++++++++++++++++++++ tdms/src/hdf5_io.cpp | 69 ++++++++++++++ tdms/tests/unit/test_hdf5_io.cpp | 72 +++++++++++++++ 7 files changed, 309 insertions(+), 3 deletions(-) create mode 100644 tdms/include/hdf5_io.h create mode 100644 tdms/src/hdf5_io.cpp create mode 100644 tdms/tests/unit/test_hdf5_io.cpp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ab1f2c86..b9d2c3147 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,7 @@ jobs: if: ${{ contains(matrix.os, 'ubuntu') }} run: | sudo apt-get update - sudo apt-get install libfftw3-dev libgomp1 python3 lcov + sudo apt-get install libfftw3-dev libhdf5-dev libgomp1 python3 lcov # ------------------------------------------------------------------------------- # Windows @@ -79,12 +79,14 @@ jobs: run: | 
conda install fftw --yes echo "FFTWDIR=C:\Miniconda\envs\test\Library" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append + conda install hdf5 + echo "HDF5_DIR=C:\Miniconda\envs\test\Library" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append # ------------------------------------------------------------------------------- # MacOS - name: Install dependencies for MacOS if: ${{ contains(matrix.os, 'macos') }} - run: brew install fftw lcov + run: brew install fftw hdf5 lcov - name: Fix omp headers not linked on MacOS if: ${{ contains(matrix.os, 'macos') }} @@ -181,6 +183,12 @@ jobs: - name: Set up MATLAB uses: matlab-actions/setup-matlab@v1.2.3 + - name: Install HDF5 libraries for Ubuntu + if: ${{ contains(matrix.os, 'ubuntu') }} + run: | + sudo apt-get update + sudo apt-get install libhdf5-dev + - name: Install Python dependencies shell: bash run: | diff --git a/README.md b/README.md index b9671360d..42245867d 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ After installing with Homebrew, you may need to set the following CMake argument -DCMAKE_CXX_COMPILER=/Users/username/.local/homebrew/opt/llvm/bin/clang++ -DOMP_ROOT=/Users/username/.local/homebrew/opt/llvm/ -DCXX_ROOT=/Users/username/.local/homebrew/opt/llvm +-DHDF5_ROOT=/Users/username/.local/homebrew/opt/hdf5 ``` On an ARM Mac, you will need to install the x86 version of Homebrew. 
@@ -61,7 +62,7 @@ To do so, use the following commands: ```{sh} arch -x86_64 zsh arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" -arch -x86_64 /usr/local/bin/brew install llvm +arch -x86_64 /usr/local/bin/brew install llvm hdf5 ``` diff --git a/tdms/CMakeLists.txt b/tdms/CMakeLists.txt index 83f007e7c..6c9725f35 100644 --- a/tdms/CMakeLists.txt +++ b/tdms/CMakeLists.txt @@ -57,6 +57,9 @@ else() find_package(OpenMP REQUIRED) endif() +# hdf5 ------------------------------------------------------------------------ +find_package(HDF5 REQUIRED COMPONENTS CXX) +include_directories(${HDF5_INCLUDE_DIRS}) # spdlog ---------------------------------------------------------------------- find_package(spdlog NO_CMAKE_PACKAGE_REGISTRY QUIET) diff --git a/tdms/cmake/targets.cmake b/tdms/cmake/targets.cmake index 7f6cdbcf5..bc0220046 100644 --- a/tdms/cmake/targets.cmake +++ b/tdms/cmake/targets.cmake @@ -9,6 +9,7 @@ function(release_target) ${Matlab_MX_LIBRARY} ${Matlab_MAT_LIBRARY} ${LIBCXX_LIBRARY} + ${HDF5_CXX_LIBRARIES} OpenMP::OpenMP_CXX spdlog::spdlog ) @@ -44,6 +45,7 @@ function(test_target) ${Matlab_MX_LIBRARY} ${Matlab_MAT_LIBRARY} ${LIBCXX_LIBRARY} + ${HDF5_CXX_LIBRARIES} OpenMP::OpenMP_CXX spdlog::spdlog ) diff --git a/tdms/include/hdf5_io.h b/tdms/include/hdf5_io.h new file mode 100644 index 000000000..7ed45426e --- /dev/null +++ b/tdms/include/hdf5_io.h @@ -0,0 +1,151 @@ +/** + * @file hdf5_io.h + * @brief Helper classes for HDF5 file I/O. + * @details The main classes are `HDF5Reader` and `HDF5Writer` with the methods + * `HDF5Reader::read` and `HDF5Writer::write` respectively. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +/** + * @brief The base class for HDF5 I/O. + * @details Common functionality and wraps handling the std::shared_ptr to hold + * the H5::File object. + */ +class HDF5Base { + +protected: + std::string filename_; /**< The name of the file. 
*/ + std::shared_ptr<H5::H5File> file_; /**< Pointer to the underlying H5::File. */ + + /** + * @brief Construct a new HDF5{Reader/Writer} for a named file. + * @param filename The name of the file. + * @param mode The H5 file access mode (RDONLY for a HDF5Reader, TRUNC for a + * HDF5Writer.) + * @throws H5::FileIException if the file doesn't exist or can't be created. + */ + HDF5Base(const std::string &filename, int mode = H5F_ACC_RDONLY) + : filename_(filename) { + file_ = std::make_unique<H5::H5File>(filename, mode); + } + + /** + * @brief Destructor closes the file. + * @details Closes file when HDF5Reader(or HDF5Writer) goes out of scope. + * Since the file pointer is a smart pointer it is deallocated automatically. + */ + ~HDF5Base() { file_->close(); } + +public: + /** + * @brief Get the name of the file. + * @return std::string the filename. + */ + std::string get_filename() const { return filename_; } + + /** + * @brief Get the names of all datasets (data tables) currently in the file. + * @return std::vector<std::string> A vector of their names. + */ + std::vector<std::string> get_datanames() const; + + /** + * @brief Print the names of all datasets to std::cout. + */ + void ls() const; + + /** + * @brief Return shape/dimensionality information about the array data stored + * with `dataname`. + * @param dataname The name of the data table. + * @return std::vector<hsize_t> The dimensions of the data. + */ + std::vector<hsize_t> shape_of(const std::string &dataname) const; + + + /** + * @brief Checks the file is a valid HDF5 file, and everything is OK. + * TODO: Can perhaps remove. + * + * @return true If all is well. + * @return false Otherwise. + */ + bool is_ok() const; +}; + +/** + * @brief Class wrapper of the reading of HDF5 format files. + * @details Opens files in readonly and retrieves the datasets (in our case + * **double, but can be anything in general). + */ +class HDF5Reader : public HDF5Base { + +public: + /** + * @brief Construct a new HDF5Reader for a named file. + * @param filename The name of the file. 
+ * @throws H5::FileIException if the file is not found. + */ + HDF5Reader(const std::string &filename) + : HDF5Base(filename, H5F_ACC_RDONLY) {} + + /** + * @brief Reads a named dataset from the HDF5 file. + * @param dataset_name The name of the dataset to be read. + * @param data A pointer to an array of correct size. + */ + template <typename T> + void read(const std::string &dataset_name, T *data) const { + spdlog::debug("Reading {} from file: {}", dataset_name, filename_); + + // get the dataset and dataspace (contains dimensionality info) + H5::DataSet dataset = file_->openDataSet(dataset_name); + H5::DataSpace dataspace = dataset.getSpace(); + spdlog::debug("Created dataspace"); + + // need to get the number of matrix dimensions (rank) so that we can + // size the `dimensions` vector + int rank = dataspace.getSimpleExtentNdims(); + spdlog::debug("Rank of dataspace: {}", rank); + std::vector<hsize_t> dimensions(rank); + dataspace.getSimpleExtentDims(dimensions.data()); + spdlog::debug("Got dimensions"); + + // now get the data type + H5::DataType datatype = dataset.getDataType(); + spdlog::debug("Got datatype"); + dataset.read(data, datatype); + spdlog::debug("Read"); + + // `dimensions` is a std::vector, so it is released even if read() throws + } +}; + +class HDF5Writer : public HDF5Base { + +public: + /** + * @brief Construct a new HDF5Writer, creates a file. + * @param filename The name of the file to be created. + */ + HDF5Writer(const std::string &filename) : HDF5Base(filename, H5F_ACC_TRUNC) {} + + /** + * @brief Write `data` to the file with `dataname`. + * + * @param dataname The name of the data table. + * @param data The data itself. + * @param size The rank (number of dimensions) of the data array. + * @param dimensions The extent along each of the `size` dimensions. 
+ */ + void write(const std::string &dataname, double *data, int size, + hsize_t *dimensions); +}; diff --git a/tdms/src/hdf5_io.cpp b/tdms/src/hdf5_io.cpp new file mode 100644 index 000000000..467eefda7 --- /dev/null +++ b/tdms/src/hdf5_io.cpp @@ -0,0 +1,69 @@ +#include "hdf5_io.h" + +#include +#include + +#include +#include + +/****************************************************************************** + * HDF5Writer + */ +void HDF5Writer::write(const std::string &dataset_name, double *data, int size, + hsize_t *dimensions) { + spdlog::debug("Writing {} to file: {}", dataset_name, filename_); + + // declare a dataspace of rank `size` with extents `dimensions` + H5::DataSpace dataspace(size, dimensions); + H5::DataType datatype(H5::PredType::NATIVE_DOUBLE); + + // write the data to the dataset object in the file + H5::DataSet dataset = file_->createDataSet(dataset_name, datatype, dataspace); + dataset.write(data, H5::PredType::NATIVE_DOUBLE); + spdlog::trace("Write successful."); +} + +/****************************************************************************** + * HDF5Base + * + * Common HDF5 I/O methods abstracted to the base class. 
+ */ +std::vector<std::string> HDF5Base::get_datanames() const { + std::vector<std::string> names; + + // iterate over all objects in the file (index type matches the hsize_t count) + for (hsize_t i = 0, n = file_->getNumObjs(); i < n; i++) { + H5G_obj_t object_type = file_->getObjTypeByIdx(i); + + // if the current object is a H5::DataSet then grab its name + if (object_type == H5G_DATASET) { + H5std_string object_name = file_->getObjnameByIdx(i); + names.push_back(object_name); + } + } + return names; +} + +void HDF5Base::ls() const { + const std::vector<std::string> names = get_datanames(); + for (const auto &name : names) std::cout << name << '\n'; + std::cout.flush(); +} + +std::vector<hsize_t> HDF5Base::shape_of(const std::string &dataname) const { + + // get the dataset and dataspace (contains dimensionality info) + H5::DataSet dataset = file_->openDataSet(dataname); + H5::DataSpace dataspace = dataset.getSpace(); + + // need the rank in order to declare the vector size + int rank = dataspace.getSimpleExtentNdims(); + std::vector<hsize_t> dimensions(rank); + dataspace.getSimpleExtentDims(dimensions.data(), nullptr); + + // vector is the size in each dimension i, j(, k) + return dimensions; +} + + +bool HDF5Base::is_ok() const { return true; } diff --git a/tdms/tests/unit/test_hdf5_io.cpp b/tdms/tests/unit/test_hdf5_io.cpp new file mode 100644 index 000000000..6577b3b7a --- /dev/null +++ b/tdms/tests/unit/test_hdf5_io.cpp @@ -0,0 +1,72 @@ +/** + * @file test_hdf5_io.cpp + * @brief Tests of the HDF5 file I/O functionality. 
+ */ +#include "hdf5_io.h" + +// std +#include +#include +#include + +// external +#include +#include +#include + +// tdms +#include "unit_test_utils.h" + +using tdms_tests::create_tmp_dir;// unit_test_utils.h + +TEST_CASE("Test file I/O construction/destruction.") { + // setup - temporary directory + auto tmp = create_tmp_dir(); + + SECTION("Check file creation.") { + HDF5Writer f(tmp.string() + "/test_file_constructor.h5"); + CHECK(f.is_ok()); + }// destructor called as we leave scope + + SECTION("Check all reasonable file extensions are OK.") { + for (auto extension : {".hdf5", ".h5", ".mat"}) { + { + HDF5Writer fw(tmp.string() + "/test_file" + extension); + CHECK(fw.is_ok()); + }// destructor called as we leave scope + + HDF5Reader fr(tmp.string() + "/test_file" + extension); + CHECK(fr.is_ok()); + } + } + + SECTION("Check can't open nonexistent file.") { + CHECK_THROWS(HDF5Reader(tmp.string() + "/this_file_doesnt_exist.h5")); + } + + // Normal operation: we should be able to create a file and write to it, then + // read from it. + SECTION("Check write then read.") { + // create a file + { + HDF5Writer fw(tmp.string() + "/test_file_wr.h5"); + hsize_t dimensions[1] = {1}; + double writeme = 1337.; + fw.write("testdata", &writeme, 1, dimensions); + spdlog::debug("Written data"); + + CHECK(fw.is_ok()); + fw.ls(); + }// destructor called as we leave scope + + double data[1] = {0.}; + HDF5Reader fr(tmp.string() + "/test_file_wr.h5"); + fr.read("testdata", data); + spdlog::debug("Have read {}!", data[0]); + CHECK(data[0] == 1337.);// the value written above must round-trip exactly + } + + // teardown - remove temporary directory and all files + SPDLOG_DEBUG("Removing temporary directory."); + std::filesystem::remove_all(tmp); +}