From 8d49436be67075dd836ded90e21505c4d0519e66 Mon Sep 17 00:00:00 2001
From: Simon Byrne
Date: Thu, 15 Sep 2022 16:42:45 -0700
Subject: [PATCH] add virtual dataset support

---
NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy; the whitespace of context lines may differ from the
upstream files and should be confirmed before applying.

 src/HDF5.jl             |  1 +
 src/api/helpers.jl      | 27 ++++++++++++++++++++
 src/api/types.jl        |  1 +
 src/properties.jl       |  6 +++++
 src/virtual.jl          | 58 ++++++++++++++++++++++++++++++++++++++++++
 test/virtual_dataset.jl | 32 +++++++++++++++++++++++
 6 files changed, 125 insertions(+)
 create mode 100644 src/virtual.jl
 create mode 100644 test/virtual_dataset.jl

diff --git a/src/HDF5.jl b/src/HDF5.jl
index ced25e703..8e8cd4056 100644
--- a/src/HDF5.jl
+++ b/src/HDF5.jl
@@ -73,6 +73,7 @@ include("groups.jl")
 include("datatypes.jl")
 include("typeconversions.jl")
 include("dataspaces.jl")
+include("virtual.jl")
 include("datasets.jl")
 include("attributes.jl")
 include("readwrite.jl")
diff --git a/src/api/helpers.jl b/src/api/helpers.jl
index 823ef0097..e7ff2e3d6 100644
--- a/src/api/helpers.jl
+++ b/src/api/helpers.jl
@@ -706,6 +706,33 @@ function h5p_get_userblock(plist_id)
     return len[]
 end
 
+# Return the number of virtual-dataset mappings stored in the property list
+# `dcpl_id` (used as the length of `VirtualLayout`).
+function h5p_get_virtual_count(dcpl_id)
+    count = Ref{Csize_t}()
+    h5p_get_virtual_count(dcpl_id, count)
+    return count[]
+end
+
+# Return the source dataset name of mapping `index` (zero-based) as a `String`.
+# The first call, with a `C_NULL` buffer, only queries the length; the second
+# call fills the buffer.
+function h5p_get_virtual_dsetname(dcpl_id, index)
+    len = h5p_get_virtual_dsetname(dcpl_id, index, C_NULL, 0)
+    buffer = StringVector(len)
+    h5p_get_virtual_dsetname(dcpl_id, index, buffer, len + 1)
+    return String(buffer)
+end
+
+# Return the source file name of mapping `index` (zero-based) as a `String`,
+# using the same query-then-fill calls as `h5p_get_virtual_dsetname`.
+function h5p_get_virtual_filename(dcpl_id, index)
+    len = h5p_get_virtual_filename(dcpl_id, index, C_NULL, 0)
+    buffer = StringVector(len)
+    h5p_get_virtual_filename(dcpl_id, index, buffer, len + 1)
+    return String(buffer)
+end
+
 function h5p_get_virtual_prefix(dapl_id)
     virtual_file_len = h5p_get_virtual_prefix(dapl_id, C_NULL, 0)
     buffer = StringVector(virtual_file_len)
diff --git a/src/api/types.jl b/src/api/types.jl
index 14c19abcb..74deb3ac7 100644
--- a/src/api/types.jl
+++ b/src/api/types.jl
@@ -260,6 +260,7 @@ _has_symbol(sym::Symbol) = Libdl.dlsym(libhdf5handle[], sym; throw_error=false)
 const H5D_COMPACT = 0
 const H5D_CONTIGUOUS = 1
 const H5D_CHUNKED = 2
+const H5D_VIRTUAL = 3
 
 # allocation times (C enum H5D_alloc_time_t)
 const H5D_ALLOC_TIME_ERROR = -1
diff --git a/src/properties.jl b/src/properties.jl
index 623fad27e..389b089c4 100644
--- a/src/properties.jl
+++ b/src/properties.jl
@@ -498,6 +498,9 @@ set_shuffle!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p),
 set_fletcher32!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p), Filters.Fletcher32())
 set_blosc!(p::Properties, val) = error("The Blosc filter now requires the H5Zblosc package be loaded")
 
+get_virtual(p::Properties) = VirtualLayout(p)
+set_virtual!(p::Properties, vmaps) = append!(VirtualLayout(p), vmaps)
+
 
 class_propertynames(::Type{DatasetCreateProperties}) = (
     :alloc_time,
@@ -508,6 +511,7 @@ class_propertynames(::Type{DatasetCreateProperties}) = (
     :filters,
     :layout,
     :no_attrs_hint,
+    :virtual,
     # convenience
     :blosc,
     :deflate,
@@ -532,6 +536,7 @@ function class_getproperty(::Type{DatasetCreateProperties}, p::Properties, name:
             false :
             API.h5p_get_dset_no_attrs_hint(p)
         ) :
+    name === :virtual ? get_virtual(p) :
     # deprecated
     name === :filter ? (depwarn("`filter` property name is deprecated, use `filters` instead",:class_getproperty); get_filters(p)) :
     class_getproperty(superclass(DatasetCreateProperties), p, name)
@@ -549,6 +554,7 @@ function class_setproperty!(::Type{DatasetCreateProperties}, p::Properties, name
             error("no_attrs_hint is only valid for HDF5 library versions 1.10.5 or greater") :
             API.h5p_set_dset_no_attrs_hint(p, val)
         ) :
+    name === :virtual ? set_virtual!(p, val) :
     # set-only for convenience
     name === :blosc ? set_blosc!(p, val) :
     name === :deflate ? set_deflate!(p, val) :
diff --git a/src/virtual.jl b/src/virtual.jl
new file mode 100644
index 000000000..6109f4774
--- /dev/null
+++ b/src/virtual.jl
@@ -0,0 +1,58 @@
+# Virtual dataset (VDS) support: mappings between selections of a virtual dataset
+# and selections of source datasets, stored in a dataset creation property list.
+
+"""
+    VirtualMapping(vspace, srcfile, srcdset, srcspace)
+
+One virtual-dataset mapping, passed field by field to `API.h5p_set_virtual`: the
+selection `vspace` of the virtual dataset, the source file name `srcfile`, the source
+dataset name `srcdset`, and the selection `srcspace` of that source dataset.
+"""
+struct VirtualMapping
+    vspace::Dataspace
+    srcfile::String
+    srcdset::String
+    srcspace::Dataspace
+end
+
+"""
+    VirtualLayout(dcpl::DatasetCreateProperties)
+
+Vector-like view of the [`VirtualMapping`](@ref)s stored in `dcpl`. Elements are read
+with `getindex` and added with `push!`/`append!`.
+"""
+struct VirtualLayout <: AbstractVector{VirtualMapping}
+    dcpl::DatasetCreateProperties
+end
+
+# `API.h5p_get_virtual_count` returns a `Csize_t`; convert it so that `length`,
+# `size` and `axes` are `Int`-based like those of other `AbstractVector`s.
+function Base.length(vlayout::VirtualLayout)
+    return Int(API.h5p_get_virtual_count(vlayout.dcpl))
+end
+Base.size(vlayout::VirtualLayout) = (length(vlayout),)
+
+function Base.push!(vlayout::VirtualLayout, vmap::VirtualMapping)
+    API.h5p_set_virtual(
+        vlayout.dcpl, vmap.vspace, vmap.srcfile, vmap.srcdset, vmap.srcspace
+    )
+    return vlayout
+end
+
+function Base.append!(vlayout::VirtualLayout, vmaps)
+    for vmap in vmaps
+        push!(vlayout, vmap)
+    end
+    return vlayout
+end
+
+function Base.getindex(vlayout::VirtualLayout, i::Integer)
+    # Fail with a `BoundsError` instead of handing an invalid index to the library.
+    @boundscheck checkbounds(vlayout, i)
+    idx = i - 1 # the `h5p_get_virtual_*` getters take a zero-based index
+    vspace = Dataspace(API.h5p_get_virtual_vspace(vlayout.dcpl, idx))
+    srcfile = API.h5p_get_virtual_filename(vlayout.dcpl, idx)
+    srcdset = API.h5p_get_virtual_dsetname(vlayout.dcpl, idx)
+    srcspace = Dataspace(API.h5p_get_virtual_srcspace(vlayout.dcpl, idx))
+    return VirtualMapping(vspace, srcfile, srcdset, srcspace)
+end
diff --git a/test/virtual_dataset.jl b/test/virtual_dataset.jl
new file mode 100644
index 000000000..0ac0a2f3e
--- /dev/null
+++ b/test/virtual_dataset.jl
@@ -0,0 +1,32 @@
+using Test, HDF5
+
+# NOTE(review): this patch does not modify test/runtests.jl; confirm this file is
+# included from there, otherwise the test never runs.
+@testset "virtual dataset" begin
+    dir = mktempdir()
+
+    # Source files: each holds one length-3 column of the virtual dataset.
+    h5open(joinpath(dir, "sub-0.hdf5"), "w") do f0
+        f0["x"] = fill(1.0, 3)
+    end
+    h5open(joinpath(dir, "sub-1.hdf5"), "w") do f1
+        f1["x"] = fill(2.0, 3)
+    end
+
+    srcspace = dataspace((3,))
+    vspace = dataspace((3, 2); max_dims=(3, -1))
+    HDF5.select_hyperslab!(vspace, (1:3, HDF5.BlockRange(1; count=-1)))
+
+    # The do-block form closes the main file even if the body throws.
+    h5open(joinpath(dir, "main.hdf5"), "w") do f
+        d = create_dataset(
+            f,
+            "x",
+            datatype(Float64),
+            vspace;
+            virtual=[HDF5.VirtualMapping(vspace, "./sub-%b.hdf5", "x", srcspace)]
+        )
+        @test read(d) == hcat(fill(1.0, 3), fill(2.0, 3))
+        close(d)
+    end
+end