Skip to content

Commit

Permalink
Allow usage of page buffering for serial file access from parallel HD…
Browse files Browse the repository at this point in the history
…F5 builds (HDFGroup#4568)
  • Loading branch information
jhendersonHDF authored Jun 13, 2024
1 parent 539d175 commit 4a0b1b6
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 155 deletions.
10 changes: 10 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,16 @@ Bug Fixes since HDF5-1.14.0 release
===================================
Library
-------
- Fixed library to allow usage of page buffering feature for serial file
access with parallel builds of HDF5

When HDF5 is built with parallel support enabled, the library would previously
disallow any usage of page buffering, even if a file was not opened with
parallel access. The library now allows usage of page buffering for serial
file access with parallel builds of HDF5. Usage of page buffering is still
disabled for any form of parallel file access, even if only 1 MPI process
is used.

- Fixed a leak of datatype IDs created internally during datatype conversion

Fixed an issue where the library could leak IDs that it creates internally
Expand Down
33 changes: 20 additions & 13 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -2008,15 +2008,6 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)
if (H5P_get(a_plist, H5F_ACS_PAGE_BUFFER_SIZE_NAME, &page_buf_size) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get page buffer size");
if (page_buf_size) {
#ifdef H5_HAVE_PARALLEL
/* Collective metadata writes are not supported with page buffering */
if (file->shared->coll_md_write)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
"collective metadata writes are not supported with page buffering");

/* Temporary: fail file create when page buffering feature is enabled for parallel */
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "page buffering is disabled for parallel");
#endif /* H5_HAVE_PARALLEL */
/* Query for other page buffer cache properties */
if (H5P_get(a_plist, H5F_ACS_PAGE_BUFFER_MIN_META_PERC_NAME, &page_buf_min_meta_perc) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get minimum metadata fraction of page buffer");
Expand All @@ -2029,14 +2020,30 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get evict on close value");

#ifdef H5_HAVE_PARALLEL
/* Check for evict on close in parallel (currently unsupported) */
/* Check for unsupported settings in parallel */
assert(file->shared);
if (H5F_SHARED_HAS_FEATURE(file->shared, H5FD_FEAT_HAS_MPI)) {
int mpi_size = H5F_shared_mpi_get_size(file->shared);

if ((mpi_size > 1) && evict_on_close)
HGOTO_ERROR(H5E_FILE, H5E_UNSUPPORTED, NULL,
"evict on close is currently not supported in parallel HDF5");
/*
* While there shouldn't be any problems in general with using page buffering
* with only 1 MPI process, there are still some testing issues to be fixed.
* Until then, page buffering is disabled for any form of parallel access.
*/
if (page_buf_size) {
/* Collective metadata writes are not supported with page buffering */
if (file->shared->coll_md_write)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
"collective metadata writes are not supported with page buffering");

HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "page buffering is disabled for parallel");
}

if (mpi_size > 1) {
if (evict_on_close)
HGOTO_ERROR(H5E_FILE, H5E_UNSUPPORTED, NULL,
"evict on close is currently not supported in parallel HDF5");
}
}
#endif

Expand Down
133 changes: 0 additions & 133 deletions test/page_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@
#define FILENAME_LEN 1024

/* test routines */
#ifdef H5_HAVE_PARALLEL
static unsigned verify_page_buffering_disabled(hid_t orig_fapl, const char *driver_name);
#else
#define NUM_DSETS 5
#define NX 100
#define NY 50
Expand All @@ -54,12 +51,9 @@ static unsigned test_stats_collection(hid_t orig_fapl, const char *driver_name);
/* helper routines */
static unsigned create_file(char *filename, hid_t fcpl, hid_t fapl);
static unsigned open_file(char *filename, hid_t fapl, hsize_t page_size, size_t page_buffer_size);
#endif /* H5_HAVE_PARALLEL */

static const char *FILENAME[] = {"filepaged", NULL};

#ifndef H5_HAVE_PARALLEL

/*-------------------------------------------------------------------------
* Function: create_file()
*
Expand Down Expand Up @@ -289,7 +283,6 @@ open_file(char *filename, hid_t fapl, hsize_t page_size, size_t page_buffer_size
H5E_END_TRY
return 1;
}
#endif /* H5_HAVE_PARALLEL */

/*
*
Expand Down Expand Up @@ -353,8 +346,6 @@ set_multi_split(const char *driver_name, hid_t fapl, hsize_t pagesize)

} /* set_multi_split() */

#ifndef H5_HAVE_PARALLEL

/*-------------------------------------------------------------------------
* Function: test_args()
*
Expand Down Expand Up @@ -2043,121 +2034,6 @@ test_stats_collection(hid_t orig_fapl, const char *driver_name)

return 1;
} /* test_stats_collection */
#endif /* #ifndef H5_HAVE_PARALLEL */

/*-------------------------------------------------------------------------
 * Function:    verify_page_buffering_disabled()
 *
 * Purpose:     This function should only be called in parallel
 *              builds.
 *
 *              At present, page buffering should be disabled in parallel
 *              builds.  Verify this by checking that both H5Fcreate and
 *              H5Fopen fail when a FAPL with page buffering enabled is
 *              used.
 *
 * Return:      0 if test is successful
 *              1 if test fails
 *
 *-------------------------------------------------------------------------
 */

#ifdef H5_HAVE_PARALLEL
static unsigned
verify_page_buffering_disabled(hid_t orig_fapl, const char *driver_name)
{
    char  filename[FILENAME_LEN];    /* Filename to use */
    hid_t file_id = H5I_INVALID_HID; /* File ID */
    hid_t fcpl    = H5I_INVALID_HID;
    hid_t fapl    = H5I_INVALID_HID;

    TESTING("Page Buffering Disabled");
    h5_fixname(FILENAME[0], orig_fapl, filename, sizeof(filename));

    /* first, try to create a file with page buffering enabled */

    if ((fapl = H5Pcopy(orig_fapl)) < 0)
        TEST_ERROR;

    if (set_multi_split(driver_name, fapl, 4096) != 0)
        TEST_ERROR;

    if ((fcpl = H5Pcreate(H5P_FILE_CREATE)) < 0)
        FAIL_STACK_ERROR;

    if (H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 0, (hsize_t)1) < 0)
        FAIL_STACK_ERROR;

    if (H5Pset_file_space_page_size(fcpl, 4096) < 0)
        FAIL_STACK_ERROR;

    if (H5Pset_page_buffer_size(fapl, 4096 * 8, 0, 0) < 0)
        FAIL_STACK_ERROR;

    /* try to create the file -- should fail */
    H5E_BEGIN_TRY
    {
        file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, fapl);
    }
    H5E_END_TRY

    if (file_id >= 0)
        TEST_ERROR;

    /* Close the first FCPL before creating a fresh one below.  The
     * previous code re-assigned fcpl without closing it first, leaking
     * the original property list ID.
     */
    if (H5Pclose(fcpl) < 0)
        FAIL_STACK_ERROR;
    fcpl = H5I_INVALID_HID;

    /* now, create a file, close it, and then try to open it with page
     * buffering enabled.
     */
    if ((fcpl = H5Pcreate(H5P_FILE_CREATE)) < 0)
        FAIL_STACK_ERROR;

    if (H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 0, (hsize_t)1) < 0)
        FAIL_STACK_ERROR;

    if (H5Pset_file_space_page_size(fcpl, 4096) < 0)
        FAIL_STACK_ERROR;

    /* create the file (no page buffering in the FAPL here, so it succeeds) */
    if ((file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, H5P_DEFAULT)) < 0)
        FAIL_STACK_ERROR;

    /* close the file */
    if (H5Fclose(file_id) < 0)
        FAIL_STACK_ERROR;

    /* try to open the file using the fapl prepared above which enables
     * page buffering.  Should fail.
     */
    H5E_BEGIN_TRY
    {
        file_id = H5Fopen(filename, H5F_ACC_RDWR, fapl);
    }
    H5E_END_TRY

    if (file_id >= 0)
        TEST_ERROR;

    if (H5Pclose(fcpl) < 0)
        FAIL_STACK_ERROR;

    if (H5Pclose(fapl) < 0)
        FAIL_STACK_ERROR;

    PASSED();

    return 0;

error:

    /* Best-effort cleanup of any IDs still open; errors are suppressed */
    H5E_BEGIN_TRY
    {
        H5Pclose(fapl);
        H5Pclose(fcpl);
        H5Fclose(file_id);
    }
    H5E_END_TRY

    return 1;

} /* verify_page_buffering_disabled() */
#endif /* H5_HAVE_PARALLEL */

/*-------------------------------------------------------------------------
* Function: main()
Expand Down Expand Up @@ -2203,22 +2079,13 @@ main(void)
FAIL_STACK_ERROR;
api_ctx_pushed = true;

#ifdef H5_HAVE_PARALLEL

puts("Page Buffering is disabled for parallel.");
nerrors += verify_page_buffering_disabled(fapl, driver_name);

#else /* H5_HAVE_PARALLEL */

nerrors += test_args(fapl, driver_name);
nerrors += test_raw_data_handling(fapl, driver_name);
nerrors += test_lru_processing(fapl, driver_name);
nerrors += test_min_threshold(fapl, driver_name);
nerrors += test_stats_collection(fapl, driver_name);
nerrors += test_pb_fapl_tolerance_at_open();

#endif /* H5_HAVE_PARALLEL */

h5_clean_files(FILENAME, fapl);

if (nerrors)
Expand Down
37 changes: 30 additions & 7 deletions testpar/t_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@
#define H5MF_FRIEND /*suppress error about including H5MFpkg */
#include "H5MFpkg.h"

#ifdef PB_OUT
#define NUM_DSETS 5
#endif

int mpi_size, mpi_rank;

#ifdef PB_OUT
static int create_file(const char *filename, hid_t fcpl, hid_t fapl, int metadata_write_strategy);
static int open_file(const char *filename, hid_t fapl, int metadata_write_strategy, hsize_t page_size,
size_t page_buffer_size);
#endif

/*
* test file access by communicator besides COMM_WORLD.
Expand Down Expand Up @@ -132,22 +136,29 @@ test_split_comm_access(void)
void
test_page_buffer_access(void)
{
const char *filename;
hid_t file_id = H5I_INVALID_HID; /* File ID */
hid_t fcpl, fapl;
size_t page_count = 0;
int i, num_elements = 200;
haddr_t raw_addr, meta_addr;
int *data;
H5F_t *f = NULL;
herr_t ret; /* generic return value */
const char *filename;
bool api_ctx_pushed = false; /* Whether API context pushed */
#ifdef PB_OUT
size_t page_count = 0;
int i, num_elements = 200;
haddr_t raw_addr, meta_addr;
int *data;
H5F_t *f = NULL;
bool api_ctx_pushed = false; /* Whether API context pushed */
#endif

MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

filename = (const char *)GetTestParameters();

/* Until page buffering is supported in parallel in some form (even if
     * just for a single MPI process), this test will just check to
* make sure that an error is thrown when page buffering is enabled
* with parallel access.
*/
if (VERBOSE_MED)
printf("Page Buffer Usage in Parallel %s\n", filename);

Expand Down Expand Up @@ -175,6 +186,15 @@ test_page_buffer_access(void)
ret = H5Pset_coll_metadata_write(fapl, false);
VRFY((ret >= 0), "");

/* This should fail due to page buffering not being supported in parallel */
H5E_BEGIN_TRY
{
file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, fapl);
}
H5E_END_TRY
VRFY((file_id < 0), "H5Fcreate failed");

#ifdef PB_OUT
ret = create_file(filename, fcpl, fapl, H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
VRFY((ret == 0), "");
ret = open_file(filename, fapl, H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, sizeof(int) * 100,
Expand Down Expand Up @@ -442,8 +462,10 @@ test_page_buffer_access(void)
free(data);
data = NULL;
MPI_Barrier(MPI_COMM_WORLD);
#endif
}

#ifdef PB_OUT
static int
create_file(const char *filename, hid_t fcpl, hid_t fapl, int metadata_write_strategy)
{
Expand Down Expand Up @@ -760,6 +782,7 @@ open_file(const char *filename, hid_t fapl, int metadata_write_strategy, hsize_t

return nerrors;
}
#endif

/*
* NOTE: See HDFFV-10894 and add tests later to verify MPI-specific properties in the
Expand Down
2 changes: 0 additions & 2 deletions testpar/testphdf5.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,7 @@ main(int argc, char **argv)
AddTest("split", test_split_comm_access, NULL, "dataset using split communicators", PARATESTFILE);
AddTest("h5oflusherror", test_oflush, NULL, "H5Oflush failure", PARATESTFILE);

#ifdef PB_OUT /* temporary: disable page buffering when parallel */
AddTest("page_buffer", test_page_buffer_access, NULL, "page buffer usage in parallel", PARATESTFILE);
#endif

AddTest("props", test_file_properties, NULL, "Coll Metadata file property settings", PARATESTFILE);

Expand Down

0 comments on commit 4a0b1b6

Please sign in to comment.