Skip to content

Commit

Permalink
Fix an issue where the Subfiling VFD's context cache grows too large
Browse files Browse the repository at this point in the history
  • Loading branch information
jhendersonHDF committed Mar 16, 2024
1 parent 112f445 commit 31c3d58
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 13 deletions.
15 changes: 15 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,21 @@ Bug Fixes since HDF5-1.14.0 release
===================================
Library
-------
- Fixed an issue where the Subfiling VFD's context object cache could
grow too large

The Subfiling VFD keeps a cache of its internal context objects to
speed up access to a context object for a particular file, as well
as access to that object across multiple opens of the same file.
However, opening a large amount of files with the Subfiling VFD over
the course of an application's lifetime could cause this cache to grow
too large and result in the application running out of available MPI
communicator objects. On file close, the Subfiling VFD now simply
evicts context objects out of its cache and frees them. It is assumed
that multiple opens of a file will be a less common use case for the
Subfiling VFD, but this can be revisited if it proves to be an issue
for performance.

- Fixed asserts raised by large values of H5Pset_est_link_info() parameters

If large values for est_num_entries and/or est_name_len were passed
Expand Down
3 changes: 3 additions & 0 deletions src/H5FDsubfiling/H5FDsubfiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -1358,6 +1358,9 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;

if (file_ptr->context_id >= 0 && H5_free_subfiling_object(file_ptr->context_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't free subfiling context object");

/* Release the file info */
file_ptr = H5FL_FREE(H5FD_subfiling_t, file_ptr);

Expand Down
33 changes: 20 additions & 13 deletions src/H5FDsubfiling/H5subfiling_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ static int sf_file_map_size = 0;
#define DEFAULT_TOPOLOGY_CACHE_SIZE 4
#define DEFAULT_FILE_MAP_ENTRIES 8

static herr_t H5_free_subfiling_object(int64_t object_id);
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);

Expand Down Expand Up @@ -280,18 +279,25 @@ H5_get_subfiling_object(int64_t object_id)
* Purpose: Frees the underlying subfiling object for a given subfiling
* object ID.
*
* NOTE: Currently we assume that all created subfiling
* objects are cached in the (very simple) context/topology
* cache until application exit, so the only time a subfiling
* object should be freed by this routine is if something
* fails right after creating one. Otherwise, the internal
* indexing for the relevant cache will be invalid.
* NOTE: Because we want to avoid the potentially large
* overhead of determining the application topology on every
* file open, we currently assume that all created subfiling
* topology objects are cached in the (very simple) topology
* cache until application exit. This allows us to quickly
* find and assign a cached topology object to a subfiling
* context object for a file when opened. Therefore, a
* subfiling topology object should (currently) only ever be
* freed by this routine if a function fails right after
* creating a topology object. Otherwise, the internal
* indexing for the topology cache will be invalid and we will
* either leak memory or assign invalid topology pointers to
* subfiling context objects after that point.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
static herr_t
herr_t
H5_free_subfiling_object(int64_t object_id)
{
int64_t obj_type = (object_id >> 32) & 0x0FFFF;
Expand All @@ -305,30 +311,31 @@ H5_free_subfiling_object(int64_t object_id)
"couldn't get subfiling context for subfiling object ID");

if (H5_free_subfiling_object_int(sf_context) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object");

assert(sf_context_cache_num_entries > 0);
assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]);
sf_context_cache[sf_context_cache_num_entries - 1] = NULL;
sf_context_cache_num_entries--;
}
else {
else if (obj_type == SF_TOPOLOGY) {
sf_topology_t *sf_topology;

assert(obj_type == SF_TOPOLOGY);

if (NULL == (sf_topology = H5_get_subfiling_object(object_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling context for subfiling object ID");

if (H5_free_subfiling_topology(sf_topology) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology");
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology object");

assert(sf_topology_cache_num_entries > 0);
assert(sf_topology == sf_topology_cache[sf_topology_cache_num_entries - 1]);
sf_topology_cache[sf_topology_cache_num_entries - 1] = NULL;
sf_topology_cache_num_entries--;
}
else
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
"couldn't free subfiling object - invalid object type");

done:
H5_SUBFILING_FUNC_LEAVE;
Expand Down
1 change: 1 addition & 0 deletions src/H5FDsubfiling/H5subfiling_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id, MPI_Comm file_comm

H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type);
H5_DLL void *H5_get_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_free_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *stripe_size,
int64_t *num_subfiles);
H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath);
Expand Down

0 comments on commit 31c3d58

Please sign in to comment.