Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix an issue where the Subfiling VFD's context cache grows too large #4159

Merged
merged 2 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,21 @@ Bug Fixes since HDF5-1.14.0 release

Library
-------
- Fixed an issue where the Subfiling VFD's context object cache could
grow too large

The Subfiling VFD keeps a cache of its internal context objects to
speed up access to a context object for a particular file, as well
as access to that object across multiple opens of the same file.
However, opening a large amount of files with the Subfiling VFD over
the course of an application's lifetime could cause this cache to grow
too large and result in the application running out of available MPI
communicator objects. On file close, the Subfiling VFD now simply
evicts context objects out of its cache and frees them. It is assumed
that multiple opens of a file will be a less common use case for the
Subfiling VFD, but this can be revisited if it proves to be an issue
for performance.

- Fixed error when overwriting certain nested variable length types

Previously, when using a datatype that included a variable length type
Expand Down
3 changes: 3 additions & 0 deletions src/H5FDsubfiling/H5FDsubfiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -1358,6 +1358,9 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;

if (file_ptr->context_id >= 0 && H5_free_subfiling_object(file_ptr->context_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't free subfiling context object");

/* Release the file info */
file_ptr = H5FL_FREE(H5FD_subfiling_t, file_ptr);

Expand Down
33 changes: 20 additions & 13 deletions src/H5FDsubfiling/H5subfiling_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ static int sf_file_map_size = 0;
#define DEFAULT_TOPOLOGY_CACHE_SIZE 4
#define DEFAULT_FILE_MAP_ENTRIES 8

static herr_t H5_free_subfiling_object(int64_t object_id);
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);

Expand Down Expand Up @@ -280,18 +279,25 @@ H5_get_subfiling_object(int64_t object_id)
* Purpose: Frees the underlying subfiling object for a given subfiling
* object ID.
*
* NOTE: Currently we assume that all created subfiling
* objects are cached in the (very simple) context/topology
* cache until application exit, so the only time a subfiling
* object should be freed by this routine is if something
* fails right after creating one. Otherwise, the internal
* indexing for the relevant cache will be invalid.
* NOTE: Because we want to avoid the potentially large
* overhead of determining the application topology on every
* file open, we currently assume that all created subfiling
* topology objects are cached in the (very simple) topology
* cache until application exit. This allows us to quickly
* find and assign a cached topology object to a subfiling
* context object for a file when opened. Therefore, a
* subfiling topology object should (currently) only ever be
* freed by this routine if a function fails right after
* creating a topology object. Otherwise, the internal
* indexing for the topology cache will be invalid and we will
* either leak memory or assign invalid topology pointers to
* subfiling context objects after that point.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
static herr_t
herr_t
H5_free_subfiling_object(int64_t object_id)
{
int64_t obj_type = (object_id >> 32) & 0x0FFFF;
Expand All @@ -305,30 +311,31 @@ H5_free_subfiling_object(int64_t object_id)
"couldn't get subfiling context for subfiling object ID");

if (H5_free_subfiling_object_int(sf_context) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object");

assert(sf_context_cache_num_entries > 0);
assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]);
sf_context_cache[sf_context_cache_num_entries - 1] = NULL;
sf_context_cache_num_entries--;
}
else {
else if (obj_type == SF_TOPOLOGY) {
sf_topology_t *sf_topology;

assert(obj_type == SF_TOPOLOGY);

if (NULL == (sf_topology = H5_get_subfiling_object(object_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling context for subfiling object ID");

if (H5_free_subfiling_topology(sf_topology) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology");
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology object");

assert(sf_topology_cache_num_entries > 0);
assert(sf_topology == sf_topology_cache[sf_topology_cache_num_entries - 1]);
sf_topology_cache[sf_topology_cache_num_entries - 1] = NULL;
sf_topology_cache_num_entries--;
}
else
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
"couldn't free subfiling object - invalid object type");

done:
H5_SUBFILING_FUNC_LEAVE;
Expand Down
1 change: 1 addition & 0 deletions src/H5FDsubfiling/H5subfiling_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id, MPI_Comm file_comm

H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type);
H5_DLL void *H5_get_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_free_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *stripe_size,
int64_t *num_subfiles);
H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath);
Expand Down
Loading