From 2d68804b21cd759be1feca85a8059eba82879882 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Sat, 11 Feb 2023 09:02:48 -0600 Subject: [PATCH] communicator: make c_name a dynamic array and reorder struct Make the c_name element of the communicator structure a dynamically allocated element. This allows us to reduce the size of PREDEFINED_COMMUNICATOR_PAD back to 512 to maintain backwards compatibility with the ompi 4.1.x release series. Reorder the communicator fields to reduce the struct size. This brings the communicator size to 536 bytes with FT and PERUSE enabled and compiled in debug mode. Fixes issue #11373 Signed-off-by: Edgar Gabriel Signed-off-by: George Bosilca --- ompi/communicator/comm.c | 6 ++++ ompi/communicator/comm_init.c | 21 +++++++------ ompi/communicator/communicator.h | 44 +++++++++++++--------------- ompi/debuggers/predefined_gap_test.c | 10 +------ ompi/errhandler/errhandler.c | 10 +++---- ompi/errhandler/errhandler.h | 8 ++--- ompi/errhandler/errhandler_invoke.c | 8 ++--- ompi/include/ompi/memchecker.h | 4 +-- 8 files changed, 53 insertions(+), 58 deletions(-) diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 264c1763f42..71dac2fa061 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -26,6 +26,7 @@ * Copyright (c) 2021 Nanook Consulting. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -215,6 +216,11 @@ int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, if (NULL == newcomm) { return OMPI_ERR_OUT_OF_RESOURCE; } + newcomm->c_name = (char*) malloc (OPAL_MAX_OBJECT_NAME); + if (NULL == newcomm->c_name) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + newcomm->c_name[0] = '\0'; newcomm->super.s_info = NULL; /* fill in the inscribing hyper-cube dimensions */ newcomm->c_cube_dim = opal_cube_dim(local_size); diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 393d7ce164f..59aa0f38422 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -25,6 +25,7 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -169,8 +170,7 @@ int ompi_comm_init(void) (void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 0, &ompi_mpi_comm_null); (void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 1, &ompi_mpi_comm_null); - opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", - sizeof(ompi_mpi_comm_null.comm.c_name)); + ompi_mpi_comm_null.comm.c_name = strdup ("MPI_COMM_NULL"); ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; @@ -221,8 +221,7 @@ int ompi_comm_init_mpi3 (void) OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world); - opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD", - sizeof(ompi_mpi_comm_world.comm.c_name)); + ompi_mpi_comm_world.comm.c_name = strdup("MPI_COMM_WORLD"); ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; ompi_mpi_comm_world.comm.instance = group->grp_instance; @@ -280,8 +279,7 @@ int ompi_comm_init_mpi3 (void) OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self); - opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF", - sizeof(ompi_mpi_comm_self.comm.c_name)); + ompi_mpi_comm_self.comm.c_name = strdup("MPI_COMM_SELF"); ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; ompi_mpi_comm_self.comm.instance = group->grp_instance; @@ -412,7 +410,7 @@ static int ompi_comm_finalize (void) static void ompi_comm_construct(ompi_communicator_t* comm) { int idx; - comm->c_name[0] = '\0'; + comm->c_name = NULL; comm->c_index = MPI_UNDEFINED; comm->c_flags = 0; comm->c_my_rank = 0; @@ -444,7 +442,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm) this communicator */ comm->c_keyhash = NULL; - comm->errhandler_type = 
OMPI_ERRHANDLER_TYPE_COMM; + comm->error_handler = &ompi_mpi_errors_are_fatal.eh; #ifdef OMPI_WANT_PERUSE comm->c_peruse_handles = NULL; #endif @@ -520,6 +518,11 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) comm->error_handler = NULL; } + if (NULL != comm->c_name) { + free (comm->c_name); + comm->c_name = NULL; + } + #if OPAL_ENABLE_FT_MPI if( NULL != comm->agreement_specific ) { OBJ_RELEASE( comm->agreement_specific ); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index fd9ef4aab8b..79e8d5f0588 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -24,6 +24,7 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -261,7 +262,7 @@ struct ompi_communicator_t { opal_infosubscriber_t super; opal_mutex_t c_lock; /* mutex for name and potentially attributes */ - char c_name[MPI_MAX_OBJECT_NAME]; + char *c_name; ompi_comm_extended_cid_t c_contextid; ompi_comm_extended_cid_block_t c_contextidb; uint32_t c_index; @@ -269,12 +270,22 @@ struct ompi_communicator_t { uint32_t c_flags; /* flags, e.g. intercomm, topology, etc. 
*/ uint32_t c_assertions; /* info assertions */ - int c_id_available; /* the currently available Cid for allocation - to a child*/ - int c_id_start_index; /* the starting index of the block of cids - allocated to this communicator*/ +#if OPAL_ENABLE_FT_MPI uint32_t c_epoch; /* Identifier used to differentiate between two communicators using the same c_contextid (not at the same time, obviously) */ +#endif + /* Non-blocking collective tag. These tags might be shared between + * all non-blocking collective modules (to avoid message collision + * between them in the case where multiple outstanding non-blocking + * collective coexists using multiple backends). + */ + opal_atomic_int32_t c_nbc_tag; + + /**< inscribing cube dimension */ + int c_cube_dim; + + /* index in Fortran <-> C translation array */ + int c_f_to_c_index; ompi_group_t *c_local_group; ompi_group_t *c_remote_group; @@ -287,16 +298,10 @@ struct ompi_communicator_t { /* Attributes */ struct opal_hash_table_t *c_keyhash; - /**< inscribing cube dimension */ - int c_cube_dim; - /* Standard information about the selected topology module (or NULL if this is not a cart, graph or dist graph communicator) */ struct mca_topo_base_module_t* c_topo; - /* index in Fortran <-> C translation array */ - int c_f_to_c_index; - #ifdef OMPI_WANT_PERUSE /* * Place holder for the PERUSE events. @@ -307,9 +312,7 @@ struct ompi_communicator_t { /* Error handling. This field does not have the "c_" prefix so that the OMPI_ERRHDL_* macros can find it, regardless of whether it's a comm, window, or file. */ - ompi_errhandler_t *error_handler; - ompi_errhandler_type_t errhandler_type; /* Hooks for PML to hang things */ struct mca_pml_comm_t *c_pml_comm; @@ -320,21 +323,14 @@ struct ompi_communicator_t { /* Collectives module interface and data */ mca_coll_base_comm_coll_t *c_coll; - /* Non-blocking collective tag. 
These tags might be shared between - * all non-blocking collective modules (to avoid message collision - * between them in the case where multiple outstanding non-blocking - * collective coexists using multiple backends). - */ - opal_atomic_int32_t c_nbc_tag; - /* instance that this comm belongs to */ ompi_instance_t* instance; #if OPAL_ENABLE_FT_MPI - /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ - int any_source_offset; /** agreement caching info for topology and previous returned decisions */ opal_object_t *agreement_specific; + /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ + int any_source_offset; /** Are MPI_ANY_SOURCE operations enabled? - OMPI_Comm_failure_ack */ bool any_source_enabled; /** Has this communicator been revoked - OMPI_Comm_revoke() */ @@ -437,7 +433,7 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the PREDEFINED_COMMUNICATOR_PAD macro? * A: Most likely not, but it would be good to check. */ -#define PREDEFINED_COMMUNICATOR_PAD 1024 +#define PREDEFINED_COMMUNICATOR_PAD 512 struct ompi_predefined_communicator_t { struct ompi_communicator_t comm; diff --git a/ompi/debuggers/predefined_gap_test.c b/ompi/debuggers/predefined_gap_test.c index 0129eb63a23..4ad207988a1 100644 --- a/ompi/debuggers/predefined_gap_test.c +++ b/ompi/debuggers/predefined_gap_test.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2009 Sun Microsystems, Inc All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 The University of Tennessee and The University + * Copyright (c) 2012-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
@@ -59,8 +59,6 @@ int main(int argc, char **argv) { GAP_CHECK("c_contextid", test_comm, c_contextid, c_name, 1); GAP_CHECK("c_my_rank", test_comm, c_my_rank, c_contextid, 1); GAP_CHECK("c_flags", test_comm, c_flags, c_my_rank, 1); - GAP_CHECK("c_id_available", test_comm, c_id_available, c_flags, 1); - GAP_CHECK("c_id_start_index", test_comm, c_id_start_index, c_id_available, 1); GAP_CHECK("c_remote_group", test_comm, c_remote_group, c_local_group, 1); GAP_CHECK("c_local_comm", test_comm, c_local_comm, c_remote_group, 1); GAP_CHECK("c_keyhash", test_comm, c_keyhash, c_local_comm, 1); @@ -73,8 +71,6 @@ int main(int argc, char **argv) { #else GAP_CHECK("error_handler", test_comm, error_handler, c_f_to_c_index, 1); #endif - GAP_CHECK("errhandler_type", test_comm, errhandler_type, error_handler, 1); - GAP_CHECK("c_pml_comm", test_comm, c_pml_comm, errhandler_type, 1); GAP_CHECK("c_coll", test_comm, c_coll, c_pml_comm, 1); /* Test Predefined group sizes */ @@ -129,8 +125,6 @@ int main(int argc, char **argv) { GAP_CHECK("w_keyhash", test_win, w_keyhash, w_flags, 1); GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1); GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1); - GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1); - GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1); /* Test Predefined info sizes */ printf("=============================================\n"); @@ -151,8 +145,6 @@ int main(int argc, char **argv) { GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1); GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1); GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1); - GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1); - GAP_CHECK("f_io_version", test_file, f_io_version, errhandler_type, 1); GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1); GAP_CHECK("f_io_selected_module", test_file, 
f_io_selected_module, f_io_selected_component, 1); GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1); diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 41c426905fc..bcdd2f345c6 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2022 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -388,10 +388,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo OMPI_NAME_PRINT(&ompi_proc->super.proc_name), ompi_comm_print_cid(comm), proc_rank, - (OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" : - (OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? "C" : - (OMPI_ERRHANDLER_TYPE_WIN == comm->errhandler_type ? "W" : - (OMPI_ERRHANDLER_TYPE_FILE == comm->errhandler_type ? "F" : "U") ) ) ) + (OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->error_handler->eh_mpi_object_type ? "P" : + (OMPI_ERRHANDLER_TYPE_COMM == comm->error_handler->eh_mpi_object_type ? "C" : + (OMPI_ERRHANDLER_TYPE_WIN == comm->error_handler->eh_mpi_object_type ? "W" : + (OMPI_ERRHANDLER_TYPE_FILE == comm->error_handler->eh_mpi_object_type ? "F" : "U") ) ) ) )); } diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 5b35ca0df1d..519a84d9961 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2022 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -238,7 +238,7 @@ extern opal_atomic_int32_t ompi_instance_count; #define OMPI_ERRHANDLER_INVOKE(mpi_object, err_code, message) \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int)(mpi_object)->errhandler_type, \ + (int)(mpi_object)->error_handler->eh_mpi_object_type, \ ompi_errcode_get_mpi_code(err_code), \ (message)); @@ -269,7 +269,7 @@ extern opal_atomic_int32_t ompi_instance_count; int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int) (mpi_object)->errhandler_type, \ + (int) (mpi_object)->error_handler->eh_mpi_object_type, \ (__mpi_err_code), \ (message)); \ return (__mpi_err_code); \ @@ -307,7 +307,7 @@ extern opal_atomic_int32_t ompi_instance_count; int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int)(mpi_object)->errhandler_type, \ + (int)(mpi_object)->error_handler->eh_mpi_object_type, \ (__mpi_err_code), \ (message)); \ return (__mpi_err_code); \ diff --git a/ompi/errhandler/errhandler_invoke.c b/ompi/errhandler/errhandler_invoke.c index 3cc7e2009fb..33526911765 100644 --- a/ompi/errhandler/errhandler_invoke.c +++ b/ompi/errhandler/errhandler_invoke.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -183,19 +183,19 @@ int ompi_errhandler_request_invoke(int count, case OMPI_REQUEST_COLL: return ompi_errhandler_invoke(mpi_object.comm->error_handler, mpi_object.comm, - mpi_object.comm->errhandler_type, + mpi_object.comm->error_handler->eh_mpi_object_type, ec, message); break; case OMPI_REQUEST_IO: return ompi_errhandler_invoke(mpi_object.file->error_handler, mpi_object.file, - mpi_object.file->errhandler_type, + mpi_object.file->error_handler->eh_mpi_object_type, ec, message); break; case OMPI_REQUEST_WIN: return ompi_errhandler_invoke(mpi_object.win->error_handler, mpi_object.win, - mpi_object.win->errhandler_type, + mpi_object.win->error_handler->eh_mpi_object_type, ec, message); break; default: diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index f6237ef9678..15b7111877f 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. - * Copyright (c) 2010-2017 The University of Tennessee and The University + * Copyright (c) 2010-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -220,8 +220,6 @@ static inline int memchecker_comm(MPI_Comm comm) opal_memchecker_base_isdefined (&comm->c_name, MPI_MAX_OBJECT_NAME); opal_memchecker_base_isdefined (&comm->c_my_rank, sizeof(int)); opal_memchecker_base_isdefined (&comm->c_flags, sizeof(uint32_t)); - opal_memchecker_base_isdefined (&comm->c_id_available, sizeof(int)); - opal_memchecker_base_isdefined (&comm->c_id_start_index, sizeof(int)); opal_memchecker_base_isdefined (&comm->c_local_group, sizeof(ompi_group_t *)); opal_memchecker_base_isdefined (&comm->c_remote_group, sizeof(ompi_group_t *)); opal_memchecker_base_isdefined (&comm->c_keyhash, sizeof(struct opal_hash_table_t *));