From 856b033bc3e4632218e4ed92c32993b521f4a0b9 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 15 Mar 2023 10:41:08 +0000 Subject: [PATCH 01/49] feat: Read in the metis dump files and generate a scotch graph --- scotch/metis2scotch.c | 186 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 scotch/metis2scotch.c diff --git a/scotch/metis2scotch.c b/scotch/metis2scotch.c new file mode 100644 index 0000000000..69f5f75882 --- /dev/null +++ b/scotch/metis2scotch.c @@ -0,0 +1,186 @@ +#include +#include +#include +#define MAX_LINE_LENGTH 1024 +#include + + +/* function declaration */ +void read_ncells(const char* filename, int *ncells); +void read_metis_edges(const char* filename, int *adjncy); +void read_metis_weights(const char* filename, int *weights_e, int *weights_v); + +int main () { + // Read in the metis simple file dump + char* edgesname = ""; + // Read in the metis weights file dump + char* weightsname = ""; + /* local variable definition */ + int ncells = 0; + read_ncells(edgesname, &ncells); + int *xadj; + if ((xadj = (int *)malloc(sizeof(int) * (ncells + 1))) == NULL) + printf("Failed to allocate xadj buffer."); + int *adjncy; + if ((adjncy = (int *)malloc(sizeof(int) * 26 * ncells)) == NULL) + printf("Failed to allocate adjncy array."); + int *weights_v = NULL; + if ((weights_v = (int *)malloc(sizeof(int) * ncells)) == NULL) + printf("Failed to allocate vertex weights array"); + int *weights_e = NULL; + if ((weights_e = (int *)malloc(26 * sizeof(int) * ncells)) == NULL) + printf("Failed to allocate edge weights array"); + int *regionid; + if ((regionid = (int *)malloc(sizeof(int) * ncells)) == NULL) + printf("Failed to allocate regionid array"); + + read_metis_edges(edgesname, adjncy); + read_metis_weights(weightsname, weights_e, weights_v); + // Setting up the Scotch graph + SCOTCH_Graph graph; + SCOTCH_Num baseval = 0; + SCOTCH_Num vertnbr = ncells; + SCOTCH_Num *verttab; /* Vertex array [vertnbr+1] */ + 
SCOTCH_Num *vendtab = NULL; /* Vertex array [vertnbr] */ + SCOTCH_Num *velotab; /* Vertex load array */ + SCOTCH_Num *vlbltab = NULL; /* Vertex label array */ + SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ + SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ + SCOTCH_Num *edlotab; + + verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); + velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); + edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); + edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); + + printf("Done the set up \n"); + int i; + for (i = 0; i <= vertnbr; i++) { + verttab[i] = i*26; + velotab[i] = weights_v[i]; + } + + for (i = 0; i < edgenbr; i++) { + edgetab[i] = adjncy[i]; + edlotab[i] = weights_e[i]; + } + + printf("Initialise graph \n"); + SCOTCH_graphInit(&graph); + + if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { + printf("Error: Cannot build Scotch Graph.\n"); + exit(EXIT_FAILURE); + } + + printf("Scotch Graph built successfully.\n"); + + FILE *file = fopen("", "w"); + if (file == NULL) { + printf("Error: Cannot open output file.\n"); + exit(EXIT_FAILURE); + } + + if (SCOTCH_graphSave(&graph, file) != 0) { + printf("Error: Cannot save Scotch Graph.\n"); + exit(EXIT_FAILURE); + } + + printf("Scotch Graph saved to file.\n"); + + fclose(file); + SCOTCH_graphExit(&graph); + + // Free memory + free(verttab); + free(velotab); + free(edgetab); + free(edlotab); + free(xadj); + free(adjncy); + free(weights_v); + free(weights_e); + free(regionid); + return 0; +} + + +void read_ncells(const char* filename, int *ncells) { + // Read in the number of cells/vertices + FILE* fp = fopen(filename, "r"); + if (fp == NULL) { + printf("printf opening file %s\n", filename); + return; + } + char line[MAX_LINE_LENGTH]; + int line_num = 0; + if (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { + char * pch; + pch = strtok (line," "); + *ncells = 
atoi(pch); + } + fclose(fp); +} + + + +void read_metis_edges(const char* filename, int *adjncy) { + // Read in the vertex neighbours + FILE* fp = fopen(filename, "r"); + if (fp == NULL) { + printf("printf opening file %s\n", filename); + return; + } + char line[MAX_LINE_LENGTH]; + int index = 0; + int line_num = 0; + while (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { + if (line_num > 0) { + char * pch; + pch = strtok(line," ,.-"); + while (pch != NULL){ + adjncy[index] = atoi(pch); + pch = strtok (NULL, " ,.-"); + index +=1; + } + } + line_num += 1; + } + fclose(fp); +} + +void read_metis_weights(const char* filename, int *weights_e, int *weights_v) { + // Read in the vertex and edge weights + FILE* fp = fopen(filename, "r"); + if (fp == NULL) { + printf("printf opening file %s\n", filename); + return; + } + + char line[MAX_LINE_LENGTH]; + int v_index = 0; + int e_index = 0; + int line_num = 0; + while (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { + int vertex_ind = 0; + if (line_num > 0) { + printf ("Line number is %i\n",line_num); + char * pch; + pch = strtok(line," ,.-"); + weights_v[v_index] = atoi(pch); + printf ("Vertex weight is %s\n",pch); + while (pch != NULL){ + if (vertex_ind>0){ + printf ("Edge Weight is %s\n",pch); + weights_e[e_index] = atoi(pch); + e_index +=1; + } + vertex_ind += 1; + pch = strtok (NULL, " ,.-"); + } + v_index+=1; + } + line_num += 1; + } + fclose(fp); +} \ No newline at end of file From 386304bdaa87a113087a638838d3482ac2257a2a Mon Sep 17 00:00:00 2001 From: ucakdpg Date: Thu, 20 Apr 2023 12:00:01 +0100 Subject: [PATCH 02/49] outputting the graph structure --- examples/EAGLE_low_z/EAGLE_25/eagle_25.yml | 2 +- examples/EAGLE_low_z/EAGLE_6/eagle_6.yml | 2 +- src/partition.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 03ab1520be..6a49eb7812 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ 
b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -94,7 +94,7 @@ FOF: # Parameters related to the initial conditions InitialConditions: - file_name: ./EAGLE_ICs_25.hdf5 # The file to read + file_name: ./EAGLE_ICs_25_deflated.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index 963821e177..22d6621f5c 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -96,7 +96,7 @@ FOF: # Parameters related to the initial conditions InitialConditions: - file_name: ./EAGLE_ICs_6.hdf5 # The file to read + file_name: ./EAGLE_ICs_6_deflated.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/src/partition.c b/src/partition.c index b038aaf699..137fe7418c 100644 --- a/src/partition.c +++ b/src/partition.c @@ -569,8 +569,8 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. 
*/ - /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, - s->nr_cells);*/ + if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, + s->nr_cells); } #endif @@ -1318,8 +1318,8 @@ static void pick_metis(int nodeID, struct space *s, int nregions, idx_t objval; /* Dump graph in METIS format */ - /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, - NULL, weights_e);*/ + dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, + NULL, weights_e); if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, weights_e, &idx_nregions, NULL, NULL, options, From 83533a027ae0c0054a1daa6b9fa96df584892220 Mon Sep 17 00:00:00 2001 From: ucakdpg Date: Thu, 20 Apr 2023 13:04:15 +0100 Subject: [PATCH 03/49] feat: Building with Scotch --- configure.ac | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/configure.ac b/configure.ac index d3b7fe301c..6408528b65 100644 --- a/configure.ac +++ b/configure.ac @@ -778,6 +778,41 @@ AC_CHECK_LIB(pthread, posix_fallocate, AC_DEFINE([HAVE_POSIX_FALLOCATE], [1], [The posix library implements file allocation functions.]), AC_MSG_WARN(POSIX implementation does not have file allocation functions.)) +# Check for SCOTCH. +have_scotch="no" +AC_ARG_WITH([scotch], + [AS_HELP_STRING([--with-scotch=PATH], + [root directory where SCOTCH is installed @<:@yes/no@:>@] + )], + [with_scotch="$withval"], + [with_scotch="no"] +) + +SCOTCH_LIBS="" +if test "x$with_scotch" != "xno"; then + +# Check if we have SCOTCH. 
+ if test "x$with_scotch" != "xyes" -a "x$with_scotch" != "x"; then + SCOTCH_LIBS="-L$with_scotch/lib -lscotch -lscotcherr" + SCOTCH_INCS="-I$with_scotch/include" + else + SCOTCH_LIBS="-lscotch" + SCOTCH_INCS="" + fi + AC_CHECK_LIB([scotch],[SCOTCH_graphInit], [have_scotch="yes"], + [have_scotch="no"], $SCOTCH_LIBS) + if test "$have_scotch" = "yes"; then + AC_DEFINE([HAVE_SCOTCH],1,[The SCOTCH library is present.]) + else + AC_MSG_ERROR("Failed to find a SCOTCH library") + fi +fi + +AC_SUBST([SCOTCH_LIBS]) +AC_SUBST([SCOTCH_INCS]) +AM_CONDITIONAL([HAVESCOTCH],[test -n "$SCOTCH_LIBS"]) + + # Check for METIS. have_metis="no" AC_ARG_WITH([metis], @@ -2993,6 +3028,7 @@ AC_MSG_RESULT([ HDF5 enabled : $with_hdf5 - parallel : $have_parallel_hdf5 METIS/ParMETIS : $have_metis / $have_parmetis + Scotch : $have_scotch FFTW3 enabled : $have_fftw - threaded/openmp : $have_threaded_fftw / $have_openmp_fftw - MPI : $have_mpi_fftw From 4783ed55d47ddc8c8653c211affd1eedd016b174 Mon Sep 17 00:00:00 2001 From: ucakdpg Date: Mon, 24 Apr 2023 13:30:57 +0100 Subject: [PATCH 04/49] Updating Makefile and adding pick-scotch function --- Makefile.am | 4 +- src/partition.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 212 insertions(+), 7 deletions(-) diff --git a/Makefile.am b/Makefile.am index 3ca9fd5e74..3420209f2f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -48,8 +48,8 @@ EXTRA_LIBS = $(GSL_LIBS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(PROFILER_LIBS) $(CHEALPIX_LIBS) # MPI libraries. -MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) $(FFTW_MPI_LIBS) -MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) $(FFTW_MPI_INCS) +MPI_LIBS = $(SCOTCH_LIBS) $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) $(FFTW_MPI_LIBS) +MPI_FLAGS = -DWITH_MPI $(SCOTCH_INCS) $(PARMETIS_INCS) $(METIS_INCS) $(FFTW_MPI_INCS) # Programs. 
bin_PROGRAMS = swift diff --git a/src/partition.c b/src/partition.c index 286fb01641..8188152ea2 100644 --- a/src/partition.c +++ b/src/partition.c @@ -51,6 +51,10 @@ #ifdef HAVE_METIS #include #endif +/* SCOTCH headers only used when MPI is also available. */ +#ifdef HAVE_SCOTCH +#include +#endif #endif /* Local headers. */ @@ -569,8 +573,8 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, - s->nr_cells); + /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, + s->nr_cells);*/ } #endif @@ -1318,8 +1322,8 @@ static void pick_metis(int nodeID, struct space *s, int nregions, idx_t objval; /* Dump graph in METIS format */ - dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, - NULL, weights_e); + /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, + NULL, weights_e);*/ if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, weights_e, &idx_nregions, NULL, NULL, options, @@ -1349,6 +1353,205 @@ static void pick_metis(int nodeID, struct space *s, int nregions, } #endif +#if defined(WITH_MPI) && defined(HAVE_SCOTCH) +/** + * @brief Partition the given space into a number of connected regions and + * map to available architecture. + * + * Split the space and map to compute architecture using Scotch. to derive + * a partitions using the given edge and vertex weights. If no weights + * are given then an unweighted partition is performed. + * + * @param nodeID the rank of our node. + * @param s the space of cells to partition. + * @param nregions the number of regions required in the partition. + * @param vertexw weights for the cells, sizeof number of cells if used, + * NULL for unit weights. Need to be in the range of idx_t. 
+ * @param edgew weights for the graph edges between all cells, sizeof number + * of cells * 26 if used, NULL for unit weights. Need to be packed + * in CSR format, so same as adjncy array. Need to be in the range of + * idx_t. + * @param celllist on exit this contains the ids of the selected regions, + * sizeof number of cells. + */ +static void pick_scotch(int nodeID, struct space *s, int nregions, + double *vertexw, double *edgew, int *celllist) { + + /* Total number of cells. */ + int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; + + /* Nothing much to do if only using a single partition. Also avoids METIS + * bug that doesn't handle this case well. */ + if (nregions == 1) { + for (int i = 0; i < ncells; i++) celllist[i] = 0; + return; + } + + /* Only one node needs to calculate this. */ + if (nodeID == 0) { + + /* Allocate adjacency and weights arrays . */ + idx_t *xadj; + if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL) + error("Failed to allocate xadj buffer."); + idx_t *adjncy; + if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + error("Failed to allocate adjncy array."); + idx_t *weights_v = NULL; + if (vertexw != NULL) + if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate vertex weights array"); + idx_t *weights_e = NULL; + if (edgew != NULL) + if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate edge weights array"); + idx_t *regionid; + if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate regionid array"); + + /* Init the vertex weights array. */ + if (vertexw != NULL) { + for (int k = 0; k < ncells; k++) { + if (vertexw[k] > 1) { + weights_v[k] = vertexw[k]; + } else { + weights_v[k] = 0; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. 
*/ + int failed = 0; + for (int k = 0; k < ncells; k++) { + if ((idx_t)vertexw[k] < 0) { + message("Input vertex weight out of range: %ld", (long)vertexw[k]); + failed++; + } + if (weights_v[k] < 0) { + message("Used vertex weight out of range: %" PRIDX, weights_v[k]); + failed++; + } + } + if (failed > 0) error("%d vertex weights are out of range", failed); +#endif + } + + /* Init the edges weights array. */ + + if (edgew != NULL) { + for (int k = 0; k < 26 * ncells; k++) { + if (edgew[k] > 1) { + weights_e[k] = edgew[k]; + } else { + weights_e[k] = 1; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check weights are all in range. */ + int failed = 0; + for (int k = 0; k < 26 * ncells; k++) { + + if ((idx_t)edgew[k] < 0) { + message("Input edge weight out of range: %ld", (long)edgew[k]); + failed++; + } + if (weights_e[k] < 1) { + message("Used edge weight out of range: %" PRIDX, weights_e[k]); + failed++; + } + } + if (failed > 0) error("%d edge weights are out of range", failed); +#endif + } + + /* Define the cell graph. Keeping the edge weights association. 
*/ + int nadjcny = 0; + int nxadj = 0; + // Setting up the Scotch graph + SCOTCH_Graph graph; + SCOTCH_Num baseval = 0; + SCOTCH_Num vertnbr = ncells; + SCOTCH_Num *verttab; /* Vertex array [vertnbr+1] */ + SCOTCH_Num *vendtab = NULL; /* Vertex array [vertnbr] */ + SCOTCH_Num *velotab; /* Vertex load array */ + SCOTCH_Num *vlbltab = NULL; /* Vertex label array */ + SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ + SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ + SCOTCH_Num *edlotab; + + verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); + velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); + edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); + edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); + + printf("Done the set up \n"); + int i; + for (i = 0; i <= vertnbr; i++) { + verttab[i] = i*26; + velotab[i] = weights_v[i]; + } + + for (i = 0; i < edgenbr; i++) { + edgetab[i] = adjncy[i]; + edlotab[i] = weights_e[i]; + } + + printf("Initialise graph \n"); + SCOTCH_graphInit(&graph); + + if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { + error("Error: Cannot build Scotch Graph.\n"); + } + + printf("Scotch Graph built successfully.\n"); + + /* Dump graph in METIS format */ + dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, + NULL, weights_e); + + /* Read in architecture graph. 
*/ + SCOTCH_Arch archdat; + SCOTCH_Strat stradat; + /* Load the architecture graph in .tgt format */ + FILE* arch_file = fopen("cosma_node_numad_deco.tgt", "r"); + if (SCOTCH_archLoad(&archdat, arch_file) != 1) + error("Error loading architecture graph"); + + SCOTCH_stratInit(&stradat); + + /* Set the mapping strategy options */ + const char* strat = "x"; + if (SCOTCH_stratGraphMap(&stradat, strat) != 1) + error("Error Scotch strategy initialisation failed."); + + /* Map the computation graph to the architecture graph */ + if (SCOTCH_graphMap(&grafdat, &archdat, &stradat, regionid) != 1) + error("Error Scotch mapping failed."); + + /* Check that the regionids are ok. */ + for (int k = 0; k < ncells; k++) { + if (regionid[k] < 0 || regionid[k] >= nregions) + error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k); + + /* And keep. */ + celllist[k] = regionid[k]; + } + + /* Clean up. */ + if (weights_v != NULL) free(weights_v); + if (weights_e != NULL) free(weights_e); + free(xadj); + free(adjncy); + free(regionid); + } + + /* Calculations all done, now everyone gets a copy. */ + int res = MPI_Bcast(celllist, ncells, MPI_INT, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to broadcast new celllist"); +} +#endif + #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /* Helper struct for partition_gather weights. */ @@ -1699,7 +1902,9 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, } /* And repartition/ partition, using both weights or not as requested. 
*/ -#ifdef HAVE_PARMETIS +#ifdef HAVE_SCOTCH + pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); +#elif HAVE_PARMETIS if (repartition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); From 4486162219061fb216100e9dffb7bb665c0fbd97 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 24 May 2023 10:58:25 +0100 Subject: [PATCH 05/49] fix: Builds on Myriad --- src/partition.c | 172 ++++++++++++++++++++++++++++++++++++++---------- src/partition.h | 3 +- 2 files changed, 138 insertions(+), 37 deletions(-) diff --git a/src/partition.c b/src/partition.c index 8188152ea2..a9c764411c 100644 --- a/src/partition.c +++ b/src/partition.c @@ -53,7 +53,7 @@ #endif /* SCOTCH headers only used when MPI is also available. */ #ifdef HAVE_SCOTCH -#include +#include #endif #endif @@ -67,6 +67,9 @@ #include "threadpool.h" #include "tools.h" +#define IDX_MAX INT32_MAX +#define IDX_MIN INT32_MIN + /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { "axis aligned grids of cells", "vectorized point associated cells", @@ -87,7 +90,7 @@ static int check_complete(struct space *s, int verbose, int nregions); * Repartition fixed costs per type/subtype. These are determined from the * statistics output produced when running with task debugging enabled. 
*/ -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) static double repartition_costs[task_type_count][task_subtype_count]; #endif #if defined(WITH_MPI) @@ -330,7 +333,7 @@ static void graph_init(struct space *s, int periodic, idx_t *weights_e, } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) struct counts_mapper_data { double *counts; size_t size; @@ -560,7 +563,7 @@ static void sizes_to_edges(struct space *s, double *counts, double *edges) { } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /** * @brief Apply METIS cell-list partitioning to a cell structure. * @@ -578,7 +581,7 @@ static void split_metis(struct space *s, int nregions, int *celllist) { } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* qsort support. */ struct indexval { @@ -1391,22 +1394,22 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (nodeID == 0) { /* Allocate adjacency and weights arrays . 
*/ - idx_t *xadj; - if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL) + SCOTCH_Num *xadj; + if ((xadj = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (ncells + 1))) == NULL) error("Failed to allocate xadj buffer."); - idx_t *adjncy; - if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) + SCOTCH_Num *adjncy; + if ((adjncy = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * ncells)) == NULL) error("Failed to allocate adjncy array."); - idx_t *weights_v = NULL; + SCOTCH_Num *weights_v = NULL; if (vertexw != NULL) - if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + if ((weights_v = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * ncells)) == NULL) error("Failed to allocate vertex weights array"); - idx_t *weights_e = NULL; + SCOTCH_Num *weights_e = NULL; if (edgew != NULL) - if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL) + if ((weights_e = (SCOTCH_Num *)malloc(26 * sizeof(SCOTCH_Num) * ncells)) == NULL) error("Failed to allocate edge weights array"); - idx_t *regionid; - if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + SCOTCH_Num *regionid; + if ((regionid = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * ncells)) == NULL) error("Failed to allocate regionid array"); /* Init the vertex weights array. */ @@ -1423,7 +1426,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Check weights are all in range. 
*/ int failed = 0; for (int k = 0; k < ncells; k++) { - if ((idx_t)vertexw[k] < 0) { + if ((SCOTCH_Num)vertexw[k] < 0) { message("Input vertex weight out of range: %ld", (long)vertexw[k]); failed++; } @@ -1452,7 +1455,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int failed = 0; for (int k = 0; k < 26 * ncells; k++) { - if ((idx_t)edgew[k] < 0) { + if ((SCOTCH_Num)edgew[k] < 0) { message("Input edge weight out of range: %ld", (long)edgew[k]); failed++; } @@ -1466,8 +1469,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, } /* Define the cell graph. Keeping the edge weights association. */ - int nadjcny = 0; - int nxadj = 0; // Setting up the Scotch graph SCOTCH_Graph graph; SCOTCH_Num baseval = 0; @@ -1475,7 +1476,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num *verttab; /* Vertex array [vertnbr+1] */ SCOTCH_Num *vendtab = NULL; /* Vertex array [vertnbr] */ SCOTCH_Num *velotab; /* Vertex load array */ - SCOTCH_Num *vlbltab = NULL; /* Vertex label array */ SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ SCOTCH_Num *edlotab; @@ -1506,9 +1506,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, printf("Scotch Graph built successfully.\n"); - /* Dump graph in METIS format */ - dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, - NULL, weights_e); + // /* Dump graph in METIS format */ + // /* Read in architecture graph. */ SCOTCH_Arch archdat; @@ -1526,13 +1525,13 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, error("Error Scotch strategy initialisation failed."); /* Map the computation graph to the architecture graph */ - if (SCOTCH_graphMap(&grafdat, &archdat, &stradat, regionid) != 1) + if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 1) error("Error Scotch mapping failed."); /* Check that the regionids are ok. 
*/ for (int k = 0; k < ncells; k++) { if (regionid[k] < 0 || regionid[k] >= nregions) - error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k); + error("Got bad nodeID for cell"); /* And keep. */ celllist[k] = regionid[k]; @@ -1902,9 +1901,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, } /* And repartition/ partition, using both weights or not as requested. */ -#ifdef HAVE_SCOTCH - pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); -#elif HAVE_PARMETIS +#ifdef HAVE_PARMETIS if (repartition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); @@ -2047,7 +2044,93 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, /* And apply to our cells */ split_metis(s, nr_nodes, repartition->celllist); } -#endif /* WITH_MPI && (HAVE_METIS || HAVE_PARMETIS) */ +#endif /* WITH_MPI && HAVE_METIS || HAVE_PARMETIS */ + +#if WITH_MPI && HAVE_SCOTCH +/** + * @brief Repartition the cells amongst the nodes using weights based on + * the memory use of particles in the cells. + * + * @param repartition the partition struct of the local engine. + * @param nodeID our nodeID. + * @param nr_nodes the number of nodes. + * @param s the space of cells holding our local particles. + */ +static void repart_scotch(struct repartition *repartition, int nodeID, + int nr_nodes, struct space *s) { + + /* Space for counts of particle memory use per cell. */ + double *weights = NULL; + if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) + error("Failed to allocate cell weights buffer."); + + /* Check each particle and accumulate the sizes per cell. */ + accumulate_sizes(s, s->e->verbose, weights); + + /* Allocate cell list for the partition. If not already done. 
*/ +#ifdef HAVE_SCOTCH + int refine = 1; +#endif + if (repartition->ncelllist != s->nr_cells) { +#ifdef HAVE_SCOTCH + refine = 0; +#endif + free(repartition->celllist); + repartition->ncelllist = 0; + if ((repartition->celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == + NULL) + error("Failed to allocate celllist"); + repartition->ncelllist = s->nr_cells; + } + + /* We need to rescale the sum of the weights so that the sum is + * less than IDX_MAX, that is the range of idx_t. */ + double sum = 0.0; + for (int k = 0; k < s->nr_cells; k++) sum += weights[k]; + if (sum > (double)IDX_MAX) { + double scale = (double)(IDX_MAX - 1000) / sum; + for (int k = 0; k < s->nr_cells; k++) weights[k] *= scale; + } + + /* And repartition. */ +#ifdef HAVE_SCOTCH + pick_scotch(nodeID, s, nr_nodes, weights, NULL, repartition->celllist); +#endif + + /* Check that all cells have good values. All nodes have same copy, so just + * check on one. */ + if (nodeID == 0) { + for (int k = 0; k < s->nr_cells; k++) + if (repartition->celllist[k] < 0 || repartition->celllist[k] >= nr_nodes) + error("Got bad nodeID %d for cell %i.", repartition->celllist[k], k); + } + + /* Check that the partition is complete and all nodes have some cells. */ + int present[nr_nodes]; + int failed = 0; + for (int i = 0; i < nr_nodes; i++) present[i] = 0; + for (int i = 0; i < s->nr_cells; i++) present[repartition->celllist[i]]++; + for (int i = 0; i < nr_nodes; i++) { + if (!present[i]) { + failed = 1; + if (nodeID == 0) message("Node %d is not present after repartition", i); + } + } + + /* If partition failed continue with the current one, but make this clear. 
*/ + if (failed) { + if (nodeID == 0) + message( + "WARNING: repartition has failed, continuing with the current" + " partition, load balance will not be optimal"); + for (int k = 0; k < s->nr_cells; k++) + repartition->celllist[k] = s->cells_top[k].nodeID; + } + + /* And apply to our cells */ + split_metis(s, nr_nodes, repartition->celllist); +} +#endif /* WITH_MPI && HAVE_SCOTCH */ /** * @brief Repartition the space using the given repartition type. @@ -2084,10 +2167,22 @@ void partition_repartition(struct repartition *reparttype, int nodeID, } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS) { repart_memory_metis(reparttype, nodeID, nr_nodes, s); - + } else if (reparttype->type == REPART_NONE) { /* Doing nothing. */ + } else { + error("Impossible repartition type"); + } + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#elif defined(WITH_MPI) && defined(HAVE_SCOTCH) + ticks tic = getticks(); + if (reparttype->type == REPART_SCOTCH) { + repart_scotch(reparttype, nodeID, nr_nodes, s); } else { error("Impossible repartition type"); } @@ -2096,7 +2191,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); #else - error("SWIFT was not compiled with METIS or ParMETIS support."); + error("SWIFT was not compiled with METIS, ParMETIS or Scotch support."); #endif } @@ -2152,7 +2247,7 @@ void partition_initial_partition(struct partition *initial_partition, } else if (initial_partition->type == INITPART_METIS_WEIGHT || initial_partition->type == INITPART_METIS_WEIGHT_EDGE || initial_partition->type == INITPART_METIS_NOWEIGHT) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Simple k-way partition selected by METIS using cell particle * counts as weights or not. 
Should be best when starting with a * inhomogeneous dist. @@ -2188,7 +2283,9 @@ void partition_initial_partition(struct partition *initial_partition, int *celllist = NULL; if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate celllist"); -#ifdef HAVE_PARMETIS +#ifdef HAVE_SCOTCH + pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); +#elif HAVE_PARMETIS if (initial_partition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); } else { @@ -2328,7 +2425,7 @@ void partition_init(struct partition *partition, if (strcmp("none", part_type) == 0) { repartition->type = REPART_NONE; -#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) +#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH) } else if (strcmp("fullcosts", part_type) == 0) { repartition->type = REPART_METIS_VERTEX_EDGE_COSTS; @@ -2340,6 +2437,9 @@ void partition_init(struct partition *partition, } else if (strcmp("timecosts", part_type) == 0) { repartition->type = REPART_METIS_VERTEX_COSTS_TIMEBINS; + + } else if (strcmp("scotch", part_type) == 0) { + repartition->type = REPART_SCOTCH; } else { message("Invalid choice of re-partition type '%s'.", part_type); @@ -2432,7 +2532,7 @@ void partition_init(struct partition *partition, */ static int repart_init_fixed_costs(void) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Set the default fixed cost. 
*/ for (int j = 0; j < task_type_count; j++) { for (int k = 0; k < task_subtype_count; k++) { @@ -2485,7 +2585,7 @@ static int check_complete(struct space *s, int verbose, int nregions) { return (!failed); } -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) #ifdef SWIFT_DEBUG_CHECKS /** * @brief Check that the threadpool version of the weights construction is diff --git a/src/partition.h b/src/partition.h index 8f6dbbd148..8a6eba8915 100644 --- a/src/partition.h +++ b/src/partition.h @@ -48,7 +48,8 @@ enum repartition_type { REPART_METIS_VERTEX_EDGE_COSTS, REPART_METIS_EDGE_COSTS, REPART_METIS_VERTEX_COUNTS, - REPART_METIS_VERTEX_COSTS_TIMEBINS + REPART_METIS_VERTEX_COSTS_TIMEBINS, + REPART_SCOTCH }; /* Repartition preferences. */ From b0302b4755040e1bff38e0cea5add6888215044a Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 24 May 2023 11:00:36 +0100 Subject: [PATCH 06/49] Update partition.c --- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index 54fd19fc93..11307fa90f 100644 --- a/src/partition.c +++ b/src/partition.c @@ -53,7 +53,7 @@ #endif /* SCOTCH headers only used when MPI is also available. */ #ifdef HAVE_SCOTCH -#include +#include #endif #endif From ae3533a2588e33628cb8b5927da985302363bda1 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Thu, 1 Jun 2023 15:19:39 +0100 Subject: [PATCH 07/49] Correction to scotch functions --- src/partition.c | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/partition.c b/src/partition.c index 54fd19fc93..4c7d606dcb 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1392,7 +1392,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Only one node needs to calculate this. */ if (nodeID == 0) { - /* Allocate adjacency and weights arrays . 
*/ SCOTCH_Num *xadj; if ((xadj = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (ncells + 1))) == NULL) @@ -1485,7 +1484,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - printf("Done the set up \n"); + int i; for (i = 0; i <= vertnbr; i++) { verttab[i] = i*26; @@ -1497,42 +1496,47 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, edlotab[i] = weights_e[i]; } - printf("Initialise graph \n"); SCOTCH_graphInit(&graph); if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { error("Error: Cannot build Scotch Graph.\n"); } - printf("Scotch Graph built successfully.\n"); + // /* Dump graph in Scotch format */ + FILE *file = fopen("test_scotch.grf", "w"); + if (file == NULL) { + printf("Error: Cannot open output file.\n"); + } - // /* Dump graph in METIS format */ - // + if (SCOTCH_graphSave(&graph, file) != 0) { + printf("Error: Cannot save Scotch Graph.\n"); + } /* Read in architecture graph. 
*/ SCOTCH_Arch archdat; SCOTCH_Strat stradat; /* Load the architecture graph in .tgt format */ - FILE* arch_file = fopen("cosma_node_numad_deco.tgt", "r"); - if (SCOTCH_archLoad(&archdat, arch_file) != 1) + FILE* arch_file = fopen("test.tgt", "r"); + if (arch_file == NULL) { + printf("Error: Cannot open topo file.\n"); + } + if (SCOTCH_archLoad(&archdat, arch_file) != 0) error("Error loading architecture graph"); SCOTCH_stratInit(&stradat); - /* Set the mapping strategy options */ - const char* strat = "x"; - if (SCOTCH_stratGraphMap(&stradat, strat) != 1) - error("Error Scotch strategy initialisation failed."); - /* Map the computation graph to the architecture graph */ - if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 1) + if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); + printf("Scotch mapping done.\n"); + printf("number of regions %i", nregions); /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { - if (regionid[k] < 0 || regionid[k] >= nregions) - error("Got bad nodeID for cell"); - + if (regionid[k] < 0 || regionid[k] >= nregions){ + //error("Got bad nodeID for cell"); + printf("Bad Vertex %d is assigned to architecture block %d\n", k, regionid[k]); + } /* And keep. 
*/ celllist[k] = regionid[k]; } @@ -2288,7 +2292,9 @@ void partition_initial_partition(struct partition *initial_partition, if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate celllist"); #ifdef HAVE_SCOTCH + message("Trying our best with Scotch"); pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); + message("Finished running pick scotch"); #elif HAVE_PARMETIS if (initial_partition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); @@ -2299,7 +2305,7 @@ void partition_initial_partition(struct partition *initial_partition, #else pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); #endif - + message("splitting cells now"); /* And apply to our cells */ split_metis(s, nr_nodes, celllist); @@ -2369,11 +2375,15 @@ void partition_init(struct partition *partition, #if defined(HAVE_METIS) || defined(HAVE_PARMETIS) const char *default_repart = "fullcosts"; const char *default_part = "edgememory"; +#elif defined(HAVE_SCOTCH) + const char *default_repart = "scotch"; + const char *default_part = "edgememory"; #else const char *default_repart = "none"; const char *default_part = "grid"; #endif + /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. 
*/ factor(nr_nodes, &partition->grid[0], &partition->grid[1]); factor(nr_nodes / partition->grid[1], &partition->grid[0], @@ -2392,7 +2402,7 @@ void partition_init(struct partition *partition, case 'v': partition->type = INITPART_VECTORIZE; break; -#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) +#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH) case 'r': partition->type = INITPART_METIS_NOWEIGHT; break; @@ -2457,6 +2467,7 @@ void partition_init(struct partition *partition, "Permitted values are: 'none' when compiled without " "METIS or ParMETIS."); #endif + message("Choice of re-partition type '%s'.", part_type); } /* Get the fraction CPU time difference between nodes (<1) or the number From 89eb8343e26303d3f2090041102f9bb6c4172ca9 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 5 Jun 2023 10:39:36 +0100 Subject: [PATCH 08/49] Adding test tgt file --- examples/EAGLE_low_z/EAGLE_6/8.tgt | 1 + 1 file changed, 1 insertion(+) create mode 100644 examples/EAGLE_low_z/EAGLE_6/8.tgt diff --git a/examples/EAGLE_low_z/EAGLE_6/8.tgt b/examples/EAGLE_low_z/EAGLE_6/8.tgt new file mode 100644 index 0000000000..803b501d6c --- /dev/null +++ b/examples/EAGLE_low_z/EAGLE_6/8.tgt @@ -0,0 +1 @@ +tleaf 3 2 100 2 50 2 10 From dbd3f951d7f046487c987ccf9d8b8ccc9069a4f0 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 5 Jun 2023 13:37:28 +0100 Subject: [PATCH 09/49] Addition of the .tgt which represents a Myriad node --- examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt | 1 + examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt | 1 + 2 files changed, 2 insertions(+) create mode 100644 examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt create mode 100644 examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt diff --git a/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt b/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt new file mode 100644 index 0000000000..ed521f92ce --- /dev/null +++ b/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt @@ -0,0 +1 @@ +tleaf 1 2 100 \ No newline at end of file 
diff --git a/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt b/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt new file mode 100644 index 0000000000..803b501d6c --- /dev/null +++ b/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt @@ -0,0 +1 @@ +tleaf 3 2 100 2 50 2 10 From 1ee20ec4c5c6fd72ac8f0435ddb31c08dd00aa55 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 7 Jun 2023 17:58:23 +0100 Subject: [PATCH 10/49] fix: comm matrix passed to scotch --- src/engine.c | 2 +- src/partition.c | 126 ++++++++++++++++++++++++++++++++++++++++-------- swift.c | 2 + swift_fof.c | 2 + 4 files changed, 112 insertions(+), 20 deletions(-) diff --git a/src/engine.c b/src/engine.c index 474f0a5b0e..6f25683e0e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -181,7 +181,7 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { */ void engine_repartition(struct engine *e) { -#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS)) +#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS) || defined(HAVE_SCOTCH)) ticks tic = getticks(); diff --git a/src/partition.c b/src/partition.c index 1e8915efd6..0a3914b4ab 100644 --- a/src/partition.c +++ b/src/partition.c @@ -576,8 +576,8 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, - s->nr_cells);*/ + if (engine_rank == 0) dumpCellRanks("partition", s->cells_top, + s->nr_cells); } #endif @@ -1357,6 +1357,88 @@ static void pick_metis(int nodeID, struct space *s, int nregions, #endif #if defined(WITH_MPI) && defined(HAVE_SCOTCH) +/** + * @brief Fill the adjncy array defining the graph of cells in a space. + * + * See the ParMETIS and METIS manuals if you want to understand this + * format. 
The cell graph consists of all nodes as vertices with edges as the + * connections to all neighbours, so we have 26 per vertex for periodic + * boundary, fewer than 26 on the space edges when non-periodic. Note you will + * also need an xadj array, for METIS that would be: + * + * xadj[0] = 0; + * for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; + * + * but each rank needs a different xadj when using ParMETIS (each segment + * should be rezeroed). + * + * @param s the space of cells. + * @param periodic whether to assume a periodic space (fixed 26 edges). + * @param weights_e the edge weights for the cells, if used. On input + * assumed to be ordered with a fixed 26 edges per cell, so + * will need reordering for non-periodic spaces. + * @param adjncy the adjncy array to fill, must be of size 26 * the number of + * cells in the space. + * @param nadjcny number of adjncy elements used, can be less if not periodic. + * @param xadj the METIS xadj array to fill, must be of size + * number of cells in space + 1. NULL for not used. + * @param nxadj the number of xadj element used. + */ +static void graph_init_scotch(struct space *s, int periodic, SCOTCH_Num *weights_e, + SCOTCH_Num *adjncy, int *nadjcny, SCOTCH_Num *xadj, int *nxadj) { + + /* Loop over all cells in the space. */ + *nadjcny = 0; + int cid = 0; + for (int l = 0; l < s->cdim[0]; l++) { + for (int m = 0; m < s->cdim[1]; m++) { + for (int n = 0; n < s->cdim[2]; n++) { + + /* Visit all neighbours of this cell, wrapping space at edges. */ + int p = 0; + for (int i = -1; i <= 1; i++) { + int ii = l + i; + if (ii < 0) + ii += s->cdim[0]; + else if (ii >= s->cdim[0]) + ii -= s->cdim[0]; + for (int j = -1; j <= 1; j++) { + int jj = m + j; + if (jj < 0) + jj += s->cdim[1]; + else if (jj >= s->cdim[1]) + jj -= s->cdim[1]; + for (int k = -1; k <= 1; k++) { + int kk = n + k; + if (kk < 0) + kk += s->cdim[2]; + else if (kk >= s->cdim[2]) + kk -= s->cdim[2]; + + /* If not self, record id of neighbour. 
*/ + if (i || j || k) { + adjncy[cid * 26 + p] = cell_getid(s->cdim, ii, jj, kk); + p++; + } + } + } + } + + /* Next cell. */ + cid++; + } + } + *nadjcny = cid * 26; + + /* If given set SCOTCH xadj. */ + if (xadj != NULL) { + xadj[0] = 0; + for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; + *nxadj = s->nr_cells; + } + } +} + /** * @brief Partition the given space into a number of connected regions and * map to available architecture. @@ -1467,6 +1549,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, #endif } + /* Define the cell graph. Keeping the edge weights association. */ + int nadjcny = 0; + int nxadj = 0; + graph_init_scotch(s, s->periodic, weights_e, adjncy, &nadjcny, xadj, &nxadj); + /* Define the cell graph. Keeping the edge weights association. */ // Setting up the Scotch graph SCOTCH_Graph graph; @@ -1503,7 +1590,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, } // /* Dump graph in Scotch format */ - FILE *file = fopen("test_scotch.grf", "w"); + static int partition_count = 0; + char fname[200]; + sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); + FILE *file = fopen(fname, "w"); + // FILE *file = fopen("scotch_input_com_graph_%i.grf", count++, "w"); if (file == NULL) { printf("Error: Cannot open output file.\n"); } @@ -1516,7 +1607,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Arch archdat; SCOTCH_Strat stradat; /* Load the architecture graph in .tgt format */ - FILE* arch_file = fopen("test.tgt", "r"); + FILE* arch_file = fopen("./topologies/2.tgt", "r"); if (arch_file == NULL) { printf("Error: Cannot open topo file.\n"); } @@ -1529,8 +1620,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); - printf("Scotch mapping done.\n"); - printf("number of regions %i", nregions); /* Check that the regionids are ok. 
*/ for (int k = 0; k < ncells; k++) { if (regionid[k] < 0 || regionid[k] >= nregions){ @@ -2157,8 +2246,19 @@ void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && defined(HAVE_SCOTCH) + ticks tic = getticks(); + + if (reparttype->type == REPART_SCOTCH) { + repart_scotch(reparttype, nodeID, nr_nodes, s); + } else { + error("Impossible repartition type"); + } + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +#elif defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) ticks tic = getticks(); if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { @@ -2186,18 +2286,6 @@ void partition_repartition(struct repartition *reparttype, int nodeID, message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); -#elif defined(WITH_MPI) && defined(HAVE_SCOTCH) - ticks tic = getticks(); - - if (reparttype->type == REPART_SCOTCH) { - repart_scotch(reparttype, nodeID, nr_nodes, s); - } else { - error("Impossible repartition type"); - } - - if (s->e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); #else error("SWIFT was not compiled with METIS, ParMETIS or Scotch support."); #endif diff --git a/swift.c b/swift.c index 4201c7117a..fe60d457f6 100644 --- a/swift.c +++ b/swift.c @@ -878,6 +878,8 @@ int main(int argc, char *argv[]) { message("Using ParMETIS partitioning:"); #elif defined(HAVE_METIS) message("Using METIS serial partitioning:"); +#elif defined(HAVE_SCOTCH) + message("Using SCOTCH serial partitioning:"); #else message("Non-METIS partitioning:"); #endif diff --git a/swift_fof.c b/swift_fof.c index 996015bae3..ec339b3a3d 100644 --- a/swift_fof.c +++ b/swift_fof.c @@ -387,6 +387,8 @@ int main(int argc, char *argv[]) { message("Using ParMETIS partitioning:"); 
#elif defined(HAVE_METIS) message("Using METIS serial partitioning:"); +#elif defined(HAVE_SCOTCH) + message("Using SCOTCH serial partitioning:"); #else message("Non-METIS partitioning:"); #endif From 7654c835090dc7362f6f9dd484a404ec6bb42dac Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 26 Jun 2023 12:35:42 +0100 Subject: [PATCH 11/49] fix: Correction to partition_repatition --- src/partition.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/partition.c b/src/partition.c index 0a3914b4ab..434a6e071f 100644 --- a/src/partition.c +++ b/src/partition.c @@ -2246,19 +2246,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { -#if defined(WITH_MPI) && defined(HAVE_SCOTCH) - ticks tic = getticks(); - - if (reparttype->type == REPART_SCOTCH) { - repart_scotch(reparttype, nodeID, nr_nodes, s); - } else { - error("Impossible repartition type"); - } - - if (s->e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -#elif defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) ticks tic = getticks(); if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { @@ -2278,6 +2266,23 @@ void partition_repartition(struct repartition *reparttype, int nodeID, } else if (reparttype->type == REPART_NONE) { /* Doing nothing. */ + + } else { + error("Impossible repartition type"); + } + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +#elif defined(WITH_MPI) && defined(HAVE_SCOTCH) + ticks tic = getticks(); + + if (reparttype->type == REPART_SCOTCH) { + repart_scotch(reparttype, nodeID, nr_nodes, s); + + } else if (reparttype->type == REPART_NONE) { + /* Doing nothing. 
*/ + } else { error("Impossible repartition type"); } From c67ff380f2939ef7b521cada3574ecd397e827dd Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 5 Jul 2023 12:49:36 +0100 Subject: [PATCH 12/49] fix: filling up the edge and vertex weights --- src/partition.c | 229 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 174 insertions(+), 55 deletions(-) diff --git a/src/partition.c b/src/partition.c index 434a6e071f..e9afd8847c 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1471,7 +1471,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, for (int i = 0; i < ncells; i++) celllist[i] = 0; return; } - /* Only one node needs to calculate this. */ if (nodeID == 0) { /* Allocate adjacency and weights arrays . */ @@ -1548,12 +1547,10 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (failed > 0) error("%d edge weights are out of range", failed); #endif } - /* Define the cell graph. Keeping the edge weights association. */ int nadjcny = 0; int nxadj = 0; graph_init_scotch(s, s->periodic, weights_e, adjncy, &nadjcny, xadj, &nxadj); - /* Define the cell graph. Keeping the edge weights association. 
*/ // Setting up the Scotch graph SCOTCH_Graph graph; @@ -1565,20 +1562,18 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ SCOTCH_Num *edlotab; - + verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - - - int i; - for (i = 0; i <= vertnbr; i++) { + + for (int i = 0; i <= vertnbr; i++) { verttab[i] = i*26; velotab[i] = weights_v[i]; } - for (i = 0; i < edgenbr; i++) { + for (int i = 0; i < edgenbr; i++) { edgetab[i] = adjncy[i]; edlotab[i] = weights_e[i]; } @@ -1589,25 +1584,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, error("Error: Cannot build Scotch Graph.\n"); } - // /* Dump graph in Scotch format */ - static int partition_count = 0; - char fname[200]; - sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); - FILE *file = fopen(fname, "w"); - // FILE *file = fopen("scotch_input_com_graph_%i.grf", count++, "w"); - if (file == NULL) { - printf("Error: Cannot open output file.\n"); - } - - if (SCOTCH_graphSave(&graph, file) != 0) { - printf("Error: Cannot save Scotch Graph.\n"); - } - /* Read in architecture graph. 
*/ SCOTCH_Arch archdat; SCOTCH_Strat stradat; /* Load the architecture graph in .tgt format */ - FILE* arch_file = fopen("./topologies/2.tgt", "r"); + FILE* arch_file = fopen("8.tgt", "r"); if (arch_file == NULL) { printf("Error: Cannot open topo file.\n"); } @@ -1615,11 +1596,10 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, error("Error loading architecture graph"); SCOTCH_stratInit(&stradat); - + printf("Scotch arch file init \n"); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); - /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { if (regionid[k] < 0 || regionid[k] >= nregions){ @@ -1629,7 +1609,15 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* And keep. */ celllist[k] = regionid[k]; } - + SCOTCH_graphExit(&graph); + SCOTCH_stratExit(&stradat); + SCOTCH_archExit(&archdat); + fclose(arch_file); + + if (verttab != NULL) free(verttab); + if (velotab != NULL) free(velotab); + if (edgetab != NULL) free(edgetab); + if (edlotab != NULL) free(edlotab); /* Clean up. */ if (weights_v != NULL) free(weights_v); if (weights_e != NULL) free(weights_e); @@ -1644,13 +1632,13 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Helper struct for partition_gather weights. 
*/ struct weights_mapper_data { double *weights_e; double *weights_v; - idx_t *inds; + int *inds; int eweights; int nodeID; int timebins; @@ -1682,7 +1670,7 @@ void partition_gather_weights(void *map_data, int num_elements, double *weights_e = mydata->weights_e; double *weights_v = mydata->weights_v; - idx_t *inds = mydata->inds; + int *inds = mydata->inds; int eweights = mydata->eweights; int nodeID = mydata->nodeID; int nr_cells = mydata->nr_cells; @@ -1827,6 +1815,9 @@ void partition_gather_weights(void *map_data, int num_elements, } } +#endif + +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /** * @brief Repartition the cells amongst the nodes using weights of * various kinds. @@ -2153,16 +2144,83 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, * @param nr_nodes the number of nodes. * @param s the space of cells holding our local particles. */ -static void repart_scotch(struct repartition *repartition, int nodeID, - int nr_nodes, struct space *s) { +static void repart_scotch(int vweights, int eweights, int timebins, + struct repartition *repartition, int nodeID, + int nr_nodes, struct space *s, struct task *tasks, + int nr_tasks) { - /* Space for counts of particle memory use per cell. */ - double *weights = NULL; - if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) - error("Failed to allocate cell weights buffer."); - /* Check each particle and accumulate the sizes per cell. */ - accumulate_sizes(s, s->e->verbose, weights); + /* Create weight arrays using task ticks for vertices and edges (edges + * * assume the same graph structure as used in the part_ calls). */ + int nr_cells = s->nr_cells; + struct cell *cells = s->cells_top; + + /* Allocate and fill the adjncy indexing array defining the graph of + * * cells. 
*/ + SCOTCH_Num *inds; + if ((inds = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * nr_cells)) == NULL) + error("Failed to allocate the inds array"); + int nadjcny = 0; + int nxadj = 0; + + graph_init_scotch(s, 1 /* periodic */, NULL /* no edge weights */, inds, &nadjcny, + NULL /* no xadj needed */, &nxadj); + + /* Allocate and init weights. */ + double *weights_v = NULL; + double *weights_e = NULL; + if (vweights) { + if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL) + error("Failed to allocate vertex weights arrays."); + bzero(weights_v, sizeof(double) * nr_cells); + } + if (eweights) { + if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL) + error("Failed to allocate edge weights arrays."); + bzero(weights_e, sizeof(double) * 26 * nr_cells); + } + + /* Gather weights. */ + struct weights_mapper_data weights_data; + + weights_data.cells = cells; + weights_data.eweights = eweights; + weights_data.inds = inds; + weights_data.nodeID = nodeID; + weights_data.nr_cells = nr_cells; + weights_data.timebins = timebins; + weights_data.vweights = vweights; + weights_data.weights_e = weights_e; + weights_data.weights_v = weights_v; + weights_data.use_ticks = repartition->use_ticks; + + ticks tic = getticks(); + + threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks, + sizeof(struct task), threadpool_auto_chunk_size, + &weights_data); + if (s->e->verbose) + message("weight mapper took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#ifdef SWIFT_DEBUG_CHECKS + check_weights(tasks, nr_tasks, &weights_data, weights_v, weights_e); +#endif + + /* Merge the weights arrays across all nodes. 
*/ + int res; + if (vweights) { + res = MPI_Allreduce(MPI_IN_PLACE, weights_v, nr_cells, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to allreduce vertex weights."); + } + + if (eweights) { + res = MPI_Allreduce(MPI_IN_PLACE, weights_e, 26 * nr_cells, MPI_DOUBLE, + MPI_SUM, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to allreduce edge weights."); + } /* Allocate cell list for the partition. If not already done. */ #ifdef HAVE_SCOTCH @@ -2180,33 +2238,88 @@ static void repart_scotch(struct repartition *repartition, int nodeID, repartition->ncelllist = s->nr_cells; } - /* We need to rescale the sum of the weights so that the sum is - * less than IDX_MAX, that is the range of idx_t. */ - double sum = 0.0; - for (int k = 0; k < s->nr_cells; k++) sum += weights[k]; - if (sum > (double)IDX_MAX) { - double scale = (double)(IDX_MAX - 1000) / sum; - for (int k = 0; k < s->nr_cells; k++) weights[k] *= scale; + /* We need to rescale the sum of the weights so that the sums of the two + * * types of weights are less than IDX_MAX, that is the range of idx_t. */ + double vsum = 0.0; + if (vweights) + for (int k = 0; k < nr_cells; k++) vsum += weights_v[k]; + double esum = 0.0; + if (eweights) + for (int k = 0; k < 26 * nr_cells; k++) esum += weights_e[k]; + + /* Do the scaling, if needed, keeping both weights in proportion. */ + double vscale = 1.0; + double escale = 1.0; + if (vweights && eweights) { + if (vsum > esum) { + if (vsum > (double)IDX_MAX) { + vscale = (double)(IDX_MAX - 10000) / vsum; + escale = vscale; + } + } else { + if (esum > (double)IDX_MAX) { + escale = (double)(IDX_MAX - 10000) / esum; + vscale = escale; + } + } + } else if (vweights) { + if (vsum > (double)IDX_MAX) { + vscale = (double)(IDX_MAX - 10000) / vsum; + } + } else if (eweights) { + if (esum > (double)IDX_MAX) { + escale = (double)(IDX_MAX - 10000) / esum; + } } - /* And repartition. 
*/ + if (vweights && vscale != 1.0) { + vsum = 0.0; + for (int k = 0; k < nr_cells; k++) { + weights_v[k] *= vscale; + vsum += weights_v[k]; + } + vscale = 1.0; + } + if (eweights && escale != 1.0) { + esum = 0.0; + for (int k = 0; k < 26 * nr_cells; k++) { + weights_e[k] *= escale; + esum += weights_e[k]; + } + escale = 1.0; + } + + /* Balance edges and vertices when the edge weights are timebins, as these + * * have no reason to have equivalent scales, we use an equipartition. */ + if (timebins && eweights) { + + /* Make sums the same. */ + if (vsum > esum) { + escale = vsum / esum; + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= escale; + } else { + vscale = esum / vsum; + for (int k = 0; k < nr_cells; k++) weights_v[k] *= vscale; + } + } + + /* And repartition/ partition, using both weights or not as requested. */ #ifdef HAVE_SCOTCH - pick_scotch(nodeID, s, nr_nodes, weights, NULL, repartition->celllist); + pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); #endif - /* Check that all cells have good values. All nodes have same copy, so just - * check on one. */ + * * check on one. */ if (nodeID == 0) { - for (int k = 0; k < s->nr_cells; k++) + for (int k = 0; k < nr_cells; k++) if (repartition->celllist[k] < 0 || repartition->celllist[k] >= nr_nodes) error("Got bad nodeID %d for cell %i.", repartition->celllist[k], k); } - /* Check that the partition is complete and all nodes have some cells. */ + /* Check that the partition is complete and all nodes have some work. 
*/ int present[nr_nodes]; int failed = 0; for (int i = 0; i < nr_nodes; i++) present[i] = 0; - for (int i = 0; i < s->nr_cells; i++) present[repartition->celllist[i]]++; + for (int i = 0; i < nr_cells; i++) present[repartition->celllist[i]]++; for (int i = 0; i < nr_nodes; i++) { if (!present[i]) { failed = 1; @@ -2220,12 +2333,17 @@ static void repart_scotch(struct repartition *repartition, int nodeID, message( "WARNING: repartition has failed, continuing with the current" " partition, load balance will not be optimal"); - for (int k = 0; k < s->nr_cells; k++) - repartition->celllist[k] = s->cells_top[k].nodeID; + for (int k = 0; k < nr_cells; k++) + repartition->celllist[k] = cells[k].nodeID; } /* And apply to our cells */ split_metis(s, nr_nodes, repartition->celllist); + + /* Clean up. */ + free(inds); + if (vweights) free(weights_v); + if (eweights) free(weights_e); } #endif /* WITH_MPI && HAVE_SCOTCH */ @@ -2278,7 +2396,8 @@ void partition_repartition(struct repartition *reparttype, int nodeID, ticks tic = getticks(); if (reparttype->type == REPART_SCOTCH) { - repart_scotch(reparttype, nodeID, nr_nodes, s); + repart_scotch(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, + nr_tasks); } else if (reparttype->type == REPART_NONE) { /* Doing nothing. 
*/ From 6921dedde3ccb029025ea94f3b8d2496c6740d14 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 5 Jul 2023 12:53:54 +0100 Subject: [PATCH 13/49] updating the target arch file for one cosma node (8 NUMA regions) --- examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt b/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt index 803b501d6c..ce824561b3 100644 --- a/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt +++ b/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt @@ -1 +1 @@ -tleaf 3 2 100 2 50 2 10 +tleaf 2 2 100 4 10 From 7e5898d074b27c87af2c1849d283d0be6863250f Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 26 Jul 2023 12:58:50 +0100 Subject: [PATCH 14/49] fix: Updating to read in target file --- examples/EAGLE_low_z/EAGLE_6/8.tgt | 1 - src/partition.c | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 examples/EAGLE_low_z/EAGLE_6/8.tgt diff --git a/examples/EAGLE_low_z/EAGLE_6/8.tgt b/examples/EAGLE_low_z/EAGLE_6/8.tgt deleted file mode 100644 index 803b501d6c..0000000000 --- a/examples/EAGLE_low_z/EAGLE_6/8.tgt +++ /dev/null @@ -1 +0,0 @@ -tleaf 3 2 100 2 50 2 10 diff --git a/src/partition.c b/src/partition.c index e9afd8847c..a37d48fe69 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1583,12 +1583,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { error("Error: Cannot build Scotch Graph.\n"); } - /* Read in architecture graph. 
*/ SCOTCH_Arch archdat; SCOTCH_Strat stradat; /* Load the architecture graph in .tgt format */ - FILE* arch_file = fopen("8.tgt", "r"); + FILE* arch_file = fopen("target.tgt", "r"); if (arch_file == NULL) { printf("Error: Cannot open topo file.\n"); } @@ -1596,7 +1595,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, error("Error loading architecture graph"); SCOTCH_stratInit(&stradat); - printf("Scotch arch file init \n"); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); @@ -1614,6 +1612,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_archExit(&archdat); fclose(arch_file); + if (verttab != NULL) free(verttab); if (velotab != NULL) free(velotab); if (edgetab != NULL) free(edgetab); @@ -2504,9 +2503,7 @@ void partition_initial_partition(struct partition *initial_partition, if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate celllist"); #ifdef HAVE_SCOTCH - message("Trying our best with Scotch"); pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); - message("Finished running pick scotch"); #elif HAVE_PARMETIS if (initial_partition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); From 0bdf8f3551ca2fd38fdba9de80f35a2c1ba493fa Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 26 Jul 2023 13:01:44 +0100 Subject: [PATCH 15/49] Adding a template multi cosma node job script --- multi_cosma_node.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 multi_cosma_node.sh diff --git a/multi_cosma_node.sh b/multi_cosma_node.sh new file mode 100644 index 0000000000..23a5103202 --- /dev/null +++ b/multi_cosma_node.sh @@ -0,0 +1,24 @@ +#!/bin/bash -l +# Example with $ntasks MPI tasks and 16 cpus per task +# Targeting $ntasks NUMA regions +# Project/Account (use your own) +#SBATCH -A dp004 + +#SBATCH -p 
cosma8 + +# Number of MPI tasks +#SBATCH --ntasks=32 + +#SBATCH --cpus-per-task=16 + +# Runtime of this jobs is less then 12 hours. +#SBATCH --time=00:40:00 + +# Clear the environment from any previously loaded modules +module purge > /dev/null 2>&1 + +module load cosma/2018 python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.1 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit metis/5.1.0-64bit gsl/2.5 + +# And finally run the job +mpirun --map-by numa /cosma8/data/dp004/dc-gile1/swiftsim-scotch/swift_mpi --threads=16 --cosmology --hydro --self-gravity --stars eagle_6.yml | tee output.log +# End of submit file \ No newline at end of file From cc90448697ca572829c0b2d6038fcbe1c86cdda3 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 26 Jul 2023 13:59:27 +0100 Subject: [PATCH 16/49] Adding template target files --- target_files/1_cosma_node.tgt | 1 + target_files/2_cosma_nodes.tgt | 1 + target_files/4_cosma_nodes.tgt | 1 + 3 files changed, 3 insertions(+) create mode 100644 target_files/1_cosma_node.tgt create mode 100644 target_files/2_cosma_nodes.tgt create mode 100644 target_files/4_cosma_nodes.tgt diff --git a/target_files/1_cosma_node.tgt b/target_files/1_cosma_node.tgt new file mode 100644 index 0000000000..d82fa65cd0 --- /dev/null +++ b/target_files/1_cosma_node.tgt @@ -0,0 +1 @@ +tleaf 2 2 100 4 10 \ No newline at end of file diff --git a/target_files/2_cosma_nodes.tgt b/target_files/2_cosma_nodes.tgt new file mode 100644 index 0000000000..08bc0a3ccc --- /dev/null +++ b/target_files/2_cosma_nodes.tgt @@ -0,0 +1 @@ +tleaf 3 2 1000 2 100 4 10 \ No newline at end of file diff --git a/target_files/4_cosma_nodes.tgt b/target_files/4_cosma_nodes.tgt new file mode 100644 index 0000000000..2fb58c5047 --- /dev/null +++ b/target_files/4_cosma_nodes.tgt @@ -0,0 +1 @@ +tleaf 3 4 1000 2 100 4 10 \ No newline at end of file From 0144f84c6425a3b08eccf9b35c71692b1fbc6b0f Mon Sep 17 00:00:00 2001 From: Themis Kotsialos Date: Fri, 4 Aug 2023 13:09:37 +0100 Subject: 
[PATCH 17/49] Added scripts to create target file. --- create-target-file/create-target-file.py | 16 ++++++++ create-target-file/create-target-file.sh | 31 ++++++++++++++ .../sample-slurm-cosma-jscript.sh | 40 +++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 create-target-file/create-target-file.py create mode 100755 create-target-file/create-target-file.sh create mode 100755 create-target-file/sample-slurm-cosma-jscript.sh diff --git a/create-target-file/create-target-file.py b/create-target-file/create-target-file.py new file mode 100644 index 0000000000..5f72d1c197 --- /dev/null +++ b/create-target-file/create-target-file.py @@ -0,0 +1,16 @@ +import os +cwd = os.getcwd() + +print("Current working directory:", cwd) + + +nodes_to_use = -1 +mpi_tasks = -1 +cpus_per_task = -1 +tasks_per_core = -1 +tasks_per_node = -1 +cluster_architecture = "unknown" + + + + diff --git a/create-target-file/create-target-file.sh b/create-target-file/create-target-file.sh new file mode 100755 index 0000000000..aee9812fe8 --- /dev/null +++ b/create-target-file/create-target-file.sh @@ -0,0 +1,31 @@ +#!/bin/bash -l + +# Assumes we run SLURM; provision for SGE will be added afterwards + +# For use in several known cluster environments: [Durham cosma8] for now, +# perhaps [UCL's Myriad, Kathleen, Young] later on, +# [ARCHER2], etc. + +# Minimal error checking throughout + +# Workflow: +# - this script is called from within a submitted jobscript. +# - tries to determine architecture [cosma8 | Kathleen etc] +# - tries to determine jobscript directives relevant to how many +# nodes, mpi ranks/tasks etc are requested, and some other runtime parameters +# - in order to produce a [target.tgt] file, to be passed on to the SCOTCH module. 
+ + +# Process the jobscript from which we are called, strip comments and gather +# switches / parameters relevant to SCOTCH, remove '=' from those lines + +sed -e '/^\s\{0,\}#\ /d' $1 | \ + sed -n -r -e '/^#SBATCH/p;/\-\-with\-arch/{s/^.*\-\-with\-arch\=/\-\-with\-arch\=/;s/[ ].*$//;p}' | \ + sed 's/\=/\ /' | \ + sed 's/^#SBATCH\ //' > target-specs.txt + +# Parse the lines in [target-specs.txt], determine SCOTCH-relevant parameters, +# consult cluster architecture, and if successful, produce the specs file [target.tgt]; +# if not, return some error, so we may return that error to the caller script. + + diff --git a/create-target-file/sample-slurm-cosma-jscript.sh b/create-target-file/sample-slurm-cosma-jscript.sh new file mode 100755 index 0000000000..4e10bc3931 --- /dev/null +++ b/create-target-file/sample-slurm-cosma-jscript.sh @@ -0,0 +1,40 @@ +#!/bin/bash -l + +#SBATCH -n 2 +# #SBATCH --nodes=4 +#SBATCH --ntasks=32 +# #SBATCH --ntasks=64 +#SBATCH --cpus-per-task=64 +# ---- #SBATCH --cpus-per-task=32 +export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK +#SBATCH --mem-per-cpu=4096 +#SBATCH --ntasks-per-core=1 + +# NOTE: In nodes with hyper-threading enabled, a task not requesting full cores +# may be distributed across sockets. +# This can be avoided by specifying --ntasks-per-core=1, +# which forces tasks to allocate full cores. 
+ +#SBATCH --tasks-per-node 64 +# #SBATCH --sockets-per-node= +# #SBATCH --cores-per-socket= +# #SBATCH --threads-per-core= +# #SBATCH --ntasks-per-core=1 + + + + +./create-target-file.sh $0 + +# We can specify options even below the call of create-target-file.sh +#SBATCH --tasks-per-node 64 + +# mpirun -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID + + +swift-mpi -np $SLURM_NTASKS --with-arch=cosma8 your_program your_inputs $SLURM_ARRAY_TASK_ID +# swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID +# swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID + + # swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID + From dd440d06121e801bd30fc47ac898b17ea374e8b6 Mon Sep 17 00:00:00 2001 From: Themis Kotsialos Date: Mon, 7 Aug 2023 10:02:09 +0100 Subject: [PATCH 18/49] Incremental edits. --- create-target-file/create-target-file.py | 8 ++++++-- create-target-file/create-target-file.sh | 9 +++++++++ create-target-file/sample-slurm-cosma-jscript.sh | 7 ++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/create-target-file/create-target-file.py b/create-target-file/create-target-file.py index 5f72d1c197..56897cfcfb 100644 --- a/create-target-file/create-target-file.py +++ b/create-target-file/create-target-file.py @@ -10,7 +10,11 @@ tasks_per_core = -1 tasks_per_node = -1 cluster_architecture = "unknown" +processor_count = -1 # for -np= - - +# Sanity checks: +# - if a [cwd] directive is used, create target.tgt in there; + otherwise, create it in $PWD, i.e. in the current directory +# - if no cluster arch is specified, default to [cosma8] +# - diff --git a/create-target-file/create-target-file.sh b/create-target-file/create-target-file.sh index aee9812fe8..37b5314f31 100755 --- a/create-target-file/create-target-file.sh +++ b/create-target-file/create-target-file.sh @@ -24,6 +24,15 @@ sed -e '/^\s\{0,\}#\ /d' $1 | \ sed 
's/\=/\ /' | \ sed 's/^#SBATCH\ //' > target-specs.txt +BINARY_EXECUTABLE="swift_mpi" +# Make sure we have the main invocation line for swift_mpi, so we can +# parse switches not supplied by #SBATCH; we are interested in things like +# -np=, --with-arch= +sed -e '/^\s\{0,\}#\ /d' $1 | \ + sed -e 's/#.*$//g' | \ + sed -n "/${BINARY_EXECUTABLE}/p" | \ + sed 's/\=/\ /g' >> target-specs.txt + # Parse the lines in [target-specs.txt], determine SCOTCH-relevant parameters, # consult cluster architecture, and if successful, produce the specs file [target.tgt]; # if not, return some error, so we may return that error to the caller script. diff --git a/create-target-file/sample-slurm-cosma-jscript.sh b/create-target-file/sample-slurm-cosma-jscript.sh index 4e10bc3931..a1c357f626 100755 --- a/create-target-file/sample-slurm-cosma-jscript.sh +++ b/create-target-file/sample-slurm-cosma-jscript.sh @@ -32,9 +32,10 @@ export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK # mpirun -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID -swift-mpi -np $SLURM_NTASKS --with-arch=cosma8 your_program your_inputs $SLURM_ARRAY_TASK_ID -# swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID -# swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID +# swift_mpi -np $SLURM_NTASKS --with-arch=cosma8 your_program your_inputs $SLURM_ARRAY_TASK_ID +# swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID +# swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID # swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID +mpirun /cosma/home/dp004/dc-kots1/swiftsim-ucl-dp004/swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS --threads=16 --cosmology --hydro --self-gravity --stars eagle_6.yml | tee output.log From bb9e65da49e22e8b5b1313a7d315e0c6cbe9a9c0 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 9 Aug 2023 
11:21:36 +0100 Subject: [PATCH 19/49] Defining the mapping strategy --- src/partition.c | 51 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/src/partition.c b/src/partition.c index a37d48fe69..eb63f68b78 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1555,37 +1555,50 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, // Setting up the Scotch graph SCOTCH_Graph graph; SCOTCH_Num baseval = 0; - SCOTCH_Num vertnbr = ncells; - SCOTCH_Num *verttab; /* Vertex array [vertnbr+1] */ - SCOTCH_Num *vendtab = NULL; /* Vertex array [vertnbr] */ - SCOTCH_Num *velotab; /* Vertex load array */ - SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ - SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ - SCOTCH_Num *edlotab; - + SCOTCH_Num vertnbr = ncells; /* Number of vertices */ + SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ + + SCOTCH_Num *verttab; /* Vertex array [vertnbr] */ verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); + + SCOTCH_Num *velotab; /* Vertex load array */ velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); - edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - - for (int i = 0; i <= vertnbr; i++) { + + SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ + edgetab = (SCOTCH_Num*) malloc((edgenbr) * sizeof(SCOTCH_Num)); + + SCOTCH_Num *edlotab; /* Int load of each edge */ + edlotab = (SCOTCH_Num*) malloc((edgenbr) * sizeof(SCOTCH_Num)); + + for (int i = 0; i < vertnbr; i++) { verttab[i] = i*26; velotab[i] = weights_v[i]; } for (int i = 0; i < edgenbr; i++) { - edgetab[i] = adjncy[i]; + edgetab[i] = adjncy[i]-1; edlotab[i] = weights_e[i]; } SCOTCH_graphInit(&graph); - if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { + if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, NULL, 
velotab, NULL, edgenbr, edgetab, edlotab) != 0) { error("Error: Cannot build Scotch Graph.\n"); } + /* Will be wrapped in a DEBUG */ + static int partition_count = 0; + char fname[200]; + sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); + FILE *file = fopen(fname, "w"); + if (file == NULL) { + printf("Error: Cannot open output file.\n"); + } + + if (SCOTCH_graphSave(&graph, file) != 0) { + printf("Error: Cannot save Scotch Graph.\n"); + } /* Read in architecture graph. */ SCOTCH_Arch archdat; - SCOTCH_Strat stradat; /* Load the architecture graph in .tgt format */ FILE* arch_file = fopen("target.tgt", "r"); if (arch_file == NULL) { @@ -1594,7 +1607,15 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (SCOTCH_archLoad(&archdat, arch_file) != 0) error("Error loading architecture graph"); + /* Initialise in strategy. */ + SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); + SCOTCH_Num num_vertices; + + num_vertices = SCOTCH_archSize(&archdat); + if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.01) != 0) + error("Error setting the Scotch mapping strategy."); + /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); From 151a7e9b831bb248b7d3aaeba4607c3375f887ac Mon Sep 17 00:00:00 2001 From: Euthymios Kotsialos Date: Fri, 11 Aug 2023 11:35:26 +0100 Subject: [PATCH 20/49] Added SCOTCH_LIBS, SCOTCH_INCS to src/Makefile.am for Scotch MPI compilation. --- src/Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index b0ad938f55..80b7f08c10 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,8 +28,8 @@ GIT_CMD = @GIT_CMD@ EXTRA_LIBS = $(GSL_LIBS) $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) $(TBBMALLOC_LIBS) $(GRACKLE_LIBS) $(SUNDIALS_LIBS) $(CHEALPIX_LIBS) # MPI libraries. 
-MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) -MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) +MPI_LIBS = $(SCOTCH_LIBS) $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) +MPI_FLAGS = -DWITH_MPI $(SCOTCH_INCS) $(PARMETIS_INCS) $(METIS_INCS) # Build the libswiftsim library and a convenience library just for the gravity tasks lib_LTLIBRARIES = libswiftsim.la From b9e23d7ae8973751dcb3042a47eeda97332ccdad Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 11 Aug 2023 14:37:44 +0100 Subject: [PATCH 21/49] updated assignment of edges --- src/partition.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/partition.c b/src/partition.c index eb63f68b78..bfba2b5e7d 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1574,9 +1574,10 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, verttab[i] = i*26; velotab[i] = weights_v[i]; } + verttab[vertnbr] = vertnbr*26; for (int i = 0; i < edgenbr; i++) { - edgetab[i] = adjncy[i]-1; + edgetab[i] = adjncy[i]; edlotab[i] = weights_e[i]; } @@ -1610,11 +1611,10 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Initialise in strategy. 
*/ SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); - SCOTCH_Num num_vertices; - - num_vertices = SCOTCH_archSize(&archdat); - if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.01) != 0) - error("Error setting the Scotch mapping strategy."); + //SCOTCH_Num num_vertices; + //num_vertices = SCOTCH_archSize(&archdat); + //if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.05) != 0) + // error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) From 52297ce92c42b91fe4fbfbda524c4a4c78e7d105 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Tue, 15 Aug 2023 12:28:37 +0100 Subject: [PATCH 22/49] setting communication matrix to be symmetric --- src/partition.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/partition.c b/src/partition.c index bfba2b5e7d..35d8ccc9c5 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1511,7 +1511,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, failed++; } if (weights_v[k] < 0) { - message("Used vertex weight out of range: %" PRIDX, weights_v[k]); + message("Used vertex weight out of range: %d", weights_v[k]); failed++; } } @@ -1558,8 +1558,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num vertnbr = ncells; /* Number of vertices */ SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ - SCOTCH_Num *verttab; /* Vertex array [vertnbr] */ - verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); + SCOTCH_Num *verttab; /* Vertex array [vertnbr + 1] */ + verttab = (SCOTCH_Num*) malloc((vertnbr + 1) * sizeof(SCOTCH_Num)); + + //SCOTCH_Num *vendtab; /* Vertex array [vertnbr] */ + //vendtab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); SCOTCH_Num *velotab; /* Vertex load array */ velotab = (SCOTCH_Num*) malloc((vertnbr) * 
sizeof(SCOTCH_Num)); @@ -1570,15 +1573,29 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num *edlotab; /* Int load of each edge */ edlotab = (SCOTCH_Num*) malloc((edgenbr) * sizeof(SCOTCH_Num)); + int edges_deg = 26; for (int i = 0; i < vertnbr; i++) { - verttab[i] = i*26; + verttab[i] = i*edges_deg; velotab[i] = weights_v[i]; } - verttab[vertnbr] = vertnbr*26; + verttab[vertnbr] = vertnbr*edges_deg; + + int vertex_count = 0; + int neighbour; + int return_edge; for (int i = 0; i < edgenbr; i++) { - edgetab[i] = adjncy[i]; - edlotab[i] = weights_e[i]; + if ((i>(edges_deg-1)) && (i%edges_deg == 0) ){ + vertex_count++; + } + neighbour = adjncy[i]; + edgetab[i] = neighbour; + for (int j = 0; j < edges_deg; j++) { + if ((adjncy[(neighbour*edges_deg + j)]) == vertex_count){ + return_edge = (neighbour*edges_deg + j); + } + } + edlotab[i] = 0.5*(weights_e[i] + weights_e[return_edge]); } SCOTCH_graphInit(&graph); @@ -1586,7 +1603,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, NULL, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { error("Error: Cannot build Scotch Graph.\n"); } - /* Will be wrapped in a DEBUG */ +// #ifdef SWIFT_DEBUG_CHECKS + SCOTCH_graphCheck(&graph); static int partition_count = 0; char fname[200]; sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); @@ -1598,6 +1616,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (SCOTCH_graphSave(&graph, file) != 0) { printf("Error: Cannot save Scotch Graph.\n"); } +// #endif /* Read in architecture graph. */ SCOTCH_Arch archdat; /* Load the architecture graph in .tgt format */ @@ -1611,10 +1630,11 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Initialise in strategy. 
*/ SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); - //SCOTCH_Num num_vertices; - //num_vertices = SCOTCH_archSize(&archdat); - //if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.05) != 0) - // error("Error setting the Scotch mapping strategy."); + SCOTCH_Num num_vertices; + + num_vertices = SCOTCH_archSize(&archdat); + if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.05) != 0) + error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) @@ -2535,7 +2555,6 @@ void partition_initial_partition(struct partition *initial_partition, #else pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); #endif - message("splitting cells now"); /* And apply to our cells */ split_metis(s, nr_nodes, celllist); From 6c6f3ac46e90d277d74db24de4709141c1fe081c Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 10:48:44 +0100 Subject: [PATCH 23/49] clean up, summed edge weight and balance strategy --- src/partition.c | 35 ++++++++++++++++++++++------------- src/partition.h | 6 +++--- src/space_regrid.c | 2 +- swift.c | 2 +- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/partition.c b/src/partition.c index 35d8ccc9c5..3ce8f2fa7c 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1595,7 +1595,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, return_edge = (neighbour*edges_deg + j); } } - edlotab[i] = 0.5*(weights_e[i] + weights_e[return_edge]); + edlotab[i] = weights_e[i] + weights_e[return_edge]; } SCOTCH_graphInit(&graph); @@ -1608,14 +1608,15 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, static int partition_count = 0; char fname[200]; sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); - FILE *file = fopen(fname, "w"); + FILE *graph_file = fopen(fname, "w"); if (file == NULL) { printf("Error: Cannot open 
output file.\n"); } - if (SCOTCH_graphSave(&graph, file) != 0) { + if (SCOTCH_graphSave(&graph, graph_file) != 0) { printf("Error: Cannot save Scotch Graph.\n"); } + fclose(graph_file); // #endif /* Read in architecture graph. */ SCOTCH_Arch archdat; @@ -1631,14 +1632,22 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); SCOTCH_Num num_vertices; - + SCOTCH_Num flagval = SCOTCH_STRATBALANCE; + num_vertices = SCOTCH_archSize(&archdat); - if (SCOTCH_stratGraphMapBuild(&stradat, SCOTCH_STRATQUALITY, num_vertices, 0.05) != 0) + if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.5) != 0) error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); +// #ifdef SWIFT_DEBUG_CHECKS + SCOTCH_Mapping mappptr; + SCOTCH_graphMapInit(&graph, &mappptr, &archdat, regionid); + FILE* map_stats = fopen("map_stats.out", "w"); + SCOTCH_graphMapView(&graph, &mappptr, map_stats); + fclose(map_stats); +// #endif /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { if (regionid[k] < 0 || regionid[k] >= nregions){ @@ -2504,9 +2513,9 @@ void partition_initial_partition(struct partition *initial_partition, return; } - } else if (initial_partition->type == INITPART_METIS_WEIGHT || - initial_partition->type == INITPART_METIS_WEIGHT_EDGE || - initial_partition->type == INITPART_METIS_NOWEIGHT) { + } else if (initial_partition->type == INITPART_WEIGHT || + initial_partition->type == INITPART_WEIGHT_EDGE || + initial_partition->type == INITPART_NOWEIGHT) { #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Simple k-way partition selected by METIS using cell particle * counts as weights or not. 
Should be best when starting with a @@ -2514,7 +2523,7 @@ void partition_initial_partition(struct partition *initial_partition, */ double *weights_v = NULL; double *weights_e = NULL; - if (initial_partition->type == INITPART_METIS_WEIGHT) { + if (initial_partition->type == INITPART_WEIGHT) { /* Particles sizes per cell, which will be used as weights. */ if ((weights_v = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) error("Failed to allocate weights_v buffer."); @@ -2522,7 +2531,7 @@ void partition_initial_partition(struct partition *initial_partition, /* Check each particle and accumulate the sizes per cell. */ accumulate_sizes(s, s->e->verbose, weights_v); - } else if (initial_partition->type == INITPART_METIS_WEIGHT_EDGE) { + } else if (initial_partition->type == INITPART_WEIGHT_EDGE) { /* Particle sizes also counted towards the edges. */ @@ -2653,13 +2662,13 @@ void partition_init(struct partition *partition, break; #if defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH) case 'r': - partition->type = INITPART_METIS_NOWEIGHT; + partition->type = INITPART_NOWEIGHT; break; case 'm': - partition->type = INITPART_METIS_WEIGHT; + partition->type = INITPART_WEIGHT; break; case 'e': - partition->type = INITPART_METIS_WEIGHT_EDGE; + partition->type = INITPART_WEIGHT_EDGE; break; default: message("Invalid choice of initial partition type '%s'.", part_type); diff --git a/src/partition.h b/src/partition.h index 8a6eba8915..962bfca7e6 100644 --- a/src/partition.h +++ b/src/partition.h @@ -27,9 +27,9 @@ enum partition_type { INITPART_GRID = 0, INITPART_VECTORIZE, - INITPART_METIS_WEIGHT, - INITPART_METIS_NOWEIGHT, - INITPART_METIS_WEIGHT_EDGE + INITPART_WEIGHT, + INITPART_NOWEIGHT, + INITPART_WEIGHT_EDGE }; /* Simple descriptions of types for reports. 
*/ diff --git a/src/space_regrid.c b/src/space_regrid.c index 95fa4d9cd9..18e15c524b 100644 --- a/src/space_regrid.c +++ b/src/space_regrid.c @@ -351,7 +351,7 @@ void space_regrid(struct space *s, int verbose) { message("Failed to get a new partition, trying less optimal method"); struct partition initial_partition; #if defined(HAVE_PARMETIS) || defined(HAVE_METIS) - initial_partition.type = INITPART_METIS_NOWEIGHT; + initial_partition.type = INITPART_NOWEIGHT; #else initial_partition.type = INITPART_VECTORIZE; #endif diff --git a/swift.c b/swift.c index fe60d457f6..38da8822fe 100644 --- a/swift.c +++ b/swift.c @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) { MPI_SUCCESS) error("Call to MPI_Comm_set_errhandler failed with error %i.", res); if (myrank == 0) - pretime_message("MPI is up and running with %i node(s).\n", nr_nodes); + pretime_message("MPI is up and running with %i rank(s).\n", nr_nodes); if (nr_nodes == 1) { pretime_message("WARNING: you are running with one MPI rank."); pretime_message( From 7f901f43ae6169e38ff9ba2d97a91ebe3d86c402 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 11:02:53 +0100 Subject: [PATCH 24/49] fix typo in partition.c --- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index 3ce8f2fa7c..b23d714390 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1609,7 +1609,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, char fname[200]; sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); FILE *graph_file = fopen(fname, "w"); - if (file == NULL) { + if (graph_file == NULL) { printf("Error: Cannot open output file.\n"); } From c481d545a808b5c554f24ed729b4194090185b20 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 15:55:24 +0100 Subject: [PATCH 25/49] docs: New documentation on the installation and running with SWIFT --- INSTALL.swift | 9 ++ README-Scotch.md | 54 ++++++++++++ 
scotch/metis2scotch.c | 186 ------------------------------------------ 3 files changed, 63 insertions(+), 186 deletions(-) create mode 100644 README-Scotch.md delete mode 100644 scotch/metis2scotch.c diff --git a/INSTALL.swift b/INSTALL.swift index 6aba15db9d..239149fc31 100644 --- a/INSTALL.swift +++ b/INSTALL.swift @@ -150,6 +150,15 @@ before you can build it. least "--with-metis". ParMETIS is preferred over METIS when there is a choice. +- Scotch: + a build of the Scotch library should be used to + optimize the load between MPI nodes by mapping the decomposed domain + to the available compute. This should be found in the + standard installation directories, or pointed at using the + "--with-scotch" configuration options. + In this case the top-level installation directory of the build + should be given. + - libNUMA: a build of the NUMA library can be used to pin the threads to the physical core of the machine SWIFT is running on. This is diff --git a/README-Scotch.md b/README-Scotch.md new file mode 100644 index 0000000000..a91735586a --- /dev/null +++ b/README-Scotch.md @@ -0,0 +1,54 @@ +Information on how to run SWIFT with Scotch mapping, the test environment used on Cosma 8 and some scaling results. Code has been tested with Scotch version > 7.0 + +Last update 18th August 2023. + + +## Scotch + +Obtaining Scotch as not installed system wide on Cosma 8. +---------------- + +**Scotch** is publicly available under the CeCILL-C free software license, as described [here](https://gitlab.inria.fr/scotch/scotch/blob/master/LICENSE_en.txt). The license itself is available [here](https://gitlab.inria.fr/scotch/scotch/-/blob/master/doc/CeCILL-C_V1-en.txt). + +To use the lastest version of **Scotch**, please clone the master branch: + + git clone git@gitlab.inria.fr:scotch/scotch.git + +Tarballs of the **Scotch** releases are available [here](https://gitlab.inria.fr/scotch/scotch/-/releases). 
+ +The following are instructions for installing locally on Cosma 8, please ammend as appropriate. +---------------- +_Environment_ +``` + module load cosma/2018 python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.1 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit gsl/2.5 + module load cmake + module load bison +``` +Navigate to the Scotch directory and carry out the following commands + +``` + mkdir build + cd build + cmake -DCMAKE_INSTALL_PREFIX=/path-to-install-dir .. + make -j5 + make install +``` + +## Configure SWIFT with Scotch +As per the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) potentially with the added flag passed to `./configure --with-scotch=\path-to-scotch` + +## Running with Scotch + +Scotch decomposes the SWIFT spatial domain and maps it to the available compute - taking into consideration the communication cost being components of the architecture. In order for this to be carried out the user needs to generate an appropriate architecture file. This architecture file should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. + +In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. Example `tleaf`files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. 
+ +| Number of nodes | Number of MPI ranks | tleaf | +| --------------- | ------------------- | ----------------------- | +| 1 | 2 | tleaf 1 2 5 | +| 1 | 8 | tleaf 2 2 10 4 5 | +| 2 | 16 | tleaf 3 2 1000 2 10 4 5 | +| 4 | 32 | tleaf 3 4 1000 2 10 4 5 | +| 8 | 64 | tleaf 3 8 1000 2 10 4 5 | + +The user needs to define this tleaf structure and save it as `target.tgt` in the directory they will run SWIFT from. Ongoing work focuses on automatically generating this target architecture upon run time. diff --git a/scotch/metis2scotch.c b/scotch/metis2scotch.c deleted file mode 100644 index 69f5f75882..0000000000 --- a/scotch/metis2scotch.c +++ /dev/null @@ -1,186 +0,0 @@ -#include -#include -#include -#define MAX_LINE_LENGTH 1024 -#include - - -/* function declaration */ -void read_ncells(const char* filename, int *ncells); -void read_metis_edges(const char* filename, int *adjncy); -void read_metis_weights(const char* filename, int *weights_e, int *weights_v); - -int main () { - // Read in the metis simple file dump - char* edgesname = ""; - // Read in the metis weights file dump - char* weightsname = ""; - /* local variable definition */ - int ncells = 0; - read_ncells(edgesname, &ncells); - int *xadj; - if ((xadj = (int *)malloc(sizeof(int) * (ncells + 1))) == NULL) - printf("Failed to allocate xadj buffer."); - int *adjncy; - if ((adjncy = (int *)malloc(sizeof(int) * 26 * ncells)) == NULL) - printf("Failed to allocate adjncy array."); - int *weights_v = NULL; - if ((weights_v = (int *)malloc(sizeof(int) * ncells)) == NULL) - printf("Failed to allocate vertex weights array"); - int *weights_e = NULL; - if ((weights_e = (int *)malloc(26 * sizeof(int) * ncells)) == NULL) - printf("Failed to allocate edge weights array"); - int *regionid; - if ((regionid = (int *)malloc(sizeof(int) * ncells)) == NULL) - printf("Failed to allocate regionid array"); - - read_metis_edges(edgesname, adjncy); - read_metis_weights(weightsname, weights_e, weights_v); - // Setting up the Scotch 
graph - SCOTCH_Graph graph; - SCOTCH_Num baseval = 0; - SCOTCH_Num vertnbr = ncells; - SCOTCH_Num *verttab; /* Vertex array [vertnbr+1] */ - SCOTCH_Num *vendtab = NULL; /* Vertex array [vertnbr] */ - SCOTCH_Num *velotab; /* Vertex load array */ - SCOTCH_Num *vlbltab = NULL; /* Vertex label array */ - SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ - SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ - SCOTCH_Num *edlotab; - - verttab = (SCOTCH_Num*) malloc((vertnbr+1) * sizeof(SCOTCH_Num)); - velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); - edgetab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - edlotab = (SCOTCH_Num*) malloc(edgenbr * sizeof(SCOTCH_Num)); - - printf("Done the set up \n"); - int i; - for (i = 0; i <= vertnbr; i++) { - verttab[i] = i*26; - velotab[i] = weights_v[i]; - } - - for (i = 0; i < edgenbr; i++) { - edgetab[i] = adjncy[i]; - edlotab[i] = weights_e[i]; - } - - printf("Initialise graph \n"); - SCOTCH_graphInit(&graph); - - if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, vendtab, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { - printf("Error: Cannot build Scotch Graph.\n"); - exit(EXIT_FAILURE); - } - - printf("Scotch Graph built successfully.\n"); - - FILE *file = fopen("", "w"); - if (file == NULL) { - printf("Error: Cannot open output file.\n"); - exit(EXIT_FAILURE); - } - - if (SCOTCH_graphSave(&graph, file) != 0) { - printf("Error: Cannot save Scotch Graph.\n"); - exit(EXIT_FAILURE); - } - - printf("Scotch Graph saved to file.\n"); - - fclose(file); - SCOTCH_graphExit(&graph); - - // Free memory - free(verttab); - free(velotab); - free(edgetab); - free(edlotab); - free(xadj); - free(adjncy); - free(weights_v); - free(weights_e); - free(regionid); - return 0; -} - - -void read_ncells(const char* filename, int *ncells) { - // Read in the number of cells/vertices - FILE* fp = fopen(filename, "r"); - if (fp == NULL) { - printf("printf opening file %s\n", filename); - return; - } - char 
line[MAX_LINE_LENGTH]; - int line_num = 0; - if (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { - char * pch; - pch = strtok (line," "); - *ncells = atoi(pch); - } - fclose(fp); -} - - - -void read_metis_edges(const char* filename, int *adjncy) { - // Read in the vertex neighbours - FILE* fp = fopen(filename, "r"); - if (fp == NULL) { - printf("printf opening file %s\n", filename); - return; - } - char line[MAX_LINE_LENGTH]; - int index = 0; - int line_num = 0; - while (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { - if (line_num > 0) { - char * pch; - pch = strtok(line," ,.-"); - while (pch != NULL){ - adjncy[index] = atoi(pch); - pch = strtok (NULL, " ,.-"); - index +=1; - } - } - line_num += 1; - } - fclose(fp); -} - -void read_metis_weights(const char* filename, int *weights_e, int *weights_v) { - // Read in the vertex and edge weights - FILE* fp = fopen(filename, "r"); - if (fp == NULL) { - printf("printf opening file %s\n", filename); - return; - } - - char line[MAX_LINE_LENGTH]; - int v_index = 0; - int e_index = 0; - int line_num = 0; - while (fgets(line, MAX_LINE_LENGTH, fp) != NULL) { - int vertex_ind = 0; - if (line_num > 0) { - printf ("Line number is %i\n",line_num); - char * pch; - pch = strtok(line," ,.-"); - weights_v[v_index] = atoi(pch); - printf ("Vertex weight is %s\n",pch); - while (pch != NULL){ - if (vertex_ind>0){ - printf ("Edge Weight is %s\n",pch); - weights_e[e_index] = atoi(pch); - e_index +=1; - } - vertex_ind += 1; - pch = strtok (NULL, " ,.-"); - } - v_index+=1; - } - line_num += 1; - } - fclose(fp); -} \ No newline at end of file From b53abcd745d6d90c1562e77ebf6bff741201b0ab Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 16:00:53 +0100 Subject: [PATCH 26/49] fix: Typo in docs --- README-Scotch.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index a91735586a..919c952568 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -35,13 +35,13 @@ Navigate to the 
Scotch directory and carry out the following commands ``` ## Configure SWIFT with Scotch -As per the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) potentially with the added flag passed to `./configure --with-scotch=\path-to-scotch` +Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) but if Scotch installed locally the added `--with-scotch=\path-to-scotch` flag will need to be passed to `./configure` ## Running with Scotch -Scotch decomposes the SWIFT spatial domain and maps it to the available compute - taking into consideration the communication cost being components of the architecture. In order for this to be carried out the user needs to generate an appropriate architecture file. This architecture file should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. +Scotch decomposes the SWIFT spatial domain and maps it to the available compute - taking into consideration the communication cost between components of the architecture. In order for this to be carried out the user needs to generate an appropriate architecture file. This architecture file should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. -In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. 
Example `tleaf`files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. +In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` is defined in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. Example `tleaf`files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. | Number of nodes | Number of MPI ranks | tleaf | | --------------- | ------------------- | ----------------------- | From 7ac43956bfbbe82429213de58f28c7975af73b64 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 16:03:59 +0100 Subject: [PATCH 27/49] fix: correcting the Markdown --- README-Scotch.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index 919c952568..ec073f16c9 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -3,8 +3,6 @@ Information on how to run SWIFT with Scotch mapping, the test environment used o Last update 18th August 2023. -## Scotch - Obtaining Scotch as not installed system wide on Cosma 8. ---------------- @@ -16,7 +14,7 @@ To use the lastest version of **Scotch**, please clone the master branch: Tarballs of the **Scotch** releases are available [here](https://gitlab.inria.fr/scotch/scotch/-/releases). -The following are instructions for installing locally on Cosma 8, please ammend as appropriate. 
+Instructions for installing locally on Cosma 8, please ammend as appropriate. ---------------- _Environment_ ``` @@ -34,10 +32,13 @@ Navigate to the Scotch directory and carry out the following commands make install ``` -## Configure SWIFT with Scotch +Configure SWIFT with Scotch +---------------- + Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) but if Scotch installed locally the added `--with-scotch=\path-to-scotch` flag will need to be passed to `./configure` -## Running with Scotch +Running with Scotch +---------------- Scotch decomposes the SWIFT spatial domain and maps it to the available compute - taking into consideration the communication cost between components of the architecture. In order for this to be carried out the user needs to generate an appropriate architecture file. This architecture file should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. From 7c06111e7d61d9dfe374d4023b560542748d7e08 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 16:34:54 +0100 Subject: [PATCH 28/49] update to the docs --- README-Scotch.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README-Scotch.md b/README-Scotch.md index ec073f16c9..a3429efce0 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -53,3 +53,6 @@ In the following examples it is assumed that one mpi rank is mapped to each Cosm | 8 | 64 | tleaf 3 8 1000 2 10 4 5 | The user needs to define this tleaf structure and save it as `target.tgt` in the directory they will run SWIFT from. Ongoing work focuses on automatically generating this target architecture upon run time. + + +With OpenMPI the `mpirun` option `--map-by numa` has been found to be optimal. 
This is in contrast to previously suggested `--bind-to none` on the cosma8 [site](https://www.dur.ac.uk/icc/cosma/support/cosma8/). From d4b79021775050eea35d9c273660777f6feae78b Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 18 Aug 2023 17:23:09 +0100 Subject: [PATCH 29/49] Documentation updates --- README-Scotch.md | 12 +++++++++++- src/partition.c | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index a3429efce0..0768e1a4e5 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -1,4 +1,4 @@ -Information on how to run SWIFT with Scotch mapping, the test environment used on Cosma 8 and some scaling results. Code has been tested with Scotch version > 7.0 +Information on how to run SWIFT with Scotch mapping and the test environment used on Cosma 8. Code has been tested with Scotch version > 7.0 Last update 18th August 2023. @@ -56,3 +56,13 @@ The user needs to define this tleaf structure and save it as `target.tgt` in the With OpenMPI the `mpirun` option `--map-by numa` has been found to be optimal. This is in contrast to previously suggested `--bind-to none` on the cosma8 [site](https://www.dur.ac.uk/icc/cosma/support/cosma8/). + +Scotch details +---------------- + +Scotch carries out the mapping using various strategies which are outlined in the documentation. The Scotch strategy is passed to the Mapping functions. For the runs carried out here it was found that the global flag `SCOTCH_STRATBALANCE` and a imbalance ratio of `0.05` worked best. These values are passed to `SCOTCH_stratGraphMapBuild`. + +One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality for the SWIFT system the optimally mapping strategy doesn't map to all available NUMA regions. At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. 
+ +The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric. This causes an issue with SWIFT therefore before building the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights. + diff --git a/src/partition.c b/src/partition.c index b23d714390..690ede9026 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1583,7 +1583,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int vertex_count = 0; int neighbour; int return_edge; - + /* The bidirectional weights associated with an edge are summed to ensure that the resultant + edges are symmetric. This is a neccessary for a Scotch grpah. */ for (int i = 0; i < edgenbr; i++) { if ((i>(edges_deg-1)) && (i%edges_deg == 0) ){ vertex_count++; From 1d4d464d6a9943332973677c493e85c8172f341f Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 11:06:28 +0100 Subject: [PATCH 30/49] fix: Typo value for the imbalance ratio in pick_scotch --- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index b23d714390..c0c8da6637 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1635,7 +1635,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num flagval = SCOTCH_STRATBALANCE; num_vertices = SCOTCH_archSize(&archdat); - if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.5) != 0) + if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.05) != 0) error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ From 88997dd0159cef6232dd69c35a2200642ad2b1b7 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 11:26:11 +0100 Subject: [PATCH 31/49] Updates --- README-Scotch.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index 0768e1a4e5..460cf8b5c9 100644 --- a/README-Scotch.md +++ 
b/README-Scotch.md @@ -3,7 +3,7 @@ Information on how to run SWIFT with Scotch mapping and the test environment use Last update 18th August 2023. -Obtaining Scotch as not installed system wide on Cosma 8. +Obtaining Scotch ---------------- **Scotch** is publicly available under the CeCILL-C free software license, as described [here](https://gitlab.inria.fr/scotch/scotch/blob/master/LICENSE_en.txt). The license itself is available [here](https://gitlab.inria.fr/scotch/scotch/-/blob/master/doc/CeCILL-C_V1-en.txt). @@ -14,7 +14,7 @@ To use the lastest version of **Scotch**, please clone the master branch: Tarballs of the **Scotch** releases are available [here](https://gitlab.inria.fr/scotch/scotch/-/releases). -Instructions for installing locally on Cosma 8, please ammend as appropriate. +Instructions for installing locally on Cosma 8 ---------------- _Environment_ ``` @@ -62,7 +62,7 @@ Scotch details Scotch carries out the mapping using various strategies which are outlined in the documentation. The Scotch strategy is passed to the Mapping functions. For the runs carried out here it was found that the global flag `SCOTCH_STRATBALANCE` and a imbalance ratio of `0.05` worked best. These values are passed to `SCOTCH_stratGraphMapBuild`. -One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality for the SWIFT system the optimally mapping strategy doesn't map to all available NUMA regions. At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. +One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality of the modelled SWIFT system the optimally mapping strategy doesn't neccessarily map to all available NUMA regions. At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. -The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric. 
This causes an issue with SWIFT therefore before building the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights. +The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric - this causes an issue with SWIFT. Therefore, in the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights as calculated from SWIFT. From 4d5119c39546cd2fa89c1104b17001384b3d78d6 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 13:09:17 +0100 Subject: [PATCH 32/49] fix typo --- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index 690ede9026..0b33cb2e35 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1584,7 +1584,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int neighbour; int return_edge; /* The bidirectional weights associated with an edge are summed to ensure that the resultant - edges are symmetric. This is a neccessary for a Scotch grpah. */ + edges are symmetric. This is a neccessary for a Scotch graph. */ for (int i = 0; i < edgenbr; i++) { if ((i>(edges_deg-1)) && (i%edges_deg == 0) ){ vertex_count++; From c33a9121c24d66dc8487cff63b19fb4624b205ce Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 13:36:18 +0100 Subject: [PATCH 33/49] Update README-Scotch.md --- README-Scotch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README-Scotch.md b/README-Scotch.md index 460cf8b5c9..c708df21ea 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -40,7 +40,7 @@ Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swif Running with Scotch ---------------- -Scotch decomposes the SWIFT spatial domain and maps it to the available compute - taking into consideration the communication cost between components of the architecture. 
In order for this to be carried out the user needs to generate an appropriate architecture file. This architecture file should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. +Scotch carries out a _mapping_ of a _source_ (or process) graph onto a _target_ (or architecture) graph. The weighted _source_ graph is generated by SWIFT and it captures the computation and communication cost across the computational domain. The _target_ graph defines the communication cost across the available computing architecture. Therefore, to make use of the Scotch _mapping_ alogrithms a target architecture file (_target.tgt_) must be generated and it should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` is defined in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. Example `tleaf`files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. 
From e7c7479ebdbd4f6131bc712d2030ed81a23a5940 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 13:43:42 +0100 Subject: [PATCH 34/49] Update README-Scotch.md --- README-Scotch.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index c708df21ea..497684f380 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -1,9 +1,9 @@ -Information on how to run SWIFT with Scotch mapping and the test environment used on Cosma 8. Code has been tested with Scotch version > 7.0 +Information on how to run SWIFT with Scotch mapping and the test environment used on Cosma 8. Code has been tested with Scotch version 7.0.2. Last update 18th August 2023. -Obtaining Scotch +Obtaining Scotch (as per gitlab [repo](https://gitlab.inria.fr/scotch/scotch)) ---------------- **Scotch** is publicly available under the CeCILL-C free software license, as described [here](https://gitlab.inria.fr/scotch/scotch/blob/master/LICENSE_en.txt). The license itself is available [here](https://gitlab.inria.fr/scotch/scotch/-/blob/master/doc/CeCILL-C_V1-en.txt). 
From ca0f0184536de5dd549e79bf31c7cc1a6ce07407 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 21 Aug 2023 15:46:08 +0100 Subject: [PATCH 35/49] fix: Formatting according to clang-format.sh --- src/const.h | 20 +-- src/distributed_io.c | 2 +- src/engine.c | 3 +- src/fof.c | 2 +- src/hydro/Shadowswift/voronoi3d_algorithm.h | 4 +- src/lightcone/lightcone.c | 2 +- src/parallel_io.c | 2 +- src/partition.c | 186 +++++++++++--------- src/runner_doiact_functions_stars.h | 2 +- 9 files changed, 118 insertions(+), 105 deletions(-) diff --git a/src/const.h b/src/const.h index 429fe2275a..9cf25776e4 100644 --- a/src/const.h +++ b/src/const.h @@ -27,7 +27,7 @@ /* Type of gradients to use (GIZMO_SPH only) */ /* If no option is chosen, no gradients are used (first order scheme) */ -//#define GRADIENTS_SPH +// #define GRADIENTS_SPH #define GRADIENTS_GIZMO /* Types of slope limiter to use (GIZMO_SPH only) */ @@ -40,11 +40,11 @@ /* Options to control the movement of particles for GIZMO_SPH. */ /* This option disables particle movement */ -//#define GIZMO_FIX_PARTICLES +// #define GIZMO_FIX_PARTICLES /* Try to keep cells regular by adding a correction velocity. */ -//#define GIZMO_STEER_MOTION +// #define GIZMO_STEER_MOTION /* Use the total energy instead of the thermal energy as conserved variable. */ -//#define GIZMO_TOTAL_ENERGY +// #define GIZMO_TOTAL_ENERGY /* Options to control handling of unphysical values (GIZMO_SPH only). */ /* In GIZMO, mass and energy (and hence density and pressure) can in principle @@ -53,7 +53,7 @@ If no options are selected below, we assume (and pray) that this will not happen, and add no restrictions to how these variables are treated. */ /* Check for unphysical values and crash if they occur. */ -//#define GIZMO_UNPHYSICAL_ERROR +// #define GIZMO_UNPHYSICAL_ERROR /* Check for unphysical values and reset them to safe values. 
*/ #define GIZMO_UNPHYSICAL_RESCUE /* Show a warning message if an unphysical value was reset (only works if @@ -65,9 +65,9 @@ /* Parameters that control how GIZMO handles pathological particle configurations. */ /* Show a warning message if a pathological configuration has been detected. */ -//#define GIZMO_PATHOLOGICAL_WARNING +// #define GIZMO_PATHOLOGICAL_WARNING /* Crash if a pathological configuration has been detected. */ -//#define GIZMO_PATHOLOGICAL_ERROR +// #define GIZMO_PATHOLOGICAL_ERROR /* Maximum allowed gradient matrix condition number. If the condition number of the gradient matrix (defined in equation C1 in Hopkins, 2015) is larger than this value, we artificially increase the number of neighbours to get a more @@ -91,16 +91,16 @@ /* Options to control SHADOWFAX_SPH */ /* This option disables cell movement */ -//#define SHADOWFAX_FIX_CELLS +// #define SHADOWFAX_FIX_CELLS /* This option enables cell steering, i.e. trying to keep the cells regular by adding a correction to the cell velocities.*/ #define SHADOWFAX_STEER_CELL_MOTION /* This option evolves the total energy instead of the thermal energy */ -//#define SHADOWFAX_TOTAL_ENERGY +// #define SHADOWFAX_TOTAL_ENERGY /* Source terms */ #define SOURCETERMS_NONE -//#define SOURCETERMS_SN_FEEDBACK +// #define SOURCETERMS_SN_FEEDBACK /* GRACKLE doesn't really like exact zeroes, so use something * comparatively small instead. */ diff --git a/src/distributed_io.c b/src/distributed_io.c index b60958a3be..7a2833fa5d 100644 --- a/src/distributed_io.c +++ b/src/distributed_io.c @@ -62,7 +62,7 @@ #include "xmf.h" /* Are we timing the i/o? */ -//#define IO_SPEED_MEASUREMENT +// #define IO_SPEED_MEASUREMENT /** * @brief Writes a data array in given HDF5 group. 
diff --git a/src/engine.c b/src/engine.c index 6f25683e0e..fba056c458 100644 --- a/src/engine.c +++ b/src/engine.c @@ -181,7 +181,8 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { */ void engine_repartition(struct engine *e) { -#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_PARMETIS) || defined(HAVE_METIS) || defined(HAVE_SCOTCH)) ticks tic = getticks(); diff --git a/src/fof.c b/src/fof.c index 50c2f726cb..3bfcc105e5 100644 --- a/src/fof.c +++ b/src/fof.c @@ -56,7 +56,7 @@ #define FOF_COMPRESS_PATHS_MIN_LENGTH (2) /* Are we timing calculating group properties in the FOF? */ -//#define WITHOUT_GROUP_PROPS +// #define WITHOUT_GROUP_PROPS /** * @brief Properties of a group used for black hole seeding diff --git a/src/hydro/Shadowswift/voronoi3d_algorithm.h b/src/hydro/Shadowswift/voronoi3d_algorithm.h index 37d7730545..449e1f22a3 100644 --- a/src/hydro/Shadowswift/voronoi3d_algorithm.h +++ b/src/hydro/Shadowswift/voronoi3d_algorithm.h @@ -31,7 +31,7 @@ #include /* For debugging purposes */ -//#define LOOP_CHECK 1000 +// #define LOOP_CHECK 1000 #ifdef LOOP_CHECK /* We need to do the trickery below to get a unique counter for each call to the @@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE int check_counter(int *counter, /* This flag activates a number of expensive geometrical checks that help finding bugs. 
*/ -//#define VORONOI3D_EXPENSIVE_CHECKS +// #define VORONOI3D_EXPENSIVE_CHECKS /* Tolerance parameter used to decide when to use more precise geometric criteria */ diff --git a/src/lightcone/lightcone.c b/src/lightcone/lightcone.c index 8c244b0c71..d04076384a 100644 --- a/src/lightcone/lightcone.c +++ b/src/lightcone/lightcone.c @@ -59,7 +59,7 @@ #include "units.h" /* Whether to dump the replication list */ -//#define DUMP_REPLICATIONS +// #define DUMP_REPLICATIONS #ifdef DUMP_REPLICATIONS static int output_nr = 0; #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index bc81e1bbc4..3575db2240 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -69,7 +69,7 @@ #define HDF5_PARALLEL_IO_MAX_BYTES 2147000000LL /* Are we timing the i/o? */ -//#define IO_SPEED_MEASUREMENT +// #define IO_SPEED_MEASUREMENT /** * @brief Reads a chunk of data from an open HDF5 dataset diff --git a/src/partition.c b/src/partition.c index 6fbe704e02..40286a737a 100644 --- a/src/partition.c +++ b/src/partition.c @@ -67,8 +67,8 @@ #include "threadpool.h" #include "tools.h" -#define IDX_MAX INT32_MAX -#define IDX_MIN INT32_MIN +#define IDX_MAX INT32_MAX +#define IDX_MIN INT32_MIN /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { @@ -90,7 +90,8 @@ static int check_complete(struct space *s, int verbose, int nregions); * Repartition fixed costs per type/subtype. These are determined from the * statistics output produced when running with task debugging enabled. 
*/ -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) static double repartition_costs[task_type_count][task_subtype_count]; #endif #if defined(WITH_MPI) @@ -333,7 +334,8 @@ static void graph_init(struct space *s, int periodic, idx_t *weights_e, } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) struct counts_mapper_data { double *counts; size_t size; @@ -563,7 +565,8 @@ static void sizes_to_edges(struct space *s, double *counts, double *edges) { } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /** * @brief Apply METIS cell-list partitioning to a cell structure. * @@ -576,12 +579,12 @@ static void split_metis(struct space *s, int nregions, int *celllist) { for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; /* To check or visualise the partition dump all the cells. */ - if (engine_rank == 0) dumpCellRanks("partition", s->cells_top, - s->nr_cells); + if (engine_rank == 0) dumpCellRanks("partition", s->cells_top, s->nr_cells); } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* qsort support. */ struct indexval { @@ -1384,8 +1387,9 @@ static void pick_metis(int nodeID, struct space *s, int nregions, * number of cells in space + 1. NULL for not used. * @param nxadj the number of xadj element used. 
*/ -static void graph_init_scotch(struct space *s, int periodic, SCOTCH_Num *weights_e, - SCOTCH_Num *adjncy, int *nadjcny, SCOTCH_Num *xadj, int *nxadj) { +static void graph_init_scotch(struct space *s, int periodic, + SCOTCH_Num *weights_e, SCOTCH_Num *adjncy, + int *nadjcny, SCOTCH_Num *xadj, int *nxadj) { /* Loop over all cells in the space. */ *nadjcny = 0; @@ -1440,11 +1444,11 @@ static void graph_init_scotch(struct space *s, int periodic, SCOTCH_Num *weight } /** - * @brief Partition the given space into a number of connected regions and + * @brief Partition the given space into a number of connected regions and * map to available architecture. * - * Split the space and map to compute architecture using Scotch. to derive - * a partitions using the given edge and vertex weights. If no weights + * Split the space and map to compute architecture using Scotch. to derive + * a partitions using the given edge and vertex weights. If no weights * are given then an unweighted partition is performed. * * @param nodeID the rank of our node. @@ -1460,7 +1464,7 @@ static void graph_init_scotch(struct space *s, int periodic, SCOTCH_Num *weight * sizeof number of cells. */ static void pick_scotch(int nodeID, struct space *s, int nregions, - double *vertexw, double *edgew, int *celllist) { + double *vertexw, double *edgew, int *celllist) { /* Total number of cells. */ int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; @@ -1475,18 +1479,22 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if (nodeID == 0) { /* Allocate adjacency and weights arrays . 
*/ SCOTCH_Num *xadj; - if ((xadj = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (ncells + 1))) == NULL) + if ((xadj = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (ncells + 1))) == + NULL) error("Failed to allocate xadj buffer."); SCOTCH_Num *adjncy; - if ((adjncy = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * ncells)) == NULL) + if ((adjncy = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * ncells)) == + NULL) error("Failed to allocate adjncy array."); SCOTCH_Num *weights_v = NULL; if (vertexw != NULL) - if ((weights_v = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * ncells)) == NULL) + if ((weights_v = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * ncells)) == + NULL) error("Failed to allocate vertex weights array"); SCOTCH_Num *weights_e = NULL; if (edgew != NULL) - if ((weights_e = (SCOTCH_Num *)malloc(26 * sizeof(SCOTCH_Num) * ncells)) == NULL) + if ((weights_e = + (SCOTCH_Num *)malloc(26 * sizeof(SCOTCH_Num) * ncells)) == NULL) error("Failed to allocate edge weights array"); SCOTCH_Num *regionid; if ((regionid = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * ncells)) == NULL) @@ -1550,110 +1558,114 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Define the cell graph. Keeping the edge weights association. */ int nadjcny = 0; int nxadj = 0; - graph_init_scotch(s, s->periodic, weights_e, adjncy, &nadjcny, xadj, &nxadj); + graph_init_scotch(s, s->periodic, weights_e, adjncy, &nadjcny, xadj, + &nxadj); /* Define the cell graph. Keeping the edge weights association. 
*/ // Setting up the Scotch graph SCOTCH_Graph graph; SCOTCH_Num baseval = 0; - SCOTCH_Num vertnbr = ncells; /* Number of vertices */ - SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ + SCOTCH_Num vertnbr = ncells; /* Number of vertices */ + SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ - SCOTCH_Num *verttab; /* Vertex array [vertnbr + 1] */ - verttab = (SCOTCH_Num*) malloc((vertnbr + 1) * sizeof(SCOTCH_Num)); + SCOTCH_Num *verttab; /* Vertex array [vertnbr + 1] */ + verttab = (SCOTCH_Num *)malloc((vertnbr + 1) * sizeof(SCOTCH_Num)); - //SCOTCH_Num *vendtab; /* Vertex array [vertnbr] */ - //vendtab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); + // SCOTCH_Num *vendtab; /* Vertex array [vertnbr] */ + // vendtab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); - SCOTCH_Num *velotab; /* Vertex load array */ - velotab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); + SCOTCH_Num *velotab; /* Vertex load array */ + velotab = (SCOTCH_Num *)malloc((vertnbr) * sizeof(SCOTCH_Num)); - SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ - edgetab = (SCOTCH_Num*) malloc((edgenbr) * sizeof(SCOTCH_Num)); + SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ + edgetab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num)); SCOTCH_Num *edlotab; /* Int load of each edge */ - edlotab = (SCOTCH_Num*) malloc((edgenbr) * sizeof(SCOTCH_Num)); + edlotab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num)); int edges_deg = 26; for (int i = 0; i < vertnbr; i++) { - verttab[i] = i*edges_deg; - velotab[i] = weights_v[i]; + verttab[i] = i * edges_deg; + velotab[i] = weights_v[i]; } - verttab[vertnbr] = vertnbr*edges_deg; + verttab[vertnbr] = vertnbr * edges_deg; int vertex_count = 0; int neighbour; int return_edge; - /* The bidirectional weights associated with an edge are summed to ensure that the resultant - edges are symmetric. This is a neccessary for a Scotch graph. 
*/ + /* The bidirectional weights associated with an edge are summed to ensure + that the resultant edges are symmetric. This is a neccessary for a Scotch + graph. */ for (int i = 0; i < edgenbr; i++) { - if ((i>(edges_deg-1)) && (i%edges_deg == 0) ){ - vertex_count++; - } - neighbour = adjncy[i]; - edgetab[i] = neighbour; - for (int j = 0; j < edges_deg; j++) { - if ((adjncy[(neighbour*edges_deg + j)]) == vertex_count){ - return_edge = (neighbour*edges_deg + j); - } + if ((i > (edges_deg - 1)) && (i % edges_deg == 0)) { + vertex_count++; + } + neighbour = adjncy[i]; + edgetab[i] = neighbour; + for (int j = 0; j < edges_deg; j++) { + if ((adjncy[(neighbour * edges_deg + j)]) == vertex_count) { + return_edge = (neighbour * edges_deg + j); } - edlotab[i] = weights_e[i] + weights_e[return_edge]; + } + edlotab[i] = weights_e[i] + weights_e[return_edge]; } SCOTCH_graphInit(&graph); - if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, NULL, velotab, NULL, edgenbr, edgetab, edlotab) != 0) { - error("Error: Cannot build Scotch Graph.\n"); + if (SCOTCH_graphBuild(&graph, baseval, vertnbr, verttab, NULL, velotab, + NULL, edgenbr, edgetab, edlotab) != 0) { + error("Error: Cannot build Scotch Graph.\n"); } -// #ifdef SWIFT_DEBUG_CHECKS + // #ifdef SWIFT_DEBUG_CHECKS SCOTCH_graphCheck(&graph); static int partition_count = 0; char fname[200]; sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); FILE *graph_file = fopen(fname, "w"); if (graph_file == NULL) { - printf("Error: Cannot open output file.\n"); + printf("Error: Cannot open output file.\n"); } if (SCOTCH_graphSave(&graph, graph_file) != 0) { - printf("Error: Cannot save Scotch Graph.\n"); + printf("Error: Cannot save Scotch Graph.\n"); } fclose(graph_file); -// #endif + // #endif /* Read in architecture graph. 
*/ SCOTCH_Arch archdat; /* Load the architecture graph in .tgt format */ - FILE* arch_file = fopen("target.tgt", "r"); + FILE *arch_file = fopen("target.tgt", "r"); if (arch_file == NULL) { - printf("Error: Cannot open topo file.\n"); + printf("Error: Cannot open topo file.\n"); } if (SCOTCH_archLoad(&archdat, arch_file) != 0) - error("Error loading architecture graph"); + error("Error loading architecture graph"); /* Initialise in strategy. */ SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); SCOTCH_Num num_vertices; SCOTCH_Num flagval = SCOTCH_STRATBALANCE; - + num_vertices = SCOTCH_archSize(&archdat); if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.05) != 0) error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) - error("Error Scotch mapping failed."); -// #ifdef SWIFT_DEBUG_CHECKS + error("Error Scotch mapping failed."); + // #ifdef SWIFT_DEBUG_CHECKS SCOTCH_Mapping mappptr; SCOTCH_graphMapInit(&graph, &mappptr, &archdat, regionid); - FILE* map_stats = fopen("map_stats.out", "w"); + FILE *map_stats = fopen("map_stats.out", "w"); SCOTCH_graphMapView(&graph, &mappptr, map_stats); fclose(map_stats); -// #endif + // #endif /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { - if (regionid[k] < 0 || regionid[k] >= nregions){ - //error("Got bad nodeID for cell"); - printf("Bad Vertex %d is assigned to architecture block %d\n", k, regionid[k]); + if (regionid[k] < 0 || regionid[k] >= nregions) { + // error("Got bad nodeID for cell"); + printf("Bad Vertex %d is assigned to architecture block %d\n", k, + regionid[k]); } /* And keep. 
*/ celllist[k] = regionid[k]; @@ -1662,7 +1674,6 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_stratExit(&stradat); SCOTCH_archExit(&archdat); fclose(arch_file); - if (verttab != NULL) free(verttab); if (velotab != NULL) free(velotab); @@ -1682,7 +1693,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Helper struct for partition_gather weights. */ struct weights_mapper_data { @@ -2194,27 +2206,26 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, * @param nr_nodes the number of nodes. * @param s the space of cells holding our local particles. */ -static void repart_scotch(int vweights, int eweights, int timebins, +static void repart_scotch(int vweights, int eweights, int timebins, struct repartition *repartition, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { - - /* Create weight arrays using task ticks for vertices and edges (edges - * * assume the same graph structure as used in the part_ calls). */ + /* Create weight arrays using task ticks for vertices and edges (edges + * * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; struct cell *cells = s->cells_top; /* Allocate and fill the adjncy indexing array defining the graph of - * * cells. */ + * * cells. */ SCOTCH_Num *inds; if ((inds = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * nr_cells)) == NULL) error("Failed to allocate the inds array"); int nadjcny = 0; int nxadj = 0; - graph_init_scotch(s, 1 /* periodic */, NULL /* no edge weights */, inds, &nadjcny, - NULL /* no xadj needed */, &nxadj); + graph_init_scotch(s, 1 /* periodic */, NULL /* no edge weights */, inds, + &nadjcny, NULL /* no xadj needed */, &nxadj); /* Allocate and init weights. 
*/ double *weights_v = NULL; @@ -2289,7 +2300,7 @@ static void repart_scotch(int vweights, int eweights, int timebins, } /* We need to rescale the sum of the weights so that the sums of the two - * * types of weights are less than IDX_MAX, that is the range of idx_t. */ + * * types of weights are less than IDX_MAX, that is the range of idx_t. */ double vsum = 0.0; if (vweights) for (int k = 0; k < nr_cells; k++) vsum += weights_v[k]; @@ -2340,7 +2351,7 @@ static void repart_scotch(int vweights, int eweights, int timebins, } /* Balance edges and vertices when the edge weights are timebins, as these - * * have no reason to have equivalent scales, we use an equipartition. */ + * * have no reason to have equivalent scales, we use an equipartition. */ if (timebins && eweights) { /* Make sums the same. */ @@ -2358,7 +2369,7 @@ static void repart_scotch(int vweights, int eweights, int timebins, pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); #endif /* Check that all cells have good values. All nodes have same copy, so just - * * check on one. */ + * * check on one. */ if (nodeID == 0) { for (int k = 0; k < nr_cells; k++) if (repartition->celllist[k] < 0 || repartition->celllist[k] >= nr_nodes) @@ -2431,10 +2442,10 @@ void partition_repartition(struct repartition *reparttype, int nodeID, } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS) { repart_memory_metis(reparttype, nodeID, nr_nodes, s); - + } else if (reparttype->type == REPART_NONE) { /* Doing nothing. 
*/ - + } else { error("Impossible repartition type"); } @@ -2442,16 +2453,15 @@ void partition_repartition(struct repartition *reparttype, int nodeID, if (s->e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); -#elif defined(WITH_MPI) && defined(HAVE_SCOTCH) +#elif defined(WITH_MPI) && defined(HAVE_SCOTCH) ticks tic = getticks(); if (reparttype->type == REPART_SCOTCH) { - repart_scotch(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, - nr_tasks); - + repart_scotch(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks, nr_tasks); + } else if (reparttype->type == REPART_NONE) { /* Doing nothing. */ - + } else { error("Impossible repartition type"); } @@ -2517,7 +2527,8 @@ void partition_initial_partition(struct partition *initial_partition, } else if (initial_partition->type == INITPART_WEIGHT || initial_partition->type == INITPART_WEIGHT_EDGE || initial_partition->type == INITPART_NOWEIGHT) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Simple k-way partition selected by METIS using cell particle * counts as weights or not. Should be best when starting with a * inhomogeneous dist. @@ -2642,7 +2653,6 @@ void partition_init(struct partition *partition, const char *default_part = "grid"; #endif - /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. 
*/ factor(nr_nodes, &partition->grid[0], &partition->grid[1]); factor(nr_nodes / partition->grid[1], &partition->grid[0], @@ -2710,7 +2720,7 @@ void partition_init(struct partition *partition, } else if (strcmp("timecosts", part_type) == 0) { repartition->type = REPART_METIS_VERTEX_COSTS_TIMEBINS; - + } else if (strcmp("scotch", part_type) == 0) { repartition->type = REPART_SCOTCH; @@ -2726,7 +2736,7 @@ void partition_init(struct partition *partition, "Permitted values are: 'none' when compiled without " "METIS or ParMETIS."); #endif - message("Choice of re-partition type '%s'.", part_type); + message("Choice of re-partition type '%s'.", part_type); } /* Get the fraction CPU time difference between nodes (<1) or the number @@ -2806,7 +2816,8 @@ void partition_init(struct partition *partition, */ static int repart_init_fixed_costs(void) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) /* Set the default fixed cost. */ for (int j = 0; j < task_type_count; j++) { for (int k = 0; k < task_subtype_count; k++) { @@ -2859,7 +2870,8 @@ static int check_complete(struct space *s, int verbose, int nregions) { return (!failed); } -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) +#if defined(WITH_MPI) && \ + (defined(HAVE_METIS) || defined(HAVE_PARMETIS) || defined(HAVE_SCOTCH)) #ifdef SWIFT_DEBUG_CHECKS /** * @brief Check that the threadpool version of the weights construction is diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h index b163c6fc8a..06c5546bec 100644 --- a/src/runner_doiact_functions_stars.h +++ b/src/runner_doiact_functions_stars.h @@ -1060,7 +1060,7 @@ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, /* Get the sorting index. 
*/ int sid = 0; for (int k = 0; k < 3; k++) - sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 + sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); From 0a2e732ebaa2936264bf83a90302d4240b001270 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Tue, 22 Aug 2023 15:05:08 +0100 Subject: [PATCH 36/49] fix: Clean up of excess files --- create-target-file/create-target-file.py | 20 --------- create-target-file/create-target-file.sh | 40 ------------------ .../sample-slurm-cosma-jscript.sh | 41 ------------------- examples/EAGLE_low_z/EAGLE_25/eagle_25.yml | 2 +- examples/EAGLE_low_z/EAGLE_6/eagle_6.yml | 2 +- examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt | 1 - examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt | 1 - multi_cosma_node.sh | 24 ----------- 8 files changed, 2 insertions(+), 129 deletions(-) delete mode 100644 create-target-file/create-target-file.py delete mode 100755 create-target-file/create-target-file.sh delete mode 100755 create-target-file/sample-slurm-cosma-jscript.sh delete mode 100644 examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt delete mode 100644 examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt delete mode 100644 multi_cosma_node.sh diff --git a/create-target-file/create-target-file.py b/create-target-file/create-target-file.py deleted file mode 100644 index 56897cfcfb..0000000000 --- a/create-target-file/create-target-file.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -cwd = os.getcwd() - -print("Current working directory:", cwd) - - -nodes_to_use = -1 -mpi_tasks = -1 -cpus_per_task = -1 -tasks_per_core = -1 -tasks_per_node = -1 -cluster_architecture = "unknown" -processor_count = -1 # for -np= - - -# Sanity checks: -# - if a [cwd] directive is used, create target.tgt in there; -# otherwise, create it in $PWD, i..e in current directory -# - if no cluster arch is specified, default to [cosma8] -# - diff --git a/create-target-file/create-target-file.sh 
b/create-target-file/create-target-file.sh deleted file mode 100755 index 37b5314f31..0000000000 --- a/create-target-file/create-target-file.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -l - -# Assumes we run SLURM; provision for SGE will be added afterwards - -# For use in several known cluster environments: [Durham cosma8] for now, -# perhaps [UCL's Myriad, Kathleen, Young] later on, -# [ARCHER2], etc. - -# Minimal error checking throughout - -# Workflow: -# - this script is called from within a submitted jobscript. -# - tries to determine architecture [cosma8 | Kathleen etc] -# - tries to determine jobscript directives relevant to how many -# nodes, mpi ranks/tasks etc are requested, and some other runtime parameters -# - in order to produce a [target.tgt] file, to be passed on to the SCOTCH module. - - -# Process the jobscript from which we are called, strip comments and gather -# switches / parameters relevant to SCOTCH, remove '=' from those lines - -sed -e '/^\s\{0,\}#\ /d' $1 | \ - sed -n -r -e '/^#SBATCH/p;/\-\-with\-arch/{s/^.*\-\-with\-arch\=/\-\-with\-arch\=/;s/[ ].*$//;p}' | \ - sed 's/\=/\ /' | \ - sed 's/^#SBATCH\ //' > target-specs.txt - -BINARY_EXECUTABLE="swift_mpi" -# Make sure we have the main invocation line for swift_mpi, so we can -# parse switches not supplied by #SBATCH; we are interested in things like -# -np=, --with-arch= -sed -e '/^\s\{0,\}#\ /d' $1 | \ - sed -e 's/#.*$//g' | \ - sed -n "/${BINARY_EXECUTABLE}/p" | \ - sed 's/\=/\ /g' >> target-specs.txt - -# Parse the lines in [target-specs.txt], determine SCOTCH-relevant parameters, -# consult cluster architecture, and if successful, produce the specs file [target.tgt]; -# if not, return some error, so we may return that error to the caller script. 
- - diff --git a/create-target-file/sample-slurm-cosma-jscript.sh b/create-target-file/sample-slurm-cosma-jscript.sh deleted file mode 100755 index a1c357f626..0000000000 --- a/create-target-file/sample-slurm-cosma-jscript.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -l - -#SBATCH -n 2 -# #SBATCH --nodes=4 -#SBATCH --ntasks=32 -# #SBATCH --ntasks=64 -#SBATCH --cpus-per-task=64 -# ---- #SBATCH --cpus-per-task=32 -export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK -#SBATCH --mem-per-cpu=4096 -#SBATCH --ntasks-per-core=1 - -# NOTE: In nodes with hyper-threading enabled, a task not requesting full cores -# may be distributed across sockets. -# This can be avoided by specifying --ntasks-per-core=1, -# which forces tasks to allocate full cores. - -#SBATCH --tasks-per-node 64 -# #SBATCH --sockets-per-node= -# #SBATCH --cores-per-socket= -# #SBATCH --threads-per-core= -# #SBATCH --ntasks-per-core=1 - - - - -./create-target-file.sh $0 - -# We can specify options even below the call of create-target-file.sh -#SBATCH --tasks-per-node 64 - -# mpirun -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID - - -# swift_mpi -np $SLURM_NTASKS --with-arch=cosma8 your_program your_inputs $SLURM_ARRAY_TASK_ID -# swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID -# swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID - - # swift-mpi --with-arch=cosma8 -np $SLURM_NTASKS your_program your_inputs $SLURM_ARRAY_TASK_ID -mpirun /cosma/home/dp004/dc-kots1/swiftsim-ucl-dp004/swift_mpi --with-arch=cosma8 -np $SLURM_NTASKS --threads=16 --cosmology --hydro --self-gravity --stars eagle_6.yml | tee output.log - diff --git a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml index 01198a1c54..8ba338df28 100644 --- a/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_low_z/EAGLE_25/eagle_25.yml @@ -94,7 +94,7 @@ FOF: # Parameters related to the initial conditions 
InitialConditions: - file_name: ./EAGLE_ICs_25_deflated.hdf5 # The file to read + file_name: ./EAGLE_ICs_25.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml index 0726fd2446..a77e1bb827 100644 --- a/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_low_z/EAGLE_6/eagle_6.yml @@ -96,7 +96,7 @@ FOF: # Parameters related to the initial conditions InitialConditions: - file_name: ./EAGLE_ICs_6_deflated.hdf5 # The file to read + file_name: ./EAGLE_ICs_6.hdf5 # The file to read periodic: 1 cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget diff --git a/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt b/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt deleted file mode 100644 index ed521f92ce..0000000000 --- a/examples/EAGLE_low_z/EAGLE_6/topologies/2.tgt +++ /dev/null @@ -1 +0,0 @@ -tleaf 1 2 100 \ No newline at end of file diff --git a/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt b/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt deleted file mode 100644 index ce824561b3..0000000000 --- a/examples/EAGLE_low_z/EAGLE_6/topologies/8.tgt +++ /dev/null @@ -1 +0,0 @@ -tleaf 2 2 100 4 10 diff --git a/multi_cosma_node.sh b/multi_cosma_node.sh deleted file mode 100644 index 23a5103202..0000000000 --- a/multi_cosma_node.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -l -# Example with $ntasks MPI tasks and 16 cpus per task -# Targeting $ntasks NUMA regions -# Project/Account (use your own) -#SBATCH -A dp004 - -#SBATCH -p cosma8 - -# Number of MPI tasks -#SBATCH --ntasks=32 - -#SBATCH --cpus-per-task=16 - -# Runtime of this jobs is less then 12 hours. 
-#SBATCH --time=00:40:00 - -# Clear the environment from any previously loaded modules -module purge > /dev/null 2>&1 - -module load cosma/2018 python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.1 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit metis/5.1.0-64bit gsl/2.5 - -# And finally run the job -mpirun --map-by numa /cosma8/data/dp004/dc-gile1/swiftsim-scotch/swift_mpi --threads=16 --cosmology --hydro --self-gravity --stars eagle_6.yml | tee output.log -# End of submit file \ No newline at end of file From 3ab621c3d98a9c613f964f19e573a226df7e5593 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Tue, 22 Aug 2023 16:50:24 +0100 Subject: [PATCH 37/49] fix: Formatting scripts --- src/runner_doiact_functions_stars.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h index 06c5546bec..b163c6fc8a 100644 --- a/src/runner_doiact_functions_stars.h +++ b/src/runner_doiact_functions_stars.h @@ -1060,7 +1060,7 @@ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, /* Get the sorting index. */ int sid = 0; for (int k = 0; k < 3; k++) - sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 + sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 
2 : 1); From ca9deedb24fb5b53328f04c2f60a5f8a0d5a9d65 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 23 Aug 2023 15:22:33 +0100 Subject: [PATCH 38/49] fix: Revert whitespace --- src/const.h | 20 ++++++++++---------- src/distributed_io.c | 2 +- src/fof.c | 2 +- src/hydro/Shadowswift/voronoi3d_algorithm.h | 4 ++-- src/lightcone/lightcone.c | 2 +- src/parallel_io.c | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/const.h b/src/const.h index 9cf25776e4..429fe2275a 100644 --- a/src/const.h +++ b/src/const.h @@ -27,7 +27,7 @@ /* Type of gradients to use (GIZMO_SPH only) */ /* If no option is chosen, no gradients are used (first order scheme) */ -// #define GRADIENTS_SPH +//#define GRADIENTS_SPH #define GRADIENTS_GIZMO /* Types of slope limiter to use (GIZMO_SPH only) */ @@ -40,11 +40,11 @@ /* Options to control the movement of particles for GIZMO_SPH. */ /* This option disables particle movement */ -// #define GIZMO_FIX_PARTICLES +//#define GIZMO_FIX_PARTICLES /* Try to keep cells regular by adding a correction velocity. */ -// #define GIZMO_STEER_MOTION +//#define GIZMO_STEER_MOTION /* Use the total energy instead of the thermal energy as conserved variable. */ -// #define GIZMO_TOTAL_ENERGY +//#define GIZMO_TOTAL_ENERGY /* Options to control handling of unphysical values (GIZMO_SPH only). */ /* In GIZMO, mass and energy (and hence density and pressure) can in principle @@ -53,7 +53,7 @@ If no options are selected below, we assume (and pray) that this will not happen, and add no restrictions to how these variables are treated. */ /* Check for unphysical values and crash if they occur. */ -// #define GIZMO_UNPHYSICAL_ERROR +//#define GIZMO_UNPHYSICAL_ERROR /* Check for unphysical values and reset them to safe values. */ #define GIZMO_UNPHYSICAL_RESCUE /* Show a warning message if an unphysical value was reset (only works if @@ -65,9 +65,9 @@ /* Parameters that control how GIZMO handles pathological particle configurations. 
*/ /* Show a warning message if a pathological configuration has been detected. */ -// #define GIZMO_PATHOLOGICAL_WARNING +//#define GIZMO_PATHOLOGICAL_WARNING /* Crash if a pathological configuration has been detected. */ -// #define GIZMO_PATHOLOGICAL_ERROR +//#define GIZMO_PATHOLOGICAL_ERROR /* Maximum allowed gradient matrix condition number. If the condition number of the gradient matrix (defined in equation C1 in Hopkins, 2015) is larger than this value, we artificially increase the number of neighbours to get a more @@ -91,16 +91,16 @@ /* Options to control SHADOWFAX_SPH */ /* This option disables cell movement */ -// #define SHADOWFAX_FIX_CELLS +//#define SHADOWFAX_FIX_CELLS /* This option enables cell steering, i.e. trying to keep the cells regular by adding a correction to the cell velocities.*/ #define SHADOWFAX_STEER_CELL_MOTION /* This option evolves the total energy instead of the thermal energy */ -// #define SHADOWFAX_TOTAL_ENERGY +//#define SHADOWFAX_TOTAL_ENERGY /* Source terms */ #define SOURCETERMS_NONE -// #define SOURCETERMS_SN_FEEDBACK +//#define SOURCETERMS_SN_FEEDBACK /* GRACKLE doesn't really like exact zeroes, so use something * comparatively small instead. */ diff --git a/src/distributed_io.c b/src/distributed_io.c index 7a2833fa5d..b60958a3be 100644 --- a/src/distributed_io.c +++ b/src/distributed_io.c @@ -62,7 +62,7 @@ #include "xmf.h" /* Are we timing the i/o? */ -// #define IO_SPEED_MEASUREMENT +//#define IO_SPEED_MEASUREMENT /** * @brief Writes a data array in given HDF5 group. diff --git a/src/fof.c b/src/fof.c index 3bfcc105e5..50c2f726cb 100644 --- a/src/fof.c +++ b/src/fof.c @@ -56,7 +56,7 @@ #define FOF_COMPRESS_PATHS_MIN_LENGTH (2) /* Are we timing calculating group properties in the FOF? 
*/ -// #define WITHOUT_GROUP_PROPS +//#define WITHOUT_GROUP_PROPS /** * @brief Properties of a group used for black hole seeding diff --git a/src/hydro/Shadowswift/voronoi3d_algorithm.h b/src/hydro/Shadowswift/voronoi3d_algorithm.h index 449e1f22a3..37d7730545 100644 --- a/src/hydro/Shadowswift/voronoi3d_algorithm.h +++ b/src/hydro/Shadowswift/voronoi3d_algorithm.h @@ -31,7 +31,7 @@ #include /* For debugging purposes */ -// #define LOOP_CHECK 1000 +//#define LOOP_CHECK 1000 #ifdef LOOP_CHECK /* We need to do the trickery below to get a unique counter for each call to the @@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE int check_counter(int *counter, /* This flag activates a number of expensive geometrical checks that help finding bugs. */ -// #define VORONOI3D_EXPENSIVE_CHECKS +//#define VORONOI3D_EXPENSIVE_CHECKS /* Tolerance parameter used to decide when to use more precise geometric criteria */ diff --git a/src/lightcone/lightcone.c b/src/lightcone/lightcone.c index d04076384a..8c244b0c71 100644 --- a/src/lightcone/lightcone.c +++ b/src/lightcone/lightcone.c @@ -59,7 +59,7 @@ #include "units.h" /* Whether to dump the replication list */ -// #define DUMP_REPLICATIONS +//#define DUMP_REPLICATIONS #ifdef DUMP_REPLICATIONS static int output_nr = 0; #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index 3575db2240..bc81e1bbc4 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -69,7 +69,7 @@ #define HDF5_PARALLEL_IO_MAX_BYTES 2147000000LL /* Are we timing the i/o? 
*/ -// #define IO_SPEED_MEASUREMENT +//#define IO_SPEED_MEASUREMENT /** * @brief Reads a chunk of data from an open HDF5 dataset From 7c62a50ef0d48391423c2acb970965a7ad762eb5 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Thu, 24 Aug 2023 10:31:11 +0100 Subject: [PATCH 39/49] fix: Adding defs of idx_t --- src/partition.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/partition.c b/src/partition.c index 40286a737a..a599df6010 100644 --- a/src/partition.c +++ b/src/partition.c @@ -67,8 +67,14 @@ #include "threadpool.h" #include "tools.h" -#define IDX_MAX INT32_MAX -#define IDX_MIN INT32_MIN +#ifndef idx_t +#define idx_t SCOTCH_Idx +#endif + +#ifndef IDX_MAX +#define IDX_MAX SCOTCH_NUMMAX +#endif + /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { @@ -1615,7 +1621,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, NULL, edgenbr, edgetab, edlotab) != 0) { error("Error: Cannot build Scotch Graph.\n"); } - // #ifdef SWIFT_DEBUG_CHECKS + #ifdef SWIFT_DEBUG_CHECKS SCOTCH_graphCheck(&graph); static int partition_count = 0; char fname[200]; @@ -1629,7 +1635,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, printf("Error: Cannot save Scotch Graph.\n"); } fclose(graph_file); - // #endif + #endif /* Read in architecture graph. 
*/ SCOTCH_Arch archdat; /* Load the architecture graph in .tgt format */ @@ -1653,13 +1659,13 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* Map the computation graph to the architecture graph */ if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); - // #ifdef SWIFT_DEBUG_CHECKS + #ifdef SWIFT_DEBUG_CHECKS SCOTCH_Mapping mappptr; SCOTCH_graphMapInit(&graph, &mappptr, &archdat, regionid); FILE *map_stats = fopen("map_stats.out", "w"); SCOTCH_graphMapView(&graph, &mappptr, map_stats); fclose(map_stats); - // #endif + #endif /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { if (regionid[k] < 0 || regionid[k] >= nregions) { @@ -1700,7 +1706,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, struct weights_mapper_data { double *weights_e; double *weights_v; - int *inds; + idx_t *inds; int eweights; int nodeID; int timebins; From 6621820b7c873b6569675147086618cc36aeadb1 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Thu, 24 Aug 2023 15:18:36 +0100 Subject: [PATCH 40/49] fix: Check for METIS and PARMETIS --- src/partition.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/partition.c b/src/partition.c index a599df6010..e3a087ee34 100644 --- a/src/partition.c +++ b/src/partition.c @@ -67,13 +67,13 @@ #include "threadpool.h" #include "tools.h" -#ifndef idx_t -#define idx_t SCOTCH_Idx -#endif -#ifndef IDX_MAX +#ifdef HAVE_SCOTCH +#if !defined(HAVE_METIS) && !defined(HAVE_PARMETIS) +#define idx_t SCOTCH_Idx #define IDX_MAX SCOTCH_NUMMAX #endif +#endif /* Simple descriptions of initial partition types for reports. */ @@ -1394,7 +1394,7 @@ static void pick_metis(int nodeID, struct space *s, int nregions, * @param nxadj the number of xadj element used. 
*/ static void graph_init_scotch(struct space *s, int periodic, - SCOTCH_Num *weights_e, SCOTCH_Num *adjncy, + SCOTCH_Num *weights_e, idx_t *adjncy, int *nadjcny, SCOTCH_Num *xadj, int *nxadj) { /* Loop over all cells in the space. */ @@ -1488,8 +1488,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, if ((xadj = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (ncells + 1))) == NULL) error("Failed to allocate xadj buffer."); - SCOTCH_Num *adjncy; - if ((adjncy = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * ncells)) == + idx_t *adjncy; + if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) error("Failed to allocate adjncy array."); SCOTCH_Num *weights_v = NULL; @@ -1738,7 +1738,7 @@ void partition_gather_weights(void *map_data, int num_elements, double *weights_e = mydata->weights_e; double *weights_v = mydata->weights_v; - int *inds = mydata->inds; + idx_t *inds = mydata->inds; int eweights = mydata->eweights; int nodeID = mydata->nodeID; int nr_cells = mydata->nr_cells; @@ -2224,8 +2224,8 @@ static void repart_scotch(int vweights, int eweights, int timebins, /* Allocate and fill the adjncy indexing array defining the graph of * * cells. */ - SCOTCH_Num *inds; - if ((inds = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * 26 * nr_cells)) == NULL) + idx_t *inds; + if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) error("Failed to allocate the inds array"); int nadjcny = 0; int nxadj = 0; @@ -2480,7 +2480,6 @@ void partition_repartition(struct repartition *reparttype, int nodeID, error("SWIFT was not compiled with METIS, ParMETIS or Scotch support."); #endif } - /** * @brief Initial partition of space cells. 
* From 54c5ac4a63728a8f1c0c2518d4fe6f9f36e7bc6e Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Thu, 24 Aug 2023 15:20:03 +0100 Subject: [PATCH 41/49] adding scotch repartition_name[] --- src/partition.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index e3a087ee34..95aeeb8bc9 100644 --- a/src/partition.c +++ b/src/partition.c @@ -87,7 +87,8 @@ const char *initial_partition_name[] = { const char *repartition_name[] = { "none", "edge and vertex task cost weights", "task cost edge weights", "memory balanced, using particle vertex weights", - "vertex task costs and edge delta timebin weights"}; + "vertex task costs and edge delta timebin weights", + "scotch mapping, edge and vertex cost weights"}; /* Local functions, if needed. */ static int check_complete(struct space *s, int verbose, int nregions); From 72ac2c9b41f20782b9877e1db0a53da5ac62675c Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 9 Oct 2023 14:38:42 +0100 Subject: [PATCH 42/49] Update README-Scotch.md (#28) * Fix for infinite loop in Brent's method * Fixed issues with missing unlocks/dependencies/debug checks for top level cells which aren't also super cells * GEAR-RT: theory * Deadlock detector * Update spin/jet AGN model * corrected number of items to write in tracers_io.h * Fix new time-step for spin-jet scheme * Grackle Cooling updates * Make the EAGLE_ICs example load the PS2020 cooling tables and not the Wiersma+09 ones * Update EAGLE_ICs README to mention how to load the Wiersma tables if needed * Add a new potential MWPotential2014 * Fix jet initial velocities * Fix EAGLE tracers' list of outputs * Add forcing terms to the hydro equations * Add barotropic EoS * Add a function so that we can mark memory as deliberately leaked at the end of the program. 
* Update README-Scotch.md * Update README-Scotch.md Co-authored-by: Ilektra Christidi * Update README-Scotch.md Co-authored-by: Ilektra Christidi * Update README-Scotch.md Co-authored-by: Ilektra Christidi * Update README-Scotch.md Co-authored-by: Ilektra Christidi --------- Co-authored-by: Yolan Uyttenhove Co-authored-by: Matthieu Schaller Co-authored-by: Mladen Ivkovic Co-authored-by: Filip Husko Co-authored-by: Yves Revaz Co-authored-by: Darwin Co-authored-by: Ilektra Christidi --- README-Scotch.md | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index 497684f380..869b5c6909 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -40,10 +40,11 @@ Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swif Running with Scotch ---------------- -Scotch carries out a _mapping_ of a _source_ (or process) graph onto a _target_ (or architecture) graph. The weighted _source_ graph is generated by SWIFT and it captures the computation and communication cost across the computational domain. The _target_ graph defines the communication cost across the available computing architecture. Therefore, to make use of the Scotch _mapping_ alogrithms a target architecture file (_target.tgt_) must be generated and it should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. As we will be targetting NUMA regions on Cosma 8 we have modelled the architecture as a `tleaf` structure. +Scotch carries out a _mapping_ of a _source_ (or process) graph onto a _target_ (or architecture) graph. The weighted _source_ graph is generated by SWIFT and it captures the computation and communication cost across the computational domain. The _target_ graph defines the communication cost across the available computing architecture. 
Therefore, to make use of the Scotch _mapping_ alogrithms a target architecture file (_target.tgt_) must be generated and it should mirror the set up of the cluster being used. Scotch provides optimised architecture files which capture most HPC set ups. For Cosma8 runs we will be targetting NUMA regions therefore we have modelled the architecture as a `tleaf` structure. -In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` is defined in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. Example `tleaf`files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. +In the following examples it is assumed that one mpi rank is mapped to each Cosma 8 NUMA region. This enforces that `cpus-per-task=16` is defined in the SLURM submission script. The Cosma 8 nodes consist of 8 NUMA regions per node, with 4 NUMA regions per socket. Example `tleaf` files for various setups are given below, where the intrasocket communication cost between NUMA regions is set at _5_, intranode but across sockets is set at _10_ and the internode cost is set at _1000_. These weightings are estimated values but have been shown to give satisfactory results in the testcases explored. An estimate of the relative latency between NUMA regions on a node can be obtained by using [hwloc](https://github.com/open-mpi/hwloc), specifically by using `hwloc-distances`. 
+**Example tleaf graphs to represent various Cosma8 configurations** | Number of nodes | Number of MPI ranks | tleaf | | --------------- | ------------------- | ----------------------- | | 1 | 2 | tleaf 1 2 5 | @@ -52,17 +53,39 @@ In the following examples it is assumed that one mpi rank is mapped to each Cosm | 4 | 32 | tleaf 3 4 1000 2 10 4 5 | | 8 | 64 | tleaf 3 8 1000 2 10 4 5 | -The user needs to define this tleaf structure and save it as `target.tgt` in the directory they will run SWIFT from. Ongoing work focuses on automatically generating this target architecture upon run time. +The first index denotes the number of layers of the tleaf structure and the subsequent index pairs are the number of nodes in a layer and the cost to communicate between them. For example the numbers represented in the 64 MPI rank case (`tleaf 3 8 1000 2 10 4 5`) are as follows: reading left to right the `3` denotes the number of layers in the leaf structure, `8` denotes the number of vertices in the first layer (which corresponds to 8 compute nodes), `1000` the relative cost for internode communication, `2` denotes the number of vertices in the second layer (number of sockets on each node), `10` relative cost for intersocket communication, `4` denotes the number of vertices in the final layer (number of NUMA regions per socket on cosma8) and finally `5` is the relative cost of intrasocket communication. +The user needs to define this tleaf structure and save it as `target.tgt` in the directory they will run SWIFT from. Ongoing work focuses on automatically generating this target architecture upon run time. With OpenMPI the `mpirun` option `--map-by numa` has been found to be optimal. This is in contrast to previously suggested `--bind-to none` on the cosma8 [site](https://www.dur.ac.uk/icc/cosma/support/cosma8/). Scotch details ---------------- -Scotch carries out the mapping using various strategies which are outlined in the documentation. 
The Scotch strategy is passed to the Mapping functions. For the runs carried out here it was found that the global flag `SCOTCH_STRATBALANCE` and a imbalance ratio of `0.05` worked best. These values are passed to `SCOTCH_stratGraphMapBuild`. +Scotch carries out the mapping using various strategies which are outlined in the [documentation](https://gitlab.inria.fr/scotch/scotch/-/blob/master/doc/scotch_user7.0.pdf), a list of the strategies trialed include: `SCOTCH_STRATDEFAULT`, `SCOTCH_STRATBALANCE`, `SCOTCH_STRATQUALITY` and `SCOTCH_STRATSPEED`. The Scotch strategy string is passed to the Mapping functions. For the runs carried out here it was found that the global flag `SCOTCH_STRATBALANCE` and an imbalance ratio of `0.05` worked best. These values are passed to [`SCOTCH_stratGraphMapBuild`](https://github.com/UCL/swiftsim/blob/cb06b0e5c3d8457c474d0084d973f437d29b20d8/src/partition.c#L1657). + +One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality of the modelled SWIFT system the optimal mapping strategy doesn't neccessarily map to all available NUMA regions. At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. + +The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric - this causes an issue with SWIFT. Therefore, in the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights as calculated from SWIFT. + +Test Runs +---------------- +The following results were obtained on cosma8 running with the `SCOTCH_STRATBALANCE` strategy string and an imbalance ratio of `0.05`. 
+ + +| Testcase | Resources | flags | Scotch (s) | Metis (s) | +| -------- | --------------------------- | -------------- | ---------- | -------- | +| EAGLE_6 | nodes = 1 (8 NUMA regions) | `--map_by numa` | 1307.8 | 1401.3 | +| EAGLE_6 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 1294.6 | 1314.2 | +| EAGLE_25 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 8381.4 | 8420.6 | +| EAGLE_50 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 69312.1 | 67273.6 | +| EAGLE_50 | nodes = 4 (32 NUMA regions) | `--map_by numa` | 51803.8 | 51058.3 | +| EAGLE_50 | nodes = 8 (64 NUMA regions) | `--map_by numa` | 41941.1 | 42700.5 | + +Notes +---------------- -One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality of the modelled SWIFT system the optimally mapping strategy doesn't neccessarily map to all available NUMA regions. At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. +1. Implementing the parallel version [PT-Scotch](https://inria.hal.science/hal-00402893) should improve performance across large node count runs. +2. Further improvement could be achieved by accurately obtaining the cost to communicate across the resources provided by the scheduler at runtime. The above approach using the pre generated `tleaf` file is an approximation and tools like [netloc](https://www.open-mpi.org/projects/netloc/), which derive from the network fabric representive latency values would be the optimal solution. To begin this would require admin access to run the set up commands to generate an overall graph of the whole HPC. This graph structure is then referenced on run time with the allocated nodes ids to build up an accurate reprensentation of the available compute. -The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric - this causes an issue with SWIFT. 
Therefore, in the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights as calculated from SWIFT. From f660b0a0fc96d36b76111eec97e237d68fb070fb Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 3 Nov 2023 12:25:24 +0000 Subject: [PATCH 43/49] fix: Passing architecture file as flag (#27) * fix: Passing architecture file as flag * Fixed prevailing SCOTCH invocation with configure script; made ArchData globally visible. * Fixed picking scotch when also have metis/parmetis * Fixed picking scotch when also have metis/parmetis * Fixed populating partition struct with scotch target arch file before calling initial partitioning. * Prevent destruction and empty regeneration of archdat structure. * Edited swift_fof.c to match target file updates of swift.c * Minor edits: replaced a HAVE_PARMETIS with a HAVE_METIS, edit a message to show either SCOTCH or METIS used, changed strategy flag from SCOTCH_STRATQUALITY to SCOTCH_STRATBALANCE * Added a mesage to notify when a successful SCOTCH repartition takes place * Dropping reference to METIS in pick_scotch * Changed format specifiers for SCOTCH weights reporting in debug check - now acceptable for both 32 and 64 bit versions. * Merged two separate ifdef chunks of HAVE_SCOTCH for better visibility * Dressed up naked mallocs; averaged [div by 2.0] the edge loads; removed commented out and unused code. 
* Updated string for averaged and not summed weights --------- Co-authored-by: Dan Giles Co-authored-by: Themis Kotsialos --- src/debug.c | 3 +- src/debug.h | 6 ++- src/partition.c | 129 ++++++++++++++++++++++++++---------------------- src/partition.h | 1 + src/version.c | 9 ++-- swift.c | 28 +++++++++-- swift_fof.c | 27 ++++++++-- 7 files changed, 128 insertions(+), 75 deletions(-) diff --git a/src/debug.c b/src/debug.c index 978da76415..e536daceed 100644 --- a/src/debug.c +++ b/src/debug.c @@ -637,7 +637,8 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive, fclose(file); } -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) \ + && !defined(HAVE_SCOTCH) /** * @brief Dump a graph in METIS standard format, simple format and weights diff --git a/src/debug.h b/src/debug.h index 4ec9dc8127..3f8d8e5037 100644 --- a/src/debug.h +++ b/src/debug.h @@ -39,8 +39,10 @@ int checkCellhdxmax(const struct cell *c, int *depth); void dumpCells(const char *prefix, int super, int active, int mpiactive, int pactive, struct space *s, int rank, int step); -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) -#include "metis.h" +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) \ + && !defined(HAVE_SCOTCH) +typedef int64_t idx_t; +#include void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); #endif diff --git a/src/partition.c b/src/partition.c index 95aeeb8bc9..031cbbb8d6 100644 --- a/src/partition.c +++ b/src/partition.c @@ -45,15 +45,21 @@ #ifdef WITH_MPI #include /* METIS/ParMETIS headers only used when MPI is also available. 
*/ -#ifdef HAVE_PARMETIS +#if defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) #include #endif -#ifdef HAVE_METIS +#if defined(HAVE_METIS) && !defined(HAVE_SCOTCH) #include #endif /* SCOTCH headers only used when MPI is also available. */ #ifdef HAVE_SCOTCH #include +typedef int64_t idx_t; +#define IDX_T MPI_INT +#define idx_t SCOTCH_Idx +#define IDX_MAX SCOTCH_NUMMAX +SCOTCH_Arch the_archdat; +SCOTCH_Arch *p_archdat = &the_archdat; #endif #endif @@ -67,15 +73,6 @@ #include "threadpool.h" #include "tools.h" - -#ifdef HAVE_SCOTCH -#if !defined(HAVE_METIS) && !defined(HAVE_PARMETIS) -#define idx_t SCOTCH_Idx -#define IDX_MAX SCOTCH_NUMMAX -#endif -#endif - - /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { "axis aligned grids of cells", "vectorized point associated cells", @@ -195,7 +192,8 @@ static void split_vector(struct space *s, int nregions, int *samplecells) { * the cells next updates. */ -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) && !defined(HAVE_SCOTCH) + /** * @brief Fill the adjncy array defining the graph of cells in a space. * @@ -692,7 +690,7 @@ void permute_regions(int *newlist, int *oldlist, int nregions, int ncells, } #endif -#if defined(WITH_MPI) && defined(HAVE_PARMETIS) +#if defined(WITH_MPI) && defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) /** * @brief Partition the given space into a number of connected regions using * ParMETIS. @@ -1203,7 +1201,7 @@ static void pick_parmetis(int nodeID, struct space *s, int nregions, } #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) && !defined(HAVE_SCOTCH) /** * @brief Partition the given space into a number of connected regions. * @@ -1471,13 +1469,13 @@ static void graph_init_scotch(struct space *s, int periodic, * sizeof number of cells. 
*/ static void pick_scotch(int nodeID, struct space *s, int nregions, - double *vertexw, double *edgew, int *celllist) { + double *vertexw, double *edgew, int *celllist, + SCOTCH_Arch *archdat) { /* Total number of cells. */ int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2]; - /* Nothing much to do if only using a single partition. Also avoids METIS - * bug that doesn't handle this case well. */ + /* Nothing much to do if only using a single partition. */ if (nregions == 1) { for (int i = 0; i < ncells; i++) celllist[i] = 0; return; @@ -1526,7 +1524,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, failed++; } if (weights_v[k] < 0) { - message("Used vertex weight out of range: %d", weights_v[k]); + message("Used vertex weight out of range: %ld", (long)weights_v[k]); failed++; } } @@ -1555,7 +1553,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, failed++; } if (weights_e[k] < 1) { - message("Used edge weight out of range: %" PRIDX, weights_e[k]); + message("Used edge weight out of range: %" "I64d", (long long)weights_e[k]); failed++; } } @@ -1575,19 +1573,24 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Num edgenbr = (26 * vertnbr); /* Number of edges (arcs) */ SCOTCH_Num *verttab; /* Vertex array [vertnbr + 1] */ - verttab = (SCOTCH_Num *)malloc((vertnbr + 1) * sizeof(SCOTCH_Num)); - - // SCOTCH_Num *vendtab; /* Vertex array [vertnbr] */ - // vendtab = (SCOTCH_Num*) malloc((vertnbr) * sizeof(SCOTCH_Num)); + if ((verttab = (SCOTCH_Num *)malloc((vertnbr + 1) * sizeof(SCOTCH_Num))) == NULL) { + error("Failed to allocate Vertex array"); + } SCOTCH_Num *velotab; /* Vertex load array */ - velotab = (SCOTCH_Num *)malloc((vertnbr) * sizeof(SCOTCH_Num)); + if ((velotab = (SCOTCH_Num *)malloc((vertnbr) * sizeof(SCOTCH_Num))) == NULL) { + error("Failed to allocate Vertex load array"); + } SCOTCH_Num *edgetab; /* Edge array [edgenbr] */ - edgetab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num)); 
+ if ((edgetab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num))) == NULL) { + error("Failed to allocate Edge array"); + } SCOTCH_Num *edlotab; /* Int load of each edge */ - edlotab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num)); + if ((edlotab = (SCOTCH_Num *)malloc((edgenbr) * sizeof(SCOTCH_Num))) == NULL) { + error("Failed to allocate Edge Load array"); + } int edges_deg = 26; for (int i = 0; i < vertnbr; i++) { @@ -1599,7 +1602,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int vertex_count = 0; int neighbour; int return_edge; - /* The bidirectional weights associated with an edge are summed to ensure + /* The bidirectional weights associated with an edge are averaged to ensure that the resultant edges are symmetric. This is a neccessary for a Scotch graph. */ for (int i = 0; i < edgenbr; i++) { @@ -1613,7 +1616,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, return_edge = (neighbour * edges_deg + j); } } - edlotab[i] = weights_e[i] + weights_e[return_edge]; + edlotab[i] = (SCOTCH_Num)((weights_e[i] + weights_e[return_edge]) / 2.0); } SCOTCH_graphInit(&graph); @@ -1637,50 +1640,42 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, } fclose(graph_file); #endif - /* Read in architecture graph. */ - SCOTCH_Arch archdat; - /* Load the architecture graph in .tgt format */ - FILE *arch_file = fopen("target.tgt", "r"); - if (arch_file == NULL) { - printf("Error: Cannot open topo file.\n"); - } - if (SCOTCH_archLoad(&archdat, arch_file) != 0) - error("Error loading architecture graph"); /* Initialise in strategy. */ SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); SCOTCH_Num num_vertices; + // Choose between different strategies: + // e.g., SCOTCH_STRATQUALITY, SCOTCH_STRATBALANCE, etc. + // SCOTCH_STRATBALANCE seems to be the best choice. 
SCOTCH_Num flagval = SCOTCH_STRATBALANCE; - - num_vertices = SCOTCH_archSize(&archdat); + num_vertices = SCOTCH_archSize(archdat); if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.05) != 0) error("Error setting the Scotch mapping strategy."); /* Map the computation graph to the architecture graph */ - if (SCOTCH_graphMap(&graph, &archdat, &stradat, regionid) != 0) + if (SCOTCH_graphMap(&graph, archdat, &stradat, regionid) != 0) error("Error Scotch mapping failed."); #ifdef SWIFT_DEBUG_CHECKS SCOTCH_Mapping mappptr; - SCOTCH_graphMapInit(&graph, &mappptr, &archdat, regionid); + SCOTCH_graphMapInit(&graph, &mappptr, archdat, regionid); FILE *map_stats = fopen("map_stats.out", "w"); SCOTCH_graphMapView(&graph, &mappptr, map_stats); fclose(map_stats); #endif /* Check that the regionids are ok. */ for (int k = 0; k < ncells; k++) { - if (regionid[k] < 0 || regionid[k] >= nregions) { - // error("Got bad nodeID for cell"); - printf("Bad Vertex %d is assigned to architecture block %d\n", k, - regionid[k]); - } + if (regionid[k] < 0 || regionid[k] >= nregions) + error("Got bad nodeID for cell %i.", k); /* And keep. */ celllist[k] = regionid[k]; } SCOTCH_graphExit(&graph); SCOTCH_stratExit(&stradat); - SCOTCH_archExit(&archdat); - fclose(arch_file); + /* We will not be calling SCOTCH_archExit(archdat): + * this would destroy the contents of the archdat structure. + * The above two Scotch ...Exit() calls destroy localy defined + * structs, so they are OK to call. */ if (verttab != NULL) free(verttab); if (velotab != NULL) free(velotab); @@ -1886,7 +1881,7 @@ void partition_gather_weights(void *map_data, int num_elements, #endif -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) && !defined(HAVE_SCOTCH) /** * @brief Repartition the cells amongst the nodes using weights of * various kinds. 
@@ -2058,7 +2053,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, } /* And repartition/ partition, using both weights or not as requested. */ -#ifdef HAVE_PARMETIS +#if defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) if (repartition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); @@ -2067,7 +2062,7 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, repartition->adaptive, repartition->itr, repartition->celllist); } -#else +#elif !defined(HAVE_SCOTCH) pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); #endif @@ -2156,7 +2151,7 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, } /* And repartition. */ -#ifdef HAVE_PARMETIS +#if defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) if (repartition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights, NULL, repartition->celllist); } else { @@ -2164,7 +2159,7 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, repartition->adaptive, repartition->itr, repartition->celllist); } -#else +#elif !defined(HAVE_SCOTCH) pick_metis(nodeID, s, nr_nodes, weights, NULL, repartition->celllist); #endif @@ -2201,7 +2196,7 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, /* And apply to our cells */ split_metis(s, nr_nodes, repartition->celllist); } -#endif /* WITH_MPI && HAVE_METIS || HAVE_PARMETIS */ +#endif /* WITH_MPI && HAVE_METIS || HAVE_PARMETIS && !defined(HAVE_SCOTCH) */ #if WITH_MPI && HAVE_SCOTCH /** @@ -2373,7 +2368,7 @@ static void repart_scotch(int vweights, int eweights, int timebins, /* And repartition/ partition, using both weights or not as requested. */ #ifdef HAVE_SCOTCH - pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist); + pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, repartition->celllist, p_archdat); #endif /* Check that all cells have good values. 
All nodes have same copy, so just * * check on one. */ @@ -2399,10 +2394,13 @@ static void repart_scotch(int vweights, int eweights, int timebins, if (failed) { if (nodeID == 0) message( - "WARNING: repartition has failed, continuing with the current" + "WARNING: SCOTCH repartition has failed, continuing with the current" " partition, load balance will not be optimal"); for (int k = 0; k < nr_cells; k++) repartition->celllist[k] = cells[k].nodeID; + } else { + if (nodeID == 0) + message("SCOTCH repartition successful."); } /* And apply to our cells */ @@ -2432,7 +2430,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID, int nr_nodes, struct space *s, struct task *tasks, int nr_tasks) { -#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) +#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) && !defined(HAVE_SCOTCH) ticks tic = getticks(); if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) { @@ -2571,7 +2569,16 @@ void partition_initial_partition(struct partition *initial_partition, if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate celllist"); #ifdef HAVE_SCOTCH - pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); + //SCOTCH_Arch archdat; + FILE *arch_file = fopen(initial_partition->target_arch_file, "r"); + if (arch_file == NULL) + error("Error: Cannot open topo file."); + /* Load the architecture graph in .tgt format */ + if (SCOTCH_archLoad(p_archdat, arch_file) != 0) + error("Error loading architecture graph"); + fclose(arch_file); + pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist, p_archdat); + //pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); #elif HAVE_PARMETIS if (initial_partition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); @@ -2589,7 +2596,11 @@ void partition_initial_partition(struct partition *initial_partition, * proceeding. 
*/ if (!check_complete(s, (nodeID == 0), nr_nodes)) { if (nodeID == 0) +#ifdef HAVE_SCOTCH + message("SCOTCH initial partition failed, using a vectorised partition"); +#else message("METIS initial partition failed, using a vectorised partition"); +#endif initial_partition->type = INITPART_VECTORIZE; partition_initial_partition(initial_partition, nodeID, nr_nodes, s); } @@ -2648,7 +2659,7 @@ void partition_init(struct partition *partition, #ifdef WITH_MPI /* Defaults make use of METIS if available */ -#if defined(HAVE_METIS) || defined(HAVE_PARMETIS) +#if ((defined(HAVE_METIS) || defined(HAVE_PARMETIS)) && !defined(HAVE_SCOTCH)) const char *default_repart = "fullcosts"; const char *default_part = "edgememory"; #elif defined(HAVE_SCOTCH) diff --git a/src/partition.h b/src/partition.h index 962bfca7e6..5a7d69cc84 100644 --- a/src/partition.h +++ b/src/partition.h @@ -40,6 +40,7 @@ struct partition { enum partition_type type; int grid[3]; int usemetis; + char target_arch_file[PARSER_MAX_LINE_SIZE]; }; /* Repartition type to use. */ diff --git a/src/version.c b/src/version.c index 9d8642573d..0a8bc630e9 100644 --- a/src/version.c +++ b/src/version.c @@ -24,10 +24,11 @@ /* MPI headers. 
*/ #ifdef WITH_MPI #include -#ifdef HAVE_METIS +#if defined(HAVE_METIS) && !defined(HAVE_SCOTCH) #include #endif -#ifdef HAVE_PARMETIS +#if defined(HAVE_PARMETIS) +#include #include #endif #endif @@ -318,7 +319,7 @@ const char *hdf5_version(void) { const char *metis_version(void) { static char version[256] = {0}; -#if defined(WITH_MPI) && defined(HAVE_METIS) +#if defined(WITH_MPI) && defined(HAVE_METIS) && !defined(HAVE_SCOTCH) sprintf(version, "%i.%i.%i", METIS_VER_MAJOR, METIS_VER_MINOR, METIS_VER_SUBMINOR); #else @@ -335,7 +336,7 @@ const char *metis_version(void) { const char *parmetis_version(void) { static char version[256] = {0}; -#if defined(WITH_MPI) && defined(HAVE_PARMETIS) +#if defined(WITH_MPI) && defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) sprintf(version, "%i.%i.%i", PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION, PARMETIS_SUBMINOR_VERSION); #else diff --git a/swift.c b/swift.c index d5e20ef16e..7d935de1fc 100644 --- a/swift.c +++ b/swift.c @@ -205,6 +205,7 @@ int main(int argc, char *argv[]) { char *output_parameters_filename = NULL; char *cpufreqarg = NULL; char *param_filename = NULL; + char *scotch_tgtfile = NULL; char restart_file[200] = ""; unsigned long long cpufreq = 0; float dump_tasks_threshold = 0.f; @@ -360,6 +361,10 @@ int main(int argc, char *argv[]) { "Fraction of the total step's time spent in a task to trigger " "a dump of the task plot on this step", NULL, 0, 0), + OPT_STRING('o', "scotch-target-file", &scotch_tgtfile, + "Target file of the architecture which is needed to carry " + "out Scotch mappings", + NULL, 0, 0), OPT_END(), }; struct argparse argparse; @@ -413,7 +418,6 @@ int main(int argc, char *argv[]) { /* Deal with thread numbers */ if (nr_pool_threads == -1) nr_pool_threads = nr_threads; - /* Write output parameter file */ if (myrank == 0 && output_parameters_filename != NULL) { io_write_output_field_parameter(output_parameters_filename, with_cosmology, with_fof, with_structure_finding); @@ -871,21 +875,35 @@ int 
main(int argc, char *argv[]) { struct repartition reparttype; #ifdef WITH_MPI struct partition initial_partition; +#if defined(HAVE_SCOTCH) + /* need to provide arch file name before partition_init() is called */ + if (scotch_tgtfile != NULL){ + strcpy(initial_partition.target_arch_file, scotch_tgtfile); + } else { + error("No Scotch target architecture file provided."); + } +#endif partition_init(&initial_partition, &reparttype, params, nr_nodes); /* Let's report what we did */ if (myrank == 0) { -#if defined(HAVE_PARMETIS) +#if defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) if (reparttype.usemetis) message("Using METIS serial partitioning:"); else message("Using ParMETIS partitioning:"); -#elif defined(HAVE_METIS) +#elif defined(HAVE_METIS) && !defined(HAVE_SCOTCH) message("Using METIS serial partitioning:"); #elif defined(HAVE_SCOTCH) - message("Using SCOTCH serial partitioning:"); + message("Using Scotch serial mapping:"); + if (scotch_tgtfile != NULL){ + message("Using the Scotch Target file: %s", scotch_tgtfile); + /* strcpy(initial_partition.target_arch_file, scotch_tgtfile); */ + } else { /* extra failsafe check */ + error("Scotch mapping will fail: no target architecture file provided."); + } #else - message("Non-METIS partitioning:"); + message("Non-METIS and Non-SCOTCH partitioning:"); #endif message(" initial partitioning: %s", initial_partition_name[initial_partition.type]); diff --git a/swift_fof.c b/swift_fof.c index 04c44bcf76..4a56f07a83 100644 --- a/swift_fof.c +++ b/swift_fof.c @@ -154,6 +154,7 @@ int main(int argc, char *argv[]) { char *output_parameters_filename = NULL; char *cpufreqarg = NULL; char *param_filename = NULL; + char *scotch_tgtfile = NULL; unsigned long long cpufreq = 0; struct cmdparams cmdps; cmdps.nparam = 0; @@ -203,6 +204,10 @@ int main(int argc, char *argv[]) { OPT_INTEGER('Y', "threadpool-dumps", &dump_threadpool, "Time-step frequency at which threadpool tasks are dumped.", NULL, 0, 0), + OPT_STRING('o', 
"scotch-target-file", &scotch_tgtfile, + "Target file of the architecture which is needed to carry " + "out Scotch mappings", + NULL, 0, 0), OPT_END(), }; struct argparse argparse; @@ -376,21 +381,35 @@ int main(int argc, char *argv[]) { struct repartition reparttype; #ifdef WITH_MPI struct partition initial_partition; +#if defined(HAVE_SCOTCH) + /* need to provide arch file name before partition_init() is called */ + if (scotch_tgtfile != NULL){ + strcpy(initial_partition.target_arch_file, scotch_tgtfile); + } else { + error("No Scotch target architecture file provided."); + } +#endif partition_init(&initial_partition, &reparttype, params, nr_nodes); /* Let's report what we did */ if (myrank == 0) { -#if defined(HAVE_PARMETIS) +#if defined(HAVE_PARMETIS) && !defined(HAVE_SCOTCH) if (reparttype.usemetis) message("Using METIS serial partitioning:"); else message("Using ParMETIS partitioning:"); -#elif defined(HAVE_METIS) +#elif defined(HAVE_METIS) && !defined(HAVE_SCOTCH) message("Using METIS serial partitioning:"); #elif defined(HAVE_SCOTCH) - message("Using SCOTCH serial partitioning:"); + message("Using Scotch serial mapping:"); + if (scotch_tgtfile != NULL){ + message("Using the Scotch Target file: %s", scotch_tgtfile); + /* strcpy(initial_partition.target_arch_file, scotch_tgtfile); */ + } else { /* extra failsafe check */ + error("Scotch mapping will fail: no target architecture file provided."); + } #else - message("Non-METIS partitioning:"); + message("Non-METIS and Non-SCOTCH partitioning:"); #endif message(" initial partitioning: %s", initial_partition_name[initial_partition.type]); From 5d78376c11247213b68fc50acf4c5ac7764df144 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Mon, 6 Nov 2023 09:40:13 +0000 Subject: [PATCH 44/49] Initialised return_edge variable (#31) * Initialised return_edge variable * Removed a typedef for idx_t and a define for SCOTCH_Idx; * Removed typedef from metis/parmetis - only code chunk. 
--- src/debug.h | 1 - src/partition.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/debug.h b/src/debug.h index 3f8d8e5037..f2c93e5509 100644 --- a/src/debug.h +++ b/src/debug.h @@ -41,7 +41,6 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive, #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) \ && !defined(HAVE_SCOTCH) -typedef int64_t idx_t; #include void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt); diff --git a/src/partition.c b/src/partition.c index 031cbbb8d6..65f350d8f1 100644 --- a/src/partition.c +++ b/src/partition.c @@ -54,8 +54,6 @@ /* SCOTCH headers only used when MPI is also available. */ #ifdef HAVE_SCOTCH #include -typedef int64_t idx_t; -#define IDX_T MPI_INT #define idx_t SCOTCH_Idx #define IDX_MAX SCOTCH_NUMMAX SCOTCH_Arch the_archdat; @@ -1601,7 +1599,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int vertex_count = 0; int neighbour; - int return_edge; + int return_edge = 0; /* The bidirectional weights associated with an edge are averaged to ensure that the resultant edges are symmetric. This is a neccessary for a Scotch graph. */ From f559337e1666375f9f0683e0bc7b167666282969 Mon Sep 17 00:00:00 2001 From: Themis Kotsialos Date: Thu, 9 Nov 2023 15:13:23 +0000 Subject: [PATCH 45/49] Downsized IDX_MAX to INT_MAX, from SCOTCH_NUMMAX. 
--- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index 65f350d8f1..8993d862a1 100644 --- a/src/partition.c +++ b/src/partition.c @@ -55,7 +55,7 @@ #ifdef HAVE_SCOTCH #include #define idx_t SCOTCH_Idx -#define IDX_MAX SCOTCH_NUMMAX +#define IDX_MAX INT_MAX SCOTCH_Arch the_archdat; SCOTCH_Arch *p_archdat = &the_archdat; #endif From 04632532d47bcdb44fa41e5c84a83661809ad020 Mon Sep 17 00:00:00 2001 From: Themis Kotsialos Date: Mon, 13 Nov 2023 10:33:53 +0000 Subject: [PATCH 46/49] Defined IDX_MAX explicitly, using 32-bit value, define IDX_MAX (2147483647); makes scotch64 and scotch-local balance slightly better. --- src/partition.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/partition.c b/src/partition.c index 8993d862a1..e9aac228b1 100644 --- a/src/partition.c +++ b/src/partition.c @@ -55,7 +55,7 @@ #ifdef HAVE_SCOTCH #include #define idx_t SCOTCH_Idx -#define IDX_MAX INT_MAX +#define IDX_MAX (2147483647) SCOTCH_Arch the_archdat; SCOTCH_Arch *p_archdat = &the_archdat; #endif From f656d4dd0b10f10f1b1c4c069da970660256e51e Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Wed, 15 Nov 2023 16:07:57 +0000 Subject: [PATCH 47/49] Addressing comments on `configure.ac` and removal on unused `refine` (#33) * Update partition.c * Update configure.ac * Removal of refine in Scotch --- configure.ac | 2 +- src/partition.c | 49 ++++++++++++++++++++----------------------------- 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/configure.ac b/configure.ac index edfbce3a2e..f71bae899c 100644 --- a/configure.ac +++ b/configure.ac @@ -809,7 +809,7 @@ if test "x$with_scotch" != "xno"; then SCOTCH_LIBS="-L$with_scotch/lib -lscotch -lscotcherr" SCOTCH_INCS="-I$with_scotch/include" else - SCOTCH_LIBS="-lscotch" + SCOTCH_LIBS="-lscotch -lscotcherr" SCOTCH_INCS="" fi AC_CHECK_LIB([scotch],[SCOTCH_graphInit], [have_scotch="yes"], diff --git a/src/partition.c b/src/partition.c index 
e9aac228b1..e395f98325 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1366,18 +1366,14 @@ static void pick_metis(int nodeID, struct space *s, int nregions, /** * @brief Fill the adjncy array defining the graph of cells in a space. * - * See the ParMETIS and METIS manuals if you want to understand this - * format. The cell graph consists of all nodes as vertices with edges as the - * connections to all neighbours, so we have 26 per vertex for periodic - * boundary, fewer than 26 on the space edges when non-periodic. Note you will - * also need an xadj array, for METIS that would be: + * The format is identical to the METIS and ParMETIS periodic domain case. + * The cell graph consists of all nodes as vertices with edges as the connections + * to all neighbours, so we have 26 per vertex for periodic boundary. + * Note you will also need an xadj array, for SCOTCH that would be: * * xadj[0] = 0; * for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; * - * but each rank needs a different xadj when using ParMETIS (each segment - * should be rezeroed). - * * @param s the space of cells. * @param periodic whether to assume a periodic space (fixed 26 edges). * @param weights_e the edge weights for the cells, if used. On input @@ -1386,7 +1382,7 @@ static void pick_metis(int nodeID, struct space *s, int nregions, * @param adjncy the adjncy array to fill, must be of size 26 * the number of * cells in the space. * @param nadjcny number of adjncy elements used, can be less if not periodic. - * @param xadj the METIS xadj array to fill, must be of size + * @param xadj the Scotch xadj array to fill, must be of size * number of cells in space + 1. NULL for not used. * @param nxadj the number of xadj element used. */ @@ -1437,7 +1433,7 @@ static void graph_init_scotch(struct space *s, int periodic, } *nadjcny = cid * 26; - /* If given set SCOTCH xadj. */ + /* If given set Scotch xadj. 
*/ if (xadj != NULL) { xadj[0] = 0; for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; @@ -1602,7 +1598,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, int return_edge = 0; /* The bidirectional weights associated with an edge are averaged to ensure that the resultant edges are symmetric. This is a neccessary for a Scotch - graph. */ + graph. Note that the below assumes a periodic domain, where each vertex has + 26 neighbours */ for (int i = 0; i < edgenbr; i++) { if ((i > (edges_deg - 1)) && (i % edges_deg == 0)) { vertex_count++; @@ -1624,19 +1621,19 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, error("Error: Cannot build Scotch Graph.\n"); } #ifdef SWIFT_DEBUG_CHECKS - SCOTCH_graphCheck(&graph); - static int partition_count = 0; - char fname[200]; - sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); - FILE *graph_file = fopen(fname, "w"); - if (graph_file == NULL) { - printf("Error: Cannot open output file.\n"); - } + SCOTCH_graphCheck(&graph); + static int partition_count = 0; + char fname[200]; + sprintf(fname, "scotch_input_com_graph_%03d.grf", partition_count++); + FILE *graph_file = fopen(fname, "w"); + if (graph_file == NULL) { + printf("Error: Cannot open output file.\n"); + } - if (SCOTCH_graphSave(&graph, graph_file) != 0) { - printf("Error: Cannot save Scotch Graph.\n"); - } - fclose(graph_file); + if (SCOTCH_graphSave(&graph, graph_file) != 0) { + printf("Error: Cannot save Scotch Graph.\n"); + } + fclose(graph_file); #endif /* Initialise in strategy. */ @@ -2284,13 +2281,7 @@ static void repart_scotch(int vweights, int eweights, int timebins, } /* Allocate cell list for the partition. If not already done. 
*/ -#ifdef HAVE_SCOTCH - int refine = 1; -#endif if (repartition->ncelllist != s->nr_cells) { -#ifdef HAVE_SCOTCH - refine = 0; -#endif free(repartition->celllist); repartition->ncelllist = 0; if ((repartition->celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == From 7fed28389176391077b16d76cbf406d2b052bea6 Mon Sep 17 00:00:00 2001 From: themkots <70752441+themkots@users.noreply.github.com> Date: Fri, 17 Nov 2023 10:42:20 +0000 Subject: [PATCH 48/49] Update files - minor edits and SCOTCH mentions in comments etc. (#36) * Update debug.c - minor edits * Update engine.c * Update swift.c * Update swift_fof.c * Update partition.c * Update partition.c --- src/debug.c | 3 +-- src/engine.c | 2 +- src/partition.c | 26 +++++++++++++++----------- swift.c | 1 - swift_fof.c | 1 - 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/debug.c b/src/debug.c index e536daceed..8b9456e278 100644 --- a/src/debug.c +++ b/src/debug.c @@ -651,8 +651,7 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive, * * The output filenames are generated from the prefix and the sequence number * of calls. So the first is called {prefix}_std_001.dat, - *{prefix}_simple_001.dat, - * {prefix}_weights_001.dat, etc. + * {prefix}_simple_001.dat, {prefix}_weights_001.dat, etc. * * @param prefix base output filename * @param nvertices the number of vertices diff --git a/src/engine.c b/src/engine.c index e66c5d956d..1021178a25 100644 --- a/src/engine.c +++ b/src/engine.c @@ -255,7 +255,7 @@ void engine_repartition(struct engine *e) { clocks_getunit()); #else if (e->reparttype->type != REPART_NONE) - error("SWIFT was not compiled with MPI and METIS or ParMETIS support."); + error("SWIFT was not compiled with MPI and METIS, ParMETIS or SCOTCH support."); /* Clear the repartition flag. 
*/ e->forcerepart = 0; diff --git a/src/partition.c b/src/partition.c index e395f98325..ac8b5c8854 100644 --- a/src/partition.c +++ b/src/partition.c @@ -24,7 +24,8 @@ * a grid of cells into geometrically connected regions and distributing * these around a number of MPI nodes. * - * Currently supported partitioning types: grid, vectorise and METIS/ParMETIS. + * Currently supported partitioning types: + * grid, vectorise, METIS/ParMETIS and SCOTCH. */ /* Config parameters. */ @@ -1560,7 +1561,7 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, graph_init_scotch(s, s->periodic, weights_e, adjncy, &nadjcny, xadj, &nxadj); /* Define the cell graph. Keeping the edge weights association. */ - // Setting up the Scotch graph + /* Setting up the Scotch graph */ SCOTCH_Graph graph; SCOTCH_Num baseval = 0; SCOTCH_Num vertnbr = ncells; /* Number of vertices */ @@ -1640,9 +1641,10 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, SCOTCH_Strat stradat; SCOTCH_stratInit(&stradat); SCOTCH_Num num_vertices; - // Choose between different strategies: - // e.g., SCOTCH_STRATQUALITY, SCOTCH_STRATBALANCE, etc. - // SCOTCH_STRATBALANCE seems to be the best choice. + /* Choose between different strategies: + * e.g., SCOTCH_STRATQUALITY, SCOTCH_STRATBALANCE, etc. + * SCOTCH_STRATBALANCE seems to be the best choice. + */ SCOTCH_Num flagval = SCOTCH_STRATBALANCE; num_vertices = SCOTCH_archSize(archdat); if (SCOTCH_stratGraphMapBuild(&stradat, flagval, num_vertices, 0.05) != 0) @@ -1670,7 +1672,8 @@ static void pick_scotch(int nodeID, struct space *s, int nregions, /* We will not be calling SCOTCH_archExit(archdat): * this would destroy the contents of the archdat structure. * The above two Scotch ...Exit() calls destroy localy defined - * structs, so they are OK to call. */ + * structs, so they are OK to call. 
+ */ if (verttab != NULL) free(verttab); if (velotab != NULL) free(velotab); @@ -2214,7 +2217,8 @@ static void repart_scotch(int vweights, int eweights, int timebins, struct cell *cells = s->cells_top; /* Allocate and fill the adjncy indexing array defining the graph of - * * cells. */ + * cells. + */ idx_t *inds; if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL) error("Failed to allocate the inds array"); @@ -2291,7 +2295,8 @@ static void repart_scotch(int vweights, int eweights, int timebins, } /* We need to rescale the sum of the weights so that the sums of the two - * * types of weights are less than IDX_MAX, that is the range of idx_t. */ + * types of weights are less than IDX_MAX, that is the range of idx_t. + */ double vsum = 0.0; if (vweights) for (int k = 0; k < nr_cells; k++) vsum += weights_v[k]; @@ -2342,7 +2347,8 @@ static void repart_scotch(int vweights, int eweights, int timebins, } /* Balance edges and vertices when the edge weights are timebins, as these - * * have no reason to have equivalent scales, we use an equipartition. */ + * have no reason to have equivalent scales, we use an equipartition. + */ if (timebins && eweights) { /* Make sums the same. 
*/ @@ -2558,7 +2564,6 @@ void partition_initial_partition(struct partition *initial_partition, if ((celllist = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate celllist"); #ifdef HAVE_SCOTCH - //SCOTCH_Arch archdat; FILE *arch_file = fopen(initial_partition->target_arch_file, "r"); if (arch_file == NULL) error("Error: Cannot open topo file."); @@ -2567,7 +2572,6 @@ void partition_initial_partition(struct partition *initial_partition, error("Error loading architecture graph"); fclose(arch_file); pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist, p_archdat); - //pick_scotch(nodeID, s, nr_nodes, weights_v, weights_e, celllist); #elif HAVE_PARMETIS if (initial_partition->usemetis) { pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); diff --git a/swift.c b/swift.c index 7d935de1fc..db46df6712 100644 --- a/swift.c +++ b/swift.c @@ -898,7 +898,6 @@ int main(int argc, char *argv[]) { message("Using Scotch serial mapping:"); if (scotch_tgtfile != NULL){ message("Using the Scotch Target file: %s", scotch_tgtfile); - /* strcpy(initial_partition.target_arch_file, scotch_tgtfile); */ } else { /* extra failsafe check */ error("Scotch mapping will fail: no target architecture file provided."); } diff --git a/swift_fof.c b/swift_fof.c index 4a56f07a83..213c42facd 100644 --- a/swift_fof.c +++ b/swift_fof.c @@ -404,7 +404,6 @@ int main(int argc, char *argv[]) { message("Using Scotch serial mapping:"); if (scotch_tgtfile != NULL){ message("Using the Scotch Target file: %s", scotch_tgtfile); - /* strcpy(initial_partition.target_arch_file, scotch_tgtfile); */ } else { /* extra failsafe check */ error("Scotch mapping will fail: no target architecture file provided."); } From 6e5131b6e9fe7eadc977e87910f06d1e47767fc3 Mon Sep 17 00:00:00 2001 From: Dan Giles Date: Fri, 17 Nov 2023 10:51:25 +0000 Subject: [PATCH 49/49] Update README-Scotch.md (#35) * Update README-Scotch.md * Update README-Scotch.md - Warning on compiler inclusion 
preferences * Update README-Scotch.md - prod runs table updates 01 * Update README-Scotch.md table runs 02 * Update README-Scotch.md remove old table * Update README-Scotch.md added note for Rome and Milan type of Cosma 8 nodes * Update README-Scotch.md * Update README-Scotch.md - prod runs table 03 * Update README-Scotch.md - large simulations dump minimisation --------- Co-authored-by: themkots <70752441+themkots@users.noreply.github.com> --- README-Scotch.md | 124 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/README-Scotch.md b/README-Scotch.md index 869b5c6909..dcf7550801 100644 --- a/README-Scotch.md +++ b/README-Scotch.md @@ -18,7 +18,7 @@ Instructions for installing locally on Cosma 8 ---------------- _Environment_ ``` - module load cosma/2018 python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.1 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit gsl/2.5 + module load cosma/2018 python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.4 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit metis/5.1.0-64bit gsl/2.5 module load cmake module load bison ``` Navigate to the Scotch directory and carry out the following commands Configure SWIFT with Scotch ---------------- -Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) but if Scotch installed locally the added `--with-scotch=\path-to-scotch` flag will need to be passed to `./configure` +Follow the usual installation [instructions](https://gitlab.cosma.dur.ac.uk/swift/swiftsim/-/blob/master/INSTALL.swift) but if Scotch is installed locally the added `--with-scotch=/full/scotch/install/dir/path/` flag will need to be passed to `./configure` + + +There are also two beta-testing Scotch modules available at the time of this writing (as indicated by the dot before the version number, `.7.0.4`). You can have the modules environment as +``` + module load cosma/2018
python/3.6.5 intel_comp/2022.1.2 compiler openmpi/4.1.4 fftw/3.3.9 parallel_hdf5/1.12.0 parmetis/4.0.3-64bit metis/5.1.0-64bit gsl/2.5' +``` +Then, +``` + module load scotch/.7.0.4-32bit +``` +or +``` + module load scotch/.7.0.4-64bit +``` +depending on what version of Scotch, 32-bit or 64-bit, you want to use. + +**Warning on compiler inclusion preferences** + +You can use the `modules` environment to load a suitable Scotch module. In `Cosma8` you can use `module load scotch/.7.0.4-32bit` or `module load scotch/.7.0.4-64bit` to get the 32- or the 64-bit compiled module for Scotch. At the moment of this writing, both 32 and 64 bit versions of Scotch can be loaded together, i.e. there is no exclusion check in the module files. There could be a case where both are needed, for testing or development purposes, for example. Then, care must be taken to avoid unintentional compile time behaviour: the order of the `-I/.../...` commandline switches that will find their way in the `Makefile`(s) depends on what the compiler used prefers in the order of inclusion -- so if you have loaded first the 32 bit module, its corresponding `-I` switch will be placed first, and if the 64 bit module is loaded afterwards in the environment, it will be placed after, etc. + +Also, in the case of using a locally-built Scotch package, it is advised that you _do not_ load any Scotch module in advance in the environment, and instead use `./configure` with the `--with-scotch=/full/scotch/install/dir/path/` in order to make sure that the `Makefile` will pick up the correct include and library files. Running with Scotch ---------------- @@ -66,21 +89,100 @@ Scotch carries out the mapping using various strategies which are outlined in th One issue with Scotch is that when the number of mpi ranks is comparable to the dimensionality of the modelled SWIFT system the optimal mapping strategy doesn't neccessarily map to all available NUMA regions. 
At present this isn't handled explicity in the code and the paritition reverts to a vectorised or previous partitioning. -The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric - this causes an issue with SWIFT. Therefore, in the SCOTCH Graph the edge weigths are updated to equal to the sum of the two associated edge weights as calculated from SWIFT. +The SWIFT edge and vertex weights are estimated in the code, however edge weights are not symmetric - this causes an issue with SWIFT. Therefore, in the SCOTCH Graph the edge weights are updated to equal the average value (sum/2) of the two associated edge weights as calculated from SWIFT. Test Runs ---------------- The following results were obtained on cosma8 running with the `SCOTCH_STRATBALANCE` strategy string and an imbalance ratio of `0.05`. +| Testcase | Resources | flags | Node types | Scotch32 (s) | Scotch64 (s) | Scotch local (s) | ParMetis (s) | Metis (s) | +| ---------- | --------------------------- | -------------- | ---------- | ------------ | ------------ | ---------------- | ------------ | --------- | +| EAGLE_006 | nodes = 1 (8 NUMA regions) | `--map_by numa` | Milan | 1191.8 | 1198.2 | 1173.6 | 1167.4 | 1176.4 | +| | -//- | -//- | Milan | 1176.7 | 1184.4 | 1193.8 | 1212.5 | 1182.1 | +| | -//- | -//- | Milan | 1174.5 | 1183.6 | 1175.2 | 1229.4 | 1180.7 | +| | -//- | -//- | Rome | 1368.8 | 1322.9 | 1351.5 | 1332.8 | 1334.9 | +| | -//- | -//- | Rome | 1378.3 | 1373.8 | 1353.4 | 1332.3 | 1346.8 | +| | -//- | -//- | Rome | 1367.3 | 1395.0 | 1361.0 | 1331.2 | 1330.8 | +| | | | | | | | | | +| | nodes = 2 (16 NUMA regions) | -//- | Milan | 1191.2 | 1225.8 | 1167.6 | 1154.0 | 1159.7 | +| | -//- | -//- | Milan | 1147.9 | 1185.0 | 1158.2 | 1168.3 | 1163.4 | +| | -//- | -//- | Milan | 1162.0 | 1180.4 | 1149.3 | 1147.7 | 1157.3 | +| | -//- | -//- | Rome | 1358.3 | 1538.3 | 1325.5 | 1338.8 | 1344.8 | +| | -//- | -//- | Rome | 1355.8 | 1519.3 | 1338.2 | 1390.1 | 
1336.8 | +| | -//- | -//- | Rome | 1347.1 | 1395.0 | 1336.0 | 1345.4 | 1338.7 | +| | | | | | | | | | +| EAGLE_025 | nodes = 2 (16 NUMA regions) | `--map_by numa` | Milan | 7546.8 | 7450.0 | 7564.7 | 7202.0 | 7302.3 | +| | -//- | -//- | Milan | 7508.8 | 7490.4 | 7506.6 | 7416.3 | 7291.3 | +| | -//- | -//- | Milan | 7447.6 | 7516.5 | 7548.1 | 7093.1 | 7293.0 | +| | -//- | -//- | Rome | 8616.0 | 8660.5 | 8524.5 | 8810.5 | 8309.2 | +| | -//- | -//- | Rome | 8652.4 | 8492.7 | 8594.7 | 7955.9 | 8312.3 | +| | -//- | -//- | Rome | 8664.1 | 8565.2 | 8621.9 | 7946.7 | 8235.6 | +| | | | | | | | | | +| EAGLE_050 | nodes = 4 (32 NUMA regions) | `--map_by numa` | Milan | 45437.0 | 45714.8 | 45287.9 | 45110.4 | | +| | -//- | -//- | Milan | 45817.5 | 45128.4 | 45047.3 | 42131.7 | | +| | -//- | -//- | Milan | 45483.4 | 45213.9 | 45219.3 | 43263.8 | | +| | -//- | -//- | Rome | 51754.3 | 54724.4 | 51907.1 | 51315.1 | | +| | -//- | -//- | Rome | 51669.3 | 54213.8 | 51320.5 | 48338.0 | | +| | -//- | -//- | Rome | 51689.4 | 53387.0 | 51563.9 | 49702.8 | | +| | | | | | | | | | +| EAGLE_050 | nodes = 8 (64 NUMA regions) | `--map_by numa` | Milan | 36202.3 | 37158.3 | 36111.7 | 37006.0 | | +| | -//- | -//- | Milan | 36097.3 | 36503.1 | 36228.0 | 37344.7 | | +| | -//- | -//- | Milan | 36113.3 | 36155.7 | 36222.0 | 35488.4 | | +| | -//- | -//- | Rome | 42012.6 | 41790.7 | 41864.3 | 41723.6 | | +| | -//- | -//- | Rome | 41866.8 | 41517.0 | 41772.9 | 40533.6 | | +| | -//- | -//- | Rome | 43630.5 | 41419.9 | 41752.4 | 40628.3 | | + + + +Note: + +Cosma 8 cluster is currently comprised of: + + * 360 compute nodes with 1 TB RAM and dual 64-core AMD EPYC 7H12 water-cooled processors at 2.6GHz ("Rome"-type nodes) + * Upgraded with an additional 168 nodes with dual Milan 7763 processors at 2.45GHz ("Milan"-type nodes) + + + +Managing verbosity, logging info, dumping of intermediate results, debug info, restart files +--------------------------- +* If you pass the commandline switch 
`--enable-debugging-checks` to `./configure` while building, be prepared for additional logging and dump files. +* For large simulations, the amount of data dumps can easily overflow one's quota allowances; for example, the E_100 models can dump files on the order of `1TiBytes`, depending on runtime configuration, which can be specified in a `.yml` file. Please check the SWIFT docs. To keep dumping / logging to a local minimum, you can put the following sections / lines, for example, in a file `eagle_100.yml`: + -| Testcase | Resources | flags | Scotch (s) | Metis (s) | -| -------- | --------------------------- | -------------- | ---------- | -------- | -| EAGLE_6 | nodes = 1 (8 NUMA regions) | `--map_by numa` | 1307.8 | 1401.3 | -| EAGLE_6 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 1294.6 | 1314.2 | -| EAGLE_25 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 8381.4 | 8420.6 | -| EAGLE_50 | nodes = 2 (16 NUMA regions) | `--map_by numa` | 69312.1 | 67273.6 | -| EAGLE_50 | nodes = 4 (32 NUMA regions) | `--map_by numa` | 51803.8 | 51058.3 | -| EAGLE_50 | nodes = 8 (64 NUMA regions) | `--map_by numa` | 41941.1 | 42700.5 | +``` +# Parameters governing the snapshots +Snapshots: + select_output_on: 1 + select_output: output_list.yml + basename: eagle # Common part of the name of output files + scale_factor_first: 0.91 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first output (non-cosmological run) (in internal units) + delta_time: 1.01 # Time difference between consecutive outputs (in internal units) + recording_triggers_part: [1.0227e-4, 1.0227e-5] # Recording starts 100M and 10M years before a snapshot + recording_triggers_bpart: [1.0227e-4, 1.0227e-5] # Recording starts 100M and 10M years before a snapshot +``` + +and the contents of `output_list.yml` can be +``` +Default: + Standard_Gas: off + Standard_DM: off + Standard_DMBackground: off + Standard_Stars: off + Standard_BH: off +``` + +In the `eagle_100.yml` we can 
also specify that we do not want the `restart` dumps: +``` +Restarts: + enable: 0 +``` + + +Current Limitations +---------------- +1. As seen in the table above the current Scotch implementation is comparable in performance to the ParMETIS (METIS) implementation on problem sizes up to EAGLE_50. However, the current implementation is running into difficulties on the EAGLE_100 testcase. The Scotch partition in this case causes two separate errors: Memory overflow when running across 8 Cosma8 nodes and on 16 Cosma8 nodes the resultant Scotch partition results in certain ranks having greater than 64 MPI proxies which is a hard limit set within Swift. Ongoing work is focused on sorting out this issue. +2. The current implementation has only been tested against periodic domains. This is where each vertex in Swift has exactly 26 neighbours. Additions to the edge mean calculation in the `pick_scotch` function will need to be carried out to expand to non-periodic domains. Notes ----------------