Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Chicoma-CPU and add Chicoma-GPU #73

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cime_config/machines/Depends.chicoma-cpu.gnu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Source files that are built with optimization disabled for this
# machine/compiler combination.
#
# zm_conv.F90: deoptimizing this file fixes non-BFB behavior of the stealth
# feature that was reported on pm-cpu with -O2.
# NOTE(review): the original note names pm-cpu; presumably the same problem
# applies on chicoma-cpu -- confirm.
set(NOOPT eam/src/physics/cam/zm_conv.F90)

# Debug builds are left untouched; only non-debug builds deoptimize the files.
if(NOT DEBUG)
  foreach(ITEM IN LISTS NOOPT)
    e3sm_deoptimize_file("${ITEM}")
  endforeach()
endif()




10 changes: 10 additions & 0 deletions cime_config/machines/Depends.chicoma-gpu.nvidia.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Files that receive an extra compiler flag in non-debug builds.
# The entry is appended, so anything already in REDUCE_OPT_LIST is kept.
list(APPEND REDUCE_OPT_LIST homme/src/share/derivative_mod_base.F90)

# -Mnovect is added to avoid an internal compiler error for this file
# (seen with nvidia/21.11). Debug builds do not get the flag.
if(NOT DEBUG)
  foreach(ITEM IN LISTS REDUCE_OPT_LIST)
    e3sm_add_flags("${ITEM}" " -Mnovect")
  endforeach()
endif()
10 changes: 10 additions & 0 deletions cime_config/machines/Depends.chicoma-gpu.nvidiagpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Files that receive an extra compiler flag in non-debug builds.
# The entry is appended, so anything already in REDUCE_OPT_LIST is kept.
list(APPEND REDUCE_OPT_LIST homme/src/share/derivative_mod_base.F90)

# -Mnovect is added to avoid an internal compiler error for this file
# (seen with nvidia/21.11). Debug builds do not get the flag.
if(NOT DEBUG)
  foreach(ITEM IN LISTS REDUCE_OPT_LIST)
    e3sm_add_flags("${ITEM}" " -Mnovect")
  endforeach()
endif()
19 changes: 19 additions & 0 deletions cime_config/machines/cmake_macros/gnu_chicoma-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Build macros for the "gnu" compiler on chicoma-gpu.

# Serial compilers.
set(SCC "gcc")
set(SCXX "g++")
set(SFC "gfortran")

# MPI compiler wrappers.
set(MPICC "cc")
set(MPICXX "CC")
set(MPIFC "ftn")

# Link settings: CXX_LINKER is set to FORTRAN and the C++ runtime is added
# explicitly to the C++ libraries.
set(CXX_LINKER "FORTRAN")
string(APPEND CXX_LIBS " -lstdc++")
string(APPEND SLIBS " -lblas -llapack")

# Extra arguments for configure-style sub-builds.
string(APPEND CONFIG_ARGS " --host=cray")

# Timing-library (gptl) specific preprocessor definitions.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()

# Optimization flags for C and Fortran in non-debug builds.
if(NOT DEBUG)
  string(APPEND CFLAGS " -O2 -g")
  string(APPEND FFLAGS " -O2 -g")
endif()
20 changes: 20 additions & 0 deletions cime_config/machines/cmake_macros/gnugpu_chicoma-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build macros for the "gnugpu" compiler (GNU host compilers + CUDA) on
# chicoma-gpu.

# Serial compilers and MPI wrappers all use the same compiler drivers.
set(SCC "cc")
set(SCXX "CC")
set(SFC "ftn")
set(MPICC "cc")
set(MPICXX "CC")
set(MPIFC "ftn")

# Enable CUDA and configure the CUDA compiler and Kokkos.
set(USE_CUDA "TRUE")
string(APPEND CUDA_FLAGS " -ccbin CC -O2 -arch sm_80 --use_fast_math")
string(APPEND KOKKOS_OPTIONS " -DKokkos_ARCH_AMPERE80=On -DKokkos_ENABLE_CUDA=On -DKokkos_ENABLE_CUDA_LAMBDA=On -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=Off")

# Preprocessor definitions. The three appends stay in this order so the
# resulting CPPDEFS string is unchanged.
string(APPEND CPPDEFS " -DGPU")
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()
string(APPEND CPPDEFS " -DTHRUST_IGNORE_CUB_VERSION_CHECK")

# Configure arguments and system libraries.
string(APPEND CONFIG_ARGS " --host=cray")
string(APPEND SLIBS " -lblas -llapack")

# Optimization flags for C and Fortran in non-debug builds.
if(NOT DEBUG)
  string(APPEND CFLAGS " -O2")
  string(APPEND FFLAGS " -O2")
endif()
24 changes: 24 additions & 0 deletions cime_config/machines/cmake_macros/nvidia_chicoma-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build macros for the "nvidia" compiler on chicoma-gpu.

# Serial compilers and MPI wrappers all use the same compiler drivers.
set(SCC "cc")
set(SCXX "CC")
set(SFC "ftn")
set(MPICC "cc")
set(MPICXX "CC")
set(MPIFC "ftn")

# Link settings.
set(CXX_LINKER "FORTRAN")
string(APPEND SLIBS " -lblas -llapack")

# Extra arguments for configure-style sub-builds.
string(APPEND CONFIG_ARGS " --host=cray")

# Timing-library (gptl) specific preprocessor definitions.
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()

# Optimization flags for C, C++ and Fortran in non-debug builds.
if(NOT DEBUG)
  string(APPEND CFLAGS " -O2")
  string(APPEND CXXFLAGS " -O2")
  string(APPEND FFLAGS " -O2")
endif()

# Work-around for nvidia: kokkos is not passing "-mp" for threaded builds,
# so OpenMP is switched off in Kokkos when compiling threaded.
if(compile_threaded)
  string(APPEND KOKKOS_OPTIONS " -DKokkos_ENABLE_OPENMP=Off")
endif()
13 changes: 13 additions & 0 deletions cime_config/machines/cmake_macros/nvidiagpu_chicoma-gpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Build macros for the "nvidiagpu" compiler (nvidia compilers + CUDA) on
# chicoma-gpu.

# Serial compilers.
set(SCC "cc")
set(SCXX "CC")
set(SFC "ftn")

# Enable CUDA and set the CUDA compiler flags.
set(USE_CUDA "TRUE")
string(APPEND CUDA_FLAGS " -ccbin CC -O2 -arch sm_80 --use_fast_math")

# Link settings.
set(CXX_LINKER "FORTRAN")
string(APPEND SLIBS " -lblas -llapack")

# Extra arguments for configure-style sub-builds.
string(APPEND CONFIG_ARGS " --host=cray")

# Preprocessor definitions. The three appends stay in this order so the
# resulting CPPDEFS string is unchanged.
string(APPEND CPPDEFS " -DGPU")
if(COMP_NAME STREQUAL gptl)
  string(APPEND CPPDEFS " -DHAVE_NANOTIME -DBIT64 -DHAVE_SLASHPROC -DHAVE_GETTIMEOFDAY")
endif()
string(APPEND CPPDEFS " -DTHRUST_IGNORE_CUB_VERSION_CHECK")
29 changes: 28 additions & 1 deletion cime_config/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,34 @@
<directive>--qos=standard </directive>
</directives>
<queues>
<queue walltimemax="24:00:00" nodemax="1792" default="true">standard</queue>
<queue walltimemax="16:00:00" nodemax="1792" default="true">standard</queue>
</queues>
</batch_system>

<batch_system MACH="chicoma-gpu" type="slurm">
<!-- Slurm batch settings for chicoma-gpu. Each <directives> group below is
     selected by its compiler (and, where given, COMPSET) attributes. -->
<!-- No attributes: the "gpu" partition directive applies to every job. -->
<directives>
<directive> --partition=gpu</directive>
</directives>
<!-- gnugpu: one GPU per task. -->
<directives compiler="gnugpu">
<directive> --gpus-per-task=1</directive>
</directives>
<!-- gnugpu GPU binding depends on the compset: no binding unless the compset
     name contains MMF, in which case tasks are mapped to GPUs 0-3.
     NOTE(review): the leading "!" is presumably a negated regex match; confirm. -->
<directives COMPSET="!.*MMF.*" compiler="gnugpu">
<directive> --gpu-bind=none</directive>
</directives>
<directives COMPSET=".*MMF.*" compiler="gnugpu">
<directive> --gpu-bind=map_gpu:0,1,2,3</directive>
</directives>
<!-- nvidiagpu: no GPU binding. -->
<directives compiler="nvidiagpu">
<directive> --gpu-bind=none</directive>
</directives>
<!-- CPU-only compilers (gnu, nvidia) pass "-G 0".
     NOTE(review): presumably the Slurm short form of a zero-GPU request; confirm. -->
<directives compiler="gnu">
<directive> -G 0</directive>
</directives>
<directives compiler="nvidia">
<directive> -G 0</directive>
</directives>
<!-- Single default queue "gpu": at most 96 nodes and 16 hours of walltime. -->
<queues>
<queue walltimemax="16:00:00" nodemax="96" default="true">gpu</queue>
</queues>
</batch_system>

Expand Down
167 changes: 155 additions & 12 deletions cime_config/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4110,7 +4110,7 @@
<DESC>Chicoma CPU-only nodes at LANL IC. Each node has 2 AMD EPYC 7H12 64-Core (Milan) 512GB</DESC>
<NODENAME_REGEX>ch-fe*</NODENAME_REGEX>
<OS>Linux</OS>
<COMPILERS>gnu,nvidia,intel,aocc,amdclang</COMPILERS>
<COMPILERS>gnu,intel,nvidia,amdclang</COMPILERS>
<MPILIBS>mpich</MPILIBS>
<CIME_OUTPUT_ROOT>/lustre/scratch4/turquoise/$ENV{USER}/E3SM/scratch/chicoma-cpu</CIME_OUTPUT_ROOT>
<DIN_LOC_ROOT>/usr/projects/e3sm/inputdata</DIN_LOC_ROOT>
Expand All @@ -4124,15 +4124,15 @@
<BATCH_SYSTEM>slurm</BATCH_SYSTEM>
<SUPPORTED_BY>e3sm</SUPPORTED_BY>
<MAX_TASKS_PER_NODE>256</MAX_TASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE>64</MAX_MPITASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mark-petersen, the issue you pointed out on Slack should be fixed here.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this should definitely be 128 for chicoma-cpu. Thanks.

<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<mpirun mpilib="default">
<executable>srun</executable>
<arguments>
<arg name="label"> --label</arg>
<arg name="num_tasks"> -n {{ total_tasks }} -N {{ num_nodes }}</arg>
<arg name="thread_count">-c $ENV{OMP_NUM_THREADS}</arg>
<arg name="binding"> $SHELL{if [ 128 -ge `./xmlquery --value MAX_MPITASKS_PER_NODE` ]; then echo "--cpu_bind=cores"; else echo "--cpu_bind=threads";fi;} </arg>
<arg name="thread_count">-c $SHELL{echo 256/`./xmlquery --value MAX_MPITASKS_PER_NODE`|bc}</arg>
<arg name="binding"> $SHELL{if [ 128 -ge `./xmlquery --value MAX_MPITASKS_PER_NODE` ]; then echo "--cpu_bind=cores"; else echo "--cpu_bind=threads";fi;}</arg>
<arg name="placement"> -m plane=$SHELL{echo `./xmlquery --value MAX_MPITASKS_PER_NODE`}</arg>
</arguments>
</mpirun>
Expand All @@ -4151,50 +4151,192 @@
<command name="unload">cray-hdf5-parallel</command>
<command name="unload">cray-netcdf-hdf5parallel</command>
<command name="unload">cray-parallel-netcdf</command>
<command name="unload">cray-netcdf</command>
<command name="unload">cray-hdf5</command>
<command name="unload">PrgEnv-gnu</command>
<command name="unload">PrgEnv-intel</command>
<command name="unload">PrgEnv-nvidia</command>
<command name="unload">PrgEnv-cray</command>
<command name="unload">PrgEnv-aocc</command>
<command name="unload">intel</command>
<command name="unload">intel-oneapi</command>
<command name="unload">nvidia</command>
<command name="unload">aocc</command>
<command name="unload">cudatoolkit</command>
<command name="unload">climate-utils</command>
<command name="unload">craype-accel-nvidia80</command>
<command name="unload">craype-accel-host</command>
<command name="unload">cce</command>
<command name="unload">perftools-base</command>
<command name="unload">perftools</command>
<command name="unload">darshan</command>
</modules>

<modules compiler="gnu">
<command name="load">PrgEnv-gnu/8.4.0</command>
<command name="load">gcc/12.2.0</command>
<command name="load">cray-libsci/23.05.1.4</command>
</modules>

<modules compiler="nvidia">
<command name="load">PrgEnv-nvidia/8.4.0</command>
<command name="load">nvidia/22.7</command>
<command name="load">cray-libsci/23.05.1.4</command>
</modules>

<modules compiler="intel">
<command name="load">PrgEnv-intel/8.4.0</command>
<command name="load">intel-classic/2023.2.0</command>
</modules>

<modules compiler="aocc">
<modules compiler="amdclang">
<command name="load">PrgEnv-aocc/8.4.0</command>
<command name="load">aocc/3.2.0</command>
<command name="load">cray-libsci/23.05.1.4</command>
</modules>

<modules compiler="amdclang">
<command name="load">PrgEnv-aocc/8.4.0</command>
<command name="load">aocc/3.2.0</command>
<modules>
<command name="load">craype-accel-host</command>
<command name="load">craype/2.7.21</command>
<command name="load">cray-mpich/8.1.26</command>
<command name="load">libfabric/1.15.2.0</command>
<command name="load">cray-hdf5-parallel/1.12.2.3</command>
<command name="load">cray-netcdf-hdf5parallel/4.9.0.3</command>
<command name="load">cray-parallel-netcdf/1.12.3.3</command>
<command name="load">cmake/3.25.1</command>
</modules>
</module_system>

<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
<EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
<TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE>

<environment_variables>
<env name="MPICH_ENV_DISPLAY">1</env>
<env name="MPICH_VERSION_DISPLAY">1</env>
<env name="OMP_STACKSIZE">128M</env>
<env name="OMP_PROC_BIND">spread</env>
<env name="OMP_PLACES">threads</env>
<env name="HDF5_USE_FILE_LOCKING">FALSE</env>
<env name="PERL5LIB">/usr/projects/climate/SHARED_CLIMATE/software/chicoma-cpu/perl5-only-switch/lib/perl5</env>
<env name="PNETCDF_HINTS">romio_ds_write=disable;romio_ds_read=disable;romio_cb_write=enable;romio_cb_read=enable</env>
<env name="FI_CXI_RX_MATCH_MODE">software</env>
<env name="MPICH_COLL_SYNC">MPI_Bcast</env>
<env name="NETCDF_PATH">$ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX}</env>
<env name="PNETCDF_PATH">$ENV{CRAY_PARALLEL_NETCDF_PREFIX}</env>
</environment_variables>
<resource_limits>
<resource name="RLIMIT_STACK">-1</resource>
</resource_limits>
</machine>

<machine MACH="chicoma-gpu">
<DESC>Chicoma GPU nodes at LANL IC. Each GPU node has single
AMD EPYC 7713 64-Core (Milan) (256GB) and 4 nvidia A100'</DESC>
<NODENAME_REGEX>ch-fe*</NODENAME_REGEX>
<OS>Linux</OS>
<COMPILERS>gnugpu,gnu,nvidiagpu,nvidia</COMPILERS>
<MPILIBS>mpich</MPILIBS>
<CIME_OUTPUT_ROOT>/lustre/scratch4/turquoise/$ENV{USER}/E3SM/scratch/chicoma-gpu</CIME_OUTPUT_ROOT>
<DIN_LOC_ROOT>/usr/projects/e3sm/inputdata</DIN_LOC_ROOT>
<DIN_LOC_ROOT_CLMFORC>/usr/projects/e3sm/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC>
<DOUT_S_ROOT>/lustre/scratch4/turquoise/$ENV{USER}/E3SM/archive/$CASE</DOUT_S_ROOT>
<BASELINE_ROOT>/lustre/scratch4/turquoise/$ENV{USER}/E3SM/input_data/ccsm_baselines/$COMPILER</BASELINE_ROOT>
<CCSM_CPRNC>/usr/projects/climate/SHARED_CLIMATE/software/badger/cprnc</CCSM_CPRNC>
<GMAKE_J>10</GMAKE_J>
<TESTS>e3sm_developer</TESTS>
<NTEST_PARALLEL_JOBS>4</NTEST_PARALLEL_JOBS>
<BATCH_SYSTEM>slurm</BATCH_SYSTEM>
<SUPPORTED_BY>e3sm</SUPPORTED_BY>
<MAX_TASKS_PER_NODE>128</MAX_TASKS_PER_NODE>
<MAX_TASKS_PER_NODE compiler="gnu">256</MAX_TASKS_PER_NODE>
<MAX_TASKS_PER_NODE compiler="nvidia">256</MAX_TASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE>4</MAX_MPITASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE compiler="gnu">64</MAX_MPITASKS_PER_NODE>
<MAX_MPITASKS_PER_NODE compiler="nvidia">64</MAX_MPITASKS_PER_NODE>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<mpirun mpilib="default">
<executable>srun</executable>
<arguments>
<arg name="label"> --label</arg>
<arg name="num_tasks"> -n {{ total_tasks }} -N {{ num_nodes }}</arg>
<arg name="thread_count">-c $SHELL{echo 256/`./xmlquery --value MAX_MPITASKS_PER_NODE`|bc}</arg>
<arg name="binding"> $SHELL{if [ 128 -ge `./xmlquery --value MAX_MPITASKS_PER_NODE` ]; then echo "--cpu_bind=cores"; else echo "--cpu_bind=threads";fi;}</arg>
<arg name="placement"> -m plane=$SHELL{echo `./xmlquery --value MAX_MPITASKS_PER_NODE`}</arg>
</arguments>
</mpirun>
<module_system type="module" allow_error="true">
<init_path lang="perl">/usr/share/lmod/8.3.1/init/perl</init_path>
<!-- does not exist -->
<init_path lang="python">/usr/share/lmod/8.3.1/init/python</init_path>
<init_path lang="sh">/usr/share/lmod/8.3.1/init/sh</init_path>
<init_path lang="csh">/usr/share/lmod/8.3.1/init/csh</init_path>
<cmd_path lang="perl">/usr/share/lmod/lmod/libexec/lmod perl</cmd_path>
<cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path>
<cmd_path lang="sh">module</cmd_path>
<cmd_path lang="csh">module</cmd_path>

<modules>
<command name="unload">cray-hdf5-parallel</command>
<command name="unload">cray-netcdf-hdf5parallel</command>
<command name="unload">cray-parallel-netcdf</command>
<command name="unload">cray-netcdf</command>
<command name="unload">cray-hdf5</command>
<command name="unload">PrgEnv-gnu</command>
<command name="unload">PrgEnv-intel</command>
<command name="unload">PrgEnv-nvidia</command>
<command name="unload">PrgEnv-cray</command>
<command name="unload">PrgEnv-aocc</command>
<command name="unload">intel</command>
<command name="unload">intel-oneapi</command>
<command name="unload">nvidia</command>
<command name="unload">aocc</command>
<command name="unload">cudatoolkit</command>
<command name="unload">climate-utils</command>
<command name="unload">craype-accel-nvidia80</command>
<command name="unload">craype-accel-host</command>
<command name="unload">perftools-base</command>
<command name="unload">perftools</command>
<command name="unload">darshan</command>
</modules>

<modules compiler="gnu.*">
<command name="load">PrgEnv-gnu/8.4.0</command>
<command name="load">gcc/12.2.0</command>
</modules>

<modules compiler="nvidia.*">
<command name="load">PrgEnv-nvidia/8.4.0</command>
<command name="load">nvidia/22.7</command>
</modules>

<modules compiler="gnugpu">
<command name="load">cudatoolkit/22.7_11.7</command>
<command name="load">craype-accel-nvidia80</command>
</modules>

<modules compiler="nvidiagpu">
<command name="load">cudatoolkit/22.7_11.7</command>
<command name="load">craype-accel-nvidia80</command>
<command name="load">gcc-mixed/11.2.0</command>
</modules>

<modules compiler="gnu">
<command name="load">craype-accel-host</command>
<command name="load">cray-libsci</command>
<command name="load">craype</command>
</modules>

<modules compiler="nvidia">
<command name="load">craype-accel-host</command>
</modules>

<modules>
<command name="load">cray-libsci/23.05.1.4</command>
<command name="load">craype/2.7.21</command>
<command name="load">cray-mpich/8.1.26</command>
<command name="load">libfabric/1.15.2.0</command>
<command name="load">cray-hdf5-parallel/1.12.2.3</command>
<command name="load">cray-netcdf-hdf5parallel/4.9.0.3</command>
<command name="load">cray-parallel-netcdf/1.12.3.3</command>
<command name="load">cmake/3.22.3</command>
<command name="load">cmake/3.25.1</command>
</modules>
</module_system>

Expand All @@ -4215,6 +4357,7 @@
<env name="MPICH_COLL_SYNC">MPI_Bcast</env>
<env name="NETCDF_PATH">$ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX}</env>
<env name="PNETCDF_PATH">$ENV{CRAY_PARALLEL_NETCDF_PREFIX}</env>
<env name="PKG_CONFIG_PATH">/usr/projects/e3sm/cudatoolkit:$ENV{PKG_CONFIG_PATH}</env>
</environment_variables>
<resource_limits>
<resource name="RLIMIT_STACK">-1</resource>
Expand Down
1 change: 1 addition & 0 deletions cime_config/machines/config_pio.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
<value mach="grizzly">netcdf</value>
<value mach="badger">netcdf</value>
<value mach="chicoma-cpu">netcdf</value>
<value mach="chicoma-gpu">netcdf</value>
<value mach="bebop" mpilib="impi" compset=".*CAM5.+MPAS.*">netcdf</value>
<value mach="fugaku" compiler="gnu">netcdf</value>
</values>
Expand Down