diff --git a/.clang-tidy b/.clang-tidy
index 16de3034c26..5a3deadf304 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -14,7 +14,11 @@ Checks: '-*,
         -cppcoreguidelines-narrowing-conversions,
         -cppcoreguidelines-no-malloc,
         -cppcoreguidelines-owning-memory,
+    google-build-explicit-make-pair,
+    google-build-namespaces,
+    google-global-names-in-headers,
     misc-const-correctness,
+    misc-definitions-in-headers,
     misc-misleading-bidirectional,
     misc-misleading-identifier,
     misc-misplaced-const,
@@ -22,7 +26,6 @@ Checks: '-*,
     misc-unused-alias-decls,
     misc-unused-parameters,
     misc-unused-using-decls,
-        -misc-definitions-in-headers,
     modernize-avoid-bind,
     modernize-concat-nested-namespaces,
     modernize-deprecated-headers,
@@ -79,5 +82,7 @@ Checks: '-*,
 CheckOptions:
 - key:          modernize-pass-by-value.ValuesOnly
   value:        'true'
+- key:          misc-definitions-in-headers.HeaderFileExtensions
+  value:        "H,"
 
 HeaderFilterRegex: 'Source[a-z_A-Z0-9\/]+\.H$'
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index 7f0e3747677..0f3adb8593a 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -111,7 +111,7 @@ jobs:
         which nvcc || echo "nvcc not in PATH!"
 
         git clone https://github.com/AMReX-Codes/amrex.git ../amrex
-        cd ../amrex && git checkout --detach c45770c9f9b2c5fa98c675a439c502e78912bf47 && cd -
+        cd ../amrex && git checkout --detach 48b3ec7cb7ad99823bd85fad83c13c3cfd5ecdd4 && cd -
         make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2
 
   build_nvhpc21-11-nvcc:
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
index 0495155b430..2fd979feff9 100644
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -13,6 +13,7 @@ jobs:
     if: github.event.pull_request.draft == false
     env:
       CXXFLAGS: "-Werror -Wno-error=pass-failed"
+      HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: TRUE
       # For macOS, Ninja is slower than the default:
       #CMAKE_GENERATOR: Ninja
       # setuptools/mp4py work-around, see
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f10049cb58..2501c9dcaff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 # Preamble ####################################################################
 #
 cmake_minimum_required(VERSION 3.20.0)
-project(WarpX VERSION 23.08)
+project(WarpX VERSION 23.09)
 
 include(${WarpX_SOURCE_DIR}/cmake/WarpXFunctions.cmake)
 
diff --git a/Docs/source/conf.py b/Docs/source/conf.py
index 534cfd1e6b5..e2cdfe15cda 100644
--- a/Docs/source/conf.py
+++ b/Docs/source/conf.py
@@ -79,9 +79,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'23.08'
+version = u'23.09'
 # The full version, including alpha/beta/rc tags.
-release = u'23.08'
+release = u'23.09'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/Docs/source/install/dependencies.rst b/Docs/source/install/dependencies.rst
index 84e6289042d..7bd6c7dc8d5 100644
--- a/Docs/source/install/dependencies.rst
+++ b/Docs/source/install/dependencies.rst
@@ -75,7 +75,7 @@ Conda (Linux/macOS/Windows)
 
       .. code-block:: bash
 
-         conda create -n warpx-cpu-mpich-dev -c conda-forge blaspp boost ccache cmake compilers git lapackpp "openpmd-api=*=mpi_mpich*" python numpy pandas scipy yt "fftw=*=mpi_mpich*" pkg-config matplotlib mamba ninja mpich pip virtualenv
+         conda create -n warpx-cpu-mpich-dev -c conda-forge blaspp boost ccache cmake compilers git lapackpp "openpmd-api=*=mpi_mpich*" python make numpy pandas scipy yt "fftw=*=mpi_mpich*" pkg-config matplotlib mamba mpich mpi4py ninja pip virtualenv
          conda activate warpx-cpu-mpich-dev
 
          # compile WarpX with -DWarpX_MPI=ON
@@ -85,7 +85,7 @@ Conda (Linux/macOS/Windows)
 
       .. code-block:: bash
 
-         conda create -n warpx-cpu-dev -c conda-forge blaspp boost ccache cmake compilers git lapackpp openpmd-api python numpy pandas scipy yt fftw pkg-config matplotlib mamba ninja pip virtualenv
+         conda create -n warpx-cpu-dev -c conda-forge blaspp boost ccache cmake compilers git lapackpp openpmd-api python make numpy pandas scipy yt fftw pkg-config matplotlib mamba ninja pip virtualenv
          conda activate warpx-cpu-dev
 
          # compile WarpX with -DWarpX_MPI=OFF
@@ -107,6 +107,14 @@ For OpenMP support, you will further need:
 
          conda install -c conda-forge llvm-openmp
 
+For Nvidia CUDA GPU support, you will need to have `a recent CUDA driver installed <https://developer.nvidia.com/cuda-downloads>`__ or you can lower the CUDA version of `the Nvidia cuda package <https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#conda-installation>`__ and `conda-forge to match your drivers <https://docs.cupy.dev/en/stable/install.html#install-cupy-from-conda-forge>`__ and then add these packages:
+
+.. code-block:: bash
+
+   conda install -c nvidia -c conda-forge cuda cupy
+
+More info for `CUDA-enabled ML packages <https://twitter.com/jeremyphoward/status/1697435241152127369>`__.
+
 
 Spack (Linux/macOS)
 -------------------
@@ -214,7 +222,7 @@ The `Advanced Package Tool (APT) <https://en.wikipedia.org/wiki/APT_(software)>`
       .. code-block:: bash
 
          sudo apt update
-         sudo apt install build-essential ccache cmake g++ git libfftw3-mpi-dev libfftw3-dev libhdf5-openmpi-dev libopenmpi-dev pkg-config python3 python3-matplotlib python3-numpy python3-pandas python3-pip python3-scipy python3-venv
+         sudo apt install build-essential ccache cmake g++ git libfftw3-mpi-dev libfftw3-dev libhdf5-openmpi-dev libopenmpi-dev pkg-config python3 python3-matplotlib python3-mpi4py python3-numpy python3-pandas python3-pip python3-scipy python3-venv
 
          # optional:
          # for CUDA, either install
diff --git a/Docs/source/install/hpc/lassen.rst b/Docs/source/install/hpc/lassen.rst
index 1f03a3c117f..5726254652a 100644
--- a/Docs/source/install/hpc/lassen.rst
+++ b/Docs/source/install/hpc/lassen.rst
@@ -3,7 +3,7 @@
 Lassen (LLNL)
 =============
 
-The `Lassen V100 GPU cluster <https://hpc.llnl.gov/hardware/platforms/lassen>`_ is located at LLNL.
+The `Lassen V100 GPU cluster <https://hpc.llnl.gov/hardware/platforms/lassen>`__ is located at LLNL.
 
 
 Introduction
@@ -11,85 +11,170 @@ Introduction
 
 If you are new to this system, **please see the following resources**:
 
-* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
-* `Lassen user guide <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system>`_
-* Batch system: `LSF <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#batch-system>`_
-* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`__ (login required)
+* `Lassen user guide <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system>`__
+* Batch system: `LSF <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#batch-system>`__
+* `Jupyter service <https://lc.llnl.gov/jupyter>`__ (`documentation <https://lc.llnl.gov/confluence/display/LC/JupyterHub+and+Jupyter+Notebook>`__, login required)
+* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`__:
 
   * ``/p/gpfs1/$(whoami)``: personal directory on the parallel filesystem
   * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and *not* suitable for production quality data generation.
 
 
-Installation
-------------
+Login
+-----
 
-Use the following commands to download the WarpX source code and switch to the correct branch:
+.. note::
 
-.. code-block:: bash
+   Lassen is currently transitioning to RHEL8.
+   During this transition, first SSH into lassen and then ``ssh eatoss4`` next to work with the updated RHEL8/TOSS4 nodes.
 
-   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+   Approximately October 2023, the new software environment on these nodes will be the new default.
 
-We use the following modules and environments on the system (``$HOME/lassen_warpx.profile``).
 
-.. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_warpx.profile.example
-   :language: bash
-   :caption: You can copy this file from ``Tools/machines/lassen-llnl/lassen_warpx.profile.example``.
+.. _building-lassen-preparation:
+
+Preparation
+-----------
+
+Use the following commands to download the WarpX source code:
+
+.. code-block:: bash
 
-We recommend to store the above lines in a file, such as ``$HOME/lassen_warpx.profile``, and load it into your shell after a login:
+   git clone https://github.com/ECP-WarpX/WarpX.git /usr/workspace/${USER}/lassen/src/warpx
+
+We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx.profile``.
+Create it now:
 
 .. code-block:: bash
 
-   source $HOME/lassen_warpx.profile
+   cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example $HOME/lassen_v100_warpx.profile
+
+.. dropdown:: Script Details
+   :color: light
+   :icon: info
+   :animate: fade-in-slide-down
 
-And since Lassen does not yet provide a module for them, install ADIOS2, BLAS++ and LAPACK++:
+   .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example
+      :language: bash
+
+Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx.profile``.
+Enter the edit mode by typing ``i`` and edit line 2 to read:
 
 .. code-block:: bash
 
-   # c-blosc (I/O compression)
-   git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git src/c-blosc
-   rm -rf src/c-blosc-lassen-build
-   cmake -S src/c-blosc -B src/c-blosc-lassen-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=$HOME/sw/lassen/c-blosc-1.21.1
-   cmake --build src/c-blosc-lassen-build --target install --parallel 16
-
-   # HDF5
-   git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git src/hdf5
-   rm -rf src/hdf5-lassen-build
-   cmake -S src/hdf5 -B src/hdf5-lassen-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=$HOME/sw/lassen/hdf5-1.14.1.2
-   cmake --build src/hdf5-lassen-build --target install --parallel 16
-
-   # ADIOS2
-   git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git src/adios2
-   rm -rf src/adios2-lassen-build
-   cmake -S src/adios2 -B src/adios2-lassen-build -DBUILD_TESTING=OFF -DADIOS2_BUILD_EXAMPLES=OFF -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_SST=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=$HOME/sw/lassen/adios2-2.8.3
-   cmake --build src/adios2-lassen-build --target install -j 16
-
-   # BLAS++ (for PSATD+RZ)
-   git clone https://github.com/icl-utk-edu/blaspp.git src/blaspp
-   rm -rf src/blaspp-lassen-build
-   cmake -S src/blaspp -B src/blaspp-lassen-build -Duse_openmp=ON -Dgpu_backend=cuda -Duse_cmake_find_blas=ON -DBLA_VENDOR=IBMESSL -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=$HOME/sw/lassen/blaspp-master
-   cmake --build src/blaspp-lassen-build --target install --parallel 16
-
-   # LAPACK++ (for PSATD+RZ)
-   git clone https://github.com/icl-utk-edu/lapackpp.git src/lapackpp
-   rm -rf src/lapackpp-lassen-build
-   CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S src/lapackpp -B src/lapackpp-lassen-build -Duse_cmake_find_lapack=ON -DBLA_VENDOR=IBMESSL -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=$HOME/sw/lassen/lapackpp-master -DLAPACK_LIBRARIES=/usr/lib64/liblapack.so
-   cmake --build src/lapackpp-lassen-build --target install --parallel 16
-
-Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+   export proj="nsldt"
+
+Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
+
+.. important::
+
+   Now, and as the first step on future logins to lassen, activate these environment settings:
+
+   .. code-block:: bash
+
+      source $HOME/lassen_v100_warpx.profile
+
+Finally, since lassen does not yet provide software modules for some of our dependencies, install them once:
 
 .. code-block:: bash
 
-   cd $HOME/src/warpx
+   bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies.sh
+   source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen/bin/activate
+
+.. dropdown:: Script Details
+   :color: light
+   :icon: info
+   :animate: fade-in-slide-down
+
+   .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies.sh
+      :language: bash
+
+.. dropdown:: AI/ML Dependencies (Optional)
+   :animate: fade-in-slide-down
+
+   If you plan to run AI/ML workflows depending on pyTorch, run the next step as well.
+   This will take a while and should be skipped if not needed.
 
+   .. code-block:: bash
+
+      runNode bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_ml.sh
+
+   .. dropdown:: Script Details
+      :color: light
+      :icon: info
+      :animate: fade-in-slide-down
+
+      .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_ml.sh
+         :language: bash
+
+   For `optimas dependencies <https://github.com/optimas-org/optimas>`__ (incl. scikit-learn), plan another hour of build time:
+
+   .. code-block:: bash
+
+      python3 -m pip install -r /usr/workspace/${USER}/lassen/src/warpx/Tools/optimas/requirements.txt
+
+
+.. _building-lassen-compilation:
+
+Compilation
+-----------
+
+Use the following :ref:`cmake commands <building-cmake>` to compile the application executable:
+
+.. code-block:: bash
+
+   cd /usr/workspace/${USER}/lassen/src/warpx
    rm -rf build_lassen
-   cmake -S . -B build_lassen -DWarpX_COMPUTE=CUDA -DWarpX_DIMS="1;2;RZ;3" -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON
-   cmake --build build_lassen -j 10
 
-The other :ref:`general compile-time options <building-cmake>` apply as usual.
+   cmake -S . -B build_lassen -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
+   cmake --build build_lassen -j 8
+
+The WarpX application executables are now in ``/usr/workspace/${USER}/lassen/src/warpx/build_lassen/bin/``.
+Additionally, the following commands will install WarpX as a Python module:
+
+.. code-block:: bash
+
+   rm -rf build_lassen_py
+
+   cmake -S . -B build_lassen_py -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
+   cmake --build build_lassen_py -j 8 --target pip_install
+
+Now, you can :ref:`submit lassen compute jobs <running-cpp-lassen>` for WarpX :ref:`Python (PICMI) scripts <usage-picmi>` (:ref:`example scripts <usage-examples>`).
+Or, you can use the WarpX executables to submit lassen jobs (:ref:`example inputs <usage-examples>`).
+For executables, you can reference their location in your :ref:`job script <running-cpp-lassen>` or copy them to a location in ``$PROJWORK/$proj/``.
 
-**That's it!**
-WarpX executables for 1D, 2D, RZ and 3D are now in ``build_lassen/bin/`` and :ref:`can be run <running-cpp-lassen>` with the respective :ref:`example inputs files <usage-examples>`.
-Most people execute the binary directly or copy it out to a location in ``/p/gpfs1/$(whoami)``.
+
+.. _building-lassen-update:
+
+Update WarpX & Dependencies
+---------------------------
+
+If you already installed WarpX in the past and want to update it, start by getting the latest source code:
+
+.. code-block:: bash
+
+   cd /usr/workspace/${USER}/lassen/src/warpx
+
+   # read the output of this command - does it look ok?
+   git status
+
+   # get the latest WarpX source code
+   git fetch
+   git pull
+
+   # read the output of these commands - do they look ok?
+   git status
+   git log     # press q to exit
+
+And, if needed,
+
+- :ref:`update the lassen_v100_warpx.profile file <building-lassen-preparation>`,
+- log out and into the system, activate the now updated environment profile as usual,
+- :ref:`execute the dependency install scripts <building-lassen-preparation>`.
+
+As a last step, clean the build directory ``rm -rf /usr/workspace/${USER}/lassen/src/warpx/build_lassen`` and rebuild WarpX.
 
 
 .. _running-cpp-lassen:
@@ -146,3 +231,6 @@ Known System Issues
    .. code-block:: bash
 
       export OMPI_MCA_coll_ibm_skip_allgatherv=true
+
+As part of the same `CORAL acquisition program <https://doi.org/10.1109/SC.2018.00055>`__, Lassen is very similar to the design of Summit (OLCF).
+Thus, when encountering new issues it is worth checking also the :ref:`known Summit issues and work-arounds <building-summit-issues>`.
diff --git a/Docs/source/install/hpc/quartz.rst b/Docs/source/install/hpc/quartz.rst
index 8902ec2cf45..de7c5ace848 100644
--- a/Docs/source/install/hpc/quartz.rst
+++ b/Docs/source/install/hpc/quartz.rst
@@ -11,52 +11,138 @@ Introduction
 
 If you are new to this system, **please see the following resources**:
 
-* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`__ (login required)
 * `Quartz user guide <https://computing.llnl.gov/tutorials/linux_clusters/>`_
 * Batch system: `Slurm <https://computing.llnl.gov/tutorials/moab/>`_
+* `Jupyter service <https://lc.llnl.gov/jupyter>`__ (`documentation <https://lc.llnl.gov/confluence/display/LC/JupyterHub+and+Jupyter+Notebook>`__, login required)
 * `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
 
   * ``/p/lustre1/$(whoami)`` and ``/p/lustre2/$(whoami)``: personal directory on the parallel filesystem
   * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and *not* suitable for production quality data generation.
 
 
-Installation
-------------
+.. _building-quartz-preparation:
+
+Preparation
+-----------
 
-Use the following commands to download the WarpX source code and switch to the correct branch:
+Use the following commands to download the WarpX source code:
 
 .. code-block:: bash
 
    git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
 
-We use the following modules and environments on the system (``$HOME/quartz_warpx.profile``).
+We use system software modules, add environment hints and further dependencies via the file ``$HOME/quartz_warpx.profile``.
+Create it now:
 
-.. literalinclude:: ../../../../Tools/machines/quartz-llnl/quartz_warpx.profile.example
-   :language: bash
-   :caption: You can copy this file from ``Tools/machines/quartz-llnl/quartz_warpx.profile.example``.
+.. code-block:: bash
+
+   cp $HOME/src/warpx/Tools/machines/quartz-llnl/quartz/quartz_warpx.profile.example $HOME/quartz_warpx.profile
+
+.. dropdown:: Script Details
+   :color: light
+   :icon: info
+   :animate: fade-in-slide-down
+
+   .. literalinclude:: ../../../../Tools/machines/quartz-llnl/quartz/quartz_warpx.profile.example
+      :language: bash
+
+Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+For example, if you are member of the project ``tps``, then run ``vi $HOME/quartz_warpx.profile``.
+Enter the edit mode by typing ``i`` and edit line 2 to read:
+
+.. code-block:: bash
+
+   export proj="tps"
+
+Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
+
+.. important::
+
+   Now, and as the first step on future logins to Quartz, activate these environment settings:
+
+   .. code-block:: bash
+
+      source $HOME/quartz_warpx.profile
+
+Finally, since Quartz does not yet provide software modules for some of our dependencies, install them once:
+
+.. code-block:: bash
+
+   bash $HOME/src/warpx/Tools/machines/quartz-llnl/install_dependencies.sh
+   source /usr/workspace/${USER}/quartz/venvs/warpx-quartz/bin/activate
 
-We recommend to store the above lines in a file, such as ``$HOME/quartz_warpx.profile``, and load it into your shell after a login:
+.. dropdown:: Script Details
+   :color: light
+   :icon: info
+   :animate: fade-in-slide-down
+
+   .. literalinclude:: ../../../../Tools/machines/quartz-llnl/install_dependencies.sh
+      :language: bash
+
+
+.. _building-quartz-compilation:
+
+Compilation
+-----------
+
+Use the following :ref:`cmake commands <building-cmake>` to compile the application executable:
+
+.. code-block:: bash
+
+   cd $HOME/src/warpx
+   rm -rf build_quartz
+
+   cmake -S . -B build_quartz -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
+   cmake --build build_quartz -j 6
+
+The WarpX application executables are now in ``$HOME/src/warpx/build_quartz/bin/``.
+Additionally, the following commands will install WarpX as a Python module:
 
 .. code-block:: bash
 
-   source $HOME/quartz_warpx.profile
+   rm -rf build_quartz_py
+
+   cmake -S . -B build_quartz_py -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
+   cmake --build build_quartz_py -j 6 --target pip_install
+
+Now, you can :ref:`submit Quartz compute jobs <running-cpp-quartz>` for WarpX :ref:`Python (PICMI) scripts <usage-picmi>` (:ref:`example scripts <usage-examples>`).
+Or, you can use the WarpX executables to submit Quartz jobs (:ref:`example inputs <usage-examples>`).
+For executables, you can reference their location in your :ref:`job script <running-cpp-quartz>` or copy them to a location in ``$PROJWORK/$proj/``.
+
+
+.. _building-quartz-update:
 
-Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+Update WarpX & Dependencies
+---------------------------
+
+If you already installed WarpX in the past and want to update it, start by getting the latest source code:
 
 .. code-block:: bash
 
    cd $HOME/src/warpx
-   rm -rf build
 
-   cmake -S . -B build -DWarpX_DIMS="1;2;3" -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON
-   cmake --build build -j 6
+   # read the output of this command - does it look ok?
+   git status
+
+   # get the latest WarpX source code
+   git fetch
+   git pull
+
+   # read the output of these commands - do they look ok?
+   git status
+   git log     # press q to exit
+
+And, if needed,
+
+- :ref:`update the quartz_warpx.profile file <building-quartz-preparation>`,
+- log out and into the system, activate the now updated environment profile as usual,
+- :ref:`execute the dependency install scripts <building-quartz-preparation>`.
 
-The other :ref:`general compile-time options <building-cmake>` apply as usual.
+As a last step, clean the build directory ``rm -rf $HOME/src/warpx/build_quartz`` and rebuild WarpX.
 
-**That's it!**
-A 3D WarpX executable is now in ``build/bin/`` and :ref:`can be run <running-cpp-quartz-CPUs>` with a :ref:`3D example inputs file <usage-examples>`.
-Most people execute the binary directly or copy it out to a location in ``/p/lustre1/$(whoami)``.
 
+.. _running-cpp-quartz:
 
 Running
 -------
diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst
index 177e08090cd..27d7682899d 100644
--- a/Docs/source/usage/parameters.rst
+++ b/Docs/source/usage/parameters.rst
@@ -2472,7 +2472,10 @@ In-situ capabilities can be used by turning on Sensei or Ascent (provided they a
 BackTransformed Diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-``BackTransformed`` diag type are used when running a simulation in a boosted frame, to reconstruct output data to the lab frame. This option can be set using ``<diag_name>.diag_type = BackTransformed``. Additional options for this diagnostic include:
+``BackTransformed`` diag type are used when running a simulation in a boosted frame, to reconstruct output data to the lab frame. This option can be set using ``<diag_name>.diag_type = BackTransformed``. We support the following list of options from `Full Diagnostics`_
+    ``<diag_name>.format``, ``<diag_name>.openpmd_backend``, ``<diag_name>.dump_rz_modes``, ``<diag_name>.file_prefix``, ``<diag_name>.diag_lo``, ``<diag_name>.diag_hi``, ``<diag_name>.write_species``, ``<diag_name>.species``.
+
+    Additional options for this diagnostic include:
 
 * ``<diag_name>.num_snapshots_lab`` (`integer`)
     Only used when ``<diag_name>.diag_type`` is ``BackTransformed``.
@@ -2985,6 +2988,57 @@ Reduced Diagnostics
         1 or 0, it is possible to compute the charge on only some part of the
         embedded boundary.
 
+    * ``ColliderRelevant``
+        This diagnostics computes properties of two colliding beams that are relevant for particle colliders.
+        Two species must be specified. Photon species are not supported yet.
+        It is assumed that the two species propagate and collide along the ``z`` direction.
+        The output columns (for 3D-XYZ) are the following, where the minimum, average and maximum
+        are done over the whole species:
+
+        [0]: simulation step (iteration).
+
+        [1]: time (s).
+
+        [2]: time derivative of the luminosity (:math:`m^{-2}s^{-1}`) defined as:
+
+        .. math::
+
+            \frac{dL}{dt} = 2 c \iiint  n_1(x,y,z) n_2(x,y,z) dx dy dz
+
+        where :math:`n_1`, :math:`n_2` are the number densities of the two colliding species.
+
+        [3], [4], [5]: If, QED is enabled, the minimum, average and maximum values of the quantum parameter :math:`\chi` of species 1:
+        :math:`\chi_{min}`,
+        :math:`\langle \chi \rangle`,
+        :math:`\chi_{max}`.
+        If QED is not enabled, these numbers are not computed.
+
+        [6], [7]: The average and standard deviation of the values of the transverse coordinate :math:`x` (m) of species 1:
+        :math:`\langle x \rangle`,
+        :math:`\sqrt{\langle x- \langle x \rangle \rangle^2}`.
+
+        [8], [9]: The average and standard deviation of the values of the transverse coordinate :math:`y` (m) of species 1:
+        :math:`\langle y \rangle`,
+        :math:`\sqrt{\langle y- \langle y \rangle \rangle^2}`.
+
+        [10], [11], [12], [13]: The minimum, average, maximum and standard deviation of the angle :math:`\theta_x = \angle (u_x, u_z)` (rad) of species 1:
+        :math:`{\theta_x}_{min}`,
+        :math:`\langle \theta_x \rangle`,
+        :math:`{\theta_x}_{max}`,
+        :math:`\sqrt{\langle \theta_x- \langle \theta_x \rangle \rangle^2}`.
+
+        [14], [15], [16], [17]:  The minimum, average, maximum and standard deviation of the angle :math:`\theta_y = \angle (u_y, u_z)` (rad) of species 1:
+        :math:`{\theta_y}_{min}`,
+        :math:`\langle \theta_y \rangle`,
+        :math:`{\theta_y}_{max}`,
+        :math:`\sqrt{\langle \theta_y- \langle \theta_y \rangle \rangle^2}`.
+
+        [18], ..., [32]: Analogous quantities for species 2.
+
+        For 2D-XZ, :math:`y`-related quantities are not outputted.
+        For 1D-Z, :math:`x`-related and :math:`y`-related quantities are not outputted.
+        RZ geometry is not supported yet.
+
 * ``<reduced_diags_name>.intervals`` (`string`)
     Using the `Intervals Parser`_ syntax, this string defines the timesteps at which reduced
     diagnostics are written to file.
diff --git a/Examples/Tests/collider_relevant_diags/analysis_multiple_particles.py b/Examples/Tests/collider_relevant_diags/analysis_multiple_particles.py
new file mode 100755
index 00000000000..b23bb69d52c
--- /dev/null
+++ b/Examples/Tests/collider_relevant_diags/analysis_multiple_particles.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+import numpy as np
+import openpmd_api as io
+import pandas as pd
+from scipy.constants import c, e, hbar, m_e
+
+sys.path.append('../../../../warpx/Regression/Checksum/')
+import checksumAPI
+
+sys.path.append('../../../../warpx/Tools/Parser/')
+from input_file_parser import parse_input_file
+
+E_crit = m_e**2*c**3/(e*hbar)
+B_crit = m_e**2*c**2/(e*hbar)
+
+def chi(ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz):
+    gamma = np.sqrt(1.+ux**2+uy**2+uz**2)
+    vx = ux / gamma * c
+    vy = uy / gamma * c
+    vz = uz / gamma * c
+    tmp1x = Ex + vy*Bz - vz*By
+    tmp1y = Ey - vx*Bz + vz*Bx
+    tmp1z = Ez + vx*By - vy*Bx
+    tmp2 = (Ex*vx + Ey*vy + Ez*vz)/c
+    chi = gamma/E_crit*np.sqrt(tmp1x**2+tmp1y**2+tmp1z**2 - tmp2**2)
+    return chi
+
+def dL_dt():
+    series = io.Series("diags/diag2/openpmd_%T.h5",io.Access.read_only)
+    iterations = np.asarray(series.iterations)
+    lumi = []
+    for n,ts in enumerate(iterations):
+        it = series.iterations[ts]
+        rho1 = it.meshes["rho_beam_e"]
+        dV = np.prod(rho1.grid_spacing)
+        rho1 = it.meshes["rho_beam_e"][io.Mesh_Record_Component.SCALAR].load_chunk()
+        rho2 = it.meshes["rho_beam_p"][io.Mesh_Record_Component.SCALAR].load_chunk()
+        beam_e_charge = it.particles["beam_e"]["charge"][io.Mesh_Record_Component.SCALAR].load_chunk()
+        beam_p_charge = it.particles["beam_p"]["charge"][io.Mesh_Record_Component.SCALAR].load_chunk()
+        q1 = beam_e_charge[0]
+        if not np.all(beam_e_charge == q1):
+            sys.exit('beam_e particles do not have the same charge')
+        q2 = beam_p_charge[0]
+        if not np.all(beam_p_charge == q2):
+            sys.exit('beam_p particles do not have the same charge')
+        series.flush()
+        n1 = rho1/q1
+        n2 = rho2/q2
+        l = 2*np.sum(n1*n2)*dV*c
+        lumi.append(l)
+    return lumi
+
+input_dict = parse_input_file('inputs_3d_multiple_particles')
+Ex, Ey, Ez = [float(w) for w in input_dict['particles.E_external_particle']]
+Bx, By, Bz = [float(w) for w in input_dict['particles.B_external_particle']]
+
+CollDiagFname='diags/reducedfiles/ColliderRelevant_beam_e_beam_p.txt'
+df = pd.read_csv(CollDiagFname, sep=" ", header=0)
+
+for species in ['beam_p', 'beam_e']:
+
+    ux1, ux2, ux3 = [float(w) for w in input_dict[f'{species}.multiple_particles_ux']]
+    uy1, uy2, uy3 = [float(w) for w in input_dict[f'{species}.multiple_particles_uy']]
+    uz1, uz2, uz3 = [float(w) for w in input_dict[f'{species}.multiple_particles_uz']]
+
+    x = np.array([float(w) for w in input_dict[f'{species}.multiple_particles_pos_x']])
+    y = np.array([float(w) for w in input_dict[f'{species}.multiple_particles_pos_y']])
+
+    w = np.array([float(w) for w in input_dict[f'{species}.multiple_particles_weight']])
+
+    CHI_ANALYTICAL = np.array([chi(ux1, uy1, uz1, Ex, Ey, Ez, Bx, By, Bz),
+                               chi(ux2, uy2, uz2, Ex, Ey, Ez, Bx, By, Bz),
+                               chi(ux3, uy3, uz3, Ex, Ey, Ez, Bx, By, Bz)])
+    THETAX = np.array([np.arctan2(ux1, uz1), np.arctan2(ux2, uz2), np.arctan2(ux3, uz3)])
+    THETAY = np.array([np.arctan2(uy1, uz1), np.arctan2(uy2, uz2), np.arctan2(uy3, uz3)])
+
+    # CHI MAX
+    fname=f'diags/reducedfiles/ParticleExtrema_{species}.txt'
+    chimax_pe = np.loadtxt(fname)[:,19]
+    chimax_cr = df[[col for col in df.columns if f'chi_max_{species}' in col]].to_numpy()
+    assert np.allclose(np.max(CHI_ANALYTICAL), chimax_cr, rtol=1e-8)
+    assert np.allclose(chimax_pe, chimax_cr, rtol=1e-8)
+
+    # CHI MIN
+    fname=f'diags/reducedfiles/ParticleExtrema_{species}.txt'
+    chimin_pe = np.loadtxt(fname)[:,18]
+    chimin_cr = df[[col for col in df.columns if f'chi_min_{species}' in col]].to_numpy()
+    assert np.allclose(np.min(CHI_ANALYTICAL), chimin_cr, rtol=1e-8)
+    assert np.allclose(chimin_pe, chimin_cr, rtol=1e-8)
+
+    # CHI AVERAGE
+    chiave_cr = df[[col for col in df.columns if f'chi_ave_{species}' in col]].to_numpy()
+    assert np.allclose(np.average(CHI_ANALYTICAL, weights=w), chiave_cr, rtol=1e-8)
+
+    # X AVE STD
+    x_ave_cr = df[[col for col in df.columns if f']x_ave_{species}' in col]].to_numpy()
+    x_std_cr = df[[col for col in df.columns if f']x_std_{species}' in col]].to_numpy()
+    x_ave = np.average(x, weights=w)
+    x_std = np.sqrt(np.average((x-x_ave)**2, weights=w))
+    assert np.allclose(x_ave, x_ave_cr, rtol=1e-8)
+    assert np.allclose(x_std, x_std_cr, rtol=1e-8)
+
+    # Y AVE STD
+    y_ave_cr = df[[col for col in df.columns if f']y_ave_{species}' in col]].to_numpy()
+    y_std_cr = df[[col for col in df.columns if f']y_std_{species}' in col]].to_numpy()
+    y_ave = np.average(y, weights=w)
+    y_std = np.sqrt(np.average((y-y_ave)**2, weights=w))
+    assert np.allclose(y_ave, y_ave_cr, rtol=1e-8)
+    assert np.allclose(y_std, y_std_cr, rtol=1e-8)
+
+    # THETA X MIN AVE MAX STD
+    thetax_min_cr = df[[col for col in df.columns if f'theta_x_min_{species}' in col]].to_numpy()
+    thetax_ave_cr = df[[col for col in df.columns if f'theta_x_ave_{species}' in col]].to_numpy()
+    thetax_max_cr = df[[col for col in df.columns if f'theta_x_max_{species}' in col]].to_numpy()
+    thetax_std_cr = df[[col for col in df.columns if f'theta_x_std_{species}' in col]].to_numpy()
+    thetax_min = np.min(THETAX)
+    thetax_ave = np.average(THETAX, weights=w)
+    thetax_max = np.max(THETAX)
+    thetax_std = np.sqrt(np.average((THETAX-thetax_ave)**2, weights=w))
+    assert np.allclose(thetax_min, thetax_min_cr, rtol=1e-8)
+    assert np.allclose(thetax_ave, thetax_ave_cr, rtol=1e-8)
+    assert np.allclose(thetax_max, thetax_max_cr, rtol=1e-8)
+    assert np.allclose(thetax_std, thetax_std_cr, rtol=1e-8)
+
+    # THETA Y MIN AVE MAX STD
+    thetay_min_cr = df[[col for col in df.columns if f'theta_y_min_{species}' in col]].to_numpy()
+    thetay_ave_cr = df[[col for col in df.columns if f'theta_y_ave_{species}' in col]].to_numpy()
+    thetay_max_cr = df[[col for col in df.columns if f'theta_y_max_{species}' in col]].to_numpy()
+    thetay_std_cr = df[[col for col in df.columns if f'theta_y_std_{species}' in col]].to_numpy()
+    thetay_min = np.min(THETAY)
+    thetay_ave = np.average(THETAY, weights=w)
+    thetay_max = np.max(THETAY)
+    thetay_std = np.sqrt(np.average((THETAY-thetay_ave)**2, weights=w))
+    assert np.allclose(thetay_min, thetay_min_cr, rtol=1e-8)
+    assert np.allclose(thetay_ave, thetay_ave_cr, rtol=1e-8)
+    assert np.allclose(thetay_max, thetay_max_cr, rtol=1e-8)
+    assert np.allclose(thetay_std, thetay_std_cr, rtol=1e-8)
+
+    # dL/dt
+    dL_dt_cr = df[[col for col in df.columns if 'dL_dt' in col]].to_numpy()
+    assert np.allclose(dL_dt_cr, dL_dt(), rtol=1e-8)
+
+# Checksum analysis
+plotfile = sys.argv[1]
+test_name = os.path.split(os.getcwd())[1]
+checksumAPI.evaluate_checksum(test_name, plotfile)
diff --git a/Examples/Tests/collider_relevant_diags/inputs_3d_multiple_particles b/Examples/Tests/collider_relevant_diags/inputs_3d_multiple_particles
new file mode 100644
index 00000000000..1efc68c33b0
--- /dev/null
+++ b/Examples/Tests/collider_relevant_diags/inputs_3d_multiple_particles
@@ -0,0 +1,130 @@
+#################################
+########## MY CONSTANTS #########
+#################################
+my_constants.nx = 8
+my_constants.ny = 8
+my_constants.nz = 8
+
+#################################
+####### GENERAL PARAMETERS ######
+#################################
+max_step = 1
+amr.n_cell = nx ny nz
+amr.max_grid_size = 4
+amr.blocking_factor = 4
+amr.max_level = 0
+geometry.dims = 3
+geometry.prob_lo = 0 0 0
+geometry.prob_hi =  8 8 8
+particles.do_tiling = 0
+warpx.use_filter = 0
+
+#################################
+######## BOUNDARY CONDITION #####
+#################################
+boundary.field_lo = periodic periodic periodic
+boundary.field_hi = periodic periodic periodic
+boundary.particle_lo = periodic periodic periodic
+boundary.particle_hi = periodic periodic periodic
+
+#################################
+############ NUMERICS ###########
+#################################
+algo.maxwell_solver = ckc
+warpx.cfl = 0.99
+algo.particle_shape = 1
+
+#################################
+############ FIELDS #############
+#################################
+particles.E_ext_particle_init_style = constant
+particles.B_ext_particle_init_style = constant
+particles.E_external_particle = 10000. 0. 0.
+particles.B_external_particle = 0. 5000. 0.
+
+#################################
+########### PARTICLES ###########
+#################################
+particles.species_names = pho beam_p beam_e
+particles.photon_species = pho
+
+beam_e.species_type = electron
+beam_e.injection_style = MultipleParticles
+beam_e.multiple_particles_pos_x = 4.5  3.5  0.5
+beam_e.multiple_particles_pos_y = 4.5  2.5  1.5
+beam_e.multiple_particles_pos_z = 4.5  1.5  1.5
+beam_e.multiple_particles_ux = 0.3 0.2 0.1
+beam_e.multiple_particles_uy = 0.4 -0.3 -0.1
+beam_e.multiple_particles_uz = 0.3 0.1 -10.
+beam_e.multiple_particles_weight = 1. 2 3
+beam_e.initialize_self_fields = 0
+beam_e.self_fields_required_precision = 5e-10
+beam_e.do_qed_quantum_sync = 1
+beam_e.qed_quantum_sync_phot_product_species = pho
+beam_e.do_not_push = 1
+beam_e.do_not_deposit = 1
+
+beam_p.species_type = positron
+beam_p.injection_style = MultipleParticles
+beam_p.multiple_particles_pos_x = 4.5 3.5 0.5
+beam_p.multiple_particles_pos_y = 4.5 2.5 1.5
+beam_p.multiple_particles_pos_z = 4.5 1.5 1.5
+beam_p.multiple_particles_ux = 0.3 0.2 0.1
+beam_p.multiple_particles_uy = 0.4 -0.3 -0.1
+beam_p.multiple_particles_uz = 0.3 0.1 -10.
+beam_p.multiple_particles_weight = 1. 2 3
+beam_p.initialize_self_fields = 0
+beam_p.self_fields_required_precision = 5e-10
+beam_p.do_qed_quantum_sync = 1
+beam_p.qed_quantum_sync_phot_product_species = pho
+beam_p.do_not_push = 1
+beam_p.do_not_deposit = 1
+
+pho.species_type = photon
+pho.injection_style = none
+
+#################################
+############# QED ###############
+#################################
+qed_qs.photon_creation_energy_threshold = 0.
+qed_qs.lookup_table_mode = builtin
+qed_qs.chi_min = 1.e-3
+warpx.do_qed_schwinger = 0
+
+#################################
+######### DIAGNOSTICS ###########
+#################################
+# FULL
+diagnostics.diags_names = diag1 diag2
+
+diag1.intervals = 1
+diag1.diag_type = Full
+diag1.write_species = 1
+diag1.fields_to_plot = Ex Ey Ez Bx By Bz jx jy jz rho_beam_e rho_beam_p rho
+diag1.species = pho beam_e beam_p
+diag1.format = plotfile
+#diag1.dump_last_timestep = 1
+
+diag2.intervals = 1
+diag2.diag_type = Full
+diag2.write_species = 1
+diag2.fields_to_plot = Ex Ey Ez Bx By Bz jx jy jz rho_beam_e rho_beam_p rho
+diag2.species = pho beam_e beam_p
+diag2.format = openpmd
+diag2.openpmd_backend = h5
+#diag2.dump_last_timestep = 1
+
+# REDUCED
+warpx.reduced_diags_names = ParticleExtrema_beam_e ParticleExtrema_beam_p ColliderRelevant_beam_e_beam_p
+
+ColliderRelevant_beam_e_beam_p.type = ColliderRelevant
+ColliderRelevant_beam_e_beam_p.intervals = 1
+ColliderRelevant_beam_e_beam_p.species =beam_e beam_p
+
+ParticleExtrema_beam_e.type = ParticleExtrema
+ParticleExtrema_beam_e.intervals = 1
+ParticleExtrema_beam_e.species = beam_e
+
+ParticleExtrema_beam_p.type = ParticleExtrema
+ParticleExtrema_beam_p.intervals = 1
+ParticleExtrema_beam_p.species = beam_p
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index d5d00fb3090..ee4422daca6 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -128,6 +128,16 @@ class Species(picmistandard.PICMI_Species):
 
     warpx_save_particles_at_eb: bool, default=False
         Whether to save particles lost at the embedded boundary
+
+    warpx_do_resampling: bool, default=False
+        Whether particles will be resampled
+
+    warpx_resampling_trigger_intervals: bool, default=0
+        Timesteps at which to resample
+
+    warpx_resampling_trigger_max_avg_ppc: int, default=infinity
+        Resampling will be done when the average number of
+        particles per cell exceeds this number
     """
     def init(self, kw):
 
@@ -203,6 +213,11 @@ def init(self, kw):
         self.save_particles_at_zhi = kw.pop('warpx_save_particles_at_zhi', None)
         self.save_particles_at_eb = kw.pop('warpx_save_particles_at_eb', None)
 
+        # Resampling settings
+        self.do_resampling = kw.pop('warpx_do_resampling', None)
+        self.resampling_trigger_intervals = kw.pop('warpx_resampling_trigger_intervals', None)
+        self.resampling_triggering_max_avg_ppc = kw.pop('warpx_resampling_trigger_max_avg_ppc', None)
+
     def initialize_inputs(self, layout,
                           initialize_self_fields = False,
                           injection_plane_position = None,
@@ -238,7 +253,10 @@ def initialize_inputs(self, layout,
                                              do_not_deposit = self.do_not_deposit,
                                              do_not_push = self.do_not_push,
                                              do_not_gather = self.do_not_gather,
-                                             random_theta = self.random_theta)
+                                             random_theta = self.random_theta,
+                                             do_resampling=self.do_resampling,
+                                             resampling_trigger_intervals=self.resampling_trigger_intervals,
+                                             resampling_trigger_max_avg_ppc=self.resampling_triggering_max_avg_ppc)
 
         # add reflection models
         self.species.add_new_attr("reflection_model_xlo(E)", self.reflection_model_xlo)
@@ -277,6 +295,10 @@ def initialize_inputs(self, layout,
 
 
 class GaussianBunchDistribution(picmistandard.PICMI_GaussianBunchDistribution):
+    def init(self, kw):
+        self.do_symmetrize = kw.pop('warpx_do_symmetrize', None)
+        self.symmetrization_order = kw.pop('warpx_symmetrization_order', None)
+
     def initialize_inputs(self, species_number, layout, species, density_scale):
         species.injection_style = "gaussian_beam"
         species.x_m = self.centroid_position[0]
@@ -329,6 +351,9 @@ def initialize_inputs(self, species_number, layout, species, density_scale):
             species.uy = self.centroid_velocity[1]/constants.c
             species.uz = self.centroid_velocity[2]/constants.c
 
+        species.do_symmetrize = self.do_symmetrize
+        species.symmetrization_order = self.symmetrization_order
+
 
 class DensityDistributionBase(object):
     """This is a base class for several predefined density distributions. It
@@ -1655,6 +1680,9 @@ class Simulation(picmistandard.PICMI_Simulation):
     warpx_amrex_the_arena_init_size: long int, optional
         The amount of memory in bytes to allocate in the Arena.
 
+    warpx_amrex_use_gpu_aware_mpi: bool, optional
+        Whether to use GPU-aware MPI communications
+
     warpx_zmax_plasma_to_compute_max_step: float, optional
         Sets the simulation run time based on the maximum z value
 
@@ -1673,6 +1701,12 @@ class Simulation(picmistandard.PICMI_Simulation):
 
     warpx_checkpoint_signals: list of strings
         Signals on which to write out a checkpoint
+
+    warpx_numprocs: list of ints (1 in 1D, 2 in 2D, 3 in 3D)
+        Domain decomposition on the coarsest level.
+        The domain will be chopped into the exact number of pieces in each dimension as specified by this parameter.
+        https://warpx.readthedocs.io/en/latest/usage/parameters.html#distribution-across-mpi-ranks-and-parallelization
+        https://warpx.readthedocs.io/en/latest/usage/domain_decomposition.html#simple-method
     """
 
     # Set the C++ WarpX interface (see _libwarpx.LibWarpX) as an extension to
@@ -1708,6 +1742,7 @@ def init(self, kw):
         self.amr_restart = kw.pop('warpx_amr_restart', None)
         self.amrex_the_arena_is_managed = kw.pop('warpx_amrex_the_arena_is_managed', None)
         self.amrex_the_arena_init_size = kw.pop('warpx_amrex_the_arena_init_size', None)
+        self.amrex_use_gpu_aware_mpi = kw.pop('warpx_amrex_use_gpu_aware_mpi', None)
         self.zmax_plasma_to_compute_max_step = kw.pop('warpx_zmax_plasma_to_compute_max_step', None)
         self.compute_max_step_from_btd = kw.pop('warpx_compute_max_step_from_btd', None)
 
@@ -1716,6 +1751,7 @@ def init(self, kw):
 
         self.break_signals = kw.pop('warpx_break_signals', None)
         self.checkpoint_signals = kw.pop('warpx_checkpoint_signals', None)
+        self.numprocs = kw.pop('warpx_numprocs', None)
 
         self.inputs_initialized = False
         self.warpx_initialized = False
@@ -1766,6 +1802,8 @@ def initialize_inputs(self):
         pywarpx.warpx.break_signals = self.break_signals
         pywarpx.warpx.checkpoint_signals = self.checkpoint_signals
 
+        pywarpx.warpx.numprocs = self.numprocs
+
         particle_shape = self.particle_shape
         for s in self.species:
             if s.particle_shape is not None:
@@ -1835,6 +1873,9 @@ def initialize_inputs(self):
         if self.amrex_the_arena_init_size is not None:
             pywarpx.amrex.the_arena_init_size = self.amrex_the_arena_init_size
 
+        if self.amrex_use_gpu_aware_mpi is not None:
+            pywarpx.amrex.use_gpu_aware_mpi = self.amrex_use_gpu_aware_mpi
+
     def initialize_warpx(self, mpi_comm=None):
         if self.warpx_initialized:
             return
@@ -2161,6 +2202,7 @@ def initialize_inputs(self):
             variables = list(variables)
             variables.sort()
 
+        # species list
         if np.iterable(self.species):
             species_list = self.species
         else:
@@ -2204,6 +2246,10 @@ class LabFrameFieldDiagnostic(picmistandard.PICMI_LabFrameFieldDiagnostic,
     warpx_file_prefix: string, optional
         Passed to <diagnostic name>.file_prefix
 
+    warpx_intervals: integer or string
+        Selects the snapshots to be made, instead of using "num_snapshots" which
+        makes all snapshots. "num_snapshots" is ignored.
+
     warpx_file_min_digits: integer, optional
         Passed to <diagnostic name>.file_min_digits
 
@@ -2223,6 +2269,7 @@ def init(self, kw):
         self.format = kw.pop('warpx_format', None)
         self.openpmd_backend = kw.pop('warpx_openpmd_backend', None)
         self.file_prefix = kw.pop('warpx_file_prefix', None)
+        self.intervals = kw.pop('warpx_intervals', None)
         self.file_min_digits = kw.pop('warpx_file_min_digits', None)
         self.buffer_size = kw.pop('warpx_buffer_size', None)
         self.lower_bound = kw.pop('warpx_lower_bound', None)
@@ -2241,10 +2288,15 @@ def initialize_inputs(self):
         self.diagnostic.diag_hi = self.upper_bound
 
         self.diagnostic.do_back_transformed_fields = 1
-        self.diagnostic.num_snapshots_lab = self.num_snapshots
         self.diagnostic.dt_snapshots_lab = self.dt_snapshots
         self.diagnostic.buffer_size = self.buffer_size
 
+        # intervals and num_snapshots_lab cannot both be set
+        if self.intervals is not None:
+            self.diagnostic.intervals = self.intervals
+        else:
+            self.diagnostic.num_snapshots_lab = self.num_snapshots
+
         self.diagnostic.do_back_transformed_particles = self.write_species
 
         # --- Use a set to ensure that fields don't get repeated.
@@ -2307,6 +2359,10 @@ class LabFrameParticleDiagnostic(picmistandard.PICMI_LabFrameParticleDiagnostic,
     warpx_file_prefix: string, optional
         Passed to <diagnostic name>.file_prefix
 
+    warpx_intervals: integer or string
+        Selects the snapshots to be made, instead of using "num_snapshots" which
+        makes all snapshots. "num_snapshots" is ignored.
+
     warpx_file_min_digits: integer, optional
         Passed to <diagnostic name>.file_min_digits
 
@@ -2320,6 +2376,7 @@ def init(self, kw):
         self.format = kw.pop('warpx_format', None)
         self.openpmd_backend = kw.pop('warpx_openpmd_backend', None)
         self.file_prefix = kw.pop('warpx_file_prefix', None)
+        self.intervals = kw.pop('warpx_intervals', None)
         self.file_min_digits = kw.pop('warpx_file_min_digits', None)
         self.buffer_size = kw.pop('warpx_buffer_size', None)
         self.write_fields = kw.pop('warpx_write_fields', None)
@@ -2334,14 +2391,59 @@ def initialize_inputs(self):
         self.diagnostic.file_min_digits = self.file_min_digits
 
         self.diagnostic.do_back_transformed_particles = 1
-        self.diagnostic.num_snapshots_lab = self.num_snapshots
         self.diagnostic.dt_snapshots_lab = self.dt_snapshots
         self.diagnostic.buffer_size = self.buffer_size
 
+        # intervals and num_snapshots_lab cannot both be set
+        if self.intervals is not None:
+            self.diagnostic.intervals = self.intervals
+        else:
+            self.diagnostic.num_snapshots_lab = self.num_snapshots
+
         self.diagnostic.do_back_transformed_fields = self.write_fields
 
         self.set_write_dir()
 
+        # --- Use a set to ensure that fields don't get repeated.
+        variables = set()
+
+        if self.data_list is not None:
+            for dataname in self.data_list:
+                if dataname == 'position':
+                    # --- The positions are alway written out anyway
+                    pass
+                elif dataname == 'momentum':
+                    variables.add('ux')
+                    variables.add('uy')
+                    variables.add('uz')
+                elif dataname == 'weighting':
+                    variables.add('w')
+                elif dataname == 'fields':
+                    variables.add('Ex')
+                    variables.add('Ey')
+                    variables.add('Ez')
+                    variables.add('Bx')
+                    variables.add('By')
+                    variables.add('Bz')
+                elif dataname in ['ux', 'uy', 'uz', 'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz', 'Er', 'Et', 'Br', 'Bt']:
+                    variables.add(dataname)
+
+            # --- Convert the set to a sorted list so that the order
+            # --- is the same on all processors.
+            variables = list(variables)
+            variables.sort()
+
+        # species list
+        if np.iterable(self.species):
+            species_list = self.species
+        else:
+            species_list = [self.species]
+
+        for specie in species_list:
+            diag = pywarpx.Bucket.Bucket(self.name + '.' + specie.name,
+                                         variables = variables)
+            self.diagnostic._species_dict[specie.name] = diag
+
 
 class ReducedDiagnostic(picmistandard.base._ClassWithInit, WarpXDiagnosticBase):
     """
diff --git a/Python/setup.py b/Python/setup.py
index 60109d7a960..99f79cd1d18 100644
--- a/Python/setup.py
+++ b/Python/setup.py
@@ -54,7 +54,7 @@
     package_data = {}
 
 setup(name = 'pywarpx',
-      version = '23.08',
+      version = '23.09',
       packages = ['pywarpx'],
       package_dir = {'pywarpx': 'pywarpx'},
       description = """Wrapper of WarpX""",
diff --git a/Regression/Checksum/benchmarks_json/collider_diagnostics.json b/Regression/Checksum/benchmarks_json/collider_diagnostics.json
new file mode 100644
index 00000000000..06553a7ec19
--- /dev/null
+++ b/Regression/Checksum/benchmarks_json/collider_diagnostics.json
@@ -0,0 +1,36 @@
+{
+  "lev=0": {
+    "Bx": 0.0,
+    "By": 0.0,
+    "Bz": 0.0,
+    "Ex": 0.0,
+    "Ey": 0.0,
+    "Ez": 0.0,
+    "jx": 0.0,
+    "jy": 0.0,
+    "jz": 0.0,
+    "rho": 0.0,
+    "rho_beam_e": 9.613059803999999e-19,
+    "rho_beam_p": 9.613059803999999e-19
+  },
+  "beam_e": {
+    "particle_momentum_x": 1.638554718442694e-22,
+    "particle_momentum_y": 2.184739624590259e-22,
+    "particle_momentum_z": 2.8401615119673365e-21,
+    "particle_opticalDepthQSR": 9.767741201839083e+00,
+    "particle_position_x": 8.5,
+    "particle_position_y": 8.5,
+    "particle_position_z": 7.5,
+    "particle_weight": 6.0
+  },
+  "beam_p": {
+    "particle_momentum_x": 1.638554718442694e-22,
+    "particle_momentum_y": 2.184739624590259e-22,
+    "particle_momentum_z": 2.8401615119673365e-21,
+    "particle_opticalDepthQSR": 8.773870620305825e+00,
+    "particle_position_x": 8.5,
+    "particle_position_y": 8.5,
+    "particle_position_z": 7.5,
+    "particle_weight": 6.0
+  }
+}
diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini
index 6b1ded3d359..c20dc5ed93c 100644
--- a/Regression/WarpX-GPU-tests.ini
+++ b/Regression/WarpX-GPU-tests.ini
@@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more
 
 [AMReX]
 dir = /home/regtester/git/amrex/
-branch = c45770c9f9b2c5fa98c675a439c502e78912bf47
+branch = 48b3ec7cb7ad99823bd85fad83c13c3cfd5ecdd4
 
 [source]
 dir = /home/regtester/git/WarpX
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index 4bde2a759a0..56139d3da17 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det
 
 [AMReX]
 dir = /home/regtester/AMReX_RegTesting/amrex/
-branch = c45770c9f9b2c5fa98c675a439c502e78912bf47
+branch = 48b3ec7cb7ad99823bd85fad83c13c3cfd5ecdd4
 
 [source]
 dir = /home/regtester/AMReX_RegTesting/warpx
@@ -209,6 +209,22 @@ compileTest = 0
 doVis = 0
 analysisRoutine = Examples/Tests/btd_rz/analysis_BTD_laser_antenna.py
 
+[collider_diagnostics]
+buildDir = .
+inputFile = Examples/Tests/collider_relevant_diags/inputs_3d_multiple_particles
+runtime_params = warpx.abort_on_warning_threshold=high
+dim = 3
+addToCompileString =
+cmakeSetupOpts = -DWarpX_DIMS=3
+restartTest = 0
+useMPI = 1
+numprocs = 2
+useOMP = 1
+numthreads = 1
+compileTest = 0
+doVis = 0
+analysisRoutine = Examples/Tests/collider_relevant_diags/analysis_multiple_particles.py
+
 [collisionISO]
 buildDir = .
 inputFile = Examples/Tests/collision/inputs_3d_isotropization
diff --git a/Regression/requirements.txt b/Regression/requirements.txt
index 011b11a75b5..5bdd04ba106 100644
--- a/Regression/requirements.txt
+++ b/Regression/requirements.txt
@@ -1,5 +1,5 @@
 dill
-lasy==0.1.1
+lasy
 matplotlib
 mpi4py
 numpy
diff --git a/Source/AcceleratorLattice/LatticeElements/HardEdgedPlasmaLens.cpp b/Source/AcceleratorLattice/LatticeElements/HardEdgedPlasmaLens.cpp
index 1c609d697e6..9c8726a9c4b 100644
--- a/Source/AcceleratorLattice/LatticeElements/HardEdgedPlasmaLens.cpp
+++ b/Source/AcceleratorLattice/LatticeElements/HardEdgedPlasmaLens.cpp
@@ -5,6 +5,7 @@
  * License: BSD-3-Clause-LBNL
  */
 #include "HardEdgedPlasmaLens.H"
+#include "Utils/Parser/ParserUtils.H"
 
 #include <AMReX_ParmParse.H>
 #include <AMReX_REAL.H>
@@ -25,8 +26,8 @@ HardEdgedPlasmaLens::AddElement (amrex::ParmParse & pp_element, amrex::ParticleR
 
     amrex::ParticleReal dEdx = 0._prt;
     amrex::ParticleReal dBdx = 0._prt;
-    pp_element.query("dEdx", dEdx);
-    pp_element.query("dBdx", dBdx);
+    utils::parser::queryWithParser(pp_element, "dEdx", dEdx);
+    utils::parser::queryWithParser(pp_element, "dBdx", dBdx);
 
     h_dEdx.push_back(dEdx);
     h_dBdx.push_back(dBdx);
diff --git a/Source/AcceleratorLattice/LatticeElements/HardEdgedQuadrupole.cpp b/Source/AcceleratorLattice/LatticeElements/HardEdgedQuadrupole.cpp
index cdb6502ecef..51bcf7a2497 100644
--- a/Source/AcceleratorLattice/LatticeElements/HardEdgedQuadrupole.cpp
+++ b/Source/AcceleratorLattice/LatticeElements/HardEdgedQuadrupole.cpp
@@ -5,6 +5,7 @@
  * License: BSD-3-Clause-LBNL
  */
 #include "HardEdgedQuadrupole.H"
+#include "Utils/Parser/ParserUtils.H"
 
 #include <AMReX_ParmParse.H>
 #include <AMReX_REAL.H>
@@ -25,8 +26,8 @@ HardEdgedQuadrupole::AddElement (amrex::ParmParse & pp_element, amrex::ParticleR
 
     amrex::ParticleReal dEdx = 0._prt;
     amrex::ParticleReal dBdx = 0._prt;
-    pp_element.query("dEdx", dEdx);
-    pp_element.query("dBdx", dBdx);
+    utils::parser::queryWithParser(pp_element, "dEdx", dEdx);
+    utils::parser::queryWithParser(pp_element, "dBdx", dBdx);
 
     h_dEdx.push_back(dEdx);
     h_dBdx.push_back(dBdx);
diff --git a/Source/AcceleratorLattice/LatticeElements/LatticeElementBase.cpp b/Source/AcceleratorLattice/LatticeElements/LatticeElementBase.cpp
index 6ef318057ff..248db59aaf6 100644
--- a/Source/AcceleratorLattice/LatticeElements/LatticeElementBase.cpp
+++ b/Source/AcceleratorLattice/LatticeElements/LatticeElementBase.cpp
@@ -5,6 +5,7 @@
  * License: BSD-3-Clause-LBNL
  */
 #include "LatticeElementBase.H"
+#include "Utils/Parser/ParserUtils.H"
 
 #include <AMReX_ParmParse.H>
 #include <AMReX_REAL.H>
@@ -21,7 +22,7 @@ LatticeElementBase::AddElementBase (amrex::ParmParse & pp_element, amrex::Partic
 {
     // Read in the length of the element and save the start and end, and update z_location
     amrex::ParticleReal ds;
-    pp_element.get("ds", ds);
+    utils::parser::getWithParser(pp_element, "ds", ds);
 
     h_zs.push_back(z_location);
     z_location += ds;
diff --git a/Source/BoundaryConditions/WarpX_PML_kernels.H b/Source/BoundaryConditions/WarpX_PML_kernels.H
index 14f56c1f5fc..1396613b4f5 100644
--- a/Source/BoundaryConditions/WarpX_PML_kernels.H
+++ b/Source/BoundaryConditions/WarpX_PML_kernels.H
@@ -14,17 +14,15 @@
 #include <AMReX.H>
 #include <AMReX_FArrayBox.H>
 
-using namespace amrex;
-
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_ex (int i, int j, int k, Array4<Real> const& Ex,
+void warpx_damp_pml_ex (int i, int j, int k, amrex::Array4<amrex::Real> const& Ex,
                         const amrex::IntVect& Ex_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool dive_cleaning)
 {
@@ -94,14 +92,14 @@ void warpx_damp_pml_ex (int i, int j, int k, Array4<Real> const& Ex,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_ey (int i, int j, int k, Array4<Real> const& Ey,
+void warpx_damp_pml_ey (int i, int j, int k, amrex::Array4<amrex::Real> const& Ey,
                         const amrex::IntVect& Ey_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool dive_cleaning)
 {
@@ -168,14 +166,14 @@ void warpx_damp_pml_ey (int i, int j, int k, Array4<Real> const& Ey,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_ez (int i, int j, int k, Array4<Real> const& Ez,
+void warpx_damp_pml_ez (int i, int j, int k, amrex::Array4<amrex::Real> const& Ez,
                         const amrex::IntVect& Ez_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool dive_cleaning)
 {
@@ -245,14 +243,14 @@ void warpx_damp_pml_ez (int i, int j, int k, Array4<Real> const& Ez,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_bx (int i, int j, int k, Array4<Real> const& Bx,
+void warpx_damp_pml_bx (int i, int j, int k, amrex::Array4<amrex::Real> const& Bx,
                         const amrex::IntVect& Bx_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool divb_cleaning)
 {
@@ -322,14 +320,14 @@ void warpx_damp_pml_bx (int i, int j, int k, Array4<Real> const& Bx,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_by (int i, int j, int k, Array4<Real> const& By,
+void warpx_damp_pml_by (int i, int j, int k, amrex::Array4<amrex::Real> const& By,
                         const amrex::IntVect& By_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool divb_cleaning)
 {
@@ -396,14 +394,14 @@ void warpx_damp_pml_by (int i, int j, int k, Array4<Real> const& By,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_bz (int i, int j, int k, Array4<Real> const& Bz,
+void warpx_damp_pml_bz (int i, int j, int k, amrex::Array4<amrex::Real> const& Bz,
                         const amrex::IntVect& Bz_stag,
-                        const Real* const sigma_fac_x,
-                        const Real* const sigma_fac_y,
-                        const Real* const sigma_fac_z,
-                        const Real* const sigma_star_fac_x,
-                        const Real* const sigma_star_fac_y,
-                        const Real* const sigma_star_fac_z,
+                        const amrex::Real* const sigma_fac_x,
+                        const amrex::Real* const sigma_fac_y,
+                        const amrex::Real* const sigma_fac_z,
+                        const amrex::Real* const sigma_star_fac_x,
+                        const amrex::Real* const sigma_star_fac_y,
+                        const amrex::Real* const sigma_star_fac_z,
                         int xlo, int ylo, int zlo,
                         const bool divb_cleaning)
 {
@@ -473,14 +471,14 @@ void warpx_damp_pml_bz (int i, int j, int k, Array4<Real> const& Bz,
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void warpx_damp_pml_scalar (int i, int j, int k, Array4<Real> const& arr,
+void warpx_damp_pml_scalar (int i, int j, int k, amrex::Array4<amrex::Real> const& arr,
                             const amrex::IntVect& arr_stag,
-                            const Real* const sigma_fac_x,
-                            const Real* const sigma_fac_y,
-                            const Real* const sigma_fac_z,
-                            const Real* const sigma_star_fac_x,
-                            const Real* const sigma_star_fac_y,
-                            const Real* const sigma_star_fac_z,
+                            const amrex::Real* const sigma_fac_x,
+                            const amrex::Real* const sigma_fac_y,
+                            const amrex::Real* const sigma_fac_z,
+                            const amrex::Real* const sigma_star_fac_x,
+                            const amrex::Real* const sigma_star_fac_y,
+                            const amrex::Real* const sigma_star_fac_z,
                             int xlo, int ylo, int zlo)
 {
 #if defined(WARPX_DIM_1D_Z)
diff --git a/Source/Diagnostics/ReducedDiags/CMakeLists.txt b/Source/Diagnostics/ReducedDiags/CMakeLists.txt
index b2a31267087..e63764bc24f 100644
--- a/Source/Diagnostics/ReducedDiags/CMakeLists.txt
+++ b/Source/Diagnostics/ReducedDiags/CMakeLists.txt
@@ -3,6 +3,7 @@ foreach(D IN LISTS WarpX_DIMS)
     target_sources(lib_${SD}
       PRIVATE
         BeamRelevant.cpp
+        ColliderRelevant.cpp
         FieldEnergy.cpp
         FieldProbe.cpp
         FieldProbeParticleContainer.cpp
diff --git a/Source/Diagnostics/ReducedDiags/ColliderRelevant.H b/Source/Diagnostics/ReducedDiags/ColliderRelevant.H
new file mode 100644
index 00000000000..1998c56812d
--- /dev/null
+++ b/Source/Diagnostics/ReducedDiags/ColliderRelevant.H
@@ -0,0 +1,61 @@
+/* Copyright 2023 The WarpX Community
+ *
+ * This file is part of WarpX.
+ *
+ * Authors: Arianna Formenti
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef WARPX_DIAGNOSTICS_REDUCEDDIAGS_COLLIDERRELEVANT_H_
+#define WARPX_DIAGNOSTICS_REDUCEDDIAGS_COLLIDERRELEVANT_H_
+
+#include "ReducedDiags.H"
+
+#include <map>
+#include <string>
+#include <vector>
+
+/**
+ *  This class contains diagnostics that are relevant to colliders.
+ */
+class ColliderRelevant : public ReducedDiags
+{
+public:
+
+    /**
+     * constructor
+     * @param[in] rd_name reduced diags names
+     */
+    ColliderRelevant(std::string rd_name);
+
+    /// name of the two colliding species
+    std::vector<std::string> m_beam_name;
+
+    /**
+     * \brief This function computes collider-relevant diagnostics.
+     * @param[in] step current time step
+     *
+     * [0]step, [1]time, [2]dL/dt,
+     * for first species:
+     * [3]chi_min, [4]chi_ave, [5] chi_max,
+     * [6]x_ave, [7]x_std,
+     * [8]y_ave, [9]y_std,
+     * [10]thetax_min, [11]thetax_ave, [12]thetax_max, [13]thetax_std,
+     * [14]thetay_min, [15]thetay_ave, [16]thetay_max, [17]thetay_std
+     * same for second species follows.
+     */
+    void ComputeDiags(int step) override final;
+
+private:
+    /// auxiliary structure to store headers and indices of the reduced diagnostics
+    struct aux_header_index
+    {
+        std::string header;
+        int idx;
+    };
+
+    /// map to store header texts and indices of the reduced diagnostics
+    std::map<std::string, aux_header_index> m_headers_indices;
+};
+
+#endif  // WARPX_DIAGNOSTICS_REDUCEDDIAGS_COLLIDERRELEVANT_H_
diff --git a/Source/Diagnostics/ReducedDiags/ColliderRelevant.cpp b/Source/Diagnostics/ReducedDiags/ColliderRelevant.cpp
new file mode 100644
index 00000000000..1e2c16ac737
--- /dev/null
+++ b/Source/Diagnostics/ReducedDiags/ColliderRelevant.cpp
@@ -0,0 +1,548 @@
+/* Copyright 2023 The WarpX Community
+ *
+ * This file is part of WarpX.
+ *
+ * Authors: Arianna Formenti, Yinjian Zhao
+ * License: BSD-3-Clause-LBNL
+ */
+#include "ColliderRelevant.H"
+
+#include "Diagnostics/ReducedDiags/ReducedDiags.H"
+#if (defined WARPX_QED)
+#   include "Particles/ElementaryProcess/QEDInternals/QedChiFunctions.H"
+#endif
+#include "Particles/Gather/FieldGather.H"
+#include "Particles/Gather/GetExternalFields.H"
+#include "Particles/MultiParticleContainer.H"
+#include "Particles/Pusher/GetAndSetPosition.H"
+#include "Particles/SpeciesPhysicalProperties.H"
+#include "Particles/WarpXParticleContainer.H"
+#include "Utils/WarpXConst.H"
+#include "Utils/TextMsg.H"
+#include "WarpX.H"
+
+#include <AMReX_Algorithm.H>
+#include <AMReX_Array.H>
+#include <AMReX_Array4.H>
+#include <AMReX_Box.H>
+#include <AMReX_Dim3.H>
+#include <AMReX_Extension.H>
+#include <AMReX_FArrayBox.H>
+#include <AMReX_FabArray.H>
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_IndexType.H>
+#include <AMReX_IntVect.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_PODVector.H>
+#include <AMReX_ParIter.H>
+#include <AMReX_ParallelDescriptor.H>
+#include <AMReX_ParmParse.H>
+#include <AMReX_ParticleReduce.H>
+#include <AMReX_Particles.H>
+#include <AMReX_Print.H>
+#include <AMReX_REAL.H>
+#include <AMReX_Reduce.H>
+#include <AMReX_Tuple.H>
+#include <AMReX_Vector.H>
+
+#include <ablastr/coarsen/sample.H>
+#include <ablastr/warn_manager/WarnManager.H>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <fstream>
+#include <limits>
+#include <map>
+#include <memory>
+#include <vector>
+
+using namespace amrex;
+
+ColliderRelevant::ColliderRelevant (std::string rd_name)
+: ReducedDiags{std::move(rd_name)}
+{
+    // read colliding species names - must be 2
+    amrex::ParmParse pp_rd_name(m_rd_name);
+    pp_rd_name.getarr("species", m_beam_name);
+
+    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+        m_beam_name.size() == 2u,
+        "Collider-relevant diagnostics must involve exactly two species");
+
+    // RZ coordinate is not supported
+#if (defined WARPX_DIM_RZ)
+    WARPX_ABORT_WITH_MESSAGE(
+        "Collider-relevant diagnostics do not work in RZ geometry.");
+#endif
+
+    ablastr::warn_manager::WMRecordWarning(
+        "Diagnostics",
+        "The collider-relevant reduced diagnostic is meant for \
+        colliding species propagating along the z direction.",
+        ablastr::warn_manager::WarnPriority::low);
+
+    ablastr::warn_manager::WMRecordWarning(
+        "Diagnostics",
+        "The collider-relevant reduced diagnostic only considers the \
+        coarsest level of refinement for the calculations involving chi.",
+        ablastr::warn_manager::WarnPriority::low);
+
+    // get WarpX class object
+    auto& warpx = WarpX::GetInstance();
+
+    // get MultiParticleContainer class object
+    const MultiParticleContainer& mypc =  warpx.GetPartContainer();
+
+    // loop over species
+    for (int i_s = 0; i_s < 2; ++i_s)
+    {
+        // get WarpXParticleContainer class object
+        const WarpXParticleContainer& myspc = mypc.GetParticleContainerFromName(m_beam_name[i_s]);
+
+        // get charge
+        amrex::ParticleReal const q = myspc.getCharge();
+
+        // photon number density is not available yet
+        WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+            q!=amrex::Real(0.0),
+            "Collider-relevant diagnostic does not work for neutral species yet");
+    }
+
+    // function to fill a vector with diags names and create corresponding entry in header
+    std::vector<std::string> all_diag_names;
+    auto add_diag = [&,c=0](
+        const std::string& name, const std::string& header) mutable {
+        m_headers_indices[name] = aux_header_index{header, c++};
+        all_diag_names.push_back(name);
+    };
+
+#if (defined WARPX_DIM_3D)
+    add_diag("dL_dt", "dL_dt(m^-2*s^-1)");
+#elif (defined WARPX_DIM_XZ)
+    add_diag("dL_dt", "dL_dt(m^-1*s^-1)");
+#else
+    add_diag("dL_dt", "dL_dt(s^-1)");
+#endif
+
+    // loop over species
+    for (int i_s = 0; i_s < 2; ++i_s)
+    {
+        // get WarpXParticleContainer class object
+        const WarpXParticleContainer& myspc = mypc.GetParticleContainerFromName(m_beam_name[i_s]);
+
+        if (myspc.DoQED()){
+            add_diag("chimin_"+m_beam_name[i_s], "chi_min_"+m_beam_name[i_s]+"()");
+            add_diag("chiave_"+m_beam_name[i_s], "chi_ave_"+m_beam_name[i_s]+"()");
+            add_diag("chimax_"+m_beam_name[i_s], "chi_max_"+m_beam_name[i_s]+"()");
+        }
+#if (defined WARPX_DIM_3D)
+        add_diag("x_ave_"+m_beam_name[i_s], "x_ave_"+m_beam_name[i_s]+"(m)");
+        add_diag("x_std_"+m_beam_name[i_s], "x_std_"+m_beam_name[i_s]+"(m)");
+        add_diag("y_ave_"+m_beam_name[i_s], "y_ave_"+m_beam_name[i_s]+"(m)");
+        add_diag("y_std_"+m_beam_name[i_s], "y_std_"+m_beam_name[i_s]+"(m)");
+        add_diag("thetax_min_"+m_beam_name[i_s], "theta_x_min_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_ave_"+m_beam_name[i_s], "theta_x_ave_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_max_"+m_beam_name[i_s], "theta_x_max_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_std_"+m_beam_name[i_s], "theta_x_std_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetay_min_"+m_beam_name[i_s], "theta_y_min_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetay_ave_"+m_beam_name[i_s], "theta_y_ave_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetay_max_"+m_beam_name[i_s], "theta_y_max_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetay_std_"+m_beam_name[i_s], "theta_y_std_"+m_beam_name[i_s]+"(rad)");
+
+#elif (defined WARPX_DIM_XZ)
+        add_diag("x_ave_"+m_beam_name[i_s], "x_ave_"+m_beam_name[i_s]+"(m)");
+        add_diag("x_std_"+m_beam_name[i_s], "x_std_"+m_beam_name[i_s]+"(m)");
+        add_diag("thetax_min_"+m_beam_name[i_s], "theta_x_min_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_ave_"+m_beam_name[i_s], "theta_x_ave_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_max_"+m_beam_name[i_s], "theta_x_max_"+m_beam_name[i_s]+"(rad)");
+        add_diag("thetax_std_"+m_beam_name[i_s], "theta_x_std_"+m_beam_name[i_s]+"(rad)");
+#endif
+        m_data.resize(all_diag_names.size());
+    }
+
+    if (amrex::ParallelDescriptor::IOProcessor())
+    {
+        if ( m_write_header )
+        {
+            // open file
+            std::ofstream ofs;
+            ofs.open(m_path + m_rd_name + "." + m_extension,
+                std::ofstream::out | std::ofstream::app);
+            // write header row
+            int off = 0;
+            ofs << "#";
+            ofs << "[" << off++ << "]step()";
+            ofs << m_sep;
+            ofs << "[" << off++ << "]time(s)";
+            for (const auto& name : all_diag_names){
+                const auto& el = m_headers_indices[name];
+                ofs << m_sep << "[" << el.idx + off << "]" << el.header;
+            }
+            ofs << std::endl;
+            // close file
+            ofs.close();
+        }
+    }
+}
+
+void ColliderRelevant::ComputeDiags (int step)
+{
+#if defined(WARPX_DIM_RZ)
+    amrex::ignore_unused(step);
+#else
+
+    // Judge if the diags should be done
+    if (!m_intervals.contains(step+1)) { return; }
+
+    // get MultiParticleContainer class object
+    const MultiParticleContainer& mypc = WarpX::GetInstance().GetPartContainer();
+
+    // get a reference to WarpX instance
+    auto& warpx = WarpX::GetInstance();
+
+    // get cell volume
+    amrex::Geometry const & geom = warpx.Geom(0);
+    amrex::Real dV = AMREX_D_TERM(geom.CellSize(0), *geom.CellSize(1), *geom.CellSize(2));
+
+    const auto get_idx = [&](const std::string& name){
+        return m_headers_indices.at(name).idx;
+    };
+
+    std::array<std::unique_ptr<amrex::MultiFab>, 2> num_dens;
+
+    // loop over species
+    for (int i_s = 0; i_s < 2; ++i_s)
+    {
+        // get WarpXParticleContainer class object
+        WarpXParticleContainer& myspc = mypc.GetParticleContainerFromName(m_beam_name[i_s]);
+        // get charge
+        amrex::ParticleReal const q = myspc.getCharge();
+
+        using PType = typename WarpXParticleContainer::SuperParticleType;
+
+        num_dens[i_s] = myspc.GetChargeDensity(0);
+        num_dens[i_s]->mult(1./q);
+
+#if defined(WARPX_DIM_1D_Z)
+        // w_tot
+        amrex::Real w_tot = ReduceSum( myspc,
+            [=] AMREX_GPU_HOST_DEVICE (const PType& p)
+            {
+                return p.rdata(PIdx::w);
+            });
+        amrex::ParallelDescriptor::ReduceRealSum(w_tot);
+#elif defined(WARPX_DIM_XZ)
+        // w_tot
+        // x_ave,
+        // thetax_min, thetax_ave, thetax_max
+        amrex::ReduceOps<ReduceOpSum,
+                         ReduceOpSum,
+                         ReduceOpMin, ReduceOpSum, ReduceOpMax> reduce_ops;
+        auto r = amrex::ParticleReduce<amrex::ReduceData<Real,
+                                                         Real,
+                                                         Real, Real, Real>>(
+            myspc,
+            [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<Real,
+                                                                             Real,
+                                                                             Real, Real, Real>
+            {
+                const amrex::Real w  = p.rdata(PIdx::w);
+                const amrex::Real x = p.pos(0);
+                const amrex::Real ux = p.rdata(PIdx::ux);
+                const amrex::Real uz = p.rdata(PIdx::uz);
+                const amrex::Real thetax = std::atan2(ux, uz);
+                return {w, w*x, thetax, w*thetax, thetax};
+            },
+            reduce_ops);
+
+        amrex::Real w_tot  = amrex::get<0>(r);
+        amrex::Real x_ave  = amrex::get<1>(r);
+        amrex::Real thetax_min  = amrex::get<2>(r);
+        amrex::Real thetax_ave  = amrex::get<3>(r);
+        amrex::Real thetax_max  = amrex::get<4>(r);
+
+        amrex::ParallelDescriptor::ReduceRealSum({w_tot, x_ave, thetax_ave});
+        amrex::ParallelDescriptor::ReduceRealMin({thetax_min});
+        amrex::ParallelDescriptor::ReduceRealMax({thetax_max});
+
+        // x_std, thetax_std
+        amrex::Real x_std = 0.0_rt;
+        amrex::Real thetax_std = 0.0_rt;
+
+        if (w_tot > 0.0_rt)
+        {
+            x_ave = x_ave / w_tot;
+            thetax_ave = thetax_ave / w_tot;
+
+            amrex::ReduceOps<ReduceOpSum, ReduceOpSum> reduce_ops_std;
+            auto r_std = amrex::ParticleReduce<amrex::ReduceData<Real, Real>>(
+                myspc,
+                [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<Real, Real>
+                {
+                    const amrex::Real w  = p.rdata(PIdx::w);
+                    const amrex::Real x = p.pos(0);
+                    const amrex::Real ux = p.rdata(PIdx::ux);
+                    const amrex::Real uz = p.rdata(PIdx::uz);
+                    const amrex::Real thetax = std::atan2(ux, uz);
+                    const amrex::Real tmp1 = (x - x_ave)*(x - x_ave)*w;
+                    const amrex::Real tmp2 = (thetax - thetax_ave)*(thetax - thetax_ave)*w;
+                    return {tmp1, tmp2};
+                },
+                reduce_ops_std);
+
+            x_std = amrex::get<0>(r_std);
+            thetax_std = amrex::get<1>(r_std);
+
+            amrex::ParallelDescriptor::ReduceRealSum({x_std, thetax_std});
+
+            x_std = std::sqrt(x_std / w_tot);
+            thetax_std = std::sqrt(thetax_std / w_tot);
+        }
+
+        m_data[get_idx("x_ave_"+m_beam_name[i_s])] = x_ave;
+        m_data[get_idx("x_std_"+m_beam_name[i_s])] = x_std;
+        m_data[get_idx("thetax_min_"+m_beam_name[i_s])] = thetax_min;
+        m_data[get_idx("thetax_ave_"+m_beam_name[i_s])] = thetax_ave;
+        m_data[get_idx("thetax_max_"+m_beam_name[i_s])] = thetax_max;
+        m_data[get_idx("thetax_std_"+m_beam_name[i_s])] = thetax_std;
+#elif defined(WARPX_DIM_3D)
+        // w_tot
+        // x_ave, y_ave,
+        // thetax_min, thetax_ave, thetax_max
+        // thetay_min, thetay_ave, thetay_max
+        amrex::ReduceOps<ReduceOpSum,
+                         ReduceOpSum, ReduceOpSum,
+                         ReduceOpMin, ReduceOpSum, ReduceOpMax,
+                         ReduceOpMin, ReduceOpSum, ReduceOpMax> reduce_ops;
+        auto r = amrex::ParticleReduce<amrex::ReduceData<Real,
+                                                         Real, Real,
+                                                         Real, Real, Real,
+                                                         Real, Real, Real>>(
+            myspc,
+            [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<Real,
+                                                                             Real, Real,
+                                                                             Real, Real, Real,
+                                                                             Real, Real, Real>
+            {
+                const amrex::Real w  = p.rdata(PIdx::w);
+                const amrex::Real x = p.pos(0);
+                const amrex::Real y = p.pos(1);
+                const amrex::Real ux = p.rdata(PIdx::ux);
+                const amrex::Real uy = p.rdata(PIdx::uy);
+                const amrex::Real uz = p.rdata(PIdx::uz);
+                const amrex::Real thetax = std::atan2(ux, uz);
+                const amrex::Real thetay = std::atan2(uy, uz);
+                return {w, w*x, w*y,
+                        thetax, w*thetax, thetax,
+                        thetay, w*thetay, thetay};
+            },
+            reduce_ops);
+
+        amrex::Real w_tot  = amrex::get<0>(r);
+        amrex::Real x_ave  = amrex::get<1>(r);
+        amrex::Real y_ave  = amrex::get<2>(r);
+        amrex::Real thetax_min  = amrex::get<3>(r);
+        amrex::Real thetax_ave  = amrex::get<4>(r);
+        amrex::Real thetax_max  = amrex::get<5>(r);
+        amrex::Real thetay_min  = amrex::get<6>(r);
+        amrex::Real thetay_ave  = amrex::get<7>(r);
+        amrex::Real thetay_max  = amrex::get<8>(r);
+
+        amrex::ParallelDescriptor::ReduceRealSum({w_tot, x_ave, y_ave, thetax_ave, thetay_ave});
+        amrex::ParallelDescriptor::ReduceRealMin({thetax_min, thetay_min});
+        amrex::ParallelDescriptor::ReduceRealMax({thetax_max, thetay_max});
+
+        // x_std, y_std, thetax_std, thetay_std
+        amrex::Real x_std = 0.0_rt;
+        amrex::Real y_std = 0.0_rt;
+        amrex::Real thetax_std = 0.0_rt;
+        amrex::Real thetay_std = 0.0_rt;
+
+        if (w_tot > 0.0_rt)
+        {
+            x_ave = x_ave / w_tot;
+            y_ave = y_ave / w_tot;
+            thetax_ave = thetax_ave / w_tot;
+            thetay_ave = thetay_ave / w_tot;
+
+            amrex::ReduceOps<ReduceOpSum, ReduceOpSum, ReduceOpSum, ReduceOpSum> reduce_ops_std;
+            auto r_std = amrex::ParticleReduce<amrex::ReduceData<Real, Real, Real, Real>>(
+                myspc,
+                [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<Real, Real, Real, Real>
+                {
+                    const amrex::Real w  = p.rdata(PIdx::w);
+                    const amrex::Real x = p.pos(0);
+                    const amrex::Real ux = p.rdata(PIdx::ux);
+                    const amrex::Real y = p.pos(1);
+                    const amrex::Real uy = p.rdata(PIdx::uy);
+                    const amrex::Real uz = p.rdata(PIdx::uz);
+                    const amrex::Real thetax = std::atan2(ux, uz);
+                    const amrex::Real thetay = std::atan2(uy, uz);
+                    const amrex::Real tmp1 = (x - x_ave)*(x - x_ave)*w;
+                    const amrex::Real tmp2 = (y - y_ave)*(y - y_ave)*w;
+                    const amrex::Real tmp3 = (thetax - thetax_ave)*(thetax - thetax_ave)*w;
+                    const amrex::Real tmp4 = (thetay - thetay_ave)*(thetay - thetay_ave)*w;
+                    return {tmp1, tmp2, tmp3, tmp4};
+                },
+                reduce_ops_std);
+
+            x_std = amrex::get<0>(r_std);
+            y_std = amrex::get<1>(r_std);
+            thetax_std = amrex::get<2>(r_std);
+            thetay_std = amrex::get<3>(r_std);
+
+            amrex::ParallelDescriptor::ReduceRealSum({x_std, y_std, thetax_std, thetay_std});
+
+            x_std = std::sqrt(x_std / w_tot);
+            y_std = std::sqrt(y_std / w_tot);
+            thetax_std = std::sqrt(thetax_std / w_tot);
+            thetay_std = std::sqrt(thetay_std / w_tot);
+        }
+
+        m_data[get_idx("x_ave_"+m_beam_name[i_s])] = x_ave;
+        m_data[get_idx("x_std_"+m_beam_name[i_s])] = x_std;
+        m_data[get_idx("y_ave_"+m_beam_name[i_s])] = y_ave;
+        m_data[get_idx("y_std_"+m_beam_name[i_s])] = y_std;
+        m_data[get_idx("thetax_min_"+m_beam_name[i_s])] = thetax_min;
+        m_data[get_idx("thetax_ave_"+m_beam_name[i_s])] = thetax_ave;
+        m_data[get_idx("thetax_max_"+m_beam_name[i_s])] = thetax_max;
+        m_data[get_idx("thetax_std_"+m_beam_name[i_s])] = thetax_std;
+        m_data[get_idx("thetay_min_"+m_beam_name[i_s])] = thetay_min;
+        m_data[get_idx("thetay_ave_"+m_beam_name[i_s])] = thetay_ave;
+        m_data[get_idx("thetay_max_"+m_beam_name[i_s])] = thetay_max;
+        m_data[get_idx("thetay_std_"+m_beam_name[i_s])] = thetay_std;
+#endif
+
+#if (defined WARPX_QED)
+        // get mass
+        amrex::ParticleReal m = myspc.getMass();
+        const bool is_photon = myspc.AmIA<PhysicalSpecies::photon>();
+        if (is_photon) {
+            m = PhysConst::m_e;
+        }
+
+        // compute chimin, chiave and chimax
+        amrex::Real chimin_f = 0.0_rt;
+        amrex::Real chimax_f = 0.0_rt;
+        amrex::Real chiave_f = 0.0_rt;
+
+        if (myspc.DoQED())
+        {
+            // define variables in preparation for field gatheeduce_data.value()ring
+            const int n_rz_azimuthal_modes = WarpX::n_rz_azimuthal_modes;
+            const int nox = WarpX::nox;
+            const bool galerkin_interpolation = WarpX::galerkin_interpolation;
+            const amrex::IntVect ngEB = warpx.getngEB();
+
+            // TODO loop over refinement levels: for (int lev = 0; lev <= level_number; ++lev)
+            const int lev = 0;
+
+            // define variables in preparation for field gathering
+            const std::array<amrex::Real,3>& dx = WarpX::CellSize(std::max(lev, 0));
+            const amrex::GpuArray<amrex::Real, 3> dx_arr = {dx[0], dx[1], dx[2]};
+            const amrex::MultiFab & Ex = warpx.getEfield(lev,0);
+            const amrex::MultiFab & Ey = warpx.getEfield(lev,1);
+            const amrex::MultiFab & Ez = warpx.getEfield(lev,2);
+            const amrex::MultiFab & Bx = warpx.getBfield(lev,0);
+            const amrex::MultiFab & By = warpx.getBfield(lev,1);
+            const amrex::MultiFab & Bz = warpx.getBfield(lev,2);
+
+            // declare reduce_op
+            ReduceOps<ReduceOpMin, ReduceOpMax, ReduceOpSum> reduce_op;
+            ReduceData<Real, Real, Real> reduce_data(reduce_op);
+            using ReduceTuple = typename decltype(reduce_data)::Type;
+
+            // Loop over boxes
+            for (WarpXParIter pti(myspc, lev); pti.isValid(); ++pti)
+            {
+                const auto GetPosition = GetParticlePosition(pti);
+                // get particle arrays
+                amrex::ParticleReal* const AMREX_RESTRICT ux = pti.GetAttribs()[PIdx::ux].dataPtr();
+                amrex::ParticleReal* const AMREX_RESTRICT uy = pti.GetAttribs()[PIdx::uy].dataPtr();
+                amrex::ParticleReal* const AMREX_RESTRICT uz = pti.GetAttribs()[PIdx::uz].dataPtr();
+                amrex::ParticleReal* const AMREX_RESTRICT w = pti.GetAttribs()[PIdx::w].dataPtr();
+                // declare external fields
+                const int offset = 0;
+                const auto getExternalEB = GetExternalEBField(pti, offset);
+                // define variables in preparation for field gathering
+                amrex::Box box = pti.tilebox();
+                box.grow(ngEB);
+                const amrex::Dim3 lo = amrex::lbound(box);
+                const std::array<amrex::Real, 3>& xyzmin = WarpX::LowerCorner(box, lev, 0._rt);
+                const amrex::GpuArray<amrex::Real, 3> xyzmin_arr = {xyzmin[0], xyzmin[1], xyzmin[2]};
+                const amrex::Array4<const amrex::Real> & ex_arr = Ex[pti].array();
+                const amrex::Array4<const amrex::Real> & ey_arr = Ey[pti].array();
+                const amrex::Array4<const amrex::Real> & ez_arr = Ez[pti].array();
+                const amrex::Array4<const amrex::Real> & bx_arr = Bx[pti].array();
+                const amrex::Array4<const amrex::Real> & by_arr = By[pti].array();
+                const amrex::Array4<const amrex::Real> & bz_arr = Bz[pti].array();
+                const amrex::IndexType ex_type = Ex[pti].box().ixType();
+                const amrex::IndexType ey_type = Ey[pti].box().ixType();
+                const amrex::IndexType ez_type = Ez[pti].box().ixType();
+                const amrex::IndexType bx_type = Bx[pti].box().ixType();
+                const amrex::IndexType by_type = By[pti].box().ixType();
+                const amrex::IndexType bz_type = Bz[pti].box().ixType();
+
+                // evaluate reduce_op
+                reduce_op.eval(pti.numParticles(), reduce_data,
+                [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
+                {
+                    // get external fields
+                    amrex::ParticleReal xp, yp, zp;
+                    GetPosition(i, xp, yp, zp);
+                    amrex::ParticleReal ex = 0._rt, ey = 0._rt, ez = 0._rt;
+                    amrex::ParticleReal bx = 0._rt, by = 0._rt, bz = 0._rt;
+                    getExternalEB(i, ex, ey, ez, bx, by, bz);
+
+                    // gather E and B
+                    doGatherShapeN(xp, yp, zp,
+                        ex, ey, ez, bx, by, bz,
+                        ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                        ex_type, ey_type, ez_type,
+                        bx_type, by_type, bz_type,
+                        dx_arr, xyzmin_arr, lo,
+                        n_rz_azimuthal_modes, nox, galerkin_interpolation);
+                    // compute chi
+                    amrex::Real chi = 0.0_rt;
+                    if (is_photon) {
+                        chi = QedUtils::chi_photon(ux[i]*m, uy[i]*m, uz[i]*m,
+                                            ex, ey, ez, bx, by, bz);
+                    } else {
+                        chi = QedUtils::chi_ele_pos(ux[i]*m, uy[i]*m, uz[i]*m,
+                                            ex, ey, ez, bx, by, bz);
+                    }
+                    return {chi, chi, chi*w[i]};
+                });
+            }
+            auto val = reduce_data.value();
+            chimin_f = get<0>(val);
+            chimax_f = get<1>(val);
+            chiave_f = get<2>(val);
+            amrex::ParallelDescriptor::ReduceRealMin(chimin_f);
+            amrex::ParallelDescriptor::ReduceRealMax(chimax_f);
+            amrex::ParallelDescriptor::ReduceRealSum(chiave_f);
+
+            m_data[get_idx("chimin_"+m_beam_name[i_s])] = chimin_f;
+            m_data[get_idx("chiave_"+m_beam_name[i_s])] = chiave_f/w_tot;
+            m_data[get_idx("chimax_"+m_beam_name[i_s])] = chimax_f;
+        }
+#endif
+    } // end loop over species
+
+    // make density MultiFabs from nodal to cell centered
+    amrex::BoxArray ba = warpx.boxArray(0);
+    amrex::DistributionMapping dmap = warpx.DistributionMap(0);
+    constexpr int ncomp = 1;
+    constexpr int ngrow = 0;
+    amrex::MultiFab mf_dst1(ba.convert(amrex::IntVect::TheCellVector()), dmap, ncomp, ngrow);
+    amrex::MultiFab mf_dst2(ba.convert(amrex::IntVect::TheCellVector()), dmap, ncomp, ngrow);
+    ablastr::coarsen::sample::Coarsen(mf_dst1, *num_dens[0], 0, 0, ncomp, ngrow);
+    ablastr::coarsen::sample::Coarsen(mf_dst2, *num_dens[1], 0, 0, ncomp, ngrow);
+
+    // compute luminosity
+    amrex::Real const n1_dot_n2 = amrex::MultiFab::Dot(mf_dst1, 0, mf_dst2, 0, 1, 0);
+    amrex::Real const lumi = 2. * PhysConst::c * n1_dot_n2 * dV;
+    m_data[get_idx("dL_dt")] = lumi;
+#endif // not RZ
+}
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.H b/Source/Diagnostics/ReducedDiags/FieldProbe.H
index e6ec60f040e..2b232264329 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbe.H
+++ b/Source/Diagnostics/ReducedDiags/FieldProbe.H
@@ -19,8 +19,6 @@
 #include <string>
 #include <vector>
 
-using namespace amrex::literals;
-
 /**
  * This enumeration is used for assigning structural geometry levels (point vs line vs plane)
  */
@@ -69,16 +67,16 @@ public:
     static const int noutputs = 11;
 
 private:
-    amrex::Real x_probe = 0._rt;
-    amrex::Real y_probe = 0._rt;
-    amrex::Real x1_probe = 0._rt;
-    amrex::Real y1_probe = 0._rt;
-    amrex::Real target_normal_x = 0._rt;
-    amrex::Real target_normal_y = 1._rt;
-    amrex::Real target_normal_z = 0._rt;
-    amrex::Real target_up_x = 0._rt;
-    amrex::Real target_up_y = 0._rt;
-    amrex::Real target_up_z = 1._rt;
+    amrex::Real x_probe = 0.;
+    amrex::Real y_probe = 0.;
+    amrex::Real x1_probe = 0.;
+    amrex::Real y1_probe = 0.;
+    amrex::Real target_normal_x = 0.;
+    amrex::Real target_normal_y = 1.;
+    amrex::Real target_normal_z = 0.;
+    amrex::Real target_up_x = 0.;
+    amrex::Real target_up_y = 0.;
+    amrex::Real target_up_z = 1.;
     amrex::Real z_probe, z1_probe;
     amrex::Real detector_radius;
 
diff --git a/Source/Diagnostics/ReducedDiags/Make.package b/Source/Diagnostics/ReducedDiags/Make.package
index f265e302d3a..a1f08a24da3 100644
--- a/Source/Diagnostics/ReducedDiags/Make.package
+++ b/Source/Diagnostics/ReducedDiags/Make.package
@@ -7,6 +7,7 @@ CEXE_sources += FieldProbe.cpp
 CEXE_sources += FieldProbeParticleContainer.cpp
 CEXE_sources += FieldMomentum.cpp
 CEXE_sources += BeamRelevant.cpp
+CEXE_sources += ColliderRelevant.cpp
 CEXE_sources += LoadBalanceCosts.cpp
 CEXE_sources += LoadBalanceEfficiency.cpp
 CEXE_sources += ParticleHistogram.cpp
diff --git a/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp b/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
index d895c4caa23..73e63aeb4d3 100644
--- a/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
+++ b/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
@@ -8,6 +8,7 @@
 
 #include "BeamRelevant.H"
 #include "ChargeOnEB.H"
+#include "ColliderRelevant.H"
 #include "FieldEnergy.H"
 #include "FieldMaximum.H"
 #include "FieldProbe.H"
@@ -59,6 +60,7 @@ MultiReducedDiags::MultiReducedDiags ()
             {"FieldReduction",        [](CS s){return std::make_unique<FieldReduction>(s);}},
             {"RhoMaximum",            [](CS s){return std::make_unique<RhoMaximum>(s);}},
             {"BeamRelevant",          [](CS s){return std::make_unique<BeamRelevant>(s);}},
+            {"ColliderRelevant",      [](CS s){return std::make_unique<ColliderRelevant>(s);}},
             {"LoadBalanceCosts",      [](CS s){return std::make_unique<LoadBalanceCosts>(s);}},
             {"LoadBalanceEfficiency", [](CS s){return std::make_unique<LoadBalanceEfficiency>(s);}},
             {"ParticleHistogram",     [](CS s){return std::make_unique<ParticleHistogram>(s);}},
diff --git a/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp b/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
index 77377c7d5ed..758b48b7262 100644
--- a/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
+++ b/Source/Diagnostics/ReducedDiags/ReducedDiags.cpp
@@ -24,9 +24,8 @@ using namespace amrex;
 
 // constructor
 ReducedDiags::ReducedDiags (std::string rd_name)
+    : m_rd_name(std::move(rd_name))
 {
-    m_rd_name = rd_name;
-
     BackwardCompatibility();
 
     const ParmParse pp_rd_name(m_rd_name);
diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp
index 73ccc164676..067cd9a11b6 100644
--- a/Source/Evolve/WarpXEvolve.cpp
+++ b/Source/Evolve/WarpXEvolve.cpp
@@ -216,7 +216,7 @@ WarpX::Evolve (int numsteps)
         // Resample particles
         // +1 is necessary here because value of step seen by user (first step is 1) is different than
         // value of step in code (first step is 0)
-        mypc->doResampling(istep[0]+1);
+        mypc->doResampling(istep[0]+1, verbose);
 
         if (num_mirrors>0){
             applyMirrors(cur_time);
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmGalileanRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmGalileanRZ.cpp
index 79285948e5f..3188712d61a 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmGalileanRZ.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmGalileanRZ.cpp
@@ -15,7 +15,6 @@
 
 using namespace amrex::literals;
 
-
 /* \brief Initialize coefficients for the update equation */
 PsatdAlgorithmGalileanRZ::PsatdAlgorithmGalileanRZ (SpectralKSpaceRZ const & spectral_kspace,
                                                     amrex::DistributionMapping const & dm,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmPmlRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmPmlRZ.cpp
index b4285bfde5a..45a21f2ffec 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmPmlRZ.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmPmlRZ.cpp
@@ -13,6 +13,7 @@
 
 #include <cmath>
 
+using namespace amrex::literals;
 
 /* \brief Initialize coefficients for the update equation */
 PsatdAlgorithmPmlRZ::PsatdAlgorithmPmlRZ (SpectralKSpaceRZ const & spectral_kspace,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmRZ.cpp
index 9fa05e134de..d5a95bc90bf 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmRZ.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithmRZ.cpp
@@ -13,6 +13,7 @@
 
 #include <cmath>
 
+using namespace amrex::literals;
 
 /* \brief Initialize coefficients for the update equation */
 PsatdAlgorithmRZ::PsatdAlgorithmRZ (SpectralKSpaceRZ const & spectral_kspace,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithmRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithmRZ.cpp
index 7e9794d3171..acbe3815cc1 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithmRZ.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithmRZ.cpp
@@ -9,6 +9,7 @@
 #include <cmath>
 
 using namespace amrex;
+using namespace amrex::literals;
 
 /**
  * \brief Compute spectral divergence of E
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp
index debca3afaf5..db189f4287a 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp
@@ -14,6 +14,7 @@
 
 #include <AMReX_Config.H>
 
+using namespace amrex::literals;
 
 /* \brief Initialize fields in spectral space, and FFT plans
  *
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
index 64ffc80ac37..2285ab05679 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
@@ -32,14 +32,11 @@
 !                                          (www.jpmoreau.fr)
 ! ------------------------------------------------------------------------ */
 
-#include "Utils/WarpXConst.H"
+#ifndef WARPX_BESSEL_ROOTS_H_
+#define WARPX_BESSEL_ROOTS_H_
 
 #include <AMReX_REAL.H>
-
-#include <cmath>
-
-
-void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int *ier);
+#include <AMReX_Vector.H>
 
 /*----------------------------------------------------------------------
  *    calculate the first nk zeroes of bessel function j(n, x)
@@ -56,112 +53,6 @@ void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int
  *    abramowitz m. & stegun irene a.
  *    handbook of mathematical functions
  */
-void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier)  {
-    using namespace amrex::literals;
-
-    amrex::Real zeroj;
-    int ierror, ik, k;
-
-    const amrex::Real tol = 1e-14_rt;
-    const amrex::Real nitmx = 10;
-
-    const amrex::Real c1 = 1.8557571_rt;
-    const amrex::Real c2 = 1.033150_rt;
-    const amrex::Real c3 = 0.00397_rt;
-    const amrex::Real c4 = 0.0908_rt;
-    const amrex::Real c5 = 0.043_rt;
-
-    const amrex::Real t0 = 4.0_rt*n*n;
-    const amrex::Real t1 = t0 - 1.0_rt;
-    const amrex::Real t3 = 4.0_rt*t1*(7.0_rt*t0 - 31.0_rt);
-    const amrex::Real t5 = 32.0_rt*t1*((83.0_rt*t0 - 982.0_rt)*t0 + 3779.0_rt);
-    const amrex::Real t7 = 64.0_rt*t1*(((6949.0_rt*t0 - 153855.0_rt)*t0 + 1585743.0_rt)*t0 - 6277237.0_rt);
-
-    roots.resize(nk);
-    ier.resize(nk);
-
-    // first zero
-    if (n == 0) {
-        zeroj = c1 + c2 - c3 - c4 + c5;
-        SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
-        ier[0] = ierror;
-        roots[0] = zeroj;
-        ik = 1;
-    }
-    else {
-        // Include the trivial root
-        ier[0] = 0;
-        roots[0] = 0.;
-        const amrex::Real f1 = std::pow(n, (1.0_rt/3.0_rt));
-        const amrex::Real f2 = f1*f1*n;
-        const amrex::Real f3 = f1*n*n;
-        zeroj = n + c1*f1 + (c2/f1) - (c3/n) - (c4/f2) + (c5/f3);
-        SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
-        ier[1] = ierror;
-        roots[1] = zeroj;
-        ik = 2;
-    }
-
-    // other zeroes
-    // k counts the nontrivial roots
-    // ik counts all roots
-    k = 2;
-    while (ik < nk) {
-
-        // mac mahon's series for k >> n
-        const amrex::Real b0 = (k + 0.5_rt*n - 0.25_rt)*MathConst::pi;
-        const amrex::Real b1 = 8.0_rt*b0;
-        const amrex::Real b2 = b1*b1;
-        const amrex::Real b3 = 3.0_rt*b1*b2;
-        const amrex::Real b5 = 5.0_rt*b3*b2;
-        const amrex::Real b7 = 7.0_rt*b5*b2;
-
-        zeroj = b0 - (t1/b1) - (t3/b3) - (t5/b5) - (t7/b7);
-
-        const amrex::Real errj = std::abs(jn(n, zeroj));
-
-        // improve solution using procedure SecantRootFinder
-        if (errj > tol) SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
-
-        roots[ik] = zeroj;
-        ier[ik] = ierror;
-
-        k++;
-        ik++;
-    }
-}
-
-void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int *ier) {
-    using namespace amrex::literals;
-
-    amrex::Real p0, p1, q0, q1, dp, p;
-    amrex::Real c[2];
-
-    c[0] = 0.95_rt;
-    c[1] = 0.999_rt;
-    *ier = 0;
-
-    p = *zeroj;
-    for (int ntry=0 ; ntry <= 1 ; ntry++) {
-        p0 = c[ntry]*(*zeroj);
+void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier);
 
-        p1 = *zeroj;
-        q0 = jn(n, p0);
-        q1 = jn(n, p1);
-        for (int it=1; it <= nitmx; it++) {
-            if (q1 == q0) break;
-            p = p1 - q1*(p1 - p0)/(q1 - q0);
-            dp = p - p1;
-            if (it > 1 && std::abs(dp) < tol) {
-                *zeroj = p;
-                return;
-            }
-            p0 = p1;
-            q0 = q1;
-            p1 = p;
-            q1 = jn(n, p1);
-        }
-    }
-    *ier = 3;
-    *zeroj = p;
-}
+#endif // WARPX_BESSEL_ROOTS_H_
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.cpp
new file mode 100644
index 00000000000..210a4baffc7
--- /dev/null
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.cpp
@@ -0,0 +1,153 @@
+/* Copyright 2019 David Grote
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+/* -------------------------------------------------------------------------
+! program to calculate the first zeroes (root abscissas) of the first
+! kind bessel function of integer order n using the subroutine rootj.
+! --------------------------------------------------------------------------
+! sample run:
+!
+! (calculate the first 10 zeroes of 1st kind bessel function of order 2).
+!
+! zeroes of bessel function of order: 2
+!
+! number of calculated zeroes: 10
+!
+! table of root abcissas (5 items per line)
+!    5.135622    8.417244   11.619841   14.795952   17.959819
+    21.116997   24.270112   27.420574   30.569204   33.716520
+!
+! table of error codes (5 items per line)
+!   0   0   0   0   0
+!   0   0   0   0   0
+!
+! --------------------------------------------------------------------------
+! reference: from numath library by tuan dang trong in fortran 77
+!            [bibli 18].
+!
+!                               c++ release 1.0 by j-p moreau, paris
+!                                          (www.jpmoreau.fr)
+! ------------------------------------------------------------------------ */
+
+#include "BesselRoots.H"
+
+#include "Utils/WarpXConst.H"
+
+#include <cmath>
+
+namespace{
+
+    void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int *ier) {
+        using namespace amrex::literals;
+
+        amrex::Real p0, p1, q0, q1, dp, p;
+        amrex::Real c[2];
+
+        c[0] = 0.95_rt;
+        c[1] = 0.999_rt;
+        *ier = 0;
+
+        p = *zeroj;
+        for (int ntry=0 ; ntry <= 1 ; ntry++) {
+            p0 = c[ntry]*(*zeroj);
+
+            p1 = *zeroj;
+            q0 = jn(n, p0);
+            q1 = jn(n, p1);
+            for (int it=1; it <= nitmx; it++) {
+                if (q1 == q0) break;
+                p = p1 - q1*(p1 - p0)/(q1 - q0);
+                dp = p - p1;
+                if (it > 1 && std::abs(dp) < tol) {
+                    *zeroj = p;
+                    return;
+                }
+                p0 = p1;
+                q0 = q1;
+                p1 = p;
+                q1 = jn(n, p1);
+            }
+        }
+        *ier = 3;
+        *zeroj = p;
+    }
+
+}
+
+void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier)  {
+    using namespace amrex::literals;
+
+    amrex::Real zeroj;
+    int ierror, ik, k;
+
+    const amrex::Real tol = 1e-14_rt;
+    const amrex::Real nitmx = 10;
+
+    const amrex::Real c1 = 1.8557571_rt;
+    const amrex::Real c2 = 1.033150_rt;
+    const amrex::Real c3 = 0.00397_rt;
+    const amrex::Real c4 = 0.0908_rt;
+    const amrex::Real c5 = 0.043_rt;
+
+    const amrex::Real t0 = 4.0_rt*n*n;
+    const amrex::Real t1 = t0 - 1.0_rt;
+    const amrex::Real t3 = 4.0_rt*t1*(7.0_rt*t0 - 31.0_rt);
+    const amrex::Real t5 = 32.0_rt*t1*((83.0_rt*t0 - 982.0_rt)*t0 + 3779.0_rt);
+    const amrex::Real t7 = 64.0_rt*t1*(((6949.0_rt*t0 - 153855.0_rt)*t0 + 1585743.0_rt)*t0 - 6277237.0_rt);
+
+    roots.resize(nk);
+    ier.resize(nk);
+
+    // first zero
+    if (n == 0) {
+        zeroj = c1 + c2 - c3 - c4 + c5;
+        ::SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
+        ier[0] = ierror;
+        roots[0] = zeroj;
+        ik = 1;
+    }
+    else {
+        // Include the trivial root
+        ier[0] = 0;
+        roots[0] = 0.;
+        const amrex::Real f1 = std::pow(n, (1.0_rt/3.0_rt));
+        const amrex::Real f2 = f1*f1*n;
+        const amrex::Real f3 = f1*n*n;
+        zeroj = n + c1*f1 + (c2/f1) - (c3/n) - (c4/f2) + (c5/f3);
+        ::SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
+        ier[1] = ierror;
+        roots[1] = zeroj;
+        ik = 2;
+    }
+
+    // other zeroes
+    // k counts the nontrivial roots
+    // ik counts all roots
+    k = 2;
+    while (ik < nk) {
+
+        // mac mahon's series for k >> n
+        const amrex::Real b0 = (k + 0.5_rt*n - 0.25_rt)*MathConst::pi;
+        const amrex::Real b1 = 8.0_rt*b0;
+        const amrex::Real b2 = b1*b1;
+        const amrex::Real b3 = 3.0_rt*b1*b2;
+        const amrex::Real b5 = 5.0_rt*b3*b2;
+        const amrex::Real b7 = 7.0_rt*b5*b2;
+
+        zeroj = b0 - (t1/b1) - (t3/b3) - (t5/b5) - (t7/b7);
+
+        const amrex::Real errj = std::abs(jn(n, zeroj));
+
+        // improve solution using procedure SecantRootFinder
+        if (errj > tol) ::SecantRootFinder(n, nitmx, tol, &zeroj, &ierror);
+
+        roots[ik] = zeroj;
+        ier[ik] = ierror;
+
+        k++;
+        ik++;
+    }
+}
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/CMakeLists.txt b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/CMakeLists.txt
index a3d2b60bcc4..70c9740367f 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/CMakeLists.txt
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/CMakeLists.txt
@@ -1,5 +1,6 @@
 target_sources(lib_rz
   PRIVATE
+    BesselRoots.cpp
     SpectralHankelTransformer.cpp
     HankelTransform.cpp
 )
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
index 31a68ab1e98..da62382dffa 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
@@ -16,6 +16,7 @@
 #include <blas.hh>
 #include <lapack.hh>
 
+using namespace amrex::literals;
 
 HankelTransform::HankelTransform (int const hankel_order,
                                   int const azimuthal_mode,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/Make.package b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/Make.package
index 8bb1d7ef7b4..37ca9a93186 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/Make.package
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/Make.package
@@ -1,3 +1,4 @@
+CEXE_sources += BesselRoots.cpp
 CEXE_sources += SpectralHankelTransformer.cpp
 CEXE_sources += HankelTransform.cpp
 
diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H
index 87b81381ce9..b0afafa730b 100644
--- a/Source/Initialization/InjectorMomentum.H
+++ b/Source/Initialization/InjectorMomentum.H
@@ -94,6 +94,7 @@ namespace {
       * @param u_th Momentum spread
       * @param engine Object used to generate random numbers
       */
+    AMREX_FORCE_INLINE
     AMREX_GPU_HOST_DEVICE
     amrex::Real
     generateGaussianFluxDist( amrex::Real u_m, amrex::Real u_th, amrex::RandomEngine const& engine ) {
diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp
index 3de475d7011..b880d4a46e7 100644
--- a/Source/Initialization/PlasmaInjector.cpp
+++ b/Source/Initialization/PlasmaInjector.cpp
@@ -43,6 +43,8 @@
 #include <utility>
 #include <vector>
 
+using namespace amrex::literals;
+
 namespace {
     void StringParseAbortMessage(const std::string& var,
                                  const std::string& name) {
@@ -58,8 +60,6 @@ PlasmaInjector::PlasmaInjector (int ispecies, const std::string& name,
     const amrex::Geometry& geom):
     species_id{ispecies}, species_name{name}
 {
-    using namespace amrex::literals;
-
     const amrex::ParmParse pp_species_name(species_name);
 
 #ifdef AMREX_USE_GPU
diff --git a/Source/Laser/LaserProfiles.H b/Source/Laser/LaserProfiles.H
index 641bb072bc4..487e2d9e527 100644
--- a/Source/Laser/LaserProfiles.H
+++ b/Source/Laser/LaserProfiles.H
@@ -328,14 +328,14 @@ private:
      */
     struct{
 
-        /** Name of the lasy file geometry ("cartesian" for 3D cartesian or "thetaMode" for RZ) */
-        std::string fileGeom;
         /** Name of the binary file containing the data */
         std::string binary_file_name;
         /** Name of the lasy file containing the data */
         std::string lasy_file_name;
         /** true if the file is in the lasy format, false if it is in the binary format */
         bool file_in_lasy_format;
+        /** lasy file geometry ("cartesian" for 3D cartesian or "thetaMode" for RZ) */
+        int file_in_cartesian_geom;
         /** Dimensions of E_binary_data or E_lasy_data. nt, nx must be >=2.
          * If DIM=3, ny must be >=2 as well.
          * If DIM=2, ny must be 1 */
diff --git a/Source/Laser/LaserProfilesImpl/LaserProfileFromFile.cpp b/Source/Laser/LaserProfilesImpl/LaserProfileFromFile.cpp
index b67be64b47e..fbac9d9707f 100644
--- a/Source/Laser/LaserProfilesImpl/LaserProfileFromFile.cpp
+++ b/Source/Laser/LaserProfilesImpl/LaserProfileFromFile.cpp
@@ -150,7 +150,7 @@ WarpXLaserProfiles::FromFileLaserProfile::fill_amplitude (
         "Something bad has happened with the simulation time");
     }
     if (m_params.file_in_lasy_format){
-        if (m_params.fileGeom=="cartesian"){
+        if (m_params.file_in_cartesian_geom==1){
             internal_fill_amplitude_uniform_cartesian(idx_t_left, np, Xp, Yp, t, amplitude);
         } else {
             internal_fill_amplitude_uniform_cylindrical(idx_t_left, np, Xp, Yp, t, amplitude);
@@ -172,16 +172,17 @@ WarpXLaserProfiles::FromFileLaserProfile::parse_lasy_file(std::string lasy_file_
         WARPX_ALWAYS_ASSERT_WITH_MESSAGE(E.getAttribute("dataOrder").get<std::string>() == "C",
                                          "Reading from files with non-C dataOrder is not implemented");
 
-        m_params.fileGeom = E.getAttribute("geometry").get<std::string>();
+        std::string fileGeom = E.getAttribute("geometry").get<std::string>();
         auto E_laser = E[io::RecordComponent::SCALAR];
         auto extent = E_laser.getExtent();
         // Extract grid offset and grid spacing
         std::vector<double> offset = E.gridGlobalOffset();
         std::vector<double> position = E_laser.position<double>();
         std::vector<double> spacing = E.gridSpacing<double>();
-        if (m_params.fileGeom=="thetaMode") {
+        if (fileGeom=="thetaMode") {
             //Dimensions of lasy file data: {m,t,r}
             amrex::Print() << Utils::TextMsg::Info( "Found lasy file in RZ geometry" );
+            m_params.file_in_cartesian_geom = 0;
             m_params.n_rz_azimuthal_components = extent[0];
             m_params.nt = extent[1];
             m_params.nr = extent[2];
@@ -192,9 +193,10 @@ WarpXLaserProfiles::FromFileLaserProfile::parse_lasy_file(std::string lasy_file_
             m_params.t_max = m_params.t_min + (m_params.nt-1)*spacing[0];
             m_params.r_min = offset[1] + position[1]*spacing[1];
             m_params.r_max = m_params.r_min + (m_params.nr-1)*spacing[1];
-        } else if (m_params.fileGeom=="cartesian"){
+        } else if (fileGeom=="cartesian"){
             //Dimensions of lasy file data: {t,y,x}
             amrex::Print() << Utils::TextMsg::Info( "Found lasy file in 3D cartesian geometry");
+            m_params.file_in_cartesian_geom = 1;
             m_params.nt = extent[0];
             m_params.ny = extent[1];
             m_params.nx = extent[2];
@@ -214,6 +216,7 @@ WarpXLaserProfiles::FromFileLaserProfile::parse_lasy_file(std::string lasy_file_
     }
 
     //Broadcast parameters
+   ParallelDescriptor::Bcast(&m_params.file_in_cartesian_geom, 1, ParallelDescriptor::IOProcessorNumber());
     ParallelDescriptor::Bcast(&m_params.nt, 1, ParallelDescriptor::IOProcessorNumber());
     ParallelDescriptor::Bcast(&m_params.nx, 1, ParallelDescriptor::IOProcessorNumber());
     ParallelDescriptor::Bcast(&m_params.ny, 1, ParallelDescriptor::IOProcessorNumber());
@@ -316,7 +319,7 @@ WarpXLaserProfiles::FromFileLaserProfile::read_data_t_chunk (int t_begin, int t_
         "Reading [" + std::to_string(i_first) + ", " + std::to_string(i_last) +
             "] data chunk from " + m_params.lasy_file_name);
     int data_size;
-    if (m_params.fileGeom=="thetaMode") {
+    if (m_params.file_in_cartesian_geom==0) {
         data_size = m_params.n_rz_azimuthal_components*(i_last-i_first+1)*m_params.nr;
     } else {
         data_size = (i_last-i_first+1)*m_params.nx*m_params.ny;
@@ -329,7 +332,7 @@ WarpXLaserProfiles::FromFileLaserProfile::read_data_t_chunk (int t_begin, int t_
         auto E = i.meshes["laserEnvelope"];
         auto E_laser = E[io::RecordComponent::SCALAR];
         openPMD:: Extent full_extent = E_laser.getExtent();
-        if (m_params.fileGeom=="thetaMode") {
+        if (m_params.file_in_cartesian_geom==0) {
             const openPMD::Extent read_extent = { full_extent[0], (i_last - i_first + 1), full_extent[2]};
             auto r_data = E_laser.loadChunk< std::complex<double> >(io::Offset{ 0, i_first,  0}, read_extent);
             const int read_size = (i_last - i_first + 1)*m_params.nr;
diff --git a/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H b/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H
index fbcf8b2c497..3c5f3abd841 100644
--- a/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H
+++ b/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H
@@ -86,6 +86,8 @@ public:
         amrex::ParticleReal* /*p_pair_reaction_weight*/,
         amrex::RandomEngine const& engine) const
         {
+            using namespace amrex::literals;
+
             ElasticCollisionPerez(
                     I1s, I1e, I2s, I2e, I1, I2,
                     soa_1, soa_2,
diff --git a/Source/Particles/Collision/BinaryCollision/Coulomb/UpdateMomentumPerezElastic.H b/Source/Particles/Collision/BinaryCollision/Coulomb/UpdateMomentumPerezElastic.H
index 7c2432f6e1b..3101047e211 100644
--- a/Source/Particles/Collision/BinaryCollision/Coulomb/UpdateMomentumPerezElastic.H
+++ b/Source/Particles/Collision/BinaryCollision/Coulomb/UpdateMomentumPerezElastic.H
@@ -102,184 +102,193 @@ void UpdateMomentumPerezElastic (
     T_PR const g1s = ( T_PR(1.0) - vcDv1*inv_c2 )*gc*g1;
     T_PR const g2s = ( T_PR(1.0) - vcDv2*inv_c2 )*gc*g2;
 
-    // Compute the Coulomb log lnLmd
-    T_PR lnLmd;
-    if ( L > T_PR(0.0) ) { lnLmd = L; }
-    else
-    {
-        // Compute b0 according to eq (22) from Perez et al., Phys.Plasmas.19.083104 (2012)
-        // Note: there is a typo in the equation, the last square is incorrect!
-        // See the SMILEI documentation: https://smileipic.github.io/Smilei/Understand/collisions.html
-        // and https://github.com/ECP-WarpX/WarpX/files/3799803/main.pdf from GitHub #429
-        T_PR const b0 = amrex::Math::abs(q1*q2) * inv_c2 /
-               (T_PR(4.0)*MathConst::pi*PhysConst::ep0) * gc/mass_g *
-               ( m1*g1s*m2*g2s/(p1sm*p1sm*inv_c2) + T_PR(1.0) );
-
-        // Compute the minimal impact parameter
-        constexpr T_PR hbar_pi = static_cast<T_PR>(PhysConst::hbar*MathConst::pi);
-        const T_PR bmin = amrex::max(hbar_pi/p1sm, b0);
-
-        // Compute the Coulomb log lnLmd
-        lnLmd = amrex::max( T_PR(2.0),
-                T_PR(0.5)*std::log(T_PR(1.0)+lmdD*lmdD/(bmin*bmin)) );
-    }
-
     // Compute s
-    const auto tts = m1*g1s*m2*g2s/(inv_c2*p1sm*p1sm) + T_PR(1.0);
-    const auto tts2 = tts*tts;
-    T_PR s = n1*n2/n12 * dt*lnLmd*q1*q1*q2*q2 /
-          ( T_PR(4.0) * MathConst::pi * PhysConst::ep0 * PhysConst::ep0 *
-            m1*g1*m2*g2/(inv_c2*inv_c2) ) * gc*p1sm/mass_g * tts2;
-
-    // Compute s'
-    const auto cbrt_n1 = std::cbrt(n1);
-    const auto cbrt_n2 = std::cbrt(n2);
-    const auto coeff = static_cast<T_PR>(
-        std::pow(4.0*MathConst::pi/3.0,1.0/3.0));
-    T_PR const vrel = mass_g*p1sm/(m1*g1s*m2*g2s*gc);
-    T_PR const sp = coeff * n1*n2/n12 * dt * vrel * (m1+m2) /
-          amrex::max( m1*cbrt_n1*cbrt_n1,
-                      m2*cbrt_n2*cbrt_n2);
-
-    // Determine s
-    s = amrex::min(s,sp);
-
-    // Get random numbers
-    T_PR r = amrex::Random(engine);
-
-    // Compute scattering angle
-    T_PR cosXs;
-    T_PR sinXs;
-    if ( s <= T_PR(0.1) )
-    {
-        while ( true )
+    T_PR s = 0;
+    if (p1sm > std::numeric_limits<T_PR>::min()) {
+
+        // s is non-zero (i.e. particles scatter) only if the relative
+        // motion between particles is not negligible (p1sm non-zero)
+
+        // Compute the Coulomb log lnLmd first
+        T_PR lnLmd;
+        if ( L > T_PR(0.0) ) { lnLmd = L; }
+        else
         {
-            cosXs = T_PR(1.0) + s * std::log(r);
-            // Avoid the bug when r is too small such that cosXs < -1
-            if ( cosXs >= T_PR(-1.0) ) { break; }
-            r = amrex::Random(engine);
+            // Compute b0 according to eq (22) from Perez et al., Phys.Plasmas.19.083104 (2012)
+            // Note: there is a typo in the equation, the last square is incorrect!
+            // See the SMILEI documentation: https://smileipic.github.io/Smilei/Understand/collisions.html
+            // and https://github.com/ECP-WarpX/WarpX/files/3799803/main.pdf from GitHub #429
+            T_PR const b0 = amrex::Math::abs(q1*q2) * inv_c2 /
+                (T_PR(4.0)*MathConst::pi*PhysConst::ep0) * gc/mass_g *
+                ( m1*g1s*m2*g2s/(p1sm*p1sm*inv_c2) + T_PR(1.0) );
+
+            // Compute the minimal impact parameter
+            constexpr T_PR hbar_pi = static_cast<T_PR>(PhysConst::hbar*MathConst::pi);
+            const T_PR bmin = amrex::max(hbar_pi/p1sm, b0);
+
+            // Compute the Coulomb log lnLmd
+            lnLmd = amrex::max( T_PR(2.0),
+                    T_PR(0.5)*std::log(T_PR(1.0)+lmdD*lmdD/(bmin*bmin)) );
         }
-    }
-    else if ( s > T_PR(0.1) && s <= T_PR(3.0) )
-    {
-        T_PR const Ainv = static_cast<T_PR>(
-            0.0056958 + 0.9560202*s - 0.508139*s*s +
-            0.47913906*s*s*s - 0.12788975*s*s*s*s + 0.02389567*s*s*s*s*s);
-        cosXs = Ainv * std::log( std::exp(T_PR(-1.0)/Ainv) +
-                T_PR(2.0) * r * std::sinh(T_PR(1.0)/Ainv) );
-    }
-    else if ( s > T_PR(3.0) && s <= T_PR(6.0) )
-    {
-        T_PR const A = T_PR(3.0) * std::exp(-s);
-        cosXs = T_PR(1.0)/A * std::log( std::exp(-A) +
-                T_PR(2.0) * r * std::sinh(A) );
-    }
-    else
-    {
-        cosXs = T_PR(2.0) * r - T_PR(1.0);
-    }
-    sinXs = std::sqrt(T_PR(1.0) - cosXs*cosXs);
-
-    // Get random azimuthal angle
-    T_PR const phis = amrex::Random(engine) * T_PR(2.0) * MathConst::pi;
-    T_PR const cosphis = std::cos(phis);
-    T_PR const sinphis = std::sin(phis);
-
-    // Compute post-collision momenta pfs in COM
-    T_PR p1fsx;
-    T_PR p1fsy;
-    T_PR p1fsz;
-    // p1sp is the p1s perpendicular
-    T_PR p1sp = std::sqrt( p1sx*p1sx + p1sy*p1sy );
-    // Make sure p1sp is not almost zero
-    if ( p1sp > std::numeric_limits<T_PR>::min() )
-    {
-        p1fsx = ( p1sx*p1sz/p1sp ) * sinXs*cosphis +
-                ( p1sy*p1sm/p1sp ) * sinXs*sinphis +
-                ( p1sx           ) * cosXs;
-        p1fsy = ( p1sy*p1sz/p1sp ) * sinXs*cosphis +
-                (-p1sx*p1sm/p1sp ) * sinXs*sinphis +
-                ( p1sy           ) * cosXs;
-        p1fsz = (-p1sp           ) * sinXs*cosphis +
-                ( T_PR(0.0)      ) * sinXs*sinphis +
-                ( p1sz           ) * cosXs;
-        // Note a negative sign is different from
-        // Eq. (12) in Perez's paper,
-        // but they are the same due to the random nature of phis.
-    }
-    else
-    {
-        // If the previous p1sp is almost zero
-        // x->y  y->z  z->x
-        // This set is equivalent to the one in Nanbu's paper
-        p1sp = std::sqrt( p1sy*p1sy + p1sz*p1sz );
-        p1fsy = ( p1sy*p1sx/p1sp ) * sinXs*cosphis +
-                ( p1sz*p1sm/p1sp ) * sinXs*sinphis +
-                ( p1sy           ) * cosXs;
-        p1fsz = ( p1sz*p1sx/p1sp ) * sinXs*cosphis +
-                (-p1sy*p1sm/p1sp ) * sinXs*sinphis +
-                ( p1sz           ) * cosXs;
-        p1fsx = (-p1sp           ) * sinXs*cosphis +
-                ( T_PR(0.0)      ) * sinXs*sinphis +
-                ( p1sx           ) * cosXs;
-    }
 
-    T_PR const p2fsx = -p1fsx;
-    T_PR const p2fsy = -p1fsy;
-    T_PR const p2fsz = -p1fsz;
+        // Compute s
+        const auto tts = m1*g1s*m2*g2s/(inv_c2*p1sm*p1sm) + T_PR(1.0);
+        const auto tts2 = tts*tts;
+        s = n1*n2/n12 * dt*lnLmd*q1*q1*q2*q2 /
+            ( T_PR(4.0) * MathConst::pi * PhysConst::ep0 * PhysConst::ep0 *
+                m1*g1*m2*g2/(inv_c2*inv_c2) ) * gc*p1sm/mass_g * tts2;
 
-    // Transform from COM to lab frame
-    T_PR p1fx;    T_PR p2fx;
-    T_PR p1fy;    T_PR p2fy;
-    T_PR p1fz;    T_PR p2fz;
-    if ( vcms > std::numeric_limits<T_PR>::min() )
-    {
-        T_PR const vcDp1fs = vcx*p1fsx + vcy*p1fsy + vcz*p1fsz;
-        T_PR const vcDp2fs = vcx*p2fsx + vcy*p2fsy + vcz*p2fsz;
-        /* factor = (gc-1.0)/vcms; Rewrite to avoid subtraction losing precision when gc is close to 1 */
-        T_PR const factor = gc*gc*inv_c2/(gc+T_PR(1.0));
-        T_PR const factor1 = factor*vcDp1fs + m1*g1s*gc;
-        T_PR const factor2 = factor*vcDp2fs + m2*g2s*gc;
-        p1fx = p1fsx + vcx * factor1;
-        p1fy = p1fsy + vcy * factor1;
-        p1fz = p1fsz + vcz * factor1;
-        p2fx = p2fsx + vcx * factor2;
-        p2fy = p2fsy + vcy * factor2;
-        p2fz = p2fsz + vcz * factor2;
-    }
-    else // If vcms = 0, don't do Lorentz-transform.
-    {
-        p1fx = p1fsx;
-        p1fy = p1fsy;
-        p1fz = p1fsz;
-        p2fx = p2fsx;
-        p2fy = p2fsy;
-        p2fz = p2fsz;
-    }
+        // Compute s'
+        const auto cbrt_n1 = std::cbrt(n1);
+        const auto cbrt_n2 = std::cbrt(n2);
+        const auto coeff = static_cast<T_PR>(
+            std::pow(4.0*MathConst::pi/3.0,1.0/3.0));
+        T_PR const vrel = mass_g*p1sm/(m1*g1s*m2*g2s*gc);
+        T_PR const sp = coeff * n1*n2/n12 * dt * vrel * (m1+m2) /
+            amrex::max( m1*cbrt_n1*cbrt_n1,
+                        m2*cbrt_n2*cbrt_n2);
 
-    // Rejection method
-    r = amrex::Random(engine);
-    if ( w2 > r*amrex::max(w1, w2) )
-    {
-        u1x  = p1fx / m1;
-        u1y  = p1fy / m1;
-        u1z  = p1fz / m1;
-#ifndef AMREX_USE_DPCPP
-        AMREX_ASSERT(!std::isnan(u1x+u1y+u1z+u2x+u2y+u2z));
-        AMREX_ASSERT(!std::isinf(u1x+u1y+u1z+u2x+u2y+u2z));
-#endif
+        // Determine s
+        s = amrex::min(s,sp);
     }
-    r = amrex::Random(engine);
-    if ( w1 > r*amrex::max(w1, w2) )
-    {
-        u2x  = p2fx / m2;
-        u2y  = p2fy / m2;
-        u2z  = p2fz / m2;
+
+    // Only modify momenta if is s is non-zero
+    if (s > std::numeric_limits<T_PR>::min()) {
+
+        // Get random numbers
+        T_PR r = amrex::Random(engine);
+
+        // Compute scattering angle
+        T_PR cosXs;
+        T_PR sinXs;
+        if ( s <= T_PR(0.1) )
+        {
+            while ( true )
+            {
+                cosXs = T_PR(1.0) + s * std::log(r);
+                // Avoid the bug when r is too small such that cosXs < -1
+                if ( cosXs >= T_PR(-1.0) ) { break; }
+                r = amrex::Random(engine);
+            }
+        }
+        else if ( s > T_PR(0.1) && s <= T_PR(3.0) )
+        {
+            T_PR const Ainv = static_cast<T_PR>(
+                0.0056958 + 0.9560202*s - 0.508139*s*s +
+                0.47913906*s*s*s - 0.12788975*s*s*s*s + 0.02389567*s*s*s*s*s);
+            cosXs = Ainv * std::log( std::exp(T_PR(-1.0)/Ainv) +
+                    T_PR(2.0) * r * std::sinh(T_PR(1.0)/Ainv) );
+        }
+        else if ( s > T_PR(3.0) && s <= T_PR(6.0) )
+        {
+            T_PR const A = T_PR(3.0) * std::exp(-s);
+            cosXs = T_PR(1.0)/A * std::log( std::exp(-A) +
+                    T_PR(2.0) * r * std::sinh(A) );
+        }
+        else
+        {
+            cosXs = T_PR(2.0) * r - T_PR(1.0);
+        }
+        sinXs = std::sqrt(T_PR(1.0) - cosXs*cosXs);
+
+        // Get random azimuthal angle
+        T_PR const phis = amrex::Random(engine) * T_PR(2.0) * MathConst::pi;
+        T_PR const cosphis = std::cos(phis);
+        T_PR const sinphis = std::sin(phis);
+
+        // Compute post-collision momenta pfs in COM
+        T_PR p1fsx;
+        T_PR p1fsy;
+        T_PR p1fsz;
+        // p1sp is the p1s perpendicular
+        T_PR p1sp = std::sqrt( p1sx*p1sx + p1sy*p1sy );
+        // Make sure p1sp is not almost zero
+        if ( p1sp > std::numeric_limits<T_PR>::min() )
+        {
+            p1fsx = ( p1sx*p1sz/p1sp ) * sinXs*cosphis +
+                    ( p1sy*p1sm/p1sp ) * sinXs*sinphis +
+                    ( p1sx           ) * cosXs;
+            p1fsy = ( p1sy*p1sz/p1sp ) * sinXs*cosphis +
+                    (-p1sx*p1sm/p1sp ) * sinXs*sinphis +
+                    ( p1sy           ) * cosXs;
+            p1fsz = (-p1sp           ) * sinXs*cosphis +
+                    ( T_PR(0.0)      ) * sinXs*sinphis +
+                    ( p1sz           ) * cosXs;
+            // Note a negative sign is different from
+            // Eq. (12) in Perez's paper,
+            // but they are the same due to the random nature of phis.
+        }
+        else
+        {
+            // If the previous p1sp is almost zero
+            // x->y  y->z  z->x
+            // This set is equivalent to the one in Nanbu's paper
+            p1sp = std::sqrt( p1sy*p1sy + p1sz*p1sz );
+            p1fsy = ( p1sy*p1sx/p1sp ) * sinXs*cosphis +
+                    ( p1sz*p1sm/p1sp ) * sinXs*sinphis +
+                    ( p1sy           ) * cosXs;
+            p1fsz = ( p1sz*p1sx/p1sp ) * sinXs*cosphis +
+                    (-p1sy*p1sm/p1sp ) * sinXs*sinphis +
+                    ( p1sz           ) * cosXs;
+            p1fsx = (-p1sp           ) * sinXs*cosphis +
+                    ( T_PR(0.0)      ) * sinXs*sinphis +
+                    ( p1sx           ) * cosXs;
+        }
+
+        T_PR const p2fsx = -p1fsx;
+        T_PR const p2fsy = -p1fsy;
+        T_PR const p2fsz = -p1fsz;
+
+        // Transform from COM to lab frame
+        T_PR p1fx;    T_PR p2fx;
+        T_PR p1fy;    T_PR p2fy;
+        T_PR p1fz;    T_PR p2fz;
+        if ( vcms > std::numeric_limits<T_PR>::min() )
+        {
+            T_PR const vcDp1fs = vcx*p1fsx + vcy*p1fsy + vcz*p1fsz;
+            T_PR const vcDp2fs = vcx*p2fsx + vcy*p2fsy + vcz*p2fsz;
+            /* factor = (gc-1.0)/vcms; Rewrite to avoid subtraction losing precision when gc is close to 1 */
+            T_PR const factor = gc*gc*inv_c2/(gc+T_PR(1.0));
+            T_PR const factor1 = factor*vcDp1fs + m1*g1s*gc;
+            T_PR const factor2 = factor*vcDp2fs + m2*g2s*gc;
+            p1fx = p1fsx + vcx * factor1;
+            p1fy = p1fsy + vcy * factor1;
+            p1fz = p1fsz + vcz * factor1;
+            p2fx = p2fsx + vcx * factor2;
+            p2fy = p2fsy + vcy * factor2;
+            p2fz = p2fsz + vcz * factor2;
+        }
+        else // If vcms = 0, don't do Lorentz-transform.
+        {
+            p1fx = p1fsx;
+            p1fy = p1fsy;
+            p1fz = p1fsz;
+            p2fx = p2fsx;
+            p2fy = p2fsy;
+            p2fz = p2fsz;
+        }
+
+        // Rejection method
+        r = amrex::Random(engine);
+        if ( w2 > r*amrex::max(w1, w2) )
+        {
+            u1x  = p1fx / m1;
+            u1y  = p1fy / m1;
+            u1z  = p1fz / m1;
+        }
+        r = amrex::Random(engine);
+        if ( w1 > r*amrex::max(w1, w2) )
+        {
+            u2x  = p2fx / m2;
+            u2y  = p2fy / m2;
+            u2z  = p2fz / m2;
+        }
 #ifndef AMREX_USE_DPCPP
         AMREX_ASSERT(!std::isnan(u1x+u1y+u1z+u2x+u2y+u2z));
         AMREX_ASSERT(!std::isinf(u1x+u1y+u1z+u2x+u2y+u2z));
 #endif
-    }
+
+    } // if s > std::numeric_limits<T_PR>::min()
 
 }
 
diff --git a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
index 3b81dfa87d2..2b43f76e89f 100644
--- a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
+++ b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
@@ -113,6 +113,8 @@ public:
                     const amrex::ParticleReal* AMREX_RESTRICT p_pair_reaction_weight
                     ) const
     {
+        using namespace amrex::literals;
+
         if (n_total_pairs == 0) return {m_num_product_species, 0};
 
         // Compute offset array and allocate memory for the produced species
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 3d82a9ce08b..49a7c46a3e8 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -26,8 +26,6 @@
 #include <AMReX_Array4.H>
 #include <AMReX_REAL.H>
 
-using namespace amrex::literals;
-
 /**
  * \brief Current Deposition for thread thread_num
  * \tparam depos_order deposition order
@@ -72,6 +70,8 @@ void doDepositionShapeN (const GetParticlePosition& GetPosition,
                          amrex::Real* cost,
                          long load_balance_costs_update_algo)
 {
+    using namespace amrex::literals;
+
 #if !defined(WARPX_DIM_RZ)
     amrex::ignore_unused(n_rz_azimuthal_modes);
 #endif
@@ -388,6 +388,8 @@ void doDepositionSharedShapeN (const GetParticlePosition& GetPosition,
                                const amrex::Geometry& geom,
                                const amrex::IntVect& a_tbox_max_size)
 {
+    using namespace amrex::literals;
+
 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
     using namespace amrex;
 
@@ -596,6 +598,8 @@ void doEsirkepovDepositionShapeN (const GetParticlePosition& GetPosition,
                                   long load_balance_costs_update_algo)
 {
     using namespace amrex;
+    using namespace amrex::literals;
+
 #if !defined(WARPX_DIM_RZ)
     ignore_unused(n_rz_azimuthal_modes);
 #endif
@@ -973,6 +977,8 @@ void doVayDepositionShapeN (const GetParticlePosition& GetPosition,
                             amrex::Real* cost,
                             long load_balance_costs_update_algo)
 {
+    using namespace amrex::literals;
+
 #if defined(WARPX_DIM_RZ)
     amrex::ignore_unused(GetPosition,
         wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
diff --git a/Source/Particles/Deposition/SharedDepositionUtils.H b/Source/Particles/Deposition/SharedDepositionUtils.H
index b3a96577aa0..40407b609c0 100644
--- a/Source/Particles/Deposition/SharedDepositionUtils.H
+++ b/Source/Particles/Deposition/SharedDepositionUtils.H
@@ -43,6 +43,8 @@ void addLocalToGlobal (const amrex::Box& bx,
                        const amrex::Array4<amrex::Real>& global,
                        const amrex::Array4<amrex::Real>& local) noexcept
 {
+    using namespace amrex::literals;
+
     const auto lo  = amrex::lbound(bx);
     const auto len = amrex::length(bx);
     for (int icell = threadIdx.x; icell < bx.numPts(); icell += blockDim.x)
@@ -80,6 +82,8 @@ void depositComponent (const GetParticlePosition& GetPosition,
                        const unsigned int ip,
                        const int zdir, const int NODE, const int CELL, const int dir)
 {
+    using namespace amrex::literals;
+
 #if !defined(WARPX_DIM_RZ)
     amrex::ignore_unused(n_rz_azimuthal_modes);
 #endif
diff --git a/Source/Particles/Filter/FilterFunctors.H b/Source/Particles/Filter/FilterFunctors.H
index a7b894ce336..fb83df1184a 100644
--- a/Source/Particles/Filter/FilterFunctors.H
+++ b/Source/Particles/Filter/FilterFunctors.H
@@ -108,6 +108,8 @@ struct ParserFilter
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     bool operator () (const SuperParticleType& p, const amrex::RandomEngine&) const noexcept
     {
+        using namespace amrex::literals;
+
         if (!m_is_active){
             return true;
         }
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
index 4c3aeca4579..fecd39137d7 100644
--- a/Source/Particles/MultiParticleContainer.H
+++ b/Source/Particles/MultiParticleContainer.H
@@ -179,7 +179,7 @@ public:
     *
     * @param[in] timestep the current timestep.
     */
-    void doResampling (int timestep);
+    void doResampling (int timestep, bool verbose);
 
 #ifdef WARPX_QED
     /** If Schwinger process is activated, this function is called at every
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 1a02cabb816..b9802fa448b 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -942,14 +942,14 @@ MultiParticleContainer::doCollisions ( Real cur_time, amrex::Real dt )
     collisionhandler->doCollisions(cur_time, dt, this);
 }
 
-void MultiParticleContainer::doResampling (const int timestep)
+void MultiParticleContainer::doResampling (const int timestep, const bool verbose)
 {
     for (auto& pc : allcontainers)
     {
         // do_resampling can only be true for PhysicalParticleContainers
         if (!pc->do_resampling){ continue; }
 
-        pc->resample(timestep);
+        pc->resample(timestep, verbose);
     }
 }
 
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index c4ad257c95f..3e3ae069214 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -200,7 +200,7 @@ public:
 
     void MapParticletoBoostedFrame (amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
                                     amrex::ParticleReal& ux, amrex::ParticleReal& uy, amrex::ParticleReal& uz,
-                                    amrex::Real t_lab = 0._prt);
+                                    amrex::Real t_lab = 0.);
 
     void AddGaussianBeam (
         amrex::Real x_m, amrex::Real y_m, amrex::Real z_m,
@@ -226,7 +226,7 @@ public:
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uy,
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uz,
         amrex::Gpu::HostVector<amrex::ParticleReal>& particle_w,
-        amrex::Real t_lab= 0._prt);
+        amrex::Real t_lab= 0.);
 
     /**
      * \brief Default initialize runtime attributes in a tile. This routine does not initialize the
@@ -307,7 +307,7 @@ public:
     *
     * @param[in] timestep the current timestep.
     */
-    void resample (int timestep) override final;
+    void resample (int timestep, bool verbose=true) override final;
 
 #ifdef WARPX_QED
     //Functions decleared in WarpXParticleContainer.H
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 958c800ffc0..1358011b9b0 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -2949,7 +2949,7 @@ PlasmaInjector* PhysicalParticleContainer::GetPlasmaInjector ()
     return plasma_injector.get();
 }
 
-void PhysicalParticleContainer::resample (const int timestep)
+void PhysicalParticleContainer::resample (const int timestep, const bool verbose)
 {
     // In heavily load imbalanced simulations, MPI processes with few particles will spend most of
     // the time at the MPI synchronization in TotalNumberOfParticles(). Having two profiler entries
@@ -2967,7 +2967,11 @@ void PhysicalParticleContainer::resample (const int timestep)
     WARPX_PROFILE_VAR_START(blp_resample_actual);
     if (m_resampler.triggered(timestep, global_numparts))
     {
-        amrex::Print() << Utils::TextMsg::Info("Resampling " + species_name);
+        if (verbose) {
+            amrex::Print() << Utils::TextMsg::Info(
+                "Resampling " + species_name + " at step " + std::to_string(timestep)
+            );
+        }
         for (int lev = 0; lev <= maxLevel(); lev++)
         {
             for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
@@ -2977,7 +2981,6 @@ void PhysicalParticleContainer::resample (const int timestep)
         }
     }
     WARPX_PROFILE_VAR_STOP(blp_resample_actual);
-
 }
 
 
diff --git a/Source/Particles/Sorting/CMakeLists.txt b/Source/Particles/Sorting/CMakeLists.txt
index d3c378e43b8..60a156f4649 100644
--- a/Source/Particles/Sorting/CMakeLists.txt
+++ b/Source/Particles/Sorting/CMakeLists.txt
@@ -3,5 +3,6 @@ foreach(D IN LISTS WarpX_DIMS)
     target_sources(lib_${SD}
       PRIVATE
         Partition.cpp
+        SortingUtils.cpp
     )
 endforeach()
diff --git a/Source/Particles/Sorting/Make.package b/Source/Particles/Sorting/Make.package
index 16efc02b89e..e6ad1604dc7 100644
--- a/Source/Particles/Sorting/Make.package
+++ b/Source/Particles/Sorting/Make.package
@@ -1,2 +1,4 @@
 CEXE_sources += Partition.cpp
+CEXE_sources += SortingUtils.cpp
+
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Sorting
diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H
index 0bec92d6efc..dd53ad6f610 100644
--- a/Source/Particles/Sorting/SortingUtils.H
+++ b/Source/Particles/Sorting/SortingUtils.H
@@ -19,18 +19,7 @@
  *
  * \param[inout] v Vector of integers, to be filled by this routine
  */
-void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v )
-{
-#ifdef AMREX_USE_GPU
-    // On GPU: Use amrex
-    auto data = v.data();
-    auto N = v.size();
-    AMREX_FOR_1D( N, i, {data[i] = i;});
-#else
-    // On CPU: Use std library
-    std::iota( v.begin(), v.end(), 0L );
-#endif
-}
+void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v );
 
 /** \brief Find the indices that would reorder the elements of `predicate`
  * so that the elements with non-zero value precede the other elements
diff --git a/Source/Particles/Sorting/SortingUtils.cpp b/Source/Particles/Sorting/SortingUtils.cpp
new file mode 100644
index 00000000000..699119e8e18
--- /dev/null
+++ b/Source/Particles/Sorting/SortingUtils.cpp
@@ -0,0 +1,22 @@
+/* Copyright 2019-2020 Andrew Myers, Maxence Thevenet, Remi Lehe
+ * Weiqun Zhang
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#include "SortingUtils.H"
+
+void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v )
+{
+#ifdef AMREX_USE_GPU
+    // On GPU: Use amrex
+    auto data = v.data();
+    auto N = v.size();
+    AMREX_FOR_1D( N, i, {data[i] = i;});
+#else
+    // On CPU: Use std library
+    std::iota( v.begin(), v.end(), 0L );
+#endif
+}
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index c58b51fa6fb..c6da328264b 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -47,8 +47,6 @@
 #include <utility>
 
 
-using namespace amrex::literals;
-
 class WarpXParIter
     : public amrex::ParIter<0,0,PIdx::nattribs>
 {
@@ -381,7 +379,7 @@ public:
      * override the method for every derived class. Note that in practice this function is never
      * called because resample() is only called for PhysicalParticleContainers.
      */
-    virtual void resample (const int /*timestep*/) {}
+    virtual void resample (const int /*timestep*/, bool /*verbose*/) {}
 
     /**
      * When using runtime components, AMReX requires to touch all tiles
diff --git a/Source/Utils/ParticleUtils.H b/Source/Utils/ParticleUtils.H
index bd9ebeaeb5b..50dc12a722c 100644
--- a/Source/Utils/ParticleUtils.H
+++ b/Source/Utils/ParticleUtils.H
@@ -94,6 +94,8 @@ namespace ParticleUtils {
                               amrex::ParticleReal const Vx, amrex::ParticleReal const Vy,
                               amrex::ParticleReal const Vz )
     {
+        using namespace amrex::literals;
+
         // precompute repeatedly used quantities
         constexpr auto c2 = PhysConst::c * PhysConst::c;
         const auto V2 = (Vx*Vx + Vy*Vy + Vz*Vz);
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 508790ef20f..6857ad85bcf 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -871,26 +871,30 @@ WarpX::ReadParameters ()
             quantum_xi_c2 = static_cast<amrex::Real>(quantum_xi * PhysConst::c * PhysConst::c);
         }
 
-        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-            WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
-                !(
-                    ( WarpX::field_boundary_lo[idim] == FieldBoundaryType::PML &&
-                    WarpX::field_boundary_lo[idim] == FieldBoundaryType::Absorbing_SilverMueller ) ||
-                    ( WarpX::field_boundary_hi[idim] == FieldBoundaryType::PML &&
-                     WarpX::field_boundary_hi[idim] == FieldBoundaryType::Absorbing_SilverMueller )
-                ),
-                "PML and Silver-Mueller boundary conditions cannot be activated at the same time.");
+        const auto at_least_one_boundary_is_pml =
+            (std::any_of(WarpX::field_boundary_lo.begin(), WarpX::field_boundary_lo.end(),
+                [](const auto& cc){return cc == FieldBoundaryType::PML;})
+            ||
+            std::any_of(WarpX::field_boundary_hi.begin(), WarpX::field_boundary_hi.end(),
+                [](const auto& cc){return cc == FieldBoundaryType::PML;})
+            );
 
+        const auto at_least_one_boundary_is_silver_mueller =
+            (std::any_of(WarpX::field_boundary_lo.begin(), WarpX::field_boundary_lo.end(),
+                [](const auto& cc){return cc == FieldBoundaryType::Absorbing_SilverMueller;})
+            ||
+            std::any_of(WarpX::field_boundary_hi.begin(), WarpX::field_boundary_hi.end(),
+                [](const auto& cc){return cc == FieldBoundaryType::Absorbing_SilverMueller;})
+            );
 
-            if (WarpX::field_boundary_lo[idim] == FieldBoundaryType::Absorbing_SilverMueller ||
-                WarpX::field_boundary_hi[idim] == FieldBoundaryType::Absorbing_SilverMueller)
-            {
-                // SilverMueller is implemented for Yee
-                WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
-                    electromagnetic_solver_id == ElectromagneticSolverAlgo::Yee,
-                    "The Silver-Mueller boundary condition can only be used with the Yee solver.");
-            }
-        }
+        WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+            !(at_least_one_boundary_is_pml && at_least_one_boundary_is_silver_mueller),
+            "PML and Silver-Mueller boundary conditions cannot be activated at the same time.");
+
+        WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+            (!at_least_one_boundary_is_silver_mueller) ||
+            (electromagnetic_solver_id == ElectromagneticSolverAlgo::Yee),
+            "The Silver-Mueller boundary condition can only be used with the Yee solver.");
 
         utils::parser::queryWithParser(pp_warpx, "pml_ncell", pml_ncell);
         utils::parser::queryWithParser(pp_warpx, "pml_delta", pml_delta);
diff --git a/Tools/Algorithms/stencil.py b/Tools/Algorithms/stencil.py
index cb24a62148b..63bb7f11c2e 100644
--- a/Tools/Algorithms/stencil.py
+++ b/Tools/Algorithms/stencil.py
@@ -13,7 +13,9 @@
 
 import argparse
 import os
+import sys
 
+sys.path.append('../Parser/')
 from input_file_parser import parse_input_file
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/Tools/Parser/__init__.py b/Tools/Parser/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/Tools/Algorithms/input_file_parser.py b/Tools/Parser/input_file_parser.py
similarity index 100%
rename from Tools/Algorithms/input_file_parser.py
rename to Tools/Parser/input_file_parser.py
diff --git a/Tools/machines/lassen-llnl/install_v100_dependencies.sh b/Tools/machines/lassen-llnl/install_v100_dependencies.sh
new file mode 100755
index 00000000000..223b41d1967
--- /dev/null
+++ b/Tools/machines/lassen-llnl/install_v100_dependencies.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was lassen_v100_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lassen_v100_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SRC_DIR="/usr/workspace/${USER}/lassen/src"
+SW_DIR="/usr/workspace/${USER}/lassen/gpu"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+mkdir -p ${SRC_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+
+# General extra dependencies ##################################################
+#
+
+# tmpfs build directory: avoids issues often seen with $HOME and is faster
+build_dir=$(mktemp -d)
+
+# c-blosc (I/O compression)
+if [ -d ${SRC_DIR}/c-blosc ]
+then
+  cd ${SRC_DIR}/c-blosc
+  git fetch --prune
+  git checkout v1.21.1
+  cd -
+else
+  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git ${SRC_DIR}/c-blosc
+fi
+cmake -S ${SRC_DIR}/c-blosc -B ${build_dir}/c-blosc-lassen-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1
+cmake --build ${build_dir}/c-blosc-lassen-build --target install --parallel 10
+
+# HDF5
+if [ -d ${SRC_DIR}/hdf5 ]
+then
+  cd ${SRC_DIR}/hdf5
+  git fetch --prune
+  git checkout hdf5-1_14_1-2
+  cd -
+else
+  git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git ${SRC_DIR}/hdf5
+fi
+cmake -S ${SRC_DIR}/hdf5 -B ${build_dir}/hdf5-lassen-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2
+cmake --build ${build_dir}/hdf5-lassen-build --target install --parallel 10
+
+# ADIOS2
+if [ -d ${SRC_DIR}/adios2 ]
+then
+  cd ${SRC_DIR}/adios2
+  git fetch --prune
+  git checkout v2.8.3
+  cd -
+else
+  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git ${SRC_DIR}/adios2
+fi
+cmake -S ${SRC_DIR}/adios2 -B ${build_dir}/adios2-lassen-build -DBUILD_TESTING=OFF -DADIOS2_BUILD_EXAMPLES=OFF -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_SST=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3
+cmake --build ${build_dir}/adios2-lassen-build --target install -j 10
+
+# BLAS++ (for PSATD+RZ)
+if [ -d ${SRC_DIR}/blaspp ]
+then
+  cd ${SRC_DIR}/blaspp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/blaspp.git ${SRC_DIR}/blaspp
+fi
+cmake -S ${SRC_DIR}/blaspp -B ${build_dir}/blaspp-lassen-build -Duse_openmp=ON -Dgpu_backend=cuda -Duse_cmake_find_blas=ON -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master
+cmake --build ${build_dir}/blaspp-lassen-build --target install --parallel 10
+
+# LAPACK++ (for PSATD+RZ)
+if [ -d ${SRC_DIR}/lapackpp ]
+then
+  cd ${SRC_DIR}/lapackpp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/lapackpp.git ${SRC_DIR}/lapackpp
+fi
+CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S ${SRC_DIR}/lapackpp -B ${build_dir}/lapackpp-lassen-build -Duse_cmake_find_lapack=ON -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master -DLAPACK_LIBRARIES=/usr/lib64/liblapack.so
+cmake --build ${build_dir}/lapackpp-lassen-build --target install --parallel 10
+
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade --user virtualenv
+rm -rf ${SW_DIR}/venvs/warpx-lassen
+python3 -m venv ${SW_DIR}/venvs/warpx-lassen
+source ${SW_DIR}/venvs/warpx-lassen/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip cache purge
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+python3 -m pip install --upgrade -Ccompile-args="-j10" scipy
+python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+MPLLOCALFREETYPE=1 python3 -m pip install --upgrade matplotlib==3.2.2  # does not try to build freetype itself
+echo "matplotlib==3.2.2" > ${build_dir}/constraints.txt
+python3 -m pip install --upgrade -c ${build_dir}/constraints.txt yt
+
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r ${SRC_DIR}/warpx/requirements.txt
+
+# for ML dependencies, see install_v100_ml.sh
+
+
+# remove build temporary directory
+rm -rf ${build_dir}
diff --git a/Tools/machines/lassen-llnl/install_v100_ml.sh b/Tools/machines/lassen-llnl/install_v100_ml.sh
new file mode 100755
index 00000000000..6e00be035d6
--- /dev/null
+++ b/Tools/machines/lassen-llnl/install_v100_ml.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was lassen_v100_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lassen_v100_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SRC_DIR="/usr/workspace/${USER}/lassen/src"
+mkdir -p ${SRC_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qqq -y torch 2>/dev/null || true
+
+
+# Python ML ###################################################################
+#
+# for basic python dependencies, see install_v100_dependencies.sh
+
+# optional: for libEnsemble - WIP: issues with nlopt
+# python3 -m pip install -r ${SRC_DIR}/warpx/Tools/LibEnsemble/requirements.txt
+
+# optional: for pytorch
+if [ -d ${SRC_DIR}/pytorch ]
+then
+  cd ${SRC_DIR}/pytorch
+  git fetch
+  git checkout .
+  git checkout v2.0.1
+  git submodule update --init --recursive
+  cd -
+else
+  git clone -b v2.0.1 --recurse-submodules https://github.com/pytorch/pytorch.git ${SRC_DIR}/pytorch
+fi
+cd ${SRC_DIR}/pytorch
+rm -rf build
+
+# see https://github.com/pytorch/pytorch/issues/97497#issuecomment-1499069641
+#     https://github.com/pytorch/pytorch/pull/98511
+wget -q -O - https://github.com/pytorch/pytorch/pull/98511.patch | git apply
+
+python3 -m pip install -r requirements.txt
+
+# see https://github.com/pytorch/pytorch/issues/108984#issuecomment-1712938737
+LDFLAGS="-L${CUDA_HOME}/nvidia/targets/ppc64le-linux/lib/" \
+USE_CUDA=1 BLAS=OpenBLAS MAX_JOBS=64 ATEN_AVX512_256=OFF BUILD_TEST=0 python3 setup.py develop
+#   (optional) If using torch.compile with inductor/triton, install the matching version of triton
+#make triton
+rm -rf build
+cd -
+
+# optional: optimas dependencies (based on libEnsemble & ax->botorch->gpytorch->pytorch)
+#   commented because scikit-learn et al. compile > 2 hrs
+#   please run manually on a login node if needed
+#python3 -m pip install -r ${SRC_DIR}/warpx/Tools/optimas/requirements.txt
diff --git a/Tools/machines/lassen-llnl/lassen.bsub b/Tools/machines/lassen-llnl/lassen_v100.bsub
similarity index 100%
rename from Tools/machines/lassen-llnl/lassen.bsub
rename to Tools/machines/lassen-llnl/lassen_v100.bsub
diff --git a/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example b/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example
new file mode 100644
index 00000000000..652af2a2822
--- /dev/null
+++ b/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example
@@ -0,0 +1,54 @@
+# please set your project account
+#export proj="<yourProjectNameHere>"  # edit this and comment in
+
+# required dependencies
+module load cmake/3.23.1
+module load gcc/11.2.1
+module load cuda/12.0.0
+
+# optional: for QED lookup table generation support
+module load boost/1.70.0
+
+# optional: for openPMD support
+SRC_DIR="/usr/workspace/${USER}/lassen/src"
+SW_DIR="/usr/workspace/${USER}/lassen/gpu"
+export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
+
+# optional: for PSATD in RZ geometry support
+export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-master:$CMAKE_PREFIX_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-master/lib64:$LD_LIBRARY_PATH
+
+# optional: for Python bindings
+module load python/3.8.2
+
+if [ -d "${SW_DIR}/venvs/warpx-lassen" ]
+then
+    source ${SW_DIR}/venvs/warpx-lassen/bin/activate
+fi
+
+# optional: an alias to request an interactive node for two hours
+alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+# an alias to run a command on a batch node for up to 30min
+#   usage: runNode <command>
+alias runNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -I"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# optimize CUDA compilation for V100
+export AMREX_CUDA_ARCH=7.0
+export CUDAARCHS=70
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=${CXX}
diff --git a/Tools/machines/lassen-llnl/lassen_warpx.profile.example b/Tools/machines/lassen-llnl/lassen_warpx.profile.example
deleted file mode 100644
index fb605b53ec2..00000000000
--- a/Tools/machines/lassen-llnl/lassen_warpx.profile.example
+++ /dev/null
@@ -1,44 +0,0 @@
-# please set your project account
-#export proj="<yourProjectNameHere>"  # edit this and comment in
-
-# required dependencies
-module load cmake/3.23.1
-module load clang/12.0.1-gcc-8.3.1
-module load cuda/12.0.0
-
-# optional: for QED lookup table generation support
-module load boost/1.70.0
-
-# optional: for openPMD support
-export CMAKE_PREFIX_PATH=$HOME/sw/lassen/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
-export CMAKE_PREFIX_PATH=$HOME/sw/lassen/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
-export CMAKE_PREFIX_PATH=$HOME/sw/lassen/adios2-2.8.3:$CMAKE_PREFIX_PATH
-export LD_LIBRARY_PATH=$HOME/sw/lassen/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$HOME/sw/lassen/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$HOME/sw/lassen/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
-
-# optional: for PSATD in RZ geometry support
-export CMAKE_PREFIX_PATH=$HOME/sw/lassen/blaspp-master:$CMAKE_PREFIX_PATH
-export CMAKE_PREFIX_PATH=$HOME/sw/lassen/lapackpp-master:$CMAKE_PREFIX_PATH
-export LD_LIBRARY_PATH=$HOME/sw/lassen/blaspp-master/lib64:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH=$HOME/sw/lassen/lapackpp-master/lib64:$LD_LIBRARY_PATH
-
-# optional: for Python bindings
-module load python/3.8.2
-
-# optional: an alias to request an interactive node for two hours
-alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
-
-# fix system defaults: do not escape $ with a \ on tab completion
-shopt -s direxpand
-
-# optimize CUDA compilation for V100
-export AMREX_CUDA_ARCH=7.0
-export CUDAARCHS=70
-
-# compiler environment hints
-export CC=$(which clang)
-export CXX=$(which clang++)
-export FC=$(which gfortran)
-export CUDACXX=$(which nvcc)
-export CUDAHOSTCXX=$(which clang++)
diff --git a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
index a60c3ba7594..7bcc7053974 100755
--- a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
+++ b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
@@ -127,6 +127,7 @@ python3 -m pip install --upgrade matplotlib
 python3 -m pip install --upgrade yt
 # install or update WarpX dependencies such as picmistandard
 python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt
+python3 -m pip install cupy-cuda11x  # CUDA 11.7 compatible wheel
 # optional: for libEnsemble
 python3 -m pip install -r $HOME/src/warpx/Tools/LibEnsemble/requirements.txt
 # optional: for optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch)
diff --git a/Tools/machines/quartz-llnl/install_dependencies.sh b/Tools/machines/quartz-llnl/install_dependencies.sh
new file mode 100755
index 00000000000..c162ac5d390
--- /dev/null
+++ b/Tools/machines/quartz-llnl/install_dependencies.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was quartz_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your quartz_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SW_DIR="/usr/workspace/${USER}/quartz"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+
+# General extra dependencies ##################################################
+#
+
+# tmpfs build directory: avoids issues often seen with ${HOME} and is faster
+build_dir=$(mktemp -d)
+
+# c-blosc (I/O compression)
+if [ -d ${HOME}/src/c-blosc ]
+then
+  cd ${HOME}/src/c-blosc
+  git fetch --prune
+  git checkout v1.21.1
+  cd -
+else
+  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git ${HOME}/src/c-blosc
+fi
+cmake -S ${HOME}/src/c-blosc -B ${build_dir}/c-blosc-quartz-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1
+cmake --build ${build_dir}/c-blosc-quartz-build --target install --parallel 6
+
+# ADIOS2
+if [ -d ${HOME}/src/adios2 ]
+then
+  cd ${HOME}/src/adios2
+  git fetch --prune
+  git checkout v2.8.3
+  cd -
+else
+  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git ${HOME}/src/adios2
+fi
+cmake -S ${HOME}/src/adios2 -B ${build_dir}/adios2-quartz-build -DBUILD_TESTING=OFF -DADIOS2_BUILD_EXAMPLES=OFF -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_SST=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3
+cmake --build ${build_dir}/adios2-quartz-build --target install -j 6
+
+# BLAS++ (for PSATD+RZ)
+if [ -d ${HOME}/src/blaspp ]
+then
+  cd ${HOME}/src/blaspp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/blaspp.git ${HOME}/src/blaspp
+fi
+cmake -S ${HOME}/src/blaspp -B ${build_dir}/blaspp-quartz-build -Duse_openmp=ON -Duse_cmake_find_blas=ON -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master
+cmake --build ${build_dir}/blaspp-quartz-build --target install --parallel 6
+
+# LAPACK++ (for PSATD+RZ)
+if [ -d ${HOME}/src/lapackpp ]
+then
+  cd ${HOME}/src/lapackpp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/lapackpp.git ${HOME}/src/lapackpp
+fi
+CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S ${HOME}/src/lapackpp -B ${build_dir}/lapackpp-quartz-build -Duse_cmake_find_lapack=ON -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master
+cmake --build ${build_dir}/lapackpp-quartz-build --target install --parallel 6
+
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade --user virtualenv
+rm -rf ${SW_DIR}/venvs/warpx-quartz
+python3 -m venv ${SW_DIR}/venvs/warpx-quartz
+source ${SW_DIR}/venvs/warpx-quartz/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip cache purge
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+python3 -m pip install --upgrade scipy
+python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+python3 -m pip install --upgrade matplotlib
+python3 -m pip install --upgrade yt
+
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r ${HOME}/src/warpx/requirements.txt
+
+# ML dependencies
+python3 -m pip install --upgrade torch
+
+
+# remove build temporary directory ############################################
+#
+rm -rf ${build_dir}
diff --git a/Tools/machines/quartz-llnl/quartz_warpx.profile.example b/Tools/machines/quartz-llnl/quartz_warpx.profile.example
index 370e4a601ac..810005bafb7 100644
--- a/Tools/machines/quartz-llnl/quartz_warpx.profile.example
+++ b/Tools/machines/quartz-llnl/quartz_warpx.profile.example
@@ -1,37 +1,53 @@
 # please set your project account
-#export proj=<yourProject>
+#export proj="<yourProjectNameHere>"  # edit this and comment in
 
 # required dependencies
-module load cmake/3.20.2
-module load intel/2021.4
-module load mvapich2/2.3
+module load cmake/3.23.1
+module load clang/14.0.6-magic
+module load mvapich2/2.3.7
 
 # optional: for PSATD support
-module load fftw/3.3.8
+module load fftw/3.3.10
 
 # optional: for QED lookup table generation support
-module load boost/1.73.0
+module load boost/1.80.0
 
 # optional: for openPMD support
-# TODO ADIOS2
-module load hdf5-parallel/1.10.2
+module load hdf5-parallel/1.14.0
+
+SW_DIR="/usr/workspace/${USER}/quartz"
+export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH
 
 # optional: for PSATD in RZ geometry support
-# TODO: blaspp lapackpp
+export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-master:$CMAKE_PREFIX_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-master/lib64:$LD_LIBRARY_PATH
 
 # optional: for Python bindings
-module load python/3.8.2
+module load python/3.9.12
+
+if [ -d "${SW_DIR}/venvs/warpx-quartz" ]
+then
+    source ${SW_DIR}/venvs/warpx-quartz/bin/activate
+fi
 
 # optional: an alias to request an interactive node for two hours
 alias getNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug --pty bash"
+# an alias to run a command on a batch node for up to 30min
+#   usage: runNode <command>
+alias runNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug"
 
 # fix system defaults: do not escape $ with a \ on tab completion
 shopt -s direxpand
 
+# optimize CPU microarchitecture for Intel Xeon E5-2695 v4
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=broadwell"
+export CFLAGS="-march=broadwell"
+
 # compiler environment hints
-export CC=$(which icc)
-export CXX=$(which icpc)
-export FC=$(which ifort)
-# we need a newer libstdc++:
-export CFLAGS="-gcc-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/gcc ${CFLAGS}"
-export CXXFLAGS="-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ ${CXXFLAGS}"
+export CC=$(which clang)
+export CXX=$(which clang++)
+export FC=$(which gfortran)
diff --git a/Tools/machines/summit-olcf/install_gpu_ml.sh b/Tools/machines/summit-olcf/install_gpu_ml.sh
index ff4eaf1555c..25a8eb27abd 100755
--- a/Tools/machines/summit-olcf/install_gpu_ml.sh
+++ b/Tools/machines/summit-olcf/install_gpu_ml.sh
@@ -60,6 +60,7 @@ then
   git fetch
   git checkout .
   git checkout v2.0.1
+  git submodule update --init --recursive
   cd -
 else
   git clone -b v2.0.1 --recurse-submodules https://github.com/pytorch/pytorch.git /ccs/proj/${proj}/${USER}/src/pytorch
diff --git a/Tools/machines/summit-olcf/summit_warpx.profile.example b/Tools/machines/summit-olcf/summit_warpx.profile.example
index c060102d6e8..5d88bb9aeed 100644
--- a/Tools/machines/summit-olcf/summit_warpx.profile.example
+++ b/Tools/machines/summit-olcf/summit_warpx.profile.example
@@ -63,7 +63,7 @@ fi
 #   for paralle execution, start on the batch node: jsrun <command>
 alias getNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -Is /bin/bash"
 # an alias to run a command on a batch node for up to 30min
-#   usage: nrun <command>
+#   usage: runNode <command>
 alias runNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -I"
 
 # fix system defaults: do not escape $ with a \ on tab completion
diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake
index 113f26c3017..fb80973c96b 100644
--- a/cmake/dependencies/AMReX.cmake
+++ b/cmake/dependencies/AMReX.cmake
@@ -243,7 +243,7 @@ macro(find_amrex)
         endif()
         set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION})
 
-        find_package(AMReX 23.08 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS)
+        find_package(AMReX 23.09 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS)
         message(STATUS "AMReX: Found version '${AMReX_VERSION}'")
     endif()
 endmacro()
@@ -257,7 +257,7 @@ set(WarpX_amrex_src ""
 set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git"
     CACHE STRING
     "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)")
-set(WarpX_amrex_branch "c45770c9f9b2c5fa98c675a439c502e78912bf47"
+set(WarpX_amrex_branch "48b3ec7cb7ad99823bd85fad83c13c3cfd5ecdd4"
     CACHE STRING
     "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)")
 
diff --git a/cmake/dependencies/PICSAR.cmake b/cmake/dependencies/PICSAR.cmake
index 05522724424..8400dc06a4b 100644
--- a/cmake/dependencies/PICSAR.cmake
+++ b/cmake/dependencies/PICSAR.cmake
@@ -85,7 +85,7 @@ function(find_picsar)
         #message(STATUS "PICSAR: Using version '${PICSAR_VERSION}'")
     else()
         # not supported by PICSAR (yet)
-        #find_package(PICSAR 23.08 CONFIG REQUIRED QED)
+        #find_package(PICSAR 23.09 CONFIG REQUIRED QED)
         #message(STATUS "PICSAR: Found version '${PICSAR_VERSION}'")
         message(FATAL_ERROR "PICSAR: Cannot be used as externally installed "
             "library yet. "
@@ -106,7 +106,7 @@ if(WarpX_QED)
     set(WarpX_picsar_repo "https://github.com/ECP-WarpX/picsar.git"
         CACHE STRING
         "Repository URI to pull and build PICSAR from if(WarpX_picsar_internal)")
-    set(WarpX_picsar_branch "aa54e985398c1d575abc7e6737cdbc660a13765f"
+    set(WarpX_picsar_branch "23.09"
         CACHE STRING
         "Repository branch for WarpX_picsar_repo if(WarpX_picsar_internal)")
 
diff --git a/run_test.sh b/run_test.sh
index 3dee9de0803..e24095ee8cf 100755
--- a/run_test.sh
+++ b/run_test.sh
@@ -71,7 +71,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt
 
 # Clone AMReX and warpx-data
 git clone https://github.com/AMReX-Codes/amrex.git
-cd amrex && git checkout --detach c45770c9f9b2c5fa98c675a439c502e78912bf47 && cd -
+cd amrex && git checkout --detach 48b3ec7cb7ad99823bd85fad83c13c3cfd5ecdd4 && cd -
 # warpx-data contains various required data sets
 git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git
 # openPMD-example-datasets contains various required data sets
diff --git a/setup.py b/setup.py
index 8d14aaa097d..91c8f1ac828 100644
--- a/setup.py
+++ b/setup.py
@@ -283,7 +283,7 @@ def build_extension(self, ext):
 setup(
     name='pywarpx',
     # note PEP-440 syntax: x.y.zaN but x.y.z.devN
-    version = '23.08',
+    version = '23.09',
     packages = ['pywarpx'],
     package_dir = {'pywarpx': 'Python/pywarpx'},
     author='Jean-Luc Vay, David P. Grote, Maxence Thévenet, Rémi Lehe, Andrew Myers, Weiqun Zhang, Axel Huebl, et al.',