Add new script for benchmarking #157

Merged
36 commits
ee0b7dd
add benchmarking configuration file
HealthyPear Aug 25, 2021
1076ae1
add new script to launch and produce benchmark notebooks as HTML pages
HealthyPear Aug 25, 2021
7391c2e
add papermill as a development dependency
HealthyPear Aug 25, 2021
fe06d2a
add new associated entry point protopipe-BENCHMARK
HealthyPear Aug 25, 2021
f661f3e
Update benchmarks.yaml
HealthyPear Aug 30, 2021
b66fc2d
Merge branch 'master' into feature-add_protopipe_BENCHMARK
HealthyPear Aug 31, 2021
aba8bb1
Use single quotes for compatibility with GRID interface
HealthyPear Sep 6, 2021
a512b0c
Merge branch 'feature-add_protopipe_BENCHMARK' of https://github.com/…
HealthyPear Sep 6, 2021
2312755
improve comments in config file
HealthyPear Sep 7, 2021
3caab3b
Update launch_benchmark.py
HealthyPear Sep 27, 2021
9ae3a97
Update benchmarks.yaml
HealthyPear Sep 27, 2021
2379629
Require jupyter lab >=3.1.10
HealthyPear Sep 27, 2021
3437fdc
Create benchmark.rst
HealthyPear Sep 27, 2021
35bb202
Update index.rst
HealthyPear Sep 27, 2021
7c1d790
Fix error message formatting
HealthyPear Sep 27, 2021
5bf39ca
Properly ignore benchmark plots
HealthyPear Sep 29, 2021
fe06e75
Merge branch 'master' into feature-add_protopipe_BENCHMARK
HealthyPear Sep 29, 2021
6a232cc
Ignore also log files
HealthyPear Sep 29, 2021
6662279
Update launch_benchmark.py
HealthyPear Oct 1, 2021
777730d
Update benchmarks.yaml
HealthyPear Oct 1, 2021
1007aba
Update setup.py for benchmark test
HealthyPear Oct 1, 2021
52e73b5
Update .gitignore
HealthyPear Jan 22, 2022
30a053c
Update documentation about protopipe-BENCHMARK
HealthyPear Jan 22, 2022
c4b8381
Refactor input functions
HealthyPear Jan 22, 2022
c3f0ee5
Improve new benchmarking script
HealthyPear Jan 22, 2022
7841958
Update setup.py
HealthyPear Jan 22, 2022
c3d042d
Update benchmarking configuration file
HealthyPear Jan 22, 2022
ce3300b
Update data_training.py and write_dl2.py for test pipeline
HealthyPear Jan 24, 2022
89fca27
import load_config and get_camera_names from protopipe.pipeline.io
HealthyPear Jan 24, 2022
dd9ef55
Update config
HealthyPear Feb 15, 2022
d604946
Update docs: script page
HealthyPear Feb 15, 2022
c8d82a7
Update docs
HealthyPear Feb 15, 2022
50892c1
Add support for analysis' Jupyter Book
HealthyPear Feb 28, 2022
efd17e3
Update docs
HealthyPear Feb 28, 2022
b0dc131
Merge branch 'master' into feature-add_protopipe_BENCHMARK
HealthyPear Mar 1, 2022
2e300aa
fixes/updates to the docs
HealthyPear Mar 1, 2022
8 changes: 3 additions & 5 deletions .gitignore
@@ -1,8 +1,5 @@
.pytest_cache

.DS_Store

# Compiled files
.pytest_cache
*.py[co]
*.pyc
*.a
@@ -29,7 +26,7 @@ docs/api
docs/_build

# Benchmark plots
plots/
**/plots

# Eclipse editor project files
.project
@@ -80,6 +77,7 @@ distribute-*.tar.gz
.sonarlint

# output files (should not be stored in git)
*.log
*.h5
*.hdf5
*.fits
153 changes: 153 additions & 0 deletions docs/scripts/benchmark_script.rst
@@ -0,0 +1,153 @@
.. _benchmark_script:

Benchmarking
============

``protopipe-BENCHMARK`` is used to run benchmarks.
It allows you to list the available benchmarking notebooks, run them,
and optionally convert the results to HTML for easier consultation.

.. warning::

    The *calibration* benchmark notebook requires a more recent *ctapipe*
    version than the one currently supported by *protopipe* (``ctapipe >= 0.12.0``).

``protopipe-BENCHMARK`` provides two sub-commands, ``list`` and ``launch``.
Invoking the help argument shows how the script works:

.. code-block::

usage: protopipe-BENCHMARK [-h] {list,launch} ...

Launch a benchmark notebook and convert it to an HTML page.
USAGE EXAMPLE:
--------------
>>> protopipe-BENCHMARK list
>>> protopipe-BENCHMARK launch -n TRAINING/benchmarks_DL1b_image-cleaning --config_file benchmarks.yaml


positional arguments:
{list,launch}
list List available benchmarks
launch Launch a specific benchmark

optional arguments:
-h, --help show this help message and exit

The ``launch`` command is essentially a convenient wrapper
around `papermill <https://papermill.readthedocs.io/en/latest/>`__ and
(optionally) `jupyter nbconvert <https://nbconvert.readthedocs.io/en/latest/>`__.
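Conceptually, such a wrapper boils down to composing two command lines: a papermill call that executes the notebook with injected parameters, and an optional ``jupyter nbconvert`` call that renders the executed notebook to HTML. A minimal sketch of that composition (illustrative only; the function name and layout are hypothetical, not the script's actual code):

```python
import shlex


def build_benchmark_commands(notebook, outdir, params, export_html=True):
    """Compose the papermill and nbconvert invocations that a
    ``launch``-style wrapper would run (illustrative sketch)."""
    # Executed copy of the notebook goes into the output directory
    result = f"{outdir}/{notebook.rsplit('/', 1)[-1]}"
    papermill_cmd = ["papermill", f"{notebook}.ipynb", f"{result}.ipynb"]
    # Each configuration value is injected as a papermill parameter
    for key, value in params.items():
        papermill_cmd += ["-p", key, str(value)]
    commands = [shlex.join(papermill_cmd)]
    if export_html:
        # Optional conversion of the executed notebook to an HTML page
        commands.append(
            shlex.join(["jupyter", "nbconvert", "--to", "html", f"{result}.ipynb"])
        )
    return commands
```

The real script adds option handling, output-path resolution, and the Jupyter Book template on top of this core.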

The first time ``protopipe-BENCHMARK`` is used to create a notebook,
it also creates the template for a `Jupyter Book <https://jupyterbook.org/intro.html>`__
dedicated to the analysis at hand (each benchmarking notebook becomes a page).
At the end of the analysis, the book can be transferred to the
`Performance repository <https://gitlab.cta-observatory.org/mperesano/protopipe-results>`__
as a Pull Request (please see the README of that repository).

.. code-block::

usage: protopipe-BENCHMARK launch [-h] [--help-notebook] -n NAME --config_file CONFIG_FILE [-k [KWARGS [KWARGS ...]]] [--outpath OUTPATH]
[--overwrite] [--suffix SUFFIX] [--no_export]

optional arguments:
-h, --help show this help message and exit
--help-notebook Print the list of available notebook parameters
-n NAME, --name NAME Pipeline step and name of the benchmark (for a list use `protopipe-BENCHMARK -l`)
--config_file CONFIG_FILE
Configuration file (default: stored under analysis 'config' folder)
-k [KWARGS [KWARGS ...]], --kwargs [KWARGS [KWARGS ...]]
Overwrite or specify other configuration options (e.g. --kwargs foo=bar fiz=biz)
--outpath OUTPATH If unset it will be read from benchmarks.yaml
--overwrite Execute the notebook even if it overwrites the old result.
--suffix SUFFIX Suffix for result and HTML files (default: None)
--no_export Do not convert the result notebook to any other format.
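The ``--kwargs`` values are plain ``key=value`` strings that override entries of the configuration file. One plausible way to turn them into an override dictionary (a hypothetical helper, not necessarily what the script does internally):

```python
def parse_kwargs(pairs):
    """Turn ['foo=bar', 'fiz=biz'] into {'foo': 'bar', 'fiz': 'biz'}."""
    overrides = {}
    for pair in pairs:
        # Split only on the first '=' so values may themselves contain '='
        key, sep, value = pair.partition("=")
        if not sep or not key:
            raise ValueError(f"Expected key=value, got {pair!r}")
        overrides[key.strip()] = value.strip()
    return overrides
```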

The script reads its configuration from ``benchmarks.yaml``, of which an example is shown below:

.. code-block:: yaml

# This configuration file simplifies the usage of benchmarks throughout the
# entire analysis.
# It is recommended to fill it and to specify any remaining options with
# the --kwargs flag of protopipe-BENCHMARK.
# To specify directories, please provide full paths.
# Note: users who work inside a CTADIRAC container should use paths OUTSIDE of it.

# General settings for your analysis
analyses_directory: "ANALYSES_DIRECTORY" # filled by the grid interface
analysis_name: "ANALYSIS_NAME" # filled by the grid interface
# to compare with a previous release or version
load_protopipe_previous: False # If True load data from a previous analysis
analysis_name_2: "" # if files have different names override them (--kwargs)

# Global plot aesthetics settings
use_seaborn: True
matplotlib_settings:
# recommended colormaps: 'viridis' or 'cividis'
cmap: "cividis"
# recommended styles: 'tableau-colorblind10' or 'seaborn-colorblind'
style: "seaborn-colorblind"
rc: { "font_size": 8, "font_family": "Fira Sans" }
scale: 1.5 # scale all plots by a factor
seaborn_settings:
theme:
style: "whitegrid"
context: "talk"
# override context and/or style
rc_context: {}
rc_style: { "xtick.bottom": True, "ytick.left": True }

# Requirements data
load_requirements: True
requirements_input_directory: ""

# CTAMARS reference data
# available at https://forge.in2p3.fr/projects/step-by-step-reference-mars-analysis/wiki
load_CTAMARS: False
# this is a setup *required* to run the notebooks smoothly!
input_data_CTAMARS:
parent_directory: ""
TRAINING/DL1: "TRAINING/DL1"
TRAINING/DL2: "TRAINING/DL2"
DL2: "" # not available
DL3:
indir: "DL3"
infile: ""
label: "CTAMARS"

# EVENTDISPLAY reference data (only ROOT format, for the moment)
# available from https://forge.in2p3.fr/projects/cta_analysis-and-simulations/wiki#Instrument-Response-Functions
load_EventDisplay: True
input_data_EventDisplay:
input_directory:
input_file:
label: "EventDisplay"

# Input data
input_filenames:
# The simtel file is supposed to be used as a test run
# WARNING: CTAMARS comparison requires a specific simtel file, see notebook.
simtel: "" # (only) this is meant to be a full path
# This is data produced with protopipe
# These files are pre-defined so you shouldn't need to edit them
TRAINING_energy_gamma: "TRAINING_energy_tail_gamma_merged.h5"
TRAINING_classification_gamma: "TRAINING_classification_tail_gamma_merged.h5"
TRAINING_classification_proton: "TRAINING_classification_tail_proton_merged.h5"
DL2_gamma: "DL2_tail_gamma_merged.h5"
DL2_proton: "DL2_tail_proton_merged.h5"
DL2_electron: "DL2_tail_electron_merged.h5"
# The DL3 filename depends on the simulation and analysis settings
# Defined by editing performance.yaml
DL3: ""

model_configuration_filenames:
energy: "RandomForestRegressor.yaml"
classification: "RandomForestClassifier.yaml"

# This MUST be data produced with ctapipe-process
# with the JSON files available from protopipe or custom ones
input_filenames_ctapipe:
DL1a_gamma: "events_protopipe_CTAMARS_calibration_1stPass.dl1.h5"
DL1a_gamma_2ndPass: "events_protopipe_CTAMARS_calibration_2ndPass.dl1.h5"
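Once this file has been parsed into a dictionary (the script imports ``load_config`` from ``protopipe.pipeline.io`` for that), the ``--kwargs`` overrides can simply be layered on top. A minimal sketch, with a plain dict standing in for the parsed YAML (illustrative; only the key names are taken from the example above):

```python
def apply_overrides(config, overrides):
    """Return a copy of the parsed configuration with command-line
    overrides applied on top; unknown keys are added as-is."""
    merged = dict(config)  # shallow copy, original config is untouched
    merged.update(overrides)
    return merged


# A plain dict standing in for the parsed benchmarks.yaml
config = {"analysis_name": "ANALYSIS_NAME", "use_seaborn": True}
merged = apply_overrides(config, {"analysis_name": "test_analysis"})
```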

1 change: 1 addition & 0 deletions docs/scripts/index.rst
@@ -23,6 +23,7 @@ Details
model_diagnostics
DL2
optimization_cuts_IRFs
benchmark_script

The tables created in the :ref:`data_training` and :ref:`DL2` steps are written
on disk in HDF5_ format using the PyTables_ Python module.
Binary file removed docs/usage/AnalysisTree.png
Binary file added docs/usage/example_creation_analysis_tree.png