diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e9cf2cf..72b6280a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ find_package(k4FWCore) # implicit: Gaudi find_package(EDM4HEP 0.10.1) # implicit: Podio find_package(DD4hep) find_package(k4geo) +find_package(ONNXRuntime) #--------------------------------------------------------------- diff --git a/README.md b/README.md index db8a5506..47860c00 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ source /cvmfs/sw-nightlies.hsf.org/key4hep/setup.sh * k4FWCore * DD4hep * k4geo +* ONNXRuntime ## Building diff --git a/RecCalorimeter/src/components/CorrectCaloClusters.cpp b/RecCalorimeter/src/components/CorrectCaloClusters.cpp index adb1bccb..939c7d99 100644 --- a/RecCalorimeter/src/components/CorrectCaloClusters.cpp +++ b/RecCalorimeter/src/components/CorrectCaloClusters.cpp @@ -182,9 +182,9 @@ edm4hep::ClusterCollection* CorrectCaloClusters::initializeOutputClusters( for (auto const& inCluster: *inClusters) { auto outCluster = inCluster.clone(); verbose() << "Cluster position:" << endmsg; - verbose() << " x: " << outCluster.position().x << endmsg; - verbose() << " y: " << outCluster.position().y << endmsg; - verbose() << " z: " << outCluster.position().z << endmsg; + verbose() << " x: " << outCluster.getPosition().x << endmsg; + verbose() << " y: " << outCluster.getPosition().y << endmsg; + verbose() << " z: " << outCluster.getPosition().z << endmsg; outClusters->push_back(outCluster); } diff --git a/RecFCCeeCalorimeter/CMakeLists.txt b/RecFCCeeCalorimeter/CMakeLists.txt index 2e7bae6c..b9be7b18 100644 --- a/RecFCCeeCalorimeter/CMakeLists.txt +++ b/RecFCCeeCalorimeter/CMakeLists.txt @@ -2,6 +2,12 @@ # Package: RecFCCeeCalorimeter ################################################################################ +# ONNX DEPENDENCIES +# includes +include_directories("${ONNXRUNTIME_INCLUDE_DIRS}") +# libs +link_directories("${ONNXRUNTIME_LIBRARY_DIRS}") + file(GLOB _module_sources src/components/*.cpp) gaudi_add_module(k4RecFCCeeCalorimeterPlugins SOURCES ${_module_sources} @@ -16,8 +22,8 @@ gaudi_add_module(k4RecFCCeeCalorimeterPlugins DD4hep::DDG4 ROOT::Core ROOT::Hist + ${ONNXRUNTIME_LIBRARY} ) - install(TARGETS k4RecFCCeeCalorimeterPlugins EXPORT k4RecCalorimeterTargets RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT bin @@ -51,3 +57,4 @@ add_test(NAME FCCeeLAr_benchmarkCalibration WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} ) set_test_env(FCCeeLAr_benchmarkCalibration) + diff --git a/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.cpp b/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.cpp new file mode 100644 index 00000000..01b408bb --- /dev/null +++ b/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.cpp @@ -0,0 +1,361 @@ +#include "CalibrateCaloClusters.h" + +// Key4HEP +#include "k4Interface/IGeoSvc.h" + +// FCC Detectors +#include "detectorCommon/DetUtils_k4geo.h" + +// DD4hep +#include "DD4hep/Detector.h" + +// our EDM +#include "edm4hep/Cluster.h" +#include "edm4hep/ClusterCollection.h" +#include "edm4hep/CalorimeterHitCollection.h" + +DECLARE_COMPONENT(CalibrateCaloClusters) + +// convert vector data with given shape into ONNX runtime tensor +template +Ort::Value vec_to_tensor(std::vector &data, const std::vector &shape) +{ + Ort::MemoryInfo mem_info = + Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + auto tensor = Ort::Value::CreateTensor(mem_info, data.data(), data.size(), shape.data(), shape.size()); + return tensor; +} + +CalibrateCaloClusters::CalibrateCaloClusters(const std::string &name, + ISvcLocator *svcLoc) + : Gaudi::Algorithm(name, svcLoc), + m_geoSvc("GeoSvc", "CalibrateCaloClusters") +{ + declareProperty("inClusters", m_inClusters, + "Input cluster collection"); + declareProperty("outClusters", m_outClusters, + "Calibrated (output) cluster collection"); +} + +StatusCode CalibrateCaloClusters::initialize() +{ + // Initialize base class + { + StatusCode sc = Gaudi::Algorithm::initialize(); + if (sc.isFailure()) + { + return sc; + } + } + + // Check if readouts exist + for (size_t i = 0; i < m_readoutNames.size(); ++i) + { + auto readouts = m_geoSvc->getDetector()->readouts(); + if (readouts.find(m_readoutNames.value().at(i)) == readouts.end()) + { + error() << "Missing readout, exiting!" << endmsg; + return StatusCode::FAILURE; + } + } + + // Check if readout related variables have the same size + if (m_systemIDs.size() != m_readoutNames.size()) + { + error() << "Sizes of the systemIDs vector and readoutNames vector do not match, exiting!" << endmsg; + return StatusCode::FAILURE; + } + if (m_systemIDs.size() != m_numLayers.size()) + { + error() << "Sizes of systemIDs vector and numLayers vector do not match, exiting!" << endmsg; + return StatusCode::FAILURE; + } + if (m_systemIDs.size() != m_layerFieldNames.size()) + { + error() << "Sizes of systemIDs vector and layerFieldNames vector do not match, exiting!" << endmsg; + return StatusCode::FAILURE; + } + if (m_systemIDs.size() != m_firstLayerIDs.size()) + { + error() << "Sizes of systemIDs vector and firstLayerIDs vector do not match, exiting!" << endmsg; + return StatusCode::FAILURE; + } + + // calculate total number of layers summed over the various subsystems + m_numLayersTotal = 0; + for (size_t i = 0; i < m_numLayers.size(); ++i) + { + m_numLayersTotal += m_numLayers[i]; + } + + // Initialize the calibration tool + StatusCode sc = readCalibrationFile(m_calibrationFile); + if (sc.isFailure()) + { + error() << "Initialization of calibration tool correction functions not successful!" << endmsg; + return sc; + } + info() << "Initialized the calibration" << endmsg; + return StatusCode::SUCCESS; +} + +StatusCode CalibrateCaloClusters::execute(const EventContext& evtCtx) const +{ + (void) evtCtx; // event context not used + + verbose() << "-------------------------------------------" << endmsg; + + // Get the input collection with clusters + const edm4hep::ClusterCollection *inClusters = m_inClusters.get(); + + // Initialize output clusters + edm4hep::ClusterCollection *outClusters = initializeOutputClusters(inClusters); + if (!outClusters) + { + error() << "Something went wrong in initialization of the output cluster collection, exiting!" << endmsg; + return StatusCode::FAILURE; + } + if (inClusters->size() != outClusters->size()) + { + error() << "Sizes of input and output cluster collections does not match, exiting!" << endmsg; + return StatusCode::FAILURE; + } + + // Apply calibration + { + StatusCode sc = calibrateClusters(inClusters, outClusters); + if (sc.isFailure()) + { + return sc; + } + } + + return StatusCode::SUCCESS; +} + +StatusCode CalibrateCaloClusters::finalize() +{ + if (m_ortSession) + delete m_ortSession; + if (m_ortEnv) + delete m_ortEnv; + + return Gaudi::Algorithm::finalize(); +} + +edm4hep::ClusterCollection *CalibrateCaloClusters::initializeOutputClusters( + const edm4hep::ClusterCollection *inClusters) const +{ + edm4hep::ClusterCollection *outClusters = m_outClusters.createAndPut(); + + for (auto const &inCluster : *inClusters) + { + auto outCluster = inCluster.clone(); + outClusters->push_back(outCluster); + } + + return outClusters; +} + +StatusCode CalibrateCaloClusters::readCalibrationFile(const std::string &calibrationFile) +{ + // set logging level based on output level of this alg + OrtLoggingLevel loggingLevel = ORT_LOGGING_LEVEL_WARNING; + MSG::Level outputLevel = this->msgStream().level(); + switch (outputLevel) + { + case MSG::Level::FATAL: // 6 + loggingLevel = ORT_LOGGING_LEVEL_FATAL; // 4 + break; + case MSG::Level::ERROR: // 5 + loggingLevel = ORT_LOGGING_LEVEL_ERROR; // 3 + break; + case MSG::Level::WARNING: // 4 + loggingLevel = ORT_LOGGING_LEVEL_WARNING; // 2 + break; + case MSG::Level::INFO: // 3 + loggingLevel = ORT_LOGGING_LEVEL_WARNING; // 2 (ORT_LOGGING_LEVEL_INFO too verbose..) + break; + case MSG::Level::DEBUG: // 2 + loggingLevel = ORT_LOGGING_LEVEL_INFO; // 1 + break; + case MSG::Level::VERBOSE: // 1 + loggingLevel = ORT_LOGGING_LEVEL_VERBOSE; // 0 + break; + default: + break; + } + try + { + m_ortEnv = new Ort::Env(loggingLevel, "ONNX runtime environment"); + Ort::SessionOptions session_options; + session_options.SetIntraOpNumThreads(1); + m_ortSession = new Ort::Experimental::Session(*m_ortEnv, const_cast(calibrationFile), session_options); + } + catch (const Ort::Exception &exception) + { + error() << "ERROR setting up ONNX runtime environment: " << exception.what() << endmsg; + return StatusCode::FAILURE; + } + + // print name/shape of inputs + // use default allocator (CPU) + Ort::AllocatorWithDefaultOptions allocator; + debug() << "Input Node Name/Shape (" << m_input_names.size() << "):" << endmsg; + for (std::size_t i = 0; i < m_ortSession->GetInputCount(); i++) + { + m_input_names.emplace_back(m_ortSession->GetInputName(i, allocator)); + m_input_shapes = m_ortSession->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); + debug() << "\t" << m_input_names.at(i) << " : "; + for (std::size_t k = 0; k < m_input_shapes.size() - 1; k++) + { + debug() << m_input_shapes[k] << "x"; + } + debug() << m_input_shapes[m_input_shapes.size() - 1] << endmsg; + } + // some models might have negative shape values to indicate dynamic shape, e.g., for variable batch size. + for (auto &s : m_input_shapes) + { + if (s < 0) + { + s = 1; + } + } + + // print name/shape of outputs + debug() << "Output Node Name/Shape (" << m_output_names.size() << "):" << endmsg; + for (std::size_t i = 0; i < m_ortSession->GetOutputCount(); i++) + { + m_output_names.emplace_back(m_ortSession->GetOutputName(i, allocator)); + m_output_shapes = m_ortSession->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); + debug() << "\t" << m_output_names.at(i) << " : "; + for (std::size_t k = 0; k < m_output_shapes.size() - 1; k++) + { + debug() << m_output_shapes[k] << "x"; + } + debug() << m_output_shapes[m_output_shapes.size() - 1] << endmsg; + } + + // the output should be a single value (the correction) + // and the inputs should be n(layers)+1 (fractions + total E) + // the first dimension of the tensors are the number of clusters + // to be calibrated simultaneously (-1 = dynamic) + // we will calibrate once at a time + if (m_input_shapes.size() != 2 || + m_output_shapes.size() != 2 || + m_input_shapes[1] != ((int)(m_numLayersTotal + 1)) || + m_output_shapes[1] != 1) + { + error() << "The input or output shapes in the calibration files do not match the expected architecture" << endmsg; + return StatusCode::FAILURE; + } + + return StatusCode::SUCCESS; +} + +StatusCode CalibrateCaloClusters::calibrateClusters(const edm4hep::ClusterCollection *inClusters, + edm4hep::ClusterCollection *outClusters) const +{ + + // this vector will contain the input features for the calibration + // i.e. the fraction of energy in each layer and the total energy + std::vector energiesInLayers(m_numLayersTotal + 1); + + // loop over the input clusters and perform the calibration + for (size_t j = 0; j < inClusters->size(); ++j) + { + + // retrieve total cluster energy + float ecl = (inClusters->at(j)).getEnergy(); + if (ecl <= 0.) + { + warning() << "Energy in calorimeter <= 0, ignoring energy correction!" << endmsg; + continue; + } + verbose() << "Cluster energy before calibration: " << ecl << endmsg; + + // calculate cluster energy in each layer and normalize by total cluster energy + calcEnergiesInLayers(inClusters->at(j), energiesInLayers); + for (size_t k = 0; k < m_numLayersTotal; ++k) + { + energiesInLayers[k] /= ecl; + } + energiesInLayers[m_numLayersTotal] = ecl; + verbose() << "Calibration inputs:" << endmsg; + for (size_t k = 0; k < energiesInLayers.size(); ++k) + { + verbose() << " f" << k << " : " << energiesInLayers[k] << endmsg; + } + + // run the MVA calibration and correct the cluster energy + float corr = 1.0; + // Create a single Ort tensor + std::vector input_tensors; + input_tensors.emplace_back(vec_to_tensor(energiesInLayers, m_input_shapes)); + + // double-check the dimensions of the input tensor + // assert(input_tensors[0].IsTensor() && input_tensors[0].GetTensorTypeAndShapeInfo().GetShape() == m_input_shapes); + + // pass data through model + try + { + std::vector output_tensors = m_ortSession->Run(m_input_names, + input_tensors, + m_output_names, + Ort::RunOptions{nullptr}); + + // double-check the dimensions of the output tensors + // NOTE: the number of output tensors is equal to the number of output nodes specifed in the Run() call + // assert(output_tensors.size() == output_names.size() && output_tensors[0].IsTensor()); + + float *outputData = output_tensors[0].GetTensorMutableData(); + corr = outputData[0]; + } + catch (const Ort::Exception &exception) + { + error() << "ERROR running model inference: " << exception.what() << endmsg; + return StatusCode::FAILURE; + } + + verbose() << "Calibration output: " << corr << endmsg; + outClusters->at(j).setEnergy(ecl * corr); + verbose() << "Corrected cluster energy: " << ecl * corr << endmsg; + } + + return StatusCode::SUCCESS; +} + +void CalibrateCaloClusters::calcEnergiesInLayers(edm4hep::Cluster cluster, + std::vector &energiesInLayer) const +{ + // reset vector with energies per layer + std::fill(energiesInLayer.begin(), energiesInLayer.end(), 0.0); + + // loop over the various readouts/subsystems/... + int startPositionToFill = 0; + for (size_t k = 0; k < m_readoutNames.size(); k++) + { + if (k > 0) + { + startPositionToFill += m_numLayers[k - 1]; + } + int systemID = m_systemIDs[k]; + int firstLayer = m_firstLayerIDs[k]; + std::string layerField = m_layerFieldNames[k]; + std::string readoutName = m_readoutNames[k]; + dd4hep::DDSegmentation::BitFieldCoder *decoder = m_geoSvc->getDetector()->readout(readoutName).idSpec().decoder(); + + // for each readout, loop over the cells and calculate the energies in the layers + // of that subsystem + for (auto cell = cluster.hits_begin(); cell != cluster.hits_end(); ++cell) + { + dd4hep::DDSegmentation::CellID cellID = cell->getCellID(); + if (decoder->get(cellID, "system") != systemID) + { + continue; + } + int layer = decoder->get(cellID, layerField); + energiesInLayer[startPositionToFill + layer - firstLayer] += cell->getEnergy(); + } + } +} diff --git a/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.h b/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.h new file mode 100644 index 00000000..f5d8a13e --- /dev/null +++ b/RecFCCeeCalorimeter/src/components/CalibrateCaloClusters.h @@ -0,0 +1,142 @@ +#ifndef RECFCCEECALORIMETER_CALIBRATECALOCLUSTERS_H +#define RECFCCEECALORIMETER_CALIBRATECALOCLUSTERS_H + +// Key4HEP +#include "k4FWCore/DataHandle.h" + +// Gaudi +#include "GaudiKernel/Algorithm.h" +#include "GaudiKernel/ToolHandle.h" +#include "GaudiKernel/MsgStream.h" +class IGeoSvc; + +// EDM4HEP +namespace edm4hep { + class Cluster; + class ClusterCollection; + class CalorimeterHitCollection; +} + +// DD4HEP +namespace dd4hep { + namespace DDSegmentation { + class BitFieldCoder; + } +} + +// ONNX +#include "onnxruntime/core/session/experimental_onnxruntime_cxx_api.h" + +/** @class CalibrateCaloClusters + * + * Apply an MVA energy calibration to the clusters reconstructed in the calorimeter. + * + * @author Giovanni Marchiori + */ + +class CalibrateCaloClusters : public Gaudi::Algorithm { + +public: + CalibrateCaloClusters(const std::string& name, ISvcLocator* svcLoc); + + virtual StatusCode initialize(); + + virtual StatusCode execute(const EventContext& evtCtx) const; + + virtual StatusCode finalize(); + +private: + /** + * Initialize output calorimeter cluster collection. + * + * @param[in] inClusters Pointer to the input cluster collection. + * + * @return Pointer to the output cluster collection. + */ + edm4hep::ClusterCollection* initializeOutputClusters(const edm4hep::ClusterCollection* inClusters) const; + + /** + * Initialize vectors of upstream and downstream correction functions. + * + * @return Status code. + */ + StatusCode readCalibrationFile(const std::string& calibrationFileName); + + /** + * Apply the calibration to the input clusters. + * + * @param[in] inClusters Pointer to the input cluster collection. + * @param[out] outClusters Pointer to the output cluster collection. + * + * @return Status code. + */ + StatusCode calibrateClusters(const edm4hep::ClusterCollection* inClusters, + edm4hep::ClusterCollection* outClusters) const; + + /** + * Get sum of energy from cells in each layer. + * This energy is not calibrated. + * + * @param[in] cluster Pointer to cluster of interest. + * @param[out] energiesInLayer Reference to vector that will contain the energies + */ + void calcEnergiesInLayers(edm4hep::Cluster cluster, + std::vector& energiesInLayer) const; + + + /// Handle for input calorimeter clusters collection + mutable DataHandle m_inClusters { + "inClusters", Gaudi::DataHandle::Reader, this + }; + /// Handle for corrected (output) calorimeter clusters collection + mutable DataHandle m_outClusters { + "outClusters", Gaudi::DataHandle::Writer, this + }; + + /// Pointer to the geometry service + ServiceHandle m_geoSvc; + + /// IDs of the detectors + Gaudi::Property> m_systemIDs { + this, "systemIDs", {4}, "IDs of systems" + }; + /// Names of the detector readouts, corresponding to system IDs + Gaudi::Property> m_readoutNames { + this, "readoutNames", {"ECalBarrelModuleThetaMerged"}, + "Names of the detector readout, corresponding to systemID" + }; + /// Name of the layer field + Gaudi::Property> m_layerFieldNames { + this, "layerFieldNames", {"layer"}, + "Identifier of layers, corresponding to systemID" + }; + /// Numbers of layers of the detectors + Gaudi::Property> m_numLayers { + this, "numLayers", {12}, "Numbers of layers of the systems" + }; + /// IDs of the first layers of the detectors + Gaudi::Property> m_firstLayerIDs { + this, "firstLayerIDs", {0}, "IDs of first layers in the systems" + }; + + /// File with the calibration model + // Gaudi::Property> m_calibrationFiles { + // this, "calibrationFiles", {}, "Files with the calibration parameters"}; + Gaudi::Property m_calibrationFile { + this, "calibrationFile", {}, "File with the calibration parameters"}; + + // total number of layers summed over the various subsystems + // should be equal to the number of input features of the MVA + size_t m_numLayersTotal; + + // the ONNX runtime session for applying the calibration, + // the environment, and the input and output shapes and names + Ort::Experimental::Session* m_ortSession = nullptr; + Ort::Env* m_ortEnv = nullptr; + std::vector m_input_shapes; + std::vector m_output_shapes; + std::vector m_input_names; + std::vector m_output_names; +}; + +#endif /* RECFCCEECALORIMETER_CALIBRATECALOCLUSTERS_H */ diff --git a/cmake/FindONNXRuntime.cmake b/cmake/FindONNXRuntime.cmake new file mode 100644 index 00000000..b8f5ba15 --- /dev/null +++ b/cmake/FindONNXRuntime.cmake @@ -0,0 +1,14 @@ +find_path(ONNXRUNTIME_INCLUDE_DIR onnxruntime/core/session/onnxruntime_cxx_inline.h + HINTS $ENV{ONNXRUNTIME_ROOT_DIR}/include ${ONNXRUNTIME_ROOT_DIR}/include) + +find_library(ONNXRUNTIME_LIBRARY NAMES onnxruntime + HINTS $ENV{ONNXRUNTIME_ROOT_DIR}/lib ${ONNXRUNTIME_ROOT_DIR}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ONNXRuntime DEFAULT_MSG ONNXRUNTIME_INCLUDE_DIR ONNXRUNTIME_LIBRARY) + +mark_as_advanced(ONNXRUNTIME_FOUND ONNXRUNTIME_INCLUDE_DIR ONNXRUNTIME_LIBRARY) + +set(ONNXRUNTIME_INCLUDE_DIRS ${ONNXRUNTIME_INCLUDE_DIR}) +set(ONNXRUNTIME_LIBRARIES ${ONNXRUNTIME_LIBRARY}) +get_filename_component(ONNXRUNTIME_LIBRARY_DIRS ${ONNXRUNTIME_LIBRARY} PATH)