From 240d3cc636e8eefe1ba7a934306559cff8d9d536 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 5 Sep 2024 10:47:32 +0200 Subject: [PATCH] Make it possible to patch ParticleID meta information when patching collections (#193) * Avoid unnecessary copies of strings and vectors * Remove unused variable * Fix some trailing whitespaces * Fix docstring * Improve readability with structured bindings * Use std::tie to get rid of unnecessary temporary * Get rid of unnecessary branch * Keep track of ParticleID metadata while scanning * Make PIDMeta public to satisfy ROOT * Patch ParticleID meta data on request * Set the PID meta data after all collections have been converted * Make things work with c++17 * Add patching grammar to docstring --- src/cpp/include/UTIL/CheckCollections.h | 66 +++- src/cpp/src/UTIL/CheckCollections.cc | 404 ++++++++++++++++-------- 2 files changed, 329 insertions(+), 141 deletions(-) diff --git a/src/cpp/include/UTIL/CheckCollections.h b/src/cpp/include/UTIL/CheckCollections.h index 03b3b1621..b326145a8 100644 --- a/src/cpp/include/UTIL/CheckCollections.h +++ b/src/cpp/include/UTIL/CheckCollections.h @@ -9,7 +9,8 @@ namespace UTIL { - +class PIDHandler; + /** Utility class for checking and patching events with respect to collections that are not present * in every event of a set of files. * @@ -55,26 +56,69 @@ namespace UTIL { Vector getConsistentCollections() const ; /** Add a collection with (name,type) that should be added to events in patchEvent(). + * + * Depending on the contents of name and type one of the following things + * will happen: + * + * - if type is an LCIO type an empty collection of the given type will be + put into the event using the passed name + * - if type is `LCRelation[,]` an LCRelation collection + * will be created setting the and as FromType and + * ToType collection parameters. 
+ * - if type contains a '|' the whole (name, type) pair will be considered + * to be ParticleID meta information and the name will be used as a PID + * algorithm name and the type information will be parsed as + * <RecoColl>|[<name1>[,<names>...]], i.e. a ParticleID + * algorithm of name <name> will be attached to the ReconstructedParticle + * collection with name <RecoColl>. Additionally any parameter names + * that are in the comma-separated list after the '|' will be set via the + * PIDHandler for this ParticleID algorithm */ - void addPatchCollection(const std::string name, std::string type){ - _patchCols.push_back( {name, type} ) ; - } + void addPatchCollection(std::string name, std::string type); - /** Add a all collections as Vector(name,type), e.g. retrieved from getMissingCollections() that should be added to events in patchEvent(). + /** Add all collections as Vector(name,type), e.g. retrieved from getMissingCollections() that should be added to events in patchEvent(). */ - void addPatchCollections(Vector cols){ - for(const auto& p : cols) - _patchCols.push_back( p ) ; - } + void addPatchCollections(Vector cols); /** Add and empty collection to the event for any collection that is in patchCollections and not in the Event */ void patchCollections(EVENT::LCEvent* evt ) const ; - + + /// Metadata for ParticleIDs that are handled via the PIDHandler. Necessary + /// for consistency with EDM4hep, where ParticleID no longer lives in + /// ReconstructedParticle and where the direction of the relation has been + /// reversed. 
+ struct PIDMeta { + // c++17 doesn't yet have aggregate initialization in vectors, so we + // need this constructor + PIDMeta(const std::string &n, const std::vector &parN, + uint32_t c = 0) + : name(n), paramNames(parN), count(c) {} + + // Since we have one non-default constructor we need to default the rest + // explicitly + constexpr PIDMeta() = default; + PIDMeta(const PIDMeta &) = default; + PIDMeta &operator=(const PIDMeta &) = default; + PIDMeta(PIDMeta &&) = default; + PIDMeta &operator=(PIDMeta &&) = default; + ~PIDMeta() = default; + + std::string name{}; ///< algorithm name + std::vector paramNames{}; ///< parameter names + uint32_t count{}; ///< How often this was found + }; + private: + + void insertParticleIDMetas(const UTIL::PIDHandler& pidHandler, const std::string& recoName); + unsigned _nEvents =0 ; std::unordered_map< std::string, std::pair< std::string, unsigned > > _map{} ; - Vector _patchCols {} ; + /// Map from ReconstructedParticle collection names to attached ParticleID + /// meta information + std::unordered_map> _particleIDMetas{}; + Vector _patchCols{}; }; // class diff --git a/src/cpp/src/UTIL/CheckCollections.cc b/src/cpp/src/UTIL/CheckCollections.cc index 2c58c7b1d..c13778581 100644 --- a/src/cpp/src/UTIL/CheckCollections.cc +++ b/src/cpp/src/UTIL/CheckCollections.cc @@ -1,177 +1,321 @@ #include "UTIL/CheckCollections.h" +#include "Exceptions.h" #include "lcio.h" -#include "MT/LCReader.h" #include "IMPL/LCCollectionVec.h" +#include "MT/LCReader.h" +#include "UTIL/PIDHandler.h" +#include #include -namespace UTIL{ - - void CheckCollections::checkFiles( const std::vector& fileNames, bool quiet){ - - for( auto n : fileNames ) - checkFile( n ,quiet) ; - } - - void CheckCollections::checkFile( const std::string& fileName, bool quiet){ - - MT::LCReader lcReader(MT::LCReader::directAccess) ; - lcReader.open( fileName ) ; - //----------- the event loop ----------- - while( const auto evt = lcReader.readNextEventHeader() ) { - - const auto* 
colNames = evt->getCollectionNames() ; - - for(const auto& name : *colNames){ - - auto it = _map.find( name ) ; - - if( it == _map.end() ){ - - auto col = evt->getCollection( name ) ; - // If the type of a collection is LCRelation we want to read the entire - // collections instead of just the header to get the 'ToType' and - // 'FromType'. setReadCollectionNames({name}) allows reading of only - // certain collections by name instead of an entire event. This flag has to - // be unset after reading in order for the reading of the headers to - // function properly. - std::string typeString; - if (col->getTypeName() == "LCRelation"){ - lcReader.setReadCollectionNames({name}); - auto fullEvt = lcReader.readEvent(evt->getRunNumber(), evt->getEventNumber()); - lcReader.setReadCollectionNames({}); - - auto fullcol = fullEvt->getCollection( name ) ; - const auto& params = fullcol->getParameters(); - const auto& fromType = params.getStringVal("FromType"); - const auto& toType = params.getStringVal("ToType"); - if (quiet == false){ - if (fromType == ""|| toType == ""){ - std::cout<< "WARNING! : Relation " << name <<" does not have the 'FromType' and 'ToType' set."< &fileNames, + bool quiet) { + + for (auto n : fileNames) + checkFile(n, quiet); +} + +void CheckCollections::checkFile(const std::string &fileName, bool quiet) { + + MT::LCReader lcReader(MT::LCReader::directAccess); + lcReader.open(fileName); + //----------- the event loop ----------- + while (const auto evt = lcReader.readNextEventHeader()) { + const auto *colNames = evt->getCollectionNames(); + std::vector recoCollections{}; + + for (const auto &name : *colNames) { + const auto col = evt->getCollection(name); + auto typeString = col->getTypeName(); + + // For ReconstructedParticle we also have to check the ParticleIDs for + // consistency. 
We have to do this regardless of whether the + // ReconstructedParticle collection is already in the map or not + if (typeString == "ReconstructedParticle") { + recoCollections.emplace_back(name); + } + + auto it = _map.find(name); + if (it == _map.end()) { + // If the type of a collection is LCRelation we want to read the entire + // collections instead of just the header to get the 'ToType' and + // 'FromType'. setReadCollectionNames({name}) allows reading of only + // certain collections by name instead of an entire event. This flag has + // to be unset after reading in order for the reading of the headers to + // function properly. + if (col->getTypeName() == "LCRelation") { + lcReader.setReadCollectionNames({name}); + auto fullEvt = + lcReader.readEvent(evt->getRunNumber(), evt->getEventNumber()); + lcReader.setReadCollectionNames({}); + + auto fullcol = fullEvt->getCollection(name); + const auto &params = fullcol->getParameters(); + const auto &fromType = params.getStringVal("FromType"); + const auto &toType = params.getStringVal("ToType"); + if (quiet == false) { + if (fromType == "" || toType == "") { + std::cout << "WARNING! : Relation " << name + << " does not have the 'FromType' and 'ToType' set." 
+ << std::endl; + } + } + typeString = "LCRelation[" + fromType + "," + toType + "]"; } + std::tie(it, std::ignore) = + _map.emplace(name, std::make_pair(std::move(typeString), 0)); } - typeString = "LCRelation["+fromType+","+toType+"]"; + + it->second.second++; } - else { - typeString = col->getTypeName(); + + lcReader.setReadCollectionNames(recoCollections); + auto fullEvt = + lcReader.readEvent(evt->getRunNumber(), evt->getEventNumber()); + lcReader.setReadCollectionNames({}); + + for (const auto &name : recoCollections) { + auto handler = PIDHandler(fullEvt->getCollection(name)); + insertParticleIDMetas(handler, name); } - const auto[ itx, inserted] = _map.emplace( name, std::make_pair( std::move(typeString) , 0 ) ) ; - it = itx ; - } + _nEvents++; + } - it->second.second ++ ; - } + lcReader.close(); +} + +void CheckCollections::insertParticleIDMetas(const UTIL::PIDHandler &pidHandler, + const std::string &recoName) { + const auto &algoIds = pidHandler.getAlgorithmIDs(); + auto mapIt = _particleIDMetas.find(recoName); + if (mapIt == _particleIDMetas.end()) { + std::tie(mapIt, std::ignore) = + _particleIDMetas.emplace(recoName, std::vector{}); + } - _nEvents ++ ; + auto &pidMetas = mapIt->second; + for (const auto id : algoIds) { + const auto &name = pidHandler.getAlgorithmName(id); + + if (auto it = std::find_if( + pidMetas.begin(), pidMetas.end(), + [&name](const auto &pidMeta) { return pidMeta.name == name; }); + it == pidMetas.end()) { + pidMetas.emplace_back(name, pidHandler.getParameterNames(id), 1); + } else { + it->count++; } + } +} + +CheckCollections::Vector CheckCollections::getMissingCollections() const { + Vector s; + for (const auto &e : _map) { + if (e.second.second != _nEvents) + s.push_back({e.first, e.second.first}); + } + return s; +} - lcReader.close() ; +CheckCollections::Vector CheckCollections::getConsistentCollections() const { + Vector s; + for (auto e : _map) { + if (e.second.second == _nEvents) + s.push_back({e.first, e.second.first}); 
} + return s; +} +// Obtain the name of the reconstructed particle collection as well as the +// parameter names from an encoded "RecoColl|[name1[,names...]]" +std::tuple<std::string, std::vector<std::string>> +getRecoCollAndParamNames(const std::string_view fullType) { + auto delim = fullType.find('|'); + auto recoName = std::string(fullType.substr(0, delim)); - CheckCollections::Vector CheckCollections::getMissingCollections() const { - Vector s ; - for(const auto& e : _map ){ - if( e.second.second != _nEvents ) - s.push_back( {e.first, e.second.first } ) ; - } - return s ; + std::vector<std::string> paramNames{}; + while (delim != std::string_view::npos) { + auto oldDelim = delim + 1; + delim = fullType.find(',', oldDelim); + paramNames.emplace_back(fullType.substr(oldDelim, delim - oldDelim)); // substr takes (pos, count), not (begin, end) } - - CheckCollections::Vector CheckCollections::getConsistentCollections() const { - Vector s ; - for(auto e : _map ){ - if( e.second.second == _nEvents ) - s.push_back( {e.first, e.second.first }) ; - } - return s ; - } + return {recoName, paramNames}; +} - // Obtain the from and to type from the encoded "LCRelation[From,To]" - std::tuple getToFromType(const std::string_view fullType) { - auto delim = fullType.find(','); - constexpr auto prefixLen = 11u; // length of "LCRelation[" +void CheckCollections::addPatchCollection(std::string name, std::string type) { + if (type.find('|') != std::string::npos) { + auto [recoName, paramNames] = getRecoCollAndParamNames(type); // the encoded string is in type; name is the PID algorithm name + _particleIDMetas[recoName].emplace_back(name, std::move(paramNames)); + } else { + _patchCols.emplace_back(std::move(name), std::move(type)); + } +} - return {fullType.substr(prefixLen, delim - prefixLen), - fullType.substr(delim + 1, fullType.size() - delim - 2)}; // need to strip final "]" as well +void CheckCollections::addPatchCollections(Vector cols) { + for (auto &&[name, type] : cols) { + if (type.find('|') != std::string::npos) { + auto [recoName, paramNames] = getRecoCollAndParamNames(type); + _particleIDMetas[recoName].emplace_back(name, 
std::move(paramNames)); + } else { + _patchCols.emplace_back(std::move(name), std::move(type)); + } } +} - void CheckCollections::patchCollections(EVENT::LCEvent* evt ) const { +// Obtain the from and to type from the encoded "LCRelation[From,To]" +std::tuple +getToFromType(const std::string_view fullType) { + auto delim = fullType.find(','); + constexpr auto prefixLen = 11u; // length of "LCRelation[" - for(const auto& c : _patchCols ){ + return {fullType.substr(prefixLen, delim - prefixLen), + fullType.substr(delim + 1, fullType.size() - delim - + 2)}; // need to strip final "]" as well +} - try{ - auto* coll = evt->getCollection( c.first ) ; +// Add all algorithms that are specified in the pidMetas to the PIDHandler, such +// that the necessary metadata is present +void patchParticleIDs(UTIL::PIDHandler &pidHandler, + const std::vector &pidMetas) { + for (const auto &[name, paramNames, _] : pidMetas) { + try { + // simply assume that param names are OK if we find the algorithm + pidHandler.getAlgorithmID(name); + } catch (UnknownAlgorithm &) { + pidHandler.addAlgorithm(name, paramNames); + } + } +} + +void CheckCollections::patchCollections(EVENT::LCEvent *evt) const { + for (const auto &[name, typeName] : _patchCols) { + try { + auto *coll = evt->getCollection(name); + const auto collType = coll->getTypeName(); + if (collType == "LCRelation") { // For LCRelations we still have to check whether the FromType and // ToType are set and correct in case they are not - if (coll->getTypeName() == "LCRelation") { - auto& params = coll->parameters(); - if (params.getStringVal("FromType").empty() || params.getStringVal("ToType").empty()) { - const auto [from, to] = getToFromType(c.second); - params.setValue("FromType", std::string(from)); - params.setValue("ToType", std::string(to)); - } - } - } catch( EVENT::DataNotAvailableException& e) { - //10 is the length of the String LCRelation after which the bracket is and the "ToType" and "FromType" start. 
- if (c.second.size() > 10 && c.second[10] == '[') { - auto relationColl = new IMPL::LCCollectionVec("LCRelation"); - auto& params = relationColl->parameters(); - - const auto [from, to] = getToFromType(c.second); + auto &params = coll->parameters(); + if (params.getStringVal("FromType").empty() || + params.getStringVal("ToType").empty()) { + const auto [from, to] = getToFromType(typeName); params.setValue("FromType", std::string(from)); params.setValue("ToType", std::string(to)); - evt->addCollection( relationColl, c.first ) ; - } else { - evt->addCollection( new IMPL::LCCollectionVec(c.second), c.first ) ; } } + } catch (EVENT::DataNotAvailableException &e) { + // 10 is the length of the String LCRelation after which the bracket is + // and the "ToType" and "FromType" start. + if (typeName.size() > 10 && typeName[10] == '[') { + auto relationColl = new IMPL::LCCollectionVec("LCRelation"); + auto &params = relationColl->parameters(); + + const auto [from, to] = getToFromType(typeName); + params.setValue("FromType", std::string(from)); + params.setValue("ToType", std::string(to)); + evt->addCollection(relationColl, name); + } else { + evt->addCollection(new IMPL::LCCollectionVec(typeName), name); + } } } - - void CheckCollections::print( std::ostream& os ,bool minimal) const { + for (const auto &[recoName, pidMeta] : _particleIDMetas) { + // Let the exception propagate. 
This is not something that we can easily + // handle in any meaningful way, so make users aware as early as possible + auto pidHandler = UTIL::PIDHandler(evt->getCollection(recoName)); + patchParticleIDs(pidHandler, pidMeta); + } +} + +void CheckCollections::print(std::ostream &os, bool minimal) const { + + unsigned width = 50; + if (minimal == false) { + os << " ================================================================ " + << std::endl; + os << std::endl << " " << _nEvents << " events read " << std::endl; + os << " collections that are not in all events : [# events where col " + "is present]" + << std::endl; + os << " ================================================================ " + << std::endl; + } + if (minimal == false) { + for (auto e : _map) { - unsigned width = 50 ; - if (minimal == false){ - os << " ================================================================ " << std::endl ; - os << std::endl << " " << _nEvents << " events read " << std::endl ; - os << " collections that are not in all events : [# events where col is present]" << std::endl ; - os << " ================================================================ " << std::endl ; + if (e.second.second != _nEvents) + os << " " << std::setw(width) << std::left << e.first << " " + << std::setw(width) << e.second.first << " [" << e.second.second + << "]" << std::endl; } - if (minimal == false){ - for(auto e : _map ){ - - if( e.second.second != _nEvents ) - os << " " << std::setw(width) << std::left << e.first << " " <