From 7d5cf669c48506a6b8ea76d87cd86632ccabd40e Mon Sep 17 00:00:00 2001 From: Kyle Reed Date: Wed, 18 Sep 2024 17:20:59 -0700 Subject: [PATCH 1/4] Add support for parsing and caching TraceLogging events. --- krabs/krabs/filtering/event_filter.hpp | 2 +- krabs/krabs/schema_locator.hpp | 145 +++++++++++++++++++++++-- 2 files changed, 134 insertions(+), 13 deletions(-) diff --git a/krabs/krabs/filtering/event_filter.hpp b/krabs/krabs/filtering/event_filter.hpp index e24c505..b317b32 100644 --- a/krabs/krabs/filtering/event_filter.hpp +++ b/krabs/krabs/filtering/event_filter.hpp @@ -61,7 +61,7 @@ namespace krabs { * Given optional predicate will be applied to ETW API filtered results * */ - event_filter(unsigned short event_id, filter_predicate predicate=nullptr); + event_filter(unsigned short event_id, filter_predicate predicate = nullptr); /** * diff --git a/krabs/krabs/schema_locator.hpp b/krabs/krabs/schema_locator.hpp index e3ec591..aa97069 100644 --- a/krabs/krabs/schema_locator.hpp +++ b/krabs/krabs/schema_locator.hpp @@ -32,28 +32,84 @@ namespace krabs { struct schema_key { guid provider; + + // Using a string_view for name so that keys can be constructed from + // EVENT_RECORD pointers without allocation. If the key instance is + // added to the cache, 'internalize_name' must be called first so that + // the name points to owned memory and doesn't dangle. + // Only events logged with the TraceLogger API will have a name set + // in the key because it's available as part of the EVENT_RECORD. + // Other events are uniquely distinguished by their event Id. + std::string_view name; + uint16_t id; - uint8_t opcode; uint8_t version; + uint8_t opcode; uint8_t level; + uint64_t keyword; + + private: + // See note on 'name', this is only set when internalized and + // provides memory ownership for the string_view. + std::unique_ptr backing_name; - schema_key(const EVENT_RECORD &record) + public: + schema_key(const EVENT_RECORD &record, const char* name = nullptr) : provider(record.EventHeader.ProviderId) + , name(name ? std::string_view{name} : std::string_view{}) , id(record.EventHeader.EventDescriptor.Id) + , version(record.EventHeader.EventDescriptor.Version) , opcode(record.EventHeader.EventDescriptor.Opcode) , level(record.EventHeader.EventDescriptor.Level) - , version(record.EventHeader.EventDescriptor.Version) { } + , keyword(record.EventHeader.EventDescriptor.Keyword) { } + + schema_key(const schema_key &rhs) + : provider(rhs.provider) + , name(rhs.name) + , id(rhs.id) + , version(rhs.version) + , opcode(rhs.opcode) + , level(rhs.level) + , keyword(rhs.keyword) + { + internalize_name(); + } + + schema_key& operator=(const schema_key &rhs) + { + schema_key temp(rhs); + std::swap(*this, temp); + return *this; + } bool operator==(const schema_key &rhs) const { + // NB: Compare 'name' last for perf. Do not compare 'backing_name'. return provider == rhs.provider && id == rhs.id && + version == rhs.version && opcode == rhs.opcode && level == rhs.level && - version == rhs.version; + keyword == rhs.keyword && + name == rhs.name; } bool operator!=(const schema_key &rhs) const { return !(*this == rhs); } + + /** + * + * Allocate the 'backing_name' and set 'name' to point at it. This must be + * called before adding the key to a cache so that the lifetime of the + * 'name' string_view matches the lifetime of the cached instance. + * + */ + void internalize_name() + { + if (!name.empty()) { + backing_name = std::make_unique(name); + name = *backing_name; + } + } }; } @@ -73,10 +129,12 @@ namespace std { size_t h = 2166136261; h ^= (h << 5) + (h >> 2) + std::hash()(key.provider); + h ^= (h << 5) + (h >> 2) + std::hash()(key.name); h ^= (h << 5) + (h >> 2) + key.id; - h ^= (h << 5) + (h >> 2) + key.opcode; h ^= (h << 5) + (h >> 2) + key.version; + h ^= (h << 5) + (h >> 2) + key.opcode; h ^= (h << 5) + (h >> 2) + key.level; + h ^= (h << 5) + (h >> 2) + key.keyword; return h; } @@ -92,6 +150,14 @@ namespace krabs { */ std::unique_ptr get_event_schema_from_tdh(const EVENT_RECORD &); + /** + * + * Returns a pointer to the event name if the specified event was logged + * with the TraceLogger API otherwise returns nullptr. + * + */ + const char* get_trace_logger_event_name(const EVENT_RECORD &); + /** * * Fetches and caches schemas from TDH. @@ -117,18 +183,73 @@ namespace krabs { // Implementation // ------------------------------------------------------------------------ + inline const char* get_trace_logger_event_name(const EVENT_RECORD & record) + { + char* metadataPtr = nullptr; + USHORT metadataSize = 0; + + // Look for a TraceLogger event schema in the extended data. + for (USHORT i = 0; i < record.ExtendedDataCount; ++i) { + auto& dataItem = record.ExtendedData[i]; + if (dataItem.ExtType == EVENT_HEADER_EXT_TYPE_EVENT_SCHEMA_TL) { + metadataSize = dataItem.DataSize; + metadataPtr = (char*)dataItem.DataPtr; + break; + } + } + + // Didn't find one or it was too small. + if (metadataPtr == nullptr || metadataSize < sizeof(USHORT)) { + return nullptr; + } + + // Ensure that the sizes match to prevent reading off the buffer. + USHORT structSize = *(USHORT*)metadataPtr; + if (structSize != metadataSize) { + return nullptr; + } + + // Skipping over the 'Extension' field of the block to find the name offset. + // Per code comment: Read until you hit a byte with high bit unset. + USHORT nameOffset = sizeof(USHORT); + while (nameOffset < structSize) { + char c = *(metadataPtr + nameOffset); + nameOffset++; // NB: always consume the character. + + // High-bit set? + if ((c & 0x80) != 0x80) { + break; + } + } + + // Ensure the offset found is valid. + if (nameOffset >= structSize) { + return nullptr; + } + + return metadataPtr + nameOffset; + } + inline const PTRACE_EVENT_INFO schema_locator::get_event_schema(const EVENT_RECORD &record) const { - // check the cache - auto key = schema_key(record); - auto& buffer = cache_[key]; + auto eventName = get_trace_logger_event_name(record); + auto key = schema_key(record, eventName); - if (!buffer) { - auto temp = get_event_schema_from_tdh(record); - buffer.swap(temp); + // Check the cache... + auto it = cache_.find(key); + if (it != cache_.end()) { + return (PTRACE_EVENT_INFO)it->second.get(); } - return (PTRACE_EVENT_INFO)(buffer.get()); + // Cache miss. Fetch the schema... + auto buffer = get_event_schema_from_tdh(record); + auto returnVal = (PTRACE_EVENT_INFO)buffer.get(); + + // Add the new instance to the cache. + // NB: key's 'internalize_name' gets called by the cctor here. + cache_.emplace(key, std::move(buffer)); + + return returnVal; } inline std::unique_ptr get_event_schema_from_tdh(const EVENT_RECORD &record) From 7eabb99cd9f49e1bc584df5ae6fe1a1e01c3d735 Mon Sep 17 00:00:00 2001 From: Kyle Reed Date: Thu, 19 Sep 2024 10:56:46 -0700 Subject: [PATCH 2/4] Use string_view instead of raw pointers, update unit tests --- krabs/krabs/schema_locator.hpp | 37 ++++++--- krabs/krabs/testing/record_builder.hpp | 5 ++ tests/krabstests/test_schema_key.cpp | 108 ++++++++++++++++++------- 3 files changed, 109 insertions(+), 41 deletions(-) diff --git a/krabs/krabs/schema_locator.hpp b/krabs/krabs/schema_locator.hpp index aa97069..d89565c 100644 --- a/krabs/krabs/schema_locator.hpp +++ b/krabs/krabs/schema_locator.hpp @@ -54,9 +54,9 @@ namespace krabs { std::unique_ptr backing_name; public: - schema_key(const EVENT_RECORD &record, const char* name = nullptr) + schema_key(const EVENT_RECORD &record, std::string_view name) : provider(record.EventHeader.ProviderId) - , name(name ? std::string_view{name} : std::string_view{}) + , name(name) , id(record.EventHeader.EventDescriptor.Id) , version(record.EventHeader.EventDescriptor.Version) , opcode(record.EventHeader.EventDescriptor.Opcode) @@ -152,11 +152,11 @@ namespace krabs { /** * - * Returns a pointer to the event name if the specified event was logged - * with the TraceLogger API otherwise returns nullptr. + * Returns a string_view to the event name if the specified event was logged + * with the TraceLogger API otherwise returns an empty string_view. * */ - const char* get_trace_logger_event_name(const EVENT_RECORD &); + std::string_view get_trace_logger_event_name(const EVENT_RECORD &); /** * @@ -183,8 +183,25 @@ namespace krabs { // Implementation // ------------------------------------------------------------------------ - inline const char* get_trace_logger_event_name(const EVENT_RECORD & record) + inline std::string_view get_trace_logger_event_name(const EVENT_RECORD & record) { + /* This implements part of the parsing that TDH would normally do so that + * a schema_key can be built without calling TDH (which is expensive). + * Here's pseudo code from the TraceLogger header describing the layout. + * + * // EventMetadata: + * // This pseudo-structure is the layout of the "event metadata" referenced by + * // EVENT_DATA_DESCRIPTOR_TYPE_EVENT_METADATA. + * // It provides the event's name, event tags, and field information. + * struct EventMetadata // Variable-length pseudo-structure, byte-aligned, tightly-packed. + * { + * UINT16 Size; // = sizeof(EventMetadata) + * UINT8 Extension[]; // 1 or more bytes. Read until you hit a byte with high bit unset. + * char Name[]; // UTF-8 nul-terminated event name + * FieldMetadata Fields[]; // 0 or more field definitions. + * }; + */ + char* metadataPtr = nullptr; USHORT metadataSize = 0; @@ -200,13 +217,13 @@ namespace krabs { // Didn't find one or it was too small. if (metadataPtr == nullptr || metadataSize < sizeof(USHORT)) { - return nullptr; + return {}; } // Ensure that the sizes match to prevent reading off the buffer. USHORT structSize = *(USHORT*)metadataPtr; if (structSize != metadataSize) { - return nullptr; + return {}; } // Skipping over the 'Extension' field of the block to find the name offset. @@ -224,10 +241,10 @@ namespace krabs { // Ensure the offset found is valid. if (nameOffset >= structSize) { - return nullptr; + return {}; } - return metadataPtr + nameOffset; + return {metadataPtr + nameOffset}; } inline const PTRACE_EVENT_INFO schema_locator::get_event_schema(const EVENT_RECORD &record) const diff --git a/krabs/krabs/testing/record_builder.hpp b/krabs/krabs/testing/record_builder.hpp index eee7a43..ded8993 100644 --- a/krabs/krabs/testing/record_builder.hpp +++ b/krabs/krabs/testing/record_builder.hpp @@ -83,6 +83,7 @@ namespace krabs { namespace testing { size_t version, size_t opcode = 0, size_t level = 0, + size_t keyword = 0, bool trim_string_null_terminator = false); /** @@ -180,6 +181,7 @@ namespace krabs { namespace testing { const size_t version_; const size_t opcode_; const size_t level_; + const size_t keyword_; EVENT_HEADER header_; std::vector properties_; bool trim_string_null_terminator_; @@ -214,12 +216,14 @@ namespace krabs { namespace testing { size_t version, size_t opcode, size_t level, + size_t keyword, bool trim_string_null_terminator) : providerId_(providerId) , id_(id) , version_(version) , opcode_(opcode) , level_(level) + , keyword_(keyword) , trim_string_null_terminator_(trim_string_null_terminator) { ZeroMemory(&header_, sizeof(EVENT_HEADER)); @@ -227,6 +231,7 @@ namespace krabs { namespace testing { header_.EventDescriptor.Version = static_cast(version_); header_.EventDescriptor.Opcode = static_cast(opcode_); header_.EventDescriptor.Level = static_cast(level_); + header_.EventDescriptor.Keyword = static_cast(keyword_); memcpy(&header_.ProviderId, (const GUID *)&providerId_, sizeof(GUID)); } diff --git a/tests/krabstests/test_schema_key.cpp b/tests/krabstests/test_schema_key.cpp index a6bab6d..d9fa955 100644 --- a/tests/krabstests/test_schema_key.cpp +++ b/tests/krabstests/test_schema_key.cpp @@ -20,12 +20,14 @@ namespace krabstests const size_t id, const size_t version, const size_t opcode, - const size_t level) + const size_t level, + const size_t keyword, + std::string_view name) { - krabs::testing::record_builder builder(providerId, id, version, opcode, level); + krabs::testing::record_builder builder(providerId, id, version, opcode, level, keyword); auto record = builder.create_stub_record(); - return krabs::schema_key{ EVENT_RECORD(record) }; + return krabs::schema_key{ EVENT_RECORD(record), name }; } public: @@ -39,16 +41,16 @@ namespace krabstests TEST_METHOD(should_be_equal_when_uninitialized) { const EVENT_RECORD eventRecord = {}; - const krabs::schema_key key1{ eventRecord }; - const krabs::schema_key key2{ eventRecord }; + const krabs::schema_key key1{ eventRecord, {} }; + const krabs::schema_key key2{ eventRecord, {} }; Assert::IsTrue(key1 == key2); } TEST_METHOD(should_hash_same_when_uninitialized) { const EVENT_RECORD eventRecord = {}; - const krabs::schema_key key1{ eventRecord }; - const krabs::schema_key key2{ eventRecord }; + const krabs::schema_key key1{ eventRecord, {} }; + const krabs::schema_key key2{ eventRecord, {} }; // When a defect is present, this may only fail in Release builds // Ref: https://github.com/microsoft/krabsetw/issues/139 @@ -56,85 +58,129 @@ namespace krabstests } TEST_METHOD(should_be_equal_when_identical_property_values) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); Assert::IsTrue(key1 == key2); } TEST_METHOD(should_hash_same_when_identical_property_values) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "foo"); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "foo"); Assert::IsTrue(hash(key1) == hash(key2)); } TEST_METHOD(should_not_be_equal_when_providerid_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider2, 1, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider2, 1, 2, 3, 4, 5, {}); Assert::IsFalse(key1 == key2); } TEST_METHOD(should_not_hash_same_when_providerid_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider2, 1, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider2, 1, 2, 3, 4, 5, {}); Assert::IsFalse(hash(key1) == hash(key2)); } TEST_METHOD(should_not_be_equal_when_id_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 2, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 0, 2, 3, 4, 5, {}); Assert::IsFalse(key1 == key2); } TEST_METHOD(should_not_hash_same_when_id_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 2, 2, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 0, 2, 3, 4, 5, {}); Assert::IsFalse(hash(key1) == hash(key2)); } TEST_METHOD(should_not_be_equal_when_version_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 3, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 0, 3, 4, 5, {}); Assert::IsFalse(key1 == key2); } TEST_METHOD(should_not_hash_same_when_version_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 3, 3, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 0, 3, 4, 5, {}); Assert::IsFalse(hash(key1) == hash(key2)); } TEST_METHOD(should_not_be_equal_when_opcode_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 4, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 0, 4, 5, {}); Assert::IsFalse(key1 == key2); } TEST_METHOD(should_not_hash_same_when_opcode_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 4, 4); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 0, 4, 5, {}); Assert::IsFalse(hash(key1) == hash(key2)); } TEST_METHOD(should_not_be_equal_when_level_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 5); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 0, 5, {}); Assert::IsFalse(key1 == key2); } TEST_METHOD(should_not_hash_same_when_level_differs) { - auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4); - auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 5); + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 0, 5, {}); + + Assert::IsFalse(hash(key1) == hash(key2)); + } + TEST_METHOD(should_not_be_equal_when_keyword_differs) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 0, {}); + + Assert::IsFalse(key1 == key2); + } + TEST_METHOD(should_not_hash_same_when_keyword_differs) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, {}); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 0, {}); + + Assert::IsFalse(hash(key1) == hash(key2)); + } + TEST_METHOD(should_not_be_equal_when_name_differs) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "net"); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "proc"); + + Assert::IsFalse(key1 == key2); + } + TEST_METHOD(should_not_hash_same_when_name_differs) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "net"); + auto key2 = GetKeyForRecord(provider1, 1, 2, 3, 4, 0, "proc"); + + Assert::IsFalse(hash(key1) == hash(key2)); + } + TEST_METHOD(should_be_equal_after_internalizing) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "net"); + auto key2 = key1; + key1.internalize_name(); + + Assert::IsFalse(key1 == key2); + } + TEST_METHOD(should_hash_same_after_internalizing) + { + auto key1 = GetKeyForRecord(provider1, 1, 2, 3, 4, 5, "net"); + auto key2 = key1; + key1.internalize_name(); Assert::IsFalse(hash(key1) == hash(key2)); } From 9d259adcd08029d1e3775ca23de4230c89830187 Mon Sep 17 00:00:00 2001 From: Kyle Reed Date: Thu, 19 Sep 2024 11:39:49 -0700 Subject: [PATCH 3/4] Fix assertion --- tests/krabstests/test_schema_key.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/krabstests/test_schema_key.cpp b/tests/krabstests/test_schema_key.cpp index d9fa955..83f728a 100644 --- a/tests/krabstests/test_schema_key.cpp +++ b/tests/krabstests/test_schema_key.cpp @@ -174,7 +174,7 @@ namespace krabstests auto key2 = key1; key1.internalize_name(); - Assert::IsFalse(key1 == key2); + Assert::IsTrue(key1 == key2); } TEST_METHOD(should_hash_same_after_internalizing) { @@ -182,7 +182,7 @@ namespace krabstests auto key2 = key1; key1.internalize_name(); - Assert::IsFalse(hash(key1) == hash(key2)); + Assert::IsTrue(hash(key1) == hash(key2)); } }; } \ No newline at end of file From 3a1f2a7750082e4a2d8a476ebf0d3052a67d007c Mon Sep 17 00:00:00 2001 From: Kyle Reed Date: Thu, 19 Sep 2024 13:52:02 -0700 Subject: [PATCH 4/4] Comment style --- krabs/krabs/schema_locator.hpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/krabs/krabs/schema_locator.hpp b/krabs/krabs/schema_locator.hpp index d89565c..bdb8db2 100644 --- a/krabs/krabs/schema_locator.hpp +++ b/krabs/krabs/schema_locator.hpp @@ -33,13 +33,15 @@ namespace krabs { { guid provider; - // Using a string_view for name so that keys can be constructed from - // EVENT_RECORD pointers without allocation. If the key instance is - // added to the cache, 'internalize_name' must be called first so that - // the name points to owned memory and doesn't dangle. - // Only events logged with the TraceLogger API will have a name set - // in the key because it's available as part of the EVENT_RECORD. - // Other events are uniquely distinguished by their event Id. + /** + * Using a string_view for name so that keys can be constructed from + * EVENT_RECORD pointers without allocation. If the key instance is + * added to the cache, 'internalize_name' must be called first so that + * the name points to owned memory and doesn't dangle. + * Only events logged with the TraceLogger API will have a name set + * in the key because it's available as part of the EVENT_RECORD. + * Other events are uniquely distinguished by their event Id. + */ std::string_view name; uint16_t id; @@ -49,8 +51,10 @@ namespace krabs { uint64_t keyword; private: - // See note on 'name', this is only set when internalized and - // provides memory ownership for the string_view. + /** + * See note on 'name', this is only set when internalized and + * provides memory ownership for the string_view. + */ std::unique_ptr backing_name; public: @@ -185,7 +189,8 @@ namespace krabs { inline std::string_view get_trace_logger_event_name(const EVENT_RECORD & record) { - /* This implements part of the parsing that TDH would normally do so that + /** + * This implements part of the parsing that TDH would normally do so that * a schema_key can be built without calling TDH (which is expensive). * Here's pseudo code from the TraceLogger header describing the layout. *