Skip to content

Commit

Permalink
fix(cxx_common): attempt to reduce memory usage of bazel artifact sel…
Browse files Browse the repository at this point in the history
…ector (#5638)

* fix(cxx_common): eagerly convert from proto to reduce memory overhead in selector

* fix(cxx_common): have filesets share underlying file representation
  • Loading branch information
shahms authored May 18, 2023
1 parent da6a8bd commit 3d73771
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 35 deletions.
2 changes: 2 additions & 0 deletions kythe/cxx/extractor/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ cc_library(
"@com_github_google_glog//:glog",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:optional",
Expand Down
5 changes: 5 additions & 0 deletions kythe/cxx/extractor/bazel_artifact.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ struct BazelArtifactFile {
bool operator!=(const BazelArtifactFile& other) const {
return !(*this == other);
}

template <typename H>
friend H AbslHashValue(H h, const BazelArtifactFile& file) {
return H::combine(std::move(h), file.local_path, file.uri);
}
};

/// \brief A list of extracted compilation units and the target which owns them.
Expand Down
113 changes: 82 additions & 31 deletions kythe/cxx/extractor/bazel_artifact_selector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ std::string AsLocalPath(const build_event_stream::File& file) {
return absl::StrJoin(parts, "/");
}

BazelArtifactFile ToBazelArtifactFile(const build_event_stream::File& file) {
return {
.local_path = AsLocalPath(file),
.uri = AsUri(file),
};
}

template <typename T>
struct FromRange {
template <typename U>
Expand Down Expand Up @@ -127,22 +134,50 @@ absl::optional<BazelArtifact> AspectArtifactSelector::Select(
bool AspectArtifactSelector::SerializeInto(google::protobuf::Any& state) const {
kythe::proto::BazelAspectArtifactSelectorState raw;
*raw.mutable_disposed() = FromRange{state_.disposed};
*raw.mutable_filesets() = FromRange{state_.filesets};
for (const auto& [key, fileset] : state_.filesets) {
auto& entry = (*raw.mutable_filesets())[key];
for (const auto& file : fileset.files) {
auto* file_entry = entry.add_files();
file_entry->set_name(file->local_path);
file_entry->set_uri(file->uri);
}
for (const auto& id : fileset.children) {
entry.add_file_sets()->set_id(id);
}
}
*raw.mutable_pending() = FromRange{state_.pending};

state.PackFrom(raw);
state.PackFrom(std::move(raw));
return true;
}

AspectArtifactSelector::AspectArtifactSelector(
const AspectArtifactSelector& other) {
*this = other;
}

AspectArtifactSelector& AspectArtifactSelector::operator=(
const AspectArtifactSelector& other) {
// Not particular efficient, but avoids false-sharing with the internal
// shared_ptr.
google::protobuf::Any state;
(void)other.SerializeInto(state);
(void)DeserializeFrom(state);
return *this;
}

absl::Status AspectArtifactSelector::DeserializeFrom(
const google::protobuf::Any& state) {
kythe::proto::BazelAspectArtifactSelectorState raw;
if (state.UnpackTo(&raw)) {
state_ = {
.disposed = FromRange{raw.disposed()},
.filesets = FromRange{raw.filesets()},
.filesets = {}, // Set below.
.pending = FromRange{raw.pending()},
};
for (auto& [key, fileset] : *raw.mutable_filesets()) {
InsertFileSet(key, fileset);
}
return absl::OkStatus();
}
if (state.Is<kythe::proto::BazelAspectArtifactSelectorState>()) {
Expand All @@ -154,30 +189,19 @@ absl::Status AspectArtifactSelector::DeserializeFrom(
}

absl::optional<BazelArtifact> AspectArtifactSelector::SelectFileSet(
absl::string_view id, const build_event_stream::NamedSetOfFiles& filesets) {
bool kept = false;
for (const auto& file : filesets.files()) {
if (options_.file_name_allowlist.Match(file.name())) {
kept = true;
*state_.filesets[id].add_files() = file;
}
}
for (const auto& child : filesets.file_sets()) {
if (!state_.disposed.contains(child.id())) {
kept = true;
*state_.filesets[id].add_file_sets() = child;
}
}
// TODO(shahms): check pending *before* doing all of the insertion.
if (auto iter = state_.pending.find(id); iter != state_.pending.end()) {
auto node = state_.pending.extract(iter);
absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset) {
bool kept = InsertFileSet(id, fileset);

// TODO(shahms): check pending *before* the insertion.
if (auto node = state_.pending.extract(id); !node.empty()) {
BazelArtifact result = {.label = std::string(node.mapped())};
ReadFilesInto(id, result.label, result.files);
if (result.files.empty()) {
return absl::nullopt;
}
return result;
}

if (!kept) {
// There were no files, no children and no previous references, skip it.
state_.disposed.insert(std::string(id));
Expand Down Expand Up @@ -214,19 +238,22 @@ void AspectArtifactSelector::ReadFilesInto(
return;
}

if (auto iter = state_.filesets.find(id); iter != state_.filesets.end()) {
if (auto node = state_.filesets.extract(id); !node.empty()) {
state_.disposed.insert(std::string(id));
auto node = state_.filesets.extract(iter);
const build_event_stream::NamedSetOfFiles& filesets = node.mapped();

for (const auto& file : filesets.files()) {
files.push_back({
.local_path = AsLocalPath(file),
.uri = AsUri(file),
});
const FileSet& fileset = node.mapped();
files.reserve(files.size() + fileset.files.size());
for (const auto* file : fileset.files) {
auto iter = state_.files.find(*file);
CHECK(iter != state_.files.end()) << "Attempt to remove a missing file!";
if (--iter->second == 0) {
files.push_back(std::move(state_.files.extract(iter).key()));
} else {
files.push_back(iter->first);
}
}
for (const auto& child : filesets.file_sets()) {
ReadFilesInto(child.id(), target, files);

for (const auto& child : fileset.children) {
ReadFilesInto(child, target, files);
}

return;
Expand All @@ -239,6 +266,30 @@ void AspectArtifactSelector::ReadFilesInto(
state_.pending.emplace(id, target);
}

bool AspectArtifactSelector::InsertFileSet(
absl::string_view id, const build_event_stream::NamedSetOfFiles& fileset) {
if (state_.disposed.contains(id)) {
return false;
}
bool kept = false;
for (const auto& file : fileset.files()) {
if (options_.file_name_allowlist.Match(file.name())) {
auto iter = state_.files.try_emplace(ToBazelArtifactFile(file), 0).first;
iter->second++;
state_.filesets[id].files.push_back(&iter->first);
kept = true;
}
}
for (const auto& child : fileset.file_sets()) {
if (!state_.disposed.contains(child.id())) {
if (state_.filesets[id].children.insert(child.id()).second) {
kept = true;
}
}
}
return kept;
}

ExtraActionSelector::ExtraActionSelector(
absl::flat_hash_set<std::string> action_types)
: action_matches_([action_types = std::move(action_types)](
Expand Down
22 changes: 18 additions & 4 deletions kythe/cxx/extractor/bazel_artifact_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/node_hash_map.h"
#include "absl/meta/type_traits.h"
#include "absl/status/status.h"
#include "absl/types/optional.h"
Expand Down Expand Up @@ -153,6 +154,11 @@ class AspectArtifactSelector final : public BazelArtifactSelector {
explicit AspectArtifactSelector(Options options)
: options_(std::move(options)) {}

AspectArtifactSelector(const AspectArtifactSelector& other);
AspectArtifactSelector& operator=(const AspectArtifactSelector& other);
AspectArtifactSelector(AspectArtifactSelector&&) = default;
AspectArtifactSelector& operator=(AspectArtifactSelector&&) = default;

/// \brief Selects an artifact if the event matches an expected
/// aspect-produced compilation unit.
absl::optional<BazelArtifact> Select(
Expand All @@ -168,13 +174,19 @@ class AspectArtifactSelector final : public BazelArtifactSelector {
absl::Status DeserializeFrom(const google::protobuf::Any& state) final;

private:
struct FileSet {
std::vector<const BazelArtifactFile*> files;
absl::flat_hash_set<std::string> children;
};

struct State {
// A record of all of the NamedSetOfFiles events which have been processed.
absl::flat_hash_set<std::string> disposed;
// Mapping from fileset id to NamedSetOfFiles whose file names matched the
// allowlist, but have not yet been consumed by an event.
absl::flat_hash_map<std::string, build_event_stream::NamedSetOfFiles>
filesets;
// Map of active files to count of filesets which contain it.
absl::node_hash_map<BazelArtifactFile, int> files;
// Mapping from fileset id to NamedSetOfFiles whose file names matched
// the allowlist, but have not yet been consumed by an event.
absl::flat_hash_map<std::string, FileSet> filesets;
// Mapping from fileset id to target name which required that
// file set when it had not yet been seen.
absl::flat_hash_map<std::string, std::string> pending;
Expand All @@ -188,6 +200,8 @@ class AspectArtifactSelector final : public BazelArtifactSelector {

void ReadFilesInto(absl::string_view id, absl::string_view target,
std::vector<BazelArtifactFile>& files);
bool InsertFileSet(absl::string_view id,
const build_event_stream::NamedSetOfFiles& fileset);

Options options_;
State state_;
Expand Down

0 comments on commit 3d73771

Please sign in to comment.