Skip to content

Commit

Permalink
add FilteredObjectId skeleton
Browse files Browse the repository at this point in the history
Summary:
Initial design of a FilteredObjectId. This will be used by the FilteredBackingStore to wrap ObjectIds used by other BackingStores.

** this is just an initial implementation. It will probably evolve as I start implementing FilteredBackingStore and start uncovering issues with the design **

Reviewed By: kmancini

Differential Revision: D46010041

fbshipit-source-id: a85ea448af0033278b0e9b4ac7208fec1f33d0d6
  • Loading branch information
MichaelCuevas authored and facebook-github-bot committed Jul 10, 2023
1 parent 812bb39 commit a40ea03
Show file tree
Hide file tree
Showing 4 changed files with 536 additions and 0 deletions.
19 changes: 19 additions & 0 deletions eden/fs/store/filter/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.

# Collect every .cpp file matched by the glob into STORE_FILTER_SRCS.
file(GLOB STORE_FILTER_SRCS "*.cpp")

# Static library built from the globbed sources.
add_library(eden_store_filter STATIC ${STORE_FILTER_SRCS})

# Linked PUBLIC so the dependencies propagate to targets that link against
# eden_store_filter.
target_link_libraries(
  eden_store_filter
  PUBLIC
    eden_model
    eden_utils
    ${RE2_LIBRARY}
)
219 changes: 219 additions & 0 deletions eden/fs/store/filter/FilteredObjectId.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/

#include "eden/fs/store/filter/FilteredObjectId.h"

#include <folly/Varint.h>
#include <folly/logging/xlog.h>

#include "eden/fs/utils/Throw.h"

using folly::ByteRange;
using folly::Endian;
using folly::StringPiece;
using std::string;

namespace facebook::eden {

/**
 * Serialize a blob FilteredObjectId.
 *
 * Blob ids only carry two components: <type_byte><ObjectId>
 *
 * @param object the ObjectId being wrapped.
 * @return the serialized bytes: the OBJECT_TYPE_BLOB type byte followed by
 *         the result of object.asString().
 */
std::string FilteredObjectId::serializeBlob(const ObjectId& object) {
  // Fetch the id's bytes once so the reservation below can use their actual
  // length. sizeof(object) would be the size of the ObjectId class itself,
  // not the number of bytes appended to the buffer.
  const auto objectBytes = object.asString();
  const uint8_t objectType = FilteredObjectId::OBJECT_TYPE_BLOB;

  std::string buf;
  buf.reserve(sizeof(objectType) + objectBytes.size());
  buf.append(reinterpret_cast<const char*>(&objectType), sizeof(objectType));
  buf.append(objectBytes);
  return buf;
}

/**
 * Serialize a tree FilteredObjectId.
 *
 * Trees are serialized as
 * <type_byte><varint><filter_set_id><varint><path><ObjectId>
 * where each varint holds the length of the component that follows it.
 *
 * @param path the path of the tree; its bytes are embedded in the id.
 * @param filterId the filter id; its bytes are embedded in the id.
 * @param object the ObjectId being wrapped.
 * @return the serialized bytes in the layout described above.
 */
std::string FilteredObjectId::serializeTree(
    RelativePathPiece path,
    std::string_view filterId,
    const ObjectId& object) {
  // Length prefix for the path component.
  const size_t pathLen = path.value().length();
  uint8_t pathVarint[folly::kMaxVarintLength64] = {};
  const size_t pathVarintLen = folly::encodeVarint(pathLen, pathVarint);
  XLOGF(
      DBG9,
      "pathLen: {}, pathVarint: {}, pathVarintLen: {}",
      pathLen,
      pathVarint,
      pathVarintLen);

  // Length prefix for the filter id component. Logged at the same verbosity
  // as the path trace above, with the last field labelled as what it is.
  const size_t filterLen = filterId.length();
  uint8_t filterVarint[folly::kMaxVarintLength64] = {};
  const size_t filterVarintLen = folly::encodeVarint(filterLen, filterVarint);
  XLOGF(
      DBG9,
      "filterLen: {}, filterVarint: {}, filterVarintLen: {}",
      filterLen,
      filterVarint,
      filterVarintLen);

  // Fetch the id's bytes once so the reservation uses their actual length
  // rather than sizeof(object), which is the size of the ObjectId class.
  const auto objectBytes = object.asString();
  const uint8_t objectType = FilteredObjectId::OBJECT_TYPE_TREE;

  std::string buf;
  buf.reserve(
      sizeof(objectType) + filterVarintLen + filterLen + pathVarintLen +
      pathLen + objectBytes.size());
  buf.append(reinterpret_cast<const char*>(&objectType), sizeof(objectType));
  buf.append(reinterpret_cast<const char*>(filterVarint), filterVarintLen);
  buf.append(filterId);
  buf.append(reinterpret_cast<const char*>(pathVarint), pathVarintLen);
  buf.append(path.value().begin(), path.value().end());
  buf.append(objectBytes);
  return buf;
}

/**
 * Return the path component of a tree FilteredObjectId.
 *
 * The returned piece points into value_, so it is only usable while this
 * FilteredObjectId is alive.
 *
 * @throws std::invalid_argument if this id is not a tree id; only tree ids
 *         carry a path.
 * @throws std::out_of_range if the encoded path length is larger than the
 *         number of bytes left in the id. Range::advance() and
 *         decodeVarint() may also throw on truncated or malformed data.
 */
RelativePathPiece FilteredObjectId::path() const {
  if (value_.data()[0] != FilteredObjectId::OBJECT_TYPE_TREE) {
    // We don't know the path of non-tree objects. Throw.
    throwf<std::invalid_argument>(
        "Cannot determine path of non-tree FilteredObjectId: {}", value_);
  }

  // Skip the first byte of data that contains the type
  folly::Range remaining(value_.data(), value_.size());
  remaining.advance(sizeof(FilteredObjectId::OBJECT_TYPE_TREE));

  // Skip the variable length filter id. decodeVarint() advances the range
  // past the varint itself, so only the filter bytes need to be skipped.
  const size_t filterLen = folly::decodeVarint(remaining);
  remaining.advance(filterLen);

  // The next varint is the path length; the path bytes start right after it.
  const size_t pathLen = folly::decodeVarint(remaining);

  // Not every FilteredObjectId is validated on construction, so make sure
  // the advertised length actually fits before handing out a view of it.
  if (pathLen > remaining.size()) {
    throwf<std::out_of_range>(
        "FilteredObjectId path length {} exceeds the {} bytes remaining: {}",
        pathLen,
        remaining.size(),
        value_);
  }

  StringPiece data{remaining.begin(), pathLen};
  // value_ was built with a known good RelativePath, thus we don't need
  // to recheck it when deserializing.
  return RelativePathPiece{data, detail::SkipPathSanityCheck{}};
}

/**
 * Return the filter id component of a tree FilteredObjectId.
 *
 * The returned piece points into value_, so it is only usable while this
 * FilteredObjectId is alive.
 *
 * @throws std::invalid_argument if this id is not a tree id; only tree ids
 *         carry a filter id.
 * @throws std::out_of_range if the encoded filter length is larger than the
 *         number of bytes left in the id. Range::advance() and
 *         decodeVarint() may also throw on truncated or malformed data.
 */
StringPiece FilteredObjectId::filter() const {
  if (value_.data()[0] != FilteredObjectId::OBJECT_TYPE_TREE) {
    // We don't know the filter of non-tree objects. Throw.
    throwf<std::invalid_argument>(
        "Cannot determine filter for non-tree FilteredObjectId: {}", value_);
  }

  // Skip the first byte of data that contains the type
  folly::Range remaining(value_.data(), value_.size());
  remaining.advance(sizeof(FilteredObjectId::OBJECT_TYPE_TREE));

  // Determine the size of the filter. decodeVarint() advances the range for
  // us, so the filter bytes begin at the current start of the range.
  const size_t filterLen = folly::decodeVarint(remaining);

  // Not every FilteredObjectId is validated on construction, so make sure
  // the advertised length actually fits before handing out a view of it.
  if (filterLen > remaining.size()) {
    throwf<std::out_of_range>(
        "FilteredObjectId filter length {} exceeds the {} bytes remaining: {}",
        filterLen,
        remaining.size(),
        value_);
  }

  return StringPiece{remaining.begin(), filterLen};
}

/**
 * Return the ObjectId wrapped by this FilteredObjectId.
 *
 * For tree ids the ObjectId follows the filter id and path components; for
 * blob ids it follows the type byte directly.
 *
 * @throws std::runtime_error if the type byte is neither tree nor blob.
 *         Range::advance() and decodeVarint() may also throw on truncated or
 *         malformed data.
 */
ObjectId FilteredObjectId::object() const {
  // Read the type as an unsigned byte: the switch then does not depend on
  // the signedness of char, and the error message below prints the byte as
  // a number instead of as a raw (possibly unprintable) character.
  const auto typeByte = static_cast<uint8_t>(value_.data()[0]);

  switch (typeByte) {
    case FilteredObjectId::OBJECT_TYPE_TREE: {
      // Skip the first byte of data that contains the type
      folly::Range remaining(value_.data(), value_.size());
      remaining.advance(sizeof(FilteredObjectId::OBJECT_TYPE_TREE));

      // Determine the size of the filter and skip it
      const size_t filterLen = folly::decodeVarint(remaining);
      remaining.advance(filterLen);

      // Determine the size of the path and skip it
      const size_t pathLen = folly::decodeVarint(remaining);
      remaining.advance(pathLen);

      // Everything that is left is the ObjectId
      return ObjectId{remaining};
    }

    case FilteredObjectId::OBJECT_TYPE_BLOB: {
      // Blob ids are just <type_byte><ObjectId>
      folly::Range remaining(value_.data(), value_.size());
      remaining.advance(sizeof(FilteredObjectId::OBJECT_TYPE_BLOB));
      return ObjectId{remaining};
    }
  }

  // Unknown FilteredObjectId type. Throw.
  throwf<std::runtime_error>("Unknown FilteredObjectId type: {}", typeByte);
}

/**
 * Return the type stored in the first byte of this FilteredObjectId.
 *
 * Since some FilteredObjectIds are created without validation, the type byte
 * is checked here rather than being returned blindly.
 *
 * @throws std::runtime_error if the type byte is neither tree nor blob.
 */
FilteredObjectId::FilteredObjectIdType FilteredObjectId::objectType() const {
  // Read the type as an unsigned byte: the switch then does not depend on
  // the signedness of char, and the error message below prints the byte as
  // a number instead of as a raw (possibly unprintable) character.
  const auto typeByte = static_cast<uint8_t>(value_.data()[0]);

  switch (typeByte) {
    case FilteredObjectId::OBJECT_TYPE_TREE:
      return FilteredObjectIdType::OBJECT_TYPE_TREE;
    case FilteredObjectId::OBJECT_TYPE_BLOB:
      return FilteredObjectIdType::OBJECT_TYPE_BLOB;
  }

  // Unknown FilteredObjectId type. Throw.
  throwf<std::runtime_error>("Unknown FilteredObjectId type: {}", typeByte);
}

// Equality is decided purely by comparing the serialized contents (value_)
// of the two ids. FilteredObjectIds with different filterIds may well
// evaluate to the same underlying object, but deciding that is not the job
// of the FilteredObjectId implementation.
bool FilteredObjectId::operator==(const FilteredObjectId& otherHash) const {
  const auto& lhs = value_;
  const auto& rhs = otherHash.value_;
  return lhs == rhs;
}

// Ordering is decided purely by comparing the serialized contents (value_)
// of the two ids. No attempt is made to determine whether ids with different
// filterIds evaluate to the same underlying object.
bool FilteredObjectId::operator<(const FilteredObjectId& otherHash) const {
  const auto& lhs = value_;
  const auto& rhs = otherHash.value_;
  return lhs < rhs;
}

/**
 * Check that value_ has the structure of a serialized FilteredObjectId.
 *
 * Blob ids: only the type byte is checked, since the wrapped ObjectId is
 * opaque to us.
 * Tree ids: additionally, the filter id and path length varints must decode
 * and the lengths they advertise must fit in the bytes that follow them.
 *
 * @throws std::invalid_argument if the type byte is unknown, a varint cannot
 *         be decoded, or an advertised length overruns the id.
 */
void FilteredObjectId::validate() {
  ByteRange infoBytes = folly::Range{value_.data(), value_.size()};
  XLOGF(DBG9, "{}", value_);

  // Ensure the type byte is valid
  const uint8_t typeByte = infoBytes.data()[0];
  if (typeByte != FilteredObjectId::OBJECT_TYPE_BLOB &&
      typeByte != FilteredObjectId::OBJECT_TYPE_TREE) {
    auto msg = fmt::format(
        "Invalid FilteredObjectId type byte {}. Value_ = {}", typeByte, value_);
    XLOGF(ERR, "{}", msg);
    throw std::invalid_argument(msg);
  }
  infoBytes.advance(1);

  // Validating the wrapped ObjectId is impossible since we don't know what
  // it should contain. Therefore, we simply return if we're validating a
  // filtered blob Id.
  if (typeByte == FilteredObjectId::OBJECT_TYPE_BLOB) {
    return;
  }

  // For trees, we can actually perform some validation. We can ensure the
  // varints describing the filterid and path are valid
  auto expectedSize = folly::tryDecodeVarint(infoBytes);
  if (UNLIKELY(!expectedSize)) {
    auto msg = fmt::format(
        "failed to decode filter id VarInt when validating FilteredObjectId {}: {}",
        value_,
        expectedSize.error());
    throw std::invalid_argument(msg);
  }
  // Check the filter id length ourselves so that a truncated id is reported
  // with the same exception type as every other validation failure, instead
  // of whatever advance() throws when asked to skip past the end.
  if (UNLIKELY(*expectedSize > infoBytes.size())) {
    auto msg = fmt::format(
        "filter id length {} exceeds the {} bytes remaining when validating FilteredObjectId {}",
        *expectedSize,
        infoBytes.size(),
        value_);
    throw std::invalid_argument(msg);
  }
  infoBytes.advance(*expectedSize);

  expectedSize = folly::tryDecodeVarint(infoBytes);
  if (UNLIKELY(!expectedSize)) {
    auto msg = fmt::format(
        "failed to decode path length VarInt when validating FilteredObjectId {}: {}",
        value_,
        expectedSize.error());
    throw std::invalid_argument(msg);
  }
  // The path bytes must fit in what is left of the id; otherwise reading the
  // path back out of this id would run past the end of value_.
  if (UNLIKELY(*expectedSize > infoBytes.size())) {
    auto msg = fmt::format(
        "path length {} exceeds the {} bytes remaining when validating FilteredObjectId {}",
        *expectedSize,
        infoBytes.size(),
        value_);
    throw std::invalid_argument(msg);
  }
}

} // namespace facebook::eden
Loading

0 comments on commit a40ea03

Please sign in to comment.