Skip to content

Commit

Permalink
Merge pull request #571 from cloudflare/jsnell/remove-botmanagement-l…
Browse files Browse the repository at this point in the history
…ogging
  • Loading branch information
jasnell authored Apr 28, 2023
2 parents 23dd3ac + 5166d16 commit af0c339
Show file tree
Hide file tree
Showing 11 changed files with 54 additions and 173 deletions.
4 changes: 0 additions & 4 deletions src/workerd/api/actor-state.c++
Original file line number Diff line number Diff line change
Expand Up @@ -728,10 +728,6 @@ kj::Array<kj::byte> serializeV8Value(v8::Local<v8::Value> value, v8::Isolate* is
.version = 15,
.omitHeader = false,
});
// TODO(soon): Remove this terrible, horrible, no-good, very bad hack when we remove
// the cf.botManagement logging. The issue here is that Proxy objects are not cloneable
// via structuredClone.
value = maybeUnwrapBotManagement(isolate, value);
serializer.write(value);
auto released = serializer.release();
return kj::mv(released.data);
Expand Down
57 changes: 42 additions & 15 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,41 @@ private:
}
};

void handleDefaultBotManagement(v8::Isolate* isolate, v8::Local<v8::Object> cf) {
// When the cfBotManagementNoOp compatibility flag is set, we'll check the
// request cf blob to see if it contains a botManagement field. If it does
// *not* we will add it using the following default fields.
// Note that if the botManagement team changes any of the fields they provide,
// this default value may need to be changed also.
static constexpr auto DEFAULT_BM = R"DATA({
"corporateProxy": false,
"verifiedBot": false,
"ja3Hash": "25b4882c2bcb50cd6b469ff28c596742",
"jsDetection": { "passed": false },
"staticResource": false,
"detectionIds": {},
"score": 99
})DATA"_kj;

auto context = isolate->GetCurrentContext();
auto name = jsg::v8StrIntern(isolate, "botManagement"_kj);
if (!jsg::check(cf->Has(context, name))) {
auto sym = v8::Private::ForApi(isolate, name);
// For performance reasons, we only want to construct the default values
// once per isolate so we cache the constructed value using an internal
// private field on the global scope. Whenever we need to use it again we
// pull the exact same value.
auto defaultBm = jsg::check(context->Global()->GetPrivate(context, sym));
if (defaultBm->IsUndefined()) {
defaultBm = jsg::check(v8::JSON::Parse(context, jsg::v8Str(isolate, DEFAULT_BM)));
KJ_ASSERT(defaultBm->IsObject());
jsg::recursivelyFreeze(context, defaultBm);
jsg::check(context->Global()->SetPrivate(context, sym, defaultBm));
}
jsg::check(cf->Set(context, name, defaultBm));
}
}

} // namespace

void ExecutionContext::waitUntil(kj::Promise<void> promise) {
Expand Down Expand Up @@ -145,9 +180,14 @@ kj::Promise<DeferredProxy<void>> ServiceWorkerGlobalScope::request(
auto handle = jsg::check(v8::JSON::Parse(context, jsonString));
KJ_ASSERT(handle->IsObject());

maybeWrapBotManagement(isolate, handle.As<v8::Object>());
if (!lock.getWorker()
.getIsolate()
.getApiIsolate()
.getFeatureFlags()
.getNoCfBotManagementDefault()) {
handleDefaultBotManagement(isolate, handle.As<v8::Object>());
}

NoRequestCfProxyLoggingScope noLoggingScope;
// For the inbound request, we make the `cf` blob immutable.
jsg::recursivelyFreeze(isolate->GetCurrentContext(), handle);

Expand Down Expand Up @@ -581,19 +621,6 @@ v8::Local<v8::Value> ServiceWorkerGlobalScope::structuredClone(
return transfer.asPtr();
});
}

// On the off chance the user is cloning the request.cf metadata, let's make sure
// any proxied logging is disabled here. Note that in this case we are not adding
// the proxied property handler to the resulting object which means they could
// clone the object and access proxied fields we'd normally log but that seems to
// be an edge case not worth handling here.
NoRequestCfProxyLoggingScope noLoggingScope;

// TODO(soon): Remove this terrible, horrible, no-good, very bad hack when we remove
// the cf.botManagement logging. The issue here is that Proxy objects are not cloneable
// via structuredClone.
value = maybeUnwrapBotManagement(isolate, value);

return jsg::structuredClone(value, isolate, transfers);
}

Expand Down
7 changes: 1 addition & 6 deletions src/workerd/api/http.c++
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,6 @@ void Request::shallowCopyHeadersTo(kj::HttpHeaders& out) {

kj::Maybe<kj::String> Request::serializeCfBlobJson(jsg::Lock& js) {
return cf.map([&](jsg::V8Ref<v8::Object>& obj) {
NoRequestCfProxyLoggingScope noLoggingScope;
return js.serializeJson(obj);
});
}
Expand Down Expand Up @@ -2025,11 +2024,7 @@ jsg::Promise<QueueResponse> Fetcher::queue(
.version = 15,
.omitHeader = false,
});
// TODO(soon): Remove this terrible, horrible, no-good, very bad hack when we remove
// the cf.botManagement logging. The issue here is that Proxy objects are not cloneable
// via structuredClone.
auto value = maybeUnwrapBotManagement(js.v8Isolate, msg.body.getHandle(js.v8Isolate));
serializer.write(value);
serializer.write(msg.body.getHandle(js));
encodedMessages.add(IncomingQueueMessage{
.id=kj::mv(msg.id),
.timestamp=msg.timestamp,
Expand Down
10 changes: 1 addition & 9 deletions src/workerd/api/queue.c++
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ kj::Promise<void> WorkerQueue::send(
.version = 15,
.omitHeader = false,
});
// TODO(soon): Remove this terrible, horrible, no-good, very bad hack when we remove
// the cf.botManagement logging. The issue here is that Proxy objects are not cloneable
// via structuredClone.
body = maybeUnwrapBotManagement(isolate, body);
serializer.write(body);
kj::Array<kj::byte> serialized = serializer.release().data;

Expand Down Expand Up @@ -72,11 +68,7 @@ kj::Promise<void> WorkerQueue::sendBatch(
.version = 15,
.omitHeader = false,
});
// TODO(soon): Remove this terrible, horrible, no-good, very bad hack when we remove
// the cf.botManagement logging. The issue here is that Proxy objects are not cloneable
// via structuredClone.
auto value = maybeUnwrapBotManagement(isolate, message.body.getHandle(isolate));
serializer.write(value);
serializer.write(kj::mv(message.body));
builder.add(serializer.release().data);
totalSize += builder.back().size();
largestMessage = kj::max(largestMessage, builder.back().size());
Expand Down
98 changes: 1 addition & 97 deletions src/workerd/api/util.c++
Original file line number Diff line number Diff line change
Expand Up @@ -289,104 +289,8 @@ kj::Maybe<jsg::V8Ref<v8::Object>> cloneRequestCf(
jsg::Lock& js,
kj::Maybe<jsg::V8Ref<v8::Object>> maybeCf) {
KJ_IF_MAYBE(cf, maybeCf) {
// In case the cf object has a logging proxy, we want to make sure
// the logging is not triggered here when the object is cloned.
NoRequestCfProxyLoggingScope noLoggingScope;
auto cloned = cf->deepClone(js);
maybeWrapBotManagement(js.v8Isolate, cloned.getHandle(js));
return kj::mv(cloned);
return cf->deepClone(js);
}
return nullptr;
}

v8::Local<v8::Value> maybeUnwrapBotManagement(v8::Isolate* isolate, v8::Local<v8::Value> value) {
if (!value->IsObject()) return value;
auto context = isolate->GetCurrentContext();
auto name = jsg::v8StrIntern(isolate, "botManagement");
auto obj = value.As<v8::Object>();
auto bm = jsg::check(obj->Get(context, name));
if (bm->IsProxy()) {
auto proxy = bm.As<v8::Proxy>();
auto sym = v8::Private::ForApi(isolate, jsg::v8StrIntern(isolate, "loggingProxyHandler"));
auto handler = jsg::check(context->Global()->GetPrivate(context, sym));

// If it's a proxy, it doesn't mean it's *our* proxy. Make sure it's ours.
// JavaScript is fun.
if (proxy->GetHandler()->StrictEquals(handler)) {
// Sadly, we can't just simply manipulate the object in place since request.cf
// is frozen before we pass it out. If we just tried obj->Clone() the new clone
// would also be frozen. Doh! So we end up having to copy all of the properties
// individually.... which sucks.
auto properties = jsg::check(obj->GetOwnPropertyNames(context));
auto size = properties->Length();
kj::Vector<v8::Local<v8::Name>> names(size);
kj::Vector<v8::Local<v8::Value>> values(size);
for (uint32_t n = 0; n < properties->Length(); n++) {
auto prop = jsg::check(properties->Get(context, n));
names.add(prop.As<v8::Name>());
if (!jsg::check(name->Equals(context, name))) {
values.add(jsg::check(obj->Get(context, name)));
} else {
values.add(proxy->GetTarget());
}
}
value = v8::Object::New(isolate, v8::Null(isolate),
names.begin(), values.begin(), size);
}
}
return value;
}

void maybeWrapBotManagement(v8::Isolate* isolate, v8::Local<v8::Object> handle) {
auto context = isolate->GetCurrentContext();
auto botManagement = jsg::v8StrIntern(isolate, "botManagement");
v8::Local<v8::Value> maybeBotManagement = jsg::check(handle->Get(context, botManagement));
// If the botManagement field exists, we replace its value here with a Proxy object that
// will log the first time any of its properties are accessed. Logging will occur only
// once per worker instance.
//
// Replacing the value with a proxy rather than setting an accessor for botManagement on
// the request.cf object itself avoids false positives when someone is simply iterating
// over the fields of request.cf without actually using them. It also allows us to avoid
// having to create accessors or a class to intercept every individual property on the
// botManagement object.
if (maybeBotManagement->IsObject() && !maybeBotManagement->IsProxy()) {
v8::Local<v8::Object> bmObj = maybeBotManagement.As<v8::Object>();
auto& js = jsg::Lock::from(isolate);

// Create the Proxy handler exactly once per global context and cache it using a private
// property on the global itself. The handler itself maintains no state so it is safe to
// use it over and over for all requests.
auto sym = v8::Private::ForApi(isolate, jsg::v8StrIntern(isolate, "loggingProxyHandler"));
auto handler = jsg::check(context->Global()->GetPrivate(context, sym));
if (handler->IsUndefined()) {
handler = v8::Object::New(isolate);
jsg::check(handler.As<v8::Object>()->Set(context, jsg::v8StrIntern(isolate, "get"),
js.wrapReturningFunction(context,
[](jsg::Lock& js, const v8::FunctionCallbackInfo<v8::Value>& args) {
if (IoContext::hasCurrent()) {
if (!NoRequestCfProxyLoggingScope::isActive()) {
IoContext::current().getMetrics().logBotManagementUse();
}
} else {
// TODO(later): There is an edge case where the request.cf could be set to a global
// scope variable and read outside of an IoContext. In such cases we would not get the
// logging. Is that ok? It should be rare but in theory it means the logging won't be
// 100% effective at identifying all uses.
}
if (args[0]->IsObject()) {
return jsg::check(args[0].As<v8::Object>()->Get(
js.v8Context(), args[1]));
} else {
return js.v8Undefined();
}
})));
context->Global()->SetPrivate(context, sym, handler);
}

jsg::check(handle->Set(context, botManagement,
jsg::check(v8::Proxy::New(context, bmObj, handler.As<v8::Object>()))));
}
}

} // namespace workerd::api
10 changes: 0 additions & 10 deletions src/workerd/api/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,4 @@ inline kj::Promise<DeferredProxy<void>> addNoopDeferredProxy(kj::Promise<void> p

kj::Maybe<jsg::V8Ref<v8::Object>> cloneRequestCf(
jsg::Lock& js, kj::Maybe<jsg::V8Ref<v8::Object>> maybeCf);
void maybeWrapBotManagement(v8::Isolate* isolate, v8::Local<v8::Object> handle);

v8::Local<v8::Value> maybeUnwrapBotManagement(v8::Isolate* isolate, v8::Local<v8::Value> value);
// This is a bit of a hack that fortunately we shouldn't need to live with forever. Specifically,
// in order to facilitate logging of the request.cf.botManagement uses, we wrap it in a JS Proxy
// object. Unfortunately, there are cases where we serialize request.cf using the structured
// clone algorithm (v8::Serializer), which does not support proxies. So in those cases we need
// to intercept the value and unwrap the proxy before we serialize. We should be able to drop
// this as soon as we are no longer logging cf.botManagement accesses.

} // namespace workerd::api
10 changes: 9 additions & 1 deletion src/workerd/io/compatibility-date.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,17 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef {
# Previously any uncaught exception in the callee would be returned to the caller as an empty
# HTTP 500 response.

serviceBindingExtraHandlers @28 :Bool
serviceBindingExtraHandlers @28 :Bool
$compatEnableFlag("service_binding_extra_handlers")
$experimental;
# Allows service bindings to call additional event handler methods on the target Worker.
# Initially only includes support for calling the queue() handler.

noCfBotManagementDefault @29 :Bool
$compatEnableFlag("no_cf_botmanagement_default")
$compatDisableFlag("cf_botmanagement_default")
$compatEnableDate("2023-08-01");
# This one operates a bit backwards. With the flag *enabled* no default cfBotManagement
# data will be included. The the flag *disable*, default cfBotManagement data will be
# included in the request.cf if the field is not present.
}
3 changes: 0 additions & 3 deletions src/workerd/io/observer.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ class RequestObserver: public kj::Refcounted {
virtual void finishedWaitUntilTask() {}

virtual void setFailedOpen(bool value) {}

virtual void logBotManagementUse() {}
// Logging accesses to the request.cf.botManagement object.
};

class IsolateObserver: public kj::AtomicRefcounted {
Expand Down
1 change: 0 additions & 1 deletion src/workerd/io/worker.c++
Original file line number Diff line number Diff line change
Expand Up @@ -1508,7 +1508,6 @@ void Worker::handleLog(jsg::Lock& js, LogLevel level, const v8::FunctionCallback
KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]() {
// On the off chance the the arg is the request.cf object, let's make
// sure we do not log proxied fields here.
NoRequestCfProxyLoggingScope noLoggingScope;
if (shouldSerialiseToJson) {
auto s = js.serializeJson(arg);
// serializeJson returns the string "undefined" for some values (undefined,
Expand Down
13 changes: 0 additions & 13 deletions src/workerd/util/thread-scopes.c++
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ namespace {

thread_local uint allowV8BackgroundThreadScopeCount = 0;
thread_local uint isolateShutdownThreadScopeCount = 0;
thread_local uint noProxyLoggingScope = 0;

bool multiTenantProcess = false;
bool predictableMode = false;
Expand Down Expand Up @@ -114,16 +113,4 @@ void ThreadProgressCounter::acknowledgeProgress() {
}
}

NoRequestCfProxyLoggingScope::NoRequestCfProxyLoggingScope() {
++noProxyLoggingScope;
}

NoRequestCfProxyLoggingScope::~NoRequestCfProxyLoggingScope() noexcept(false) {
--noProxyLoggingScope;
}

bool NoRequestCfProxyLoggingScope::isActive() {
return noProxyLoggingScope > 0;
}

} // namespace workerd
14 changes: 0 additions & 14 deletions src/workerd/util/thread-scopes.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,4 @@ class ThreadProgressCounter {

friend class Watchdog;
};

class NoRequestCfProxyLoggingScope {
// We current implement logging when the request.cf.botManagement properties are accessed.
// This scope is used specifically to suppress logging when we need to iterate or clone
// that structure.
public:
NoRequestCfProxyLoggingScope();
~NoRequestCfProxyLoggingScope() noexcept(false);

static bool isActive();

KJ_DISALLOW_COPY_AND_MOVE(NoRequestCfProxyLoggingScope);
};

} // namespace workerd

0 comments on commit af0c339

Please sign in to comment.