Skip to content

Commit

Permalink
Merge pull request #1023 from cloudflare/maizatskyi/2023-08-15-lazy-c…
Browse files Browse the repository at this point in the history
…f-parsing

lazy cf property parsing
  • Loading branch information
mikea authored Aug 23, 2023
2 parents b0d830e + 8e67e9a commit 1dcb10d
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 90 deletions.
131 changes: 131 additions & 0 deletions src/workerd/api/cf-property.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#include "cf-property.h"
#include <workerd/io/features.h>

namespace workerd::api {

// JSON text of the default `botManagement` object. It is parsed by
// handleDefaultBotManagement() and installed on cf objects that lack a
// `botManagement` property.
static constexpr const char* kDefaultBotManagementValue = R"DATA({
"corporateProxy": false,
"verifiedBot": false,
"jsDetection": { "passed": false },
"staticResource": false,
"detectionIds": {},
"score": 99
})DATA";

static void handleDefaultBotManagement(jsg::Lock& js, v8::Local<v8::Object> handle) {
  // Makes sure the given cf object has a `botManagement` property. An object
  // that already has one is left untouched; otherwise a shared default built
  // from kDefaultBotManagementValue is installed.
  //
  // The caller visible in this file (CfProperty::getRef) only invokes this
  // when FeatureFlags::getNoCfBotManagementDefault() is false.
  //
  // Note that if the botManagement team changes any of the fields they
  // provide, the default value may need to be changed also.
  auto context = js.v8Context();
  if (js.v8Has(handle, "botManagement"_kj)) {
    return;
  }

  // For performance reasons the default is constructed only once and then
  // cached in a private field on the context's global object. Every later
  // call reuses that exact same (frozen) value.
  auto cacheKey = v8::Private::ForApi(js.v8Isolate,
      jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj));
  auto global = context->Global();

  auto sharedDefault = jsg::check(global->GetPrivate(context, cacheKey));
  if (sharedDefault->IsUndefined()) {
    // Nothing cached yet: parse the JSON, freeze it and remember it.
    auto parsedDefault = js.parseJson(kj::StringPtr(kDefaultBotManagementValue));
    KJ_DASSERT(parsedDefault.getHandle(js)->IsObject());
    js.recursivelyFreeze(parsedDefault);
    sharedDefault = parsedDefault.getHandle(js);
    jsg::check(global->SetPrivate(context, cacheKey, sharedDefault));
  }

  js.v8Set(handle, "botManagement"_kj, sharedDefault);
}

jsg::Optional<v8::Local<v8::Object>> CfProperty::get(jsg::Lock& js) {
  // Returns a local handle to the parsed cf object, or nullptr when getRef()
  // yields no value. All parsing and caching is delegated to getRef().
  auto maybeRef = getRef(js);
  KJ_IF_MAYBE(ref, maybeRef) {
    return ref->getHandle(js);
  }
  return nullptr;
}

jsg::Optional<jsg::V8Ref<v8::Object>> CfProperty::getRef(jsg::Lock& js) {
  // Returns a new reference to the parsed cf object, or nullptr when this
  // holder is empty.
  //
  // If the value is still held as an unparsed JSON string, it is parsed here.
  // The parsed object optionally receives the default `botManagement` field,
  // is recursively frozen, and then replaces the string so that later calls
  // take the already-parsed path.
  KJ_IF_MAYBE(cf, value) {
    KJ_SWITCH_ONEOF(*cf) {
      KJ_CASE_ONEOF(parsed, jsg::V8Ref<v8::Object>) {
        return parsed.addRef(js);
      }
      KJ_CASE_ONEOF(unparsed, kj::String) {
        // Note: no debug logging of the raw blob here. This path runs the
        // first time the cf value of a holder is accessed.
        auto parsed = js.parseJson(unparsed);
        auto handle = parsed.getHandle(js);
        KJ_ASSERT(handle->IsObject());

        auto objectHandle = handle.As<v8::Object>();
        if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) {
          // Add the default `botManagement` field if the blob did not
          // provide one.
          handleDefaultBotManagement(js, objectHandle);
        }

        // For the inbound request, we make the `cf` blob immutable.
        js.recursivelyFreeze(parsed);

        // Replace the unparsed string with the parsed v8 object. `unparsed`
        // refers into `value` and must not be used after this assignment.
        auto parsedObject = parsed.cast<v8::Object>(js);
        this->value = parsedObject.addRef(js);
        return kj::mv(parsedObject);
      }
    }
  }

  return nullptr;
}


kj::Maybe<kj::String> CfProperty::serialize(jsg::Lock& js) {
  // Produces the JSON text of the cf value, or nullptr when the holder is
  // empty.
  KJ_IF_MAYBE(cf, value) {
    KJ_SWITCH_ONEOF(*cf) {
      KJ_CASE_ONEOF(unparsed, kj::String) {
        if (FeatureFlags::get(js).getNoCfBotManagementDefault()) {
          // Parsing would not alter the value, so hand back a copy of the
          // original text.
          return kj::str(unparsed);
        }

        // On this path getRef() may add the default botManagement field, so
        // the value has to be parsed, fixed up and serialized back. getRef()
        // also caches the parsed object in place of the string.
        return js.serializeJson(KJ_ASSERT_NONNULL(getRef(js)));
      }
      KJ_CASE_ONEOF(parsed, jsg::V8Ref<v8::Object>) {
        return js.serializeJson(parsed);
      }
    }
  }

  return nullptr;
}

CfProperty CfProperty::deepClone(jsg::Lock& js) {
  // Builds an independent CfProperty in the same state as this one: empty,
  // unparsed string, or parsed object.
  KJ_IF_MAYBE(cf, value) {
    KJ_SWITCH_ONEOF(*cf) {
      KJ_CASE_ONEOF(unparsed, kj::String) {
        // Still unparsed: the string constructor makes its own copy of the
        // text, and the clone stays lazily parsed.
        return CfProperty(unparsed.asPtr());
      }
      KJ_CASE_ONEOF(parsed, jsg::V8Ref<v8::Object>) {
        // Already parsed: deep clone the v8 object itself.
        return CfProperty(parsed.deepClone(js));
      }
    }
  }

  return nullptr;
}

void CfProperty::visitForGc(jsg::GcVisitor& visitor) {
  // Only the parsed form holds a v8 reference to report to the visitor; an
  // empty holder or an unparsed string has nothing to visit.
  KJ_IF_MAYBE(cf, value) {
    KJ_IF_MAYBE(parsed, cf->tryGet<jsg::V8Ref<v8::Object>>()) {
      visitor.visit(*parsed);
    }
  }
}

} // namespace workerd::api
56 changes: 56 additions & 0 deletions src/workerd/api/cf-property.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#pragma once

// Common functionality to manage cf headers and properties.

#include <workerd/jsg/jsg.h>

namespace workerd::api {

class CfProperty {
  // Holder for a `cf` property value. The value is either absent, an unparsed
  // string, or a parsed v8 object. The string form is parsed on demand and the
  // parsed object is cached in its place.

public:
  explicit CfProperty() {}
  CfProperty(decltype(nullptr)) {}
  // Both construct an empty holder.

  explicit CfProperty(kj::Maybe<kj::StringPtr> unparsed) {
    KJ_IF_MAYBE(text, unparsed) {
      value = kj::str(*text);
    }
  }
  // Construct from an optional unparsed string. The text is copied; nullptr
  // yields an empty holder.

  explicit CfProperty(kj::Maybe<jsg::V8Ref<v8::Object>>&& parsed) {
    KJ_IF_MAYBE(object, parsed) {
      value = kj::mv(*object);
    }
  }
  // Construct from an optional, already parsed object. nullptr yields an empty
  // holder.

  CfProperty(CfProperty&&) = default;
  CfProperty& operator=(CfProperty&&) = default;
  KJ_DISALLOW_COPY(CfProperty);
  // Movable, but not copyable.

  jsg::Optional<v8::Local<v8::Object>> get(jsg::Lock& js);
  // Get the parsed value as a local handle.

  jsg::Optional<jsg::V8Ref<v8::Object>> getRef(jsg::Lock& js);
  // Get the parsed value as a global ref.

  kj::Maybe<kj::String> serialize(jsg::Lock& js);
  // Serialize the value to a string.

  CfProperty deepClone(jsg::Lock& js);
  // Clone by deep cloning the parsed v8 object (if any).

  void visitForGc(jsg::GcVisitor& visitor);
  // Visit the parsed v8 object (if any).

private:
  kj::Maybe<kj::OneOf<kj::String, jsg::V8Ref<v8::Object>>> value;
  // Absent, the unparsed string, or the parsed object.
};


} // namespace workerd::api
52 changes: 1 addition & 51 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -92,42 +92,6 @@ private:
}
};

// JSON text of the default `botManagement` object that
// handleDefaultBotManagement() parses and installs on cf objects lacking a
// `botManagement` property.
static constexpr const char* kDefaultBotManagementValue = R"DATA({
"corporateProxy": false,
"verifiedBot": false,
"jsDetection": { "passed": false },
"staticResource": false,
"detectionIds": {},
"score": 99
})DATA";

void handleDefaultBotManagement(jsg::Lock& js, jsg::Value& cf) {
  // Makes sure the object behind `cf` has a `botManagement` property. If the
  // property is already present nothing changes; otherwise a shared default
  // built from kDefaultBotManagementValue is installed.
  //
  // Note that if the botManagement team changes any of the fields they
  // provide, the default value may need to be changed also.
  auto context = js.v8Context();
  auto target = cf.getHandle(js).As<v8::Object>();
  if (js.v8Has(target, "botManagement"_kj)) {
    return;
  }

  // For performance reasons the default is constructed only once and cached
  // in a private field on the context's global object. Later calls pull out
  // the exact same (frozen) value.
  auto cacheKey = v8::Private::ForApi(js.v8Isolate,
      jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj));
  auto global = context->Global();

  auto sharedDefault = jsg::check(global->GetPrivate(context, cacheKey));
  if (sharedDefault->IsUndefined()) {
    // First use: parse, freeze and store the default.
    auto parsedDefault = js.parseJson(kj::StringPtr(kDefaultBotManagementValue));
    KJ_DASSERT(parsedDefault.getHandle(js)->IsObject());
    js.recursivelyFreeze(parsedDefault);
    sharedDefault = parsedDefault.getHandle(js);
    jsg::check(global->SetPrivate(context, cacheKey, sharedDefault));
  }

  js.v8Set(target, "botManagement"_kj, sharedDefault);
}

kj::String getEventName(v8::PromiseRejectEvent type) {
switch (type) {
case v8::PromiseRejectEvent::kPromiseRejectWithNoHandler:
Expand Down Expand Up @@ -200,21 +164,7 @@ kj::Promise<DeferredProxy<void>> ServiceWorkerGlobalScope::request(
auto& ioContext = IoContext::current();
jsg::Lock& js = lock;

kj::Maybe<jsg::V8Ref<v8::Object>> cf;

KJ_IF_MAYBE(c, cfBlobJson) {
auto handle = js.parseJson(*c);
KJ_ASSERT(handle.getHandle(js)->IsObject());

if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) {
handleDefaultBotManagement(js, handle);
}

// For the inbound request, we make the `cf` blob immutable.
js.recursivelyFreeze(handle);

cf = handle.cast<v8::Object>(js);
}
CfProperty cf(cfBlobJson);

auto jsHeaders = jsg::alloc<Headers>(headers, Headers::Guard::REQUEST);
// We do not automatically decode gzipped request bodies because the fetch() standard doesn't
Expand Down
Loading

0 comments on commit 1dcb10d

Please sign in to comment.