Skip to content

Commit

Permalink
lazy cf property parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mikea committed Aug 16, 2023
1 parent a4f075a commit 47a296e
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 90 deletions.
52 changes: 1 addition & 51 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -91,42 +91,6 @@ private:
}
};

// JSON text of the default `botManagement` object. handleDefaultBotManagement()
// parses this and installs the result on a `cf` object that has no
// `botManagement` property of its own.
static constexpr auto kDefaultBotManagementValue = R"DATA({
"corporateProxy": false,
"verifiedBot": false,
"jsDetection": { "passed": false },
"staticResource": false,
"detectionIds": {},
"score": 99
})DATA";

// Ensures that the given `cf` object has a `botManagement` property, adding a
// default one if it is missing. An existing `botManagement` property is left
// untouched.
void handleDefaultBotManagement(jsg::Lock& js, jsg::Value& cf) {
// The caller invokes this only when the NoCfBotManagementDefault feature flag
// is *not* enabled. We check the request cf blob to see if it contains a
// botManagement field. If it does *not* we will add it using the default
// fields from kDefaultBotManagementValue.
// Note that if the botManagement team changes any of the fields they provide,
// this default value may need to be changed also.
auto context = js.v8Context();
// NOTE(review): As<v8::Object>() is an unchecked cast here; the caller is
// expected to have verified that `cf` holds an object.
auto handle = cf.getHandle(js).As<v8::Object>();
if (!js.v8Has(handle, "botManagement"_kj)) {
auto sym = v8::Private::ForApi(js.v8Isolate,
jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj));
// For performance reasons, we only want to construct the default value
// once, so we cache the constructed (and frozen) value using an internal
// private field on the context's global object. Whenever we need to use it
// again we pull the exact same value.
auto defaultBm = jsg::check(context->Global()->GetPrivate(context, sym));
if (defaultBm->IsUndefined()) {
// Cache miss: parse the default JSON, freeze it so that sharing a single
// instance between cf objects is safe, and store it for later calls.
auto bm = js.parseJson(kj::StringPtr(kDefaultBotManagementValue));
KJ_DASSERT(bm.getHandle(js)->IsObject());
js.recursivelyFreeze(bm);
defaultBm = bm.getHandle(js);
jsg::check(context->Global()->SetPrivate(context, sym, defaultBm));
}
js.v8Set(handle, "botManagement"_kj, defaultBm);
}
}

kj::String getEventName(v8::PromiseRejectEvent type) {
switch (type) {
case v8::PromiseRejectEvent::kPromiseRejectWithNoHandler:
Expand Down Expand Up @@ -199,21 +163,7 @@ kj::Promise<DeferredProxy<void>> ServiceWorkerGlobalScope::request(
auto& ioContext = IoContext::current();
jsg::Lock& js = lock;

kj::Maybe<jsg::V8Ref<v8::Object>> cf;

KJ_IF_MAYBE(c, cfBlobJson) {
auto handle = js.parseJson(*c);
KJ_ASSERT(handle.getHandle(js)->IsObject());

if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) {
handleDefaultBotManagement(js, handle);
}

// For the inbound request, we make the `cf` blob immutable.
js.recursivelyFreeze(handle);

cf = handle.cast<v8::Object>(js);
}
CfProperty cf(cfBlobJson);

auto jsHeaders = jsg::alloc<Headers>(headers, Headers::Guard::REQUEST);
// We do not automatically decode gzipped request bodies because the fetch() standard doesn't
Expand Down
181 changes: 149 additions & 32 deletions src/workerd/api/http.c++
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ jsg::Ref<Request> Request::constructor(
kj::Maybe<jsg::Ref<Headers>> headers;
kj::Maybe<jsg::Ref<Fetcher>> fetcher;
kj::Maybe<jsg::Ref<AbortSignal>> signal;
kj::Maybe<jsg::V8Ref<v8::Object>> cf;
CfProperty cf;
kj::Maybe<Body::ExtractedBody> body;
Redirect redirect = Redirect::FOLLOW;

Expand Down Expand Up @@ -789,9 +789,7 @@ jsg::Ref<Request> Request::constructor(
url = kj::str(oldRequest->getUrl());
method = oldRequest->method;
headers = jsg::alloc<Headers>(*oldRequest->headers);
KJ_IF_MAYBE(oldCf, oldRequest->getCf(js)) {
cf = cloneRequestCf(js, js.v8Ref(*oldCf));
}
cf = oldRequest->cf.deepClone(js);
if (!ignoreInputBody) {
JSG_REQUIRE(!oldRequest->getBodyUsed(),
TypeError, "Cannot reconstruct a Request with a used body.");
Expand Down Expand Up @@ -862,7 +860,7 @@ jsg::Ref<Request> Request::constructor(
}

KJ_IF_MAYBE(newCf, initDict.cf) {
cf = cloneRequestCf(js, kj::mv(initDict.cf));
cf = CfProperty(kj::mv(*newCf));
}

KJ_IF_MAYBE(b, kj::mv(initDict.body).orDefault(nullptr)) {
Expand Down Expand Up @@ -895,8 +893,7 @@ jsg::Ref<Request> Request::constructor(
fetcher = otherRequest->getFetcher();
signal = otherRequest->getSignal();
headers = jsg::alloc<Headers>(*otherRequest->headers);
cf = cloneRequestCf(js, otherRequest->cf.map([&](jsg::V8Ref<v8::Object>& ref)
-> jsg::V8Ref<v8::Object> { return ref.addRef(js); }));
cf = otherRequest->cf.clone(js);
KJ_IF_MAYBE(b, otherRequest->getBody()) {
// Note that unlike when `input` (Request ctor's 1st parameter) is a Request object, here
// we're NOT stealing the other request's body, because we're supposed to pretend that the
Expand All @@ -920,10 +917,7 @@ jsg::Ref<Request> Request::constructor(
jsg::Ref<Request> Request::clone(jsg::Lock& js) {
auto headersClone = headers->clone();

auto cfClone = cf.map([&](jsg::V8Ref<v8::Object>& obj) {
return obj.deepClone(js);
});

auto cfClone = cf.deepClone(js);
auto bodyClone = Body::clone(js);

return jsg::alloc<Request>(
Expand Down Expand Up @@ -959,9 +953,7 @@ kj::Maybe<jsg::Ref<AbortSignal>> Request::getSignal() {
}

jsg::Optional<v8::Local<v8::Object>> Request::getCf(jsg::Lock& js) {
return cf.map([&](jsg::V8Ref<v8::Object>& handle) {
return handle.getHandle(js);
});
return cf.get(js);
}

jsg::Ref<AbortSignal> Request::getThisSignal(jsg::Lock& js) {
Expand Down Expand Up @@ -999,16 +991,14 @@ void Request::shallowCopyHeadersTo(kj::HttpHeaders& out) {
}

kj::Maybe<kj::String> Request::serializeCfBlobJson(jsg::Lock& js) {
return cf.map([&](jsg::V8Ref<v8::Object>& obj) {
return js.serializeJson(obj);
});
return cf.serialize(js);
}

// =======================================================================================

Response::Response(
jsg::Lock& js, int statusCode, kj::String statusText, jsg::Ref<Headers> headers,
kj::Maybe<jsg::V8Ref<v8::Object>> cf, kj::Maybe<Body::ExtractedBody> body,
CfProperty&& cf, kj::Maybe<Body::ExtractedBody> body,
kj::Array<kj::String> urlList,
kj::Maybe<jsg::Ref<WebSocket>> webSocket,
Response::BodyEncoding bodyEncoding)
Expand Down Expand Up @@ -1036,7 +1026,7 @@ jsg::Ref<Response> Response::constructor(
kj::Maybe<kj::String> statusText;
kj::Maybe<Body::ExtractedBody> body = nullptr;
jsg::Ref<Headers> headers = nullptr;
kj::Maybe<jsg::V8Ref<v8::Object>> cf = nullptr;
CfProperty cf;
kj::Maybe<jsg::Ref<WebSocket>> webSocket = nullptr;

KJ_SWITCH_ONEOF(init) {
Expand All @@ -1063,7 +1053,7 @@ jsg::Ref<Response> Response::constructor(
headers = jsg::alloc<Headers>(jsg::Dict<jsg::ByteString, jsg::ByteString>());
}

cf = cloneRequestCf(js, kj::mv(initDict.cf));
cf = CfProperty(kj::mv(initDict.cf));

KJ_IF_MAYBE(ws, initDict.webSocket) {
KJ_IF_MAYBE(ws2, *ws) {
Expand All @@ -1080,8 +1070,7 @@ jsg::Ref<Response> Response::constructor(
bodyEncoding = otherResponse->bodyEncoding;
statusText = kj::str(otherResponse->statusText);
headers = jsg::alloc<Headers>(*otherResponse->headers);
cf = cloneRequestCf(js, otherResponse->cf.map([&](jsg::V8Ref<v8::Object>& ref)
-> jsg::V8Ref<v8::Object> { return ref.addRef(js); }));
cf = otherResponse->cf.clone(js);
KJ_IF_MAYBE(otherWs, otherResponse->webSocket) {
webSocket = otherWs->addRef();
}
Expand Down Expand Up @@ -1244,13 +1233,11 @@ jsg::Ref<Response> Response::json_(
.status = res->statusCode,
.statusText = kj::str(res->statusText),
.headers = maybeSetContentType(Headers::constructor(js, res->headers.addRef())),
.cf = res->cf.getRef(js),
.encodeBody = kj::str(res->bodyEncoding == Response::BodyEncoding::MANUAL
? "manual" : "automatic"),
};

newInit.cf = cloneRequestCf(js, res->cf.map([&](jsg::V8Ref<v8::Object>& ref)
-> jsg::V8Ref<v8::Object> { return ref.addRef(js); }));

KJ_IF_MAYBE(otherWs, res->webSocket) {
newInit.webSocket = otherWs->addRef();
}
Expand All @@ -1272,10 +1259,7 @@ jsg::Ref<Response> Response::clone(jsg::Lock& js) {
TypeError, "Cannot clone a response to a WebSocket handshake.");

auto headersClone = headers->clone();

auto cfClone = cf.map([&](jsg::V8Ref<v8::Object>& obj) {
return obj.deepClone(js);
});
auto cfClone = cf.deepClone(js);

auto bodyClone = Body::clone(js);

Expand Down Expand Up @@ -1407,9 +1391,7 @@ kj::Maybe<jsg::Ref<WebSocket>> Response::getWebSocket(jsg::Lock& js) {

jsg::Optional<v8::Local<v8::Object>> Response::getCf(
const v8::PropertyCallbackInfo<v8::Value>& info) {
return cf.map([&](jsg::V8Ref<v8::Object>& handle) {
return handle.getHandle(jsg::Lock::from(info.GetIsolate()));
});
return cf.get(jsg::Lock::from(info.GetIsolate()));
}

// =======================================================================================
Expand Down Expand Up @@ -2272,4 +2254,139 @@ bool isRedirectStatusCode(uint statusCode) {
}
}

// JSON text of the default `botManagement` object. handleDefaultBotManagement()
// parses this and installs the result on a `cf` object that has no
// `botManagement` property of its own.
static constexpr auto kDefaultBotManagementValue = R"DATA({
"corporateProxy": false,
"verifiedBot": false,
"jsDetection": { "passed": false },
"staticResource": false,
"detectionIds": {},
"score": 99
})DATA";

// Ensures that `handle` (a parsed `cf` object) carries a `botManagement`
// property. If the property is already present nothing is changed; otherwise
// a shared, frozen default object built from kDefaultBotManagementValue is
// attached.
//
// CfProperty::getRef() calls this only while the NoCfBotManagementDefault
// feature flag is disabled. If the botManagement team changes the fields they
// provide, kDefaultBotManagementValue may need to be updated to match.
static void handleDefaultBotManagement(jsg::Lock& js, v8::Local<v8::Object> handle) {
  auto context = js.v8Context();

  if (js.v8Has(handle, "botManagement"_kj)) {
    // The blob already has its own botManagement data; leave it alone.
    return;
  }

  // Building the default object is comparatively expensive, so the parsed and
  // frozen value is cached in a private field on the context's global object
  // and the very same instance is handed out on every subsequent call.
  auto cacheKey = v8::Private::ForApi(js.v8Isolate,
      jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj));
  auto global = context->Global();

  auto cached = jsg::check(global->GetPrivate(context, cacheKey));
  if (cached->IsUndefined()) {
    // First use: parse, freeze (the instance is shared), then remember it.
    auto parsedDefault = js.parseJson(kj::StringPtr(kDefaultBotManagementValue));
    KJ_DASSERT(parsedDefault.getHandle(js)->IsObject());
    js.recursivelyFreeze(parsedDefault);
    cached = parsedDefault.getHandle(js);
    jsg::check(global->SetPrivate(context, cacheKey, cached));
  }

  js.v8Set(handle, "botManagement"_kj, cached);
}

// Returns a local handle to the `cf` object, or nullptr when this property
// holds no value. Delegates to getRef(), so a still-unparsed JSON blob is
// parsed on first access.
jsg::Optional<v8::Local<v8::Object>> CfProperty::get(jsg::Lock& js) {
  KJ_IF_MAYBE(ref, getRef(js)) {
    return ref->getHandle(js);
  }
  return nullptr;
}

// Returns a new strong reference to the `cf` object, or nullptr when this
// property holds no value.
//
// The value is stored either as an already-parsed V8 object or as a raw JSON
// string. In the latter case the string is parsed here, on first access, and
// the parsed object replaces the string so that later calls take the cheap
// path. Parsing asserts (KJ_ASSERT) that the JSON text denotes an object.
jsg::Optional<jsg::V8Ref<v8::Object>> CfProperty::getRef(jsg::Lock& js) {
  KJ_IF_MAYBE(cf, value) {
    KJ_SWITCH_ONEOF(*cf) {
      KJ_CASE_ONEOF(parsed, jsg::V8Ref<v8::Object>) {
        return parsed.addRef(js);
      }
      KJ_CASE_ONEOF(unparsed, kj::String) {
        // Note: the cf blob is intentionally not logged here. It is parsed on
        // every first access and may be large.
        auto parsed = js.parseJson(unparsed);
        auto handle = parsed.getHandle(js);
        KJ_ASSERT(handle->IsObject());

        auto objectHandle = handle.As<v8::Object>();
        if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) {
          // Unless disabled by the feature flag, make sure the object has a
          // `botManagement` property. This must happen before freezing.
          handleDefaultBotManagement(js, objectHandle);
        }

        // For the inbound request, we make the `cf` blob immutable.
        js.recursivelyFreeze(parsed);

        // Replace the unparsed string with the parsed V8 object. `unparsed`
        // refers to storage owned by `value`, so it must not be touched after
        // this assignment.
        auto parsedObject = parsed.cast<v8::Object>(js);
        this->value = parsedObject.addRef(js);
        return kj::mv(parsedObject);
      }
    }
  }

  return nullptr;
}


// Produces the JSON text of the `cf` value, or nullptr when this property
// holds no value.
kj::Maybe<kj::String> CfProperty::serialize(jsg::Lock& js) {
  KJ_IF_MAYBE(current, value) {
    KJ_SWITCH_ONEOF(*current) {
      KJ_CASE_ONEOF(object, jsg::V8Ref<v8::Object>) {
        return js.serializeJson(object);
      }
      KJ_CASE_ONEOF(json, kj::String) {
        if (FeatureFlags::get(js).getNoCfBotManagementDefault()) {
          // Parsing would not alter the blob, so the stored text can be
          // returned as-is (copied) without touching V8.
          return kj::str(json);
        }

        // With the default-botManagement behavior active, parsing may add a
        // `botManagement` property to the object. Go through getRef() so the
        // serialized text reflects that, then serialize the result back.
        return js.serializeJson(KJ_ASSERT_NONNULL(getRef(js)));
      }
    }
  }

  return nullptr;
}

// Shallow copy: a parsed value yields a CfProperty holding another reference
// to the same V8 object, while an unparsed value yields a CfProperty built
// from the same JSON text (still unparsed). An empty property clones to an
// empty property.
CfProperty CfProperty::clone(jsg::Lock& js) {
  KJ_IF_MAYBE(current, value) {
    KJ_SWITCH_ONEOF(*current) {
      KJ_CASE_ONEOF(object, jsg::V8Ref<v8::Object>) {
        return CfProperty(object.addRef(js));
      }
      KJ_CASE_ONEOF(json, kj::String) {
        return CfProperty(json.asPtr());
      }
    }
  }

  return nullptr;
}

// Deep copy: a parsed value is duplicated via V8Ref::deepClone(), while an
// unparsed value yields a CfProperty built from the same JSON text (no
// parsing is triggered). An empty property clones to an empty property.
CfProperty CfProperty::deepClone(jsg::Lock& js) {
  KJ_IF_MAYBE(current, value) {
    KJ_SWITCH_ONEOF(*current) {
      KJ_CASE_ONEOF(object, jsg::V8Ref<v8::Object>) {
        return CfProperty(object.deepClone(js));
      }
      KJ_CASE_ONEOF(json, kj::String) {
        return CfProperty(json.asPtr());
      }
    }
  }

  return nullptr;
}

// GC tracing hook. Only the parsed form holds a V8 reference that needs to be
// reported to the visitor; an unparsed JSON string (or no value at all) has
// nothing to trace.
void CfProperty::visitForGc(jsg::GcVisitor& visitor) {
  KJ_IF_MAYBE(current, value) {
    KJ_IF_MAYBE(object, current->tryGet<jsg::V8Ref<v8::Object>>()) {
      visitor.visit(*object);
    }
  }
}

} // namespace workerd::api
Loading

0 comments on commit 47a296e

Please sign in to comment.