From 8e67e9a6f8051258f504f523a58870a4f994e9d0 Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Tue, 15 Aug 2023 15:56:35 -0700 Subject: [PATCH] lazy cf property parsing --- src/workerd/api/cf-property.c++ | 131 +++++++++++++++++++++++++++++++ src/workerd/api/cf-property.h | 56 +++++++++++++ src/workerd/api/global-scope.c++ | 52 +----------- src/workerd/api/http.c++ | 48 ++++------- src/workerd/api/http.h | 9 ++- src/workerd/api/util.h | 3 - 6 files changed, 209 insertions(+), 90 deletions(-) create mode 100644 src/workerd/api/cf-property.c++ create mode 100644 src/workerd/api/cf-property.h diff --git a/src/workerd/api/cf-property.c++ b/src/workerd/api/cf-property.c++ new file mode 100644 index 00000000000..a3778a9ebf1 --- /dev/null +++ b/src/workerd/api/cf-property.c++ @@ -0,0 +1,131 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "cf-property.h" +#include + +namespace workerd::api { + +static constexpr auto kDefaultBotManagementValue = R"DATA({ + "corporateProxy": false, + "verifiedBot": false, + "jsDetection": { "passed": false }, + "staticResource": false, + "detectionIds": {}, + "score": 99 +})DATA"; + +static void handleDefaultBotManagement(jsg::Lock& js, v8::Local handle) { + // When the cfBotManagementNoOp compatibility flag is set, we'll check the + // request cf blob to see if it contains a botManagement field. If it does + // *not* we will add it using the following default fields. + // Note that if the botManagement team changes any of the fields they provide, + // this default value may need to be changed also. 
+ auto context = js.v8Context(); + if (!js.v8Has(handle, "botManagement"_kj)) { + auto sym = v8::Private::ForApi(js.v8Isolate, + jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj)); + // For performance reasons, we only want to construct the default values + // once per isolate so we cache the constructed value using an internal + // private field on the global scope. Whenever we need to use it again we + // pull the exact same value. + auto defaultBm = jsg::check(context->Global()->GetPrivate(context, sym)); + if (defaultBm->IsUndefined()) { + auto bm = js.parseJson(kj::StringPtr(kDefaultBotManagementValue)); + KJ_DASSERT(bm.getHandle(js)->IsObject()); + js.recursivelyFreeze(bm); + defaultBm = bm.getHandle(js); + jsg::check(context->Global()->SetPrivate(context, sym, defaultBm)); + } + js.v8Set(handle, "botManagement"_kj, defaultBm); + } +} + +jsg::Optional> CfProperty::get(jsg::Lock& js) { + return getRef(js).map([&js](jsg::V8Ref&& ref) mutable { + return ref.getHandle(js); + }); +} + +jsg::Optional> CfProperty::getRef(jsg::Lock& js) { + KJ_IF_MAYBE(cf, value) { + KJ_SWITCH_ONEOF(*cf) { + KJ_CASE_ONEOF(parsed, jsg::V8Ref) { + return parsed.addRef(js); + } + KJ_CASE_ONEOF(unparsed, kj::String) { + // Parse lazily on first access; the parsed object replaces the string below. + auto parsed = js.parseJson(unparsed); + auto handle = parsed.getHandle(js); + KJ_ASSERT(handle->IsObject()); + + auto objectHandle = handle.As(); + if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) { + handleDefaultBotManagement(js, objectHandle); + } + + // For the inbound request, we make the `cf` blob immutable. 
+ js.recursivelyFreeze(parsed); + + // Cache the result: replace the unparsed string with the parsed v8 object. + auto parsedObject = parsed.cast(js); + this->value = parsedObject.addRef(js); + return kj::mv(parsedObject); + } + } + } + + return nullptr; +} + + +kj::Maybe CfProperty::serialize(jsg::Lock& js) { + KJ_IF_MAYBE(cf, value) { + KJ_SWITCH_ONEOF(*cf) { + KJ_CASE_ONEOF(parsed, jsg::V8Ref) { + return js.serializeJson(parsed); + } + KJ_CASE_ONEOF(unparsed, kj::String) { + if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) { + // getRef() may add a default botManagement field on this code path, so the + // raw string is stale: parse it via getRef() and serialize the result back. + return js.serializeJson(KJ_ASSERT_NONNULL(getRef(js))); + } + + return kj::str(unparsed); + } + } + } + + return nullptr; +} + +CfProperty CfProperty::deepClone(jsg::Lock& js) { + KJ_IF_MAYBE(cf, value) { + KJ_SWITCH_ONEOF(*cf) { + KJ_CASE_ONEOF(parsed, jsg::V8Ref) { + auto ref = parsed.deepClone(js); + return CfProperty(kj::mv(ref)); + } + KJ_CASE_ONEOF(unparsed, kj::String) { + return CfProperty(unparsed.asPtr()); + } + } + } + + return nullptr; +} + +void CfProperty::visitForGc(jsg::GcVisitor& visitor) { + KJ_IF_MAYBE(cf, value) { + KJ_SWITCH_ONEOF(*cf) { + KJ_CASE_ONEOF(parsed, jsg::V8Ref) { + visitor.visit(parsed); + } + KJ_CASE_ONEOF_DEFAULT {} + } + } +} + +} // namespace workerd::api diff --git a/src/workerd/api/cf-property.h b/src/workerd/api/cf-property.h new file mode 100644 index 00000000000..c861551c718 --- /dev/null +++ b/src/workerd/api/cf-property.h @@ -0,0 +1,56 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#pragma once + +// Common functionality to manage cf headers and properties. + +#include + +namespace workerd::api { + +class CfProperty { + // A holder for Cf header property value. 
+ +public: + KJ_DISALLOW_COPY(CfProperty); + + explicit CfProperty() {} + CfProperty(decltype(nullptr)) {} + CfProperty(CfProperty&&) = default; + CfProperty& operator=(CfProperty&&) = default; + + explicit CfProperty(kj::Maybe unparsed) { + KJ_IF_MAYBE(str, unparsed) { + value = kj::str(*str); + } + } + + explicit CfProperty(kj::Maybe>&& parsed) { + KJ_IF_MAYBE(v, parsed) { + value = kj::mv(*v); + } + } + + jsg::Optional> get(jsg::Lock& js); + // Get parsed value + + jsg::Optional> getRef(jsg::Lock& js); + // Get parsed value as a global ref + + kj::Maybe serialize(jsg::Lock& js); + // Serialize to string + + CfProperty deepClone(jsg::Lock& js); + // Clone by deep cloning parsed v8 object (if any). + + void visitForGc(jsg::GcVisitor& visitor); + +private: + kj::Maybe>> value; +}; + + +} // namespace workerd::api diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 49aa220ef76..34a8f8627c8 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -92,42 +92,6 @@ private: } }; -static constexpr auto kDefaultBotManagementValue = R"DATA({ - "corporateProxy": false, - "verifiedBot": false, - "jsDetection": { "passed": false }, - "staticResource": false, - "detectionIds": {}, - "score": 99 -})DATA"; - -void handleDefaultBotManagement(jsg::Lock& js, jsg::Value& cf) { - // When the cfBotManagementNoOp compatibility flag is set, we'll check the - // request cf blob to see if it contains a botManagement field. If it does - // *not* we will add it using the following default fields. - // Note that if the botManagement team changes any of the fields they provide, - // this default value may need to be changed also. 
- auto context = js.v8Context(); - auto handle = cf.getHandle(js).As(); - if (!js.v8Has(handle, "botManagement"_kj)) { - auto sym = v8::Private::ForApi(js.v8Isolate, - jsg::v8StrIntern(js.v8Isolate, "botManagement"_kj)); - // For performance reasons, we only want to construct the default values - // once per isolate so we cache the constructed value using an internal - // private field on the global scope. Whenever we need to use it again we - // pull the exact same value. - auto defaultBm = jsg::check(context->Global()->GetPrivate(context, sym)); - if (defaultBm->IsUndefined()) { - auto bm = js.parseJson(kj::StringPtr(kDefaultBotManagementValue)); - KJ_DASSERT(bm.getHandle(js)->IsObject()); - js.recursivelyFreeze(bm); - defaultBm = bm.getHandle(js); - jsg::check(context->Global()->SetPrivate(context, sym, defaultBm)); - } - js.v8Set(handle, "botManagement"_kj, defaultBm); - } -} - kj::String getEventName(v8::PromiseRejectEvent type) { switch (type) { case v8::PromiseRejectEvent::kPromiseRejectWithNoHandler: @@ -200,21 +164,7 @@ kj::Promise> ServiceWorkerGlobalScope::request( auto& ioContext = IoContext::current(); jsg::Lock& js = lock; - kj::Maybe> cf; - - KJ_IF_MAYBE(c, cfBlobJson) { - auto handle = js.parseJson(*c); - KJ_ASSERT(handle.getHandle(js)->IsObject()); - - if (!FeatureFlags::get(js).getNoCfBotManagementDefault()) { - handleDefaultBotManagement(js, handle); - } - - // For the inbound request, we make the `cf` blob immutable. 
- js.recursivelyFreeze(handle); - - cf = handle.cast(js); - } + CfProperty cf(cfBlobJson); auto jsHeaders = jsg::alloc(headers, Headers::Guard::REQUEST); // We do not automatically decode gzipped request bodies because the fetch() standard doesn't diff --git a/src/workerd/api/http.c++ b/src/workerd/api/http.c++ index a3ea7528f20..8216c00c4b4 100644 --- a/src/workerd/api/http.c++ +++ b/src/workerd/api/http.c++ @@ -739,7 +739,7 @@ jsg::Ref Request::constructor( kj::Maybe> headers; kj::Maybe> fetcher; kj::Maybe> signal; - kj::Maybe> cf; + CfProperty cf; kj::Maybe body; Redirect redirect = Redirect::FOLLOW; @@ -777,9 +777,7 @@ jsg::Ref Request::constructor( url = kj::str(oldRequest->getUrl()); method = oldRequest->method; headers = jsg::alloc(*oldRequest->headers); - KJ_IF_MAYBE(oldCf, oldRequest->getCf(js)) { - cf = cloneRequestCf(js, js.v8Ref(*oldCf)); - } + cf = oldRequest->cf.deepClone(js); if (!ignoreInputBody) { JSG_REQUIRE(!oldRequest->getBodyUsed(), TypeError, "Cannot reconstruct a Request with a used body."); @@ -850,7 +848,7 @@ jsg::Ref Request::constructor( } KJ_IF_MAYBE(newCf, initDict.cf) { - cf = cloneRequestCf(js, kj::mv(initDict.cf)); + cf = CfProperty(newCf->deepClone(js)); } KJ_IF_MAYBE(b, kj::mv(initDict.body).orDefault(nullptr)) { @@ -883,8 +881,7 @@ jsg::Ref Request::constructor( fetcher = otherRequest->getFetcher(); signal = otherRequest->getSignal(); headers = jsg::alloc(*otherRequest->headers); - cf = cloneRequestCf(js, otherRequest->cf.map([&](jsg::V8Ref& ref) - -> jsg::V8Ref { return ref.addRef(js); })); + cf = otherRequest->cf.deepClone(js); KJ_IF_MAYBE(b, otherRequest->getBody()) { // Note that unlike when `input` (Request ctor's 1st parameter) is a Request object, here // we're NOT stealing the other request's body, because we're supposed to pretend that the @@ -908,10 +905,7 @@ jsg::Ref Request::constructor( jsg::Ref Request::clone(jsg::Lock& js) { auto headersClone = headers->clone(); - auto cfClone = cf.map([&](jsg::V8Ref& obj) { - 
return obj.deepClone(js); - }); - + auto cfClone = cf.deepClone(js); auto bodyClone = Body::clone(js); return jsg::alloc( @@ -947,9 +941,7 @@ kj::Maybe> Request::getSignal() { } jsg::Optional> Request::getCf(jsg::Lock& js) { - return cf.map([&](jsg::V8Ref& handle) { - return handle.getHandle(js); - }); + return cf.get(js); } jsg::Ref Request::getThisSignal(jsg::Lock& js) { @@ -987,16 +979,14 @@ void Request::shallowCopyHeadersTo(kj::HttpHeaders& out) { } kj::Maybe Request::serializeCfBlobJson(jsg::Lock& js) { - return cf.map([&](jsg::V8Ref& obj) { - return js.serializeJson(obj); - }); + return cf.serialize(js); } // ======================================================================================= Response::Response( jsg::Lock& js, int statusCode, kj::String statusText, jsg::Ref headers, - kj::Maybe> cf, kj::Maybe body, + CfProperty&& cf, kj::Maybe body, kj::Array urlList, kj::Maybe> webSocket, Response::BodyEncoding bodyEncoding) @@ -1024,7 +1014,7 @@ jsg::Ref Response::constructor( kj::Maybe statusText; kj::Maybe body = nullptr; jsg::Ref headers = nullptr; - kj::Maybe> cf = nullptr; + CfProperty cf; kj::Maybe> webSocket = nullptr; KJ_SWITCH_ONEOF(init) { @@ -1051,7 +1041,9 @@ jsg::Ref Response::constructor( headers = jsg::alloc(jsg::Dict()); } - cf = cloneRequestCf(js, kj::mv(initDict.cf)); + KJ_IF_MAYBE(newCf, initDict.cf) { + cf = CfProperty(newCf->deepClone(js)); + } KJ_IF_MAYBE(ws, initDict.webSocket) { KJ_IF_MAYBE(ws2, *ws) { @@ -1068,8 +1060,7 @@ jsg::Ref Response::constructor( bodyEncoding = otherResponse->bodyEncoding; statusText = kj::str(otherResponse->statusText); headers = jsg::alloc(*otherResponse->headers); - cf = cloneRequestCf(js, otherResponse->cf.map([&](jsg::V8Ref& ref) - -> jsg::V8Ref { return ref.addRef(js); })); + cf = otherResponse->cf.deepClone(js); KJ_IF_MAYBE(otherWs, otherResponse->webSocket) { webSocket = otherWs->addRef(); } @@ -1232,13 +1223,11 @@ jsg::Ref Response::json_( .status = res->statusCode, .statusText = 
kj::str(res->statusText), .headers = maybeSetContentType(Headers::constructor(js, res->headers.addRef())), + .cf = res->cf.getRef(js), .encodeBody = kj::str(res->bodyEncoding == Response::BodyEncoding::MANUAL ? "manual" : "automatic"), }; - newInit.cf = cloneRequestCf(js, res->cf.map([&](jsg::V8Ref& ref) - -> jsg::V8Ref { return ref.addRef(js); })); - KJ_IF_MAYBE(otherWs, res->webSocket) { newInit.webSocket = otherWs->addRef(); } @@ -1260,10 +1249,7 @@ jsg::Ref Response::clone(jsg::Lock& js) { TypeError, "Cannot clone a response to a WebSocket handshake."); auto headersClone = headers->clone(); - - auto cfClone = cf.map([&](jsg::V8Ref& obj) { - return obj.deepClone(js); - }); + auto cfClone = cf.deepClone(js); auto bodyClone = Body::clone(js); @@ -1395,9 +1381,7 @@ kj::Maybe> Response::getWebSocket(jsg::Lock& js) { jsg::Optional> Response::getCf( const v8::PropertyCallbackInfo& info) { - return cf.map([&](jsg::V8Ref& handle) { - return handle.getHandle(jsg::Lock::from(info.GetIsolate())); - }); + return cf.get(jsg::Lock::from(info.GetIsolate())); } // ======================================================================================= diff --git a/src/workerd/api/http.h b/src/workerd/api/http.h index df7b0e556f4..8ede609b001 100644 --- a/src/workerd/api/http.h +++ b/src/workerd/api/http.h @@ -10,6 +10,7 @@ #include #include #include "basics.h" +#include "cf-property.h" #include "streams.h" #include "form-data.h" #include "web-socket.h" @@ -643,7 +644,7 @@ class Request: public Body { Request(kj::HttpMethod method, kj::StringPtr url, Redirect redirect, jsg::Ref headers, kj::Maybe> fetcher, - kj::Maybe> signal, kj::Maybe> cf, + kj::Maybe> signal, CfProperty&& cf, kj::Maybe body) : Body(kj::mv(body), *headers), method(method), url(kj::str(url)), redirect(redirect), headers(kj::mv(headers)), fetcher(kj::mv(fetcher)), @@ -830,7 +831,7 @@ class Request: public Body { // an optional AbortSignal passed in with the options), and "this' signal", which is an // AbortSignal 
that is always available via the request.signal accessor. When signal is // used explicity, thisSignal will not be. - kj::Maybe> cf; + CfProperty cf; void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(headers, fetcher, signal, thisSignal, cf); @@ -845,7 +846,7 @@ class Response: public Body { }; Response(jsg::Lock& js, int statusCode, kj::String statusText, jsg::Ref headers, - kj::Maybe> cf, kj::Maybe body, + CfProperty&& cf, kj::Maybe body, kj::Array urlList = {}, kj::Maybe> webSocket = nullptr, Response::BodyEncoding bodyEncoding = Response::BodyEncoding::AUTO); @@ -997,7 +998,7 @@ class Response: public Body { int statusCode; kj::String statusText; jsg::Ref headers; - kj::Maybe> cf; + CfProperty cf; kj::Array urlList; // The URL list, per the Fetch spec. Only Responses actually created by fetch() have a non-empty diff --git a/src/workerd/api/util.h b/src/workerd/api/util.h index 2c7745324a8..2f93961b282 100644 --- a/src/workerd/api/util.h +++ b/src/workerd/api/util.h @@ -104,9 +104,6 @@ double dateNow(); // ======================================================================================= -kj::Maybe> cloneRequestCf( - jsg::Lock& js, kj::Maybe> maybeCf); - void maybeWarnIfNotText(kj::StringPtr str); } // namespace workerd::api