Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add WarningAggregator, improve unused stream warning #1790

Merged
merged 2 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 67 additions & 53 deletions src/workerd/api/streams/internal.c++
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <workerd/jsg/jsg.h>
#include <kj/vector.h>
#include <workerd/api/util.h>
#include <workerd/util/string-buffer.h>

namespace workerd::api {

Expand Down Expand Up @@ -349,11 +350,64 @@ private:
kj::Own<kj::AsyncInputStream> inner;
};

static const WarningAggregator::Key unusedStreamBranchKey;

class WarnIfUnusedStream final: public ReadableStreamSource {
public:
explicit WarnIfUnusedStream(kj::Own<ReadableStreamSource> inner, IoContext& ioContext)
: worker(kj::atomicAddRef(ioContext.getWorker())),
requestMetrics(kj::addRef(ioContext.getMetrics())),
class UnusedStreamWarningContext final : public WarningAggregator::WarningContext {
public:
UnusedStreamWarningContext(jsg::Lock& js)
: exception(jsg::JsRef(js, js.error(""_kjc))) {}

kj::String toString(jsg::Lock& js) override {
auto handle = exception.getHandle(js);
auto obj = KJ_ASSERT_NONNULL(handle.tryCast<jsg::JsObject>());
obj.set(js, "name"_kjc, js.str("Unused stream created at:"_kjc));
jasnell marked this conversation as resolved.
Show resolved Hide resolved
jasnell marked this conversation as resolved.
Show resolved Hide resolved
return obj.get(js, "stack"_kjc).toString(js);
}

private:
jsg::JsRef<jsg::JsValue> exception;
};

static kj::Own<WarningAggregator> createWarningAggregator(IoContext& context) {
return kj::atomicRefcounted<WarningAggregator>(context,
[](jsg::Lock& js, kj::Array<kj::Own<WarningAggregator::WarningContext>> warnings) {
StringBuffer<1024> message(1024);
if (warnings.size() > 1) {
message.append(kj::str(warnings.size()),
" ReadableStream branches were created but never consumed. ");
} else {
message.append("A ReadableStream branch was created but never consumed. ");
}
message.append(
"Such branches can be created, for instance, by calling the tee() "
"method on a ReadableStream, or by calling the clone() method on a "
"Request or Response object. If a branch is created but never consumed, "
"it can force the runtime to buffer the entire body of the stream in "
"memory, which may cause the Worker to exceed its memory limit and be "
"terminated. To avoid this, ensure that all branches created are consumed.\n");

if (warnings.size() > 1) {
for (int n = 0; n < warnings.size(); n++) {
auto& warning = warnings[n];
message.append("\n ", kj::str(n + 1) , ". ", warning->toString(js), "\n");
}
} else {
message.append("\n * ", warnings[0]->toString(js), "\n");
}
auto msg = message.toString();
js.logWarning(msg);
});
}

explicit WarnIfUnusedStream(jsg::Lock& js,
kj::Own<ReadableStreamSource> inner,
IoContext& ioContext)
: warningAggregator(ioContext.getWarningAggregator(
unusedStreamBranchKey,
[](IoContext& context) { return createWarningAggregator(context); })),
warningContext(kj::heap<UnusedStreamWarningContext>(js)),
inner(kj::mv(inner)) {}

kj::Promise<DeferredProxy<void>> pumpTo(WritableStreamSink& output, bool end) override {
Expand All @@ -366,65 +420,25 @@ public:
return inner->tryRead(buffer, minBytes, maxBytes);
}

// TODO(someday): we set `wasRead` to avoid warning here, but TeeBranch might still buffer the body.
// We should fix it not to buffer when cancelled.
// TODO(someday): we set `wasRead` to avoid warning here, but TeeBranch might still buffer the
// body. We should fix it not to buffer when cancelled.
void cancel(kj::Exception reason) override {
wasRead = true;
return inner->cancel(reason);
}

// No special behavior, just forward these verbatim.
kj::Maybe<uint64_t> tryGetLength(StreamEncoding encoding) override { return inner->tryGetLength(encoding); }
kj::Maybe<uint64_t> tryGetLength(StreamEncoding encoding) override {
return inner->tryGetLength(encoding);
}
kj::Maybe<Tee> tryTee(uint64_t limit) override { return inner->tryTee(limit); }

~WarnIfUnusedStream() {
// There's some subtlety here. If WarnIfUnused is being GC'd then we're
// fine; we can use `ctx->logWarning` like normal. However, if this is
// being destroyed at the end of the request then we're in the middle of
// tearing down the whole IoContext. In that case, we do *not* have
// an isolate lock available, and need to take one out. We can't call
// `ctx->isInspectorEnabled` without a lock either, so instead it's stored at
// the time the worker calls `clone()`.

// N.B. It's fine not to call `isInspectorEnabled` here because the
// inspector is either enabled or disabled for the whole lifetime of the
// isolate.
if (!wasRead) {
auto msg =
"Your worker created multiple branches of a single stream (for instance, by calling "
"`response.clone()` or `request.clone()`) but did not read the body of both branches. "
"This is wasteful, as it forces the system to buffer the entire stream of data in "
"memory, rather than streaming it through. This may cause your worker to be unexpectedly "
"terminated for going over the memory limit. If you only meant to copy the "
"request or response headers and metadata (e.g. in order to be able to modify them), "
"use the appropriate constructors instead (for instance, `new Response(response.body, "
"response)`, `new Request(request)`, etc).";
// If possible, give a warning pointing to the line number.
if (IoContext::hasCurrent()) {
// We're currently in a JavaScript execution context, which means the object must be being
// destroyed due to GC. V8 does not like having most of its APIs invoked in the middle of
// GC. So, delay our log warning until GC finishes.
auto& context = IoContext::current();
context.addTask(context.run([msg](Worker::Lock& lock) {
lock.logWarning(kj::str(msg));
}));
} else {
// We aren't in any JavaScript context. The stream might be being destroyed during
// IoContext shutdown or maybe even during deferred proxying. So, avoid touching
// the IoContext. Instead, we'll lock the worker directly.
//
// Synchronous lock is OK here since it only happens during preview. We don't have a
// metrics object to provide ,though.
worker->runInLockScope(Worker::Lock::TakeSynchronously(*requestMetrics),
[&](Worker::Lock& lock) {
lock.logWarning(msg);
});
}
}
if (!wasRead) { warningAggregator->add(kj::mv(warningContext)); }
}
private:
kj::Own<const Worker> worker;
kj::Own<RequestObserver> requestMetrics;
kj::Own<WarningAggregator> warningAggregator;
kj::Own<UnusedStreamWarningContext> warningContext;
kj::Own<ReadableStreamSource> inner;
// Used for tracking if this body was ever used.
bool wasRead = false;
Expand Down Expand Up @@ -699,8 +713,8 @@ ReadableStreamController::Tee ReadableStreamInternalController::tee(jsg::Lock& j
[&](kj::Own<ReadableStreamSource> b1, kj::Own<ReadableStreamSource> b2) -> Tee {
doClose(js);
if (ioContext.isInspectorEnabled()) {
b1 = kj::heap<WarnIfUnusedStream>(kj::mv(b1), ioContext);
b2 = kj::heap<WarnIfUnusedStream>(kj::mv(b2), ioContext);
b1 = kj::heap<WarnIfUnusedStream>(js, kj::mv(b1), ioContext);
b2 = kj::heap<WarnIfUnusedStream>(js, kj::mv(b2), ioContext);
}
return Tee {
.branch1 = jsg::alloc<ReadableStream>(ioContext, kj::mv(b1)),
Expand Down
49 changes: 49 additions & 0 deletions src/workerd/io/io-context.c++
Original file line number Diff line number Diff line change
Expand Up @@ -1325,4 +1325,53 @@ jsg::JsObject IoContext::getPromiseContextTag(jsg::Lock& js) {
return KJ_REQUIRE_NONNULL(promiseContextTag).getHandle(js);
}

// ======================================================================================

WarningAggregator::WarningAggregator(IoContext& context, EmitCallback emitter)
: worker(kj::atomicAddRef(context.getWorker())),
requestMetrics(kj::addRef(context.getMetrics())),
emitter(kj::mv(emitter)) {}

WarningAggregator::~WarningAggregator() noexcept(false) {
auto lock = warnings.lockExclusive();
if (lock->size() > 0) {
auto emitter = kj::mv(this->emitter);
auto warnings = lock->releaseAsArray();
if (IoContext::hasCurrent()) {
// We are currently in a JavaScript execution context. The object is likely being
// destroyed during garbage collection. V8 does not like having most of its API
// invoked in the middle of GC. So we'll delay our warning until GC finished.
auto& context = IoContext::current();
context.addTask(context.run(
[emitter=kj::mv(emitter), warnings=kj::mv(warnings)](Worker::Lock& lock) mutable {
emitter(lock, kj::mv(warnings));
}));
} else {
// We aren't in any JavaScript context. The object might be being destroyed during
// IoContext shutdown or maybe even during deferred proxying. So, avoid touching
// the IoContext. Instead, we'll lock the worker directly.
worker->runInLockScope(Worker::Lock::TakeSynchronously(*requestMetrics),
[emitter=kj::mv(emitter), warnings=kj::mv(warnings)](Worker::Lock& lock) mutable {
JSG_WITHIN_CONTEXT_SCOPE(lock, lock.getContext(), [&](jsg::Lock& js) {
emitter(lock, kj::mv(warnings));
});
});
}
}
}

void WarningAggregator::add(kj::Own<WarningContext> warning) const {
warnings.lockExclusive()->add(kj::mv(warning));
}

kj::Own<WarningAggregator> IoContext::getWarningAggregator(
const WarningAggregator::Key& key,
kj::Function<kj::Own<WarningAggregator>(IoContext&)> load) {
auto& instance = warningAggregatorMap.findOrCreate(key,
[this, load=kj::mv(load), &key]() mutable -> WarningAggregator::Map::Entry {
return {key, load(*this)};
});
return kj::atomicAddRef(*instance);
}

} // namespace workerd
54 changes: 54 additions & 0 deletions src/workerd/io/io-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,53 @@ namespace workerd {

class IoContext;

// A WarningAggregator is a helper utility for deduplicating related warning messages.
// It is a ref-counted object that is initially acquired from the IoContext, but is
// capable of outliving the IoContext. When destroyed, if there are any pending warnings,
// those will be emitted all at once with a single header message followed by contextual
// information for each individual collected instance.
class WarningAggregator final: public kj::AtomicRefcounted {
public:

// The IoContext will maintain a map of WarningAggregators based on an opaque key.
class Key final {
public:
Key() : hash(kj::hashCode(this)) {}
KJ_DISALLOW_COPY_AND_MOVE(Key);
inline uint hashCode() const { return hash; }
inline bool operator==(const Key& other) const { return this == &other; }
private:
uint hash;
};

// Captures the contextual information for a specific aggregated warning.
class WarningContext {
public:
virtual ~WarningContext() noexcept(false) = default;
virtual kj::String toString(jsg::Lock& js) = 0;
};

// The EmitCallback is called when the WarningAggregator is destroyed. It is
// responsible for actually emitting the warnings that are collected. It will
// only be called once and only if there are any collected warnings.
using EmitCallback = kj::Function<void(
Worker::Lock& lock,
kj::Array<kj::Own<WarningContext>> warnings)>;

WarningAggregator(IoContext& context, EmitCallback emitter);
~WarningAggregator() noexcept(false);

void add(kj::Own<WarningContext> warning) const;

using Map = kj::HashMap<const Key&, kj::Own<WarningAggregator>>;

private:
kj::Own<const Worker> worker;
kj::Own<RequestObserver> requestMetrics;
EmitCallback emitter;
kj::MutexGuarded<kj::Vector<kj::Own<WarningContext>>> warnings;
};

// Represents one incoming request being handled by a IoContext. In non-actor scenarios,
// there is only ever one IncomingRequest per IoContext, but with actors there could be many.
//
Expand Down Expand Up @@ -674,6 +721,11 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler

jsg::JsObject getPromiseContextTag(jsg::Lock& js);

// Returns the existing WarningAggregator for the specified key, or calls load to create one.
kj::Own<WarningAggregator> getWarningAggregator(
const WarningAggregator::Key& key,
kj::Function<kj::Own<WarningAggregator>(IoContext&)> load);

private:
ThreadContext& thread;

Expand Down Expand Up @@ -709,6 +761,8 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler
kj::Maybe<PendingEvent&> pendingEvent;
kj::Maybe<kj::Promise<void>> runFinalizersTask;

WarningAggregator::Map warningAggregatorMap;

// Objects pointed to by IoOwn<T>s.
// NOTE: This must live below `deleteQueue`, as some of these OwnedObjects may own attachctx()'ed
// objects which reference `deleteQueue` in their destructors.
Expand Down
Loading