Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add exception stack traces to trace/tail events. #1910

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ apple_support_dependencies()

http_archive(
name = "capnp-cpp",
sha256 = "e9220640c1e6453e2d27cf512a97933bfa59a92e708b566719b59326778827fc",
strip_prefix = "capnproto-capnproto-eef235d/c++",
sha256 = "21ae72a199a995b72e9bd359d4815539158d93a15cf36e284ef201fde7338c3c",
strip_prefix = "capnproto-capnproto-8ba7a6e/c++",
type = "tgz",
urls = ["https://github.com/capnproto/capnproto/tarball/eef235dfa7aa40087605538ee3942b75a8910787"],
urls = ["https://github.com/capnproto/capnproto/tarball/8ba7a6ef987eb56e8d41e7f11e080dabcbf2a04f"],
)

http_archive(
Expand Down
7 changes: 6 additions & 1 deletion src/workerd/api/trace.c++
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,8 @@ jsg::V8Ref<v8::Object> TraceLog::getMessage(jsg::Lock& js) {
TraceException::TraceException(const Trace& trace, const Trace::Exception& exception)
: timestamp(getTraceExceptionTimestamp(exception)),
name(kj::str(exception.name)),
message(kj::str(exception.message)) {}
message(kj::str(exception.message)),
stack(exception.stack.map([](kj::StringPtr s) { return kj::str(s); })) {}

double TraceException::getTimestamp() {
return timestamp;
Expand All @@ -557,6 +558,10 @@ kj::StringPtr TraceException::getName() {
return name;
}

jsg::Optional<kj::StringPtr> TraceException::getStack(jsg::Lock& js) {
return stack;
}

TraceMetrics::TraceMetrics(uint cpuTime, uint wallTime) : cpuTime(cpuTime), wallTime(wallTime) {}

jsg::Ref<TraceMetrics> UnsafeTraceMetrics::fromTrace(jsg::Ref<TraceItem> item) {
Expand Down
3 changes: 3 additions & 0 deletions src/workerd/api/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,11 +535,13 @@ class TraceException final: public jsg::Object {
double getTimestamp();
kj::StringPtr getName();
kj::StringPtr getMessage();
jsg::Optional<kj::StringPtr> getStack(jsg::Lock& js);

JSG_RESOURCE_TYPE(TraceException) {
JSG_LAZY_READONLY_INSTANCE_PROPERTY(timestamp, getTimestamp);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(message, getMessage);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(name, getName);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(stack, getStack);
}

void visitForMemoryInfo(jsg::MemoryTracker& tracker) const {
Expand All @@ -551,6 +553,7 @@ class TraceException final: public jsg::Object {
double timestamp;
kj::String name;
kj::String message;
kj::Maybe<kj::String> stack;
};

class TraceMetrics final : public jsg::Object {
Expand Down
7 changes: 3 additions & 4 deletions src/workerd/io/io-context.c++
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,8 @@ void IoContext::logUncaughtExceptionAsync(UncaughtExceptionSource source,
RunnableImpl(UncaughtExceptionSource source, kj::Exception&& exception)
: source(source), exception(kj::mv(exception)) {}
void run(Worker::Lock& lock) override {
jsg::Lock& js = lock;
auto error = js.exceptionToJsValue(kj::mv(exception));
// TODO(soon): Add logUncaughtException to jsg::Lock.
lock.logUncaughtException(source, error.getHandle(js));
lock.logUncaughtException(source, kj::mv(exception));
}
};

Expand Down Expand Up @@ -1086,7 +1084,8 @@ void IoContext::runImpl(Runnable& runnable, bool takePendingEvent,
} else {
if (tryCatch.Message().IsEmpty()) {
// Should never happen, but check for it because otherwise V8 will crash.
KJ_LOG(ERROR, "tryCatch.Message() was empty even when not HasTerminated()??");
KJ_LOG(ERROR, "tryCatch.Message() was empty even when not HasTerminated()??",
kj::getStackTrace());
JSG_FAIL_REQUIRE(Error, "(JavaScript exception with no message)");
} else {
auto jsException = tryCatch.Exception();
Expand Down
25 changes: 19 additions & 6 deletions src/workerd/io/trace.c++
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,9 @@ Trace::Log::Log(kj::Date timestamp, LogLevel logLevel, kj::String message)
logLevel(logLevel),
message(kj::mv(message)) {}

Trace::Exception::Exception(kj::Date timestamp, kj::String name, kj::String message)
: timestamp(timestamp), name(kj::mv(name)), message(kj::mv(message)) {}
Trace::Exception::Exception(kj::Date timestamp, kj::String name, kj::String message,
kj::Maybe<kj::String> stack)
: timestamp(timestamp), name(kj::mv(name)), message(kj::mv(message)), stack(kj::mv(stack)) {}

Trace::Trace(kj::Maybe<kj::String> stableId, kj::Maybe<kj::String> scriptName,
kj::Maybe<kj::Own<ScriptVersion::Reader>> scriptVersion,
Expand Down Expand Up @@ -356,6 +357,9 @@ void Trace::Exception::copyTo(rpc::Trace::Exception::Builder builder) {
builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS);
builder.setName(name);
builder.setMessage(message);
KJ_IF_SOME(s, stack) {
builder.setStack(s);
}
}

void Trace::mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel) {
Expand Down Expand Up @@ -448,7 +452,11 @@ Trace::Log::Log(rpc::Trace::Log::Reader reader)
Trace::Exception::Exception(rpc::Trace::Exception::Reader reader)
: timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS),
name(kj::str(reader.getName())),
message(kj::str(reader.getMessage())) {}
message(kj::str(reader.getMessage())) {
if (reader.hasStack()) {
stack = kj::str(reader.getStack());
}
}

SpanBuilder& SpanBuilder::operator=(SpanBuilder &&other) {
end();
Expand Down Expand Up @@ -566,7 +574,8 @@ void WorkerTracer::log(kj::Date timestamp, LogLevel logLevel, kj::String message
trace->logs.add(timestamp, logLevel, kj::mv(message));
}

void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String message) {
void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String message,
kj::Maybe<kj::String> stack) {
if (trace->exceededExceptionLimit) {
return;
}
Expand All @@ -577,15 +586,19 @@ void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String
return;
}
size_t newSize = trace->bytesUsed + sizeof(Trace::Exception) + name.size() + message.size();
KJ_IF_SOME(s, stack) {
newSize += s.size();
}
if (newSize > MAX_TRACE_BYTES) {
trace->exceededExceptionLimit = true;
trace->exceptions.add(
timestamp, kj::str("Error"),
kj::str("Trace resource limit exceeded; subsequent exceptions not recorded."));
kj::str("Trace resource limit exceeded; subsequent exceptions not recorded."),
kj::none);
return;
}
trace->bytesUsed = newSize;
trace->exceptions.add(timestamp, kj::mv(name), kj::mv(message));
trace->exceptions.add(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack));
}

void WorkerTracer::addDiagnosticChannelEvent(kj::Date timestamp,
Expand Down
9 changes: 6 additions & 3 deletions src/workerd/io/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ class Trace final : public kj::Refcounted {

class Exception {
public:
explicit Exception(kj::Date timestamp, kj::String name, kj::String message);
explicit Exception(kj::Date timestamp, kj::String name, kj::String message,
kj::Maybe<kj::String> stack);
Exception(rpc::Trace::Exception::Reader reader);
Exception(Exception&&) = default;
KJ_DISALLOW_COPY(Exception);
Expand All @@ -250,9 +251,10 @@ class Trace final : public kj::Refcounted {
kj::Date timestamp;

kj::String name;
// TODO(someday): record exception source, line/column number, stack trace?
kj::String message;

kj::Maybe<kj::String> stack;

void copyTo(rpc::Trace::Exception::Builder builder);
};

Expand Down Expand Up @@ -366,7 +368,8 @@ class WorkerTracer final : public kj::Refcounted {
// TODO(soon): Eventually:
//void setMetrics(...) // Or get from MetricsCollector::Request directly?

void addException(kj::Date timestamp, kj::String name, kj::String message);
void addException(kj::Date timestamp, kj::String name, kj::String message,
kj::Maybe<kj::String> stack);

void addDiagnosticChannelEvent(kj::Date timestamp, kj::String channel,
kj::Array<kj::byte> message);
Expand Down
1 change: 1 addition & 0 deletions src/workerd/io/worker-interface.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct Trace @0x8e8d911203762d34 {
timestampNs @0 :Int64;
name @1 :Text;
message @2 :Text;
stack @3 :Text;
}

outcome @2 :EventOutcome;
Expand Down
86 changes: 83 additions & 3 deletions src/workerd/io/worker.c++
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,12 @@ void addExceptionToTrace(jsg::Lock& js,
}

auto timestamp = ioContext.now();
auto error = KJ_REQUIRE_NONNULL(errorTypeHandler.tryUnwrap(js, exception),
"Should always be possible to unwrap error interface from an object.");
Worker::Api::ErrorInterface error;

if (exception.isObject()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the isObject() check is needed here because the deserialized result isn't necessarily an Object? (But would typically be an Object?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Annoyingly, JavaScript permits you to throw any value, not just objects. I think there was a bug here that caused us to fail to log non-object exceptions, but it wasn't noticed before because our exception tunneling would turn non-objects into objects. (Exception tunneling today always produces Error values even if the original wasn't.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, OK... that would explain why the current test cases for throw "string" were working without the Object check.

error = KJ_REQUIRE_NONNULL(errorTypeHandler.tryUnwrap(js, exception),
"Should always be possible to unwrap error interface from an object.");
}

kj::String name;
KJ_IF_SOME(n, error.name) {
Expand All @@ -160,9 +164,49 @@ void addExceptionToTrace(jsg::Lock& js,
kj::String message;
KJ_IF_SOME(m, error.message) {
message = kj::str(m);
} else {
// This doesn't appear to be an Error object. Fall back to stringifying the whole value as
// the message.
if (!js.v8Isolate->IsExecutionTerminating()) {
v8::TryCatch tryCatch(js.v8Isolate);
try {
message = exception.toString(js);
} catch (jsg::JsExceptionThrown&) {
// Failed to stringify.
//
// Note that we're intentionally not checking tryCatch.CanContinue() here, because we still
// want to continue even if the isolate has been terminated.
}
}
}

kj::Maybe<kj::String> stack;
KJ_IF_SOME(s, error.stack) {
kj::StringPtr slice = s;

// Normally `error.stack` repeats the error type and message first. We don't want send two
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kentonv — see internal thread on this, I think by stripping out here to be efficient in what gets sent across, we ended up making it such that the stack property in the Tail Worker ends up not including full stack trace?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know which internal thread you mean, and I don't see how the code here could possibly be stripping any part of the actual stack trace.

// copies of that to the trace so we'll strip it off.
if (slice.startsWith(name)) {
slice = slice.slice(name.size());
if (slice.startsWith(": "_kj)) {
slice = slice.slice(2);
}
}

if (slice.startsWith(message)) {
slice = slice.slice(message.size());
if (slice.startsWith("\n")) {
slice = slice.slice(1);
}
}

if (slice.size() > 0) {
stack = kj::str(slice);
}
}

// TODO(someday): Limit size of exception content?
tracer.addException(timestamp, kj::mv(name), kj::mv(message));
tracer.addException(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack));
}

void reportStartupError(
Expand Down Expand Up @@ -1839,6 +1883,42 @@ void Worker::Lock::logUncaughtException(UncaughtExceptionSource source,
}
}

void Worker::Lock::logUncaughtException(UncaughtExceptionSource source,
kj::Exception&& exception) {
jsg::Lock& js = *this;

// If we have an attached serialized exception, deserialize it and log that instead, rather
// than try to reconstruct based on the KJ exception description.
//
// TODO(cleanup): Eventually, js.exceptionToJsValue() should do this internally, and then we
// should remove the code from here.
KJ_IF_SOME(serializedJsError, exception.getDetail(jsg::TUNNELED_EXCEPTION_DETAIL_ID)) {
if (!js.v8Isolate->IsExecutionTerminating()) {
kj::Maybe<jsg::JsValue> deserialized;

v8::TryCatch tryCatch(js.v8Isolate);
try {
jsg::Deserializer deser(js, serializedJsError);
deserialized = deser.readValue(js);
} catch (jsg::JsExceptionThrown&) {
// Failed to deserialize, we'll continue with exceptionToJsValue() instead.
//
// Note that we're intentionally not checking tryCatch.CanContinue() here, because we still
// want to log the exception even if the isolate has been terminated.
}

KJ_IF_SOME(d, deserialized) {
Copy link
Contributor

@jclee jclee Mar 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like exceptionToJsValue() does some additional property population, filtering, and logging of internal tunneled exceptions that would be skipped when deserialization is successful. I guess the assumption here is that if a valid serialized value is present, it didn't come from an internal tunneled exception? (Or that the other populated properties are not currently consumed by logUncaughtException()?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, logUncaughtException() doesn't inspect any of those extra properties today. Only type, message, and now stack.

logUncaughtException(source, d);
return;
}
}
}

// Couldn't deserialize an attached exception, so use `exceptionToJsValue()`.
auto jsError = js.exceptionToJsValue(kj::mv(exception));
logUncaughtException(source, jsError.getHandle(js));
}

void Worker::Lock::reportPromiseRejectEvent(v8::PromiseRejectMessage& message) {
getGlobalScope().emitPromiseRejection(
*this, message.GetEvent(),
Expand Down
12 changes: 11 additions & 1 deletion src/workerd/io/worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,8 @@ class Worker::Api {
struct ErrorInterface {
jsg::Optional<kj::String> name;
jsg::Optional<kj::String> message;
JSG_STRUCT(name, message);
jsg::Optional<kj::String> stack;
JSG_STRUCT(name, message, stack);
};
virtual const jsg::TypeHandler<ErrorInterface>&
getErrorInterfaceTypeHandler(jsg::Lock& lock) const = 0;
Expand Down Expand Up @@ -529,10 +530,19 @@ class Worker::Lock {
void logUncaughtException(kj::StringPtr description);

// Logs an exception to the debug console or trace, if active.
//
// If the caller already has a copy of the exception stack, it can pass this in as an
// optimization. This value will be passed along to the trace handler, if there is one, rather
// than querying the property from the exception itself. This is also useful in the case that
// the exception itself is not the original and the stack is missing.
void logUncaughtException(UncaughtExceptionSource source,
const jsg::JsValue& exception,
const jsg::JsMessage& message = jsg::JsMessage());

// Version that takes a kj::Exception. If it has a serialized JS error attached as a detail, that
// error may be extracted and used.
void logUncaughtException(UncaughtExceptionSource source, kj::Exception&& exception);

void reportPromiseRejectEvent(v8::PromiseRejectMessage& message);

// Checks for problems with the registered event handlers (such as that there are none) and
Expand Down
24 changes: 24 additions & 0 deletions src/workerd/jsg/util.c++
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "jsg.h" // can't include util.h directly due to weird cyclic dependency...
#include "setup.h"
#include "ser.h"
#include <kj/debug.h>
#include <stdlib.h>

Expand Down Expand Up @@ -265,6 +266,15 @@ kj::StringPtr extractTunneledExceptionDescription(kj::StringPtr message) {

v8::Local<v8::Value> makeInternalError(v8::Isolate* isolate, kj::Exception&& exception) {
auto desc = exception.getDescription();

// TODO(someday): Deserialize encoded V8 exception from
// exception.getDetail(TUNNELED_EXCEPTION_DETAIL_ID), if present. WARNING: We must think
// carefully about security in the case that the exception has passed between workers that
// don't trust each other. Perhaps we should explicitly remove the stack trace in this case.
// REMINDER: Worker::logUncaughtException() currently deserializes TUNNELED_EXCEPTION_DETAIL_ID
// in order to extract a full stack trace. Once we do it here, we can remove the code from
// there.

auto tunneledException = decodeTunneledException(isolate, desc);

if (tunneledException.isInternal) {
Expand Down Expand Up @@ -341,6 +351,20 @@ void throwInternalError(v8::Isolate* isolate, kj::Exception&& exception) {
}
}

void addExceptionDetail(Lock& js, kj::Exception& exception, v8::Local<v8::Value> handle) {
js.tryCatch([&]() {
Serializer ser(js, {
// Make sure we don't break compatibility if V8 introduces a new version. This vaule can
// be bumped to match the new version once all of production is updated to understand it.
.version = 15,
});
ser.write(js, JsValue(handle));
Copy link
Contributor

@jclee jclee Mar 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be doing anything to bound the amount of data added to an exception?

Edit: Oh, I guess the existing trace size bound would help limit negative effects, and I suppose V8 might have some internal bound on stack trace generation.

exception.setDetail(TUNNELED_EXCEPTION_DETAIL_ID, ser.release().data);
}, [](jsg::Value&& error) {
// Exception not serializable, ignore.
});
}

static kj::String typeErrorMessage(TypeErrorContext c, const char* expectedType) {
kj::String type;

Expand Down
7 changes: 7 additions & 0 deletions src/workerd/jsg/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

namespace workerd::jsg {

class Lock;

typedef unsigned int uint;

// When a C++ callback wishes to throw a JavaScript exception, it should first call
Expand Down Expand Up @@ -61,6 +63,11 @@ void throwInternalError(v8::Isolate* isolate, kj::StringPtr internalMessage);
// calls makeInternalError() and then tells the isolate to throw it.
void throwInternalError(v8::Isolate* isolate, kj::Exception&& exception);

constexpr kj::Exception::DetailTypeId TUNNELED_EXCEPTION_DETAIL_ID = 0xe8027292171b1646ull;

// Add a serialized copy of the exception value to the KJ exception, as a "detail".
void addExceptionDetail(Lock& js, kj::Exception& exception, v8::Local<v8::Value> handle);

struct TypeErrorContext {
enum Kind: uint8_t {
METHOD_ARGUMENT, // has type name, member (method) name, and argument index
Expand Down
Loading
Loading