From 01764a004f0728a7744d8e79f9b06eb7953c3d59 Mon Sep 17 00:00:00 2001 From: Kenton Varda Date: Wed, 27 Mar 2024 11:23:51 -0500 Subject: [PATCH] Add exception stack traces to trace/tail events. A new property, `stack`, contains the stack trace. To support this in the case of async exceptions (which have already been converted to KJ), exception tunneling now adds a serialized copy of the V8 error to the exception itself, as a "detail". For now, this is _only_ used by `logUncaughtException()`. In the future, though, when the exception propagates back into JavaScript, we could de-tunnel it by deserializing this detail, rather than based on the description string. However, there are several complexities that need to be solved first, so I have not tried to do that in this PR. Depends on: https://github.com/capnproto/capnproto/pull/1978 --- WORKSPACE | 6 +- src/workerd/api/trace.c++ | 7 ++- src/workerd/api/trace.h | 3 + src/workerd/io/io-context.c++ | 7 +-- src/workerd/io/trace.c++ | 25 ++++++-- src/workerd/io/trace.h | 8 ++- src/workerd/io/worker-interface.capnp | 1 + src/workerd/io/worker.c++ | 86 +++++++++++++++++++++++++- src/workerd/io/worker.h | 12 +++- src/workerd/jsg/util.c++ | 20 ++++++ src/workerd/jsg/util.h | 7 +++ src/workerd/jsg/value.h | 89 ++++++++++++++------------- 12 files changed, 209 insertions(+), 62 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 7db05f410fff..cb8ca5b85a82 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -39,10 +39,10 @@ apple_support_dependencies() http_archive( name = "capnp-cpp", - sha256 = "e9220640c1e6453e2d27cf512a97933bfa59a92e708b566719b59326778827fc", - strip_prefix = "capnproto-capnproto-eef235d/c++", + sha256 = "21ae72a199a995b72e9bd359d4815539158d93a15cf36e284ef201fde7338c3c", + strip_prefix = "capnproto-capnproto-8ba7a6e/c++", type = "tgz", - urls = ["https://github.com/capnproto/capnproto/tarball/eef235dfa7aa40087605538ee3942b75a8910787"], + urls = ["https://github.com/capnproto/capnproto/tarball/8ba7a6ef987eb56e8d41e7f11e080dabcbf2a04f"], ) http_archive( diff --git a/src/workerd/api/trace.c++ b/src/workerd/api/trace.c++ index 48f7d55670cd..ff5634c881ec 100644 --- a/src/workerd/api/trace.c++ +++ b/src/workerd/api/trace.c++ @@ -526,7 +526,8 @@ jsg::V8Ref TraceLog::getMessage(jsg::Lock& js) { TraceException::TraceException(const Trace& trace, const Trace::Exception& exception) : timestamp(getTraceExceptionTimestamp(exception)), name(kj::str(exception.name)), - message(kj::str(exception.message)) {} + message(kj::str(exception.message)), + stack(exception.stack.map([](kj::StringPtr s) { return kj::str(s); })) {} double TraceException::getTimestamp() { return timestamp; @@ -540,6 +541,10 @@ kj::StringPtr TraceException::getName() { return name; } +jsg::Optional TraceException::getStack(jsg::Lock& js) { + return stack; +} + TraceMetrics::TraceMetrics(uint cpuTime, uint wallTime) : cpuTime(cpuTime), wallTime(wallTime) {} jsg::Ref UnsafeTraceMetrics::fromTrace(jsg::Ref item) { diff --git a/src/workerd/api/trace.h b/src/workerd/api/trace.h index 2d98884b6713..697160d48e74 100644 --- a/src/workerd/api/trace.h +++ b/src/workerd/api/trace.h @@ -507,11 +507,13 @@ class TraceException final: public jsg::Object { double getTimestamp(); kj::StringPtr getName(); kj::StringPtr getMessage(); + jsg::Optional getStack(jsg::Lock& js); JSG_RESOURCE_TYPE(TraceException) { JSG_LAZY_READONLY_INSTANCE_PROPERTY(timestamp, getTimestamp); JSG_LAZY_READONLY_INSTANCE_PROPERTY(message, getMessage); JSG_LAZY_READONLY_INSTANCE_PROPERTY(name, getName); + JSG_LAZY_READONLY_INSTANCE_PROPERTY(stack, getStack); } void visitForMemoryInfo(jsg::MemoryTracker& tracker) const { @@ -523,6 +525,7 @@ class TraceException final: public jsg::Object { double timestamp; kj::String name; kj::String message; + kj::Maybe stack; }; class TraceMetrics final : public jsg::Object { diff --git a/src/workerd/io/io-context.c++ b/src/workerd/io/io-context.c++ index 1dc32bb3f565..07737da31674 100644 --- a/src/workerd/io/io-context.c++ +++ b/src/workerd/io/io-context.c++ @@ -364,10 +364,8 @@ void IoContext::logUncaughtExceptionAsync(UncaughtExceptionSource source, RunnableImpl(UncaughtExceptionSource source, kj::Exception&& exception) : source(source), exception(kj::mv(exception)) {} void run(Worker::Lock& lock) override { - jsg::Lock& js = lock; - auto error = js.exceptionToJsValue(kj::mv(exception)); // TODO(soon): Add logUncaughtException to jsg::Lock. - lock.logUncaughtException(source, error.getHandle(js)); + lock.logUncaughtException(source, kj::mv(exception)); } }; @@ -1086,7 +1084,8 @@ void IoContext::runImpl(Runnable& runnable, bool takePendingEvent, } else { if (tryCatch.Message().IsEmpty()) { // Should never happen, but check for it because otherwise V8 will crash. - KJ_LOG(ERROR, "tryCatch.Message() was empty even when not HasTerminated()??"); + KJ_LOG(ERROR, "tryCatch.Message() was empty even when not HasTerminated()??", + kj::getStackTrace()); JSG_FAIL_REQUIRE(Error, "(JavaScript exception with no message)"); } else { auto jsException = tryCatch.Exception(); diff --git a/src/workerd/io/trace.c++ b/src/workerd/io/trace.c++ index 1f96cb236894..156e69103098 100644 --- a/src/workerd/io/trace.c++ +++ b/src/workerd/io/trace.c++ @@ -227,8 +227,9 @@ Trace::Log::Log(kj::Date timestamp, LogLevel logLevel, kj::String message) logLevel(logLevel), message(kj::mv(message)) {} -Trace::Exception::Exception(kj::Date timestamp, kj::String name, kj::String message) - : timestamp(timestamp), name(kj::mv(name)), message(kj::mv(message)) {} +Trace::Exception::Exception(kj::Date timestamp, kj::String name, kj::String message, + kj::Maybe stack) + : timestamp(timestamp), name(kj::mv(name)), message(kj::mv(message)), stack(kj::mv(stack)) {} Trace::Trace(kj::Maybe stableId, kj::Maybe scriptName, kj::Maybe> scriptVersion, kj::Maybe dispatchNamespace, @@ -335,6 +336,9 @@ void Trace::Exception::copyTo(rpc::Trace::Exception::Builder builder) { builder.setTimestampNs((timestamp - kj::UNIX_EPOCH) / kj::NANOSECONDS); builder.setName(name); builder.setMessage(message); + KJ_IF_SOME(s, stack) { + builder.setStack(s); + } } void Trace::mergeFrom(rpc::Trace::Reader reader, PipelineLogLevel pipelineLogLevel) { @@ -420,7 +424,11 @@ Trace::Log::Log(rpc::Trace::Log::Reader reader) Trace::Exception::Exception(rpc::Trace::Exception::Reader reader) : timestamp(kj::UNIX_EPOCH + reader.getTimestampNs() * kj::NANOSECONDS), name(kj::str(reader.getName())), - message(kj::str(reader.getMessage())) {} + message(kj::str(reader.getMessage())) { + if (reader.hasStack()) { + stack = kj::str(reader.getStack()); + } +} SpanBuilder& SpanBuilder::operator=(SpanBuilder &&other) { end(); @@ -537,7 +545,8 @@ void WorkerTracer::log(kj::Date timestamp, LogLevel logLevel, kj::String message trace->logs.add(timestamp, logLevel, kj::mv(message)); } -void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String message) { +void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String message, + kj::Maybe stack) { if (trace->exceededExceptionLimit) { return; } @@ -548,15 +557,19 @@ void WorkerTracer::addException(kj::Date timestamp, kj::String name, kj::String return; } size_t newSize = trace->bytesUsed + sizeof(Trace::Exception) + name.size() + message.size(); + KJ_IF_SOME(s, stack) { + newSize += s.size(); + } if (newSize > MAX_TRACE_BYTES) { trace->exceededExceptionLimit = true; trace->exceptions.add( timestamp, kj::str("Error"), - kj::str("Trace resource limit exceeded; subsequent exceptions not recorded.")); + kj::str("Trace resource limit exceeded; subsequent exceptions not recorded."), + kj::none); return; } trace->bytesUsed = newSize; - trace->exceptions.add(timestamp, kj::mv(name), kj::mv(message)); + trace->exceptions.add(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack)); } void WorkerTracer::addDiagnosticChannelEvent(kj::Date timestamp, diff --git a/src/workerd/io/trace.h b/src/workerd/io/trace.h index 7b016279332f..8f12552dee7e 100644 --- a/src/workerd/io/trace.h +++ b/src/workerd/io/trace.h @@ -229,7 +229,8 @@ class Trace final : public kj::Refcounted { class Exception { public: - explicit Exception(kj::Date timestamp, kj::String name, kj::String message); + explicit Exception(kj::Date timestamp, kj::String name, kj::String message, + kj::Maybe stack); Exception(rpc::Trace::Exception::Reader reader); Exception(Exception&&) = default; KJ_DISALLOW_COPY(Exception); @@ -242,6 +243,8 @@ class Trace final : public kj::Refcounted { // TODO(someday): record exception source, line/column number, stack trace? kj::String message; + kj::Maybe stack; + void copyTo(rpc::Trace::Exception::Builder builder); }; @@ -352,7 +355,8 @@ class WorkerTracer final : public kj::Refcounted { // TODO(soon): Eventually: //void setMetrics(...) // Or get from MetricsCollector::Request directly? - void addException(kj::Date timestamp, kj::String name, kj::String message); + void addException(kj::Date timestamp, kj::String name, kj::String message, + kj::Maybe stack); void addDiagnosticChannelEvent(kj::Date timestamp, kj::String channel, kj::Array message); diff --git a/src/workerd/io/worker-interface.capnp b/src/workerd/io/worker-interface.capnp index 3806de0968aa..df21985fc444 100644 --- a/src/workerd/io/worker-interface.capnp +++ b/src/workerd/io/worker-interface.capnp @@ -37,6 +37,7 @@ struct Trace @0x8e8d911203762d34 { timestampNs @0 :Int64; name @1 :Text; message @2 :Text; + stack @3 :Text; } outcome @2 :EventOutcome; diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++ index 1ca67c7c8952..d7068e0c95c9 100644 --- a/src/workerd/io/worker.c++ +++ b/src/workerd/io/worker.c++ @@ -148,8 +148,12 @@ void addExceptionToTrace(jsg::Lock& js, } auto timestamp = ioContext.now(); - auto error = KJ_REQUIRE_NONNULL(errorTypeHandler.tryUnwrap(js, exception), - "Should always be possible to unwrap error interface from an object."); + Worker::Api::ErrorInterface error; + + if (exception.isObject()) { + error = KJ_REQUIRE_NONNULL(errorTypeHandler.tryUnwrap(js, exception), + "Should always be possible to unwrap error interface from an object."); + } kj::String name; KJ_IF_SOME(n, error.name) { @@ -160,9 +164,49 @@ void addExceptionToTrace(jsg::Lock& js, kj::String message; KJ_IF_SOME(m, error.message) { message = kj::str(m); + } else { + // This doesn't appear to be an Error object. Fall back to stringifying the whole value as + // the message. + if (!js.v8Isolate->IsExecutionTerminating()) { + v8::TryCatch tryCatch(js.v8Isolate); + try { + message = exception.toString(js); + } catch (jsg::JsExceptionThrown&) { + // Failed to stringify. + // + // Note that we're intentionally not checking tryCatch.CanContinue() here, because we still + // want to continue even if the isolate has been terminated. + } + } + } + + kj::Maybe stack; + KJ_IF_SOME(s, error.stack) { + kj::StringPtr slice = s; + + // Normally `error.stack` repeats the error type and message first. We don't want send two + // copies of that to the trace so we'll strip it off. + if (slice.startsWith(name)) { + slice = slice.slice(name.size()); + if (slice.startsWith(": "_kj)) { + slice = slice.slice(2); + } + } + + if (slice.startsWith(message)) { + slice = slice.slice(message.size()); + if (slice.startsWith("\n")) { + slice = slice.slice(1); + } + } + + if (slice.size() > 0) { + stack = kj::str(slice); + } } + // TODO(someday): Limit size of exception content? - tracer.addException(timestamp, kj::mv(name), kj::mv(message)); + tracer.addException(timestamp, kj::mv(name), kj::mv(message), kj::mv(stack)); } void reportStartupError( @@ -1839,6 +1883,42 @@ void Worker::Lock::logUncaughtException(UncaughtExceptionSource source, } } +void Worker::Lock::logUncaughtException(UncaughtExceptionSource source, + kj::Exception&& exception) { + jsg::Lock& js = *this; + + // If we have an attached serialized exception, deserialize it and log that instead, rather + // than try to reconstruct based on the KJ exception description. + // + // TODO(cleanup): Eventually, js.exceptionToJsValue() should do this internally, and then we + // should remove the code from here. + KJ_IF_SOME(serializedJsError, exception.getDetail(jsg::TUNNELED_EXCEPTION_DETAIL_ID)) { + if (!js.v8Isolate->IsExecutionTerminating()) { + kj::Maybe deserialized; + + v8::TryCatch tryCatch(js.v8Isolate); + try { + jsg::Deserializer deser(js, serializedJsError); + deserialized = deser.readValue(js); + } catch (jsg::JsExceptionThrown&) { + // Failed to deserialize, we'll continue with exceptionToJsValue() instead. + // + // Note that we're intentionally not checking tryCatch.CanContinue() here, because we still + // want to log the exception even if the isolate has been terminated. + } + + KJ_IF_SOME(d, deserialized) { + logUncaughtException(source, d); + return; + } + } + } + + // Couldn't deserialize an attached exception, so use `exceptionToJsValue()`. + auto jsError = js.exceptionToJsValue(kj::mv(exception)); + logUncaughtException(source, jsError.getHandle(js)); +} + void Worker::Lock::reportPromiseRejectEvent(v8::PromiseRejectMessage& message) { getGlobalScope().emitPromiseRejection( *this, message.GetEvent(), diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index da4d73a489d8..27c10fb4baab 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -447,7 +447,8 @@ class Worker::Api { struct ErrorInterface { jsg::Optional name; jsg::Optional message; - JSG_STRUCT(name, message); + jsg::Optional stack; + JSG_STRUCT(name, message, stack); }; virtual const jsg::TypeHandler& getErrorInterfaceTypeHandler(jsg::Lock& lock) const = 0; @@ -529,10 +530,19 @@ class Worker::Lock { void logUncaughtException(kj::StringPtr description); // Logs an exception to the debug console or trace, if active. + // + // If the caller already has a copy of the exception stack, it can pass this in as an + // optimization. This value will be passed along to the trace handler, if there is one, rather + // than querying the property from the exception itself. This is also useful in the case that + // the exception itself is not the original and the stack is missing. void logUncaughtException(UncaughtExceptionSource source, const jsg::JsValue& exception, const jsg::JsMessage& message = jsg::JsMessage()); + // Version that takes a kj::Exception. If it has a serialized JS error attached as a detail, that + // error may be extracted and used. + void logUncaughtException(UncaughtExceptionSource source, kj::Exception&& exception); + void reportPromiseRejectEvent(v8::PromiseRejectMessage& message); // Checks for problems with the registered event handlers (such as that there are none) and diff --git a/src/workerd/jsg/util.c++ b/src/workerd/jsg/util.c++ index 3e65dc6ff500..07d3118b0521 100644 --- a/src/workerd/jsg/util.c++ +++ b/src/workerd/jsg/util.c++ @@ -4,6 +4,7 @@ #include "jsg.h" // can't include util.h directly due to weird cyclic dependency... #include "setup.h" +#include "ser.h" #include #include @@ -265,6 +266,15 @@ kj::StringPtr extractTunneledExceptionDescription(kj::StringPtr message) { v8::Local makeInternalError(v8::Isolate* isolate, kj::Exception&& exception) { auto desc = exception.getDescription(); + + // TODO(someday): Deserialize encoded V8 exception from + // exception.getDetail(TUNNELED_EXCEPTION_DETAIL_ID), if present. WARNING: We must think + // carefully about security in the case that the exception has passed between workers that + // don't trust each other. Perhaps we should explicitly remove the stack trace in this case. + // REMINDER: Worker::logUncaughtException() currently deserializes TUNNELED_EXCEPTION_DETAIL_ID + // in order to extract a full stack trace. Once we do it here, we can remove the code from + // there. + auto tunneledException = decodeTunneledException(isolate, desc); if (tunneledException.isInternal) { @@ -341,6 +351,16 @@ void throwInternalError(v8::Isolate* isolate, kj::Exception&& exception) { } } +void addExceptionDetail(Lock& js, kj::Exception& exception, v8::Local handle) { + js.tryCatch([&]() { + Serializer ser(js); + ser.write(js, JsValue(handle)); + exception.setDetail(TUNNELED_EXCEPTION_DETAIL_ID, ser.release().data); + }, [](jsg::Value&& error) { + // Exception not serializable, ignore. + }); +} + static kj::String typeErrorMessage(TypeErrorContext c, const char* expectedType) { kj::String type; diff --git a/src/workerd/jsg/util.h b/src/workerd/jsg/util.h index 400d87d3f7d8..e177b17998c1 100644 --- a/src/workerd/jsg/util.h +++ b/src/workerd/jsg/util.h @@ -17,6 +17,8 @@ namespace workerd::jsg { +class Lock; + typedef unsigned int uint; // When a C++ callback wishes to throw a JavaScript exception, it should first call @@ -61,6 +63,11 @@ void throwInternalError(v8::Isolate* isolate, kj::StringPtr internalMessage); // calls makeInternalError() and then tells the isolate to throw it. void throwInternalError(v8::Isolate* isolate, kj::Exception&& exception); +constexpr kj::Exception::DetailTypeId TUNNELED_EXCEPTION_DETAIL_ID = 0xe8027292171b1646ull; + +// Add a serialized copy of the exception value to the KJ exception, as a "detail". +void addExceptionDetail(Lock& js, kj::Exception& exception, v8::Local handle); + struct TypeErrorContext { enum Kind: uint8_t { METHOD_ARGUMENT, // has type name, member (method) name, and argument index diff --git a/src/workerd/jsg/value.h b/src/workerd/jsg/value.h index 22cd1a09e790..d81811c5e809 100644 --- a/src/workerd/jsg/value.h +++ b/src/workerd/jsg/value.h @@ -1285,53 +1285,58 @@ class ExceptionWrapper { // introduced, they'll need to be handled explicitly here also. auto& js = Lock::from(context->GetIsolate()); auto& wrapper = TypeWrapper::from(js.v8Isolate); - KJ_IF_SOME(domException, wrapper.tryUnwrap(context, handle, - (DOMException*)nullptr, - parentObject)) { - return KJ_EXCEPTION(FAILED, - kj::str("jsg.DOMException(", domException.getName(), "): ", - domException.getMessage())); - } else { + kj::Exception result = [&]() { + KJ_IF_SOME(domException, wrapper.tryUnwrap(context, handle, + (DOMException*)nullptr, + parentObject)) { + return KJ_EXCEPTION(FAILED, + kj::str("jsg.DOMException(", domException.getName(), "): ", + domException.getMessage())); + } else { - static const constexpr kj::StringPtr PREFIXES[] = { - // JavaScript intrinsic Error Types - "Error"_kj, - "RangeError"_kj, - "TypeError"_kj, - "SyntaxError"_kj, - "ReferenceError"_kj, - // WASM Error Types - "CompileError"_kj, - "LinkError"_kj, - "RuntimeError"_kj, - // JSG_RESOURCE_TYPE Error Types - "DOMException"_kj, - }; + static const constexpr kj::StringPtr PREFIXES[] = { + // JavaScript intrinsic Error Types + "Error"_kj, + "RangeError"_kj, + "TypeError"_kj, + "SyntaxError"_kj, + "ReferenceError"_kj, + // WASM Error Types + "CompileError"_kj, + "LinkError"_kj, + "RuntimeError"_kj, + // JSG_RESOURCE_TYPE Error Types + "DOMException"_kj, + }; - kj::String reason; - if (!handle->IsObject()) { - // if the argument isn't an object, it couldn't possibly be an Error. - reason = kj::str(JSG_EXCEPTION(Error) ": ", handle); - } else { - reason = kj::str(handle); - bool found = false; - // If the error message starts with a platform error type that we tunnel, - // prefix it with "jsg." - for (auto name: PREFIXES) { - if (reason.startsWith(name)) { - reason = kj::str("jsg.", reason); - found = true; - break; + kj::String reason; + if (!handle->IsObject()) { + // if the argument isn't an object, it couldn't possibly be an Error. + reason = kj::str(JSG_EXCEPTION(Error) ": ", handle); + } else { + reason = kj::str(handle); + bool found = false; + // If the error message starts with a platform error type that we tunnel, + // prefix it with "jsg." + for (auto name: PREFIXES) { + if (reason.startsWith(name)) { + reason = kj::str("jsg.", reason); + found = true; + break; + } + } + // Everything else should just come through as a normal error. + if (!found) { + reason = kj::str(JSG_EXCEPTION(Error) ": ", reason); } } - // Everything else should just come through as a normal error. - if (!found) { - reason = kj::str(JSG_EXCEPTION(Error) ": ", reason); - } + return kj::Exception(kj::Exception::Type::FAILED, __FILE__, __LINE__, + kj::mv(reason)); } - return kj::Exception(kj::Exception::Type::FAILED, __FILE__, __LINE__, - kj::mv(reason)); - } + }(); + + addExceptionDetail(js, result, handle); + return result; } };