From 69aacdc78a4c1da35ac87f34d38c7de9d65198c1 Mon Sep 17 00:00:00 2001 From: Max Isom Date: Wed, 19 Jun 2024 10:17:30 -0700 Subject: [PATCH 1/3] [ENH] add panics to tracing --- Cargo.lock | 11 +++++++++++ rust/worker/Cargo.toml | 1 + rust/worker/src/lib.rs | 2 ++ 3 files changed, 14 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index f5dc7ea915a..36464039e21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4133,6 +4133,16 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-panic" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bf1298a179837099f9309243af3b554e840f7f67f65e9f55294913299bd4cc5" +dependencies = [ + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -4665,6 +4675,7 @@ dependencies = [ "tracing", "tracing-bunyan-formatter", "tracing-opentelemetry", + "tracing-panic", "tracing-subscriber", "uuid", ] diff --git a/rust/worker/Cargo.toml b/rust/worker/Cargo.toml index 18c84123e50..bf0bcbdb7ba 100644 --- a/rust/worker/Cargo.toml +++ b/rust/worker/Cargo.toml @@ -51,6 +51,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } opentelemetry = { version = "0.19.0", default-features = false, features = ["trace", "rt-tokio"] } opentelemetry-otlp = "0.12.0" shuttle = "0.7.1" +tracing-panic = "0.1.2" [dev-dependencies] diff --git a/rust/worker/src/lib.rs b/rust/worker/src/lib.rs index f043665ef9a..d11f37c9d28 100644 --- a/rust/worker/src/lib.rs +++ b/rust/worker/src/lib.rs @@ -42,6 +42,7 @@ pub async fn query_service_entrypoint() { &config.service_name, &config.otel_endpoint, ); + std::panic::set_hook(Box::new(tracing_panic::panic_hook)); let system: system::System = system::System::new(); let dispatcher = @@ -102,6 +103,7 @@ pub async fn compaction_service_entrypoint() { &config.service_name, &config.otel_endpoint, ); + std::panic::set_hook(Box::new(tracing_panic::panic_hook)); let system: system::System = system::System::new(); From 5fbb5ff8ea15e40ae5e8f7e2660835d11fa85e55 Mon Sep 17 00:00:00 2001 From: Max Isom Date: Wed, 19 Jun 2024 12:46:02 -0700 Subject: [PATCH 2/3] Make incremental Docker builds slightly faster --- rust/worker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/worker/Dockerfile b/rust/worker/Dockerfile index a4e3e7579b6..7ebebfd58db 100644 --- a/rust/worker/Dockerfile +++ b/rust/worker/Dockerfile @@ -34,9 +34,9 @@ RUN if [ "$RELEASE_MODE" = "1" ]; then mv target/release/query_service .; else m FROM debian:bookworm-slim as query_service +RUN apt-get update && apt-get install -y libssl-dev ca-certificates COPY --from=query_service_builder /chroma/query_service . COPY --from=query_service_builder /chroma/rust/worker/chroma_config.yaml . -RUN apt-get update && apt-get install -y libssl-dev ca-certificates ENTRYPOINT [ "./query_service" ] @@ -54,8 +54,8 @@ RUN if [ "$RELEASE_MODE" = "1" ]; then mv target/release/compaction_service .; e FROM debian:bookworm-slim as compaction_service +RUN apt-get update && apt-get install -y libssl-dev ca-certificates COPY --from=compaction_service_builder /chroma/compaction_service . COPY --from=compaction_service_builder /chroma/rust/worker/chroma_config.yaml . -RUN apt-get update && apt-get install -y libssl-dev ca-certificates ENTRYPOINT [ "./compaction_service" ] From dbbeb925647a0a46d1afd4a79ea7aba9d62ea52f Mon Sep 17 00:00:00 2001 From: Max Isom Date: Wed, 19 Jun 2024 12:46:48 -0700 Subject: [PATCH 3/3] Centralize panic handling --- Cargo.lock | 11 ------- rust/worker/Cargo.toml | 1 - rust/worker/src/lib.rs | 2 -- rust/worker/src/server.rs | 30 +++++++++++++------ .../src/tracing/opentelemetry_config.rs | 24 +++++++++++++++ 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 931f199d083..4e882da95e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4156,16 +4156,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "tracing-panic" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bf1298a179837099f9309243af3b554e840f7f67f65e9f55294913299bd4cc5" -dependencies = [ - "tracing", - "tracing-subscriber", -] - [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -4700,7 +4690,6 @@ dependencies = [ "tracing", "tracing-bunyan-formatter", "tracing-opentelemetry", - "tracing-panic", "tracing-subscriber", "uuid", ] diff --git a/rust/worker/Cargo.toml b/rust/worker/Cargo.toml index 8505fde9caf..f1862a1e7ff 100644 --- a/rust/worker/Cargo.toml +++ b/rust/worker/Cargo.toml @@ -54,7 +54,6 @@ opentelemetry = { version = "0.19.0", default-features = false, features = [ ] } opentelemetry-otlp = "0.12.0" shuttle = "0.7.1" -tracing-panic = "0.1.2" regex = "1.10.5" [dev-dependencies] diff --git a/rust/worker/src/lib.rs b/rust/worker/src/lib.rs index d11f37c9d28..f043665ef9a 100644 --- a/rust/worker/src/lib.rs +++ b/rust/worker/src/lib.rs @@ -42,7 +42,6 @@ pub async fn query_service_entrypoint() { &config.service_name, &config.otel_endpoint, ); - std::panic::set_hook(Box::new(tracing_panic::panic_hook)); let system: system::System = system::System::new(); let dispatcher = @@ -103,7 +102,6 @@ pub async fn compaction_service_entrypoint() { &config.service_name, &config.otel_endpoint, ); - std::panic::set_hook(Box::new(tracing_panic::panic_hook)); let system: system::System = system::System::new(); diff --git a/rust/worker/src/server.rs b/rust/worker/src/server.rs index 4ec705532a5..5d66bfc8da5 100644 --- a/rust/worker/src/server.rs +++ b/rust/worker/src/server.rs @@ -532,18 +532,30 @@ impl chroma_proto::metadata_reader_server::MetadataReader for WorkerServer { impl chroma_proto::debug_server::Debug for WorkerServer { async fn get_info( &self, - _: Request<()>, + request: Request<()>, ) -> Result, Status> { - let response = chroma_proto::GetInfoResponse { - version: option_env!("CARGO_PKG_VERSION") - .unwrap_or("unknown") - .to_string(), - }; - Ok(Response::new(response)) + // Note: We cannot write a middleware that instruments every service rpc + // with a span because of https://github.com/hyperium/tonic/pull/1202. + let request_span = trace_span!("Get info"); + + wrap_span_with_parent_context(request_span, request.metadata()).in_scope(|| { + let response = chroma_proto::GetInfoResponse { + version: option_env!("CARGO_PKG_VERSION") + .unwrap_or("unknown") + .to_string(), + }; + Ok(Response::new(response)) + }) } - async fn trigger_panic(&self, _: Request<()>) -> Result, Status> { - panic!("Intentional panic triggered"); + async fn trigger_panic(&self, request: Request<()>) -> Result, Status> { + // Note: We cannot write a middleware that instruments every service rpc + // with a span because of https://github.com/hyperium/tonic/pull/1202. + let request_span = trace_span!("Trigger panic"); + + wrap_span_with_parent_context(request_span, request.metadata()).in_scope(|| { + panic!("Intentional panic triggered"); + }) } } diff --git a/rust/worker/src/tracing/opentelemetry_config.rs b/rust/worker/src/tracing/opentelemetry_config.rs index 9a3a101b3af..f00ad53a8d9 100644 --- a/rust/worker/src/tracing/opentelemetry_config.rs +++ b/rust/worker/src/tracing/opentelemetry_config.rs @@ -49,4 +49,28 @@ pub(crate) fn init_otel_tracing(service_name: &String, otel_endpoint: &String) { tracing::subscriber::set_global_default(subscriber) .expect("Set global default subscriber failed"); println!("Set global subscriber for {}", service_name); + + // Add panics to tracing + let prev_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |panic_info| { + let payload = panic_info.payload(); + + #[allow(clippy::manual_map)] + let payload = if let Some(s) = payload.downcast_ref::<&str>() { + Some(&**s) + } else if let Some(s) = payload.downcast_ref::() { + Some(s.as_str()) + } else { + None + }; + + tracing::error!( + panic.payload = payload, + panic.location = panic_info.location().map(|l| l.to_string()), + panic.backtrace = tracing::field::display(std::backtrace::Backtrace::capture()), + "A panic occurred" + ); + + prev_hook(panic_info); + })); }