From 9fb3aa466df0a5adde4b22d6ac1bd586d4be2523 Mon Sep 17 00:00:00 2001 From: Barbara Poon Date: Thu, 22 Dec 2022 13:09:56 -0500 Subject: [PATCH 1/4] Bump buildnumber.dat --- buildnumber.dat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildnumber.dat b/buildnumber.dat index d00491fd7e..0cfbf08886 100644 --- a/buildnumber.dat +++ b/buildnumber.dat @@ -1 +1 @@ -1 +2 From 5ab6b72ef2a314264dd8c67fafad3a500d0f5071 Mon Sep 17 00:00:00 2001 From: cce <51567+cce@users.noreply.github.com> Date: Thu, 22 Dec 2022 09:57:10 -0500 Subject: [PATCH 2/4] telemetry: don't report let asyncTelemetryHook wrap and log its own errors (#4932) --- logging/telemetryhook.go | 5 ++++- logging/telemetryhook_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/logging/telemetryhook.go b/logging/telemetryhook.go index b74d8a4475..4f7c55dc36 100644 --- a/logging/telemetryhook.go +++ b/logging/telemetryhook.go @@ -88,7 +88,7 @@ func createAsyncHookLevels(wrappedHook logrus.Hook, channelDepth uint, maxQueueD if entry != nil { err := hook.wrappedHook.Fire(entry) if err != nil { - Base().Warnf("Unable to write event %#v to telemetry : %v", entry, err) + Base().WithFields(Fields{"TelemetryError": true}).Warnf("Unable to write event %#v to telemetry : %v", entry, err) } hook.wg.Done() } else { @@ -146,6 +146,9 @@ func (hook *asyncTelemetryHook) waitForEventAndReady() bool { // Fire is required to implement logrus hook interface func (hook *asyncTelemetryHook) Fire(entry *logrus.Entry) error { + if _, ok := entry.Data["TelemetryError"]; ok { + return nil + } hook.wg.Add(1) select { case <-hook.quit: diff --git a/logging/telemetryhook_test.go b/logging/telemetryhook_test.go index 09d916cedb..40c25832f9 100644 --- a/logging/telemetryhook_test.go +++ b/logging/telemetryhook_test.go @@ -229,3 +229,34 @@ func TestAsyncTelemetryHook_QueueDepth(t *testing.T) { // be one higher then the maxDepth. require.LessOrEqual(t, hookEntries, maxDepth+1) } + +// Ensure that errors from inside the telemetryhook.go implementation are not reported to telemetry. +func TestAsyncTelemetryHook_SelfReporting(t *testing.T) { + partitiontest.PartitionTest(t) + t.Parallel() + + const entryCount = 100 + const maxDepth = 10 + + filling := make(chan struct{}) + + testHook := makeMockTelemetryHook(logrus.DebugLevel) + testHook.cb = func(entry *logrus.Entry) { + <-filling // Block while filling + } + + hook := createAsyncHook(&testHook, 100, 10) + hook.ready = true + for i := 0; i < entryCount; i++ { + selfEntry := logrus.Entry{ + Level: logrus.ErrorLevel, + Data: logrus.Fields{"TelemetryError": true}, + Message: "Unable to write event", + } + hook.Fire(&selfEntry) + } + close(filling) + hook.Close() + + require.Len(t, testHook.entries(), 0) +} From 8821d9493d6ef19fb8d6a4a5006318d9d35d7e8e Mon Sep 17 00:00:00 2001 From: Pavel Zbitskiy <65323360+algorandskiy@users.noreply.github.com> Date: Thu, 22 Dec 2022 09:57:26 -0500 Subject: [PATCH 3/4] log: stderr offending large log messages (#4930) --- logging/cyclicWriter.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/logging/cyclicWriter.go b/logging/cyclicWriter.go index d7c20e6cec..0c88955df3 100644 --- a/logging/cyclicWriter.go +++ b/logging/cyclicWriter.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "text/template" "time" @@ -124,6 +125,16 @@ func (cyclic *CyclicFileWriter) Write(p []byte) (n int, err error) { if uint64(len(p)) > cyclic.limit { // there's no hope for writing this entry to the log + + // for the large lines this is a clear indication something does wrong, dump data into stderr + const minDebugLogLineSize = 10 * 1024 * 1024 + if len(p) >= minDebugLogLineSize { + buf := make([]byte, 16*1024) + stlen := runtime.Stack(buf, false) + fmt.Fprintf(os.Stderr, "Attempt to write a large log line:\n%s\n", string(buf[:stlen])) + fmt.Fprintf(os.Stderr, "The offending line:\n%s\n", string(p[:4096])) + } + return 0, fmt.Errorf("CyclicFileWriter: input too long to write. Len = %v", len(p)) } From 425443af2ab2dff3fd3197b748b8c0ba7ac9bf2a Mon Sep 17 00:00:00 2001 From: cce <51567+cce@users.noreply.github.com> Date: Thu, 22 Dec 2022 12:55:37 -0500 Subject: [PATCH 4/4] count telemetry server errors (#4933) --- logging/telemetryhook.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/logging/telemetryhook.go b/logging/telemetryhook.go index 4f7c55dc36..88ef932332 100644 --- a/logging/telemetryhook.go +++ b/logging/telemetryhook.go @@ -26,7 +26,8 @@ import ( "github.com/algorand/go-algorand/util/metrics" ) -var telemetryDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_telemetry_drops_total", Description: "telemetry messages not sent to server"}) +var telemetryDrops = metrics.MakeCounter(metrics.MetricName{Name: "algod_telemetry_drops_total", Description: "telemetry messages dropped due to full queues"}) +var telemetryErrors = metrics.MakeCounter(metrics.MetricName{Name: "algod_telemetry_errs_total", Description: "telemetry messages dropped due to server error"}) func createAsyncHook(wrappedHook logrus.Hook, channelDepth uint, maxQueueDepth int) *asyncTelemetryHook { return createAsyncHookLevels(wrappedHook, channelDepth, maxQueueDepth, makeLevels(logrus.InfoLevel)) @@ -89,6 +90,7 @@ func createAsyncHookLevels(wrappedHook logrus.Hook, channelDepth uint, maxQueueD err := hook.wrappedHook.Fire(entry) if err != nil { Base().WithFields(Fields{"TelemetryError": true}).Warnf("Unable to write event %#v to telemetry : %v", entry, err) + telemetryErrors.Inc(nil) } hook.wg.Done() } else {