From f42f63271b783c763cf3b5571411943df1c137fd Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Tue, 29 Oct 2024 09:44:07 -0400 Subject: [PATCH] Create global log redaction capability (#3522) Some sources use client libraries that can emit errors that contain sensitive information - in particular, git-facing libraries that embed tokens into repository URLs. This PR introduces a way of redacting them - starting with GitLab (where we've seen this most recently), but in theory extensible to other sources as needed. This implementation uses a custom zap core; this might also be possible with a custom zap encoder, but I didn't test it out. (The deleted core.go file was entirely unused.) --- main.go | 14 ++-- pkg/log/core.go | 41 ------------ pkg/log/dynamic_redactor.go | 50 ++++++++++++++ pkg/log/level.go | 1 - pkg/log/log.go | 30 ++++++--- pkg/log/log_test.go | 125 ++++++++++++++++++++++++++++++++++- pkg/log/redaction_core.go | 42 ++++++++++++ pkg/sources/gitlab/gitlab.go | 4 ++ 8 files changed, 247 insertions(+), 60 deletions(-) delete mode 100644 pkg/log/core.go create mode 100644 pkg/log/dynamic_redactor.go create mode 100644 pkg/log/redaction_core.go diff --git a/main.go b/main.go index f5b629fb7ae6..a1a1510d46e0 100644 --- a/main.go +++ b/main.go @@ -27,10 +27,10 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/config" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/engine" + "github.com/trufflesecurity/trufflehog/v3/pkg/feature" "github.com/trufflesecurity/trufflehog/v3/pkg/handlers" "github.com/trufflesecurity/trufflehog/v3/pkg/log" "github.com/trufflesecurity/trufflehog/v3/pkg/output" - "github.com/trufflesecurity/trufflehog/v3/pkg/feature" "github.com/trufflesecurity/trufflehog/v3/pkg/sources" "github.com/trufflesecurity/trufflehog/v3/pkg/tui" "github.com/trufflesecurity/trufflehog/v3/pkg/updater" @@ -72,10 +72,10 @@ var ( jobReportFile = cli.Flag("output-report", "Write a scan report to the provided 
path.").Hidden().OpenFile(os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666) // Add feature flags - forceSkipBinaries = cli.Flag("force-skip-binaries", "Force skipping binaries.").Bool() - forceSkipArchives = cli.Flag("force-skip-archives", "Force skipping archives.").Bool() + forceSkipBinaries = cli.Flag("force-skip-binaries", "Force skipping binaries.").Bool() + forceSkipArchives = cli.Flag("force-skip-archives", "Force skipping archives.").Bool() skipAdditionalRefs = cli.Flag("skip-additional-refs", "Skip additional references.").Bool() - userAgentSuffix = cli.Flag("user-agent-suffix", "Suffix to add to User-Agent.").String() + userAgentSuffix = cli.Flag("user-agent-suffix", "Suffix to add to User-Agent.").String() gitScan = cli.Command("git", "Find credentials in git repositories.") gitScanURI = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String() @@ -285,7 +285,7 @@ func main() { if *jsonOut { logFormat = log.WithJSONSink } - logger, sync := log.New("trufflehog", logFormat(os.Stderr)) + logger, sync := log.New("trufflehog", logFormat(os.Stderr, log.WithGlobalRedaction())) // make it the default logger for contexts context.SetDefaultLogger(logger) @@ -375,7 +375,7 @@ func run(state overseer.State) { }() } - // Set feature configurations from CLI flags + // Set feature configurations from CLI flags if *forceSkipBinaries { feature.ForceSkipBinaries.Store(true) } @@ -383,7 +383,7 @@ func run(state overseer.State) { if *forceSkipArchives { feature.ForceSkipArchives.Store(true) } - + if *skipAdditionalRefs { feature.SkipAdditionalRefs.Store(true) } diff --git a/pkg/log/core.go b/pkg/log/core.go deleted file mode 100644 index 49a961c8ce80..000000000000 --- a/pkg/log/core.go +++ /dev/null @@ -1,41 +0,0 @@ -package log - -import ( - "go.uber.org/zap/zapcore" -) - -type levelFilterCore struct { - core zapcore.Core - level zapcore.LevelEnabler -} - -// NewLevelCore creates a core that can be used to independently control the 
-// level of an existing Core. This is essentially a filter that will only log -// if both the parent and the wrapper cores are enabled. -func NewLevelCore(core zapcore.Core, level zapcore.LevelEnabler) zapcore.Core { - return &levelFilterCore{core, level} -} - -func (c *levelFilterCore) Enabled(lvl zapcore.Level) bool { - return c.level.Enabled(lvl) -} - -func (c *levelFilterCore) With(fields []zapcore.Field) zapcore.Core { - return &levelFilterCore{c.core.With(fields), c.level} -} - -func (c *levelFilterCore) Check(ent zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { - if !c.Enabled(ent.Level) { - return ce - } - - return c.core.Check(ent, ce) -} - -func (c *levelFilterCore) Write(ent zapcore.Entry, fields []zapcore.Field) error { - return c.core.Write(ent, fields) -} - -func (c *levelFilterCore) Sync() error { - return c.core.Sync() -} diff --git a/pkg/log/dynamic_redactor.go b/pkg/log/dynamic_redactor.go new file mode 100644 index 000000000000..34ed0cbb3f72 --- /dev/null +++ b/pkg/log/dynamic_redactor.go @@ -0,0 +1,50 @@ +package log + +import ( + "strings" + "sync" + "sync/atomic" +) + +type dynamicRedactor struct { + denySet map[string]struct{} + denySlice []string + denyMu sync.Mutex + + replacer atomic.Pointer[strings.Replacer] +} + +var globalRedactor *dynamicRedactor + +func init() { + globalRedactor = &dynamicRedactor{denySet: make(map[string]struct{})} + globalRedactor.replacer.CompareAndSwap(nil, strings.NewReplacer()) +} + +// RedactGlobally configures the global log redactor to redact the provided value during log emission. The value will be +// redacted in log messages and values that are strings, but not in log keys or values of other types. 
+func RedactGlobally(sensitiveValue string) { + globalRedactor.configureForRedaction(sensitiveValue) +} + +func (r *dynamicRedactor) configureForRedaction(sensitiveValue string) { + if sensitiveValue == "" { + return + } + + r.denyMu.Lock() + defer r.denyMu.Unlock() + + if _, ok := r.denySet[sensitiveValue]; ok { + return + } + + r.denySet[sensitiveValue] = struct{}{} + r.denySlice = append(r.denySlice, sensitiveValue, "*****") + + r.replacer.Store(strings.NewReplacer(r.denySlice...)) +} + +func (r *dynamicRedactor) redact(s string) string { + return r.replacer.Load().Replace(s) +} diff --git a/pkg/log/level.go b/pkg/log/level.go index 99dae3cba653..d3c23b078f76 100644 --- a/pkg/log/level.go +++ b/pkg/log/level.go @@ -8,7 +8,6 @@ import ( "go.uber.org/zap/zapcore" ) -// TODO: Use a struct to make testing easier. var ( // Global, default log level control. globalLogLevel levelSetter = zap.NewAtomicLevel() diff --git a/pkg/log/log.go b/pkg/log/log.go index 41fbcae08478..35f6b10b9427 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -83,9 +83,10 @@ func WithSentry(opts sentry.ClientOptions, tags map[string]string) logConfig { } type sinkConfig struct { - encoder zapcore.Encoder - sink zapcore.WriteSyncer - level levelSetter + encoder zapcore.Encoder + sink zapcore.WriteSyncer + level levelSetter + redactor *dynamicRedactor } // WithJSONSink adds a JSON encoded output to the logger. @@ -176,6 +177,13 @@ func WithLeveler(leveler levelSetter) func(*sinkConfig) { } } +// WithGlobalRedaction adds values to be redacted from logs. +func WithGlobalRedaction() func(*sinkConfig) { + return func(conf *sinkConfig) { + conf.redactor = globalRedactor + } +} + // firstErrorFunc is a helper function that returns a function that executes // all provided args and returns the first error, if any. 
func firstErrorFunc(fs ...func() error) func() error { @@ -209,11 +217,15 @@ func newCoreConfig( for _, f := range opts { f(&conf) } - return logConfig{ - core: zapcore.NewCore( - conf.encoder, - conf.sink, - conf.level, - ), + core := zapcore.NewCore( + conf.encoder, + conf.sink, + conf.level, + ) + + if conf.redactor == nil { + return logConfig{core: core} } + + return logConfig{core: NewRedactionCore(core, conf.redactor)} } diff --git a/pkg/log/log_test.go b/pkg/log/log_test.go index b158ce2d975d..f1dced5e4d3c 100644 --- a/pkg/log/log_test.go +++ b/pkg/log/log_test.go @@ -10,14 +10,15 @@ import ( "github.com/getsentry/sentry-go" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.uber.org/zap" ) func TestNew(t *testing.T) { var jsonBuffer, consoleBuffer bytes.Buffer logger, flush := New("service-name", - WithJSONSink(&jsonBuffer), - WithConsoleSink(&consoleBuffer), + WithJSONSink(&jsonBuffer, WithGlobalRedaction()), + WithConsoleSink(&consoleBuffer, WithGlobalRedaction()), ) logger.Info("yay") assert.Nil(t, flush()) @@ -233,3 +234,123 @@ func TestFindLevel(t *testing.T) { assert.Equal(t, i8, findLevel(logger)) } } + +func TestGlobalRedaction_Console(t *testing.T) { + oldState := globalRedactor + globalRedactor = &dynamicRedactor{ + denySet: make(map[string]struct{}), + } + defer func() { globalRedactor = oldState }() + + var buf bytes.Buffer + logger, flush := New("console-redaction-test", + WithConsoleSink(&buf, WithGlobalRedaction()), + ) + RedactGlobally("foo") + RedactGlobally("bar") + + logger.Info("this foo is :bar", + "foo", "bar", + "array", []string{"foo", "bar", "baz"}, + "object", map[string]string{"foo": "bar"}) + require.NoError(t, flush()) + + gotParts := strings.Split(buf.String(), "\t")[1:] // The first item is the timestamp + wantParts := []string{ + "info-0", + "console-redaction-test", + "this ***** is :*****", + "{\"foo\": \"*****\", \"array\": [\"foo\", \"bar\", \"baz\"], \"object\": {\"foo\":\"bar\"}}\n", + } + 
assert.Equal(t, wantParts, gotParts) +} + +func TestGlobalRedaction_JSON(t *testing.T) { + oldState := globalRedactor + globalRedactor = &dynamicRedactor{ + denySet: make(map[string]struct{}), + } + defer func() { globalRedactor = oldState }() + + var jsonBuffer bytes.Buffer + logger, flush := New("json-redaction-test", + WithJSONSink(&jsonBuffer, WithGlobalRedaction()), + ) + RedactGlobally("foo") + RedactGlobally("bar") + logger.Info("this foo is :bar", + "foo", "bar", + "array", []string{"foo", "bar", "baz"}, + "object", map[string]string{"foo": "bar"}) + require.NoError(t, flush()) + + var parsedJSON map[string]any + require.NoError(t, json.Unmarshal(jsonBuffer.Bytes(), &parsedJSON)) + assert.NotEmpty(t, parsedJSON["ts"]) + delete(parsedJSON, "ts") + assert.Equal(t, + map[string]any{ + "level": "info-0", + "logger": "json-redaction-test", + "msg": "this ***** is :*****", + "foo": "*****", + "array": []any{"foo", "bar", "baz"}, + "object": map[string]interface{}{"foo": "bar"}, + }, + parsedJSON, + ) +} + +func BenchmarkLoggerRedact(b *testing.B) { + msg := "this is a message with 'foo' in it" + logKvps := []any{"key", "value", "foo", "bar", "bar", "baz", "longval", "84hblnqwp97ewilbgoab8fhqlngahs6dl3i269haa"} + redactor := &dynamicRedactor{denySet: make(map[string]struct{})} + redactor.replacer.CompareAndSwap(nil, strings.NewReplacer()) + + b.Run("no redaction", func(b *testing.B) { + logger, flush := New("redaction-benchmark", WithJSONSink( + io.Discard, + func(conf *sinkConfig) { conf.redactor = redactor }, + )) + for i := 0; i < b.N; i++ { + logger.Info(msg, logKvps...) + } + require.NoError(b, flush()) + }) + b.Run("1 redaction", func(b *testing.B) { + logger, flush := New("redaction-benchmark", WithJSONSink( + io.Discard, + func(conf *sinkConfig) { conf.redactor = redactor }, + )) + redactor.configureForRedaction("84hblnqwp97ewilbgoab8fhqlngahs6dl3i269haa") + for i := 0; i < b.N; i++ { + logger.Info(msg, logKvps...) 
+ } + require.NoError(b, flush()) + }) + b.Run("2 redactions", func(b *testing.B) { + logger, flush := New("redaction-benchmark", WithJSONSink( + io.Discard, + func(conf *sinkConfig) { conf.redactor = redactor }, + )) + redactor.configureForRedaction("84hblnqwp97ewilbgoab8fhqlngahs6dl3i269haa") + redactor.configureForRedaction("foo") + for i := 0; i < b.N; i++ { + logger.Info(msg, logKvps...) + } + require.NoError(b, flush()) + }) + b.Run("3 redactions", func(b *testing.B) { + logger, flush := New("redaction-benchmark", WithJSONSink( + io.Discard, + func(conf *sinkConfig) { conf.redactor = redactor }, + )) + redactor.configureForRedaction("84hblnqwp97ewilbgoab8fhqlngahs6dl3i269haa") + redactor.configureForRedaction("foo") + redactor.configureForRedaction("bar") + for i := 0; i < b.N; i++ { + logger.Info(msg, logKvps...) + } + require.NoError(b, flush()) + }) +} diff --git a/pkg/log/redaction_core.go b/pkg/log/redaction_core.go new file mode 100644 index 000000000000..11237ac518c4 --- /dev/null +++ b/pkg/log/redaction_core.go @@ -0,0 +1,42 @@ +package log + +import ( + "go.uber.org/zap/zapcore" +) + +// redactionCore wraps a zapcore.Core to perform redaction of log messages in +// the message and field values. +type redactionCore struct { + zapcore.Core + redactor *dynamicRedactor +} + +// NewRedactionCore creates a zapcore.Core that performs redaction of logs in +// the message and field values. +func NewRedactionCore(core zapcore.Core, redactor *dynamicRedactor) zapcore.Core { + return &redactionCore{core, redactor} +} + +// Check overrides the embedded zapcore.Core Check() method to add the +// redactionCore to the zapcore.CheckedEntry. 
+func (c *redactionCore) Check(ent zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { + if c.Enabled(ent.Level) { + return ce.AddCore(ent, c) + } + return ce +} + +func (c *redactionCore) With(fields []zapcore.Field) zapcore.Core { + return NewRedactionCore(c.Core.With(fields), c.redactor) +} + +// Write overrides the embedded zapcore.Core Write() method to redact the message and fields before passing them to be +// written. Only message and string values are redacted; keys and non-string values (e.g. those inside of arrays and +// structured objects) are not redacted. +func (c *redactionCore) Write(ent zapcore.Entry, fields []zapcore.Field) error { + ent.Message = c.redactor.redact(ent.Message) + for i := range fields { + fields[i].String = c.redactor.redact(fields[i].String) + } + return c.Core.Write(ent, fields) +} diff --git a/pkg/sources/gitlab/gitlab.go b/pkg/sources/gitlab/gitlab.go index 38030d8f2934..dd1aaba345d0 100644 --- a/pkg/sources/gitlab/gitlab.go +++ b/pkg/sources/gitlab/gitlab.go @@ -8,6 +8,7 @@ import ( "strings" "sync" + "github.com/trufflesecurity/trufflehog/v3/pkg/log" "golang.org/x/sync/errgroup" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -108,9 +109,11 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou case *sourcespb.GitLab_Token: s.authMethod = "TOKEN" s.token = cred.Token + log.RedactGlobally(s.token) case *sourcespb.GitLab_Oauth: s.authMethod = "OAUTH" s.token = cred.Oauth.RefreshToken + log.RedactGlobally(s.token) // TODO: is it okay if there is no client id and secret? Might be an issue when marshalling config to proto case *sourcespb.GitLab_BasicAuth: s.authMethod = "BASIC_AUTH" @@ -118,6 +121,7 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou s.password = cred.BasicAuth.Password // We may need the password as a token if the user is using an access_token with basic auth. 
s.token = cred.BasicAuth.Password + log.RedactGlobally(cred.BasicAuth.Password) default: return fmt.Errorf("invalid configuration given for source %q (%s)", name, s.Type().String()) }