From ca7c4b3c6ac250a0e0d6d54e5e7e969e27cfcb47 Mon Sep 17 00:00:00 2001 From: Ashmita Date: Fri, 13 Nov 2020 07:18:53 +0800 Subject: [PATCH] Implement anonymizer's main program (#2621) --- .gitignore | 2 + Makefile | 4 + cmd/anonymizer/app/anonymizer/anonymizer.go | 32 ++++--- .../app/anonymizer/anonymizer_test.go | 20 +++-- cmd/anonymizer/app/flags.go | 89 +++++++++++++++++++ cmd/anonymizer/app/flags_test.go | 62 +++++++++++++ cmd/anonymizer/app/query/.nocover | 1 + cmd/anonymizer/app/query/query.go | 88 ++++++++++++++++++ cmd/anonymizer/app/writer/writer.go | 34 ++++--- cmd/anonymizer/main.go | 66 +++++++++++++- 10 files changed, 363 insertions(+), 35 deletions(-) create mode 100644 cmd/anonymizer/app/flags.go create mode 100644 cmd/anonymizer/app/flags_test.go create mode 100644 cmd/anonymizer/app/query/.nocover create mode 100644 cmd/anonymizer/app/query/query.go diff --git a/.gitignore b/.gitignore index 3ab8672ab4b..471f3c91849 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ examples/memstore-plugin/memstore-plugin cmd/all-in-one/all-in-one-* cmd/agent/agent cmd/agent/agent-* +cmd/anonymizer/anonymizer +cmd/anonymizer/anonymizer-* cmd/collector/collector cmd/collector/collector-* cmd/ingester/ingester diff --git a/Makefile b/Makefile index 61bc175cd59..469697ead79 100644 --- a/Makefile +++ b/Makefile @@ -250,6 +250,10 @@ build-all-in-one build-all-in-one-debug: build-ui elasticsearch-mappings build-agent build-agent-debug: $(GOBUILD) $(DISABLE_OPTIMIZATIONS) -o ./cmd/agent/agent$(SUFFIX)-$(GOOS)-$(GOARCH) $(BUILD_INFO) ./cmd/agent/main.go +.PHONY: build-anonymizer +build-anonymizer: + $(GOBUILD) $(DISABLE_OPTIMIZATIONS) -o ./cmd/anonymizer/anonymizer$(SUFFIX)-$(GOOS)-$(GOARCH) $(BUILD_INFO) ./cmd/anonymizer/main.go + .PHONY: build-query build-query-debug build-query build-query-debug: build-ui $(GOBUILD) $(DISABLE_OPTIMIZATIONS) -tags ui -o ./cmd/query/query$(SUFFIX)-$(GOOS)-$(GOARCH) $(BUILD_INFO) ./cmd/query/main.go diff --git a/cmd/anonymizer/app/anonymizer/anonymizer.go b/cmd/anonymizer/app/anonymizer/anonymizer.go index 9996900b518..13d8dd17737 100644 --- a/cmd/anonymizer/app/anonymizer/anonymizer.go +++ b/cmd/anonymizer/app/anonymizer/anonymizer.go @@ -53,19 +53,24 @@ type mapping struct { // // The mapping from original to obfuscated strings is stored in a file and can be reused between runs. type Anonymizer struct { - mappingFile string - logger *zap.Logger - lock sync.Mutex - mapping mapping - hashStandardTags bool - hashCustomTags bool - hashLogs bool - hashProcess bool + mappingFile string + logger *zap.Logger + lock sync.Mutex + mapping mapping + options Options +} + +// Options represents the various options with which the anonymizer can be configured. +type Options struct { + HashStandardTags bool `yaml:"hash_standard_tags" name:"hash_standard_tags"` + HashCustomTags bool `yaml:"hash_custom_tags" name:"hash_custom_tags"` + HashLogs bool `yaml:"hash_logs" name:"hash_logs"` + HashProcess bool `yaml:"hash_process" name:"hash_process"` } // New creates new Anonymizer. The mappingFile stores the mapping from original to // obfuscated strings, in case later investigations require looking at the original traces. -func New(mappingFile string, logger *zap.Logger) *Anonymizer { +func New(mappingFile string, options Options, logger *zap.Logger) *Anonymizer { a := &Anonymizer{ mappingFile: mappingFile, logger: logger, @@ -73,6 +78,7 @@ func New(mappingFile string, logger *zap.Logger) *Anonymizer { Services: make(map[string]string), Operations: make(map[string]string), }, + options: options, } if _, err := os.Stat(filepath.Clean(mappingFile)); err == nil { dat, err := ioutil.ReadFile(filepath.Clean(mappingFile)) @@ -142,18 +148,18 @@ func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span { outputTags := filterStandardTags(span.Tags) // when true, the allowedTags are hashed and when false they are preserved as it is - if a.hashStandardTags { + if a.options.HashStandardTags { outputTags = hashTags(outputTags) } // when true, all tags other than allowedTags are hashed, when false they are dropped - if a.hashCustomTags { + if a.options.HashCustomTags { customTags := hashTags(filterCustomTags(span.Tags)) outputTags = append(outputTags, customTags...) } span.Tags = outputTags // when true, logs are hashed, when false, they are dropped - if a.hashLogs { + if a.options.HashLogs { for _, log := range span.Logs { log.Fields = hashTags(log.Fields) } @@ -164,7 +170,7 @@ func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span { span.Process.ServiceName = a.mapServiceName(service) // when true, process tags are hashed, when false they are dropped - if a.hashProcess { + if a.options.HashProcess { span.Process.Tags = hashTags(span.Process.Tags) } else { span.Process.Tags = nil diff --git a/cmd/anonymizer/app/anonymizer/anonymizer_test.go b/cmd/anonymizer/app/anonymizer/anonymizer_test.go index d89279b37fb..94e35f7903f 100644 --- a/cmd/anonymizer/app/anonymizer/anonymizer_test.go +++ b/cmd/anonymizer/app/anonymizer/anonymizer_test.go @@ -103,10 +103,12 @@ func TestAnonymizer_AnonymizeSpan_AllTrue(t *testing.T) { Services: make(map[string]string), Operations: make(map[string]string), }, - hashStandardTags: true, - hashCustomTags: true, - hashProcess: true, - hashLogs: true, + options: Options{ + HashStandardTags: true, + HashCustomTags: true, + HashProcess: true, + HashLogs: true, + }, } _ = anonymizer.AnonymizeSpan(span1) assert.Equal(t, 3, len(span1.Tags)) @@ -120,10 +122,12 @@ func TestAnonymizer_AnonymizeSpan_AllFalse(t *testing.T) { Services: make(map[string]string), Operations: make(map[string]string), }, - hashStandardTags: false, - hashCustomTags: false, - hashProcess: false, - hashLogs: false, + options: Options{ + HashStandardTags: false, + HashCustomTags: false, + HashProcess: false, + HashLogs: false, + }, } _ = anonymizer.AnonymizeSpan(span2) assert.Equal(t, 2, len(span2.Tags)) diff --git a/cmd/anonymizer/app/flags.go b/cmd/anonymizer/app/flags.go new file mode 100644 index 00000000000..91382c15a47 --- /dev/null +++ b/cmd/anonymizer/app/flags.go @@ -0,0 +1,89 @@ +// Copyright (c) 2020 The Jaeger Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package app + +import ( + "github.com/spf13/cobra" +) + +// Options represent configurable parameters for jaeger-anonymizer +type Options struct { + QueryGRPCHostPort string + MaxSpansCount int + TraceID string + OutputDir string + HashStandardTags bool + HashCustomTags bool + HashLogs bool + HashProcess bool +} + +const ( + queryGRPCHostPortFlag = "query-host-port" + outputDirFlag = "output-dir" + traceIDFlag = "trace-id" + hashStandardTagsFlag = "hash-standard-tags" + hashCustomTagsFlag = "hash-custom-tags" + hashLogsFlag = "hash-logs" + hashProcessFlag = "hash-process" + maxSpansCount = "max-spans-count" +) + +// AddFlags adds flags for anonymizer main program +func (o *Options) AddFlags(command *cobra.Command) { + command.Flags().StringVar( + &o.QueryGRPCHostPort, + queryGRPCHostPortFlag, + "localhost:16686", + "The host:port of the jaeger-query endpoint") + command.Flags().StringVar( + &o.OutputDir, + outputDirFlag, + "/tmp", + "The directory to store the anonymized trace") + command.Flags().StringVar( + &o.TraceID, + traceIDFlag, + "", + "The trace-id of trace to anonymize") + command.Flags().BoolVar( + &o.HashStandardTags, + hashStandardTagsFlag, + false, + "Whether to hash standard tags") + command.Flags().BoolVar( + &o.HashCustomTags, + hashCustomTagsFlag, + false, + "Whether to hash custom tags") + command.Flags().BoolVar( + &o.HashLogs, + hashLogsFlag, + false, + "Whether to hash logs") + command.Flags().BoolVar( + &o.HashProcess, + hashProcessFlag, + false, + "Whether to hash process") + command.Flags().IntVar( + &o.MaxSpansCount, + maxSpansCount, + -1, + "The maximum number of spans to anonymize") + + // mark traceid flag as mandatory + command.MarkFlagRequired(traceIDFlag) +} diff --git a/cmd/anonymizer/app/flags_test.go b/cmd/anonymizer/app/flags_test.go new file mode 100644 index 00000000000..80790f15c15 --- /dev/null +++ b/cmd/anonymizer/app/flags_test.go @@ -0,0 +1,62 @@ +// Copyright (c) 2020 The Jaeger Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package app + +import ( + "testing" + + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" +) + +func TestOptionsWithDefaultFlags(t *testing.T) { + o := Options{} + c := cobra.Command{} + o.AddFlags(&c) + + assert.Equal(t, "localhost:16686", o.QueryGRPCHostPort) + assert.Equal(t, "/tmp", o.OutputDir) + assert.Equal(t, false, o.HashStandardTags) + assert.Equal(t, false, o.HashCustomTags) + assert.Equal(t, false, o.HashLogs) + assert.Equal(t, false, o.HashProcess) + assert.Equal(t, -1, o.MaxSpansCount) +} + +func TestOptionsWithFlags(t *testing.T) { + o := Options{} + c := cobra.Command{} + + o.AddFlags(&c) + c.ParseFlags([]string{ + "--query-host-port=192.168.1.10:16686", + "--output-dir=/data", + "--trace-id=6ef2debb698f2f7c", + "--hash-standard-tags", + "--hash-custom-tags", + "--hash-logs", + "--hash-process", + "--max-spans-count=100", + }) + + assert.Equal(t, "192.168.1.10:16686", o.QueryGRPCHostPort) + assert.Equal(t, "/data", o.OutputDir) + assert.Equal(t, "6ef2debb698f2f7c", o.TraceID) + assert.Equal(t, true, o.HashStandardTags) + assert.Equal(t, true, o.HashCustomTags) + assert.Equal(t, true, o.HashLogs) + assert.Equal(t, true, o.HashProcess) + assert.Equal(t, 100, o.MaxSpansCount) +} diff --git a/cmd/anonymizer/app/query/.nocover b/cmd/anonymizer/app/query/.nocover new file mode 100644 index 00000000000..5b583b79e93 --- /dev/null +++ b/cmd/anonymizer/app/query/.nocover @@ -0,0 +1 @@ +non-critical test utility diff --git a/cmd/anonymizer/app/query/query.go b/cmd/anonymizer/app/query/query.go new file mode 100644 index 00000000000..ffbf2126d9e --- /dev/null +++ b/cmd/anonymizer/app/query/query.go @@ -0,0 +1,88 @@ +// Copyright (c) 2020 The Jaeger Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package query + +import ( + "context" + "fmt" + "io" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/status" + + "github.com/jaegertracing/jaeger/model" + "github.com/jaegertracing/jaeger/proto-gen/api_v2" + "github.com/jaegertracing/jaeger/storage/spanstore" +) + +// Query represents a jaeger-query's query for trace-id +type Query struct { + client api_v2.QueryServiceClient + conn *grpc.ClientConn +} + +// New creates a Query object +func New(addr string) (*Query, error) { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + conn, err := grpc.DialContext(ctx, addr, grpc.WithInsecure()) + if err != nil { + return nil, fmt.Errorf("failed to connect with the jaeger-query service: %w", err) + } + + return &Query{ + client: api_v2.NewQueryServiceClient(conn), + conn: conn, + }, nil +} + +// unwrapNotFoundErr is a conversion function +func unwrapNotFoundErr(err error) error { + if s, _ := status.FromError(err); s != nil { + if s.Message() == spanstore.ErrTraceNotFound.Error() { + return spanstore.ErrTraceNotFound + } + } + return err +} + +// QueryTrace queries for a trace and returns all spans inside it +func (q *Query) QueryTrace(traceID string) ([]model.Span, error) { + mTraceID, err := model.TraceIDFromString(traceID) + if err != nil { + return nil, fmt.Errorf("failed to convert the provided trace id: %w", err) + } + + stream, err := q.client.GetTrace(context.Background(), &api_v2.GetTraceRequest{ + TraceID: mTraceID, + }) + if err != nil { + return nil, unwrapNotFoundErr(err) + } + + var spans []model.Span + for received, err := stream.Recv(); err != io.EOF; received, err = stream.Recv() { + if err != nil { + return nil, unwrapNotFoundErr(err) + } + for i := range received.Spans { + spans = append(spans, received.Spans[i]) + } + } + + return spans, nil +} diff --git a/cmd/anonymizer/app/writer/writer.go b/cmd/anonymizer/app/writer/writer.go index 5e36f8a4db2..5d04e61df53 100644 --- a/cmd/anonymizer/app/writer/writer.go +++ b/cmd/anonymizer/app/writer/writer.go @@ -30,10 +30,11 @@ import ( // Config contains parameters to NewWriter. type Config struct { - MaxSpansCount int `yaml:"max_spans_count" name:"max_spans_count"` - CapturedFile string `yaml:"captured_file" name:"captured_file"` - AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"` - MappingFile string `yaml:"mapping_file" name:"mapping_file"` + MaxSpansCount int `yaml:"max_spans_count" name:"max_spans_count"` + CapturedFile string `yaml:"captured_file" name:"captured_file"` + AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"` + MappingFile string `yaml:"mapping_file" name:"mapping_file"` + AnonymizerOpts anonymizer.Options `yaml:"anonymizer" name:"anonymizer"` } // Writer is a span Writer that obfuscates the span and writes it to a JSON file. @@ -75,12 +76,20 @@ func New(config Config, logger *zap.Logger) (*Writer, error) { if err != nil { return nil, fmt.Errorf("cannot write tp output file: %w", err) } + + options := anonymizer.Options{ + HashStandardTags: config.AnonymizerOpts.HashStandardTags, + HashCustomTags: config.AnonymizerOpts.HashCustomTags, + HashLogs: config.AnonymizerOpts.HashLogs, + HashProcess: config.AnonymizerOpts.HashProcess, + } + return &Writer{ config: config, logger: logger, capturedFile: cf, anonymizedFile: af, - anonymizer: anonymizer.New(config.MappingFile, logger), + anonymizer: anonymizer.New(config.MappingFile, options, logger), }, nil } @@ -120,13 +129,18 @@ func (w *Writer) WriteSpan(msg *model.Span) error { if w.spanCount >= w.config.MaxSpansCount { w.logger.Info("Saved enough spans, exiting...") - w.capturedFile.WriteString("\n]\n") - w.capturedFile.Close() - w.anonymizedFile.WriteString("\n]\n") - w.anonymizedFile.Close() - w.anonymizer.SaveMapping() + w.Close() os.Exit(0) } return nil } + +// Close closes the captured and anonymized files. +func (w *Writer) Close() { + w.capturedFile.WriteString("\n]\n") + w.capturedFile.Close() + w.anonymizedFile.WriteString("\n]\n") + w.anonymizedFile.Close() + w.anonymizer.SaveMapping() +} diff --git a/cmd/anonymizer/main.go b/cmd/anonymizer/main.go index 221bdc04eb8..a4dd00e4a9f 100644 --- a/cmd/anonymizer/main.go +++ b/cmd/anonymizer/main.go @@ -15,13 +15,71 @@ package main import ( + "fmt" + "os" + + "github.com/spf13/cobra" "go.uber.org/zap" - "github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer" + app "github.com/jaegertracing/jaeger/cmd/anonymizer/app" + "github.com/jaegertracing/jaeger/cmd/anonymizer/app/anonymizer" + query "github.com/jaegertracing/jaeger/cmd/anonymizer/app/query" + writer "github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer" + "github.com/jaegertracing/jaeger/pkg/version" ) +var logger, _ = zap.NewDevelopment() + func main() { - // TODO - _, _ = writer.New(writer.Config{}, zap.NewNop()) - println("not implemented") + var options = app.Options{} + + var command = &cobra.Command{ + Use: "jaeger-anonymizer", + Short: "Jaeger anonymizer hashes fields of a trace for easy sharing", + Long: `Jaeger anonymizer queries Jaeger query for a trace, anonymizes fields, and store in file`, + Run: func(cmd *cobra.Command, args []string) { + prefix := options.OutputDir + "/" + options.TraceID + conf := writer.Config{ + MaxSpansCount: options.MaxSpansCount, + CapturedFile: prefix + ".original", + AnonymizedFile: prefix + ".anonymized", + MappingFile: prefix + ".mapping", + AnonymizerOpts: anonymizer.Options{ + HashStandardTags: options.HashStandardTags, + HashCustomTags: options.HashCustomTags, + HashLogs: options.HashLogs, + HashProcess: options.HashProcess, + }, + } + + writer, err := writer.New(conf, logger) + if err != nil { + logger.Fatal("error while creating writer object", zap.Error(err)) + } + + query, err := query.New(options.QueryGRPCHostPort) + if err != nil { + logger.Fatal("error while creating query object", zap.Error(err)) + } + + spans, err := query.QueryTrace(options.TraceID) + if err != nil { + logger.Fatal("error while querying for trace", zap.Error(err)) + } + + for _, span := range spans { + writer.WriteSpan(&span) + } + writer.Close() + }, + } + + options.AddFlags(command) + + command.AddCommand(version.Command()) + + if error := command.Execute(); error != nil { + fmt.Println(error.Error()) + os.Exit(1) + } }