From d427ed06467149abb584fb1a82a1bdc8dc6748ba Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Sun, 11 Jun 2017 19:02:15 +0200 Subject: [PATCH 1/2] make server metrics registerable to custom registry --- server.go | 78 ++++++------------- server_metrics.go | 184 +++++++++++++++++++++++++++++++++++++++++++++ server_reporter.go | 137 ++++----------------------------- util.go | 23 ++++++ 4 files changed, 243 insertions(+), 179 deletions(-) create mode 100644 server_metrics.go diff --git a/server.go b/server.go index f85c8c2..036cfe0 100644 --- a/server.go +++ b/server.go @@ -6,69 +6,37 @@ package grpc_prometheus import ( - "golang.org/x/net/context" + prom "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" ) -// PreregisterServices takes a gRPC server and pre-initializes all counters to 0. -// This allows for easier monitoring in Prometheus (no missing metrics), and should be called *after* all services have -// been registered with the server. -func Register(server *grpc.Server) { - serviceInfo := server.GetServiceInfo() - for serviceName, info := range serviceInfo { - for _, mInfo := range info.Methods { - preRegisterMethod(serviceName, &mInfo) - } - } -} - -// UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs. -func UnaryServerInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { - monitor := newServerReporter(Unary, info.FullMethod) - monitor.ReceivedMessage() - resp, err := handler(ctx, req) - monitor.Handled(grpc.Code(err)) - if err == nil { - monitor.SentMessage() - } - return resp, err -} +var ( + DefaultServerMetrics = NewServerMetrics() -// StreamServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Streaming RPCs. -func StreamServerInterceptor(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { - monitor := newServerReporter(streamRpcType(info), info.FullMethod) - err := handler(srv, &monitoredServerStream{ss, monitor}) - monitor.Handled(grpc.Code(err)) - return err -} + // UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs. + UnaryServerInterceptor = DefaultServerMetrics.UnaryServerInterceptor() -func streamRpcType(info *grpc.StreamServerInfo) grpcType { - if info.IsClientStream && !info.IsServerStream { - return ClientStream - } else if !info.IsClientStream && info.IsServerStream { - return ServerStream - } - return BidiStream -} + // StreamServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Streaming RPCs. + StreamServerInterceptor = DefaultServerMetrics.StreamServerInterceptor() +) -// monitoredStream wraps grpc.ServerStream allowing each Sent/Recv of message to increment counters. -type monitoredServerStream struct { - grpc.ServerStream - monitor *serverReporter +func init() { + prom.MustRegister(DefaultServerMetrics.serverStartedCounter) + prom.MustRegister(DefaultServerMetrics.serverHandledCounter) + prom.MustRegister(DefaultServerMetrics.serverStreamMsgReceived) + prom.MustRegister(DefaultServerMetrics.serverStreamMsgSent) } -func (s *monitoredServerStream) SendMsg(m interface{}) error { - err := s.ServerStream.SendMsg(m) - if err == nil { - s.monitor.SentMessage() - } - return err +// PreregisterServices takes a gRPC server and pre-initializes all counters to 0. +// This allows for easier monitoring in Prometheus (no missing metrics), and should be called *after* all services have +// been registered with the server. +func Register(server *grpc.Server) { + DefaultServerMetrics.InitializeMetrics(server) } -func (s *monitoredServerStream) RecvMsg(m interface{}) error { - err := s.ServerStream.RecvMsg(m) - if err == nil { - s.monitor.ReceivedMessage() - } - return err +// EnableHandlingTimeHistogram turns on recording of handling time of RPCs for server-side interceptors. +// Histogram metrics can be very expensive for Prometheus to retain and query. +func EnableHandlingTimeHistogram(opts ...HistogramOption) { + DefaultServerMetrics.EnableHandlingTimeHistogram(opts...) + prom.Register(DefaultServerMetrics.serverHandledHistogram) } diff --git a/server_metrics.go b/server_metrics.go new file mode 100644 index 0000000..6c47139 --- /dev/null +++ b/server_metrics.go @@ -0,0 +1,184 @@ +package grpc_prometheus + +import ( + prom "github.com/prometheus/client_golang/prometheus" + "golang.org/x/net/context" + "google.golang.org/grpc" +) + +type ServerMetrics struct { + serverStartedCounter *prom.CounterVec + serverHandledCounter *prom.CounterVec + serverStreamMsgReceived *prom.CounterVec + serverStreamMsgSent *prom.CounterVec + serverHandledHistogramEnabled bool + serverHandledHistogramOpts prom.HistogramOpts + serverHandledHistogram *prom.HistogramVec +} + +func NewServerMetrics() *ServerMetrics { + return &ServerMetrics{ + serverStartedCounter: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_server_started_total", + Help: "Total number of RPCs started on the server.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + serverHandledCounter: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_server_handled_total", + Help: "Total number of RPCs completed on the server, regardless of success or failure.", + }, []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}), + serverStreamMsgReceived: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_server_msg_received_total", + Help: "Total number of RPC stream messages received on the server.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + serverStreamMsgSent: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_server_msg_sent_total", + Help: "Total number of gRPC stream messages sent by the server.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + serverHandledHistogramEnabled: false, + serverHandledHistogramOpts: prom.HistogramOpts{ + Name: "grpc_server_handling_seconds", + Help: "Histogram of response latency (seconds) of gRPC that had been application-level handled by the server.", + Buckets: prom.DefBuckets, + }, + serverHandledHistogram: nil, + } +} + +type HistogramOption func(*prom.HistogramOpts) + +// WithHistogramBuckets allows you to specify custom bucket ranges for histograms if EnableHandlingTimeHistogram is on. +func WithHistogramBuckets(buckets []float64) HistogramOption { + return func(o *prom.HistogramOpts) { o.Buckets = buckets } +} + +func (m *ServerMetrics) EnableHandlingTimeHistogram(opts ...HistogramOption) { + for _, o := range opts { + o(&m.serverHandledHistogramOpts) + } + if !m.serverHandledHistogramEnabled { + m.serverHandledHistogram = prom.NewHistogramVec( + m.serverHandledHistogramOpts, + []string{"grpc_type", "grpc_service", "grpc_method"}, + ) + } + m.serverHandledHistogramEnabled = true +} + +// UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs. +func (m *ServerMetrics) UnaryServerInterceptor() func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { + monitor := newServerReporter(m, Unary, info.FullMethod) + monitor.ReceivedMessage() + resp, err := handler(ctx, req) + monitor.Handled(grpc.Code(err)) + if err == nil { + monitor.SentMessage() + } + return resp, err + } +} + +// StreamServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Streaming RPCs. +func (m *ServerMetrics) StreamServerInterceptor() func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + monitor := newServerReporter(m, streamRpcType(info), info.FullMethod) + err := handler(srv, &monitoredServerStream{ss, monitor}) + monitor.Handled(grpc.Code(err)) + return err + } +} + +func (m *ServerMetrics) InitializeMetrics(server *grpc.Server) { + serviceInfo := server.GetServiceInfo() + for serviceName, info := range serviceInfo { + for _, mInfo := range info.Methods { + preRegisterMethod(m, serviceName, &mInfo) + } + } +} + +func (m *ServerMetrics) Register(r prom.Registerer) error { + err := r.Register(m.serverStartedCounter) + if err != nil { + return err + } + err = r.Register(m.serverHandledCounter) + if err != nil { + return err + } + err = r.Register(m.serverStreamMsgReceived) + if err != nil { + return err + } + err = r.Register(m.serverStreamMsgSent) + if err != nil { + return err + } + + if m.serverHandledHistogramEnabled { + err = r.Register(m.serverHandledHistogram) + if err != nil { + return err + } + } + + return nil +} + +func (m *ServerMetrics) MustRegister(r prom.Registerer) { + err := m.Register(r) + if err != nil { + panic(err) + } +} + +func streamRpcType(info *grpc.StreamServerInfo) grpcType { + if info.IsClientStream && !info.IsServerStream { + return ClientStream + } else if !info.IsClientStream && info.IsServerStream { + return ServerStream + } + return BidiStream +} + +// monitoredStream wraps grpc.ServerStream allowing each Sent/Recv of message to increment counters. +type monitoredServerStream struct { + grpc.ServerStream + monitor *serverReporter +} + +func (s *monitoredServerStream) SendMsg(m interface{}) error { + err := s.ServerStream.SendMsg(m) + if err == nil { + s.monitor.SentMessage() + } + return err +} + +func (s *monitoredServerStream) RecvMsg(m interface{}) error { + err := s.ServerStream.RecvMsg(m) + if err == nil { + s.monitor.ReceivedMessage() + } + return err +} + +// preRegisterMethod is invoked on Register of a Server, allowing all gRPC services labels to be pre-populated. +func preRegisterMethod(metrics *ServerMetrics, serviceName string, mInfo *grpc.MethodInfo) { + methodName := mInfo.Name + methodType := string(typeFromMethodInfo(mInfo)) + // These are just references (no increments), as just referencing will create the labels but not set values. + metrics.serverStartedCounter.GetMetricWithLabelValues(methodType, serviceName, methodName) + metrics.serverStreamMsgReceived.GetMetricWithLabelValues(methodType, serviceName, methodName) + metrics.serverStreamMsgSent.GetMetricWithLabelValues(methodType, serviceName, methodName) + if metrics.serverHandledHistogramEnabled { + metrics.serverHandledHistogram.GetMetricWithLabelValues(methodType, serviceName, methodName) + } + for _, code := range allCodes { + metrics.serverHandledCounter.GetMetricWithLabelValues(methodType, serviceName, methodName, code.String()) + } +} diff --git a/server_reporter.go b/server_reporter.go index 628a890..aa9db54 100644 --- a/server_reporter.go +++ b/server_reporter.go @@ -7,151 +7,40 @@ import ( "time" "google.golang.org/grpc/codes" - - prom "github.com/prometheus/client_golang/prometheus" - "google.golang.org/grpc" -) - -type grpcType string - -const ( - Unary grpcType = "unary" - ClientStream grpcType = "client_stream" - ServerStream grpcType = "server_stream" - BidiStream grpcType = "bidi_stream" ) -var ( - serverStartedCounter = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "server", - Name: "started_total", - Help: "Total number of RPCs started on the server.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - serverHandledCounter = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "server", - Name: "handled_total", - Help: "Total number of RPCs completed on the server, regardless of success or failure.", - }, []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}) - - serverStreamMsgReceived = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "server", - Name: "msg_received_total", - Help: "Total number of RPC stream messages received on the server.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - serverStreamMsgSent = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "server", - Name: "msg_sent_total", - Help: "Total number of gRPC stream messages sent by the server.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - serverHandledHistogramEnabled = false - serverHandledHistogramOpts = prom.HistogramOpts{ - Namespace: "grpc", - Subsystem: "server", - Name: "handling_seconds", - Help: "Histogram of response latency (seconds) of gRPC that had been application-level handled by the server.", - Buckets: prom.DefBuckets, - } - serverHandledHistogram *prom.HistogramVec -) - -func init() { - prom.MustRegister(serverStartedCounter) - prom.MustRegister(serverHandledCounter) - prom.MustRegister(serverStreamMsgReceived) - prom.MustRegister(serverStreamMsgSent) -} - -type HistogramOption func(*prom.HistogramOpts) - -// WithHistogramBuckets allows you to specify custom bucket ranges for histograms if EnableHandlingTimeHistogram is on. -func WithHistogramBuckets(buckets []float64) HistogramOption { - return func(o *prom.HistogramOpts) { o.Buckets = buckets } -} - -// EnableHandlingTimeHistogram turns on recording of handling time of RPCs for server-side interceptors. -// Histogram metrics can be very expensive for Prometheus to retain and query. -func EnableHandlingTimeHistogram(opts ...HistogramOption) { - for _, o := range opts { - o(&serverHandledHistogramOpts) - } - if !serverHandledHistogramEnabled { - serverHandledHistogram = prom.NewHistogramVec( - serverHandledHistogramOpts, - []string{"grpc_type", "grpc_service", "grpc_method"}, - ) - prom.Register(serverHandledHistogram) - } - serverHandledHistogramEnabled = true -} - type serverReporter struct { + metrics *ServerMetrics rpcType grpcType serviceName string methodName string startTime time.Time } -func newServerReporter(rpcType grpcType, fullMethod string) *serverReporter { - r := &serverReporter{rpcType: rpcType} - if serverHandledHistogramEnabled { +func newServerReporter(m *ServerMetrics, rpcType grpcType, fullMethod string) *serverReporter { + r := &serverReporter{ + metrics: m, + rpcType: rpcType, + } + if r.metrics.serverHandledHistogramEnabled { r.startTime = time.Now() } r.serviceName, r.methodName = splitMethodName(fullMethod) - serverStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.serverStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() return r } func (r *serverReporter) ReceivedMessage() { - serverStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.serverStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() } func (r *serverReporter) SentMessage() { - serverStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.serverStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() } func (r *serverReporter) Handled(code codes.Code) { - serverHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() - if serverHandledHistogramEnabled { - serverHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) - } -} - -// preRegisterMethod is invoked on Register of a Server, allowing all gRPC services labels to be pre-populated. -func preRegisterMethod(serviceName string, mInfo *grpc.MethodInfo) { - methodName := mInfo.Name - methodType := string(typeFromMethodInfo(mInfo)) - // These are just references (no increments), as just referencing will create the labels but not set values. - serverStartedCounter.GetMetricWithLabelValues(methodType, serviceName, methodName) - serverStreamMsgReceived.GetMetricWithLabelValues(methodType, serviceName, methodName) - serverStreamMsgSent.GetMetricWithLabelValues(methodType, serviceName, methodName) - if serverHandledHistogramEnabled { - serverHandledHistogram.GetMetricWithLabelValues(methodType, serviceName, methodName) - } - for _, code := range allCodes { - serverHandledCounter.GetMetricWithLabelValues(methodType, serviceName, methodName, code.String()) - } -} - -func typeFromMethodInfo(mInfo *grpc.MethodInfo) grpcType { - if mInfo.IsClientStream == false && mInfo.IsServerStream == false { - return Unary - } - if mInfo.IsClientStream == true && mInfo.IsServerStream == false { - return ClientStream - } - if mInfo.IsClientStream == false && mInfo.IsServerStream == true { - return ServerStream + r.metrics.serverHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() + if r.metrics.serverHandledHistogramEnabled { + r.metrics.serverHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) } - return BidiStream } diff --git a/util.go b/util.go index 372460a..d581c79 100644 --- a/util.go +++ b/util.go @@ -6,9 +6,19 @@ package grpc_prometheus import ( "strings" + "google.golang.org/grpc" "google.golang.org/grpc/codes" ) +type grpcType string + +const ( + Unary grpcType = "unary" + ClientStream grpcType = "client_stream" + ServerStream grpcType = "server_stream" + BidiStream grpcType = "bidi_stream" +) + var ( allCodes = []codes.Code{ codes.OK, codes.Canceled, codes.Unknown, codes.InvalidArgument, codes.DeadlineExceeded, codes.NotFound, @@ -25,3 +35,16 @@ func splitMethodName(fullMethodName string) (string, string) { } return "unknown", "unknown" } + +func typeFromMethodInfo(mInfo *grpc.MethodInfo) grpcType { + if mInfo.IsClientStream == false && mInfo.IsServerStream == false { + return Unary + } + if mInfo.IsClientStream == true && mInfo.IsServerStream == false { + return ClientStream + } + if mInfo.IsClientStream == false && mInfo.IsServerStream == true { + return ServerStream + } + return BidiStream +} From 42b3df773f3aa462c73156e237034cbc3a864143 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Sun, 11 Jun 2017 19:57:52 +0200 Subject: [PATCH 2/2] make client metrics registerable to custom registry --- client.go | 79 ++++++++------------------ client_metrics.go | 139 +++++++++++++++++++++++++++++++++++++++++++++ client_reporter.go | 91 +++++------------------------ server.go | 16 ++++-- server_metrics.go | 17 ++++++ 5 files changed, 203 insertions(+), 139 deletions(-) create mode 100644 client_metrics.go diff --git a/client.go b/client.go index d9e87b2..751a4c7 100644 --- a/client.go +++ b/client.go @@ -6,67 +6,34 @@ package grpc_prometheus import ( - "io" - - "golang.org/x/net/context" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" + prom "github.com/prometheus/client_golang/prometheus" ) -// UnaryClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Unary RPCs. -func UnaryClientInterceptor(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { - monitor := newClientReporter(Unary, method) - monitor.SentMessage() - err := invoker(ctx, method, req, reply, cc, opts...) - if err != nil { - monitor.ReceivedMessage() - } - monitor.Handled(grpc.Code(err)) - return err -} +var ( + // DefaultClientMetrics is the default instance of ClientMetrics. It is + // intended to be used in conjunction the default Prometheus metrics + // registry. + DefaultClientMetrics = NewClientMetrics() -// StreamServerInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Streaming RPCs. -func StreamClientInterceptor(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { - monitor := newClientReporter(clientStreamType(desc), method) - clientStream, err := streamer(ctx, desc, cc, method, opts...) - if err != nil { - monitor.Handled(grpc.Code(err)) - return nil, err - } - return &monitoredClientStream{clientStream, monitor}, nil -} + // UnaryClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Unary RPCs. + UnaryClientInterceptor = DefaultClientMetrics.UnaryClientInterceptor() -func clientStreamType(desc *grpc.StreamDesc) grpcType { - if desc.ClientStreams && !desc.ServerStreams { - return ClientStream - } else if !desc.ClientStreams && desc.ServerStreams { - return ServerStream - } - return BidiStream -} - -// monitoredClientStream wraps grpc.ClientStream allowing each Sent/Recv of message to increment counters. -type monitoredClientStream struct { - grpc.ClientStream - monitor *clientReporter -} + // StreamClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Streaming RPCs. + StreamClientInterceptor = DefaultClientMetrics.StreamClientInterceptor() +) -func (s *monitoredClientStream) SendMsg(m interface{}) error { - err := s.ClientStream.SendMsg(m) - if err == nil { - s.monitor.SentMessage() - } - return err +func init() { + prom.MustRegister(DefaultClientMetrics.clientStartedCounter) + prom.MustRegister(DefaultClientMetrics.clientHandledCounter) + prom.MustRegister(DefaultClientMetrics.clientStreamMsgReceived) + prom.MustRegister(DefaultClientMetrics.clientStreamMsgSent) } -func (s *monitoredClientStream) RecvMsg(m interface{}) error { - err := s.ClientStream.RecvMsg(m) - if err == nil { - s.monitor.ReceivedMessage() - } else if err == io.EOF { - s.monitor.Handled(codes.OK) - } else { - s.monitor.Handled(grpc.Code(err)) - } - return err +// EnableClientHandlingTimeHistogram turns on recording of handling time of +// RPCs. Histogram metrics can be very expensive for Prometheus to retain and +// query. This function acts on the DefaultClientMetrics variable and the +// default Prometheus metrics registry. +func EnableClientHandlingTimeHistogram(opts ...HistogramOption) { + DefaultClientMetrics.EnableClientHandlingTimeHistogram(opts...) + prom.Register(DefaultClientMetrics.clientHandledHistogram) } diff --git a/client_metrics.go b/client_metrics.go new file mode 100644 index 0000000..aee11fc --- /dev/null +++ b/client_metrics.go @@ -0,0 +1,139 @@ +package grpc_prometheus + +import ( + "io" + + prom "github.com/prometheus/client_golang/prometheus" + "golang.org/x/net/context" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" +) + +// ClientMetrics represents a collection of metrics to be registered on a +// Prometheus metrics registry for a gRPC client. +type ClientMetrics struct { + clientStartedCounter *prom.CounterVec + clientHandledCounter *prom.CounterVec + clientStreamMsgReceived *prom.CounterVec + clientStreamMsgSent *prom.CounterVec + clientHandledHistogramEnabled bool + clientHandledHistogramOpts prom.HistogramOpts + clientHandledHistogram *prom.HistogramVec +} + +// NewClientMetrics returns a ClientMetrics object. Use a new instance of +// ClientMetrics when not using the default Prometheus metrics registry, for +// example when wanting to control which metrics are added to a registry as +// opposed to automatically adding metrics via init functions. +func NewClientMetrics() *ClientMetrics { + return &ClientMetrics{ + clientStartedCounter: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_client_started_total", + Help: "Total number of RPCs started on the client.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + + clientHandledCounter: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_client_handled_total", + Help: "Total number of RPCs completed by the client, regardless of success or failure.", + }, []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}), + + clientStreamMsgReceived: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_client_msg_received_total", + Help: "Total number of RPC stream messages received by the client.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + + clientStreamMsgSent: prom.NewCounterVec( + prom.CounterOpts{ + Name: "grpc_client_msg_sent_total", + Help: "Total number of gRPC stream messages sent by the client.", + }, []string{"grpc_type", "grpc_service", "grpc_method"}), + + clientHandledHistogramEnabled: false, + clientHandledHistogramOpts: prom.HistogramOpts{ + Name: "grpc_client_handling_seconds", + Help: "Histogram of response latency (seconds) of the gRPC until it is finished by the application.", + Buckets: prom.DefBuckets, + }, + clientHandledHistogram: nil, + } +} + +// EnableClientHandlingTimeHistogram turns on recording of handling time of RPCs. +// Histogram metrics can be very expensive for Prometheus to retain and query. +func (m *ClientMetrics) EnableClientHandlingTimeHistogram(opts ...HistogramOption) { + for _, o := range opts { + o(&m.clientHandledHistogramOpts) + } + if !m.clientHandledHistogramEnabled { + m.clientHandledHistogram = prom.NewHistogramVec( + m.clientHandledHistogramOpts, + []string{"grpc_type", "grpc_service", "grpc_method"}, + ) + } + m.clientHandledHistogramEnabled = true +} + +// UnaryClientInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Unary RPCs. +func (m *ClientMetrics) UnaryClientInterceptor() func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + monitor := newClientReporter(m, Unary, method) + monitor.SentMessage() + err := invoker(ctx, method, req, reply, cc, opts...) + if err != nil { + monitor.ReceivedMessage() + } + monitor.Handled(grpc.Code(err)) + return err + } +} + +// StreamServerInterceptor is a gRPC client-side interceptor that provides Prometheus monitoring for Streaming RPCs. +func (m *ClientMetrics) StreamClientInterceptor() func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { + return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { + monitor := newClientReporter(m, clientStreamType(desc), method) + clientStream, err := streamer(ctx, desc, cc, method, opts...) + if err != nil { + monitor.Handled(grpc.Code(err)) + return nil, err + } + return &monitoredClientStream{clientStream, monitor}, nil + } +} + +func clientStreamType(desc *grpc.StreamDesc) grpcType { + if desc.ClientStreams && !desc.ServerStreams { + return ClientStream + } else if !desc.ClientStreams && desc.ServerStreams { + return ServerStream + } + return BidiStream +} + +// monitoredClientStream wraps grpc.ClientStream allowing each Sent/Recv of message to increment counters. +type monitoredClientStream struct { + grpc.ClientStream + monitor *clientReporter +} + +func (s *monitoredClientStream) SendMsg(m interface{}) error { + err := s.ClientStream.SendMsg(m) + if err == nil { + s.monitor.SentMessage() + } + return err +} + +func (s *monitoredClientStream) RecvMsg(m interface{}) error { + err := s.ClientStream.RecvMsg(m) + if err == nil { + s.monitor.ReceivedMessage() + } else if err == io.EOF { + s.monitor.Handled(codes.OK) + } else { + s.monitor.Handled(grpc.Code(err)) + } + return err +} diff --git a/client_reporter.go b/client_reporter.go index 16b7615..cbf1532 100644 --- a/client_reporter.go +++ b/client_reporter.go @@ -7,105 +7,40 @@ import ( "time" "google.golang.org/grpc/codes" - - prom "github.com/prometheus/client_golang/prometheus" -) - -var ( - clientStartedCounter = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "client", - Name: "started_total", - Help: "Total number of RPCs started on the client.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - clientHandledCounter = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "client", - Name: "handled_total", - Help: "Total number of RPCs completed by the client, regardless of success or failure.", - }, []string{"grpc_type", "grpc_service", "grpc_method", "grpc_code"}) - - clientStreamMsgReceived = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "client", - Name: "msg_received_total", - Help: "Total number of RPC stream messages received by the client.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - clientStreamMsgSent = prom.NewCounterVec( - prom.CounterOpts{ - Namespace: "grpc", - Subsystem: "client", - Name: "msg_sent_total", - Help: "Total number of gRPC stream messages sent by the client.", - }, []string{"grpc_type", "grpc_service", "grpc_method"}) - - clientHandledHistogramEnabled = false - clientHandledHistogramOpts = prom.HistogramOpts{ - Namespace: "grpc", - Subsystem: "client", - Name: "handling_seconds", - Help: "Histogram of response latency (seconds) of the gRPC until it is finished by the application.", - Buckets: prom.DefBuckets, - } - clientHandledHistogram *prom.HistogramVec ) -func init() { - prom.MustRegister(clientStartedCounter) - prom.MustRegister(clientHandledCounter) - prom.MustRegister(clientStreamMsgReceived) - prom.MustRegister(clientStreamMsgSent) -} - -// EnableClientHandlingTimeHistogram turns on recording of handling time of RPCs. -// Histogram metrics can be very expensive for Prometheus to retain and query. -func EnableClientHandlingTimeHistogram(opts ...HistogramOption) { - for _, o := range opts { - o(&clientHandledHistogramOpts) - } - if !clientHandledHistogramEnabled { - clientHandledHistogram = prom.NewHistogramVec( - clientHandledHistogramOpts, - []string{"grpc_type", "grpc_service", "grpc_method"}, - ) - prom.Register(clientHandledHistogram) - } - clientHandledHistogramEnabled = true -} - type clientReporter struct { + metrics *ClientMetrics rpcType grpcType serviceName string methodName string startTime time.Time } -func newClientReporter(rpcType grpcType, fullMethod string) *clientReporter { - r := &clientReporter{rpcType: rpcType} - if clientHandledHistogramEnabled { +func newClientReporter(m *ClientMetrics, rpcType grpcType, fullMethod string) *clientReporter { + r := &clientReporter{ + metrics: m, + rpcType: rpcType, + } + if r.metrics.clientHandledHistogramEnabled { r.startTime = time.Now() } r.serviceName, r.methodName = splitMethodName(fullMethod) - clientStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.clientStartedCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() return r } func (r *clientReporter) ReceivedMessage() { - clientStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.clientStreamMsgReceived.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() } func (r *clientReporter) SentMessage() { - clientStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() + r.metrics.clientStreamMsgSent.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Inc() } func (r *clientReporter) Handled(code codes.Code) { - clientHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() - if clientHandledHistogramEnabled { - clientHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) + r.metrics.clientHandledCounter.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName, code.String()).Inc() + if r.metrics.clientHandledHistogramEnabled { + r.metrics.clientHandledHistogram.WithLabelValues(string(r.rpcType), r.serviceName, r.methodName).Observe(time.Since(r.startTime).Seconds()) } } diff --git a/server.go b/server.go index 036cfe0..322f990 100644 --- a/server.go +++ b/server.go @@ -11,6 +11,9 @@ import ( ) var ( + // DefaultServerMetrics is the default instance of ServerMetrics. It is + // intended to be used in conjunction the default Prometheus metrics + // registry. DefaultServerMetrics = NewServerMetrics() // UnaryServerInterceptor is a gRPC server-side interceptor that provides Prometheus monitoring for Unary RPCs. @@ -27,15 +30,18 @@ func init() { prom.MustRegister(DefaultServerMetrics.serverStreamMsgSent) } -// PreregisterServices takes a gRPC server and pre-initializes all counters to 0. -// This allows for easier monitoring in Prometheus (no missing metrics), and should be called *after* all services have -// been registered with the server. +// Register takes a gRPC server and pre-initializes all counters to 0. This +// allows for easier monitoring in Prometheus (no missing metrics), and should +// be called *after* all services have been registered with the server. This +// function acts on the DefaultServerMetrics variable. func Register(server *grpc.Server) { DefaultServerMetrics.InitializeMetrics(server) } -// EnableHandlingTimeHistogram turns on recording of handling time of RPCs for server-side interceptors. -// Histogram metrics can be very expensive for Prometheus to retain and query. +// EnableHandlingTimeHistogram turns on recording of handling time +// of RPCs. Histogram metrics can be very expensive for Prometheus +// to retain and query. This function acts on the DefaultServerMetrics +// variable and the default Prometheus metrics registry. func EnableHandlingTimeHistogram(opts ...HistogramOption) { DefaultServerMetrics.EnableHandlingTimeHistogram(opts...) prom.Register(DefaultServerMetrics.serverHandledHistogram) diff --git a/server_metrics.go b/server_metrics.go index 6c47139..988d25b 100644 --- a/server_metrics.go +++ b/server_metrics.go @@ -6,6 +6,8 @@ import ( "google.golang.org/grpc" ) +// ServerMetrics represents a collection of metrics to be registered on a +// Prometheus metrics registry for a gRPC server. type ServerMetrics struct { serverStartedCounter *prom.CounterVec serverHandledCounter *prom.CounterVec @@ -16,6 +18,10 @@ type ServerMetrics struct { serverHandledHistogram *prom.HistogramVec } +// NewServerMetrics returns a ServerMetrics object. Use a new instance of +// ServerMetrics when not using the default Prometheus metrics registry, for +// example when wanting to control which metrics are added to a registry as +// opposed to automatically adding metrics via init functions. func NewServerMetrics() *ServerMetrics { return &ServerMetrics{ serverStartedCounter: prom.NewCounterVec( @@ -55,6 +61,10 @@ func WithHistogramBuckets(buckets []float64) HistogramOption { return func(o *prom.HistogramOpts) { o.Buckets = buckets } } +// EnableHandlingTimeHistogram enables histograms being registered when +// registering the ServerMetrics on a Prometheus registry. Histograms can be +// expensive on Prometheus servers. It takes options to configure histogram +// options such as the defined buckets. func (m *ServerMetrics) EnableHandlingTimeHistogram(opts ...HistogramOption) { for _, o := range opts { o(&m.serverHandledHistogramOpts) @@ -92,6 +102,9 @@ func (m *ServerMetrics) StreamServerInterceptor() func(srv interface{}, ss grpc. } } +// InitializeMetrics initializes all metrics, with their appropriate null +// value, for all gRPC methods registered on a gRPC server. This is useful, to +// ensure that all metrics exist when collecting and querying. func (m *ServerMetrics) InitializeMetrics(server *grpc.Server) { serviceInfo := server.GetServiceInfo() for serviceName, info := range serviceInfo { @@ -101,6 +114,9 @@ func (m *ServerMetrics) InitializeMetrics(server *grpc.Server) { } } +// Register registers all server metrics in a given metrics registry. Depending +// on histogram options and whether they are enabled, histogram metrics are +// also registered. func (m *ServerMetrics) Register(r prom.Registerer) error { err := r.Register(m.serverStartedCounter) if err != nil { @@ -129,6 +145,7 @@ func (m *ServerMetrics) Register(r prom.Registerer) error { return nil } +// MustRegister tries to register all server metrics and panics on an error. func (m *ServerMetrics) MustRegister(r prom.Registerer) { err := m.Register(r) if err != nil {