runtime/pprof: plumb labels for goroutine profiles

Goroutines are directly associated with labels. It's relatively easy to plumb those through without creating goroutine-locals in the wild. This is accomplished by splitting out most of the code from the public `runtime.GoroutineProfile` into a new unexported `runtime.goroutineProfileWithLabels`, which then has a thin wrapper linked into the `runtime/pprof` package as `runtime_goroutineProfileWithLabels`. (mirroring the way labels get associated with the `g` for a goroutine in the first place) Per-#6104, OS-thread creation profiles are a bit useless, as `M`s tend to be created be created by a background goroutine. As such, I decided not to add support for capturing the labels at `M`-creation-time, since the stack-traces seem to always come out `nil` for my simple test binaries. This change currently provides labels for debug=0 and debug=1, as debug=2 is currently entirely generated by the runtime package and I don't see a clean way of getting the `labelMap` type handled properly within the `runtime` package. Update the comment added in cl/131275 to mention goroutine support for labels. Updates #23458 Change-Id: Ia4b558893d7d10156b77121cd9b70c4ccd9e1889 Reviewed-on: https://go-review.googlesource.com/c/go/+/189318 Reviewed-by: Hyang-Ah Hana Kim <[email protected]>
golang · Apr 22, 2020 · 38c2c12 · 38c2c12
1 parent 0329c91
commit 38c2c12
Show file tree

Hide file tree

Showing 5 changed files with 193 additions and 29 deletions.
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
@@ -711,13 +711,16 @@ func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
 	return
 }
 
-// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
-// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
-// If len(p) < n, GoroutineProfile does not change p and returns n, false.
-//
-// Most clients should use the runtime/pprof package instead
-// of calling GoroutineProfile directly.
-func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
+func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+	return goroutineProfileWithLabels(p, labels)
+}
+
+// labels may be nil. If labels is non-nil, it must have the same length as p.
+func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+	if labels != nil && len(labels) != len(p) {
+		labels = nil
+	}
 	gp := getg()
 
 	isOK := func(gp1 *g) bool {
@@ -737,7 +740,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
 
 	if n <= len(p) {
 		ok = true
-		r := p
+		r, lbl := p, labels
 
 		// Save current goroutine.
 		sp := getcallersp()
@@ -747,6 +750,12 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
 		})
 		r = r[1:]
 
+		// If we have a place to put our goroutine labelmap, insert it there.
+		if labels != nil {
+			lbl[0] = gp.labels
+			lbl = lbl[1:]
+		}
+
 		// Save other goroutines.
 		for _, gp1 := range allgs {
 			if isOK(gp1) {
@@ -756,16 +765,30 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) {
 					break
 				}
 				saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
+				if labels != nil {
+					lbl[0] = gp1.labels
+					lbl = lbl[1:]
+				}
 				r = r[1:]
 			}
 		}
 	}
 
 	startTheWorld()
-
 	return n, ok
 }
 
+// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
+// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
+// If len(p) < n, GoroutineProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling GoroutineProfile directly.
+func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+
+	return goroutineProfileWithLabels(p, nil)
+}
+
 func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
 	n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
 	if n < len(r.Stack0) {

diff --git a/src/runtime/pprof/label.go b/src/runtime/pprof/label.go
@@ -6,6 +6,9 @@ package pprof
 
 import (
 	"context"
+	"fmt"
+	"sort"
+	"strings"
 )
 
 type label struct {
@@ -34,6 +37,23 @@ func labelValue(ctx context.Context) labelMap {
 // that admits incremental immutable modification more efficiently.
 type labelMap map[string]string
 
+// String statisfies Stringer and returns key, value pairs in a consistent
+// order.
+func (l *labelMap) String() string {
+	if l == nil {
+		return ""
+	}
+	keyVals := make([]string, 0, len(*l))
+
+	for k, v := range *l {
+		keyVals = append(keyVals, fmt.Sprintf("%q:%q", k, v))
+	}
+
+	sort.Strings(keyVals)
+
+	return "{" + strings.Join(keyVals, ", ") + "}"
+}
+
 // WithLabels returns a new context.Context with the given labels added.
 // A label overwrites a prior label with the same key.
 func WithLabels(ctx context.Context, labels LabelSet) context.Context {
@@ -54,7 +74,8 @@ func WithLabels(ctx context.Context, labels LabelSet) context.Context {
 // Labels takes an even number of strings representing key-value pairs
 // and makes a LabelSet containing them.
 // A label overwrites a prior label with the same key.
-// Currently only CPU profile utilizes labels information.
+// Currently only the CPU and goroutine profiles utilize any labels
+// information.
 // See https://golang.org/issue/23458 for details.
 func Labels(args ...string) LabelSet {
 	if len(args)%2 != 0 {

diff --git a/src/runtime/pprof/label_test.go b/src/runtime/pprof/label_test.go
@@ -80,3 +80,35 @@ func TestContextLabels(t *testing.T) {
 		t.Errorf("(sorted) labels on context: got %v, want %v", gotLabels, wantLabels)
 	}
 }
+
+func TestLabelMapStringer(t *testing.T) {
+	for _, tbl := range []struct {
+		m        labelMap
+		expected string
+	}{
+		{
+			m: labelMap{
+				// empty map
+			},
+			expected: "{}",
+		}, {
+			m: labelMap{
+				"foo": "bar",
+			},
+			expected: `{"foo":"bar"}`,
+		}, {
+			m: labelMap{
+				"foo":             "bar",
+				"key1":            "value1",
+				"key2":            "value2",
+				"key3":            "value3",
+				"key4WithNewline": "\nvalue4",
+			},
+			expected: `{"foo":"bar", "key1":"value1", "key2":"value2", "key3":"value3", "key4WithNewline":"\nvalue4"}`,
+		},
+	} {
+		if got := tbl.m.String(); tbl.expected != got {
+			t.Errorf("%#v.String() = %q; want %q", tbl.m, got, tbl.expected)
+		}
+	}
+}
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
@@ -357,6 +357,7 @@ type stackProfile [][]uintptr
 
 func (x stackProfile) Len() int              { return len(x) }
 func (x stackProfile) Stack(i int) []uintptr { return x[i] }
+func (x stackProfile) Label(i int) *labelMap { return nil }
 
 // A countProfile is a set of stack traces to be printed as counts
 // grouped by stack trace. There are multiple implementations:
@@ -365,6 +366,7 @@ func (x stackProfile) Stack(i int) []uintptr { return x[i] }
 type countProfile interface {
 	Len() int
 	Stack(i int) []uintptr
+	Label(i int) *labelMap
 }
 
 // printCountCycleProfile outputs block profile records (for block or mutex profiles)
@@ -402,20 +404,24 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler fun
 func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
 	// Build count of each stack.
 	var buf bytes.Buffer
-	key := func(stk []uintptr) string {
+	key := func(stk []uintptr, lbls *labelMap) string {
 		buf.Reset()
 		fmt.Fprintf(&buf, "@")
 		for _, pc := range stk {
 			fmt.Fprintf(&buf, " %#x", pc)
 		}
+		if lbls != nil {
+			buf.WriteString("\n# labels: ")
+			buf.WriteString(lbls.String())
+		}
 		return buf.String()
 	}
 	count := map[string]int{}
 	index := map[string]int{}
 	var keys []string
 	n := p.Len()
 	for i := 0; i < n; i++ {
-		k := key(p.Stack(i))
+		k := key(p.Stack(i), p.Label(i))
 		if count[k] == 0 {
 			index[k] = i
 			keys = append(keys, k)
@@ -449,7 +455,16 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
 		// For count profiles, all stack addresses are
 		// return PCs, which is what appendLocsForStack expects.
 		locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
-		b.pbSample(values, locs, nil)
+		idx := index[k]
+		var labels func()
+		if p.Label(idx) != nil {
+			labels = func() {
+				for k, v := range *p.Label(idx) {
+					b.pbLabel(tagSample_Label, k, v, 0)
+				}
+			}
+		}
+		b.pbSample(values, locs, labels)
 	}
 	b.build()
 	return nil
@@ -645,20 +660,28 @@ func countThreadCreate() int {
 
 // writeThreadCreate writes the current runtime ThreadCreateProfile to w.
 func writeThreadCreate(w io.Writer, debug int) error {
-	return writeRuntimeProfile(w, debug, "threadcreate", runtime.ThreadCreateProfile)
+	// Until https://golang.org/issues/6104 is addressed, wrap
+	// ThreadCreateProfile because there's no point in tracking labels when we
+	// don't get any stack-traces.
+	return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
+		return runtime.ThreadCreateProfile(p)
+	})
 }
 
 // countGoroutine returns the number of goroutines.
 func countGoroutine() int {
 	return runtime.NumGoroutine()
 }
 
+// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go
+func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
+
 // writeGoroutine writes the current runtime GoroutineProfile to w.
 func writeGoroutine(w io.Writer, debug int) error {
 	if debug >= 2 {
 		return writeGoroutineStacks(w)
 	}
-	return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile)
+	return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels)
 }
 
 func writeGoroutineStacks(w io.Writer) error {
@@ -682,35 +705,41 @@ func writeGoroutineStacks(w io.Writer) error {
 	return err
 }
 
-func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error {
+func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error {
 	// Find out how many records there are (fetch(nil)),
 	// allocate that many records, and get the data.
 	// There's a race—more records might be added between
 	// the two calls—so allocate a few extra records for safety
 	// and also try again if we're very unlucky.
 	// The loop should only execute one iteration in the common case.
 	var p []runtime.StackRecord
-	n, ok := fetch(nil)
+	var labels []unsafe.Pointer
+	n, ok := fetch(nil, nil)
 	for {
 		// Allocate room for a slightly bigger profile,
 		// in case a few more entries have been added
 		// since the call to ThreadProfile.
 		p = make([]runtime.StackRecord, n+10)
-		n, ok = fetch(p)
+		labels = make([]unsafe.Pointer, n+10)
+		n, ok = fetch(p, labels)
 		if ok {
 			p = p[0:n]
 			break
 		}
 		// Profile grew; try again.
 	}
 
-	return printCountProfile(w, debug, name, runtimeProfile(p))
+	return printCountProfile(w, debug, name, &runtimeProfile{p, labels})
 }
 
-type runtimeProfile []runtime.StackRecord
+type runtimeProfile struct {
+	stk    []runtime.StackRecord
+	labels []unsafe.Pointer
+}
 
-func (p runtimeProfile) Len() int              { return len(p) }
-func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() }
+func (p *runtimeProfile) Len() int              { return len(p.stk) }
+func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() }
+func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }
 
 var cpu struct {
 	sync.Mutex