diff --git a/doc/next/6-stdlib/99-minor/runtime/pprof/43669.md b/doc/next/6-stdlib/99-minor/runtime/pprof/43669.md new file mode 100644 index 0000000000000..119308b46a2d6 --- /dev/null +++ b/doc/next/6-stdlib/99-minor/runtime/pprof/43669.md @@ -0,0 +1,2 @@ +The maximum stack depth for alloc, mutex, block, threadcreate and goroutine +profiles has been raised from 32 to 128 frames. diff --git a/src/cmd/internal/objabi/pkgspecial.go b/src/cmd/internal/objabi/pkgspecial.go index 6c2425d3ffa38..2925896bd8bef 100644 --- a/src/cmd/internal/objabi/pkgspecial.go +++ b/src/cmd/internal/objabi/pkgspecial.go @@ -58,6 +58,7 @@ var runtimePkgs = []string{ "internal/godebugs", "internal/goexperiment", "internal/goos", + "internal/profilerecord", "internal/stringslite", } diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index c83ad23cc6660..067298cf42507 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -45,7 +45,7 @@ var depsRules = ` internal/goarch, internal/godebugs, internal/goexperiment, internal/goos, internal/byteorder, internal/goversion, internal/nettrace, internal/platform, - internal/trace/traceviewer/format, + internal/profilerecord, internal/trace/traceviewer/format, log/internal, unicode/utf8, unicode/utf16, unicode, unsafe; @@ -65,7 +65,8 @@ var depsRules = ` internal/goarch, internal/godebugs, internal/goexperiment, - internal/goos + internal/goos, + internal/profilerecord < internal/bytealg < internal/stringslite < internal/itoa diff --git a/src/internal/profilerecord/profilerecord.go b/src/internal/profilerecord/profilerecord.go new file mode 100644 index 0000000000000..a5efdced8f77c --- /dev/null +++ b/src/internal/profilerecord/profilerecord.go @@ -0,0 +1,28 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package profilerecord holds internal types used to represent profiling +// records with deep stack traces. +// +// TODO: Consider moving this to internal/runtime, see golang.org/issue/65355. +package profilerecord + +type StackRecord struct { + Stack []uintptr +} + +type MemProfileRecord struct { + AllocBytes, FreeBytes int64 + AllocObjects, FreeObjects int64 + Stack []uintptr +} + +func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes } +func (r *MemProfileRecord) InUseObjects() int64 { return r.AllocObjects - r.FreeObjects } + +type BlockProfileRecord struct { + Count int64 + Cycles int64 + Stack []uintptr +} diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go index b2898ba9094e1..80490aa585ca2 100644 --- a/src/runtime/cpuprof.go +++ b/src/runtime/cpuprof.go @@ -209,8 +209,8 @@ func CPUProfile() []byte { panic("CPUProfile no longer available") } -//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond -func runtime_pprof_runtime_cyclesPerSecond() int64 { +//go:linkname pprof_cyclesPerSecond +func pprof_cyclesPerSecond() int64 { return ticksPerSecond() } diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index f0e5533cec282..df0f2552af085 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -9,6 +9,7 @@ package runtime import ( "internal/abi" + "internal/profilerecord" "internal/runtime/atomic" "runtime/internal/sys" "unsafe" @@ -56,7 +57,7 @@ const ( // includes inlined frames. 
We may record more than this many // "physical" frames when using frame pointer unwinding to account // for deferred handling of skipping frames & inline expansion. - maxLogicalStack = 32 + maxLogicalStack = 128 // maxSkip is to account for deferred inline expansion // when using frame pointer unwinding. We record the stack // with "physical" frame pointers but handle skipping "logical" @@ -445,7 +446,16 @@ func mProf_PostSweep() { // Called by malloc to record a profiled block. func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) { - nstk := callers(4, mp.profStack) + if mp.profStack == nil { + // mp.profStack is nil if we happen to sample an allocation during the + // initialization of mp. This case is rare, so we just ignore such + // allocations. Change MemProfileRate to 1 if you need to reproduce such + // cases for testing purposes. + return + } + // Only use the part of mp.profStack we need and ignore the extra space + // reserved for delayed inline expansion with frame pointer unwinding. + nstk := callers(4, mp.profStack[:maxLogicalStack]) index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future)) b := stkbucket(memProfile, size, mp.profStack[:nstk], true) @@ -536,7 +546,6 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) { print("requested skip=", skip) throw("invalid skip value") } - gp := getg() mp := acquirem() // we must not be preempted while accessing profstack nstk := 1 @@ -937,6 +946,16 @@ func (r *MemProfileRecord) Stack() []uintptr { // the testing package's -test.memprofile flag instead // of calling MemProfile directly. func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) { + return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) { + copyMemProfileRecord(&p[0], r) + p = p[1:] + }) +} + +// memProfileInternal returns the number of records n in the profile. If there +// are less than size records, copyFn is invoked for each record, and ok returns +// true. +func memProfileInternal(size int, inuseZero bool, copyFn func(profilerecord.MemProfileRecord)) (n int, ok bool) { cycle := mProfCycle.read() // If we're between mProf_NextCycle and mProf_Flush, take care // of flushing to the active profile so we only have to look @@ -976,14 +995,19 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) { } } } - if n <= len(p) { + if n <= size { ok = true - idx := 0 for b := head; b != nil; b = b.allnext { mp := b.mp() if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes { - record(&p[idx], b) - idx++ + r := profilerecord.MemProfileRecord{ + AllocBytes: int64(mp.active.alloc_bytes), + FreeBytes: int64(mp.active.free_bytes), + AllocObjects: int64(mp.active.allocs), + FreeObjects: int64(mp.active.frees), + Stack: b.stk(), + } + copyFn(r) } } } @@ -991,24 +1015,30 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) { return } -// Write b's data to r. 
-func record(r *MemProfileRecord, b *bucket) { - mp := b.mp() - r.AllocBytes = int64(mp.active.alloc_bytes) - r.FreeBytes = int64(mp.active.free_bytes) - r.AllocObjects = int64(mp.active.allocs) - r.FreeObjects = int64(mp.active.frees) +func copyMemProfileRecord(dst *MemProfileRecord, src profilerecord.MemProfileRecord) { + dst.AllocBytes = src.AllocBytes + dst.FreeBytes = src.FreeBytes + dst.AllocObjects = src.AllocObjects + dst.FreeObjects = src.FreeObjects if raceenabled { - racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile)) + racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile)) } if msanenabled { - msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) + msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0)) } if asanenabled { - asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) + asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0)) } - i := copy(r.Stack0[:], b.stk()) - clear(r.Stack0[i:]) + i := copy(dst.Stack0[:], src.Stack) + clear(dst.Stack0[i:]) +} + +//go:linkname pprof_memProfileInternal +func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool) { + return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) { + p[0] = r + p = p[1:] + }) } func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) { @@ -1037,41 +1067,66 @@ type BlockProfileRecord struct { // the [testing] package's -test.blockprofile flag instead // of calling BlockProfile directly. func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { + return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) { + copyBlockProfileRecord(&p[0], r) + p = p[1:] + }) +} + +// blockProfileInternal returns the number of records n in the profile. If there +// are less than size records, copyFn is invoked for each record, and ok returns +// true. +func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) { lock(&profBlockLock) head := (*bucket)(bbuckets.Load()) for b := head; b != nil; b = b.allnext { n++ } - if n <= len(p) { + if n <= size { ok = true for b := head; b != nil; b = b.allnext { bp := b.bp() - r := &p[0] - r.Count = int64(bp.count) + r := profilerecord.BlockProfileRecord{ + Count: int64(bp.count), + Cycles: bp.cycles, + Stack: b.stk(), + } // Prevent callers from having to worry about division by zero errors. // See discussion on http://golang.org/cl/299991. 
if r.Count == 0 { r.Count = 1 } - r.Cycles = bp.cycles - if raceenabled { - racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile)) - } - if msanenabled { - msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) - } - if asanenabled { - asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0)) - } - i := fpunwindExpand(r.Stack0[:], b.stk()) - clear(r.Stack0[i:]) - p = p[1:] + copyFn(r) } } unlock(&profBlockLock) return } +func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) { + dst.Count = src.Count + dst.Cycles = src.Cycles + if raceenabled { + racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile)) + } + if msanenabled { + msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0)) + } + if asanenabled { + asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0)) + } + i := fpunwindExpand(dst.Stack0[:], src.Stack) + clear(dst.Stack0[i:]) +} + +//go:linkname pprof_blockProfileInternal +func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) { + return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) { + p[0] = r + p = p[1:] + }) +} + // MutexProfile returns n, the number of records in the current mutex profile. // If len(p) >= n, MutexProfile copies the profile into p and returns n, true. // Otherwise, MutexProfile does not change p, and returns n, false. @@ -1079,27 +1134,45 @@ func BlockProfile(p []BlockProfileRecord) (n int, ok bool) { // Most clients should use the [runtime/pprof] package // instead of calling MutexProfile directly. func MutexProfile(p []BlockProfileRecord) (n int, ok bool) { + return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) { + copyBlockProfileRecord(&p[0], r) + p = p[1:] + }) +} + +// mutexProfileInternal returns the number of records n in the profile. If there +// are less than size records, copyFn is invoked for each record, and ok returns +// true. +func mutexProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) { lock(&profBlockLock) head := (*bucket)(xbuckets.Load()) for b := head; b != nil; b = b.allnext { n++ } - if n <= len(p) { + if n <= size { ok = true for b := head; b != nil; b = b.allnext { bp := b.bp() - r := &p[0] - r.Count = int64(bp.count) - r.Cycles = bp.cycles - i := fpunwindExpand(r.Stack0[:], b.stk()) - clear(r.Stack0[i:]) - p = p[1:] + r := profilerecord.BlockProfileRecord{ + Count: int64(bp.count), + Cycles: bp.cycles, + Stack: b.stk(), + } + copyFn(r) } } unlock(&profBlockLock) return } +//go:linkname pprof_mutexProfileInternal +func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) { + return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) { + p[0] = r + p = p[1:] + }) +} + // ThreadCreateProfile returns n, the number of records in the thread creation profile. // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true. // If len(p) < n, ThreadCreateProfile does not change p and returns n, false. @@ -1107,28 +1180,45 @@ func MutexProfile(p []BlockProfileRecord) (n int, ok bool) { // Most clients should use the runtime/pprof package instead // of calling ThreadCreateProfile directly. 
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) { + return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) { + copy(p[0].Stack0[:], r.Stack) + p = p[1:] + }) +} + +// threadCreateProfileInternal returns the number of records n in the profile. +// If there are less than size records, copyFn is invoked for each record, and +// ok returns true. +func threadCreateProfileInternal(size int, copyFn func(profilerecord.StackRecord)) (n int, ok bool) { first := (*m)(atomic.Loadp(unsafe.Pointer(&allm))) for mp := first; mp != nil; mp = mp.alllink { n++ } - if n <= len(p) { + if n <= size { ok = true - i := 0 for mp := first; mp != nil; mp = mp.alllink { - p[i].Stack0 = mp.createstack - i++ + r := profilerecord.StackRecord{Stack: mp.createstack[:]} + copyFn(r) } } return } -//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels -func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +//go:linkname pprof_threadCreateInternal +func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) { + return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) { + p[0] = r + p = p[1:] + }) +} + +//go:linkname pprof_goroutineProfileWithLabels +func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { return goroutineProfileWithLabels(p, labels) } // labels may be nil. If labels is non-nil, it must have the same length as p. -func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { if labels != nil && len(labels) != len(p) { labels = nil } @@ -1140,7 +1230,7 @@ var goroutineProfile = struct { sema uint32 active bool offset atomic.Int64 - records []StackRecord + records []profilerecord.StackRecord labels []unsafe.Pointer }{ sema: 1, @@ -1179,7 +1269,7 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) } -func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough // allocation estimate without bothering to STW. 
As long as @@ -1192,6 +1282,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point ourg := getg() + pcbuf := makeProfStack() // see saveg() for explanation stw := stopTheWorld(stwGoroutineProfile) // Using gcount while the world is stopped should give us a consistent view // of the number of live goroutines, minus the number of goroutines that are @@ -1218,7 +1309,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point sp := getcallersp() pc := getcallerpc() systemstack(func() { - saveg(pc, sp, ourg, &p[0]) + saveg(pc, sp, ourg, &p[0], pcbuf) }) if labels != nil { labels[0] = ourg.labels @@ -1240,7 +1331,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point if fing != nil { fing.goroutineProfiled.Store(goroutineProfileSatisfied) if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) { - doRecordGoroutineProfile(fing) + doRecordGoroutineProfile(fing, pcbuf) } } startTheWorld(stw) @@ -1257,7 +1348,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point // call will start by adding itself to the profile (before the act of // executing can cause any changes in its stack). forEachGRace(func(gp1 *g) { - tryRecordGoroutineProfile(gp1, Gosched) + tryRecordGoroutineProfile(gp1, pcbuf, Gosched) }) stw = stopTheWorld(stwGoroutineProfileCleanup) @@ -1301,13 +1392,13 @@ func tryRecordGoroutineProfileWB(gp1 *g) { if getg().m.p.ptr() == nil { throw("no P available, write barriers are forbidden") } - tryRecordGoroutineProfile(gp1, osyield) + tryRecordGoroutineProfile(gp1, nil, osyield) } // tryRecordGoroutineProfile ensures that gp1 has the appropriate representation // in the current goroutine profile: either that it should not be profiled, or // that a snapshot of its call stack and labels are now in the profile. -func tryRecordGoroutineProfile(gp1 *g, yield func()) { +func tryRecordGoroutineProfile(gp1 *g, pcbuf []uintptr, yield func()) { if readgstatus(gp1) == _Gdead { // Dead goroutines should not appear in the profile. Goroutines that // start while profile collection is active will get goroutineProfiled @@ -1342,7 +1433,7 @@ func tryRecordGoroutineProfile(gp1 *g, yield func()) { // in this limbo. mp := acquirem() if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) { - doRecordGoroutineProfile(gp1) + doRecordGoroutineProfile(gp1, pcbuf) gp1.goroutineProfiled.Store(goroutineProfileSatisfied) } releasem(mp) @@ -1356,7 +1447,7 @@ func tryRecordGoroutineProfile(gp1 *g, yield func()) { // goroutine that is coordinating the goroutine profile (running on its own // stack), or from the scheduler in preparation to execute gp1 (running on the // system stack). -func doRecordGoroutineProfile(gp1 *g) { +func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { if readgstatus(gp1) == _Grunning { print("doRecordGoroutineProfile gp1=", gp1.goid, "\n") throw("cannot read stack of running goroutine") @@ -1379,14 +1470,14 @@ func doRecordGoroutineProfile(gp1 *g) { // set gp1.goroutineProfiled to goroutineProfileInProgress and so are still // preventing it from being truly _Grunnable. So we'll use the system stack // to avoid schedule delays. 
- systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) }) + systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) }) if goroutineProfile.labels != nil { goroutineProfile.labels[offset] = gp1.labels } } -func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { gp := getg() isOK := func(gp1 *g) bool { @@ -1395,6 +1486,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false) } + pcbuf := makeProfStack() // see saveg() for explanation stw := stopTheWorld(stwGoroutineProfile) // World is stopped, no locking required. @@ -1413,7 +1505,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n sp := getcallersp() pc := getcallerpc() systemstack(func() { - saveg(pc, sp, gp, &r[0]) + saveg(pc, sp, gp, &r[0], pcbuf) }) r = r[1:] @@ -1438,7 +1530,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n // The world is stopped, so it cannot use cgocall (which will be // blocked at exitsyscall). Do it on the system stack so it won't // call into the schedular (see traceback.go:cgoContextPCs). - systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) }) + systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0], pcbuf) }) if labels != nil { lbl[0] = gp1.labels lbl = lbl[1:] @@ -1462,17 +1554,41 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n // Most clients should use the [runtime/pprof] package instead // of calling GoroutineProfile directly. func GoroutineProfile(p []StackRecord) (n int, ok bool) { + records := make([]profilerecord.StackRecord, len(p)) + n, ok = goroutineProfileInternal(records) + if !ok { + return + } + for i, mr := range records[0:n] { + copy(p[i].Stack0[:], mr.Stack) + } + return +} +func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) { return goroutineProfileWithLabels(p, nil) } -func saveg(pc, sp uintptr, gp *g, r *StackRecord) { +func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) { + // To reduce memory usage, we want to allocate a r.Stack that is just big + // enough to hold gp's stack trace. Naively we might achieve this by + // recording our stack trace into mp.profStack, and then allocating a + // r.Stack of the right size. However, mp.profStack is also used for + // allocation profiling, so it could get overwritten if the slice allocation + // gets profiled. So instead we record the stack trace into a temporary + // pcbuf which is usually given to us by our caller. When it's not, we have + // to allocate one here. This will only happen for goroutines that were in a + // syscall when the goroutine profile started or for goroutines that manage + // to execute before we finish iterating over all the goroutines. 
+ if pcbuf == nil { + pcbuf = makeProfStack() + } + var u unwinder u.initAt(pc, sp, 0, gp, unwindSilentErrors) - n := tracebackPCs(&u, 0, r.Stack0[:]) - if n < len(r.Stack0) { - r.Stack0[n] = 0 - } + n := tracebackPCs(&u, 0, pcbuf) + r.Stack = make([]uintptr, n) + copy(r.Stack, pcbuf) } // Stack formats a stack trace of the calling goroutine into buf diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index e352b39caf5de..0ef217eef8f16 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -76,6 +76,7 @@ import ( "bufio" "fmt" "internal/abi" + "internal/profilerecord" "io" "runtime" "sort" @@ -411,7 +412,7 @@ type countProfile interface { // as the pprof-proto format output. Translations from cycle count to time duration // are done because The proto expects count and time (nanoseconds) instead of count // and the number of cycles for block, contention profiles. -func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error { +func printCountCycleProfile(w io.Writer, countName, cycleName string, records []profilerecord.BlockProfileRecord) error { // Output profile in protobuf form. b := newProfileBuilder(w) b.pbValueType(tagProfile_PeriodType, countName, "count") @@ -419,16 +420,18 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records [] b.pbValueType(tagProfile_SampleType, countName, "count") b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds") - cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9 + cpuGHz := float64(pprof_cyclesPerSecond()) / 1e9 values := []int64{0, 0} var locs []uint64 + expandedStack := pprof_makeProfStack() for _, r := range records { values[0] = r.Count values[1] = int64(float64(r.Cycles) / cpuGHz) // For count profiles, all stack addresses are // return PCs, which is what appendLocsForStack expects. - locs = b.appendLocsForStack(locs[:0], r.Stack()) + n := pprof_fpunwindExpand(expandedStack[:], r.Stack) + locs = b.appendLocsForStack(locs[:0], expandedStack[:n]) b.pbSample(values, locs, nil) } b.build() @@ -593,14 +596,14 @@ func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { // the two calls—so allocate a few extra records for safety // and also try again if we're very unlucky. // The loop should only execute one iteration in the common case. - var p []runtime.MemProfileRecord - n, ok := runtime.MemProfile(nil, true) + var p []profilerecord.MemProfileRecord + n, ok := pprof_memProfileInternal(nil, true) for { // Allocate room for a slightly bigger profile, // in case a few more entries have been added // since the call to MemProfile. - p = make([]runtime.MemProfileRecord, n+50) - n, ok = runtime.MemProfile(p, true) + p = make([]profilerecord.MemProfileRecord, n+50) + n, ok = pprof_memProfileInternal(p, true) if ok { p = p[0:n] break @@ -654,11 +657,11 @@ func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { fmt.Fprintf(w, "%d: %d [%d: %d] @", r.InUseObjects(), r.InUseBytes(), r.AllocObjects, r.AllocBytes) - for _, pc := range r.Stack() { + for _, pc := range r.Stack { fmt.Fprintf(w, " %#x", pc) } fmt.Fprintf(w, "\n") - printStackRecord(w, r.Stack(), false) + printStackRecord(w, r.Stack, false) } // Print memstats information too. @@ -713,8 +716,8 @@ func writeThreadCreate(w io.Writer, debug int) error { // Until https://golang.org/issues/6104 is addressed, wrap // ThreadCreateProfile because there's no point in tracking labels when we // don't get any stack-traces. 
- return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { - return runtime.ThreadCreateProfile(p) + return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { + return pprof_threadCreateInternal(p) }) } @@ -723,15 +726,12 @@ func countGoroutine() int { return runtime.NumGoroutine() } -// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go -func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool) - // writeGoroutine writes the current runtime GoroutineProfile to w. func writeGoroutine(w io.Writer, debug int) error { if debug >= 2 { return writeGoroutineStacks(w) } - return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels) + return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels) } func writeGoroutineStacks(w io.Writer) error { @@ -755,14 +755,14 @@ func writeGoroutineStacks(w io.Writer) error { return err } -func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error { +func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error { // Find out how many records there are (fetch(nil)), // allocate that many records, and get the data. // There's a race—more records might be added between // the two calls—so allocate a few extra records for safety // and also try again if we're very unlucky. // The loop should only execute one iteration in the common case. - var p []runtime.StackRecord + var p []profilerecord.StackRecord var labels []unsafe.Pointer n, ok := fetch(nil, nil) @@ -770,7 +770,7 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runti // Allocate room for a slightly bigger profile, // in case a few more entries have been added // since the call to ThreadProfile. - p = make([]runtime.StackRecord, n+10) + p = make([]profilerecord.StackRecord, n+10) labels = make([]unsafe.Pointer, n+10) n, ok = fetch(p, labels) if ok { @@ -784,12 +784,12 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runti } type runtimeProfile struct { - stk []runtime.StackRecord + stk []profilerecord.StackRecord labels []unsafe.Pointer } func (p *runtimeProfile) Len() int { return len(p.stk) } -func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() } +func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack } func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } var cpu struct { @@ -894,20 +894,20 @@ func countMutex() int { // writeBlock writes the current blocking profile to w. func writeBlock(w io.Writer, debug int) error { - return writeProfileInternal(w, debug, "contention", runtime.BlockProfile) + return writeProfileInternal(w, debug, "contention", pprof_blockProfileInternal) } // writeMutex writes the current mutex profile to w. func writeMutex(w io.Writer, debug int) error { - return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile) + return writeProfileInternal(w, debug, "mutex", pprof_mutexProfileInternal) } // writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters. 
-func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)) error { - var p []runtime.BlockProfileRecord +func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]profilerecord.BlockProfileRecord) (int, bool)) error { + var p []profilerecord.BlockProfileRecord n, ok := runtimeProfile(nil) for { - p = make([]runtime.BlockProfileRecord, n+50) + p = make([]profilerecord.BlockProfileRecord, n+50) n, ok = runtimeProfile(p) if ok { p = p[:n] @@ -926,19 +926,22 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu w = tw fmt.Fprintf(w, "--- %v:\n", name) - fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + fmt.Fprintf(w, "cycles/second=%v\n", pprof_cyclesPerSecond()) if name == "mutex" { fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1)) } + expandedStack := pprof_makeProfStack() for i := range p { r := &p[i] fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) - for _, pc := range r.Stack() { + n := pprof_fpunwindExpand(expandedStack, r.Stack) + stack := expandedStack[:n] + for _, pc := range stack { fmt.Fprintf(w, " %#x", pc) } fmt.Fprint(w, "\n") if debug > 0 { - printStackRecord(w, r.Stack(), true) + printStackRecord(w, stack, true) } } @@ -948,4 +951,26 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu return b.Flush() } -func runtime_cyclesPerSecond() int64 +//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels +func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) + +//go:linkname pprof_cyclesPerSecond runtime.pprof_cyclesPerSecond +func pprof_cyclesPerSecond() int64 + +//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal +func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool) + +//go:linkname pprof_blockProfileInternal runtime.pprof_blockProfileInternal +func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) + +//go:linkname pprof_mutexProfileInternal runtime.pprof_mutexProfileInternal +func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) + +//go:linkname pprof_threadCreateInternal runtime.pprof_threadCreateInternal +func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) + +//go:linkname pprof_fpunwindExpand runtime.pprof_fpunwindExpand +func pprof_fpunwindExpand(dst, src []uintptr) int + +//go:linkname pprof_makeProfStack runtime.pprof_makeProfStack +func pprof_makeProfStack() []uintptr diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index 1c92c7e1f4725..e6fa068060758 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -2444,7 +2444,7 @@ func TestProfilerStackDepth(t *testing.T) { runtime.SetMutexProfileFraction(oldMutexRate) }) - const depth = 32 + const depth = 128 go produceProfileEvents(t, depth) awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1) diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go index fa75a28c6263c..ab3550f43f9a3 100644 --- a/src/runtime/pprof/protomem.go +++ b/src/runtime/pprof/protomem.go @@ -5,6 +5,7 @@ package pprof import ( + "internal/profilerecord" "io" "math" "runtime" @@ -12,7 +13,7 @@ import ( ) // writeHeapProto writes the current heap profile in protobuf format to w. 
-func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { +func writeHeapProto(w io.Writer, p []profilerecord.MemProfileRecord, rate int64, defaultSampleType string) error { b := newProfileBuilder(w) b.pbValueType(tagProfile_PeriodType, "space", "bytes") b.pb.int64Opt(tagProfile_Period, rate) @@ -29,7 +30,7 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau for _, r := range p { hideRuntime := true for tries := 0; tries < 2; tries++ { - stk := r.Stack() + stk := r.Stack // For heap profiles, all stack // addresses are return PCs, which is // what appendLocsForStack expects. diff --git a/src/runtime/pprof/protomem_test.go b/src/runtime/pprof/protomem_test.go index 5fb67c53f6753..8e9732a33156e 100644 --- a/src/runtime/pprof/protomem_test.go +++ b/src/runtime/pprof/protomem_test.go @@ -8,6 +8,7 @@ import ( "bytes" "fmt" "internal/profile" + "internal/profilerecord" "internal/testenv" "runtime" "slices" @@ -24,10 +25,10 @@ func TestConvertMemProfile(t *testing.T) { // from these and get back to addr1 and addr2. a1, a2 := uintptr(addr1)+1, uintptr(addr2)+1 rate := int64(512 * 1024) - rec := []runtime.MemProfileRecord{ - {AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{a1, a2}}, - {AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{a2 + 1, a2 + 2}}, - {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, + rec := []profilerecord.MemProfileRecord{ + {AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack: []uintptr{a1, a2}}, + {AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack: []uintptr{a2 + 1, a2 + 2}}, + {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack: []uintptr{a1 + 1, a1 + 2, a2 + 3}}, } periodType := &profile.ValueType{Type: "space", Unit: "bytes"} diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 418f1c5a6649f..a9d60faa69f9f 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -930,10 +930,30 @@ func mcommoninit(mp *m, id int64) { // malloc and runtime locks for mLockProfile. // TODO(mknyszek): Implement lazy allocation if this becomes a problem. func mProfStackInit(mp *m) { - mp.profStack = make([]uintptr, maxStack) - mp.mLockProfile.stack = make([]uintptr, maxStack) + mp.profStack = makeProfStackFP() + mp.mLockProfile.stack = makeProfStackFP() } +// makeProfStackFP creates a buffer large enough to hold a maximum-sized stack +// trace as well as any additional frames needed for frame pointer unwinding +// with delayed inline expansion. +func makeProfStackFP() []uintptr { + // The "1" term is to account for the first stack entry being + // taken up by a "skip" sentinel value for profilers which + // defer inline frame expansion until the profile is reported. + // The "maxSkip" term is for frame pointer unwinding, where we + // want to end up with debug.profstackdepth frames but will discard + // some "physical" frames to account for skipping. + return make([]uintptr, 1+maxSkip+maxLogicalStack) +} + +// makeProfStack returns a buffer large enough to hold a maximum-sized stack +// trace.
+func makeProfStack() []uintptr { return make([]uintptr, maxLogicalStack) } + +//go:linkname pprof_makeProfStack +func pprof_makeProfStack() []uintptr { return makeProfStack() } + func (mp *m) becomeSpinning() { mp.spinning = true sched.nmspinning.Add(1) @@ -3132,7 +3152,7 @@ func execute(gp *g, inheritTime bool) { // Make sure that gp has had its stack written out to the goroutine // profile, exactly as it was when the goroutine profiler first stopped // the world. - tryRecordGoroutineProfile(gp, osyield) + tryRecordGoroutineProfile(gp, nil, osyield) } // Assign gp.m before entering _Grunning so running Gs have an diff --git a/src/runtime/tracestack.go b/src/runtime/tracestack.go index 477526d7cb469..69f6bb974e127 100644 --- a/src/runtime/tracestack.go +++ b/src/runtime/tracestack.go @@ -262,6 +262,11 @@ func fpTracebackPCs(fp unsafe.Pointer, pcBuf []uintptr) (i int) { return i } +//go:linkname pprof_fpunwindExpand +func pprof_fpunwindExpand(dst, src []uintptr) int { + return fpunwindExpand(dst, src) +} + // fpunwindExpand expands a call stack from pcBuf into dst, // returning the number of PCs written to dst. // pcBuf and dst should not overlap.
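
The release note and the pprof_test.go change above raise the recorded stack depth from 32 to 128 frames. The standalone sketch below is not part of the patch; the depth of 100, the sleep, and the helper name deepBlock are arbitrary choices. It shows the kind of deep, blocked call stack that the goroutine profile can now report in full instead of truncating at 32 frames:

package main

import (
	"os"
	"runtime/pprof"
	"time"
)

// deepBlock recurses to the requested depth and then parks on ch, so the
// goroutine profile records a stack that many frames deep. This is an
// independent sketch in the spirit of the TestProfilerStackDepth helpers,
// not the actual test code.
func deepBlock(depth int, ch chan struct{}) {
	if depth > 1 {
		deepBlock(depth-1, ch)
		return
	}
	<-ch
}

func main() {
	ch := make(chan struct{})
	go deepBlock(100, ch) // deeper than the old 32-frame limit

	// Crude synchronization: give the goroutine time to block before
	// dumping the profile. With the new 128-frame limit the whole
	// recursion appears in the output rather than being cut off at 32.
	time.Sleep(100 * time.Millisecond)
	pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
	close(ch)
}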
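
memProfileInternal, blockProfileInternal, mutexProfileInternal and threadCreateProfileInternal all follow the same shape: count the records, and only if the caller's buffer is large enough invoke a copyFn callback once per record. That lets the exported APIs keep truncating into the legacy fixed-size Stack0 arrays while the linknamed pprof_* helpers receive full-depth stacks. A self-contained sketch of that shape, using made-up types and a plain slice in place of the runtime's bucket list:

package main

import "fmt"

// Hypothetical stand-ins for the internal and legacy record shapes; the
// real types live in internal/profilerecord and package runtime.
type internalRecord struct{ Stack []uintptr }

type legacyRecord struct{ Stack0 [32]uintptr }

// profileInternal mirrors the *ProfileInternal helpers: report n, and only
// when the caller's buffer can hold every record invoke copyFn per record.
func profileInternal(src []internalRecord, size int, copyFn func(internalRecord)) (n int, ok bool) {
	n = len(src)
	if n <= size {
		ok = true
		for _, r := range src {
			copyFn(r)
		}
	}
	return
}

// LegacyProfile plays the role of the exported runtime API: each deep stack
// is truncated into the fixed-size array.
func LegacyProfile(src []internalRecord, p []legacyRecord) (int, bool) {
	return profileInternal(src, len(p), func(r internalRecord) {
		copy(p[0].Stack0[:], r.Stack)
		p = p[1:]
	})
}

// FullDepthProfile plays the role of the linknamed pprof_*Internal helpers:
// records keep their full-length stacks.
func FullDepthProfile(src []internalRecord, p []internalRecord) (int, bool) {
	return profileInternal(src, len(p), func(r internalRecord) {
		p[0] = r
		p = p[1:]
	})
}

func main() {
	src := []internalRecord{{Stack: make([]uintptr, 100)}}
	legacy := make([]legacyRecord, 1)
	full := make([]internalRecord, 1)
	LegacyProfile(src, legacy)
	FullDepthProfile(src, full)
	fmt.Println(len(legacy[0].Stack0), len(full[0].Stack)) // prints "32 100"
}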
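
saveg now records into a reusable scratch buffer (pcbuf) and only then allocates an r.Stack of exactly the right length, so the slice allocation cannot clobber mp.profStack if the allocation itself gets sampled. The same record-then-copy-out shape can be sketched with the public runtime.Callers API; the 128 cap and the nil-check fallback are illustrative, not the runtime's actual policy:

package main

import (
	"fmt"
	"runtime"
)

// rightSizedStack captures the caller's stack into a reusable scratch buffer
// and then copies it into a slice of exactly the right length.
func rightSizedStack(scratch []uintptr) []uintptr {
	if scratch == nil {
		scratch = make([]uintptr, 128)
	}
	n := runtime.Callers(2, scratch) // skip runtime.Callers and rightSizedStack itself
	stack := make([]uintptr, n)
	copy(stack, scratch[:n])
	return stack
}

func main() {
	scratch := make([]uintptr, 128) // reused across calls, like saveg's pcbuf
	fmt.Println("frames captured:", len(rightSizedStack(scratch)))
}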
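
writeHeapInternal, writeRuntimeProfile and writeProfileInternal all use the same size-then-fill protocol: ask for the record count with a nil slice, over-allocate a little slack, and retry if the profile grew in between. The sketch below applies that protocol through the public runtime.MemProfile API; the +50 slack simply mirrors what the pprof code happens to use, any small margin works:

package main

import (
	"fmt"
	"runtime"
)

// readMemProfile asks for the record count first, over-allocates a little,
// and retries until the profile fits.
func readMemProfile() []runtime.MemProfileRecord {
	n, _ := runtime.MemProfile(nil, true)
	for {
		p := make([]runtime.MemProfileRecord, n+50)
		var ok bool
		n, ok = runtime.MemProfile(p, true)
		if ok {
			return p[:n]
		}
		// More records appeared between the two calls; retry with the larger n.
	}
}

func main() {
	fmt.Println("heap profile records:", len(readMemProfile()))
}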