profiler.go

package sentry

import (
	"container/ring"
	"strconv"

	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/getsentry/sentry-go/internal/traceparser"
)

// Start a profiler that collects samples continuously, with a buffer of up to 30 seconds.
// Later, you can collect a slice from this buffer, producing a Trace.
func startProfiling(startTime time.Time) profiler {
	onProfilerStart()

	p := newProfiler(startTime)

	// Wait for the profiler to finish setting up before returning to the caller.
	started := make(chan struct{})
	go p.run(started)

	if _, ok := <-started; ok {
		return p
	}
	return nil
}

type profiler interface {
	// GetSlice returns a slice of the profiled data between the given times.
	GetSlice(startTime, endTime time.Time) *profilerResult
	Stop(wait bool)
}

type profilerResult struct {
	callerGoID uint64
	trace      *profileTrace
}

func getCurrentGoID() uint64 {
	// We shouldn't panic but let's be super safe.
	defer func() {
		if err := recover(); err != nil {
			Logger.Printf("Profiler panic in getCurrentGoID(): %v\n", err)
		}
	}()

	// Buffer to read the stack trace into. We should be good with a small buffer because we only need the first line.
	var stacksBuffer = make([]byte, 100)
	var n = runtime.Stack(stacksBuffer, false)
	if n > 0 {
		var traces = traceparser.Parse(stacksBuffer[0:n])
		if traces.Length() > 0 {
			var trace = traces.Item(0)
			return trace.GoID()
		}
	}
	return 0
}

const profilerSamplingRateHz = 101 // 101 Hz; not 100 Hz because of the lockstep sampling (https://stackoverflow.com/a/45471031/1181370)
const profilerSamplingRate = time.Second / profilerSamplingRateHz
const stackBufferMaxGrowth = 512 * 1024
const stackBufferLimit = 10 * 1024 * 1024
const profilerRuntimeLimit = 30 // seconds

type profileRecorder struct {
	startTime         time.Time
	stopSignal        chan struct{}
	stopped           int64
	mutex             sync.RWMutex
	testProfilerPanic int64

	// Map from runtime.StackRecord.Stack0 to an index in stacks.
	stackIndexes   map[string]int
	stacks         []profileStack
	newStacks      []profileStack // New stacks created in the current interation.
	stackKeyBuffer []byte

	// Map from runtime.Frame.PC to an index in frames.
	frameIndexes map[string]int
	frames       []*Frame
	newFrames    []*Frame // New frames created in the current interation.

	// We keep a ring buffer of 30 seconds worth of samples, so that we can later slice it.
	// Each bucket is a slice of samples all taken at the same time.
	samplesBucketsHead *ring.Ring

	// Buffer to read current stacks - will grow automatically up to stackBufferLimit.
	stacksBuffer []byte
}

func newProfiler(startTime time.Time) *profileRecorder {
	// Pre-allocate the profile trace for the currently active number of routines & 100 ms worth of samples.
	// Other coefficients are just guesses of what might be a good starting point to avoid allocs on short runs.
	return &profileRecorder{
		startTime:  startTime,
		stopSignal: make(chan struct{}, 1),

		stackIndexes: make(map[string]int, 32),
		stacks:       make([]profileStack, 0, 32),
		newStacks:    make([]profileStack, 0, 32),

		frameIndexes: make(map[string]int, 128),
		frames:       make([]*Frame, 0, 128),
		newFrames:    make([]*Frame, 0, 128),

		samplesBucketsHead: ring.New(profilerRuntimeLimit * profilerSamplingRateHz),

		// A buffer of 2 KiB per goroutine stack looks like a good starting point (empirically determined).
		stacksBuffer: make([]byte, runtime.NumGoroutine()*2048),
	}
}

// This allows us to test whether panic during profiling are handled correctly and don't block execution.
// If the number is lower than 0, profilerGoroutine() will panic immedately.
// If the number is higher than 0, profiler.onTick() will panic when the given samples-set index is being collected.
var testProfilerPanic int64
var profilerRunning int64

func (p *profileRecorder) run(started chan struct{}) {
	// Code backup for manual test debugging:
	// if !atomic.CompareAndSwapInt64(&profilerRunning, 0, 1) {
	// 	panic("Only one profiler can be running at a time")
	// }

	// We shouldn't panic but let's be super safe.
	defer func() {
		if err := recover(); err != nil {
			Logger.Printf("Profiler panic in run(): %v\n", err)
		}
		atomic.StoreInt64(&testProfilerPanic, 0)
		close(started)
		p.stopSignal <- struct{}{}
		atomic.StoreInt64(&p.stopped, 1)
		atomic.StoreInt64(&profilerRunning, 0)
	}()

	p.testProfilerPanic = atomic.LoadInt64(&testProfilerPanic)
	if p.testProfilerPanic < 0 {
		Logger.Printf("Profiler panicking during startup because testProfilerPanic == %v\n", p.testProfilerPanic)
		panic("This is an expected panic in profilerGoroutine() during tests")
	}

	// Collect the first sample immediately.
	p.onTick()

	// Periodically collect stacks, starting after profilerSamplingRate has passed.
	collectTicker := profilerTickerFactory(profilerSamplingRate)
	defer collectTicker.Stop()
	var tickerChannel = collectTicker.TickSource()

	started <- struct{}{}

	for {
		select {
		case <-tickerChannel:
			p.onTick()
			collectTicker.Ticked()
		case <-p.stopSignal:
			return
		}
	}
}

func (p *profileRecorder) Stop(wait bool) {
	if atomic.LoadInt64(&p.stopped) == 1 {
		return
	}
	p.stopSignal <- struct{}{}
	if wait {
		<-p.stopSignal
	}
}

func (p *profileRecorder) GetSlice(startTime, endTime time.Time) *profilerResult {
	// Unlikely edge cases - profiler wasn't running at all or the given times are invalid in relation to each other.
	if p.startTime.After(endTime) || startTime.After(endTime) {
		return nil
	}

	var relativeStartNS = uint64(0)
	if p.startTime.Before(startTime) {
		relativeStartNS = uint64(startTime.Sub(p.startTime).Nanoseconds())
	}
	var relativeEndNS = uint64(endTime.Sub(p.startTime).Nanoseconds())

	samplesCount, bucketsReversed, trace := p.getBuckets(relativeStartNS, relativeEndNS)
	if samplesCount == 0 {
		return nil
	}

	var result = &profilerResult{
		callerGoID: getCurrentGoID(),
		trace:      trace,
	}

	trace.Samples = make([]profileSample, samplesCount)
	trace.ThreadMetadata = make(map[uint64]*profileThreadMetadata, len(bucketsReversed[0].goIDs))
	var s = samplesCount - 1
	for _, bucket := range bucketsReversed {
		var elapsedSinceStartNS = bucket.relativeTimeNS - relativeStartNS
		for i, goID := range bucket.goIDs {
			trace.Samples[s].ElapsedSinceStartNS = elapsedSinceStartNS
			trace.Samples[s].ThreadID = goID
			trace.Samples[s].StackID = bucket.stackIDs[i]
			s--

			if _, goroutineExists := trace.ThreadMetadata[goID]; !goroutineExists {
				trace.ThreadMetadata[goID] = &profileThreadMetadata{
					Name: "Goroutine " + strconv.FormatUint(goID, 10),
				}
			}
		}
	}

	return result
}

// Collect all buckets of samples in the given time range while holding a read lock.
func (p *profileRecorder) getBuckets(relativeStartNS, relativeEndNS uint64) (samplesCount int, buckets []*profileSamplesBucket, trace *profileTrace) {
	p.mutex.RLock()
	defer p.mutex.RUnlock()

	// sampleBucketsHead points at the last stored bucket so it's a good starting point to search backwards for the end.
	var end = p.samplesBucketsHead
	for end.Value != nil && end.Value.(*profileSamplesBucket).relativeTimeNS > relativeEndNS {
		end = end.Prev()
	}

	// Edge case - no items stored before the given endTime.
	if end.Value == nil {
		return 0, nil, nil
	}

	{ // Find the first item after the given startTime.
		var start = end
		var prevBucket *profileSamplesBucket
		samplesCount = 0
		buckets = make([]*profileSamplesBucket, 0, int64((relativeEndNS-relativeStartNS)/uint64(profilerSamplingRate.Nanoseconds()))+1)
		for start.Value != nil {
			var bucket = start.Value.(*profileSamplesBucket)

			// If this bucket's time is before the requests start time, don't collect it (and stop iterating further).
			if bucket.relativeTimeNS < relativeStartNS {
				break
			}

			// If this bucket time is greater than previous the bucket's time, we have exhausted the whole ring buffer
			// before we were able to find the start time. That means the start time is not present and we must break.
			// This happens if the slice duration exceeds the ring buffer capacity.
			if prevBucket != nil && bucket.relativeTimeNS > prevBucket.relativeTimeNS {
				break
			}

			samplesCount += len(bucket.goIDs)
			buckets = append(buckets, bucket)

			start = start.Prev()
			prevBucket = bucket
		}
	}

	// Edge case - if the period requested was too short and we haven't collected enough samples.
	if len(buckets) < 2 {
		return 0, nil, nil
	}

	trace = &profileTrace{
		Frames: p.frames,
		Stacks: p.stacks,
	}
	return samplesCount, buckets, trace
}

func (p *profileRecorder) onTick() {
	elapsedNs := time.Since(p.startTime).Nanoseconds()

	if p.testProfilerPanic > 0 {
		Logger.Printf("Profiler testProfilerPanic == %v\n", p.testProfilerPanic)
		if p.testProfilerPanic == 1 {
			Logger.Println("Profiler panicking onTick()")
			panic("This is an expected panic in Profiler.OnTick() during tests")
		}
		p.testProfilerPanic--
	}

	records := p.collectRecords()
	p.processRecords(uint64(elapsedNs), records)

	// Free up some memory if we don't need such a large buffer anymore.
	if len(p.stacksBuffer) > len(records)*3 {
		p.stacksBuffer = make([]byte, len(records)*3)
	}
}

func (p *profileRecorder) collectRecords() []byte {
	for {
		// Capture stacks for all existing goroutines.
		// Note: runtime.GoroutineProfile() would be better but we can't use it at the moment because
		//       it doesn't give us `gid` for each routine, see https://github.com/golang/go/issues/59663
		n := runtime.Stack(p.stacksBuffer, true)

		// If we couldn't read everything, increase the buffer and try again.
		if n >= len(p.stacksBuffer) && n < stackBufferLimit {
			var newSize = n * 2
			if newSize > n+stackBufferMaxGrowth {
				newSize = n + stackBufferMaxGrowth
			}
			if newSize > stackBufferLimit {
				newSize = stackBufferLimit
			}
			p.stacksBuffer = make([]byte, newSize)
		} else {
			return p.stacksBuffer[0:n]
		}
	}
}

func (p *profileRecorder) processRecords(elapsedNs uint64, stacksBuffer []byte) {
	var traces = traceparser.Parse(stacksBuffer)
	var length = traces.Length()

	// Shouldn't happen but let's be safe and don't store empty buckets.
	if length == 0 {
		return
	}

	var bucket = &profileSamplesBucket{
		relativeTimeNS: elapsedNs,
		stackIDs:       make([]int, length),
		goIDs:          make([]uint64, length),
	}

	// reset buffers
	p.newFrames = p.newFrames[:0]
	p.newStacks = p.newStacks[:0]

	for i := 0; i < length; i++ {
		var stack = traces.Item(i)
		bucket.stackIDs[i] = p.addStackTrace(stack)
		bucket.goIDs[i] = stack.GoID()
	}

	p.mutex.Lock()
	defer p.mutex.Unlock()

	p.stacks = append(p.stacks, p.newStacks...)
	p.frames = append(p.frames, p.newFrames...)

	p.samplesBucketsHead = p.samplesBucketsHead.Next()
	p.samplesBucketsHead.Value = bucket
}

func (p *profileRecorder) addStackTrace(capturedStack traceparser.Trace) int {
	iter := capturedStack.Frames()
	stack := make(profileStack, 0, iter.LengthUpperBound())

	// Originally, we've used `capturedStack.UniqueIdentifier()` as a key but that was incorrect because it also
	// contains function arguments and we want to group stacks by function name and file/line only.
	// Instead, we need to parse frames and we use a list of their indexes as a key.
	// We reuse the same buffer for each stack to avoid allocations; this is a hot spot.
	var expectedBufferLen = cap(stack) * 5 // 4 bytes per frame + 1 byte for space
	if cap(p.stackKeyBuffer) < expectedBufferLen {
		p.stackKeyBuffer = make([]byte, 0, expectedBufferLen)
	} else {
		p.stackKeyBuffer = p.stackKeyBuffer[:0]
	}

	for iter.HasNext() {
		var frame = iter.Next()
		if frameIndex := p.addFrame(frame); frameIndex >= 0 {
			stack = append(stack, frameIndex)

			p.stackKeyBuffer = append(p.stackKeyBuffer, 0) // space

			// The following code is just like binary.AppendUvarint() which isn't yet available in Go 1.18.
			x := uint64(frameIndex) + 1
			for x >= 0x80 {
				p.stackKeyBuffer = append(p.stackKeyBuffer, byte(x)|0x80)
				x >>= 7
			}
			p.stackKeyBuffer = append(p.stackKeyBuffer, byte(x))
		}
	}

	stackIndex, exists := p.stackIndexes[string(p.stackKeyBuffer)]
	if !exists {
		stackIndex = len(p.stacks) + len(p.newStacks)
		p.newStacks = append(p.newStacks, stack)
		p.stackIndexes[string(p.stackKeyBuffer)] = stackIndex
	}

	return stackIndex
}

func (p *profileRecorder) addFrame(capturedFrame traceparser.Frame) int {
	// NOTE: Don't convert to string yet, it's expensive and compiler can avoid it when
	//       indexing into a map (only needs a copy when adding a new key to the map).
	var key = capturedFrame.UniqueIdentifier()

	frameIndex, exists := p.frameIndexes[string(key)]
	if !exists {
		module, function := splitQualifiedFunctionName(string(capturedFrame.Func()))
		file, line := capturedFrame.File()
		frame := newFrame(module, function, string(file), line)
		frameIndex = len(p.frames) + len(p.newFrames)
		p.newFrames = append(p.newFrames, &frame)
		p.frameIndexes[string(key)] = frameIndex
	}
	return frameIndex
}

type profileSamplesBucket struct {
	relativeTimeNS uint64
	stackIDs       []int
	goIDs          []uint64
}

// A Ticker holds a channel that delivers “ticks” of a clock at intervals.
type profilerTicker interface {
	// Stop turns off a ticker. After Stop, no more ticks will be sent.
	Stop()

	// TickSource returns a read-only channel of ticks.
	TickSource() <-chan time.Time

	// Ticked is called by the Profiler after a tick is processed to notify the ticker. Used for testing.
	Ticked()
}

type timeTicker struct {
	*time.Ticker
}

func (t *timeTicker) TickSource() <-chan time.Time {
	return t.C
}

func (t *timeTicker) Ticked() {}

func profilerTickerFactoryDefault(d time.Duration) profilerTicker {
	return &timeTicker{time.NewTicker(d)}
}

// We allow overriding the ticker for tests. CI is terribly flaky
// because the time.Ticker doesn't guarantee regular ticks - they may come (a lot) later than the given interval.
var profilerTickerFactory = profilerTickerFactoryDefault