Skip to content

Commit

Permalink
reduce cost of pipeline stages by pooling maps we create for labels (#…
Browse files Browse the repository at this point in the history
…11484)

If we look at profiling of ingesters in some of our production
deployments during which we have high ingester load from querying we see
that we spend roughly 15-20% of our CPU time on GC related work.

Looking at different `runtime` memory related calls, it looks like ~50%
of all CPU time during Pipeline stage processing is the creation of the
map for labels that is then passed to text template parsing. This change
will make the pipeline processing itself ~15% faster but hopefully will
also reduce (very slightly) the impact of GC in heavy query load
situations.

```
goos: linux
goarch: amd64
pkg: github.com/grafana/loki/pkg/logql/log
cpu: AMD Ryzen 9 5950X 16-Core Processor
                                    │     base     │                mapPool                │
                                    │    sec/op    │    sec/op     vs base                 │
_Pipeline/pipeline_bytes-32           7.749µ ± ∞ ¹   6.464µ ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/pipeline_string-32          7.691µ ± ∞ ¹   6.549µ ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_bytes-32     7.922µ ± ∞ ¹   6.882µ ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_string-32    7.900µ ± ∞ ¹   6.878µ ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_bytes-32    7.952µ ± ∞ ¹   6.976µ ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_string-32   7.940µ ± ∞ ¹   7.102µ ± ∞ ¹        ~ (p=1.000 n=1) ²
geomean                               7.858µ         6.805µ        -13.41%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05

                                    │     base      │                mapPool                 │
                                    │     B/op      │     B/op       vs base                 │
_Pipeline/pipeline_bytes-32           4.963Ki ± ∞ ¹   1.413Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/pipeline_string-32          5.025Ki ± ∞ ¹   1.476Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_bytes-32     5.030Ki ± ∞ ¹   1.478Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_string-32    5.028Ki ± ∞ ¹   1.478Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_bytes-32    5.027Ki ± ∞ ¹   1.478Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_string-32   5.027Ki ± ∞ ¹   1.478Ki ± ∞ ¹        ~ (p=1.000 n=1) ²
geomean                               5.017Ki         1.466Ki        -70.77%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05

                                    │    base     │               mapPool               │
                                    │  allocs/op  │  allocs/op   vs base                │
_Pipeline/pipeline_bytes-32           37.00 ± ∞ ¹   35.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
_Pipeline/pipeline_string-32          38.00 ± ∞ ¹   36.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_bytes-32     37.00 ± ∞ ¹   35.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
_Pipeline/line_extractor_string-32    37.00 ± ∞ ¹   35.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_bytes-32    37.00 ± ∞ ¹   35.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
_Pipeline/label_extractor_string-32   37.00 ± ∞ ¹   35.00 ± ∞ ¹       ~ (p=1.000 n=1) ²
geomean                               37.16         35.16        -5.38%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
```

---------

Signed-off-by: Callum Styan <[email protected]>
  • Loading branch information
cstyan authored Dec 16, 2023
1 parent be71a80 commit e93243f
Show file tree
Hide file tree
Showing 11 changed files with 251 additions and 6 deletions.
12 changes: 12 additions & 0 deletions pkg/logql/log/base
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
goos: linux
goarch: amd64
pkg: github.com/grafana/loki/pkg/logql/log
cpu: AMD Ryzen 9 5950X 16-Core Processor
Benchmark_Pipeline/pipeline_bytes-32 153394 7749 ns/op 5082 B/op 37 allocs/op
Benchmark_Pipeline/pipeline_string-32 153319 7691 ns/op 5146 B/op 38 allocs/op
Benchmark_Pipeline/line_extractor_bytes-32 148069 7922 ns/op 5151 B/op 37 allocs/op
Benchmark_Pipeline/line_extractor_string-32 149070 7900 ns/op 5149 B/op 37 allocs/op
Benchmark_Pipeline/label_extractor_bytes-32 146860 7952 ns/op 5148 B/op 37 allocs/op
Benchmark_Pipeline/label_extractor_string-32 147686 7940 ns/op 5148 B/op 37 allocs/op
PASS
ok github.com/grafana/loki/pkg/logql/log 190.270s
Binary file added pkg/logql/log/cpu-2.out
Binary file not shown.
14 changes: 9 additions & 5 deletions pkg/logql/log/fmt.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,10 @@ func (lf *LineFormatter) Process(ts int64, line []byte, lbs *LabelsBuilder) ([]b
lf.currentLine = line
lf.currentTs = ts

if err := lf.Template.Execute(lf.buf, lbs.Map()); err != nil {
// map now is taking from a pool
m := lbs.Map()
defer smp.Put(m)
if err := lf.Template.Execute(lf.buf, m); err != nil {
lbs.SetErr(errTemplateFormat)
lbs.SetErrorDetails(err.Error())
return line, true
Expand Down Expand Up @@ -380,7 +383,8 @@ func (lf *LabelsFormatter) Process(ts int64, l []byte, lbs *LabelsBuilder) ([]by
lf.currentLine = l
lf.currentTs = ts

var data interface{}
var m = smp.Get()
defer smp.Put(m)
for _, f := range lf.formats {
if f.Rename {
v, category, ok := lbs.GetWithCategory(f.Value)
Expand All @@ -391,10 +395,10 @@ func (lf *LabelsFormatter) Process(ts int64, l []byte, lbs *LabelsBuilder) ([]by
continue
}
lf.buf.Reset()
if data == nil {
data = lbs.Map()
if len(m) == 0 {
lbs.IntoMap(m)
}
if err := f.tmpl.Execute(lf.buf, data); err != nil {
if err := f.tmpl.Execute(lf.buf, m); err != nil {
lbs.SetErr(errTemplateFormat)
lbs.SetErrorDetails(err.Error())
continue
Expand Down
50 changes: 49 additions & 1 deletion pkg/logql/log/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package log
import (
"fmt"
"sort"
"sync"

"github.com/prometheus/prometheus/model/labels"

Expand Down Expand Up @@ -437,6 +438,52 @@ func (b *LabelsBuilder) UnsortedLabels(buf labels.Labels, categories ...LabelCat
return buf
}

type stringMapPool struct {
pool sync.Pool
}

func newStringMapPool() *stringMapPool {
return &stringMapPool{
pool: sync.Pool{
New: func() interface{} {
return make(map[string]string)
},
},
}
}

func (s *stringMapPool) Get() map[string]string {
m := s.pool.Get().(map[string]string)
clear(m)
return m
}

func (s *stringMapPool) Put(m map[string]string) {
s.pool.Put(m)
}

var smp = newStringMapPool()

// puts labels entries into an existing map, it is up to the caller to
// properly clear the map if it is going to be reused
func (b *LabelsBuilder) IntoMap(m map[string]string) {
if !b.hasDel() && !b.hasAdd() && !b.HasErr() {
if b.baseMap == nil {
b.baseMap = b.base.Map()
for k, v := range b.baseMap {
m[k] = v
}
}
return
}
b.buf = b.UnsortedLabels(b.buf)
// todo should we also cache maps since limited by the result ?
// Maps also don't create a copy of the labels.
for _, l := range b.buf {
m[l.Name] = l.Value
}
}

func (b *LabelsBuilder) Map() map[string]string {
if !b.hasDel() && !b.hasAdd() && !b.HasErr() {
if b.baseMap == nil {
Expand All @@ -447,7 +494,8 @@ func (b *LabelsBuilder) Map() map[string]string {
b.buf = b.UnsortedLabels(b.buf)
// todo should we also cache maps since limited by the result ?
// Maps also don't create a copy of the labels.
res := make(map[string]string, len(b.buf))
res := smp.Get()
clear(res)
for _, l := range b.buf {
res[l.Name] = l.Value
}
Expand Down
12 changes: 12 additions & 0 deletions pkg/logql/log/last
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
goos: linux
goarch: amd64
pkg: github.com/grafana/loki/pkg/logql/log
cpu: AMD Ryzen 9 5950X 16-Core Processor
Benchmark_Pipeline/pipeline_bytes-32 172089 6673 ns/op 1447 B/op 35 allocs/op
Benchmark_Pipeline/pipeline_string-32 179335 6670 ns/op 1511 B/op 36 allocs/op
Benchmark_Pipeline/line_extractor_bytes-32 172724 6853 ns/op 1513 B/op 35 allocs/op
Benchmark_Pipeline/line_extractor_string-32 170238 6866 ns/op 1513 B/op 35 allocs/op
Benchmark_Pipeline/label_extractor_bytes-32 169942 7004 ns/op 1513 B/op 35 allocs/op
Benchmark_Pipeline/label_extractor_string-32 168538 6982 ns/op 1513 B/op 35 allocs/op
PASS
ok github.com/grafana/loki/pkg/logql/log 7.650s
Binary file added pkg/logql/log/log.test
Binary file not shown.
12 changes: 12 additions & 0 deletions pkg/logql/log/mapPool
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
goos: linux
goarch: amd64
pkg: github.com/grafana/loki/pkg/logql/log
cpu: AMD Ryzen 9 5950X 16-Core Processor
Benchmark_Pipeline/pipeline_bytes-32 183506 6356 ns/op 1422 B/op 33 allocs/op
Benchmark_Pipeline/pipeline_string-32 182916 6377 ns/op 1486 B/op 34 allocs/op
Benchmark_Pipeline/line_extractor_bytes-32 173494 6676 ns/op 1489 B/op 33 allocs/op
Benchmark_Pipeline/line_extractor_string-32 175282 6797 ns/op 1489 B/op 33 allocs/op
Benchmark_Pipeline/label_extractor_bytes-32 172417 6954 ns/op 1489 B/op 33 allocs/op
Benchmark_Pipeline/label_extractor_string-32 173976 6749 ns/op 1489 B/op 33 allocs/op
PASS
ok github.com/grafana/loki/pkg/logql/log 7.650s
Loading

0 comments on commit e93243f

Please sign in to comment.