Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: use prometheus histogram and summary interfaces #8808

Merged
merged 22 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ae1af08
metrics: swap remaining VictoriaMetrics usages with erigon-lib/metrics
taratorio Nov 17, 2023
a6b38d5
Merge branch 'devel' of github.com:ledgerwatch/erigon into swap-vm-wi…
taratorio Nov 17, 2023
e224408
fix lint
taratorio Nov 17, 2023
932082b
metrics: remove VictoriaMetrics/metrics lib
taratorio Nov 17, 2023
c52cfc5
metrics: correctly use prometheus client counter and gauge interfaces
taratorio Nov 20, 2023
01a8131
Merge branch 'devel' of github.com:ledgerwatch/erigon into remove-vic…
taratorio Nov 20, 2023
f778e98
fix lint go mod tidy
taratorio Nov 20, 2023
6472839
Merge branch 'remove-victoria-metrics' of github.com:ledgerwatch/erig…
taratorio Nov 20, 2023
d802ad8
fix lint
taratorio Nov 20, 2023
1bd4e41
better ux for floats
taratorio Nov 20, 2023
ccb212b
more ux improvements
taratorio Nov 20, 2023
83ee8bb
remove unnecessary
taratorio Nov 20, 2023
41df846
metrics: use prometheus interfaces for histograms and summaries
taratorio Nov 20, 2023
b39f163
final touch
taratorio Nov 20, 2023
273ae03
Merge branch 'metrics-improve-interfaces' of github.com:ledgerwatch/e…
taratorio Nov 20, 2023
65db37e
metrics: use prometheus interfaces for histograms and summaries (#8798)
taratorio Nov 21, 2023
51671ef
Revert "metrics: use prometheus interfaces for histograms and summari…
taratorio Nov 21, 2023
a2101b9
Merge branch 'metrics-improve-interfaces' of github.com:ledgerwatch/e…
taratorio Nov 21, 2023
382547a
Merge branch 'devel' of github.com:ledgerwatch/erigon into metrics-im…
taratorio Nov 24, 2023
747b9b8
Merge branch 'metrics-improve-interfaces' of github.com:ledgerwatch/e…
taratorio Nov 24, 2023
a78ae24
Merge branch 'devel' of github.com:ledgerwatch/erigon into metrics-im…
taratorio Nov 24, 2023
fb5ef2c
use ptr receiver
taratorio Nov 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cl/phase1/core/state/ssz.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ func (b *CachingBeaconState) EncodeSSZ(buf []byte) ([]byte, error) {
}
h.PutSince()
sz := metrics.NewHistTimer("encode_ssz_beacon_state_size")
sz.Update(float64(len(bts)))
sz.Observe(float64(len(bts)))
return bts, err
}

Expand All @@ -23,7 +23,7 @@ func (b *CachingBeaconState) DecodeSSZ(buf []byte, version int) error {
return err
}
sz := metrics.NewHistTimer("decode_ssz_beacon_state_size")
sz.Update(float64(len(buf)))
sz.Observe(float64(len(buf)))
h.PutSince()
return b.initBeaconState()
}
Expand Down
2 changes: 1 addition & 1 deletion consensus/bor/heimdall/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,5 @@ func sendMetrics(ctx context.Context, start time.Time, isSuccessful bool) {
}

meters.request[isSuccessful].Set(1)
meters.timer.UpdateDuration(start)
meters.timer.ObserveDuration(start)
}
10 changes: 4 additions & 6 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,16 @@ package core

import (
"fmt"
"github.com/ledgerwatch/erigon-lib/metrics"
"time"

"github.com/ledgerwatch/log/v3"
"golang.org/x/crypto/sha3"
"golang.org/x/exp/slices"

"github.com/ledgerwatch/log/v3"

"github.com/ledgerwatch/erigon-lib/chain"
libcommon "github.com/ledgerwatch/erigon-lib/common"
"github.com/ledgerwatch/erigon-lib/common/cmp"

"github.com/ledgerwatch/erigon-lib/metrics"
"github.com/ledgerwatch/erigon/common/math"
"github.com/ledgerwatch/erigon/common/u256"
"github.com/ledgerwatch/erigon/consensus"
Expand All @@ -42,7 +40,7 @@ import (
)

var (
BlockExecutionTimer = metrics.GetOrCreateSummary("chain_execution_seconds")
blockExecutionTimer = metrics.GetOrCreateSummary("chain_execution_seconds")
)

type SyncMode string
Expand Down Expand Up @@ -85,7 +83,7 @@ func ExecuteBlockEphemerally(
logger log.Logger,
) (*EphemeralExecResult, error) {

defer BlockExecutionTimer.UpdateDuration(time.Now())
defer blockExecutionTimer.ObserveDuration(time.Now())
block.Uncles()
ibs := state.New(stateReader)
header := block.Header()
Expand Down
10 changes: 5 additions & 5 deletions erigon-lib/kv/mdbx/kv_mdbx.go
Original file line number Diff line number Diff line change
Expand Up @@ -833,12 +833,12 @@ func (tx *MdbxTx) Commit() error {
}

if tx.db.opts.label == kv.ChainDB {
kv.DbCommitPreparation.Update(latency.Preparation.Seconds())
kv.DbCommitPreparation.Observe(latency.Preparation.Seconds())
//kv.DbCommitAudit.Update(latency.Audit.Seconds())
kv.DbCommitWrite.Update(latency.Write.Seconds())
kv.DbCommitSync.Update(latency.Sync.Seconds())
kv.DbCommitEnding.Update(latency.Ending.Seconds())
kv.DbCommitTotal.Update(latency.Whole.Seconds())
kv.DbCommitWrite.Observe(latency.Write.Seconds())
kv.DbCommitSync.Observe(latency.Sync.Seconds())
kv.DbCommitEnding.Observe(latency.Ending.Seconds())
kv.DbCommitTotal.Observe(latency.Whole.Seconds())

//kv.DbGcWorkPnlMergeTime.Update(latency.GCDetails.WorkPnlMergeTime.Seconds())
//kv.DbGcWorkPnlMergeVolume.Set(uint64(latency.GCDetails.WorkPnlMergeVolume))
Expand Down
14 changes: 14 additions & 0 deletions erigon-lib/metrics/duration_observer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package metrics

import (
"time"
)

type DurationObserver interface {
// ObserveDuration observes duration since start time
ObserveDuration(start time.Time)
}

func secondsSince(start time.Time) float64 {
return time.Since(start).Seconds()
}
20 changes: 20 additions & 0 deletions erigon-lib/metrics/histogram.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
)

type Histogram interface {
prometheus.Histogram
DurationObserver
}

type histogram struct {
prometheus.Summary
}

func (h *histogram) ObserveDuration(start time.Time) {
h.Observe(secondsSince(start))
}
49 changes: 4 additions & 45 deletions erigon-lib/metrics/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,8 @@ package metrics

import (
"fmt"
"time"

"github.com/prometheus/client_golang/prometheus"
)

type Histogram interface {
// UpdateDuration updates request duration based on the given startTime.
UpdateDuration(time.Time)

// Update updates h with v.
//
// Negative values and NaNs are ignored.
Update(float64)
}

type Summary interface {
Histogram
}

// NewCounter registers and returns new counter with the given name.
//
// name must be valid Prometheus-compatible metric with possible labels.
Expand Down Expand Up @@ -105,18 +88,6 @@ func GetOrCreateGauge(name string) Gauge {
return &gauge{g}
}

type summary struct {
prometheus.Summary
}

func (sm summary) UpdateDuration(startTime time.Time) {
sm.Observe(time.Since(startTime).Seconds())
}

func (sm summary) Update(v float64) {
sm.Observe(v)
}

// NewSummary creates and returns new summary with the given name.
//
// name must be valid Prometheus-compatible metric with possible labels.
Expand All @@ -133,7 +104,7 @@ func NewSummary(name string) Summary {
panic(fmt.Errorf("could not create new summary: %w", err))
}

return summary{s}
return &summary{s}
}

// GetOrCreateSummary returns registered summary with the given name
Expand All @@ -156,19 +127,7 @@ func GetOrCreateSummary(name string) Summary {
panic(fmt.Errorf("could not get or create new summary: %w", err))
}

return summary{s}
}

type histogram struct {
prometheus.Histogram
}

func (h histogram) UpdateDuration(startTime time.Time) {
h.Observe(time.Since(startTime).Seconds())
}

func (h histogram) Update(v float64) {
h.Observe(v)
return &summary{s}
}

// NewHistogram creates and returns new histogram with the given name.
Expand All @@ -187,7 +146,7 @@ func NewHistogram(name string) Histogram {
panic(fmt.Errorf("could not create new histogram: %w", err))
}

return histogram{h}
return &histogram{h}
}

// GetOrCreateHistogram returns registered histogram with the given name
Expand All @@ -210,5 +169,5 @@ func GetOrCreateHistogram(name string) Histogram {
panic(fmt.Errorf("could not get or create new histogram: %w", err))
}

return histogram{h}
return &histogram{h}
}
20 changes: 20 additions & 0 deletions erigon-lib/metrics/summary.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
)

type Summary interface {
prometheus.Summary
DurationObserver
}

type summary struct {
prometheus.Summary
}

func (s *summary) ObserveDuration(start time.Time) {
s.Observe(secondsSince(start))
}
2 changes: 1 addition & 1 deletion erigon-lib/metrics/timer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func NewHistTimer(name string) *HistTimer {
}

func (h *HistTimer) PutSince() {
h.Histogram.UpdateDuration(h.start)
h.Histogram.ObserveDuration(h.start)
}

func (h *HistTimer) Tag(pairs ...string) *HistTimer {
Expand Down
20 changes: 10 additions & 10 deletions erigon-lib/state/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
start := time.Now()
collation, err := d.collateStream(ctx, step, txFrom, txTo, d.tx)
mxRunningCollations.Dec()
mxCollateTook.UpdateDuration(start)
mxCollateTook.ObserveDuration(start)

//mxCollationSize.Set(uint64(collation.valuesComp.Count()))
mxCollationSizeHist.SetInt(collation.historyComp.Count())
Expand Down Expand Up @@ -481,8 +481,8 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
mxPruningProgress.Dec()
mxPruningProgress.Dec()

mxPruneTook.Update(d.stats.LastPruneTook.Seconds())
mxPruneHistTook.Update(d.stats.LastPruneHistTook.Seconds())
mxPruneTook.Observe(d.stats.LastPruneTook.Seconds())
mxPruneHistTook.Observe(d.stats.LastPruneHistTook.Seconds())
}

// when domain files are build and db is pruned, we can merge them
Expand All @@ -503,7 +503,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
start := time.Now()
collation, err := d.collate(ctx, step*a.aggregationStep, (step+1)*a.aggregationStep, d.tx)
mxRunningCollations.Dec()
mxCollateTook.UpdateDuration(start)
mxCollateTook.ObserveDuration(start)

if err != nil {
return fmt.Errorf("index collation %q has failed: %w", d.filenameBase, err)
Expand All @@ -523,7 +523,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
}

mxRunningMerges.Dec()
mxBuildTook.UpdateDuration(start)
mxBuildTook.ObserveDuration(start)

d.integrateFiles(sf, step*a.aggregationStep, (step+1)*a.aggregationStep)

Expand All @@ -547,7 +547,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
if err := d.prune(ctx, txFrom, txTo, math.MaxUint64, logEvery); err != nil {
return err
}
mxPruneTook.UpdateDuration(startPrune)
mxPruneTook.ObserveDuration(startPrune)
mxPruningProgress.Dec()
}

Expand All @@ -565,7 +565,7 @@ func (a *Aggregator) aggregate(ctx context.Context, step uint64) error {
"range", fmt.Sprintf("%.2fM-%.2fM", float64(txFrom)/10e5, float64(txTo)/10e5),
"took", time.Since(stepStartedAt))

mxStepTook.UpdateDuration(stepStartedAt)
mxStepTook.ObserveDuration(stepStartedAt)

return nil
}
Expand Down Expand Up @@ -601,7 +601,7 @@ func (a *Aggregator) mergeLoopStep(ctx context.Context, maxEndTxNum uint64, work
closeAll = false

for _, s := range []DomainStats{a.accounts.stats, a.code.stats, a.storage.stats} {
mxBuildTook.Update(s.LastFileBuildingTook.Seconds())
mxBuildTook.Observe(s.LastFileBuildingTook.Seconds())
}

a.logger.Info("[stat] finished merge step",
Expand Down Expand Up @@ -855,9 +855,9 @@ func (a *Aggregator) ComputeCommitment(saveStateAfter, trace bool) (rootHash []b
}

mxCommitmentKeys.AddUint64(a.commitment.comKeys)
mxCommitmentTook.Update(a.commitment.comTook.Seconds())
mxCommitmentTook.Observe(a.commitment.comTook.Seconds())

defer func(t time.Time) { mxCommitmentWriteTook.UpdateDuration(t) }(time.Now())
defer func(t time.Time) { mxCommitmentWriteTook.ObserveDuration(t) }(time.Now())

for pref, update := range branchNodeUpdates {
prefix := []byte(pref)
Expand Down
2 changes: 1 addition & 1 deletion erigon-lib/state/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ func (d *Domain) aggregate(ctx context.Context, step uint64, txFrom, txTo uint64
start := time.Now()
collation, err := d.collateStream(ctx, step, txFrom, txTo, tx)
mxRunningCollations.Dec()
mxCollateTook.UpdateDuration(start)
mxCollateTook.ObserveDuration(start)

mxCollationSize.SetInt(collation.valuesComp.Count())
mxCollationSizeHist.SetInt(collation.historyComp.Count())
Expand Down
12 changes: 6 additions & 6 deletions erigon-lib/txpool/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ func (p *TxPool) OnNewBlock(ctx context.Context, stateChanges *remote.StateChang
return err
}

defer newBlockTimer.UpdateDuration(time.Now())
defer newBlockTimer.ObserveDuration(time.Now())
//t := time.Now()

coreDB, cache := p.coreDBWithCache()
Expand Down Expand Up @@ -414,7 +414,7 @@ func (p *TxPool) processRemoteTxs(ctx context.Context) error {
return fmt.Errorf("txpool not started yet")
}

defer processBatchTxsTimer.UpdateDuration(time.Now())
defer processBatchTxsTimer.ObserveDuration(time.Now())
coreDB, cache := p.coreDBWithCache()
coreTx, err := coreDB.BeginRo(ctx)
if err != nil {
Expand Down Expand Up @@ -718,7 +718,7 @@ func (p *TxPool) AddRemoteTxs(_ context.Context, newTxs types.TxSlots) {
return
}

defer addRemoteTxsTimer.UpdateDuration(time.Now())
defer addRemoteTxsTimer.ObserveDuration(time.Now())
p.lock.Lock()
defer p.lock.Unlock()
for i, txn := range newTxs.Txs {
Expand Down Expand Up @@ -1702,7 +1702,7 @@ func MainLoop(ctx context.Context, db kv.RwDB, coreDB kv.RoDB, p *TxPool, newTxs
if announcements.Len() == 0 {
return
}
defer propagateNewTxsTimer.UpdateDuration(time.Now())
defer propagateNewTxsTimer.ObserveDuration(time.Now())

announcements = announcements.DedupCopy()

Expand Down Expand Up @@ -1804,7 +1804,7 @@ func MainLoop(ctx context.Context, db kv.RwDB, coreDB kv.RoDB, p *TxPool, newTxs
var sizes []uint32
types, sizes, hashes = p.AppendAllAnnouncements(types, sizes, hashes[:0])
go send.PropagatePooledTxsToPeersList(newPeers, types, sizes, hashes)
propagateToNewPeerTimer.UpdateDuration(t)
propagateToNewPeerTimer.ObserveDuration(t)
}
}
}
Expand All @@ -1830,7 +1830,7 @@ func (p *TxPool) flushNoFsync(ctx context.Context, db kv.RwDB) (written uint64,
}

func (p *TxPool) flush(ctx context.Context, db kv.RwDB) (written uint64, err error) {
defer writeToDBTimer.UpdateDuration(time.Now())
defer writeToDBTimer.ObserveDuration(time.Now())
// 1. get global lock on txpool and flush it to db, without fsync (to release lock asap)
// 2. then fsync db without txpool lock
written, err = p.flushNoFsync(ctx, db)
Expand Down
2 changes: 1 addition & 1 deletion rpc/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ func (h *handler) handleCall(cp *callProc, msg *jsonrpcMessage, stream *jsoniter
if answer != nil && answer.Error != nil {
failedReqeustGauge.Inc()
}
newRPCServingTimerMS(msg.Method, answer == nil || answer.Error == nil).UpdateDuration(start)
newRPCServingTimerMS(msg.Method, answer == nil || answer.Error == nil).ObserveDuration(start)
}
return answer
}
Expand Down
Loading