Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve metric reporting #376

Merged
merged 1 commit into from
Feb 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions chain/actor.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import (
"github.com/filecoin-project/go-address"
"github.com/filecoin-project/lotus/chain/types"
"github.com/ipfs/go-cid"
"go.opencensus.io/tag"
"golang.org/x/xerrors"

"github.com/filecoin-project/sentinel-visor/lens"
"github.com/filecoin-project/sentinel-visor/metrics"
"github.com/filecoin-project/sentinel-visor/model"
visormodel "github.com/filecoin-project/sentinel-visor/model/visor"
"github.com/filecoin-project/sentinel-visor/tasks/actorstate"
Expand Down Expand Up @@ -119,6 +121,8 @@ func (p *ActorStateProcessor) ProcessActors(ctx context.Context, ts *types.TipSe
}

func (p *ActorStateProcessor) runActorStateExtraction(ctx context.Context, ts *types.TipSet, pts *types.TipSet, addrStr string, act types.Actor, results chan *ActorStateResult) {
ctx, _ = tag.New(ctx, tag.Upsert(metrics.ActorCode, actorstate.ActorNameByCode(act.Code)))

res := &ActorStateResult{
Code: act.Code,
Head: act.Head,
Expand Down
12 changes: 12 additions & 0 deletions chain/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ func NewTipSetIndexer(o lens.APIOpener, d model.Storage, window time.Duration, n

// TipSet is called when a new tipset has been discovered
func (t *TipSetIndexer) TipSet(ctx context.Context, ts *types.TipSet) error {
ctx, _ = tag.New(ctx, tag.Upsert(metrics.Name, t.name))

var cancel func()
var tctx context.Context // cancellable context for the task
if t.window > 0 {
Expand Down Expand Up @@ -285,7 +287,10 @@ func (t *TipSetIndexer) TipSet(ctx context.Context, ts *types.TipSet) error {
for task, p := range taskOutputs {
go func(task string, p model.Persistable) {
defer wg.Done()
ctx, _ = tag.New(ctx, tag.Upsert(metrics.TaskType, task))

if err := t.storage.PersistBatch(ctx, p); err != nil {
stats.Record(ctx, metrics.PersistFailure.M(1))
ll.Errorw("persistence failed", "task", task, "error", err)
return
}
Expand All @@ -307,6 +312,7 @@ func (t *TipSetIndexer) runProcessor(ctx context.Context, p TipSetProcessor, nam

data, report, err := p.ProcessTipSet(ctx, ts)
if err != nil {
stats.Record(ctx, metrics.ProcessingFailure.M(1))
results <- &TaskResult{
Task: name,
Error: err,
Expand All @@ -321,8 +327,14 @@ func (t *TipSetIndexer) runProcessor(ctx context.Context, p TipSetProcessor, nam
}

func (t *TipSetIndexer) runActorProcessor(ctx context.Context, p ActorProcessor, name string, ts, pts *types.TipSet, actors map[string]types.Actor, results chan *TaskResult) {
ctx, _ = tag.New(ctx, tag.Upsert(metrics.TaskType, name))
stats.Record(ctx, metrics.TipsetHeight.M(int64(ts.Height())))
stop := metrics.Timer(ctx, metrics.ProcessingDuration)
defer stop()

data, report, err := p.ProcessActors(ctx, ts, pts, actors)
if err != nil {
stats.Record(ctx, metrics.ProcessingFailure.M(1))
results <- &TaskResult{
Task: name,
Error: err,
Expand Down
5 changes: 1 addition & 4 deletions chain/walker.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@ package chain

import (
"context"

"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/lotus/chain/types"
"go.opencensus.io/tag"
"go.opentelemetry.io/otel/api/global"
"go.opentelemetry.io/otel/api/trace"
"go.opentelemetry.io/otel/label"
"golang.org/x/xerrors"

"github.com/filecoin-project/sentinel-visor/lens"
"github.com/filecoin-project/sentinel-visor/metrics"
)

func NewWalker(obs TipSetObserver, opener lens.APIOpener, minHeight, maxHeight int64) *Walker {
Expand Down Expand Up @@ -69,8 +68,6 @@ func (c *Walker) WalkChain(ctx context.Context, node lens.API, ts *types.TipSet)
ctx, span := global.Tracer("").Start(ctx, "Walker.WalkChain", trace.WithAttributes(label.Int64("height", c.maxHeight)))
defer span.End()

ctx, _ = tag.New(ctx, tag.Upsert(metrics.TaskType, "indexhistoryblock"))

log.Debugw("found tipset", "height", ts.Height())
if err := c.obs.TipSet(ctx, ts); err != nil {
return xerrors.Errorf("notify tipset: %w", err)
Expand Down
5 changes: 1 addition & 4 deletions chain/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package chain

import (
"context"
"go.opencensus.io/tag"

"go.opentelemetry.io/otel/api/global"
"golang.org/x/xerrors"

lotus_api "github.com/filecoin-project/lotus/api"
store "github.com/filecoin-project/lotus/chain/store"

"github.com/filecoin-project/sentinel-visor/lens"
"github.com/filecoin-project/sentinel-visor/metrics"
)

// NewWatcher creates a new Watcher. confidence sets the number of tipsets that will be held
Expand Down Expand Up @@ -67,8 +66,6 @@ func (c *Watcher) index(ctx context.Context, headEvents []*lotus_api.HeadChange)
ctx, span := global.Tracer("").Start(ctx, "Watcher.index")
defer span.End()

ctx, _ = tag.New(ctx, tag.Upsert(metrics.TaskType, "indexheadblock"))

for _, ch := range headEvents {
switch ch.Type {
case store.HCCurrent:
Expand Down
14 changes: 14 additions & 0 deletions lens/lotus/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ func (aw *APIWrapper) ChainGetParentMessages(ctx context.Context, bcid cid.Cid)
// StateGetReceipt returns the message receipt for the given message cid as
// found in the tipset identified by tsk, delegating to the wrapped full node.
// The call is traced and its duration is recorded against the lens request
// duration metric, tagged with the API method name.
func (aw *APIWrapper) StateGetReceipt(ctx context.Context, bcid cid.Cid, tsk types.TipSetKey) (*types.MessageReceipt, error) {
	ctx, span := global.Tracer("").Start(ctx, "Lotus.StateGetReceipt")
	defer span.End()

	// Attribute the request metric to this API method via a context tag.
	ctx, _ = tag.New(ctx, tag.Upsert(metrics.API, "StateGetReceipt"))
	stopTimer := metrics.Timer(ctx, metrics.LensRequestDuration)
	defer stopTimer()

	return aw.FullNode.StateGetReceipt(ctx, bcid, tsk)
}

Expand Down Expand Up @@ -186,12 +191,21 @@ func (aw *APIWrapper) StateReadState(ctx context.Context, actor address.Address,
// StateVMCirculatingSupplyInternal reports the circulating supply details for
// the tipset identified by tsk, delegating to the wrapped full node. The call
// is traced and its duration is recorded against the lens request duration
// metric, tagged with the API method name.
func (aw *APIWrapper) StateVMCirculatingSupplyInternal(ctx context.Context, tsk types.TipSetKey) (api.CirculatingSupply, error) {
	ctx, span := global.Tracer("").Start(ctx, "Lotus.StateCirculatingSupply")
	defer span.End()

	// Attribute the request metric to this API method via a context tag.
	ctx, _ = tag.New(ctx, tag.Upsert(metrics.API, "StateVMCirculatingSupplyInternal"))
	stopTimer := metrics.Timer(ctx, metrics.LensRequestDuration)
	defer stopTimer()

	return aw.FullNode.StateVMCirculatingSupplyInternal(ctx, tsk)
}

// GetExecutedMessagesForTipset returns a list of messages sent as part of pts (parent) with receipts found in ts (child).
// No attempt at deduplication of messages is made.
func (aw *APIWrapper) GetExecutedMessagesForTipset(ctx context.Context, ts, pts *types.TipSet) ([]*lens.ExecutedMessage, error) {
ctx, _ = tag.New(ctx, tag.Upsert(metrics.API, "GetExecutedMessagesForTipset"))
stop := metrics.Timer(ctx, metrics.LensRequestDuration)
defer stop()

if !types.CidArrsEqual(ts.Parents().Cids(), pts.Cids()) {
return nil, xerrors.Errorf("child tipset (%s) is not on the same chain as parent (%s)", ts.Key(), pts.Key())
}
Expand Down
7 changes: 7 additions & 0 deletions lens/lotus/cachestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import (
"fmt"

"github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/sentinel-visor/metrics"
lru "github.com/hashicorp/golang-lru"
"github.com/ipfs/go-cid"
cbg "github.com/whyrusleeping/cbor-gen"
"go.opencensus.io/tag"
"go.opentelemetry.io/otel/api/global"
"golang.org/x/xerrors"
)
Expand All @@ -34,6 +36,7 @@ func (cs *CacheCtxStore) Context() context.Context {
func (cs *CacheCtxStore) Get(ctx context.Context, c cid.Cid, out interface{}) error {
ctx, span := global.Tracer("").Start(ctx, "CacheCtxStore.Get")
defer span.End()

cu, ok := out.(cbg.CBORUnmarshaler)
if !ok {
return fmt.Errorf("out parameter does not implement CBORUnmarshaler")
Expand All @@ -45,6 +48,10 @@ func (cs *CacheCtxStore) Get(ctx context.Context, c cid.Cid, out interface{}) er
return cu.UnmarshalCBOR(bytes.NewReader(v.([]byte)))
}

ctx, _ = tag.New(ctx, tag.Upsert(metrics.API, "ChainReadObj"))
stop := metrics.Timer(ctx, metrics.LensRequestDuration)
defer stop()

// miss :(
raw, err := cs.api.ChainReadObj(ctx, c)
if err != nil {
Expand Down
70 changes: 31 additions & 39 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,87 +12,79 @@ import (
var defaultMillisecondsDistribution = view.Distribution(0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800, 1000, 2000, 5000, 10000, 20000, 30000, 50000, 100000, 200000, 500000, 1000000, 2000000, 5000000, 10000000, 10000000)

var (
Error, _ = tag.NewKey("error")
TaskType, _ = tag.NewKey("task")
TaskType, _ = tag.NewKey("task") // name of task processor
Name, _ = tag.NewKey("name") // name of running instance of visor
Table, _ = tag.NewKey("table") // name of table data is persisted for
ConnState, _ = tag.NewKey("conn_state")
State, _ = tag.NewKey("state")
API, _ = tag.NewKey("api")
API, _ = tag.NewKey("api") // name of method on lotus api
ActorCode, _ = tag.NewKey("actor_code") // human readable code of actor being processed
)

var (
ProcessingDuration = stats.Float64("processing_duration_ms", "Time taken to process a single item", stats.UnitMilliseconds)
PersistDuration = stats.Float64("persist_duration_ms", "Duration of a models persist operation", stats.UnitMilliseconds)
BatchSelectionDuration = stats.Float64("batch_selection_duration_ms", "Time taken to select a batch of work", stats.UnitMilliseconds)
CompletionDuration = stats.Float64("completion_duration_ms", "Time taken to mark an item as completed", stats.UnitMilliseconds)
DBConns = stats.Int64("db_conns", "Database connections held", stats.UnitDimensionless)
HistoricalIndexerHeight = stats.Int64("historical_sync_height", "Sync height of the historical indexer", stats.UnitDimensionless)
EpochsToSync = stats.Int64("epochs_to_sync", "Epochs yet to sync", stats.UnitDimensionless)
LensRequestDuration = stats.Float64("lens_request_duration_ms", "Duration of lotus api requets", stats.UnitMilliseconds)
TipsetHeight = stats.Int64("tipset_height", "The height of the tipset being processed", stats.UnitDimensionless)
ProcessingDuration = stats.Float64("processing_duration_ms", "Time taken to process a single item", stats.UnitMilliseconds)
PersistDuration = stats.Float64("persist_duration_ms", "Duration of a models persist operation", stats.UnitMilliseconds)
DBConns = stats.Int64("db_conns", "Database connections held", stats.UnitDimensionless)
LensRequestDuration = stats.Float64("lens_request_duration_ms", "Duration of lotus api requets", stats.UnitMilliseconds)
TipsetHeight = stats.Int64("tipset_height", "The height of the tipset being processed", stats.UnitDimensionless)
ProcessingFailure = stats.Int64("processing_failure", "Number of processing failures", stats.UnitDimensionless)
PersistFailure = stats.Int64("persist_failure", "Number of persistence failures", stats.UnitDimensionless)
)

var (
ProcessingDurationView = &view.View{
Measure: ProcessingDuration,
Aggregation: defaultMillisecondsDistribution,
TagKeys: []tag.Key{TaskType},
TagKeys: []tag.Key{TaskType, ActorCode},
}
PersistDurationView = &view.View{
Measure: PersistDuration,
Aggregation: defaultMillisecondsDistribution,
TagKeys: []tag.Key{TaskType},
}
BatchSelectionDurationView = &view.View{
Measure: BatchSelectionDuration,
Aggregation: defaultMillisecondsDistribution,
TagKeys: []tag.Key{TaskType},
}
CompletionDurationView = &view.View{
Measure: CompletionDuration,
Aggregation: defaultMillisecondsDistribution,
TagKeys: []tag.Key{TaskType},
TagKeys: []tag.Key{TaskType, Table, ActorCode},
}
DBConnsView = &view.View{
Measure: DBConns,
Aggregation: view.Count(),
TagKeys: []tag.Key{ConnState},
}
HistoricalIndexerHeightView = &view.View{
Measure: HistoricalIndexerHeight,
Aggregation: view.Sum(),
}
EpochsToSyncView = &view.View{
Measure: EpochsToSync,
Aggregation: view.LastValue(),
}
LensRequestDurationView = &view.View{
Measure: LensRequestDuration,
Aggregation: defaultMillisecondsDistribution,
TagKeys: []tag.Key{TaskType, API},
TagKeys: []tag.Key{TaskType, API, ActorCode},
}
LensRequestTotal = &view.View{
Name: "lens_request_total",
Measure: LensRequestDuration,
Aggregation: view.Count(),
TagKeys: []tag.Key{TaskType, API},
TagKeys: []tag.Key{TaskType, API, ActorCode},
}
TipsetHeightView = &view.View{
Measure: TipsetHeight,
Aggregation: view.LastValue(),
TagKeys: []tag.Key{TaskType},
}
ProcessingFailureTotalView = &view.View{
Name: ProcessingFailure.Name() + "_total",
Measure: ProcessingFailure,
Aggregation: view.Sum(),
TagKeys: []tag.Key{TaskType, ActorCode},
}
PersistFailureTotalView = &view.View{
Name: PersistFailure.Name() + "_total",
Measure: PersistFailure,
Aggregation: view.Sum(),
TagKeys: []tag.Key{TaskType, Table, ActorCode},
}
)

var DefaultViews = []*view.View{
ProcessingDurationView,
PersistDurationView,
BatchSelectionDurationView,
CompletionDurationView,
DBConnsView,
HistoricalIndexerHeightView,
EpochsToSyncView,
LensRequestDurationView,
LensRequestTotal,
TipsetHeightView,
ProcessingFailureTotalView,
PersistFailureTotalView,
}

// SinceInMilliseconds returns the duration of time since the provide time as a float64.
Expand Down
20 changes: 20 additions & 0 deletions model/actors/common/actors.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ package common
import (
"context"

"go.opencensus.io/tag"
"go.opentelemetry.io/otel/api/global"
"go.opentelemetry.io/otel/api/trace"
"go.opentelemetry.io/otel/label"

"github.com/filecoin-project/sentinel-visor/metrics"
"github.com/filecoin-project/sentinel-visor/model"
)

Expand All @@ -23,6 +25,11 @@ type Actor struct {
// Persist writes this actor row to storage via the supplied batch. The
// operation is traced, tagged with the destination table name, and its
// duration is recorded against the persist duration metric.
func (a *Actor) Persist(ctx context.Context, s model.StorageBatch) error {
	ctx, span := global.Tracer("").Start(ctx, "Actor.Persist")
	defer span.End()

	// Tag the metric with the table being written so durations can be
	// broken down per table.
	ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "actors"))
	stopTimer := metrics.Timer(ctx, metrics.PersistDuration)
	defer stopTimer()

	return s.PersistModel(ctx, a)
}

Expand All @@ -34,6 +41,10 @@ func (actors ActorList) Persist(ctx context.Context, s model.StorageBatch) error
ctx, span := global.Tracer("").Start(ctx, "ActorList.Persist", trace.WithAttributes(label.Int("count", len(actors))))
defer span.End()

ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "actors"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be tagging metrics for list persistence the same as individual persistence? Seems this would record duration for a batch and individuals just the same and might make the data unreliable. (If you change this, it appears this is the same for all the models.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it matters. The list could be a single item and we would want that to be treated identically to the individual case. We're aiming to measure how long visor is taking to persist all the data it extracts, not necessarily per-item timings.

stop := metrics.Timer(ctx, metrics.PersistDuration)
defer stop()

if len(actors) == 0 {
return nil
}
Expand All @@ -51,6 +62,11 @@ type ActorState struct {
// Persist writes this actor state row to storage via the supplied batch. The
// operation is traced, tagged with the destination table name, and its
// duration is recorded against the persist duration metric.
func (as *ActorState) Persist(ctx context.Context, s model.StorageBatch) error {
	ctx, span := global.Tracer("").Start(ctx, "ActorState.Persist")
	defer span.End()

	// Tag the metric with the table being written so durations can be
	// broken down per table.
	ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "actor_states"))
	stopTimer := metrics.Timer(ctx, metrics.PersistDuration)
	defer stopTimer()

	return s.PersistModel(ctx, as)
}

Expand All @@ -62,6 +78,10 @@ func (states ActorStateList) Persist(ctx context.Context, s model.StorageBatch)
ctx, span := global.Tracer("").Start(ctx, "ActorStateList.Persist", trace.WithAttributes(label.Int("count", len(states))))
defer span.End()

ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "actor_states"))
stop := metrics.Timer(ctx, metrics.PersistDuration)
defer stop()

if len(states) == 0 {
return nil
}
Expand Down
15 changes: 15 additions & 0 deletions model/actors/init/idaddress.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ package init
import (
"context"

"go.opencensus.io/tag"
"go.opentelemetry.io/otel/api/global"
"go.opentelemetry.io/otel/api/trace"
"go.opentelemetry.io/otel/label"

"github.com/filecoin-project/sentinel-visor/metrics"
"github.com/filecoin-project/sentinel-visor/model"
)

Expand All @@ -16,11 +18,24 @@ type IdAddress struct {
StateRoot string `pg:",pk,notnull"`
}

// Persist writes this id address mapping to storage via the supplied batch.
// The operation is tagged with the destination table name and its duration is
// recorded against the persist duration metric.
func (ia *IdAddress) Persist(ctx context.Context, s model.StorageBatch) error {
	// Start a span for consistency with the other model Persist methods
	// (Actor.Persist, ActorState.Persist, IdAddressList.Persist), which all
	// trace their persistence work.
	ctx, span := global.Tracer("").Start(ctx, "IdAddress.Persist")
	defer span.End()

	ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "id_addresses"))
	stop := metrics.Timer(ctx, metrics.PersistDuration)
	defer stop()

	return s.PersistModel(ctx, ia)
}

type IdAddressList []*IdAddress

func (ias IdAddressList) Persist(ctx context.Context, s model.StorageBatch) error {
ctx, span := global.Tracer("").Start(ctx, "IdAddressList.PersistWithTx", trace.WithAttributes(label.Int("count", len(ias))))
defer span.End()

ctx, _ = tag.New(ctx, tag.Upsert(metrics.Table, "id_addresses"))
stop := metrics.Timer(ctx, metrics.PersistDuration)
defer stop()

for _, ia := range ias {
if err := s.PersistModel(ctx, ia); err != nil {
return err
Expand Down
Loading