Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: scope metrics to active config, add optional per-host metrics #6531

Merged
merged 15 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ type AdminPermissions struct {

// newAdminHandler reads admin's config and returns an http.Handler suitable
// for use in an admin endpoint server, which will be listening on listenAddr.
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) adminHandler {
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool, ctx Context) adminHandler {
muxWrap := adminHandler{mux: http.NewServeMux()}

// secure the local or remote endpoint respectively
Expand Down Expand Up @@ -270,7 +270,6 @@ func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) admi
// register third-party module endpoints
for _, m := range GetModules("admin.api") {
router := m.New().(AdminRouter)
handlerLabel := m.ID.Name()
for _, route := range router.Routes() {
addRoute(route.Pattern, handlerLabel, route.Handler)
}
Expand Down Expand Up @@ -382,7 +381,9 @@ func (admin AdminConfig) allowedOrigins(addr NetworkAddress) []*url.URL {
// for the admin endpoint exists in cfg, a default one is used, so
// that there is always an admin server (unless it is explicitly
// configured to be disabled).
func replaceLocalAdminServer(cfg *Config) error {
// Critically note that some elements and functionality of the context
// may not be ready, e.g. storage. Tread carefully.
func replaceLocalAdminServer(cfg *Config, ctx Context) error {
// always* be sure to close down the old admin endpoint
// as gracefully as possible, even if the new one is
// disabled -- careful to use reference to the current
Expand Down Expand Up @@ -424,7 +425,7 @@ func replaceLocalAdminServer(cfg *Config) error {
return err
}

handler := cfg.Admin.newAdminHandler(addr, false)
handler := cfg.Admin.newAdminHandler(addr, false, ctx)

ln, err := addr.Listen(context.TODO(), 0, net.ListenConfig{})
if err != nil {
Expand Down Expand Up @@ -545,7 +546,7 @@ func replaceRemoteAdminServer(ctx Context, cfg *Config) error {

// make the HTTP handler but disable Host/Origin enforcement
// because we are using TLS authentication instead
handler := cfg.Admin.newAdminHandler(addr, true)
handler := cfg.Admin.newAdminHandler(addr, true, ctx)

// create client certificate pool for TLS mutual auth, and extract public keys
// so that we can enforce access controls at the application layer
Expand Down
9 changes: 7 additions & 2 deletions caddy.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ func unsyncedDecodeAndRun(cfgJSON []byte, allowPersist bool) error {
func run(newCfg *Config, start bool) (Context, error) {
ctx, err := provisionContext(newCfg, start)
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

Expand All @@ -410,6 +411,7 @@ func run(newCfg *Config, start bool) (Context, error) {
// some of the other apps at runtime
err = ctx.cfg.Admin.provisionAdminRouters(ctx)
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

Expand All @@ -435,9 +437,11 @@ func run(newCfg *Config, start bool) (Context, error) {
return nil
}()
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

globalMetrics.configSuccess.Set(1)
globalMetrics.configSuccessTime.SetToCurrentTime()
// now that the user's config is running, finish setting up anything else,
// such as remote admin endpoint, config loader, etc.
return ctx, finishSettingUp(ctx, ctx.cfg)
Expand Down Expand Up @@ -471,6 +475,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)
ctx, cancel := NewContext(Context{Context: context.Background(), cfg: newCfg})
defer func() {
if err != nil {
globalMetrics.configSuccess.Set(0)
// if there were any errors during startup,
// we should cancel the new context we created
// since the associated config won't be used;
Expand All @@ -497,7 +502,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)

// start the admin endpoint (and stop any prior one)
if replaceAdminServer {
err = replaceLocalAdminServer(newCfg)
err = replaceLocalAdminServer(newCfg, ctx)
if err != nil {
return ctx, fmt.Errorf("starting caddy administration endpoint: %v", err)
}
Expand Down
12 changes: 6 additions & 6 deletions caddyconfig/httpcaddyfile/serveroptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ func unmarshalCaddyfileServerOptions(d *caddyfile.Dispenser) (any, error) {
}

case "metrics":
if d.NextArg() {
return nil, d.ArgErr()
}
if nesting := d.Nesting(); d.NextBlock(nesting) {
return nil, d.ArgErr()
}
serverOpts.Metrics = new(caddyhttp.Metrics)
for nesting := d.Nesting(); d.NextBlock(nesting); {
switch d.Val() {
case "per_host":
serverOpts.Metrics.PerHost = true
}
}

case "trace":
if d.NextArg() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
servers :80 {
metrics {
per_host
}
}
}
:80 {
respond "Hello"
}
----------
{
"apps": {
"http": {
"servers": {
"srv0": {
"listen": [
":80"
],
"routes": [
{
"handle": [
{
"body": "Hello",
"handler": "static_response"
}
]
}
],
"metrics": {
"per_host": true
}
}
}
}
}
}
22 changes: 21 additions & 1 deletion context.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"reflect"

"github.com/caddyserver/certmagic"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"go.uber.org/zap"
"go.uber.org/zap/exp/zapslog"

Expand All @@ -47,6 +49,7 @@ type Context struct {
ancestry []Module
cleanupFuncs []func() // invoked at every config unload
exitFuncs []func(context.Context) // invoked at config unload ONLY IF the process is exiting (EXPERIMENTAL)
metricsRegistry *prometheus.Registry
}

// NewContext provides a new context derived from the given
Expand All @@ -58,7 +61,7 @@ type Context struct {
// modules which are loaded will be properly unloaded.
// See standard library context package's documentation.
func NewContext(ctx Context) (Context, context.CancelFunc) {
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg}
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg, metricsRegistry: prometheus.NewPedanticRegistry()}
c, cancel := context.WithCancel(ctx.Context)
wrappedCancel := func() {
cancel()
Expand All @@ -79,6 +82,7 @@ func NewContext(ctx Context) (Context, context.CancelFunc) {
}
}
newCtx.Context = c
newCtx.initMetrics()
return newCtx, wrappedCancel
}

Expand All @@ -97,6 +101,22 @@ func (ctx *Context) Filesystems() FileSystems {
return ctx.cfg.filesystems
}

// GetMetricsRegistry returns the Prometheus metrics registry held by this
// context (the metricsRegistry field). NewContext creates a fresh pedantic
// registry for every new context, so the returned registry is scoped to the
// config that the context belongs to.
//
// EXPERIMENTAL: This API is subject to change.
func (ctx *Context) GetMetricsRegistry() *prometheus.Registry {
return ctx.metricsRegistry
}

// initMetrics registers the core collectors on this context's metrics
// registry: the Go build-info collector, the admin API request counters
// (adminMetrics), and the config reload gauges (globalMetrics). It is
// called by NewContext after the registry has been created.
//
// MustRegister panics if any collector cannot be registered.
func (ctx *Context) initMetrics() {
ctx.metricsRegistry.MustRegister(
collectors.NewBuildInfoCollector(),
adminMetrics.requestCount,
adminMetrics.requestErrors,
globalMetrics.configSuccess,
globalMetrics.configSuccessTime,
)
}

// OnExit executes f when the process exits gracefully.
// The function is only executed if the process is gracefully
// shut down while this context is active.
Expand Down
23 changes: 16 additions & 7 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,33 @@ import (
"net/http"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/caddyserver/caddy/v2/internal/metrics"
)

// define and register the metrics used in this package.
func init() {
prometheus.MustRegister(collectors.NewBuildInfoCollector())

const ns, sub = "caddy", "admin"

adminMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_requests_total",
Help: "Counter of requests made to the Admin API's HTTP endpoints.",
}, []string{"handler", "path", "code", "method"})
adminMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_request_errors_total",
Help: "Number of requests resulting in middleware errors.",
}, []string{"handler", "path", "method"})
globalMetrics.configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_successful",
Help: "Whether the last configuration reload attempt was successful.",
})
globalMetrics.configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
}

// adminMetrics is a collection of metrics that can be tracked for the admin API.
Expand All @@ -36,6 +39,12 @@ var adminMetrics = struct {
requestErrors *prometheus.CounterVec
}{}

// globalMetrics is a collection of metrics that can be tracked for Caddy
// global state. The gauges are created in this package's init function and
// registered on each context's registry by Context.initMetrics.
var globalMetrics = struct {
// configSuccess reports whether the last configuration reload attempt
// was successful; run sets it to 1 on success, and run and
// provisionContext set it to 0 when they return an error.
configSuccess prometheus.Gauge
// configSuccessTime holds the timestamp of the last successful
// configuration reload; run sets it to the current time on success.
configSuccessTime prometheus.Gauge
}{}

// Similar to promhttp.InstrumentHandlerCounter, but upper-cases method names
// instead of lower-casing them.
//
Expand Down
4 changes: 4 additions & 0 deletions modules/caddyhttp/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,10 @@ func (app *App) Provision(ctx caddy.Context) error {
// route handler so that important security checks are done, etc.
primaryRoute := emptyHandler
if srv.Routes != nil {
if srv.Metrics != nil {
srv.Metrics.init = sync.Once{}
srv.Metrics.httpMetrics = &httpMetrics{}
}
err := srv.Routes.ProvisionHandlers(ctx, srv.Metrics)
if err != nil {
return fmt.Errorf("server %s: setting up route handlers: %v", srvName, err)
Expand Down
Loading
Loading