diff --git a/cmd/gobgpd/main.go b/cmd/gobgpd/main.go index 37f6793d1..4fe0d3993 100644 --- a/cmd/gobgpd/main.go +++ b/cmd/gobgpd/main.go @@ -57,6 +57,7 @@ func main() { ConfigFile string `short:"f" long:"config-file" description:"specifying a config file"` ConfigType string `short:"t" long:"config-type" description:"specifying config type (toml, yaml, json)" default:"toml"` ConfigAutoReload bool `short:"a" long:"config-auto-reload" description:"activate config auto reload on changes"` + ConfigStrict bool `long:"config-strict" description:"make any config error fatal"` LogLevel string `short:"l" long:"log-level" description:"specifying log level"` LogPlain bool `short:"p" long:"log-plain" description:"use plain format for logging (json by default)"` UseSyslog string `short:"s" long:"syslog" description:"use syslogd"` @@ -197,7 +198,8 @@ func main() { } logger.Info("gobgpd started") - bgpServer := server.NewBgpServer(server.GrpcListenAddress(opts.GrpcHosts), server.GrpcOption(grpcOpts), server.LoggerOption(&builtinLogger{logger: logger})) + bgpLogger := &builtinLogger{logger: logger, cfgStrict: opts.ConfigStrict} + bgpServer := server.NewBgpServer(server.GrpcListenAddress(opts.GrpcHosts), server.GrpcOption(grpcOpts), server.LoggerOption(bgpLogger)) prometheus.MustRegister(metrics.NewBgpCollector(bgpServer)) go bgpServer.Serve() @@ -258,6 +260,8 @@ func main() { }) } + // Avoid crashing gobgpd on reload - it shouldn't flush policy entirely, so it's safe to continue to run + bgpLogger.cfgStrict = false for sig := range sigCh { if sig != syscall.SIGHUP { stopServer(bgpServer, opts.UseSdNotify) diff --git a/cmd/gobgpd/util.go b/cmd/gobgpd/util.go index c20965023..7e5e9b4da 100644 --- a/cmd/gobgpd/util.go +++ b/cmd/gobgpd/util.go @@ -105,7 +105,8 @@ func addSyslogHook(host, facility string) error { } type builtinLogger struct { - logger *logrus.Logger + logger *logrus.Logger + cfgStrict bool } func (l *builtinLogger) Panic(msg string, fields log.Fields) { @@ -113,6 +114,12 @@ func (l *builtinLogger) Panic(msg string, fields log.Fields) { } func (l *builtinLogger) Fatal(msg string, fields log.Fields) { + if fields.HasFacility(log.FacilityConfig) && !l.cfgStrict { + // Backward compatibility with old behavior when any logical config error was treated as warning + l.logger.WithFields(logrus.Fields(fields)).Warn(msg) + return + } + l.logger.WithFields(logrus.Fields(fields)).Fatal(msg) } diff --git a/cmd/gobgpd/util_windows.go b/cmd/gobgpd/util_windows.go index 43f043b79..795237f97 100644 --- a/cmd/gobgpd/util_windows.go +++ b/cmd/gobgpd/util_windows.go @@ -29,10 +29,17 @@ func addSyslogHook(_, _ string) error { } type builtinLogger struct { - logger *logrus.Logger + logger *logrus.Logger + cfgStrict bool } func (l *builtinLogger) Panic(msg string, fields log.Fields) { + if fields.HasFacility(log.FacilityConfig) && !l.cfgStrict { + // Backward compatibility with old behavior when any logical config error was treated as warning + l.logger.WithFields(logrus.Fields(fields)).Warn(msg) + return + } + l.logger.WithFields(logrus.Fields(fields)).Panic(msg) } diff --git a/internal/pkg/table/policy.go b/internal/pkg/table/policy.go index d959f25fd..f9f74f047 100644 --- a/internal/pkg/table/policy.go +++ b/internal/pkg/table/policy.go @@ -4027,8 +4027,10 @@ func (r *RoutingPolicy) Reset(rp *oc.RoutingPolicy, ap map[string]oc.ApplyPolicy defer r.mu.Unlock() if err := r.reload(*rp); err != nil { - r.logger.Error("failed to create routing policy", + r.logger.Fatal("failed to create routing policy", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "Policy", "Error": err}) return err diff --git a/pkg/config/config.go b/pkg/config/config.go index 3482ce6dd..c6555a68f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -98,8 +98,10 @@ func addPeerGroups(ctx context.Context, bgpServer *server.BgpServer, addedPg []o if err := bgpServer.AddPeerGroup(ctx, &api.AddPeerGroupRequest{ PeerGroup: oc.NewPeerGroupFromConfigStruct(&pg), }); err != nil { - bgpServer.Log().Warn("Failed to add PeerGroup", + bgpServer.Log().Fatal("Failed to add PeerGroup", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": pg.Config.PeerGroupName, "Error": err}) @@ -116,8 +118,10 @@ func deletePeerGroups(ctx context.Context, bgpServer *server.BgpServer, deletedP if err := bgpServer.DeletePeerGroup(ctx, &api.DeletePeerGroupRequest{ Name: pg.Config.PeerGroupName, }); err != nil { - bgpServer.Log().Warn("Failed to delete PeerGroup", + bgpServer.Log().Fatal("Failed to delete PeerGroup", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": pg.Config.PeerGroupName, "Error": err}) @@ -134,8 +138,10 @@ func updatePeerGroups(ctx context.Context, bgpServer *server.BgpServer, updatedP if u, err := bgpServer.UpdatePeerGroup(ctx, &api.UpdatePeerGroupRequest{ PeerGroup: oc.NewPeerGroupFromConfigStruct(&pg), }); err != nil { - bgpServer.Log().Warn("Failed to update PeerGroup", + bgpServer.Log().Fatal("Failed to update PeerGroup", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": pg.Config.PeerGroupName, "Error": err}) @@ -159,8 +165,10 @@ func addDynamicNeighbors(ctx context.Context, bgpServer *server.BgpServer, dynam PeerGroup: dn.Config.PeerGroup, }, }); err != nil { - bgpServer.Log().Warn("Failed to add Dynamic Neighbor to PeerGroup", + bgpServer.Log().Fatal("Failed to add Dynamic Neighbor to PeerGroup", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": dn.Config.PeerGroup, "Prefix": dn.Config.Prefix, @@ -178,8 +186,10 @@ func addNeighbors(ctx context.Context, bgpServer *server.BgpServer, added []oc.N if err := bgpServer.AddPeer(ctx, &api.AddPeerRequest{ Peer: oc.NewPeerFromConfigStruct(&p), }); err != nil { - bgpServer.Log().Warn("Failed to add Peer", + bgpServer.Log().Fatal("Failed to add Peer", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": p.State.NeighborAddress, "Error": err}) @@ -196,8 +206,10 @@ func deleteNeighbors(ctx context.Context, bgpServer *server.BgpServer, deleted [ if err := bgpServer.DeletePeer(ctx, &api.DeletePeerRequest{ Address: p.State.NeighborAddress, }); err != nil { - bgpServer.Log().Warn("Failed to delete Peer", + bgpServer.Log().Fatal("Failed to delete Peer", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": p.State.NeighborAddress, "Error": err}) @@ -214,8 +226,10 @@ func updateNeighbors(ctx context.Context, bgpServer *server.BgpServer, updated [ if u, err := bgpServer.UpdatePeer(ctx, &api.UpdatePeerRequest{ Peer: oc.NewPeerFromConfigStruct(&p), }); err != nil { - bgpServer.Log().Warn("Failed to update Peer", + bgpServer.Log().Fatal("Failed to update Peer", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Key": p.State.NeighborAddress, "Error": err}) @@ -381,8 +395,10 @@ func UpdateConfig(ctx context.Context, bgpServer *server.BgpServer, c, newConfig p := oc.ConfigSetToRoutingPolicy(newConfig) rp, err := table.NewAPIRoutingPolicyFromConfigStruct(p) if err != nil { - bgpServer.Log().Warn("failed to update policy config", + bgpServer.Log().Fatal("failed to update policy config", log.Fields{ + log.FieldFacility: log.FacilityConfig, + "Topic": "config", "Error": err}) } else { diff --git a/pkg/config/server_config_test.go b/pkg/config/server_config_test.go new file mode 100644 index 000000000..79db58e2f --- /dev/null +++ b/pkg/config/server_config_test.go @@ -0,0 +1,117 @@ +package config + +import ( + "context" + "testing" + + api "github.com/osrg/gobgp/v3/api" + "github.com/osrg/gobgp/v3/pkg/config/oc" + "github.com/osrg/gobgp/v3/pkg/log" + "github.com/osrg/gobgp/v3/pkg/server" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type configErrorLogger struct { + log.DefaultLogger + + configErrors []string +} + +func (l *configErrorLogger) Fatal(msg string, fields log.Fields) { + if fields.HasFacility(log.FacilityConfig) { + l.configErrors = append(l.configErrors, msg) + l.DefaultLogger.Error(msg, fields) + } else { + l.DefaultLogger.Fatal(msg, fields) + } +} + +func TestConfigErrors(t *testing.T) { + globalCfg := oc.Global{ + Config: oc.GlobalConfig{ + As: 1, + RouterId: "1.1.1.1", + Port: 11179, + }, + } + + for _, tt := range []struct { + name string + expectedErrors []string + cfg *oc.BgpConfigSet + }{ + { + name: "peer with a valid peer-group", + cfg: &oc.BgpConfigSet{ + Global: globalCfg, + Neighbors: []oc.Neighbor{ + { + Config: oc.NeighborConfig{ + PeerGroup: "router", + NeighborAddress: "1.1.1.2", + }, + }, + }, + PeerGroups: []oc.PeerGroup{ + { + Config: oc.PeerGroupConfig{ + PeerGroupName: "router", + PeerAs: 2, + }, + }, + }, + }, + }, + { + name: "peer without peer-group", + expectedErrors: []string{"Failed to add Peer"}, + cfg: &oc.BgpConfigSet{ + Global: globalCfg, + Neighbors: []oc.Neighbor{ + { + Config: oc.NeighborConfig{ + PeerGroup: "not-exists", + NeighborAddress: "1.1.1.2", + }, + }, + }, + }, + }, + { + name: "policy without a set", + expectedErrors: []string{"failed to create routing policy"}, + cfg: &oc.BgpConfigSet{ + Global: globalCfg, + PolicyDefinitions: []oc.PolicyDefinition{ + { + Name: "policy-without-a-set", + Statements: []oc.Statement{ + { + Conditions: oc.Conditions{ + MatchNeighborSet: oc.MatchNeighborSet{ + NeighborSet: "not-existing-neighbor-set", + }, + }, + }, + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + logger := &configErrorLogger{ + DefaultLogger: *log.NewDefaultLogger(), + } + bgpServer := server.NewBgpServer(server.LoggerOption(logger)) + go bgpServer.Serve() + + _, err := InitialConfig(ctx, bgpServer, tt.cfg, false) + bgpServer.StopBgp(ctx, &api.StopBgpRequest{}) + require.NoError(t, err) + assert.Equal(t, tt.expectedErrors, logger.configErrors) + }) + } +} diff --git a/pkg/log/logger.go b/pkg/log/logger.go index 97a783057..fb978bc89 100644 --- a/pkg/log/logger.go +++ b/pkg/log/logger.go @@ -31,8 +31,24 @@ const ( TraceLevel ) +const ( + FieldFacility = "_facility" +) + +var ( + FacilityUnspecified interface{} = "unspecified" + FacilityConfig interface{} = "config" +) + type Fields map[string]interface{} +func (fields Fields) HasFacility(facility interface{}) bool { + if fieldsFacility, hasFacility := fields[FieldFacility]; hasFacility && fieldsFacility == facility { + return true + } + return false +} + type Logger interface { Panic(msg string, fields Fields) Fatal(msg string, fields Fields) diff --git a/test/scenario_test/graceful_restart_test.py b/test/scenario_test/graceful_restart_test.py index f7617865e..3a3fd2fa0 100644 --- a/test/scenario_test/graceful_restart_test.py +++ b/test/scenario_test/graceful_restart_test.py @@ -151,6 +151,7 @@ def test_05_holdtime_expiry_graceful_restart(self): g1 = self.bgpds['g1'] g2 = self.bgpds['g2'] g3 = self.bgpds['g3'] + g2.wait_for(expected_state=BGP_FSM_ESTABLISHED, peer=g1) g1.local("ip route add blackhole {}/32".format(g2.ip_addrs[0][1].split("/")[0])) g1.local("ip route add blackhole {}/32".format(g3.ip_addrs[0][1].split("/")[0])) g2.wait_for(expected_state=BGP_FSM_ACTIVE, peer=g1)