diff --git a/CHANGELOG.md b/CHANGELOG.md index efcd655521..a8ca05b54a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - [#2471](https://github.com/apache/trafficcontrol/issues/2471) - A PR check to ensure added db migration file is the latest. - [#5609](https://github.com/apache/trafficcontrol/issues/5609) - Fixed GET /servercheck filter for an extra query param. - [#5954](https://github.com/apache/trafficcontrol/issues/5954) - Traffic Ops HTTP response write errors are ignored +- [#6048](https://github.com/apache/trafficcontrol/issues/6048) - TM sometimes sets cachegroups to unavailable even though all caches are online - [#5288](https://github.com/apache/trafficcontrol/issues/5288) - Fixed the ability to create and update a server with MTU value >= 1280. - [#5284](https://github.com/apache/trafficcontrol/issues/5284) - Fixed error message when creating a server with non-existent profile - Fixed a NullPointerException in TR when a client passes a null SNI hostname in a TLS request diff --git a/traffic_monitor/datareq/datareq_test.go b/traffic_monitor/datareq/datareq_test.go index 6adfe116e9..7ed98a2d1d 100644 --- a/traffic_monitor/datareq/datareq_test.go +++ b/traffic_monitor/datareq/datareq_test.go @@ -79,14 +79,8 @@ func getMockLastHealthTimes() map[tc.CacheName]time.Duration { } func getMockCRStatesDeliveryService() tc.CRStatesDeliveryService { - numCGs := 10 - disabledLocations := []tc.CacheGroupName{} - for i := 0; i < numCGs; i++ { - disabledLocations = append(disabledLocations, tc.CacheGroupName(randStr())) - } - return tc.CRStatesDeliveryService{ - DisabledLocations: disabledLocations, + DisabledLocations: []tc.CacheGroupName{}, IsAvailable: randBool(), } } diff --git a/traffic_monitor/ds/stat.go b/traffic_monitor/ds/stat.go index 3b6076a9e0..56c88591a3 100644 --- a/traffic_monitor/ds/stat.go +++ b/traffic_monitor/ds/stat.go @@ -130,18 +130,13 @@ func addAvailableData(dsStats *dsdata.Stats, crStates tc.CRStates, serverCachegr } // TODO move to its own func? - for dsName, ds := range crStates.DeliveryService { + for dsName := range crStates.DeliveryService { stat, ok := dsStats.DeliveryService[dsName] if !ok { log.Infof("CreateStats not adding disabledLocations for '%s': not found in Stats\n", dsName) continue // TODO log warning? Error? } - - // TODO determine if a deep copy is necessary - stat.CommonStats.CachesDisabled = make([]string, len(ds.DisabledLocations), len(ds.DisabledLocations)) - for i, v := range ds.DisabledLocations { - stat.CommonStats.CachesDisabled[i] = string(v) - } + stat.CommonStats.CachesDisabled = make([]string, 0) } } diff --git a/traffic_monitor/health/cache.go b/traffic_monitor/health/cache.go index efcab8f4b0..30889d51a3 100644 --- a/traffic_monitor/health/cache.go +++ b/traffic_monitor/health/cache.go @@ -385,15 +385,10 @@ func CalcAvailability( localCacheStatuses[result.ID] = availStatus } - calculateDeliveryServiceState(toData.DeliveryServiceServers, localStates, toData) + calculateDeliveryServiceState(toData.DeliveryServiceServers, localStates) localCacheStatusThreadsafe.Set(localCacheStatuses) } -func setErr(newResult *cache.Result, err error) { - newResult.Error = err - newResult.Available = false -} - // ExceedsThresholdMsg returns a human-readable message for why the given value exceeds the threshold. It does NOT check whether the value actually exceeds the threshold; call `InThreshold` to check first. func exceedsThresholdMsg(stat string, threshold tc.HealthThreshold, val float64) string { switch threshold.Comparator { @@ -435,52 +430,16 @@ func eventDesc(status tc.CacheStatus, message string) string { } //calculateDeliveryServiceState calculates the state of delivery services from the new cache state data `cacheState` and the CRConfig data `deliveryServiceServers` and puts the calculated state in the outparam `deliveryServiceStates` -func calculateDeliveryServiceState(deliveryServiceServers map[tc.DeliveryServiceName][]tc.CacheName, states peer.CRStatesThreadsafe, toData todata.TOData) { - cacheStates := states.GetCaches() - +func calculateDeliveryServiceState(deliveryServiceServers map[tc.DeliveryServiceName][]tc.CacheName, states peer.CRStatesThreadsafe) { deliveryServices := states.GetDeliveryServices() for deliveryServiceName, deliveryServiceState := range deliveryServices { if _, ok := deliveryServiceServers[deliveryServiceName]; !ok { log.Infof("CRConfig does not have delivery service %s, but traffic monitor poller does; skipping\n", deliveryServiceName) continue } - deliveryServiceState.DisabledLocations = getDisabledLocations(deliveryServiceName, toData.DeliveryServiceServers[deliveryServiceName], cacheStates, toData.ServerCachegroups) + // NOTE: DisabledLocations is always empty, and it's important that it isn't nil, so it serialises to the JSON `[]` instead of `null`. + // It's no longer populated due to it being an unnecessary optimization for Traffic Router, but the field is kept for compatibility. + deliveryServiceState.DisabledLocations = []tc.CacheGroupName{} states.SetDeliveryService(deliveryServiceName, deliveryServiceState) } } - -func getDisabledLocations(deliveryService tc.DeliveryServiceName, deliveryServiceServers []tc.CacheName, cacheStates map[tc.CacheName]tc.IsAvailable, serverCacheGroups map[tc.CacheName]tc.CacheGroupName) []tc.CacheGroupName { - disabledLocations := []tc.CacheGroupName{} // it's important this isn't nil, so it serialises to the JSON `[]` instead of `null` - dsCacheStates := getDeliveryServiceCacheAvailability(cacheStates, deliveryServiceServers) - dsCachegroupsAvailable := getDeliveryServiceCachegroupAvailability(dsCacheStates, serverCacheGroups) - for cg, avail := range dsCachegroupsAvailable { - if avail { - continue - } - disabledLocations = append(disabledLocations, cg) - } - return disabledLocations -} - -func getDeliveryServiceCacheAvailability(cacheStates map[tc.CacheName]tc.IsAvailable, deliveryServiceServers []tc.CacheName) map[tc.CacheName]tc.IsAvailable { - dsCacheStates := map[tc.CacheName]tc.IsAvailable{} - for _, server := range deliveryServiceServers { - dsCacheStates[server] = cacheStates[tc.CacheName(server)] - } - return dsCacheStates -} - -func getDeliveryServiceCachegroupAvailability(dsCacheStates map[tc.CacheName]tc.IsAvailable, serverCachegroups map[tc.CacheName]tc.CacheGroupName) map[tc.CacheGroupName]bool { - cgAvail := map[tc.CacheGroupName]bool{} - for cache, available := range dsCacheStates { - cg, ok := serverCachegroups[cache] - if !ok { - log.Errorf("cache %v not found in cachegroups!\n", cache) - continue - } - if _, ok := cgAvail[cg]; !ok || available.IsAvailable { - cgAvail[cg] = available.IsAvailable - } - } - return cgAvail -} diff --git a/traffic_monitor/health/cache_test.go b/traffic_monitor/health/cache_test.go index f87f7443d0..013847b8ff 100644 --- a/traffic_monitor/health/cache_test.go +++ b/traffic_monitor/health/cache_test.go @@ -417,6 +417,7 @@ func TestCalcAvailabilityThresholds(t *testing.T) { localCacheStatusThreadsafe := threadsafe.NewCacheAvailableStatus() localStates := peer.NewCRStatesThreadsafe() + localStates.SetDeliveryService("myDS", tc.CRStatesDeliveryService{}) events := NewThreadsafeEvents(200) // test that a normal stat poll over the kbps threshold marks down @@ -433,6 +434,16 @@ func TestCalcAvailabilityThresholds(t *testing.T) { CalcAvailability(results, pollerName, statResultHistory, mc, toData, localCacheStatusThreadsafe, localStates, events, config.Both) + // ensure that the DisabledLocations is an empty, non-nil slice + for _, ds := range localStates.GetDeliveryServices() { + if ds.DisabledLocations == nil { + t.Error("expected: non-nil DisabledLocations, actual: nil") + } + if len(ds.DisabledLocations) > 0 { + t.Errorf("expected: empty DisabledLocations, actual: %d", len(ds.DisabledLocations)) + } + } + localCacheStatuses := localCacheStatusThreadsafe.Get() localCacheStatus, ok := localCacheStatuses[result.ID] if !ok { diff --git a/traffic_monitor/manager/statecombiner.go b/traffic_monitor/manager/statecombiner.go index 694913942f..922b69689f 100644 --- a/traffic_monitor/manager/statecombiner.go +++ b/traffic_monitor/manager/statecombiner.go @@ -21,7 +21,6 @@ package manager import ( "fmt" - "sort" "strings" "time" @@ -148,7 +147,6 @@ func combineDSState( if localDeliveryService.IsAvailable { deliveryService.IsAvailable = true } - deliveryService.DisabledLocations = localDeliveryService.DisabledLocations for peerName, iPeerStates := range peerStates.GetCrstates() { peerDeliveryService, ok := iPeerStates.DeliveryService[deliveryServiceName] @@ -159,7 +157,6 @@ func combineDSState( if peerDeliveryService.IsAvailable { deliveryService.IsAvailable = true } - deliveryService.DisabledLocations = intersection(deliveryService.DisabledLocations, peerDeliveryService.DisabledLocations) } combinedStates.SetDeliveryService(deliveryServiceName, deliveryService) } @@ -218,18 +215,3 @@ type CacheGroupNameSlice []tc.CacheGroupName func (p CacheGroupNameSlice) Len() int { return len(p) } func (p CacheGroupNameSlice) Less(i, j int) bool { return p[i] < p[j] } func (p CacheGroupNameSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -// intersection returns strings in both a and b. -// Note this modifies a and b. Specifically, it sorts them. If that isn't acceptable, pass copies of your real data. -func intersection(a []tc.CacheGroupName, b []tc.CacheGroupName) []tc.CacheGroupName { - sort.Sort(CacheGroupNameSlice(a)) - sort.Sort(CacheGroupNameSlice(b)) - c := []tc.CacheGroupName{} // important to initialize, so JSON is `[]` not `null` - for _, s := range a { - i := sort.Search(len(b), func(i int) bool { return b[i] >= s }) - if i < len(b) && b[i] == s { - c = append(c, s) - } - } - return c -}