Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TM: do not populate delivery service disabled locations #6051

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- [#2471](https://github.com/apache/trafficcontrol/issues/2471) - A PR check to ensure added db migration file is the latest.
- [#5609](https://github.com/apache/trafficcontrol/issues/5609) - Fixed GET /servercheck filter for an extra query param.
- [#5954](https://github.com/apache/trafficcontrol/issues/5954) - Traffic Ops HTTP response write errors are ignored
- [#6048](https://github.com/apache/trafficcontrol/issues/6048) - TM sometimes sets cachegroups to unavailable even though all caches are online
- [#5288](https://github.com/apache/trafficcontrol/issues/5288) - Fixed the ability to create and update a server with MTU value >= 1280.
- [#5284](https://github.com/apache/trafficcontrol/issues/5284) - Fixed error message when creating a server with non-existent profile
- Fixed a NullPointerException in TR when a client passes a null SNI hostname in a TLS request
Expand Down
8 changes: 1 addition & 7 deletions traffic_monitor/datareq/datareq_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,8 @@ func getMockLastHealthTimes() map[tc.CacheName]time.Duration {
}

func getMockCRStatesDeliveryService() tc.CRStatesDeliveryService {
numCGs := 10
disabledLocations := []tc.CacheGroupName{}
for i := 0; i < numCGs; i++ {
disabledLocations = append(disabledLocations, tc.CacheGroupName(randStr()))
}

return tc.CRStatesDeliveryService{
DisabledLocations: disabledLocations,
DisabledLocations: []tc.CacheGroupName{},
IsAvailable: randBool(),
}
}
Expand Down
9 changes: 2 additions & 7 deletions traffic_monitor/ds/stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,18 +130,13 @@ func addAvailableData(dsStats *dsdata.Stats, crStates tc.CRStates, serverCachegr
}

// TODO move to its own func?
for dsName, ds := range crStates.DeliveryService {
for dsName := range crStates.DeliveryService {
stat, ok := dsStats.DeliveryService[dsName]
if !ok {
log.Infof("CreateStats not adding disabledLocations for '%s': not found in Stats\n", dsName)
continue // TODO log warning? Error?
}

// TODO determine if a deep copy is necessary
stat.CommonStats.CachesDisabled = make([]string, len(ds.DisabledLocations), len(ds.DisabledLocations))
for i, v := range ds.DisabledLocations {
stat.CommonStats.CachesDisabled[i] = string(v)
}
stat.CommonStats.CachesDisabled = make([]string, 0)
}
}

Expand Down
51 changes: 5 additions & 46 deletions traffic_monitor/health/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,15 +385,10 @@ func CalcAvailability(

localCacheStatuses[result.ID] = availStatus
}
calculateDeliveryServiceState(toData.DeliveryServiceServers, localStates, toData)
calculateDeliveryServiceState(toData.DeliveryServiceServers, localStates)
localCacheStatusThreadsafe.Set(localCacheStatuses)
}

func setErr(newResult *cache.Result, err error) {
newResult.Error = err
newResult.Available = false
}

// ExceedsThresholdMsg returns a human-readable message for why the given value exceeds the threshold. It does NOT check whether the value actually exceeds the threshold; call `InThreshold` to check first.
func exceedsThresholdMsg(stat string, threshold tc.HealthThreshold, val float64) string {
switch threshold.Comparator {
Expand Down Expand Up @@ -435,52 +430,16 @@ func eventDesc(status tc.CacheStatus, message string) string {
}

//calculateDeliveryServiceState calculates the state of delivery services from the new cache state data `cacheState` and the CRConfig data `deliveryServiceServers` and puts the calculated state in the outparam `deliveryServiceStates`
func calculateDeliveryServiceState(deliveryServiceServers map[tc.DeliveryServiceName][]tc.CacheName, states peer.CRStatesThreadsafe, toData todata.TOData) {
cacheStates := states.GetCaches()

func calculateDeliveryServiceState(deliveryServiceServers map[tc.DeliveryServiceName][]tc.CacheName, states peer.CRStatesThreadsafe) {
deliveryServices := states.GetDeliveryServices()
for deliveryServiceName, deliveryServiceState := range deliveryServices {
if _, ok := deliveryServiceServers[deliveryServiceName]; !ok {
log.Infof("CRConfig does not have delivery service %s, but traffic monitor poller does; skipping\n", deliveryServiceName)
continue
}
deliveryServiceState.DisabledLocations = getDisabledLocations(deliveryServiceName, toData.DeliveryServiceServers[deliveryServiceName], cacheStates, toData.ServerCachegroups)
// NOTE: DisabledLocations is always empty, and it's important that it isn't nil, so it serialises to the JSON `[]` instead of `null`.
// It's no longer populated due to it being an unnecessary optimization for Traffic Router, but the field is kept for compatibility.
deliveryServiceState.DisabledLocations = []tc.CacheGroupName{}
states.SetDeliveryService(deliveryServiceName, deliveryServiceState)
}
}

func getDisabledLocations(deliveryService tc.DeliveryServiceName, deliveryServiceServers []tc.CacheName, cacheStates map[tc.CacheName]tc.IsAvailable, serverCacheGroups map[tc.CacheName]tc.CacheGroupName) []tc.CacheGroupName {
disabledLocations := []tc.CacheGroupName{} // it's important this isn't nil, so it serialises to the JSON `[]` instead of `null`
dsCacheStates := getDeliveryServiceCacheAvailability(cacheStates, deliveryServiceServers)
dsCachegroupsAvailable := getDeliveryServiceCachegroupAvailability(dsCacheStates, serverCacheGroups)
for cg, avail := range dsCachegroupsAvailable {
if avail {
continue
}
disabledLocations = append(disabledLocations, cg)
}
return disabledLocations
}

func getDeliveryServiceCacheAvailability(cacheStates map[tc.CacheName]tc.IsAvailable, deliveryServiceServers []tc.CacheName) map[tc.CacheName]tc.IsAvailable {
dsCacheStates := map[tc.CacheName]tc.IsAvailable{}
for _, server := range deliveryServiceServers {
dsCacheStates[server] = cacheStates[tc.CacheName(server)]
}
return dsCacheStates
}

func getDeliveryServiceCachegroupAvailability(dsCacheStates map[tc.CacheName]tc.IsAvailable, serverCachegroups map[tc.CacheName]tc.CacheGroupName) map[tc.CacheGroupName]bool {
cgAvail := map[tc.CacheGroupName]bool{}
for cache, available := range dsCacheStates {
cg, ok := serverCachegroups[cache]
if !ok {
log.Errorf("cache %v not found in cachegroups!\n", cache)
continue
}
if _, ok := cgAvail[cg]; !ok || available.IsAvailable {
cgAvail[cg] = available.IsAvailable
}
}
return cgAvail
}
11 changes: 11 additions & 0 deletions traffic_monitor/health/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ func TestCalcAvailabilityThresholds(t *testing.T) {

localCacheStatusThreadsafe := threadsafe.NewCacheAvailableStatus()
localStates := peer.NewCRStatesThreadsafe()
localStates.SetDeliveryService("myDS", tc.CRStatesDeliveryService{})
events := NewThreadsafeEvents(200)

// test that a normal stat poll over the kbps threshold marks down
Expand All @@ -433,6 +434,16 @@ func TestCalcAvailabilityThresholds(t *testing.T) {

CalcAvailability(results, pollerName, statResultHistory, mc, toData, localCacheStatusThreadsafe, localStates, events, config.Both)

// ensure that the DisabledLocations is an empty, non-nil slice
for _, ds := range localStates.GetDeliveryServices() {
if ds.DisabledLocations == nil {
t.Error("expected: non-nil DisabledLocations, actual: nil")
}
if len(ds.DisabledLocations) > 0 {
t.Errorf("expected: empty DisabledLocations, actual: %d", len(ds.DisabledLocations))
}
}

localCacheStatuses := localCacheStatusThreadsafe.Get()
localCacheStatus, ok := localCacheStatuses[result.ID]
if !ok {
Expand Down
18 changes: 0 additions & 18 deletions traffic_monitor/manager/statecombiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ package manager

import (
"fmt"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -148,7 +147,6 @@ func combineDSState(
if localDeliveryService.IsAvailable {
deliveryService.IsAvailable = true
}
deliveryService.DisabledLocations = localDeliveryService.DisabledLocations

for peerName, iPeerStates := range peerStates.GetCrstates() {
peerDeliveryService, ok := iPeerStates.DeliveryService[deliveryServiceName]
Expand All @@ -159,7 +157,6 @@ func combineDSState(
if peerDeliveryService.IsAvailable {
deliveryService.IsAvailable = true
}
deliveryService.DisabledLocations = intersection(deliveryService.DisabledLocations, peerDeliveryService.DisabledLocations)
}
combinedStates.SetDeliveryService(deliveryServiceName, deliveryService)
}
Expand Down Expand Up @@ -218,18 +215,3 @@ type CacheGroupNameSlice []tc.CacheGroupName
func (p CacheGroupNameSlice) Len() int { return len(p) }
func (p CacheGroupNameSlice) Less(i, j int) bool { return p[i] < p[j] }
func (p CacheGroupNameSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// intersection returns strings in both a and b.
// Note this modifies a and b. Specifically, it sorts them. If that isn't acceptable, pass copies of your real data.
func intersection(a []tc.CacheGroupName, b []tc.CacheGroupName) []tc.CacheGroupName {
sort.Sort(CacheGroupNameSlice(a))
sort.Sort(CacheGroupNameSlice(b))
c := []tc.CacheGroupName{} // important to initialize, so JSON is `[]` not `null`
for _, s := range a {
i := sort.Search(len(b), func(i int) bool { return b[i] >= s })
if i < len(b) && b[i] == s {
c = append(c, s)
}
}
return c
}