Skip to content

Commit

Permalink
finder: tagged search tune (simplify logic)
Browse files Browse the repository at this point in the history
  • Loading branch information
msaf1980 committed Feb 15, 2022
1 parent 7f4c461 commit a4ed7cc
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 75 deletions.
2 changes: 1 addition & 1 deletion config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type IndexReverseRule struct {
}

type Costs struct {
Cost int `toml:"cost" json:"cost" comment:"default cost (for wildcarded equalence or matched with regex, or if no value cost set)"`
Cost *int `toml:"cost" json:"cost" comment:"default cost (for wildcarded equalence or matched with regex, or if no value cost set)"`
ValuesCost map[string]int `toml:"values-cost" json:"values-cost" comment:"cost with some value (for equalence without wildcards) (additional tuning, usually not needed)"`
}

Expand Down
81 changes: 35 additions & 46 deletions finder/tagged.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ type TaggedTerm struct {
Value string
HasWildcard bool // only for TaggedTermEq

Cost int // tag cost for use as primary filter (use tag with maximal selectivity). 0 by default, minimal is better.
NonDefaultCost bool
Cost int // tag cost for use as primary filter (use tag with maximal selectivity). 0 by default, minimal is better.
// __name__ tag is preferred; if some tag has better selectivity than name, set its cost to < 0
// values with wildcards or regex matching also have lower priority; if needed, set their cost to < 0
}
Expand Down Expand Up @@ -189,31 +190,19 @@ func TaggedTermWhereN(term *TaggedTerm) (string, error) {
}

func setCost(term *TaggedTerm, costs *config.Costs) {
if len(costs.ValuesCost) > 0 {
if cost, ok := costs.ValuesCost[term.Value]; ok {
term.Cost = cost
return
}
}
if term.Op == TaggedTermEq && !term.HasWildcard {
term.Cost = costs.Cost // only for non-wildcarded eq
}
}

func lessCosts(terms []TaggedTerm, i, j int) (bool, bool) {
if terms[i].Cost != terms[j].Cost {
if terms[i].Cost == 0 && (terms[i].Op != TaggedTermEq || terms[i].HasWildcard) {
return false, false
if term.Op == TaggedTermEq || term.Op == TaggedTermMatch {
if len(costs.ValuesCost) > 0 {
if cost, ok := costs.ValuesCost[term.Value]; ok {
term.Cost = cost
term.NonDefaultCost = true
return
}
}
if terms[j].Cost == 0 && (terms[j].Op != TaggedTermEq || terms[j].HasWildcard) {
return false, false
if term.Op == TaggedTermEq && !term.HasWildcard && costs.Cost != nil {
term.Cost = *costs.Cost // only for non-wildcarded eq
term.NonDefaultCost = true
}

// compare tag costs
return terms[i].Cost < terms[j].Cost, true
}

return false, false
}

func ParseTaggedConditions(conditions []string, taggedCosts map[string]*config.Costs) ([]TaggedTerm, error) {
Expand Down Expand Up @@ -274,35 +263,35 @@ func ParseTaggedConditions(conditions []string, taggedCosts map[string]*config.C
} else {
// compare with tag costs
sort.Slice(terms, func(i, j int) bool {
eq, comparable := lessCosts(terms, i, j)
if comparable {
return eq
}

if terms[i].Op < terms[j].Op {
return true
}
if terms[i].Op > terms[j].Op {
return false
}

if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[j].HasWildcard {
// globs as first eq might have bad performance
return true
// compare tag costs if both TaggedTerms have a custom cost (or one has a custom cost and the other is a non-wildcard Eq).
// this allows overriding the operator order (Eq with or without wildcards/Match); use with care
if terms[i].Cost != terms[j].Cost {
if terms[i].NonDefaultCost && terms[j].NonDefaultCost ||
(terms[j].NonDefaultCost && terms[i].Op == TaggedTermEq && !terms[i].HasWildcard) ||
(terms[i].NonDefaultCost && terms[j].Op == TaggedTermEq && !terms[j].HasWildcard) {
return terms[i].Cost < terms[j].Cost
}
}

if terms[i].Key == "__name__" && terms[j].Key != "__name__" {
return true
}
if terms[i].Op == terms[j].Op {
if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[j].HasWildcard {
// globs as first eq might have bad performance
return true
}

if (terms[i].Cost >= 0 || terms[j].Cost >= 0) && terms[i].HasWildcard == terms[j].HasWildcard {
// compare tag costs
if terms[i].Cost < terms[j].Cost {
if terms[i].Key == "__name__" && terms[j].Key != "__name__" {
return true
}
}

return false
if terms[i].Cost != terms[j].Cost && terms[i].HasWildcard == terms[j].HasWildcard {
// compare tag costs
return terms[i].Cost < terms[j].Cost
}

return false
} else {
return terms[i].Op < terms[j].Op
}
})
}

Expand Down
71 changes: 43 additions & 28 deletions finder/tagged_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ func TestParseSeriesByTag(t *testing.T) {
ok := func(query string, expected []TaggedTerm) {
p, err := ParseSeriesByTag(query, nil)
assert.NoError(err)
assert.Equal(len(expected), len(p))
length := len(expected)
if length < len(p) {
length = len(p)
Expand Down Expand Up @@ -119,14 +120,20 @@ func TestParseSeriesByTag(t *testing.T) {

}

// newInt returns a pointer to a newly allocated int holding the value i.
// It is a test helper for filling optional *int fields such as config.Costs.Cost.
func newInt(i int) *int {
	v := i
	return &v
}

func TestParseSeriesByTagWithCosts(t *testing.T) {
assert := assert.New(t)

taggedCosts := map[string]*config.Costs{
"environment": {Cost: 100},
"dc": {Cost: 60},
"project": {Cost: 50},
"__name__": {Cost: 0, ValuesCost: map[string]int{"high_cost": 70}},
"environment": {Cost: newInt(100)},
"dc": {Cost: newInt(60)},
"project": {Cost: newInt(50)},
"__name__": {Cost: newInt(0), ValuesCost: map[string]int{"high_cost": 70}},
"key": {ValuesCost: map[string]int{"value2": 70, "value3": -1, "val*4": -1, "^val.*4$": -1}},
}

Expand All @@ -150,62 +157,70 @@ func TestParseSeriesByTagWithCosts(t *testing.T) {

ok(`seriesByTag('environment=production', 'dc=west', 'key=value')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "key", Value: "value"},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
})

// Check for values cost (key=value2)
ok(`seriesByTag('environment=production', 'dc=west', 'key=value2')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60},
{Op: TaggedTermEq, Key: "key", Value: "value2", Cost: 70},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "key", Value: "value2", Cost: 70, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
})

// Check for __name__ preference
ok(`seriesByTag('environment=production', 'dc=west', 'key=value', 'name=cpu.load_avg')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg", Cost: 0, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "key", Value: "value"},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
})

// Check for __name__ preference overridden
ok(`seriesByTag('environment=production', 'dc=west', 'name=cpu.load_avg', 'key=value3')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg", Cost: 0, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
})

// wildcard (dc=west*)
ok(`seriesByTag('environment=production', 'dc=west*', 'name=cpu.load_avg', 'key=value3')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg", Cost: 0, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "dc", Value: "west*", HasWildcard: true},
})

// wildcard cost -1
ok(`seriesByTag('dc=west*', 'environment=production', 'name=cpu.load_avg', 'key=val*4')`, []TaggedTerm{
{Op: TaggedTermEq, Key: "key", Value: "val*4", Cost: -1, HasWildcard: true},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermEq, Key: "key", Value: "val*4", Cost: -1, HasWildcard: true, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg", Cost: 0, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "dc", Value: "west*", HasWildcard: true},
})

// match cost -1 - not as wildcard
ok(`seriesByTag('dc=~west.*', 'environment=production', 'name=cpu.load_avg', 'key=~^val.*4$')`, []TaggedTerm{
{Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg", Cost: 0, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
{Op: TaggedTermMatch, Key: "dc", Value: "west.*"},
})

// match cost -1 - and no cost
ok(`seriesByTag('dc=~west.*', 'environment=production', 'Name=cpu.load_avg', 'key=~^val.*4$')`, []TaggedTerm{
{Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "Name", Value: "cpu.load_avg"},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
{Op: TaggedTermMatch, Key: "dc", Value: "west.*"},
})

// reduce cost for __name__
ok(`seriesByTag('dc=~west.*', 'environment=production', 'name=high_cost', 'key=~^val.*4$', 'key2=~^val.*4$', 'key3=val.*4')`, []TaggedTerm{
{Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1},
{Op: TaggedTermEq, Key: "__name__", Value: "high_cost", Cost: 70},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100},
{Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "__name__", Value: "high_cost", Cost: 70, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true},
{Op: TaggedTermEq, Key: "key3", Value: "val.*4", HasWildcard: true},
{Op: TaggedTermMatch, Key: "dc", Value: "west.*"},
{Op: TaggedTermMatch, Key: "key2", Value: "^val.*4$"},
Expand Down

0 comments on commit a4ed7cc

Please sign in to comment.