Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
Be smarter in how we look at matchers. (#572)
Browse files Browse the repository at this point in the history
* Add unittests for PostingsForMatcher.

* Selector methods are all stateless, don't need a reference.

* Be smarter in how we look at matchers.

Look at all matchers to see if a label can be empty.

Optimise Not handling, so i!="2" is a simple lookup
rather than an inverse postings list.

All all the Withouts together, rather than
having to subtract each from all postings.

Change the pre-expand the postings logic to always do it before doing a
Without only. Don't do that if it's already a list.

The initial goal here was that the oft-seen pattern
i=~"something.+",i!="foo",i!="bar" becomes more efficient.

benchmark                                                            old ns/op     new ns/op     delta
BenchmarkHeadPostingForMatchers/n="1"-4                              5888          6160          +4.62%
BenchmarkHeadPostingForMatchers/n="1",j="foo"-4                      7190          6640          -7.65%
BenchmarkHeadPostingForMatchers/j="foo",n="1"-4                      6038          5923          -1.90%
BenchmarkHeadPostingForMatchers/n="1",j!="foo"-4                     6030884       4850525       -19.57%
BenchmarkHeadPostingForMatchers/i=~".*"-4                            887377940     230329137     -74.04%
BenchmarkHeadPostingForMatchers/i=~".+"-4                            490316101     319931758     -34.75%
BenchmarkHeadPostingForMatchers/i=~""-4                              594961991     130279313     -78.10%
BenchmarkHeadPostingForMatchers/i!=""-4                              537542388     318751015     -40.70%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",j="foo"-4              10460243      8565195       -18.12%
BenchmarkHeadPostingForMatchers/n="1",i=~".*",i!="2",j="foo"-4       44964267      8561546       -80.96%
BenchmarkHeadPostingForMatchers/n="1",i!="",j="foo"-4                42244885      29137737      -31.03%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",j="foo"-4              35285834      32774584      -7.12%
BenchmarkHeadPostingForMatchers/n="1",i=~"1.+",j="foo"-4             8951047       8379024       -6.39%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!="2",j="foo"-4       63813335      30672688      -51.93%
BenchmarkHeadPostingForMatchers/n="1",i=~".+",i!~"2.*",j="foo"-4     45381112      44924397      -1.01%

Signed-off-by: Brian Brazil <[email protected]>
  • Loading branch information
brian-brazil authored Apr 9, 2019
1 parent 7ab060c commit 259847a
Show file tree
Hide file tree
Showing 6 changed files with 318 additions and 56 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
- [BUGFIX] Fsync the meta file to persist it on disk to avoid data loss in case of a host crash.
- [BUGFIX] Fix fd and vm_area leak on error path in chunks.NewDirReader.
- [BUGFIX] Fix fd and vm_area leak on error path in index.NewFileReader.
- [ENHANCEMENT] Be smarter in how we look at matchers.
- [ENHANCEMENT] PostListings and NotMatcher now public.

## 0.6.1
- [BUGFIX] Update `last` after appending a non-overlapping chunk in `chunks.MergeOverlappingChunks`. [#539](https://github.com/prometheus/tsdb/pull/539)
Expand Down
8 changes: 6 additions & 2 deletions head_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) {
})
}

// TODO: generalize benchmark and pass all postings for matchers here
func BenchmarkHeadPostingForMatchers(b *testing.B) {
// Put a series, select it. GC it and then access it.
h, err := NewHead(nil, nil, nil, 1000)
testutil.Ok(b, err)
defer func() {
Expand All @@ -66,6 +64,12 @@ func BenchmarkHeadPostingForMatchers(b *testing.B) {
// Have some series that won't be matched, to properly test inverted matches.
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "0_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "1_"+strconv.Itoa(i), "j", "bar"))
hash++
h.getOrCreate(hash, labels.FromStrings("i", strconv.Itoa(i), "n", "2_"+strconv.Itoa(i), "j", "bar"))
hash++
}
}

Expand Down
16 changes: 8 additions & 8 deletions index/postings.go
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,8 @@ func (rp *removedPostings) Err() error {
return rp.remove.Err()
}

// listPostings implements the Postings interface over a plain list.
type listPostings struct {
// ListPostings implements the Postings interface over a plain list.
type ListPostings struct {
list []uint64
cur uint64
}
Expand All @@ -610,15 +610,15 @@ func NewListPostings(list []uint64) Postings {
return newListPostings(list...)
}

func newListPostings(list ...uint64) *listPostings {
return &listPostings{list: list}
func newListPostings(list ...uint64) *ListPostings {
return &ListPostings{list: list}
}

func (it *listPostings) At() uint64 {
func (it *ListPostings) At() uint64 {
return it.cur
}

func (it *listPostings) Next() bool {
func (it *ListPostings) Next() bool {
if len(it.list) > 0 {
it.cur = it.list[0]
it.list = it.list[1:]
Expand All @@ -628,7 +628,7 @@ func (it *listPostings) Next() bool {
return false
}

func (it *listPostings) Seek(x uint64) bool {
func (it *ListPostings) Seek(x uint64) bool {
// If the current value satisfies, then return.
if it.cur >= x {
return true
Expand All @@ -650,7 +650,7 @@ func (it *listPostings) Seek(x uint64) bool {
return false
}

func (it *listPostings) Err() error {
func (it *ListPostings) Err() error {
return nil
}

Expand Down
26 changes: 17 additions & 9 deletions labels/selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package labels

import (
"fmt"
"regexp"
)

Expand All @@ -36,6 +37,8 @@ type Matcher interface {
Name() string
// Matches checks whether a value fulfills the constraints.
Matches(v string) bool
// String returns a human readable matcher.
String() string
}

// EqualMatcher matches on equality.
Expand All @@ -44,13 +47,16 @@ type EqualMatcher struct {
}

// Name implements Matcher interface.
func (m *EqualMatcher) Name() string { return m.name }
func (m EqualMatcher) Name() string { return m.name }

// Matches implements Matcher interface.
func (m *EqualMatcher) Matches(v string) bool { return v == m.value }
func (m EqualMatcher) Matches(v string) bool { return v == m.value }

// String implements Matcher interface.
func (m EqualMatcher) String() string { return fmt.Sprintf("%s=%q", m.name, m.value) }

// Value returns the matched value.
func (m *EqualMatcher) Value() string { return m.value }
func (m EqualMatcher) Value() string { return m.value }

// NewEqualMatcher returns a new matcher matching an exact label value.
func NewEqualMatcher(name, value string) Matcher {
Expand All @@ -62,8 +68,9 @@ type regexpMatcher struct {
re *regexp.Regexp
}

func (m *regexpMatcher) Name() string { return m.name }
func (m *regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
func (m regexpMatcher) Name() string { return m.name }
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }

// NewRegexpMatcher returns a new matcher verifying that a value matches
// the regular expression pattern.
Expand All @@ -87,14 +94,15 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {

}

// notMatcher inverts the matching result for a matcher.
type notMatcher struct {
// NotMatcher inverts the matching result for a matcher.
type NotMatcher struct {
Matcher
}

func (m *notMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
func (m NotMatcher) Matches(v string) bool { return !m.Matcher.Matches(v) }
func (m NotMatcher) String() string { return fmt.Sprintf("not(%s)", m.Matcher.String()) }

// Not inverts the matcher's matching result.
func Not(m Matcher) Matcher {
return &notMatcher{m}
return &NotMatcher{m}
}
112 changes: 76 additions & 36 deletions querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,37 +262,92 @@ func (q *blockQuerier) Close() error {
}

// PostingsForMatchers assembles a single postings iterator against the index reader
// based on the given matchers. It returns a list of label names that must be manually
// checked to not exist in series the postings list points to.
// It returns EmptyPostings() if it can be determined beforehand that no results will be found.
// based on the given matchers.
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
var its []index.Postings
var its, notIts []index.Postings
// See which label must be non-empty.
labelMustBeSet := make(map[string]bool, len(ms))
for _, m := range ms {
if !m.Matches("") {
labelMustBeSet[m.Name()] = true
}
}

for _, m := range ms {
it, err := postingsForMatcher(ix, m)
matchesEmpty := m.Matches("")
if labelMustBeSet[m.Name()] || !matchesEmpty {
// If this matcher must be non-empty, we can be smarter.
nm, isNot := m.(*labels.NotMatcher)
if isNot && matchesEmpty { // l!="foo"
// If the label can't be empty and is a Not and the inner matcher
// doesn't match empty, then subtract it out at the end.
it, err := postingsForMatcher(ix, nm.Matcher)
if err != nil {
return nil, err
}
notIts = append(notIts, it)
} else if isNot && !matchesEmpty { // l!=""
// If the label can't be empty and is a Not, but the inner matcher can
// be empty we need to use inversePostingsForMatcher.
it, err := inversePostingsForMatcher(ix, nm.Matcher)
if err != nil {
return nil, err
}
its = append(its, it)
} else { // l="a"
// Non-Not matcher, use normal postingsForMatcher.
it, err := postingsForMatcher(ix, m)
if err != nil {
return nil, err
}
its = append(its, it)
}
} else { // l=""
// If the matchers for a labelname selects an empty value, it selects all
// the series which don't have the label name set too. See:
// https://github.com/prometheus/prometheus/issues/3575 and
// https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
it, err := inversePostingsForMatcher(ix, m)
if err != nil {
return nil, err
}
notIts = append(notIts, it)
}
}

// If there's nothing to subtract from, add in everything and remove the notIts later.
if len(its) == 0 && len(notIts) != 0 {
allPostings, err := ix.Postings(index.AllPostingsKey())
if err != nil {
return nil, err
}
its = append(its, it)
its = append(its, allPostings)
}

it := index.Intersect(its...)

for _, n := range notIts {
if _, ok := n.(*index.ListPostings); !ok {
// Best to pre-calculate the merged lists via next rather than have a ton
// of seeks in Without.
pl, err := index.ExpandPostings(n)
if err != nil {
return nil, err
}
n = index.NewListPostings(pl)
}
it = index.Without(it, n)
}
return ix.SortedPostings(index.Intersect(its...)), nil

return ix.SortedPostings(it), nil
}

func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
if m.Matches("") {
return postingsForUnsetLabelMatcher(ix, m)
}
// This method will not return postings for missing labels.

// Fast-path for equal matching.
if em, ok := m.(*labels.EqualMatcher); ok {
it, err := ix.Postings(em.Name(), em.Value())
if err != nil {
return nil, err
}
return it, nil
return ix.Postings(em.Name(), em.Value())
}

tpls, err := ix.LabelValues(m.Name())
Expand Down Expand Up @@ -328,7 +383,8 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
return index.Merge(rit...), nil
}

func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
// inversePostingsForMatcher eeturns the postings for the series with the label name set but not matching the matcher.
func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error) {
tpls, err := ix.LabelValues(m.Name())
if err != nil {
return nil, err
Expand Down Expand Up @@ -356,23 +412,7 @@ func postingsForUnsetLabelMatcher(ix IndexReader, m labels.Matcher) (index.Posti
rit = append(rit, it)
}

merged := index.Merge(rit...)
// With many many postings, it's best to pre-calculate
// the merged list via next rather than have a ton of seeks
// in Without/Intersection.
if len(rit) > 100 {
pl, err := index.ExpandPostings(merged)
if err != nil {
return nil, err
}
merged = index.NewListPostings(pl)
}

allPostings, err := ix.Postings(index.AllPostingsKey())
if err != nil {
return nil, err
}
return index.Without(allPostings, merged), nil
return index.Merge(rit...), nil
}

func mergeStrings(a, b []string) []string {
Expand Down
Loading

0 comments on commit 259847a

Please sign in to comment.