Skip to content

Commit

Permalink
speed up the search of tag bytes in series ID
Browse files Browse the repository at this point in the history
  • Loading branch information
vpranckaitis committed Jan 11, 2021
1 parent ac80c69 commit f0370d8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
57 changes: 33 additions & 24 deletions src/dbnode/storage/index/convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ import (
"github.com/m3db/m3/src/x/pool"
)

const (
// NB: these constants assume that a series ID has the format:
// {tag1="value1",tag2="value2",...}
//
// seriesIDFirstTagBytesIdx is the index of the first byte of the first tag
// name, i.e. the 't' immediately after the opening curly brace '{'.
//
// seriesIDDistanceBetweenTagBytes is the length of the separator, either '="'
// or '",', that separates a tag name from its value or a value from the next
// tag name.
seriesIDFirstTagBytesIdx int = 1
seriesIDDistanceBetweenTagBytes int = 2
)

var (
// ReservedFieldNameID is the field name used to index the ID in the
// m3ninx subsystem.
Expand Down Expand Up @@ -108,22 +119,15 @@ func ValidateSeriesTag(tag ident.Tag) error {

// FromSeriesIDAndTags converts the provided series id+tags into a document.
func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {
clonedID := clone(id)
clonedID := clone(id.Bytes())
fields := make([]doc.Field, 0, len(tags.Values()))
expectedIdx := seriesIDFirstTagBytesIdx
for _, tag := range tags.Values() {
nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()

var clonedName, clonedValue []byte
if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
clonedName = clonedID[idx : idx+len(nameBytes)]
} else {
clonedName = append([]byte(nil), nameBytes...)
}
if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
clonedValue = clonedID[idx : idx+len(valueBytes)]
} else {
clonedValue = append([]byte(nil), valueBytes...)
}
clonedName, expectedIdx = findSliceOrClone(clonedID, nameBytes, expectedIdx)
clonedValue, expectedIdx = findSliceOrClone(clonedID, valueBytes, expectedIdx)

fields = append(fields, doc.Field{
Name: clonedName,
Expand All @@ -143,23 +147,17 @@ func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {

// FromSeriesIDAndTagIter converts the provided series id+tags into a document.
func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) {
clonedID := clone(id)
clonedID := clone(id.Bytes())
fields := make([]doc.Field, 0, tags.Remaining())

expectedIdx := 1
for tags.Next() {
tag := tags.Current()
nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()

var clonedName, clonedValue []byte
if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
clonedName = clonedID[idx : idx+len(nameBytes)]
} else {
clonedName = append([]byte(nil), nameBytes...)
}
if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
clonedValue = clonedID[idx : idx+len(valueBytes)]
} else {
clonedValue = append([]byte(nil), valueBytes...)
}
clonedName, expectedIdx = findSliceOrClone(clonedID, nameBytes, expectedIdx)
clonedValue, expectedIdx = findSliceOrClone(clonedID, valueBytes, expectedIdx)

fields = append(fields, doc.Field{
Name: clonedName,
Expand All @@ -180,6 +178,18 @@ func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata,
return d, nil
}

// findSliceOrClone returns a slice holding the same bytes as tag, preferring a
// sub-slice of id over a fresh allocation, together with the index in id at
// which the next tag name or value is expected to start.
//
// The lookup order is:
//  1. id[expectedIdx:expectedIdx+len(tag)], when expectedIdx is non-negative
//     and that range lies within id;
//  2. the first occurrence of tag anywhere in id (bytes.Index);
//  3. a fresh copy of tag, when id does not contain it.
//
// The returned index is the end of the matched range plus
// seriesIDDistanceBetweenTagBytes, or -1 when tag had to be cloned. A negative
// expectedIdx makes the next call skip step 1 and go straight to the search.
func findSliceOrClone(id, tag []byte, expectedIdx int) ([]byte, int) {
	n := len(tag)

	// Fast path: the tag bytes sit exactly where the previous call predicted.
	// Any negative hint (not only -1) is treated as "no hint" so that the slice
	// expression below can never be evaluated with a negative bound.
	if expectedIdx >= 0 {
		if end := expectedIdx + n; end <= len(id) && bytes.Equal(id[expectedIdx:end], tag) {
			return id[expectedIdx:end], end + seriesIDDistanceBetweenTagBytes
		}
	}

	// Slow path: search the whole ID for the tag bytes.
	if idx := bytes.Index(id, tag); idx != -1 {
		end := idx + n
		// Derive the next hint from where the tag was actually found rather
		// than from the stale expected position, so that the fast path can
		// resynchronize on subsequent calls.
		return id[idx:end], end + seriesIDDistanceBetweenTagBytes
	}

	// The ID does not contain the tag bytes: take an independent copy.
	return clone(tag), -1
}

// TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries
// to re-use bytes from the seriesID if they're also present in the tags
// instead of re-allocating them. This requires that the ident.Tags that is
Expand Down Expand Up @@ -252,8 +262,7 @@ func TagsFromTagsIter(
// NB(prateek): we take an independent copy of the bytes underlying
// any ids provided, as we need to maintain the lifecycle of the indexed
// bytes separately from the rest of the storage subsystem.
func clone(id ident.ID) []byte {
original := id.Bytes()
func clone(original []byte) []byte {
clone := make([]byte, len(original))
copy(clone, original)
return clone
Expand Down
4 changes: 2 additions & 2 deletions src/dbnode/storage/index/convert/convert_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ var samples = []struct {
},
}

// BenchmarkFromSeriesIDAndTagIter-12 189643 6096 ns/op
// BenchmarkFromSeriesIDAndTagIter-12 254090 4689 ns/op
func BenchmarkFromSeriesIDAndTagIter(b *testing.B) {
testData, err := prepareIDAndEncodedTags(b)
require.NoError(b, err)
Expand All @@ -153,7 +153,7 @@ func BenchmarkFromSeriesIDAndTagIter(b *testing.B) {
}
}

// BenchmarkFromSeriesIDAndTags-12 586689 2584 ns/op
// BenchmarkFromSeriesIDAndTags-12 1000000 1311 ns/op
func BenchmarkFromSeriesIDAndTags(b *testing.B) {
testData, err := prepareIDAndTags(b)
require.NoError(b, err)
Expand Down

0 comments on commit f0370d8

Please sign in to comment.