speed up the search of tag bytes in series ID

m3db · Jan 11, 2021 · f0370d8 · f0370d8
1 parent ac80c69
commit f0370d8
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 26 deletions.
diff --git a/src/dbnode/storage/index/convert/convert.go b/src/dbnode/storage/index/convert/convert.go
@@ -32,6 +32,17 @@ import (
 	"github.com/m3db/m3/src/x/pool"
 )
 
+const (
+	// NB: this assumes that series ID has a format:
+	//   {tag1="value1",tag2="value2",...}
+	//
+	// Thus seriesIDFirstTagBytesIdx points to the 't' immediately after curly brace '{', and
+	// seriesIDDistanceBetweenTagBytes corresponds to either '="' or '",' that separate
+	// tag from it's value or value from the next tag.
+	seriesIDFirstTagBytesIdx        int = 1
+	seriesIDDistanceBetweenTagBytes int = 2
+)
+
 var (
 	// ReservedFieldNameID is the field name used to index the ID in the
 	// m3ninx subsytem.
@@ -108,22 +119,15 @@ func ValidateSeriesTag(tag ident.Tag) error {
 
 // FromSeriesIDAndTags converts the provided series id+tags into a document.
 func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {
-	clonedID := clone(id)
+	clonedID := clone(id.Bytes())
 	fields := make([]doc.Field, 0, len(tags.Values()))
+	expectedIdx := seriesIDFirstTagBytesIdx
 	for _, tag := range tags.Values() {
 		nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()
 
 		var clonedName, clonedValue []byte
-		if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
-			clonedName = clonedID[idx : idx+len(nameBytes)]
-		} else {
-			clonedName = append([]byte(nil), nameBytes...)
-		}
-		if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
-			clonedValue = clonedID[idx : idx+len(valueBytes)]
-		} else {
-			clonedValue = append([]byte(nil), valueBytes...)
-		}
+		clonedName, expectedIdx = findSliceOrClone(clonedID, nameBytes, expectedIdx)
+		clonedValue, expectedIdx = findSliceOrClone(clonedID, valueBytes, expectedIdx)
 
 		fields = append(fields, doc.Field{
 			Name:  clonedName,
@@ -143,23 +147,17 @@ func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {
 
 // FromSeriesIDAndTagIter converts the provided series id+tags into a document.
 func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) {
-	clonedID := clone(id)
+	clonedID := clone(id.Bytes())
 	fields := make([]doc.Field, 0, tags.Remaining())
+
+	expectedIdx := 1
 	for tags.Next() {
 		tag := tags.Current()
 		nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()
 
 		var clonedName, clonedValue []byte
-		if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
-			clonedName = clonedID[idx : idx+len(nameBytes)]
-		} else {
-			clonedName = append([]byte(nil), nameBytes...)
-		}
-		if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
-			clonedValue = clonedID[idx : idx+len(valueBytes)]
-		} else {
-			clonedValue = append([]byte(nil), valueBytes...)
-		}
+		clonedName, expectedIdx = findSliceOrClone(clonedID, nameBytes, expectedIdx)
+		clonedValue, expectedIdx = findSliceOrClone(clonedID, valueBytes, expectedIdx)
 
 		fields = append(fields, doc.Field{
 			Name:  clonedName,
@@ -180,6 +178,18 @@ func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata,
 	return d, nil
 }
 
+func findSliceOrClone(id, tag []byte, expectedIdx int) ([]byte, int) {
+	n := len(tag)
+	expectedEnd := expectedIdx + n
+	if expectedIdx != -1 && expectedEnd <= len(id) && bytes.Equal(id[expectedIdx:expectedEnd], tag) {
+		return id[expectedIdx:expectedEnd], expectedEnd + seriesIDDistanceBetweenTagBytes
+	} else if idx := bytes.Index(id, tag); idx != -1 {
+		return id[idx : idx+n], expectedEnd + seriesIDDistanceBetweenTagBytes
+	} else {
+		return clone(tag), -1
+	}
+}
+
 // TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries
 // to re-use bytes from the seriesID if they're also present in the tags
 // instead of re-allocating them. This requires that the ident.Tags that is
@@ -252,8 +262,7 @@ func TagsFromTagsIter(
 // NB(prateek): we take an independent copy of the bytes underlying
 // any ids provided, as we need to maintain the lifecycle of the indexed
 // bytes separately from the rest of the storage subsystem.
-func clone(id ident.ID) []byte {
-	original := id.Bytes()
+func clone(original []byte) []byte {
 	clone := make([]byte, len(original))
 	copy(clone, original)
 	return clone

diff --git a/src/dbnode/storage/index/convert/convert_benchmark_test.go b/src/dbnode/storage/index/convert/convert_benchmark_test.go
@@ -131,7 +131,7 @@ var samples = []struct {
 	},
 }
 
-// BenchmarkFromSeriesIDAndTagIter-12    	  189643	      6096 ns/op
+// BenchmarkFromSeriesIDAndTagIter-12    	  254090	      4689 ns/op
 func BenchmarkFromSeriesIDAndTagIter(b *testing.B) {
 	testData, err := prepareIDAndEncodedTags(b)
 	require.NoError(b, err)
@@ -153,7 +153,7 @@ func BenchmarkFromSeriesIDAndTagIter(b *testing.B) {
 	}
 }
 
-// BenchmarkFromSeriesIDAndTags-12       	  586689	      2584 ns/op
+// BenchmarkFromSeriesIDAndTags-12       	 1000000	      1311 ns/op
 func BenchmarkFromSeriesIDAndTags(b *testing.B) {
 	testData, err := prepareIDAndTags(b)
 	require.NoError(b, err)