From db514c290d747cedb9ff1d535319616fae16de57 Mon Sep 17 00:00:00 2001 From: Harshil Goel <54325286+harshil-goel@users.noreply.github.com> Date: Fri, 13 Oct 2023 22:28:41 +0800 Subject: [PATCH] perf(query): Update CompressedBin IntersectionAlgo (#9000) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated algo to intersect. Getting upto 175% improvment overall ``` goos: linux goarch: amd64 pkg: github.com/dgraph-io/dgraph/algo cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz │ in1 │ main_in │ │ sec/op │ sec/op vs base │ ListIntersectCompressBin/compressed:IntersectWith:ratio=0.01:size=100:overlap=0.01:-8 91.21n ± ∞ ¹ 387.70n ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.1:size=100:overlap=0.01:-8 336.0n ± ∞ ¹ 1733.0n ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=1:size=100:overlap=0.01:-8 1.093µ ± ∞ ¹ 3.481µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=10:size=100:overlap=0.01:-8 4.719µ ± ∞ ¹ 8.600µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=100:size=100:overlap=0.01:-8 19.76µ ± ∞ ¹ 44.32µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.01:size=1000:overlap=0.01:-8 481.7n ± ∞ ¹ 784.2n ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.1:size=1000:overlap=0.01:-8 1.502µ ± ∞ ¹ 3.667µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=1:size=1000:overlap=0.01:-8 5.454µ ± ∞ ¹ 36.977µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=10:size=1000:overlap=0.01:-8 33.14µ ± ∞ ¹ 70.73µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=100:size=1000:overlap=0.01:-8 179.8µ ± ∞ ¹ 485.1µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.01:size=10000:overlap=0.01:-8 3.214µ ± ∞ ¹ 4.459µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.1:size=10000:overlap=0.01:-8 12.00µ ± ∞ ¹ 32.78µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=1:size=10000:overlap=0.01:-8 76.90µ ± ∞ ¹ 356.05µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=10:size=10000:overlap=0.01:-8 371.6µ ± ∞ ¹ 923.1µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=100:size=10000:overlap=0.01:-8 1.894m ± ∞ ¹ 5.141m ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.01:size=100000:overlap=0.01:-8 48.13µ ± ∞ ¹ 60.59µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.1:size=100000:overlap=0.01:-8 146.7µ ± ∞ ¹ 611.9µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=1:size=100000:overlap=0.01:-8 943.7µ ± ∞ ¹ 3359.6µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=10:size=100000:overlap=0.01:-8 3.926m ± ∞ ¹ 8.849m ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=100:size=100000:overlap=0.01:-8 20.78m ± ∞ ¹ ListIntersectCompressBin/compressed:IntersectWith:ratio=0.01:size=1000000:overlap=0.01:-8 862.4µ ± ∞ ¹ 1142.5µ ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=0.1:size=1000000:overlap=0.01:-8 1.599m ± ∞ ¹ 7.878m ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=1:size=1000000:overlap=0.01:-8 9.791m ± ∞ ¹ 33.871m ± ∞ ¹ ~ (p=1.000 n=1) ² ListIntersectCompressBin/compressed:IntersectWith:ratio=10:size=1000000:overlap=0.01:-8 44.40m ± ∞ ¹ geomean 66.19µ 104.6µ +175.93% ³ ``` --- algo/uidlist.go | 55 ++++++++++++++++++++-------------------- algo/uidlist_test.go | 60 ++++++++++++++++++++++++++++++++++++++------ codec/codec.go | 58 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 34 deletions(-) diff --git a/algo/uidlist.go b/algo/uidlist.go index 1465d4ff2fe..cb37232f3bd 100644 --- a/algo/uidlist.go +++ b/algo/uidlist.go @@ -24,7 +24,8 @@ import ( "github.com/dgraph-io/dgraph/protos/pb" ) -const jump = 32 // Jump size in InsersectWithJump. +const jump = 32 // Jump size in InsersectWithJump. +const linVsBinRatio = 10 // When is linear search better than binary // ApplyFilter applies a filter to our UIDList. func ApplyFilter(u *pb.List, f func(uint64, int) bool) { @@ -60,7 +61,7 @@ func IntersectCompressedWith(pack *pb.UidPack, afterUID uint64, v, o *pb.List) { // Select appropriate function based on heuristics. ratio := float64(m) / float64(n) - if ratio < 500 { + if ratio < linVsBinRatio { IntersectCompressedWithLinJump(&dec, v.Uids, &dst) } else { IntersectCompressedWithBin(&dec, v.Uids, &dst) @@ -94,7 +95,7 @@ func IntersectCompressedWithLinJump(dec *codec.Decoder, v []uint64, o *[]uint64) // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3 // Call seek on dec before calling this function func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) { - ld := dec.ApproxLen() + ld := codec.ExactLen(dec.Pack) lq := len(q) if lq == 0 { @@ -105,46 +106,44 @@ func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) { } // Pick the shorter list and do binary search - if ld < lq { + if ld <= lq { for { blockUids := dec.Uids() if len(blockUids) == 0 { break } - IntersectWithBin(blockUids, q, o) - lastUid := blockUids[len(blockUids)-1] - qidx := sort.Search(len(q), func(idx int) bool { - return q[idx] >= lastUid - }) - if qidx >= len(q) { + _, off := IntersectWithJump(blockUids, q, o) + q = q[off:] + if len(q) == 0 { return } - q = q[qidx:] dec.Next() } return } - var uids []uint64 - for _, u := range q { + uids := dec.Uids() + qidx := 0 + for { + if qidx >= len(q) { + return + } + u := q[qidx] if len(uids) == 0 || u > uids[len(uids)-1] { - uids = dec.Seek(u, codec.SeekStart) + if lq*linVsBinRatio < ld { + uids = dec.LinearSeek(u) + } else { + uids = dec.SeekToBlock(u, codec.SeekCurrent) + } if len(uids) == 0 { return } } - uidIdx := sort.Search(len(uids), func(idx int) bool { - return uids[idx] >= u - }) - if uidIdx >= len(uids) { - // We know that u < max(uids). If we didn't find it here, it's not here. - continue - } - if uids[uidIdx] == u { - *o = append(*o, u) - uidIdx++ + _, off := IntersectWithJump(uids, q[qidx:], o) + if off == 0 { + off = 1 // if v[k] isn't in u, move forward } - uids = uids[uidIdx:] + qidx += off } } @@ -233,7 +232,8 @@ func IntersectWithJump(u, v []uint64, o *[]uint64) (int, int) { // IntersectWithBin is based on the paper // "Fast Intersection Algorithms for Sorted Sequences" // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3 -func IntersectWithBin(d, q []uint64, o *[]uint64) { +// Returns where to move the second array(q) to. O means not found +func IntersectWithBin(d, q []uint64, o *[]uint64) int { ld := len(d) lq := len(q) @@ -242,7 +242,7 @@ func IntersectWithBin(d, q []uint64, o *[]uint64) { d, q = q, d } if ld == 0 || lq == 0 || d[ld-1] < q[0] || q[lq-1] < d[0] { - return + return 0 } val := d[0] @@ -256,6 +256,7 @@ func IntersectWithBin(d, q []uint64, o *[]uint64) { }) binIntersect(d, q[minq:maxq], o) + return maxq } // binIntersect is the recursive function used. diff --git a/algo/uidlist_test.go b/algo/uidlist_test.go index 05eafba68e3..50fddb30b38 100644 --- a/algo/uidlist_test.go +++ b/algo/uidlist_test.go @@ -373,7 +373,7 @@ func BenchmarkListIntersectCompressBin(b *testing.B) { for _, r := range rs { sz1 := sz sz2 := int(float64(sz) * r) - if sz2 > 1000000 || sz2 == 0 { + if sz2 > 10000000 || sz2 == 0 { break } @@ -389,8 +389,18 @@ func BenchmarkListIntersectCompressBin(b *testing.B) { sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] }) dst2 := &pb.List{} + dst1 := &pb.List{} compressedUids := codec.Encode(v1, 256) + b.Run(fmt.Sprintf("linJump:IntersectWith:ratio=%v:size=%d:overlap=%.2f:", r, sz, overlap), + func(b *testing.B) { + for k := 0; k < b.N; k++ { + dec := codec.Decoder{Pack: compressedUids} + dec.Seek(0, codec.SeekStart) + IntersectCompressedWithLinJump(&dec, u1, &dst1.Uids) + } + }) + b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%v:size=%d:overlap=%.2f:", r, sz, overlap), func(b *testing.B) { for k := 0; k < b.N; k++ { @@ -399,7 +409,6 @@ func BenchmarkListIntersectCompressBin(b *testing.B) { IntersectCompressedWithBin(&dec, u1, &dst2.Uids) } }) - fmt.Println() codec.FreePack(compressedUids) } @@ -493,6 +502,43 @@ func sortUint64(nums []uint64) { sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] }) } +func fillNumsDiff(N1, N2, N3 int) ([]uint64, []uint64, []uint64) { + rand.Seed(time.Now().UnixNano()) + + commonNums := make([]uint64, N1) + blockNums := make([]uint64, N1+N2) + otherNums := make([]uint64, N1+N3) + allC := make(map[uint64]bool) + + for i := 0; i < N1; i++ { + val := rand.Uint64() % 1000 + commonNums[i] = val + blockNums[i] = val + otherNums[i] = val + allC[val] = true + } + + for i := N1; i < N1+N2; i++ { + val := rand.Uint64() % 1000 + blockNums[i] = val + allC[val] = true + } + + for i := N1; i < N1+N3; i++ { + val := rand.Uint64() + for ok := true; ok; _, ok = allC[val] { + val = rand.Uint64() % 1000 + } + otherNums[i] = val + } + + sortUint64(commonNums) + sortUint64(blockNums) + sortUint64(otherNums) + + return commonNums, blockNums, otherNums +} + func fillNums(N1, N2 int) ([]uint64, []uint64, []uint64) { rand.Seed(time.Now().UnixNano()) @@ -545,12 +591,12 @@ func TestIntersectCompressedWithLinJump(t *testing.T) { } func TestIntersectCompressedWithBin(t *testing.T) { - lengths := []int{0, 1, 3, 11, 100} + //lengths := []int{0, 1, 3, 11, 100, 500, 1000} - for _, N1 := range lengths { - for _, N2 := range lengths { + for _, N1 := range []int{11} { + for _, N2 := range []int{3} { // Intersection of blockNums and otherNums is commonNums. - commonNums, blockNums, otherNums := fillNums(N1, N2) + commonNums, blockNums, otherNums := fillNumsDiff(N1/10, N1, N2) enc := codec.Encoder{BlockSize: 10} for _, num := range blockNums { @@ -570,7 +616,7 @@ func TestIntersectCompressedWithBin(t *testing.T) { } func TestIntersectCompressedWithBinMissingSize(t *testing.T) { - lengths := []int{0, 1, 3, 11, 100} + lengths := []int{0, 1, 3, 11, 100, 500, 1000} for _, N1 := range lengths { for _, N2 := range lengths { diff --git a/codec/codec.go b/codec/codec.go index 4ebc17a341d..60359f474fa 100644 --- a/codec/codec.go +++ b/codec/codec.go @@ -223,6 +223,64 @@ func (d *Decoder) ApproxLen() int { type searchFunc func(int) bool +// SeekToBlock will find the block containing the uid, and unpack it. When we are going to +// intersect the list later, this function is useful. As this function skips the search function +// and returns the entire block, it is faster than Seek. Unlike seek, we don't truncate the uids +// returned, which would be done by the intersect function anyways. +func (d *Decoder) SeekToBlock(uid uint64, whence seekPos) []uint64 { + if d.Pack == nil { + return []uint64{} + } + prevBlockIdx := d.blockIdx + d.blockIdx = 0 + if uid == 0 { + return d.UnpackBlock() + } + + // If for some reason we are searching an older uid, we need to search the entire pack + if prevBlockIdx > 0 && uid < d.Pack.Blocks[prevBlockIdx].Base { + prevBlockIdx = 0 + } + + blocksFunc := func() searchFunc { + var f searchFunc + switch whence { + case SeekStart: + f = func(i int) bool { return d.Pack.Blocks[i+prevBlockIdx].Base >= uid } + case SeekCurrent: + f = func(i int) bool { return d.Pack.Blocks[i+prevBlockIdx].Base > uid } + } + return f + } + + idx := sort.Search(len(d.Pack.Blocks[prevBlockIdx:]), blocksFunc()) + prevBlockIdx + // The first block.Base >= uid. + if idx == 0 { + return d.UnpackBlock() + } + // The uid is the first entry in the block. + if idx < len(d.Pack.Blocks) && d.Pack.Blocks[idx].Base == uid { + d.blockIdx = idx + return d.UnpackBlock() + } + + // Either the idx = len(pack.Blocks) that means it wasn't found in any of the block's base. Or, + // we found the first block index whose base is greater than uid. In these cases, go to the + // previous block and search there. + d.blockIdx = idx - 1 // Move to the previous block. If blockIdx<0, unpack will deal with it. + if d.blockIdx != prevBlockIdx { + d.UnpackBlock() // And get all their uids. + } + + if uid <= d.uids[len(d.uids)-1] { + return d.uids + } + + // Could not find any uid in the block, which is >= uid. The next block might still have valid + // entries > uid. + return d.Next() +} + // Seek will search for uid in a packed block using the specified whence position. // The value of whence must be one of the predefined values SeekStart or SeekCurrent. // SeekStart searches uid and includes it as part of the results.