From a576cc05996dc4d63957783fde85ca46f649aad5 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 15:45:13 +0800 Subject: [PATCH 01/41] iterator copy init --- executor/joiner.go | 43 ++++++++++++++++++++++--------- util/chunk/chunk.go | 63 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 12 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 562afa75dc6be..ffb19a547d425 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -142,6 +142,14 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } +func makeJoinRightRowsToChunk(chk *chunk.Chunk, lhser chunk.Iterator, rhs chunk.Row, l int) { + chk.AppendRightMultiRows(lhser, rhs, l) +} + +func makeJoinRowsToChunk(chk *chunk.Chunk, lhs chunk.Row, rhser chunk.Iterator, rowLen int) { + chk.AppendMultiRows(lhs, rhser, rowLen) +} + func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { j.selected, err = expression.VectorizedFilter(j.ctx, j.conditions, chunk.NewIterator4Chunk(input), j.selected) if err != nil { @@ -171,23 +179,34 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu inners.ReachEnd() return true, nil } - - for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { + rowsLen := 64 + for { j.chk.Reset() if j.outerIsRight { - j.makeJoinRowToChunk(j.chk, inner, outer) + makeJoinRightRowsToChunk(j.chk, inners, outer, rowsLen) } else { - j.makeJoinRowToChunk(j.chk, outer, inner) + makeJoinRowsToChunk(j.chk, outer, inners, rowsLen) } - - matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) - if err != nil { - return false, errors.Trace(err) + //j.chk.Reset() + //if j.outerIsRight { + // j.makeJoinRowToChunk(j.chk, inner, outer) + //} else { + // j.makeJoinRowToChunk(j.chk, outer, inner) + //} + + for i := 0; i < j.chk.NumRows(); i++ { + matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(i)) + if err != nil { + return false, errors.Trace(err) + } + if matched { + chk.AppendPartialRow(0, outer) + inners.ReachEnd() + return true, nil + } } - if matched { - chk.AppendPartialRow(0, outer) - inners.ReachEnd() - return true, nil + if inners.Current() == inners.End() { + break } } return false, nil diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index ccdcdc7219d8c..877a066b62160 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -160,6 +160,69 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } +func (c *Chunk) AppendPartialRows(colIdx int, rower Iterator, maxLen int) int { + columns := rower.Current().c.columns + oldLen := c.columns[colIdx+0].length + for i, rowCol := range columns { + chkCol := c.columns[colIdx+i] + + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 { + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + offset := row.idx * elemLen + chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) + chkCol.length++ + } + + } else { + for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 { + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = append(chkCol.data, rowCol.data[start:end]...) + chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + chkCol.length++ + } + } + + } + return c.columns[colIdx+0].length - oldLen +} + +func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, l int) { + for i, rowCol := range row.c.columns { + chkCol := c.columns[colIdx+i] + for j := 0; j < l; j++ { + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + } + for j := 0; j < l; j++ { + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + offset := row.idx * elemLen + chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = append(chkCol.data, rowCol.data[start:end]...) + chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + } + } + chkCol.length += l + } +} + +func (c *Chunk) AppendRightMultiRows(lhser Iterator, rhs Row, maxLen int) { + c.numVirtualRows += maxLen + lhsLen := lhser.Current().Len() + rowsLen := c.AppendPartialRows(0, lhser, maxLen) + c.AppendPartialSameRows(lhsLen, rhs, rowsLen) +} + +func (c *Chunk) AppendMultiRows(lhs Row, rhser Iterator, maxLen int) { + c.numVirtualRows += maxLen + rowsLen := c.AppendPartialRows(lhs.Len(), rhser, maxLen) + c.AppendPartialSameRows(0, lhs, rowsLen) +} + // Append appends rows in [begin, end) in another Chunk to a Chunk. func (c *Chunk) Append(other *Chunk, begin, end int) { for colID, src := range other.columns { From 79b74e79cb877c9232e775377236d49b8c453688 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 17:00:18 +0800 Subject: [PATCH 02/41] fix iterator bug --- executor/joiner.go | 19 ++++++++++--------- util/chunk/chunk.go | 29 +++++++++++++++++------------ util/chunk/iterator.go | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index ffb19a547d425..d23cdf96251cd 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -14,6 +14,7 @@ package executor import ( + "fmt" "github.com/juju/errors" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/plan" @@ -142,12 +143,12 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -func makeJoinRightRowsToChunk(chk *chunk.Chunk, lhser chunk.Iterator, rhs chunk.Row, l int) { - chk.AppendRightMultiRows(lhser, rhs, l) +func makeJoinRightRowsToChunk(chk *chunk.Chunk, lhser chunk.Iterator, rhs chunk.Row, maxLen int) int { + return chk.AppendRightMultiRows(lhser, rhs, maxLen) } -func makeJoinRowsToChunk(chk *chunk.Chunk, lhs chunk.Row, rhser chunk.Iterator, rowLen int) { - chk.AppendMultiRows(lhs, rhser, rowLen) +func makeJoinRowsToChunk(chk *chunk.Chunk, lhs chunk.Row, rhser chunk.Iterator, maxLen int) int { + return chk.AppendMultiRows(lhs, rhser, maxLen) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -183,9 +184,9 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu for { j.chk.Reset() if j.outerIsRight { - makeJoinRightRowsToChunk(j.chk, inners, outer, rowsLen) + rowsLen = makeJoinRightRowsToChunk(j.chk, inners, outer, 64) } else { - makeJoinRowsToChunk(j.chk, outer, inners, rowsLen) + rowsLen = makeJoinRowsToChunk(j.chk, outer, inners, 64) } //j.chk.Reset() //if j.outerIsRight { @@ -193,8 +194,8 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu //} else { // j.makeJoinRowToChunk(j.chk, outer, inner) //} - - for i := 0; i < j.chk.NumRows(); i++ { + fmt.Printf("idx: %d, len: %d\n", 0, rowsLen) + for i := 0; i < rowsLen; i++ { matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(i)) if err != nil { return false, errors.Trace(err) @@ -205,7 +206,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } } - if inners.Current() == inners.End() { + if rowsLen < 64 { break } } diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 877a066b62160..8e1f953722d01 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -160,11 +160,16 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } -func (c *Chunk) AppendPartialRows(colIdx int, rower Iterator, maxLen int) int { - columns := rower.Current().c.columns - oldLen := c.columns[colIdx+0].length +func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { + + oldRowLen := c.columns[colIdx+0].length + columns := rowIt.Current().c.columns for i, rowCol := range columns { chkCol := c.columns[colIdx+i] + rower := rowIt.Copy() + if i == len(columns)-1 { + rower = rowIt + } if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) @@ -174,7 +179,6 @@ func (c *Chunk) AppendPartialRows(colIdx int, rower Iterator, maxLen int) int { chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) chkCol.length++ } - } else { for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 { chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) @@ -184,18 +188,17 @@ func (c *Chunk) AppendPartialRows(colIdx int, rower Iterator, maxLen int) int { chkCol.length++ } } - } - return c.columns[colIdx+0].length - oldLen + return c.columns[colIdx+0].length - oldRowLen } -func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, l int) { +func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { for i, rowCol := range row.c.columns { chkCol := c.columns[colIdx+i] - for j := 0; j < l; j++ { + for j := 0; j < rowsLen; j++ { chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) } - for j := 0; j < l; j++ { + for j := 0; j < rowsLen; j++ { if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) offset := row.idx * elemLen @@ -206,21 +209,23 @@ func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, l int) { chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) } } - chkCol.length += l + chkCol.length += rowsLen } } -func (c *Chunk) AppendRightMultiRows(lhser Iterator, rhs Row, maxLen int) { +func (c *Chunk) AppendRightMultiRows(lhser Iterator, rhs Row, maxLen int) int { c.numVirtualRows += maxLen lhsLen := lhser.Current().Len() rowsLen := c.AppendPartialRows(0, lhser, maxLen) c.AppendPartialSameRows(lhsLen, rhs, rowsLen) + return rowsLen } -func (c *Chunk) AppendMultiRows(lhs Row, rhser Iterator, maxLen int) { +func (c *Chunk) AppendMultiRows(lhs Row, rhser Iterator, maxLen int) int { c.numVirtualRows += maxLen rowsLen := c.AppendPartialRows(lhs.Len(), rhser, maxLen) c.AppendPartialSameRows(0, lhs, rowsLen) + return rowsLen } // Append appends rows in [begin, end) in another Chunk to a Chunk. diff --git a/util/chunk/iterator.go b/util/chunk/iterator.go index 7b9d34208bae0..c923fa346d0b6 100644 --- a/util/chunk/iterator.go +++ b/util/chunk/iterator.go @@ -43,6 +43,8 @@ type Iterator interface { // ReachEnd reaches the end of iterator. ReachEnd() + + Copy() Iterator } // NewIterator4Slice returns a Iterator for Row slice. @@ -98,6 +100,14 @@ func (it *iterator4Slice) Len() int { return len(it.rows) } +// Copy implements the Iterator interface. +func (it *iterator4Slice) Copy() Iterator { + return &iterator4Slice{ + rows: it.rows, + cursor: it.cursor, + } +} + // NewIterator4Chunk returns a iterator for Chunk. func NewIterator4Chunk(chk *Chunk) *Iterator4Chunk { return &Iterator4Chunk{chk: chk} @@ -152,6 +162,14 @@ func (it *Iterator4Chunk) Len() int { return it.chk.NumRows() } +// Copy implements the Iterator interface. +func (it *Iterator4Chunk) Copy() Iterator { + return &Iterator4Chunk{ + chk: it.chk, + cursor: it.cursor, + } +} + // NewIterator4List returns a Iterator for List. func NewIterator4List(li *List) Iterator { return &iterator4List{li: li} @@ -224,6 +242,15 @@ func (it *iterator4List) Len() int { return it.li.Len() } +// Copy implements the Iterator interface. +func (it *iterator4List) Copy() Iterator { + return &iterator4List{ + li: it.li, + chkCursor: it.chkCursor, + rowCursor: it.rowCursor, + } +} + // NewIterator4RowPtr returns a Iterator for RowPtrs. func NewIterator4RowPtr(li *List, ptrs []RowPtr) Iterator { return &iterator4RowPtr{li: li, ptrs: ptrs} @@ -277,3 +304,12 @@ func (it *iterator4RowPtr) ReachEnd() { func (it *iterator4RowPtr) Len() int { return len(it.ptrs) } + +// Copy implements the Iterator interface. +func (it *iterator4RowPtr) Copy() Iterator { + return &iterator4RowPtr{ + li: it.li, + ptrs: it.ptrs, + cursor: it.cursor, + } +} From d9fe98bd66eb0906201e41002fe1ba5154c64d89 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 21:21:33 +0800 Subject: [PATCH 03/41] fix nullmap index out of range and add test --- executor/joiner.go | 5 +- util/chunk/chunk.go | 5 +- util/chunk/chunk_copy_test.go | 165 ++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 7 deletions(-) create mode 100644 util/chunk/chunk_copy_test.go diff --git a/executor/joiner.go b/executor/joiner.go index d23cdf96251cd..1be1179ddfdd3 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -181,7 +181,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } rowsLen := 64 - for { + for inners.Current() != inners.End() { j.chk.Reset() if j.outerIsRight { rowsLen = makeJoinRightRowsToChunk(j.chk, inners, outer, 64) @@ -206,9 +206,6 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } } - if rowsLen < 64 { - break - } } return false, nil } diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 8e1f953722d01..5e791f67e3093 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -161,7 +161,6 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { - oldRowLen := c.columns[colIdx+0].length columns := rowIt.Current().c.columns for i, rowCol := range columns { @@ -197,10 +196,11 @@ func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { chkCol := c.columns[colIdx+i] for j := 0; j < rowsLen; j++ { chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + chkCol.length++ } + elemLen := len(rowCol.elemBuf) for j := 0; j < rowsLen; j++ { if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) offset := row.idx * elemLen chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) } else { @@ -209,7 +209,6 @@ func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) } } - chkCol.length += rowsLen } } diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go new file mode 100644 index 0000000000000..d424e32f661ec --- /dev/null +++ b/util/chunk/chunk_copy_test.go @@ -0,0 +1,165 @@ +package chunk + +import ( + "testing" +) + +var ( + numRows = 1024 +) + +func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { + chk := &Chunk{} + for _, l := range elemLen { + if l > 0 { + chk.addFixedLenColumn(l, cap) + } else { + chk.addVarLenColumn(cap) + } + } + return chk +} + +func getChunk() *Chunk { + chk := newChunkWithInitCap(1024, 8, 8, 0, 0) + for i := 0; i < numRows; i++ { + //chk.AppendNull(0) + chk.AppendInt64(0, int64(i)) + chk.AppendInt64(1, 1) + chk.AppendString(2, "abcd") + chk.AppendBytes(3, []byte("01234567890zxcvbnmqwer")) + } + return chk +} + +func TestCopyFieldByField(t *testing.T) { + chk1 := getChunk() + row := chk1.GetRow(0) + it1 := NewIterator4Chunk(chk1) + it1.Begin() + dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + + dst.Reset() + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + dst.AppendRow(lhs) + dst.AppendPartialRow(lhs.Len(), row) + } + for i := 0; i < 8; i++ { + if dst.columns[i].length != numRows { + t.Fail() + } + } + for j := 0; j < numRows; j++ { + row := dst.GetRow(j) + if row.GetInt64(0) != int64(j) { + t.Fail() + } + if row.GetInt64(1) != 1 { + t.Fail() + } + if row.GetString(2) != "abcd" { + t.Fail() + } + if string(row.GetBytes(3)) != "01234567890zxcvbnmqwer" { + t.Fail() + } + + if row.GetInt64(4) != 0 { + t.Fail() + } + if row.GetInt64(5) != 1 { + t.Fail() + } + if row.GetString(6) != "abcd" { + t.Fail() + } + if string(row.GetBytes(7)) != "01234567890zxcvbnmqwer" { + t.Fail() + } + } + +} + +func TestCopyColumnByColumn(t *testing.T) { + chk1 := getChunk() + row := chk1.GetRow(0) + it1 := NewIterator4Chunk(chk1) + it1.Begin() + dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + + dst.Reset() + for it1.Current() != it1.End() { + dst.AppendRightMultiRows(it1, row, 1024) + } + for i := 0; i < 8; i++ { + if dst.columns[i].length != numRows { + t.Fail() + } + } + for j := 0; j < numRows; j++ { + row := dst.GetRow(j) + if row.GetInt64(0) != int64(j) { + t.Fail() + } + if row.GetInt64(1) != 1 { + t.Fail() + } + if row.GetString(2) != "abcd" { + t.Fail() + } + if string(row.GetBytes(3)) != "01234567890zxcvbnmqwer" { + t.Fail() + } + + if row.GetInt64(4) != 0 { + t.Fail() + } + if row.GetInt64(5) != 1 { + t.Fail() + } + if row.GetString(6) != "abcd" { + t.Fail() + } + if string(row.GetBytes(7)) != "01234567890zxcvbnmqwer" { + t.Fail() + } + } +} + +func BenchmarkCopyFieldByField(b *testing.B) { + b.ReportAllocs() + chk1 := getChunk() + row := getChunk().GetRow(0) + + it1 := NewIterator4Chunk(chk1) + + dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dst.Reset() + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + dst.AppendRow(lhs) + dst.AppendPartialRow(lhs.Len(), row) + } + } +} + +func BenchmarkCopyColumnByColumn(b *testing.B) { + b.ReportAllocs() + chk1 := getChunk() + row := getChunk().GetRow(0) + + it1 := NewIterator4Chunk(chk1) + + dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dst.Reset() + it1.Begin() + for it1.Current() != it1.End() { + dst.AppendRightMultiRows(it1, row, 128) + } + } +} From 8fa2ad8292d65cb392a5173c0a996628bdacc9c9 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 22:32:12 +0800 Subject: [PATCH 04/41] refine code --- util/chunk/chunk.go | 18 ++++++---- util/chunk/chunk_copy_test.go | 68 ++++++++++------------------------- 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 5e791f67e3093..a4b68cb4947c7 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -198,17 +198,21 @@ func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) chkCol.length++ } - elemLen := len(rowCol.elemBuf) - for j := 0; j < rowsLen; j++ { - if rowCol.isFixed() { - offset := row.idx * elemLen - chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + start := row.idx * elemLen + end := start + elemLen + for j := 0; j < rowsLen; j++ { + chkCol.data = append(chkCol.data, rowCol.data[start:start+end]...) + } + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + for j := 0; j < rowsLen; j++ { chkCol.data = append(chkCol.data, rowCol.data[start:end]...) chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) } } + } } diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index d424e32f661ec..0230c166f34db 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -32,18 +32,7 @@ func getChunk() *Chunk { return chk } -func TestCopyFieldByField(t *testing.T) { - chk1 := getChunk() - row := chk1.GetRow(0) - it1 := NewIterator4Chunk(chk1) - it1.Begin() - dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) - - dst.Reset() - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - dst.AppendRow(lhs) - dst.AppendPartialRow(lhs.Len(), row) - } +func checkDstChk(t *testing.T, dst *Chunk) { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { t.Fail() @@ -77,10 +66,9 @@ func TestCopyFieldByField(t *testing.T) { t.Fail() } } - } -func TestCopyColumnByColumn(t *testing.T) { +func TestCopyFieldByField(t *testing.T) { chk1 := getChunk() row := chk1.GetRow(0) it1 := NewIterator4Chunk(chk1) @@ -88,42 +76,25 @@ func TestCopyColumnByColumn(t *testing.T) { dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) dst.Reset() - for it1.Current() != it1.End() { - dst.AppendRightMultiRows(it1, row, 1024) - } - for i := 0; i < 8; i++ { - if dst.columns[i].length != numRows { - t.Fail() - } + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + dst.AppendRow(lhs) + dst.AppendPartialRow(lhs.Len(), row) } - for j := 0; j < numRows; j++ { - row := dst.GetRow(j) - if row.GetInt64(0) != int64(j) { - t.Fail() - } - if row.GetInt64(1) != 1 { - t.Fail() - } - if row.GetString(2) != "abcd" { - t.Fail() - } - if string(row.GetBytes(3)) != "01234567890zxcvbnmqwer" { - t.Fail() - } + checkDstChk(t, dst) +} - if row.GetInt64(4) != 0 { - t.Fail() - } - if row.GetInt64(5) != 1 { - t.Fail() - } - if row.GetString(6) != "abcd" { - t.Fail() - } - if string(row.GetBytes(7)) != "01234567890zxcvbnmqwer" { - t.Fail() - } +func TestCopyColumnByColumn(t *testing.T) { + chk1 := getChunk() + row := chk1.GetRow(0) + it1 := NewIterator4Chunk(chk1) + + dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + + dst.Reset() + for it1.Begin(); it1.Current() != it1.End(); { + dst.AppendRightMultiRows(it1, row, 1024) } + checkDstChk(t, dst) } func BenchmarkCopyFieldByField(b *testing.B) { @@ -157,8 +128,7 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { dst.Reset() - it1.Begin() - for it1.Current() != it1.End() { + for it1.Begin(); it1.Current() != it1.End(); { dst.AppendRightMultiRows(it1, row, 128) } } From 33ae5a09904a3514765d76c0cf9b52d747347071 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 22:42:30 +0800 Subject: [PATCH 05/41] refine code --- util/chunk/chunk_copy_test.go | 39 ++++++++++++++--------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 0230c166f34db..2556eea7afe7d 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -21,7 +21,7 @@ func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { } func getChunk() *Chunk { - chk := newChunkWithInitCap(1024, 8, 8, 0, 0) + chk := newChunkWithInitCap(numRows, 8, 8, 0, 0) for i := 0; i < numRows; i++ { //chk.AppendNull(0) chk.AppendInt64(0, int64(i)) @@ -32,6 +32,15 @@ func getChunk() *Chunk { return chk } +func prepareChks() (it1 Iterator, row Row, dst *Chunk) { + chk1 := getChunk() + row = chk1.GetRow(0) + it1 = NewIterator4Chunk(chk1) + it1.Begin() + dst = newChunkWithInitCap(numRows, 8, 8, 0, 0, 8, 8, 0, 0) + return it1, row, dst +} + func checkDstChk(t *testing.T, dst *Chunk) { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { @@ -69,11 +78,7 @@ func checkDstChk(t *testing.T, dst *Chunk) { } func TestCopyFieldByField(t *testing.T) { - chk1 := getChunk() - row := chk1.GetRow(0) - it1 := NewIterator4Chunk(chk1) - it1.Begin() - dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + it1, row, dst := prepareChks() dst.Reset() for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { @@ -84,11 +89,7 @@ func TestCopyFieldByField(t *testing.T) { } func TestCopyColumnByColumn(t *testing.T) { - chk1 := getChunk() - row := chk1.GetRow(0) - it1 := NewIterator4Chunk(chk1) - - dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + it1, row, dst := prepareChks() dst.Reset() for it1.Begin(); it1.Current() != it1.End(); { @@ -99,12 +100,7 @@ func TestCopyColumnByColumn(t *testing.T) { func BenchmarkCopyFieldByField(b *testing.B) { b.ReportAllocs() - chk1 := getChunk() - row := getChunk().GetRow(0) - - it1 := NewIterator4Chunk(chk1) - - dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + it1, row, dst := prepareChks() b.ResetTimer() for i := 0; i < b.N; i++ { @@ -118,18 +114,13 @@ func BenchmarkCopyFieldByField(b *testing.B) { func BenchmarkCopyColumnByColumn(b *testing.B) { b.ReportAllocs() - chk1 := getChunk() - row := getChunk().GetRow(0) - - it1 := NewIterator4Chunk(chk1) - - dst := newChunkWithInitCap(1024, 8, 8, 0, 0, 8, 8, 0, 0) + it1, row, dst := prepareChks() b.ResetTimer() for i := 0; i < b.N; i++ { dst.Reset() for it1.Begin(); it1.Current() != it1.End(); { - dst.AppendRightMultiRows(it1, row, 128) + dst.AppendRightMultiRows(it1, row, 1024) } } } From 255c64b09debaa07e92085681c11004ff31151fd Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 23:13:56 +0800 Subject: [PATCH 06/41] remove iterator copy and use back to pre rows --- util/chunk/chunk.go | 6 ++-- util/chunk/chunk_copy_test.go | 4 +-- util/chunk/iterator.go | 61 ++++++++++++++--------------------- 3 files changed, 30 insertions(+), 41 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index a4b68cb4947c7..c3be0074c80e3 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -165,9 +165,9 @@ func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { columns := rowIt.Current().c.columns for i, rowCol := range columns { chkCol := c.columns[colIdx+i] - rower := rowIt.Copy() - if i == len(columns)-1 { - rower = rowIt + rower := rowIt + if i != 0 { + rower.PreRows(c.columns[colIdx+0].length - oldRowLen) } if rowCol.isFixed() { diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 2556eea7afe7d..61f6f6c005478 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -93,7 +93,7 @@ func TestCopyColumnByColumn(t *testing.T) { dst.Reset() for it1.Begin(); it1.Current() != it1.End(); { - dst.AppendRightMultiRows(it1, row, 1024) + dst.AppendRightMultiRows(it1, row, 128) } checkDstChk(t, dst) } @@ -120,7 +120,7 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { for i := 0; i < b.N; i++ { dst.Reset() for it1.Begin(); it1.Current() != it1.End(); { - dst.AppendRightMultiRows(it1, row, 1024) + dst.AppendRightMultiRows(it1, row, 128) } } } diff --git a/util/chunk/iterator.go b/util/chunk/iterator.go index c923fa346d0b6..3e9967a36b64b 100644 --- a/util/chunk/iterator.go +++ b/util/chunk/iterator.go @@ -32,6 +32,8 @@ type Iterator interface { // Next returns the next Row. Next() Row + PreRows(i int) + // End returns the invalid end Row. End() Row @@ -43,8 +45,6 @@ type Iterator interface { // ReachEnd reaches the end of iterator. ReachEnd() - - Copy() Iterator } // NewIterator4Slice returns a Iterator for Row slice. @@ -77,6 +77,11 @@ func (it *iterator4Slice) Next() Row { return row } +// PreRows implements the Iterator interface. +func (it *iterator4Slice) PreRows(i int) { + +} + // Current implements the Iterator interface. func (it *iterator4Slice) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { @@ -100,14 +105,6 @@ func (it *iterator4Slice) Len() int { return len(it.rows) } -// Copy implements the Iterator interface. -func (it *iterator4Slice) Copy() Iterator { - return &iterator4Slice{ - rows: it.rows, - cursor: it.cursor, - } -} - // NewIterator4Chunk returns a iterator for Chunk. func NewIterator4Chunk(chk *Chunk) *Iterator4Chunk { return &Iterator4Chunk{chk: chk} @@ -139,6 +136,14 @@ func (it *Iterator4Chunk) Next() Row { return row } +// PreRows implements the Iterator interface. +func (it *Iterator4Chunk) PreRows(i int) { + if it.cursor < i { + it.Begin() + } + it.cursor = it.cursor - i +} + // Current implements the Iterator interface. func (it *Iterator4Chunk) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { @@ -162,14 +167,6 @@ func (it *Iterator4Chunk) Len() int { return it.chk.NumRows() } -// Copy implements the Iterator interface. -func (it *Iterator4Chunk) Copy() Iterator { - return &Iterator4Chunk{ - chk: it.chk, - cursor: it.cursor, - } -} - // NewIterator4List returns a Iterator for List. func NewIterator4List(li *List) Iterator { return &iterator4List{li: li} @@ -214,6 +211,11 @@ func (it *iterator4List) Next() Row { return row } +// PreRows implements the Iterator interface. +func (it *iterator4List) PreRows(i int) { + +} + // Current implements the Iterator interface. func (it *iterator4List) Current() Row { if (it.chkCursor == 0 && it.rowCursor == 0) || it.chkCursor > it.li.NumChunks() { @@ -242,15 +244,6 @@ func (it *iterator4List) Len() int { return it.li.Len() } -// Copy implements the Iterator interface. -func (it *iterator4List) Copy() Iterator { - return &iterator4List{ - li: it.li, - chkCursor: it.chkCursor, - rowCursor: it.rowCursor, - } -} - // NewIterator4RowPtr returns a Iterator for RowPtrs. func NewIterator4RowPtr(li *List, ptrs []RowPtr) Iterator { return &iterator4RowPtr{li: li, ptrs: ptrs} @@ -282,6 +275,11 @@ func (it *iterator4RowPtr) Next() Row { return row } +// PreRows implements the Iterator interface. +func (it *iterator4RowPtr) PreRows(i int) { + +} + // Current implements the Iterator interface. func (it *iterator4RowPtr) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { @@ -304,12 +302,3 @@ func (it *iterator4RowPtr) ReachEnd() { func (it *iterator4RowPtr) Len() int { return len(it.ptrs) } - -// Copy implements the Iterator interface. -func (it *iterator4RowPtr) Copy() Iterator { - return &iterator4RowPtr{ - li: it.li, - ptrs: it.ptrs, - cursor: it.cursor, - } -} From 097337387cef6a2a7b6ed236b166fb933293440d Mon Sep 17 00:00:00 2001 From: crazycs Date: Sun, 19 Aug 2018 23:16:49 +0800 Subject: [PATCH 07/41] checkout joiner.go file --- executor/joiner.go | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 1be1179ddfdd3..562afa75dc6be 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -14,7 +14,6 @@ package executor import ( - "fmt" "github.com/juju/errors" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/plan" @@ -143,14 +142,6 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -func makeJoinRightRowsToChunk(chk *chunk.Chunk, lhser chunk.Iterator, rhs chunk.Row, maxLen int) int { - return chk.AppendRightMultiRows(lhser, rhs, maxLen) -} - -func makeJoinRowsToChunk(chk *chunk.Chunk, lhs chunk.Row, rhser chunk.Iterator, maxLen int) int { - return chk.AppendMultiRows(lhs, rhser, maxLen) -} - func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { j.selected, err = expression.VectorizedFilter(j.ctx, j.conditions, chunk.NewIterator4Chunk(input), j.selected) if err != nil { @@ -180,31 +171,23 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu inners.ReachEnd() return true, nil } - rowsLen := 64 - for inners.Current() != inners.End() { + + for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.chk.Reset() if j.outerIsRight { - rowsLen = makeJoinRightRowsToChunk(j.chk, inners, outer, 64) + j.makeJoinRowToChunk(j.chk, inner, outer) } else { - rowsLen = makeJoinRowsToChunk(j.chk, outer, inners, 64) + j.makeJoinRowToChunk(j.chk, outer, inner) } - //j.chk.Reset() - //if j.outerIsRight { - // j.makeJoinRowToChunk(j.chk, inner, outer) - //} else { - // j.makeJoinRowToChunk(j.chk, outer, inner) - //} - fmt.Printf("idx: %d, len: %d\n", 0, rowsLen) - for i := 0; i < rowsLen; i++ { - matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(i)) - if err != nil { - return false, errors.Trace(err) - } - if matched { - chk.AppendPartialRow(0, outer) - inners.ReachEnd() - return true, nil - } + + matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) + if err != nil { + return false, errors.Trace(err) + } + if matched { + chk.AppendPartialRow(0, outer) + inners.ReachEnd() + return true, nil } } return false, nil From 055a41ab568500c1a99c11f5f772e4a78507d69d Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 20 Aug 2018 11:18:07 +0800 Subject: [PATCH 08/41] add check to bench --- util/chunk/chunk_copy_test.go | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 61f6f6c005478..6164e5e3be29d 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -41,40 +41,41 @@ func prepareChks() (it1 Iterator, row Row, dst *Chunk) { return it1, row, dst } -func checkDstChk(t *testing.T, dst *Chunk) { +func checkDstChk(dst *Chunk) bool { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { - t.Fail() + return false } } for j := 0; j < numRows; j++ { row := dst.GetRow(j) if row.GetInt64(0) != int64(j) { - t.Fail() + return false } if row.GetInt64(1) != 1 { - t.Fail() + return false } if row.GetString(2) != "abcd" { - t.Fail() + return false } if string(row.GetBytes(3)) != "01234567890zxcvbnmqwer" { - t.Fail() + return false } if row.GetInt64(4) != 0 { - t.Fail() + return false } if row.GetInt64(5) != 1 { - t.Fail() + return false } if row.GetString(6) != "abcd" { - t.Fail() + return false } if string(row.GetBytes(7)) != "01234567890zxcvbnmqwer" { - t.Fail() + return false } } + return true } func TestCopyFieldByField(t *testing.T) { @@ -85,7 +86,9 @@ func TestCopyFieldByField(t *testing.T) { dst.AppendRow(lhs) dst.AppendPartialRow(lhs.Len(), row) } - checkDstChk(t, dst) + if !checkDstChk(dst) { + t.Fail() + } } func TestCopyColumnByColumn(t *testing.T) { @@ -95,7 +98,9 @@ func TestCopyColumnByColumn(t *testing.T) { for it1.Begin(); it1.Current() != it1.End(); { dst.AppendRightMultiRows(it1, row, 128) } - checkDstChk(t, dst) + if !checkDstChk(dst) { + t.Fail() + } } func BenchmarkCopyFieldByField(b *testing.B) { @@ -109,6 +114,7 @@ func BenchmarkCopyFieldByField(b *testing.B) { dst.AppendRow(lhs) dst.AppendPartialRow(lhs.Len(), row) } + checkDstChk(dst) } } @@ -122,5 +128,6 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { for it1.Begin(); it1.Current() != it1.End(); { dst.AppendRightMultiRows(it1, row, 128) } + checkDstChk(dst) } } From e7c6c6408c3cdc4c32ec728d18aca7b42ff4e0c6 Mon Sep 17 00:00:00 2001 From: crazycs Date: Tue, 21 Aug 2018 21:03:21 +0800 Subject: [PATCH 09/41] iterator only once --- util/chunk/chunk.go | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index c3be0074c80e3..146a52b996bfc 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -163,23 +163,31 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { oldRowLen := c.columns[colIdx+0].length columns := rowIt.Current().c.columns - for i, rowCol := range columns { - chkCol := c.columns[colIdx+i] - rower := rowIt - if i != 0 { - rower.PreRows(c.columns[colIdx+0].length - oldRowLen) + rowsCap := 32 + rows := make([]Row, 0, rowsCap) + for row, j := rowIt.Current(), 0; j < maxLen && row != rowIt.End(); row, j = rowIt.Next(), j+1 { + rows = append(rows, row) + if j%rowsCap == 0 { + appendPartialRows(colIdx, rows, c, columns) + rows = rows[:0] } + } + appendPartialRows(colIdx, rows, c, columns) + return c.columns[colIdx+0].length - oldRowLen +} - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 { +func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { + for _, row := range rows { + for i, rowCol := range columns { + chkCol := chk.columns[colIdx+i] + + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) offset := row.idx * elemLen chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) chkCol.length++ - } - } else { - for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 { + } else { chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] chkCol.data = append(chkCol.data, rowCol.data[start:end]...) @@ -188,7 +196,6 @@ func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { } } } - return c.columns[colIdx+0].length - oldRowLen } func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { From ce8eb4fee44f4e869246ac53085e63da4d046b13 Mon Sep 17 00:00:00 2001 From: crazycs Date: Tue, 21 Aug 2018 21:50:34 +0800 Subject: [PATCH 10/41] add appendMultiSameNullBitmap --- util/chunk/chunk.go | 6 ++---- util/chunk/column.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 146a52b996bfc..8b7db12d4df34 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -201,10 +201,8 @@ func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { for i, rowCol := range row.c.columns { chkCol := c.columns[colIdx+i] - for j := 0; j < rowsLen; j++ { - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) - chkCol.length++ - } + chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), uint(rowsLen)) + chkCol.length += rowsLen if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) start := row.idx * elemLen diff --git a/util/chunk/column.go b/util/chunk/column.go index 903adbeebc5c2..cb671227670f8 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -85,6 +85,44 @@ func (c *column) appendNullBitmap(on bool) { } } +func (c *column) appendMultiSameNullBitmap(on bool, num uint) { + l := ((c.length + int(num)) >> 3) - len(c.nullBitmap) + 1 + if l > 0 { + for i := 0; i < l; i++ { + c.nullBitmap = append(c.nullBitmap, 0) + } + } + if on { + idx := c.length >> 3 + pos := uint(c.length) & 7 + if pos > num { + num = pos + } + l := 8 - pos + if l > num { + l = num + num = 0 + } else { + num = num - l + l = 8 + } + for i := pos; i < l; i++ { + c.nullBitmap[idx] |= byte(1 << i) + } + for num > 8 { + idx++ + c.nullBitmap[idx] = 0xff + num = num - 8 + } + idx++ + for i := uint(0); i < num; i++ { + c.nullBitmap[idx] |= byte(1 << i) + } + } else { + c.nullCount += int(num) + } +} + func (c *column) appendNull() { c.appendNullBitmap(false) if c.isFixed() { From 499b6f99860b966ab273ff01739d1cefa6f49e14 Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 11:53:29 +0800 Subject: [PATCH 11/41] field by field only one line 2X --- util/chunk/chunk_copy_test.go | 155 +++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 31 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 6164e5e3be29d..6e2d13d344c2e 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -1,6 +1,8 @@ package chunk import ( + "fmt" + "github.com/juju/errors" "testing" ) @@ -25,9 +27,14 @@ func getChunk() *Chunk { for i := 0; i < numRows; i++ { //chk.AppendNull(0) chk.AppendInt64(0, int64(i)) - chk.AppendInt64(1, 1) - chk.AppendString(2, "abcd") - chk.AppendBytes(3, []byte("01234567890zxcvbnmqwer")) + if i%3 == 0 { + chk.AppendNull(1) + } else { + chk.AppendInt64(1, int64(i)) + } + + chk.AppendString(2, fmt.Sprintf("abcd-%d", i)) + chk.AppendBytes(3, []byte(fmt.Sprintf("01234567890zxcvbnmqwer-%d", i))) } return chk } @@ -41,41 +48,60 @@ func prepareChks() (it1 Iterator, row Row, dst *Chunk) { return it1, row, dst } -func checkDstChk(dst *Chunk) bool { +func checkDstChk(dst *Chunk) error { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { - return false + return errors.Errorf("col-%d length no equal", i) } } for j := 0; j < numRows; j++ { row := dst.GetRow(j) - if row.GetInt64(0) != int64(j) { - return false - } - if row.GetInt64(1) != 1 { - return false + if err := checkDstChkRow(row, j); err != nil { + return err } - if row.GetString(2) != "abcd" { - return false + } + return nil +} +func checkDstChkRow(row Row, j int) error { + if row.GetInt64(0) != int64(j) { + return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 0, j, row.GetInt64(0)) + } + if j%3 == 0 { + if !row.IsNull(1) { + return errors.Errorf("row-%d col-%d expect: null, but get: not null", j, 1) } - if string(row.GetBytes(3)) != "01234567890zxcvbnmqwer" { - return false + } else { + if row.GetInt64(1) != int64(j) { + return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 1, j, row.GetInt64(1)) } + } - if row.GetInt64(4) != 0 { - return false - } - if row.GetInt64(5) != 1 { - return false - } - if row.GetString(6) != "abcd" { - return false - } - if string(row.GetBytes(7)) != "01234567890zxcvbnmqwer" { - return false - } + if row.GetString(2) != fmt.Sprintf("abcd-%d", j) { + return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 2, fmt.Sprintf("abcd-%d", j), row.GetString(2)) + } + if string(row.GetBytes(3)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", j) { + return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 3, fmt.Sprintf("01234567890zxcvbnmqwer-%d", j), string(row.GetBytes(3))) } - return true + + if row.GetInt64(4) != 0 { + return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 4, 0, row.GetInt64(0)) + } + + if !row.IsNull(5) { + return errors.Errorf("row-%d col-%d expect: null, but get: not null", j, 5) + } + if row.GetString(6) != fmt.Sprintf("abcd-%d", 0) { + return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 6, fmt.Sprintf("abcd-%d", 0), row.GetString(6)) + } + if string(row.GetBytes(7)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", 0) { + return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 7, fmt.Sprintf("01234567890zxcvbnmqwer-%d", 0), string(row.GetBytes(7))) + } + return nil +} + +func printRow(row Row) { + fmt.Printf("%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s \n", row.GetInt64(0), row.GetInt64(1), row.GetString(2), string(row.GetBytes(3)), + row.GetInt64(4), row.GetInt64(5), row.GetString(6), string(row.GetBytes(7))) } func TestCopyFieldByField(t *testing.T) { @@ -86,7 +112,8 @@ func TestCopyFieldByField(t *testing.T) { dst.AppendRow(lhs) dst.AppendPartialRow(lhs.Len(), row) } - if !checkDstChk(dst) { + if err := checkDstChk(dst); err != nil { + t.Log(err) t.Fail() } } @@ -98,11 +125,59 @@ func TestCopyColumnByColumn(t *testing.T) { for it1.Begin(); it1.Current() != it1.End(); { dst.AppendRightMultiRows(it1, row, 128) } - if !checkDstChk(dst) { + if err := checkDstChk(dst); err != nil { + t.Log(err) t.Fail() } } +func TestCopy3(t *testing.T) { + it1, row, dst := prepareChks() + + dst.Reset() + + lhs := it1.Begin() + + for _, c := range dst.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } + + rowIdx := 0 + for ; lhs != it1.End(); lhs = it1.Next() { + appendPartialRow(0, lhs, dst) + appendPartialRow(lhs.Len(), row, dst) + + if err := checkDstChkRow(dst.GetRow(0), rowIdx); err != nil { + t.Log(err) + t.Fail() + } + rowIdx++ + } +} + +func appendPartialRow(colIdx int, row Row, dst *Chunk) { + for i, rowCol := range row.c.columns { + chkCol := dst.columns[colIdx+i] + if !rowCol.isNull(row.idx) { + chkCol.nullBitmap[0] = 1 + } else { + chkCol.nullBitmap[0] = 0 + } + + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + offset := row.idx * elemLen + chkCol.data = rowCol.data[offset : offset+elemLen] + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = rowCol.data[start:end] + chkCol.offsets[1] = int32(len(chkCol.data)) + } + } +} + func BenchmarkCopyFieldByField(b *testing.B) { b.ReportAllocs() it1, row, dst := prepareChks() @@ -114,7 +189,6 @@ func BenchmarkCopyFieldByField(b *testing.B) { dst.AppendRow(lhs) dst.AppendPartialRow(lhs.Len(), row) } - checkDstChk(dst) } } @@ -128,6 +202,25 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { for it1.Begin(); it1.Current() != it1.End(); { dst.AppendRightMultiRows(it1, row, 128) } - checkDstChk(dst) + } +} + +func BenchmarkCopy3(b *testing.B) { + b.ReportAllocs() + it1, row, dst := prepareChks() + + b.ResetTimer() + for _, c := range dst.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } + + for i := 0; i < b.N; i++ { + lhs := it1.Begin() + for ; lhs != it1.End(); lhs = it1.Next() { + appendPartialRow(0, lhs, dst) + appendPartialRow(lhs.Len(), row, dst) + } } } From 2a295a1e91546a08a28317f6b475bd2fdb5694cb Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 14:40:04 +0800 Subject: [PATCH 12/41] refine column copy --- util/chunk/chunk.go | 10 ++++------ util/chunk/chunk_copy_test.go | 4 ++-- util/chunk/column.go | 25 +++++++++++++------------ 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 8b7db12d4df34..d563541e33c2c 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -180,19 +180,17 @@ func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { for _, row := range rows { for i, rowCol := range columns { chkCol := chk.columns[colIdx+i] - + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + chkCol.length++ if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) offset := row.idx * elemLen chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) - chkCol.length++ } else { - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] chkCol.data = append(chkCol.data, rowCol.data[start:end]...) chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) - chkCol.length++ + } } } @@ -201,7 +199,7 @@ func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { for i, rowCol := range row.c.columns { chkCol := c.columns[colIdx+i] - chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), uint(rowsLen)) + chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), rowsLen) chkCol.length += rowsLen if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 6e2d13d344c2e..eae0e0e8d2e62 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -131,7 +131,7 @@ func TestCopyColumnByColumn(t *testing.T) { } } -func TestCopy3(t *testing.T) { +func TestCopyFieldByFieldOne(t *testing.T) { it1, row, dst := prepareChks() dst.Reset() @@ -205,7 +205,7 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { } } -func BenchmarkCopy3(b *testing.B) { +func BenchmarkCopyFieldByFieldOne(b *testing.B) { b.ReportAllocs() it1, row, dst := prepareChks() diff --git a/util/chunk/column.go b/util/chunk/column.go index cb671227670f8..54b1c6574aa6c 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -85,8 +85,9 @@ func (c *column) appendNullBitmap(on bool) { } } -func (c *column) appendMultiSameNullBitmap(on bool, num uint) { - l := ((c.length + int(num)) >> 3) - len(c.nullBitmap) + 1 +func (c *column) appendMultiSameNullBitmap(on bool, num int) { + unum := uint(num) + l := ((c.length + num) >> 3) - len(c.nullBitmap) + 1 if l > 0 { for i := 0; i < l; i++ { c.nullBitmap = append(c.nullBitmap, 0) @@ -95,31 +96,31 @@ func (c *column) appendMultiSameNullBitmap(on bool, num uint) { if on { idx := c.length >> 3 pos := uint(c.length) & 7 - if pos > num { - num = pos + if pos > unum { + unum = pos } l := 8 - pos - if l > num { - l = num - num = 0 + if l > unum { + l = unum + unum = 0 } else { - num = num - l + unum = unum - l l = 8 } for i := pos; i < l; i++ { c.nullBitmap[idx] |= byte(1 << i) } - for num > 8 { + for unum > 8 { idx++ c.nullBitmap[idx] = 0xff - num = num - 8 + unum = unum - 8 } idx++ - for i := uint(0); i < num; i++ { + for i := uint(0); i < unum; i++ { c.nullBitmap[idx] |= byte(1 << i) } } else { - c.nullCount += int(num) + c.nullCount += num } } From 9d8244770c393b9d5ddb8707e5d8b563930b9aa3 Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 15:44:43 +0800 Subject: [PATCH 13/41] refine column copy --- util/chunk/chunk_copy_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index eae0e0e8d2e62..a5f22739427a7 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -143,11 +143,10 @@ func TestCopyFieldByFieldOne(t *testing.T) { c.offsets = append(c.offsets, 0) c.length = 1 } - rowIdx := 0 for ; lhs != it1.End(); lhs = it1.Next() { - appendPartialRow(0, lhs, dst) - appendPartialRow(lhs.Len(), row, dst) + appendPartialRowOne(0, lhs, dst) + appendPartialRowOne(lhs.Len(), row, dst) if err := checkDstChkRow(dst.GetRow(0), rowIdx); err != nil { t.Log(err) @@ -157,7 +156,7 @@ func TestCopyFieldByFieldOne(t *testing.T) { } } -func appendPartialRow(colIdx int, row Row, dst *Chunk) { +func appendPartialRowOne(colIdx int, row Row, dst *Chunk) { for i, rowCol := range row.c.columns { chkCol := dst.columns[colIdx+i] if !rowCol.isNull(row.idx) { @@ -219,8 +218,8 @@ func BenchmarkCopyFieldByFieldOne(b *testing.B) { for i := 0; i < b.N; i++ { lhs := it1.Begin() for ; lhs != it1.End(); lhs = it1.Next() { - appendPartialRow(0, lhs, dst) - appendPartialRow(lhs.Len(), row, dst) + appendPartialRowOne(0, lhs, dst) + appendPartialRowOne(lhs.Len(), row, dst) } } } From ef85948354aceddf10de59165a31767eeff4ceba Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 16:29:00 +0800 Subject: [PATCH 14/41] add shadow copy to join and move code --- executor/joiner.go | 45 ++++-- util/chunk/chunk.go | 80 +++------- util/chunk/chunk_copy_test.go | 286 ++++++++++++++++++++-------------- 3 files changed, 215 insertions(+), 196 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 562afa75dc6be..63a61a34437be 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -172,15 +172,21 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } + var rowL, rowR chunk.Row + dst := j.chk + chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.chk.Reset() if j.outerIsRight { - j.makeJoinRowToChunk(j.chk, inner, outer) + rowL = inner + rowR = outer } else { - j.makeJoinRowToChunk(j.chk, outer, inner) + rowL = outer + rowR = inner } + chunk.ShadowPartialRowOne(0, rowL, dst) + chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { return false, errors.Trace(err) } @@ -211,15 +217,21 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk return true, nil } + var rowL, rowR chunk.Row + dst := j.chk + chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.chk.Reset() if j.outerIsRight { - j.makeJoinRowToChunk(j.chk, inner, outer) + rowL = inner + rowR = outer } else { - j.makeJoinRowToChunk(j.chk, outer, inner) + rowL = outer + rowR = inner } + chunk.ShadowPartialRowOne(0, rowL, dst) + chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { return false, errors.Trace(err) } @@ -251,11 +263,13 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, return true, nil } + dst := j.chk + chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.chk.Reset() - j.makeJoinRowToChunk(j.chk, outer, inner) + chunk.ShadowPartialRowOne(0, outer, dst) + chunk.ShadowPartialRowOne(outer.Len(), inner, dst) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { return false, errors.Trace(err) } @@ -294,10 +308,13 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera return true, nil } + dst := j.chk + chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.chk.Reset() - j.makeJoinRowToChunk(j.chk, outer, inner) - matched, err := expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) + chunk.ShadowPartialRowOne(0, outer, dst) + chunk.ShadowPartialRowOne(outer.Len(), inner, dst) + + matched, err := expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { return false, errors.Trace(err) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index d563541e33c2c..23765c4262824 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -160,78 +160,34 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } -func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int { - oldRowLen := c.columns[colIdx+0].length - columns := rowIt.Current().c.columns - rowsCap := 32 - rows := make([]Row, 0, rowsCap) - for row, j := rowIt.Current(), 0; j < maxLen && row != rowIt.End(); row, j = rowIt.Next(), j+1 { - rows = append(rows, row) - if j%rowsCap == 0 { - appendPartialRows(colIdx, rows, c, columns) - rows = rows[:0] - } - } - appendPartialRows(colIdx, rows, c, columns) - return c.columns[colIdx+0].length - oldRowLen -} - -func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { - for _, row := range rows { - for i, rowCol := range columns { - chkCol := chk.columns[colIdx+i] - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) - chkCol.length++ - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - offset := row.idx * elemLen - chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = append(chkCol.data, rowCol.data[start:end]...) - chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) - - } +func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { + for i, rowCol := range row.c.columns { + chkCol := dst.columns[colIdx+i] + if !rowCol.isNull(row.idx) { + chkCol.nullBitmap[0] = 1 + } else { + chkCol.nullBitmap[0] = 0 } - } -} -func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) { - for i, rowCol := range row.c.columns { - chkCol := c.columns[colIdx+i] - chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), rowsLen) - chkCol.length += rowsLen if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) - start := row.idx * elemLen - end := start + elemLen - for j := 0; j < rowsLen; j++ { - chkCol.data = append(chkCol.data, rowCol.data[start:start+end]...) - } + offset := row.idx * elemLen + chkCol.data = rowCol.data[offset : offset+elemLen] } else { start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - for j := 0; j < rowsLen; j++ { - chkCol.data = append(chkCol.data, rowCol.data[start:end]...) - chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) - } + chkCol.data = rowCol.data[start:end] + chkCol.offsets[1] = int32(len(chkCol.data)) } - } } -func (c *Chunk) AppendRightMultiRows(lhser Iterator, rhs Row, maxLen int) int { - c.numVirtualRows += maxLen - lhsLen := lhser.Current().Len() - rowsLen := c.AppendPartialRows(0, lhser, maxLen) - c.AppendPartialSameRows(lhsLen, rhs, rowsLen) - return rowsLen -} - -func (c *Chunk) AppendMultiRows(lhs Row, rhser Iterator, maxLen int) int { - c.numVirtualRows += maxLen - rowsLen := c.AppendPartialRows(lhs.Len(), rhser, maxLen) - c.AppendPartialSameRows(0, lhs, rowsLen) - return rowsLen +func ShadowChkInit(chk *Chunk) { + chk.Reset() + for _, c := range chk.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } } // Append appends rows in [begin, end) in another Chunk to a Chunk. diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index a5f22739427a7..7542fc204755a 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -10,6 +10,172 @@ var ( numRows = 1024 ) +func TestCopyFieldByField(t *testing.T) { + it1, row, dst := prepareChks() + + dst.Reset() + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + dst.AppendRow(lhs) + dst.AppendPartialRow(lhs.Len(), row) + } + if err := checkDstChk(dst); err != nil { + t.Log(err) + t.Fail() + } +} + +func TestCopyColumnByColumn(t *testing.T) { + it1, row, dst := prepareChks() + + dst.Reset() + for it1.Begin(); it1.Current() != it1.End(); { + appendRightMultiRows(dst, it1, row, 128) + } + if err := checkDstChk(dst); err != nil { + t.Log(err) + t.Fail() + } +} + +func TestCopyFieldByFieldOne(t *testing.T) { + it1, row, dst := prepareChks() + + dst.Reset() + + lhs := it1.Begin() + + for _, c := range dst.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } + rowIdx := 0 + for ; lhs != it1.End(); lhs = it1.Next() { + ShadowPartialRowOne(0, lhs, dst) + ShadowPartialRowOne(lhs.Len(), row, dst) + + if err := checkDstChkRow(dst.GetRow(0), rowIdx); err != nil { + t.Log(err) + t.Fail() + } + rowIdx++ + } +} + +func BenchmarkCopyFieldByField(b *testing.B) { + b.ReportAllocs() + it1, row, dst := prepareChks() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dst.Reset() + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + dst.AppendRow(lhs) + dst.AppendPartialRow(lhs.Len(), row) + } + } +} + +func BenchmarkCopyColumnByColumn(b *testing.B) { + b.ReportAllocs() + it1, row, dst := prepareChks() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dst.Reset() + for it1.Begin(); it1.Current() != it1.End(); { + appendRightMultiRows(dst, it1, row, 128) + } + } +} + +func BenchmarkCopyFieldByFieldOne(b *testing.B) { + b.ReportAllocs() + it1, row, dst := prepareChks() + + b.ResetTimer() + for _, c := range dst.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } + + for i := 0; i < b.N; i++ { + lhs := it1.Begin() + for ; lhs != it1.End(); lhs = it1.Next() { + ShadowPartialRowOne(0, lhs, dst) + ShadowPartialRowOne(lhs.Len(), row, dst) + } + } +} + +func AppendPartialRows(c *Chunk, colIdx int, rowIt Iterator, maxLen int) int { + oldRowLen := c.columns[colIdx+0].length + columns := rowIt.Current().c.columns + rowsCap := 32 + rows := make([]Row, 0, rowsCap) + for row, j := rowIt.Current(), 0; j < maxLen && row != rowIt.End(); row, j = rowIt.Next(), j+1 { + rows = append(rows, row) + if j%rowsCap == 0 { + appendPartialRows(colIdx, rows, c, columns) + rows = rows[:0] + } + } + appendPartialRows(colIdx, rows, c, columns) + return c.columns[colIdx+0].length - oldRowLen +} + +func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { + for _, row := range rows { + for i, rowCol := range columns { + chkCol := chk.columns[colIdx+i] + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + chkCol.length++ + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + offset := row.idx * elemLen + chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = append(chkCol.data, rowCol.data[start:end]...) + chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + + } + } + } +} + +func appendPartialSameRows(c *Chunk, colIdx int, row Row, rowsLen int) { + for i, rowCol := range row.c.columns { + chkCol := c.columns[colIdx+i] + chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), rowsLen) + chkCol.length += rowsLen + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + start := row.idx * elemLen + end := start + elemLen + for j := 0; j < rowsLen; j++ { + chkCol.data = append(chkCol.data, rowCol.data[start:start+end]...) + } + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + for j := 0; j < rowsLen; j++ { + chkCol.data = append(chkCol.data, rowCol.data[start:end]...) + chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + } + } + + } +} + +func appendRightMultiRows(c *Chunk, lhser Iterator, rhs Row, maxLen int) int { + c.numVirtualRows += maxLen + lhsLen := lhser.Current().Len() + rowsLen := AppendPartialRows(c, 0, lhser, maxLen) + appendPartialSameRows(c, lhsLen, rhs, rowsLen) + return rowsLen +} + func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { chk := &Chunk{} for _, l := range elemLen { @@ -103,123 +269,3 @@ func printRow(row Row) { fmt.Printf("%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s \n", row.GetInt64(0), row.GetInt64(1), row.GetString(2), string(row.GetBytes(3)), row.GetInt64(4), row.GetInt64(5), row.GetString(6), string(row.GetBytes(7))) } - -func TestCopyFieldByField(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - dst.AppendRow(lhs) - dst.AppendPartialRow(lhs.Len(), row) - } - if err := checkDstChk(dst); err != nil { - t.Log(err) - t.Fail() - } -} - -func TestCopyColumnByColumn(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - for it1.Begin(); it1.Current() != it1.End(); { - dst.AppendRightMultiRows(it1, row, 128) - } - if err := checkDstChk(dst); err != nil { - t.Log(err) - t.Fail() - } -} - -func TestCopyFieldByFieldOne(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - - lhs := it1.Begin() - - for _, c := range dst.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } - rowIdx := 0 - for ; lhs != it1.End(); lhs = it1.Next() { - appendPartialRowOne(0, lhs, dst) - appendPartialRowOne(lhs.Len(), row, dst) - - if err := checkDstChkRow(dst.GetRow(0), rowIdx); err != nil { - t.Log(err) - t.Fail() - } - rowIdx++ - } -} - -func appendPartialRowOne(colIdx int, row Row, dst *Chunk) { - for i, rowCol := range row.c.columns { - chkCol := dst.columns[colIdx+i] - if !rowCol.isNull(row.idx) { - chkCol.nullBitmap[0] = 1 - } else { - chkCol.nullBitmap[0] = 0 - } - - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - offset := row.idx * elemLen - chkCol.data = rowCol.data[offset : offset+elemLen] - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = rowCol.data[start:end] - chkCol.offsets[1] = int32(len(chkCol.data)) - } - } -} - -func BenchmarkCopyFieldByField(b *testing.B) { - b.ReportAllocs() - it1, row, dst := prepareChks() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - dst.Reset() - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - dst.AppendRow(lhs) - dst.AppendPartialRow(lhs.Len(), row) - } - } -} - -func BenchmarkCopyColumnByColumn(b *testing.B) { - b.ReportAllocs() - it1, row, dst := prepareChks() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - dst.Reset() - for it1.Begin(); it1.Current() != it1.End(); { - dst.AppendRightMultiRows(it1, row, 128) - } - } -} - -func BenchmarkCopyFieldByFieldOne(b *testing.B) { - b.ReportAllocs() - it1, row, dst := prepareChks() - - b.ResetTimer() - for _, c := range dst.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } - - for i := 0; i < b.N; i++ { - lhs := it1.Begin() - for ; lhs != it1.End(); lhs = it1.Next() { - appendPartialRowOne(0, lhs, dst) - appendPartialRowOne(lhs.Len(), row, dst) - } - } -} From 5bf279f68f124d3a667b4bc3bdd8763c59b68f1a Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 16:42:30 +0800 Subject: [PATCH 15/41] rename function --- util/chunk/chunk_copy_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 7542fc204755a..97997c94e17c5 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -37,7 +37,7 @@ func TestCopyColumnByColumn(t *testing.T) { } } -func TestCopyFieldByFieldOne(t *testing.T) { +func TestCopyShadow(t *testing.T) { it1, row, dst := prepareChks() dst.Reset() @@ -89,7 +89,7 @@ func BenchmarkCopyColumnByColumn(b *testing.B) { } } -func BenchmarkCopyFieldByFieldOne(b *testing.B) { +func BenchmarkCopyShadow(b *testing.B) { b.ReportAllocs() it1, row, dst := prepareChks() From 96905068bdcc657ee12e4f3cb0b91ddb4e712e11 Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 16:51:15 +0800 Subject: [PATCH 16/41] add comment --- util/chunk/chunk.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 23765c4262824..8f38dea8b3bc8 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -160,6 +160,9 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } +// ShadowPartialRowOne use shadow copy to instead of AppendPartialRow, +// ShadowPartialRowOne use to merge muti-rows to one row. +// this dst chk can only contain one row. otherwise will be wrong. func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { for i, rowCol := range row.c.columns { chkCol := dst.columns[colIdx+i] @@ -181,6 +184,7 @@ func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { } } +// ShadowChkInit init chk for ShadowPartialRowOne. func ShadowChkInit(chk *Chunk) { chk.Reset() for _, c := range chk.columns { From 8db639f6ba1ef76fb92127028e716a29f81afd11 Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 22 Aug 2018 17:34:33 +0800 Subject: [PATCH 17/41] add shadow copy to inner join --- executor/joiner.go | 34 ++++++++++++++++++++-------------- expression/chunk_executor.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 1d084bbe556f9..81672316d8787 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -413,26 +413,32 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch if inners.Len() == 0 { return false, nil } - j.chk.Reset() - chkForJoin := j.chk - if len(j.conditions) == 0 { - chkForJoin = chk - } + + match := false + var rowL, rowR chunk.Row + dst := j.chk + chunk.ShadowChkInit(dst) inner, numToAppend := inners.Current(), j.maxChunkSize-chk.NumRows() for ; inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { if j.outerIsRight { - j.makeJoinRowToChunk(chkForJoin, inner, outer) + rowL, rowR = inner, outer } else { - j.makeJoinRowToChunk(chkForJoin, outer, inner) + rowL, rowR = outer, inner } - } - if len(j.conditions) == 0 { - return true, nil - } + chunk.ShadowPartialRowOne(0, rowL, dst) + chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) - // reach here, chkForJoin is j.chk - matched, err := j.filter(chkForJoin, chk) - return matched, errors.Trace(err) + matched, err := expression.VectorizedFilterOneRow(j.ctx, j.conditions, dst.GetRow(0)) + if err != nil { + return false, errors.Trace(err) + } + if matched { + match = true + chk.AppendRow(dst.GetRow(0)) + } + + } + return match, nil } func (j *innerJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 887eb2463c3a3..064854c323470 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -263,3 +263,31 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch } return selected, nil } + +// VectorizedFilterOneRow applies a list of filters to a row. +func VectorizedFilterOneRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { + selected := true + for _, filter := range filters { + isIntType := true + if filter.GetType().EvalType() != types.ETInt { + isIntType = false + } + if isIntType { + filterResult, isNull, err := filter.EvalInt(ctx, row) + if err != nil { + return false, errors.Trace(err) + } + selected = selected && !isNull && (filterResult != 0) + } else { + // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. + bVal, err := EvalBool(ctx, []Expression{filter}, row) + if err != nil { + return false, errors.Trace(err) + } + selected = selected && bVal + } + + } + return selected, nil + +} From 7a55ff5dcf3163412c763020e45ac5da784d531c Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 01:15:14 +0800 Subject: [PATCH 18/41] refine code --- executor/joiner.go | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 81672316d8787..d68793fad1d02 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -142,6 +142,14 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } +func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, dst *chunk.Chunk) { + if !isRight { + inner, outer = outer, inner + } + chunk.ShadowPartialRowOne(0, inner, dst) + chunk.ShadowPartialRowOne(inner.Len(), outer, dst) +} + func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { j.selected, err = expression.VectorizedFilter(j.ctx, j.conditions, chunk.NewIterator4Chunk(input), j.selected) if err != nil { @@ -172,17 +180,10 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } - var rowL, rowR chunk.Row dst := j.chk chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - if j.outerIsRight { - rowL, rowR = inner, outer - } else { - rowL, rowR = outer, inner - } - chunk.ShadowPartialRowOne(0, rowL, dst) - chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) + makeShadowJoinRow(j.outerIsRight, inner, outer, dst) matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { @@ -215,17 +216,10 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk return true, nil } - var rowL, rowR chunk.Row dst := j.chk chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - if j.outerIsRight { - rowL, rowR = inner, outer - } else { - rowL, rowR = outer, inner - } - chunk.ShadowPartialRowOne(0, rowL, dst) - chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) + makeShadowJoinRow(j.outerIsRight, inner, outer, dst) matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { @@ -262,8 +256,7 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, dst := j.chk chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - chunk.ShadowPartialRowOne(0, outer, dst) - chunk.ShadowPartialRowOne(outer.Len(), inner, dst) + makeShadowJoinRow(false, inner, outer, dst) matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { @@ -307,8 +300,7 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera dst := j.chk chunk.ShadowChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - chunk.ShadowPartialRowOne(0, outer, dst) - chunk.ShadowPartialRowOne(outer.Len(), inner, dst) + makeShadowJoinRow(false, inner, outer, dst) matched, err := expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) @@ -415,18 +407,11 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch } match := false - var rowL, rowR chunk.Row dst := j.chk chunk.ShadowChkInit(dst) inner, numToAppend := inners.Current(), j.maxChunkSize-chk.NumRows() for ; inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { - if j.outerIsRight { - rowL, rowR = inner, outer - } else { - rowL, rowR = outer, inner - } - chunk.ShadowPartialRowOne(0, rowL, dst) - chunk.ShadowPartialRowOne(rowL.Len(), rowR, dst) + makeShadowJoinRow(j.outerIsRight, inner, outer, dst) matched, err := expression.VectorizedFilterOneRow(j.ctx, j.conditions, dst.GetRow(0)) if err != nil { From 05c1273053a43c90b5a89f0353b83e6fe7dcc51b Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 01:41:01 +0800 Subject: [PATCH 19/41] add shadow copy to all join --- executor/joiner.go | 55 +++++++++++----------------------------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index d68793fad1d02..279a3c474b10e 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -303,7 +303,6 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera makeShadowJoinRow(false, inner, outer, dst) matched, err := expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) - if err != nil { return false, errors.Trace(err) } @@ -336,24 +335,8 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk return false, nil } - j.chk.Reset() - chkForJoin := j.chk - if len(j.conditions) == 0 { - chkForJoin = chk - } - numToAppend := j.maxChunkSize - chk.NumRows() - for ; inners.Current() != inners.End() && numToAppend > 0; numToAppend-- { - j.makeJoinRowToChunk(chkForJoin, outer, inners.Current()) - inners.Next() - } - if len(j.conditions) == 0 { - return true, nil - } - - // reach here, chkForJoin is j.chk - matched, err := j.filter(chkForJoin, chk) - return matched, errors.Trace(err) + return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.chk, chk, numToAppend) } func (j *leftOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -371,24 +354,8 @@ func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, ch return false, nil } - j.chk.Reset() - chkForJoin := j.chk - if len(j.conditions) == 0 { - chkForJoin = chk - } - numToAppend := j.maxChunkSize - chk.NumRows() - for ; inners.Current() != inners.End() && numToAppend > 0; numToAppend-- { - j.makeJoinRowToChunk(chkForJoin, inners.Current(), outer) - inners.Next() - } - if len(j.conditions) == 0 { - return true, nil - } - - // reach here, chkForJoin is j.chk - matched, err := j.filter(chkForJoin, chk) - return matched, errors.Trace(err) + return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.chk, chk, numToAppend) } func (j *rightOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -406,22 +373,26 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch return false, nil } + numToAppend := j.maxChunkSize - chk.NumRows() + return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.chk, chk, numToAppend) +} + +func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Row, inners chunk.Iterator, conditions []expression.Expression, midChk, outChk *chunk.Chunk, numToAppend int) (bool, error) { match := false - dst := j.chk + dst := midChk chunk.ShadowChkInit(dst) - inner, numToAppend := inners.Current(), j.maxChunkSize-chk.NumRows() - for ; inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { - makeShadowJoinRow(j.outerIsRight, inner, outer, dst) - matched, err := expression.VectorizedFilterOneRow(j.ctx, j.conditions, dst.GetRow(0)) + for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { + makeShadowJoinRow(isRight, inner, outer, dst) + + matched, err := expression.VectorizedFilterOneRow(ctx, conditions, dst.GetRow(0)) if err != nil { return false, errors.Trace(err) } if matched { match = true - chk.AppendRow(dst.GetRow(0)) + outChk.AppendRow(dst.GetRow(0)) } - } return match, nil } From 66a133c1ac4cc194ebad9e8e3bcd4437245d37bd Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 02:16:36 +0800 Subject: [PATCH 20/41] remove redundancy code --- util/chunk/iterator.go | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/util/chunk/iterator.go b/util/chunk/iterator.go index 3e9967a36b64b..7b9d34208bae0 100644 --- a/util/chunk/iterator.go +++ b/util/chunk/iterator.go @@ -32,8 +32,6 @@ type Iterator interface { // Next returns the next Row. Next() Row - PreRows(i int) - // End returns the invalid end Row. End() Row @@ -77,11 +75,6 @@ func (it *iterator4Slice) Next() Row { return row } -// PreRows implements the Iterator interface. -func (it *iterator4Slice) PreRows(i int) { - -} - // Current implements the Iterator interface. func (it *iterator4Slice) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { @@ -136,14 +129,6 @@ func (it *Iterator4Chunk) Next() Row { return row } -// PreRows implements the Iterator interface. -func (it *Iterator4Chunk) PreRows(i int) { - if it.cursor < i { - it.Begin() - } - it.cursor = it.cursor - i -} - // Current implements the Iterator interface. func (it *Iterator4Chunk) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { @@ -211,11 +196,6 @@ func (it *iterator4List) Next() Row { return row } -// PreRows implements the Iterator interface. -func (it *iterator4List) PreRows(i int) { - -} - // Current implements the Iterator interface. func (it *iterator4List) Current() Row { if (it.chkCursor == 0 && it.rowCursor == 0) || it.chkCursor > it.li.NumChunks() { @@ -275,11 +255,6 @@ func (it *iterator4RowPtr) Next() Row { return row } -// PreRows implements the Iterator interface. -func (it *iterator4RowPtr) PreRows(i int) { - -} - // Current implements the Iterator interface. func (it *iterator4RowPtr) Current() Row { if it.cursor == 0 || it.cursor > it.Len() { From b4192e43bbc4916d5908dc809d56520c1f0ce55b Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 13:27:53 +0800 Subject: [PATCH 21/41] remove column copy and redundancy code --- util/chunk/chunk_copy_test.go | 140 +++++----------------------------- util/chunk/column.go | 39 ---------- 2 files changed, 18 insertions(+), 161 deletions(-) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 97997c94e17c5..295cc59e3bb60 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -2,7 +2,6 @@ package chunk import ( "fmt" - "github.com/juju/errors" "testing" ) @@ -18,21 +17,7 @@ func TestCopyFieldByField(t *testing.T) { dst.AppendRow(lhs) dst.AppendPartialRow(lhs.Len(), row) } - if err := checkDstChk(dst); err != nil { - t.Log(err) - t.Fail() - } -} - -func TestCopyColumnByColumn(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - for it1.Begin(); it1.Current() != it1.End(); { - appendRightMultiRows(dst, it1, row, 128) - } - if err := checkDstChk(dst); err != nil { - t.Log(err) + if !checkDstChk(dst) { t.Fail() } } @@ -54,8 +39,7 @@ func TestCopyShadow(t *testing.T) { ShadowPartialRowOne(0, lhs, dst) ShadowPartialRowOne(lhs.Len(), row, dst) - if err := checkDstChkRow(dst.GetRow(0), rowIdx); err != nil { - t.Log(err) + if !checkDstChkRow(dst.GetRow(0), rowIdx) { t.Fail() } rowIdx++ @@ -76,19 +60,6 @@ func BenchmarkCopyFieldByField(b *testing.B) { } } -func BenchmarkCopyColumnByColumn(b *testing.B) { - b.ReportAllocs() - it1, row, dst := prepareChks() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - dst.Reset() - for it1.Begin(); it1.Current() != it1.End(); { - appendRightMultiRows(dst, it1, row, 128) - } - } -} - func BenchmarkCopyShadow(b *testing.B) { b.ReportAllocs() it1, row, dst := prepareChks() @@ -109,73 +80,6 @@ func BenchmarkCopyShadow(b *testing.B) { } } -func AppendPartialRows(c *Chunk, colIdx int, rowIt Iterator, maxLen int) int { - oldRowLen := c.columns[colIdx+0].length - columns := rowIt.Current().c.columns - rowsCap := 32 - rows := make([]Row, 0, rowsCap) - for row, j := rowIt.Current(), 0; j < maxLen && row != rowIt.End(); row, j = rowIt.Next(), j+1 { - rows = append(rows, row) - if j%rowsCap == 0 { - appendPartialRows(colIdx, rows, c, columns) - rows = rows[:0] - } - } - appendPartialRows(colIdx, rows, c, columns) - return c.columns[colIdx+0].length - oldRowLen -} - -func appendPartialRows(colIdx int, rows []Row, chk *Chunk, columns []*column) { - for _, row := range rows { - for i, rowCol := range columns { - chkCol := chk.columns[colIdx+i] - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) - chkCol.length++ - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - offset := row.idx * elemLen - chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...) - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = append(chkCol.data, rowCol.data[start:end]...) - chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) - - } - } - } -} - -func appendPartialSameRows(c *Chunk, colIdx int, row Row, rowsLen int) { - for i, rowCol := range row.c.columns { - chkCol := c.columns[colIdx+i] - chkCol.appendMultiSameNullBitmap(!rowCol.isNull(row.idx), rowsLen) - chkCol.length += rowsLen - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - start := row.idx * elemLen - end := start + elemLen - for j := 0; j < rowsLen; j++ { - chkCol.data = append(chkCol.data, rowCol.data[start:start+end]...) - } - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - for j := 0; j < rowsLen; j++ { - chkCol.data = append(chkCol.data, rowCol.data[start:end]...) - chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) - } - } - - } -} - -func appendRightMultiRows(c *Chunk, lhser Iterator, rhs Row, maxLen int) int { - c.numVirtualRows += maxLen - lhsLen := lhser.Current().Len() - rowsLen := AppendPartialRows(c, 0, lhser, maxLen) - appendPartialSameRows(c, lhsLen, rhs, rowsLen) - return rowsLen -} - func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { chk := &Chunk{} for _, l := range elemLen { @@ -214,58 +118,50 @@ func prepareChks() (it1 Iterator, row Row, dst *Chunk) { return it1, row, dst } -func checkDstChk(dst *Chunk) error { +func checkDstChk(dst *Chunk) bool { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { - return errors.Errorf("col-%d length no equal", i) + return false } } for j := 0; j < numRows; j++ { row := dst.GetRow(j) - if err := checkDstChkRow(row, j); err != nil { - return err + if !checkDstChkRow(row, j) { + return false } } - return nil + return true } -func checkDstChkRow(row Row, j int) error { +func checkDstChkRow(row Row, j int) bool { if row.GetInt64(0) != int64(j) { - return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 0, j, row.GetInt64(0)) + return false } if j%3 == 0 { if !row.IsNull(1) { - return errors.Errorf("row-%d col-%d expect: null, but get: not null", j, 1) + return false } } else { if row.GetInt64(1) != int64(j) { - return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 1, j, row.GetInt64(1)) + return false } } - if row.GetString(2) != fmt.Sprintf("abcd-%d", j) { - return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 2, fmt.Sprintf("abcd-%d", j), row.GetString(2)) + return false } if string(row.GetBytes(3)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", j) { - return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 3, fmt.Sprintf("01234567890zxcvbnmqwer-%d", j), string(row.GetBytes(3))) + return false } - if row.GetInt64(4) != 0 { - return errors.Errorf("row-%d col-%d expect: %d, but get: %d", j, 4, 0, row.GetInt64(0)) + return false } - if !row.IsNull(5) { - return errors.Errorf("row-%d col-%d expect: null, but get: not null", j, 5) + return false } if row.GetString(6) != fmt.Sprintf("abcd-%d", 0) { - return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 6, fmt.Sprintf("abcd-%d", 0), row.GetString(6)) + return false } if string(row.GetBytes(7)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", 0) { - return errors.Errorf("row-%d col-%d expect: %s, but get: %s", j, 7, fmt.Sprintf("01234567890zxcvbnmqwer-%d", 0), string(row.GetBytes(7))) + return false } - return nil -} - -func printRow(row Row) { - fmt.Printf("%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s \n", row.GetInt64(0), row.GetInt64(1), row.GetString(2), string(row.GetBytes(3)), - row.GetInt64(4), row.GetInt64(5), row.GetString(6), string(row.GetBytes(7))) + return true } diff --git a/util/chunk/column.go b/util/chunk/column.go index 54b1c6574aa6c..903adbeebc5c2 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -85,45 +85,6 @@ func (c *column) appendNullBitmap(on bool) { } } -func (c *column) appendMultiSameNullBitmap(on bool, num int) { - unum := uint(num) - l := ((c.length + num) >> 3) - len(c.nullBitmap) + 1 - if l > 0 { - for i := 0; i < l; i++ { - c.nullBitmap = append(c.nullBitmap, 0) - } - } - if on { - idx := c.length >> 3 - pos := uint(c.length) & 7 - if pos > unum { - unum = pos - } - l := 8 - pos - if l > unum { - l = unum - unum = 0 - } else { - unum = unum - l - l = 8 - } - for i := pos; i < l; i++ { - c.nullBitmap[idx] |= byte(1 << i) - } - for unum > 8 { - idx++ - c.nullBitmap[idx] = 0xff - unum = unum - 8 - } - idx++ - for i := uint(0); i < unum; i++ { - c.nullBitmap[idx] |= byte(1 << i) - } - } else { - c.nullCount += num - } -} - func (c *column) appendNull() { c.appendNullBitmap(false) if c.isFixed() { From 4096997f6f652fa634eefce02e84e9aa9c66fc7d Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 13:40:37 +0800 Subject: [PATCH 22/41] address comment --- executor/joiner.go | 2 ++ util/chunk/chunk.go | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 279a3c474b10e..a687e74e94868 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -142,6 +142,8 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } +// makeShadowJoinRow combines inner, outer row into one join row and append to the chunk. +// The result of join row will always append to the dst chunk's first row. func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, dst *chunk.Chunk) { if !isRight { inner, outer = outer, inner diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 8f38dea8b3bc8..05ed0c0ec9e5a 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -161,8 +161,8 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } // ShadowPartialRowOne use shadow copy to instead of AppendPartialRow, -// ShadowPartialRowOne use to merge muti-rows to one row. -// this dst chk can only contain one row. otherwise will be wrong. +// ShadowPartialRowOne appends a row to the chunk's first row. +// The dst chk can only contain one row. otherwise will be wrong. func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { for i, rowCol := range row.c.columns { chkCol := dst.columns[colIdx+i] @@ -185,6 +185,7 @@ func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { } // ShadowChkInit init chk for ShadowPartialRowOne. +// The chk chunk will only contain one row, so initial the nullBitMap , offsets and length first for performance. func ShadowChkInit(chk *Chunk) { chk.Reset() for _, c := range chk.columns { From b802941062b7d44888af048c89b95aae8fe1eb6c Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 15:48:26 +0800 Subject: [PATCH 23/41] add mutchunk --- executor/joiner.go | 30 +++++++++--------- util/chunk/chunk.go | 35 --------------------- util/chunk/chunk_copy_test.go | 8 ++--- util/chunk/mutchunk.go | 59 +++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 53 deletions(-) create mode 100644 util/chunk/mutchunk.go diff --git a/executor/joiner.go b/executor/joiner.go index a687e74e94868..a7e9d3620398d 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -92,6 +92,7 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, colTypes = append(colTypes, lhsColTypes...) colTypes = append(colTypes, rhsColTypes...) base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) + base.mutChk = chunk.NewMutChunk(colTypes) base.selected = make([]bool, 0, chunk.InitialCapacity) if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { innerColTypes := lhsColTypes @@ -125,6 +126,7 @@ type baseJoiner struct { defaultInner chunk.Row outerIsRight bool chk *chunk.Chunk + mutChk *chunk.MutChunk selected []bool maxChunkSize int } @@ -148,8 +150,8 @@ func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, dst *chunk.Chunk) { if !isRight { inner, outer = outer, inner } - chunk.ShadowPartialRowOne(0, inner, dst) - chunk.ShadowPartialRowOne(inner.Len(), outer, dst) + chunk.ShadowPartialRow(0, inner, dst) + chunk.ShadowPartialRow(inner.Len(), outer, dst) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -182,8 +184,8 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } - dst := j.chk - chunk.ShadowChkInit(dst) + dst := j.mutChk + chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { makeShadowJoinRow(j.outerIsRight, inner, outer, dst) @@ -218,8 +220,8 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk return true, nil } - dst := j.chk - chunk.ShadowChkInit(dst) + dst := j.mutChk + chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { makeShadowJoinRow(j.outerIsRight, inner, outer, dst) @@ -255,8 +257,8 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, return true, nil } - dst := j.chk - chunk.ShadowChkInit(dst) + dst := j.mutChk + chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { makeShadowJoinRow(false, inner, outer, dst) @@ -299,8 +301,8 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera return true, nil } - dst := j.chk - chunk.ShadowChkInit(dst) + dst := j.mutChk + chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { makeShadowJoinRow(false, inner, outer, dst) @@ -338,7 +340,7 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.chk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.mutChk, chk, numToAppend) } func (j *leftOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -357,7 +359,7 @@ func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, ch } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.chk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.mutChk, chk, numToAppend) } func (j *rightOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -376,13 +378,13 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.chk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.mutChk, chk, numToAppend) } func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Row, inners chunk.Iterator, conditions []expression.Expression, midChk, outChk *chunk.Chunk, numToAppend int) (bool, error) { match := false dst := midChk - chunk.ShadowChkInit(dst) + chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { makeShadowJoinRow(isRight, inner, outer, dst) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 05ed0c0ec9e5a..ccdcdc7219d8c 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -160,41 +160,6 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } -// ShadowPartialRowOne use shadow copy to instead of AppendPartialRow, -// ShadowPartialRowOne appends a row to the chunk's first row. -// The dst chk can only contain one row. otherwise will be wrong. -func ShadowPartialRowOne(colIdx int, row Row, dst *Chunk) { - for i, rowCol := range row.c.columns { - chkCol := dst.columns[colIdx+i] - if !rowCol.isNull(row.idx) { - chkCol.nullBitmap[0] = 1 - } else { - chkCol.nullBitmap[0] = 0 - } - - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - offset := row.idx * elemLen - chkCol.data = rowCol.data[offset : offset+elemLen] - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = rowCol.data[start:end] - chkCol.offsets[1] = int32(len(chkCol.data)) - } - } -} - -// ShadowChkInit init chk for ShadowPartialRowOne. -// The chk chunk will only contain one row, so initial the nullBitMap , offsets and length first for performance. -func ShadowChkInit(chk *Chunk) { - chk.Reset() - for _, c := range chk.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } -} - // Append appends rows in [begin, end) in another Chunk to a Chunk. func (c *Chunk) Append(other *Chunk, begin, end int) { for colID, src := range other.columns { diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 295cc59e3bb60..0378c62da3344 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -36,8 +36,8 @@ func TestCopyShadow(t *testing.T) { } rowIdx := 0 for ; lhs != it1.End(); lhs = it1.Next() { - ShadowPartialRowOne(0, lhs, dst) - ShadowPartialRowOne(lhs.Len(), row, dst) + ShadowPartialRow(0, lhs, dst) + ShadowPartialRow(lhs.Len(), row, dst) if !checkDstChkRow(dst.GetRow(0), rowIdx) { t.Fail() @@ -74,8 +74,8 @@ func BenchmarkCopyShadow(b *testing.B) { for i := 0; i < b.N; i++ { lhs := it1.Begin() for ; lhs != it1.End(); lhs = it1.Next() { - ShadowPartialRowOne(0, lhs, dst) - ShadowPartialRowOne(lhs.Len(), row, dst) + ShadowPartialRow(0, lhs, dst) + ShadowPartialRow(lhs.Len(), row, dst) } } } diff --git a/util/chunk/mutchunk.go b/util/chunk/mutchunk.go new file mode 100644 index 0000000000000..fb6e05c8c3a78 --- /dev/null +++ b/util/chunk/mutchunk.go @@ -0,0 +1,59 @@ +// Copyright 2018 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import "github.com/pingcap/tidb/types" + +// MutChunk represents a mutable Chunk. +// The underlying columns only contains one row and not exposed to the user. +type MutChunk = Chunk + +// NewMutChunk return a chunk with capacity 1. +func NewMutChunk(colTypes []*types.FieldType) *MutChunk { + return NewChunkWithCapacity(colTypes, 1) +} + +// MutChkInit init chk for ShadowPartialRow. +// The chk chunk will only contain one row, so initial the nullBitMap , offsets and length first for performance. +func MutChkInit(chk *MutChunk) { + chk.Reset() + for _, c := range chk.columns { + c.nullBitmap = append(c.nullBitmap, 0) + c.offsets = append(c.offsets, 0) + c.length = 1 + } +} + +// ShadowPartialRow use shadow copy to instead of AppendPartialRow, +// ShadowPartialRow appends a row to the mut chunk's first row. +func ShadowPartialRow(colIdx int, row Row, dst *MutChunk) { + for i, rowCol := range row.c.columns { + chkCol := dst.columns[colIdx+i] + if !rowCol.isNull(row.idx) { + chkCol.nullBitmap[0] = 1 + } else { + chkCol.nullBitmap[0] = 0 + } + + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + offset := row.idx * elemLen + chkCol.data = rowCol.data[offset : offset+elemLen] + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = rowCol.data[start:end] + chkCol.offsets[1] = int32(len(chkCol.data)) + } + } +} From c5cfdf134d4f6c21f599d341cfdff568a3ca4463 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 16:03:06 +0800 Subject: [PATCH 24/41] address comment --- executor/joiner.go | 4 ++-- expression/chunk_executor.go | 14 ++++---------- util/chunk/chunk_copy_test.go | 8 ++++---- util/chunk/mutchunk.go | 8 ++++---- 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index a7e9d3620398d..98c75aaeb03d8 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -150,8 +150,8 @@ func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, dst *chunk.Chunk) { if !isRight { inner, outer = outer, inner } - chunk.ShadowPartialRow(0, inner, dst) - chunk.ShadowPartialRow(inner.Len(), outer, dst) + chunk.ShadowCopyPartialRow(0, inner, dst) + chunk.ShadowCopyPartialRow(inner.Len(), outer, dst) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 064854c323470..af68e92d2978b 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -237,15 +237,12 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch selected = append(selected, true) } for _, filter := range filters { - isIntType := true - if filter.GetType().EvalType() != types.ETInt { - isIntType = false - } + isTypeInt := filter.GetType().EvalType() == types.ETInt for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { if !selected[row.Idx()] { continue } - if isIntType { + if isTypeInt { filterResult, isNull, err := filter.EvalInt(ctx, row) if err != nil { return nil, errors.Trace(err) @@ -268,11 +265,8 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch func VectorizedFilterOneRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { selected := true for _, filter := range filters { - isIntType := true - if filter.GetType().EvalType() != types.ETInt { - isIntType = false - } - if isIntType { + isTypeInt := filter.GetType().EvalType() == types.ETInt + if isTypeInt { filterResult, isNull, err := filter.EvalInt(ctx, row) if err != nil { return false, errors.Trace(err) diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 0378c62da3344..44dde63bb3955 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -36,8 +36,8 @@ func TestCopyShadow(t *testing.T) { } rowIdx := 0 for ; lhs != it1.End(); lhs = it1.Next() { - ShadowPartialRow(0, lhs, dst) - ShadowPartialRow(lhs.Len(), row, dst) + ShadowCopyPartialRow(0, lhs, dst) + ShadowCopyPartialRow(lhs.Len(), row, dst) if !checkDstChkRow(dst.GetRow(0), rowIdx) { t.Fail() @@ -74,8 +74,8 @@ func BenchmarkCopyShadow(b *testing.B) { for i := 0; i < b.N; i++ { lhs := it1.Begin() for ; lhs != it1.End(); lhs = it1.Next() { - ShadowPartialRow(0, lhs, dst) - ShadowPartialRow(lhs.Len(), row, dst) + ShadowCopyPartialRow(0, lhs, dst) + ShadowCopyPartialRow(lhs.Len(), row, dst) } } } diff --git a/util/chunk/mutchunk.go b/util/chunk/mutchunk.go index fb6e05c8c3a78..e467fbb4fab5c 100644 --- a/util/chunk/mutchunk.go +++ b/util/chunk/mutchunk.go @@ -24,7 +24,7 @@ func NewMutChunk(colTypes []*types.FieldType) *MutChunk { return NewChunkWithCapacity(colTypes, 1) } -// MutChkInit init chk for ShadowPartialRow. +// MutChkInit init chk for ShadowCopyPartialRow. // The chk chunk will only contain one row, so initial the nullBitMap , offsets and length first for performance. func MutChkInit(chk *MutChunk) { chk.Reset() @@ -35,9 +35,9 @@ func MutChkInit(chk *MutChunk) { } } -// ShadowPartialRow use shadow copy to instead of AppendPartialRow, -// ShadowPartialRow appends a row to the mut chunk's first row. -func ShadowPartialRow(colIdx int, row Row, dst *MutChunk) { +// ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, +// ShadowCopyPartialRow copies the data of row to the first row of dst. +func ShadowCopyPartialRow(colIdx int, row Row, dst *MutChunk) { for i, rowCol := range row.c.columns { chkCol := dst.columns[colIdx+i] if !rowCol.isNull(row.idx) { From 947f9d4d69155706e76e4358b290ea2ccef93be0 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 17:29:19 +0800 Subject: [PATCH 25/41] use mutRow instead of mut chunk. --- executor/joiner.go | 53 ++++++++++++------------------- util/chunk/chunk_copy_test.go | 53 +++++++++++++++++-------------- util/chunk/mutchunk.go | 59 ----------------------------------- util/chunk/mutrow.go | 24 ++++++++++++++ 4 files changed, 74 insertions(+), 115 deletions(-) delete mode 100644 util/chunk/mutchunk.go diff --git a/executor/joiner.go b/executor/joiner.go index 98c75aaeb03d8..1b4666c4076a8 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -91,8 +91,7 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, colTypes := make([]*types.FieldType, 0, len(lhsColTypes)+len(rhsColTypes)) colTypes = append(colTypes, lhsColTypes...) colTypes = append(colTypes, rhsColTypes...) - base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) - base.mutChk = chunk.NewMutChunk(colTypes) + base.mutRow = chunk.MutRowFromTypes(colTypes) base.selected = make([]bool, 0, chunk.InitialCapacity) if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { innerColTypes := lhsColTypes @@ -125,8 +124,7 @@ type baseJoiner struct { conditions []expression.Expression defaultInner chunk.Row outerIsRight bool - chk *chunk.Chunk - mutChk *chunk.MutChunk + mutRow chunk.MutRow selected []bool maxChunkSize int } @@ -146,12 +144,12 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { // makeShadowJoinRow combines inner, outer row into one join row and append to the chunk. // The result of join row will always append to the dst chunk's first row. -func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, dst *chunk.Chunk) { +func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, mutRow chunk.MutRow) { if !isRight { inner, outer = outer, inner } - chunk.ShadowCopyPartialRow(0, inner, dst) - chunk.ShadowCopyPartialRow(inner.Len(), outer, dst) + chunk.ShadowCopyPartialRow(0, inner, mutRow) + chunk.ShadowCopyPartialRow(inner.Len(), outer, mutRow) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -184,12 +182,10 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu return true, nil } - dst := j.mutChk - chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(j.outerIsRight, inner, outer, dst) + makeShadowJoinRow(j.outerIsRight, inner, outer, j.mutRow) - matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -220,12 +216,10 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk return true, nil } - dst := j.mutChk - chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(j.outerIsRight, inner, outer, dst) + makeShadowJoinRow(j.outerIsRight, inner, outer, j.mutRow) - matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -257,12 +251,10 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, return true, nil } - dst := j.mutChk - chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(false, inner, outer, dst) + makeShadowJoinRow(false, inner, outer, j.mutRow) - matched, err = expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -301,12 +293,10 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera return true, nil } - dst := j.mutChk - chunk.MutChkInit(dst) for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(false, inner, outer, dst) + makeShadowJoinRow(false, inner, outer, j.mutRow) - matched, err := expression.EvalBool(j.ctx, j.conditions, dst.GetRow(0)) + matched, err := expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -340,7 +330,7 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.mutChk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.mutRow, chk, numToAppend) } func (j *leftOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -359,7 +349,7 @@ func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, ch } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.mutChk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.mutRow, chk, numToAppend) } func (j *rightOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -378,24 +368,21 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch } numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.mutChk, chk, numToAppend) + return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.mutRow, chk, numToAppend) } -func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Row, inners chunk.Iterator, conditions []expression.Expression, midChk, outChk *chunk.Chunk, numToAppend int) (bool, error) { +func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Row, inners chunk.Iterator, conditions []expression.Expression, mutRow chunk.MutRow, outChk *chunk.Chunk, numToAppend int) (bool, error) { match := false - dst := midChk - chunk.MutChkInit(dst) - for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { - makeShadowJoinRow(isRight, inner, outer, dst) + makeShadowJoinRow(isRight, inner, outer, mutRow) - matched, err := expression.VectorizedFilterOneRow(ctx, conditions, dst.GetRow(0)) + matched, err := expression.VectorizedFilterOneRow(ctx, conditions, mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } if matched { match = true - outChk.AppendRow(dst.GetRow(0)) + outChk.AppendRow(mutRow.ToRow()) } } return match, nil diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 44dde63bb3955..9d402954dd52f 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -2,6 +2,8 @@ package chunk import ( "fmt" + "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/types" "testing" ) @@ -23,23 +25,14 @@ func TestCopyFieldByField(t *testing.T) { } func TestCopyShadow(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - - lhs := it1.Begin() + it1, row, mutRow := prepareChksForShadowCopy() - for _, c := range dst.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } rowIdx := 0 - for ; lhs != it1.End(); lhs = it1.Next() { - ShadowCopyPartialRow(0, lhs, dst) - ShadowCopyPartialRow(lhs.Len(), row, dst) + for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { + ShadowCopyPartialRow(0, lhs, mutRow) + ShadowCopyPartialRow(lhs.Len(), row, mutRow) - if !checkDstChkRow(dst.GetRow(0), rowIdx) { + if !checkDstChkRow(mutRow.ToRow(), rowIdx) { t.Fail() } rowIdx++ @@ -62,20 +55,14 @@ func BenchmarkCopyFieldByField(b *testing.B) { func BenchmarkCopyShadow(b *testing.B) { b.ReportAllocs() - it1, row, dst := prepareChks() + it1, row, mutRow := prepareChksForShadowCopy() b.ResetTimer() - for _, c := range dst.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } - for i := 0; i < b.N; i++ { lhs := it1.Begin() for ; lhs != it1.End(); lhs = it1.Next() { - ShadowCopyPartialRow(0, lhs, dst) - ShadowCopyPartialRow(lhs.Len(), row, dst) + ShadowCopyPartialRow(0, lhs, mutRow) + ShadowCopyPartialRow(lhs.Len(), row, mutRow) } } } @@ -118,6 +105,26 @@ func prepareChks() (it1 Iterator, row Row, dst *Chunk) { return it1, row, dst } +func prepareChksForShadowCopy() (it1 Iterator, row Row, mutRow MutRow) { + chk1 := getChunk() + row = chk1.GetRow(0) + it1 = NewIterator4Chunk(chk1) + it1.Begin() + + colTypes := make([]*types.FieldType, 0, 8) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + + mutRow = MutRowFromTypes(colTypes) + return it1, row, mutRow +} + func checkDstChk(dst *Chunk) bool { for i := 0; i < 8; i++ { if dst.columns[i].length != numRows { diff --git a/util/chunk/mutchunk.go b/util/chunk/mutchunk.go deleted file mode 100644 index e467fbb4fab5c..0000000000000 --- a/util/chunk/mutchunk.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2018 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package chunk - -import "github.com/pingcap/tidb/types" - -// MutChunk represents a mutable Chunk. -// The underlying columns only contains one row and not exposed to the user. -type MutChunk = Chunk - -// NewMutChunk return a chunk with capacity 1. -func NewMutChunk(colTypes []*types.FieldType) *MutChunk { - return NewChunkWithCapacity(colTypes, 1) -} - -// MutChkInit init chk for ShadowCopyPartialRow. -// The chk chunk will only contain one row, so initial the nullBitMap , offsets and length first for performance. -func MutChkInit(chk *MutChunk) { - chk.Reset() - for _, c := range chk.columns { - c.nullBitmap = append(c.nullBitmap, 0) - c.offsets = append(c.offsets, 0) - c.length = 1 - } -} - -// ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, -// ShadowCopyPartialRow copies the data of row to the first row of dst. -func ShadowCopyPartialRow(colIdx int, row Row, dst *MutChunk) { - for i, rowCol := range row.c.columns { - chkCol := dst.columns[colIdx+i] - if !rowCol.isNull(row.idx) { - chkCol.nullBitmap[0] = 1 - } else { - chkCol.nullBitmap[0] = 0 - } - - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - offset := row.idx * elemLen - chkCol.data = rowCol.data[offset : offset+elemLen] - } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = rowCol.data[start:end] - chkCol.offsets[1] = int32(len(chkCol.data)) - } - } -} diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index 7cf1721296475..19f20e8627abb 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -346,3 +346,27 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { copy(col.data[1:], j.Value) col.offsets[1] = int32(dataLen) } + +// ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, +// ShadowCopyPartialRow copies the data of row to the first row of dst. +func ShadowCopyPartialRow(colIdx int, row Row, mutRow MutRow) { + dst := mutRow.c + for i, rowCol := range row.c.columns { + chkCol := dst.columns[colIdx+i] + if !rowCol.isNull(row.idx) { + chkCol.nullBitmap[0] = 1 + } else { + chkCol.nullBitmap[0] = 0 + } + + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + offset := row.idx * elemLen + chkCol.data = rowCol.data[offset : offset+elemLen] + } else { + start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] + chkCol.data = rowCol.data[start:end] + chkCol.offsets[1] = int32(len(chkCol.data)) + } + } +} From 24ab90e7a8546626667c5a8356f1c6884b05e572 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 17:35:25 +0800 Subject: [PATCH 26/41] address comment --- executor/joiner.go | 3 +-- util/chunk/mutrow.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 1b4666c4076a8..d4d6b16213bda 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -142,8 +142,7 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -// makeShadowJoinRow combines inner, outer row into one join row and append to the chunk. -// The result of join row will always append to the dst chunk's first row. +// makeShadowJoinRow combines inner, outer row into mutRow. func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, mutRow chunk.MutRow) { if !isRight { inner, outer = outer, inner diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index 19f20e8627abb..8223f7d847415 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -348,7 +348,7 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { } // ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, -// ShadowCopyPartialRow copies the data of row to the first row of dst. +// ShadowCopyPartialRow copies the data of row to the mutRow. func ShadowCopyPartialRow(colIdx int, row Row, mutRow MutRow) { dst := mutRow.c for i, rowCol := range row.c.columns { From 2b8d8960ee3ccb334b5e97ff7cc4fdaf69ef5688 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 20:21:16 +0800 Subject: [PATCH 27/41] refine code --- executor/joiner.go | 6 +++--- expression/chunk_executor.go | 34 ++++++++++------------------------ util/chunk/chunk_copy_test.go | 11 ++++++----- util/chunk/mutrow.go | 4 ++-- 4 files changed, 21 insertions(+), 34 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index d4d6b16213bda..8a0db717a8afd 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -147,8 +147,8 @@ func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, mutRow chunk.MutRow if !isRight { inner, outer = outer, inner } - chunk.ShadowCopyPartialRow(0, inner, mutRow) - chunk.ShadowCopyPartialRow(inner.Len(), outer, mutRow) + mutRow.ShadowCopyPartialRow(0, inner) + mutRow.ShadowCopyPartialRow(inner.Len(), outer) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -375,7 +375,7 @@ func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Ro for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { makeShadowJoinRow(isRight, inner, outer, mutRow) - matched, err := expression.VectorizedFilterOneRow(ctx, conditions, mutRow.ToRow()) + matched, err := expression.VectorizedFilterRow(ctx, conditions, mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index af68e92d2978b..07c14e94a6c44 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -236,33 +236,21 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch for i, numRows := 0, iterator.Len(); i < numRows; i++ { selected = append(selected, true) } - for _, filter := range filters { - isTypeInt := filter.GetType().EvalType() == types.ETInt - for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { - if !selected[row.Idx()] { - continue - } - if isTypeInt { - filterResult, isNull, err := filter.EvalInt(ctx, row) - if err != nil { - return nil, errors.Trace(err) - } - selected[row.Idx()] = selected[row.Idx()] && !isNull && (filterResult != 0) - } else { - // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. - bVal, err := EvalBool(ctx, []Expression{filter}, row) - if err != nil { - return nil, errors.Trace(err) - } - selected[row.Idx()] = selected[row.Idx()] && bVal - } + var err error + for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { + if !selected[row.Idx()] { + continue + } + selected[row.Idx()], err = VectorizedFilterRow(ctx, filters, row) + if err != nil { + return nil, errors.Trace(err) } } return selected, nil } -// VectorizedFilterOneRow applies a list of filters to a row. -func VectorizedFilterOneRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { +// VectorizedFilterRow applies a list of filters to a row. +func VectorizedFilterRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { selected := true for _, filter := range filters { isTypeInt := filter.GetType().EvalType() == types.ETInt @@ -280,8 +268,6 @@ func VectorizedFilterOneRow(ctx sessionctx.Context, filters []Expression, row ch } selected = selected && bVal } - } return selected, nil - } diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 9d402954dd52f..5dfc381913c82 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -2,9 +2,10 @@ package chunk import ( "fmt" + "testing" + "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/types" - "testing" ) var ( @@ -29,8 +30,8 @@ func TestCopyShadow(t *testing.T) { rowIdx := 0 for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - ShadowCopyPartialRow(0, lhs, mutRow) - ShadowCopyPartialRow(lhs.Len(), row, mutRow) + mutRow.ShadowCopyPartialRow(0, lhs) + mutRow.ShadowCopyPartialRow(lhs.Len(), row) if !checkDstChkRow(mutRow.ToRow(), rowIdx) { t.Fail() @@ -61,8 +62,8 @@ func BenchmarkCopyShadow(b *testing.B) { for i := 0; i < b.N; i++ { lhs := it1.Begin() for ; lhs != it1.End(); lhs = it1.Next() { - ShadowCopyPartialRow(0, lhs, mutRow) - ShadowCopyPartialRow(lhs.Len(), row, mutRow) + mutRow.ShadowCopyPartialRow(0, lhs) + mutRow.ShadowCopyPartialRow(lhs.Len(), row) } } } diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index 8223f7d847415..e5065825a7c0a 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -349,8 +349,8 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { // ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, // ShadowCopyPartialRow copies the data of row to the mutRow. -func ShadowCopyPartialRow(colIdx int, row Row, mutRow MutRow) { - dst := mutRow.c +func (mr MutRow) ShadowCopyPartialRow(colIdx int, row Row) { + dst := mr.c for i, rowCol := range row.c.columns { chkCol := dst.columns[colIdx+i] if !rowCol.isNull(row.idx) { From 3f82d2b3853eeb945c881a350ac1f4962f660851 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 20:45:28 +0800 Subject: [PATCH 28/41] address comment --- executor/joiner.go | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 8a0db717a8afd..146fa3e46f5b9 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -142,13 +142,13 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -// makeShadowJoinRow combines inner, outer row into mutRow. -func makeShadowJoinRow(isRight bool, inner, outer chunk.Row, mutRow chunk.MutRow) { - if !isRight { +// makeJoinRow combines inner, outer row into mutRow. +func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { + if !isRightJoin { inner, outer = outer, inner } - mutRow.ShadowCopyPartialRow(0, inner) - mutRow.ShadowCopyPartialRow(inner.Len(), outer) + j.mutRow.ShadowCopyPartialRow(0, inner) + j.mutRow.ShadowCopyPartialRow(inner.Len(), outer) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -182,7 +182,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(j.outerIsRight, inner, outer, j.mutRow) + j.makeJoinRow(j.outerIsRight, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { @@ -216,7 +216,7 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(j.outerIsRight, inner, outer, j.mutRow) + j.makeJoinRow(j.outerIsRight, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { @@ -251,7 +251,7 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(false, inner, outer, j.mutRow) + j.makeJoinRow(false, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { @@ -293,7 +293,7 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - makeShadowJoinRow(false, inner, outer, j.mutRow) + j.makeJoinRow(false, inner, outer) matched, err := expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { @@ -327,9 +327,7 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk if inners.Len() == 0 { return false, nil } - - numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, false, outer, inners, j.conditions, j.mutRow, chk, numToAppend) + return j.tryToMatchInnerAndOuter(false, outer, inners, chk) } func (j *leftOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -347,8 +345,7 @@ func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, ch return false, nil } - numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, true, outer, inners, j.conditions, j.mutRow, chk, numToAppend) + return j.tryToMatchInnerAndOuter(true, outer, inners, chk) } func (j *rightOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -366,22 +363,25 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch return false, nil } - numToAppend := j.maxChunkSize - chk.NumRows() - return tryToMatchInerAndOuter(j.ctx, j.outerIsRight, outer, inners, j.conditions, j.mutRow, chk, numToAppend) + return j.tryToMatchInnerAndOuter(j.outerIsRight, outer, inners, chk) } -func tryToMatchInerAndOuter(ctx sessionctx.Context, isRight bool, outer chunk.Row, inners chunk.Iterator, conditions []expression.Expression, mutRow chunk.MutRow, outChk *chunk.Chunk, numToAppend int) (bool, error) { +// tryToMatchInnerAndOuter does 2 things: +// 1. Combine outer and inner row to join row. +// 2. Evaluate the join row whether match the join conditions. +func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inners chunk.Iterator, outChk *chunk.Chunk) (bool, error) { match := false + numToAppend := j.maxChunkSize - outChk.NumRows() for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { - makeShadowJoinRow(isRight, inner, outer, mutRow) + j.makeJoinRow(isRight, inner, outer) - matched, err := expression.VectorizedFilterRow(ctx, conditions, mutRow.ToRow()) + matched, err := expression.VectorizedFilterRow(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } if matched { match = true - outChk.AppendRow(mutRow.ToRow()) + outChk.AppendRow(j.mutRow.ToRow()) } } return match, nil From e5f4cbec0ca8943448dc079dfb3a4b96a4789044 Mon Sep 17 00:00:00 2001 From: crazycs Date: Thu, 23 Aug 2018 20:53:27 +0800 Subject: [PATCH 29/41] address comment --- executor/joiner.go | 1 + 1 file changed, 1 insertion(+) diff --git a/executor/joiner.go b/executor/joiner.go index 146fa3e46f5b9..c09a49c090ee8 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -143,6 +143,7 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { } // makeJoinRow combines inner, outer row into mutRow. +// combines will uses shadow copy inner and outer row data to mutRow. func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { if !isRightJoin { inner, outer = outer, inner From abbc2c9e26be09e80fa3cde614849459f94958d7 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 11:17:30 +0800 Subject: [PATCH 30/41] address comment --- executor/joiner.go | 2 +- util/chunk/chunk_copy_test.go | 1 + util/chunk/mutrow.go | 7 +++---- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index c09a49c090ee8..05aca33b56dde 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -369,7 +369,7 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch // tryToMatchInnerAndOuter does 2 things: // 1. Combine outer and inner row to join row. -// 2. Evaluate the join row whether match the join conditions. +// 2. Check whether the join row matches the join conditions, if so, append it to the `outChk`. func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inners chunk.Iterator, outChk *chunk.Chunk) (bool, error) { match := false numToAppend := j.maxChunkSize - outChk.NumRows() diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 5dfc381913c82..933e0e1567183 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -117,6 +117,7 @@ func prepareChksForShadowCopy() (it1 Iterator, row Row, mutRow MutRow) { colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index e5065825a7c0a..62d94d8203311 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -347,12 +347,11 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { col.offsets[1] = int32(dataLen) } -// ShadowCopyPartialRow use shadow copy to instead of AppendPartialRow, -// ShadowCopyPartialRow copies the data of row to the mutRow. +// ShadowCopyPartialRow shadow copies the data of `row` to MutRow. func (mr MutRow) ShadowCopyPartialRow(colIdx int, row Row) { - dst := mr.c + chk := mr.c for i, rowCol := range row.c.columns { - chkCol := dst.columns[colIdx+i] + chkCol := chk.columns[colIdx+i] if !rowCol.isNull(row.idx) { chkCol.nullBitmap[0] = 1 } else { From e1dd31df5cae3fc0c1d1d12ed90f9ffaee6c712e Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 17:08:55 +0800 Subject: [PATCH 31/41] address comment and add test to mutRow_test --- util/chunk/chunk.go | 2 +- util/chunk/chunk_copy_test.go | 28 ++++---------------- util/chunk/mutrow_test.go | 50 +++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index ccdcdc7219d8c..a0920bb45dd6f 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -182,7 +182,7 @@ func (c *Chunk) Append(other *Chunk, begin, end int) { c.numVirtualRows += end - begin } -// TruncateTo truncates rows from tail to head in a Chunk to "numRows" rows. +// TruncateTo truncates rows from tail to head in a Chunk to "rowsNum" rows. func (c *Chunk) TruncateTo(numRows int) { for _, col := range c.columns { if col.isFixed() { diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go index 933e0e1567183..25a3a6a3ece61 100644 --- a/util/chunk/chunk_copy_test.go +++ b/util/chunk/chunk_copy_test.go @@ -8,10 +8,6 @@ import ( "github.com/pingcap/tidb/types" ) -var ( - numRows = 1024 -) - func TestCopyFieldByField(t *testing.T) { it1, row, dst := prepareChks() @@ -40,20 +36,6 @@ func TestCopyShadow(t *testing.T) { } } -func BenchmarkCopyFieldByField(b *testing.B) { - b.ReportAllocs() - it1, row, dst := prepareChks() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - dst.Reset() - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - dst.AppendRow(lhs) - dst.AppendPartialRow(lhs.Len(), row) - } - } -} - func BenchmarkCopyShadow(b *testing.B) { b.ReportAllocs() it1, row, mutRow := prepareChksForShadowCopy() @@ -81,8 +63,8 @@ func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { } func getChunk() *Chunk { - chk := newChunkWithInitCap(numRows, 8, 8, 0, 0) - for i := 0; i < numRows; i++ { + chk := newChunkWithInitCap(rowsNum, 8, 8, 0, 0) + for i := 0; i < rowsNum; i++ { //chk.AppendNull(0) chk.AppendInt64(0, int64(i)) if i%3 == 0 { @@ -102,7 +84,7 @@ func prepareChks() (it1 Iterator, row Row, dst *Chunk) { row = chk1.GetRow(0) it1 = NewIterator4Chunk(chk1) it1.Begin() - dst = newChunkWithInitCap(numRows, 8, 8, 0, 0, 8, 8, 0, 0) + dst = newChunkWithInitCap(rowsNum, 8, 8, 0, 0, 8, 8, 0, 0) return it1, row, dst } @@ -129,11 +111,11 @@ func prepareChksForShadowCopy() (it1 Iterator, row Row, mutRow MutRow) { func checkDstChk(dst *Chunk) bool { for i := 0; i < 8; i++ { - if dst.columns[i].length != numRows { + if dst.columns[i].length != rowsNum { return false } } - for j := 0; j < numRows; j++ { + for j := 0; j < rowsNum; j++ { row := dst.GetRow(j) if !checkDstChkRow(row, j) { return false diff --git a/util/chunk/mutrow_test.go b/util/chunk/mutrow_test.go index b4264e39a5855..da52f76c7e916 100644 --- a/util/chunk/mutrow_test.go +++ b/util/chunk/mutrow_test.go @@ -134,3 +134,53 @@ func BenchmarkMutRowFromValues(b *testing.B) { MutRowFromValues(values) } } + +func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { + colTypes := make([]*types.FieldType, 0, 8) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeDatetime}) + + mutRow := MutRowFromTypes(colTypes) + row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + mutRow.ShadowCopyPartialRow(0, row) + c.Assert(row.GetString(0), check.DeepEquals, mutRow.ToRow().GetString(0)) + c.Assert(row.GetString(1), check.DeepEquals, mutRow.ToRow().GetString(1)) + c.Assert(row.GetInt64(2), check.DeepEquals, mutRow.ToRow().GetInt64(2)) + c.Assert(row.GetInt64(3), check.DeepEquals, mutRow.ToRow().GetInt64(3)) + c.Assert(row.GetTime(4), check.DeepEquals, mutRow.ToRow().GetTime(4)) +} + +var rowsNum = 1024 + +func BenchmarkMutRowShadowCopyPartialRow(b *testing.B) { + b.ReportAllocs() + colTypes := make([]*types.FieldType, 0, 8) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeDatetime}) + + mutRow := MutRowFromTypes(colTypes) + row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + for i := 0; i < b.N; i++ { + for j := 0; j < rowsNum; j++ { + mutRow.ShadowCopyPartialRow(0, row) + } + } +} + +func BenchmarkChunkAppendPartialRow(b *testing.B) { + b.ReportAllocs() + chk := newChunkWithInitCap(rowsNum, 0, 0, 8, 8, 16) + row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + for i := 0; i < b.N; i++ { + chk.Reset() + for j := 0; j < rowsNum; j++ { + chk.AppendPartialRow(0, row) + } + } +} From 593b31ce1af71e5f1a89b676b65c008df4418814 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 17:12:09 +0800 Subject: [PATCH 32/41] remove chunk_copy_test.go --- util/chunk/chunk_copy_test.go | 158 ---------------------------------- util/chunk/chunk_test.go | 12 +++ 2 files changed, 12 insertions(+), 158 deletions(-) delete mode 100644 util/chunk/chunk_copy_test.go diff --git a/util/chunk/chunk_copy_test.go b/util/chunk/chunk_copy_test.go deleted file mode 100644 index 25a3a6a3ece61..0000000000000 --- a/util/chunk/chunk_copy_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package chunk - -import ( - "fmt" - "testing" - - "github.com/pingcap/tidb/mysql" - "github.com/pingcap/tidb/types" -) - -func TestCopyFieldByField(t *testing.T) { - it1, row, dst := prepareChks() - - dst.Reset() - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - dst.AppendRow(lhs) - dst.AppendPartialRow(lhs.Len(), row) - } - if !checkDstChk(dst) { - t.Fail() - } -} - -func TestCopyShadow(t *testing.T) { - it1, row, mutRow := prepareChksForShadowCopy() - - rowIdx := 0 - for lhs := it1.Begin(); lhs != it1.End(); lhs = it1.Next() { - mutRow.ShadowCopyPartialRow(0, lhs) - mutRow.ShadowCopyPartialRow(lhs.Len(), row) - - if !checkDstChkRow(mutRow.ToRow(), rowIdx) { - t.Fail() - } - rowIdx++ - } -} - -func BenchmarkCopyShadow(b *testing.B) { - b.ReportAllocs() - it1, row, mutRow := prepareChksForShadowCopy() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - lhs := it1.Begin() - for ; lhs != it1.End(); lhs = it1.Next() { - mutRow.ShadowCopyPartialRow(0, lhs) - mutRow.ShadowCopyPartialRow(lhs.Len(), row) - } - } -} - -func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { - chk := &Chunk{} - for _, l := range elemLen { - if l > 0 { - chk.addFixedLenColumn(l, cap) - } else { - chk.addVarLenColumn(cap) - } - } - return chk -} - -func getChunk() *Chunk { - chk := newChunkWithInitCap(rowsNum, 8, 8, 0, 0) - for i := 0; i < rowsNum; i++ { - //chk.AppendNull(0) - chk.AppendInt64(0, int64(i)) - if i%3 == 0 { - chk.AppendNull(1) - } else { - chk.AppendInt64(1, int64(i)) - } - - chk.AppendString(2, fmt.Sprintf("abcd-%d", i)) - chk.AppendBytes(3, []byte(fmt.Sprintf("01234567890zxcvbnmqwer-%d", i))) - } - return chk -} - -func prepareChks() (it1 Iterator, row Row, dst *Chunk) { - chk1 := getChunk() - row = chk1.GetRow(0) - it1 = NewIterator4Chunk(chk1) - it1.Begin() - dst = newChunkWithInitCap(rowsNum, 8, 8, 0, 0, 8, 8, 0, 0) - return it1, row, dst -} - -func prepareChksForShadowCopy() (it1 Iterator, row Row, mutRow MutRow) { - chk1 := getChunk() - row = chk1.GetRow(0) - it1 = NewIterator4Chunk(chk1) - it1.Begin() - - colTypes := make([]*types.FieldType, 0, 8) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) - - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) - - mutRow = MutRowFromTypes(colTypes) - return it1, row, mutRow -} - -func checkDstChk(dst *Chunk) bool { - for i := 0; i < 8; i++ { - if dst.columns[i].length != rowsNum { - return false - } - } - for j := 0; j < rowsNum; j++ { - row := dst.GetRow(j) - if !checkDstChkRow(row, j) { - return false - } - } - return true -} -func checkDstChkRow(row Row, j int) bool { - if row.GetInt64(0) != int64(j) { - return false - } - if j%3 == 0 { - if !row.IsNull(1) { - return false - } - } else { - if row.GetInt64(1) != int64(j) { - return false - } - } - if row.GetString(2) != fmt.Sprintf("abcd-%d", j) { - return false - } - if string(row.GetBytes(3)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", j) { - return false - } - if row.GetInt64(4) != 0 { - return false - } - if !row.IsNull(5) { - return false - } - if row.GetString(6) != fmt.Sprintf("abcd-%d", 0) { - return false - } - if string(row.GetBytes(7)) != fmt.Sprintf("01234567890zxcvbnmqwer-%d", 0) { - return false - } - return true -} diff --git a/util/chunk/chunk_test.go b/util/chunk/chunk_test.go index 7c0feea0e18fc..b15508809ff9f 100644 --- a/util/chunk/chunk_test.go +++ b/util/chunk/chunk_test.go @@ -256,6 +256,18 @@ func newChunk(elemLen ...int) *Chunk { return chk } +func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { + chk := &Chunk{} + for _, l := range elemLen { + if l > 0 { + chk.addFixedLenColumn(l, cap) + } else { + chk.addVarLenColumn(cap) + } + } + return chk +} + var allTypes = []*types.FieldType{ types.NewFieldType(mysql.TypeTiny), types.NewFieldType(mysql.TypeShort), From 3a6fbb7afb32c516d5802226e4ba123d7fe519a5 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 17:37:29 +0800 Subject: [PATCH 33/41] refine code --- executor/joiner.go | 2 +- expression/chunk_executor.go | 48 +++++++++++++++--------------------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 05aca33b56dde..c9e7d24fc10a9 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -376,7 +376,7 @@ func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inne for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { j.makeJoinRow(isRight, inner, outer) - matched, err := expression.VectorizedFilterRow(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err := expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) if err != nil { return false, errors.Trace(err) } diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 07c14e94a6c44..887eb2463c3a3 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -236,37 +236,29 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch for i, numRows := 0, iterator.Len(); i < numRows; i++ { selected = append(selected, true) } - var err error - for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { - if !selected[row.Idx()] { - continue - } - selected[row.Idx()], err = VectorizedFilterRow(ctx, filters, row) - if err != nil { - return nil, errors.Trace(err) - } - } - return selected, nil -} - -// VectorizedFilterRow applies a list of filters to a row. -func VectorizedFilterRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { - selected := true for _, filter := range filters { - isTypeInt := filter.GetType().EvalType() == types.ETInt - if isTypeInt { - filterResult, isNull, err := filter.EvalInt(ctx, row) - if err != nil { - return false, errors.Trace(err) + isIntType := true + if filter.GetType().EvalType() != types.ETInt { + isIntType = false + } + for row := iterator.Begin(); row != iterator.End(); row = iterator.Next() { + if !selected[row.Idx()] { + continue } - selected = selected && !isNull && (filterResult != 0) - } else { - // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. - bVal, err := EvalBool(ctx, []Expression{filter}, row) - if err != nil { - return false, errors.Trace(err) + if isIntType { + filterResult, isNull, err := filter.EvalInt(ctx, row) + if err != nil { + return nil, errors.Trace(err) + } + selected[row.Idx()] = selected[row.Idx()] && !isNull && (filterResult != 0) + } else { + // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. + bVal, err := EvalBool(ctx, []Expression{filter}, row) + if err != nil { + return nil, errors.Trace(err) + } + selected[row.Idx()] = selected[row.Idx()] && bVal } - selected = selected && bVal } } return selected, nil From 600fdc35b7a376e48afbea4575a51d65c923cee2 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 18:51:14 +0800 Subject: [PATCH 34/41] refine test --- util/chunk/mutrow_test.go | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/util/chunk/mutrow_test.go b/util/chunk/mutrow_test.go index da52f76c7e916..b9cae59889bcc 100644 --- a/util/chunk/mutrow_test.go +++ b/util/chunk/mutrow_test.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" + "time" ) func (s *testChunkSuite) TestMutRow(c *check.C) { @@ -136,21 +137,30 @@ func BenchmarkMutRowFromValues(b *testing.B) { } func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { - colTypes := make([]*types.FieldType, 0, 8) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) + colTypes := make([]*types.FieldType, 0, 3) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) - colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeDatetime}) + colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeTimestamp}) mutRow := MutRowFromTypes(colTypes) - row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + row := MutRowFromValues("abc", 123, types.ZeroTimestamp).ToRow() mutRow.ShadowCopyPartialRow(0, row) - c.Assert(row.GetString(0), check.DeepEquals, mutRow.ToRow().GetString(0)) - c.Assert(row.GetString(1), check.DeepEquals, mutRow.ToRow().GetString(1)) - c.Assert(row.GetInt64(2), check.DeepEquals, mutRow.ToRow().GetInt64(2)) - c.Assert(row.GetInt64(3), check.DeepEquals, mutRow.ToRow().GetInt64(3)) - c.Assert(row.GetTime(4), check.DeepEquals, mutRow.ToRow().GetTime(4)) + c.Assert(row.GetString(0), check.Equals, mutRow.ToRow().GetString(0)) + c.Assert(row.GetInt64(1), check.Equals, mutRow.ToRow().GetInt64(1)) + c.Assert(row.GetTime(2), check.DeepEquals, mutRow.ToRow().GetTime(2)) + + row.c.Reset() + d := types.NewStringDatum("dfg") + row.c.AppendDatum(0, &d) + d = types.NewIntDatum(567) + row.c.AppendDatum(1, &d) + d = types.NewTimeDatum(types.Time{Time: types.FromGoTime(time.Now()), Fsp: 6, Type: mysql.TypeTimestamp}) + row.c.AppendDatum(2, &d) + + c.Assert(d.GetMysqlTime(), check.DeepEquals, mutRow.ToRow().GetTime(2)) + c.Assert(row.GetString(0), check.Equals, mutRow.ToRow().GetString(0)) + c.Assert(row.GetInt64(1), check.Equals, mutRow.ToRow().GetInt64(1)) + c.Assert(row.GetTime(2), check.DeepEquals, mutRow.ToRow().GetTime(2)) } var rowsNum = 1024 @@ -166,6 +176,7 @@ func BenchmarkMutRowShadowCopyPartialRow(b *testing.B) { mutRow := MutRowFromTypes(colTypes) row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < rowsNum; j++ { mutRow.ShadowCopyPartialRow(0, row) @@ -177,6 +188,7 @@ func BenchmarkChunkAppendPartialRow(b *testing.B) { b.ReportAllocs() chk := newChunkWithInitCap(rowsNum, 0, 0, 8, 8, 16) row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() + b.ResetTimer() for i := 0; i < b.N; i++ { chk.Reset() for j := 0; j < rowsNum; j++ { From f4fbd707b1b6375dfa7b055f7e5c6579bc3f3383 Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 24 Aug 2018 19:07:11 +0800 Subject: [PATCH 35/41] refine test and code --- executor/joiner.go | 24 ++++++++++++------------ expression/chunk_executor.go | 23 +++++++++++++++++++++++ util/chunk/mutrow.go | 4 ++-- util/chunk/mutrow_test.go | 4 ++-- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index c9e7d24fc10a9..ba5fc5fbe4943 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -91,7 +91,7 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, colTypes := make([]*types.FieldType, 0, len(lhsColTypes)+len(rhsColTypes)) colTypes = append(colTypes, lhsColTypes...) colTypes = append(colTypes, rhsColTypes...) - base.mutRow = chunk.MutRowFromTypes(colTypes) + base.shadowRow = chunk.MutRowFromTypes(colTypes) base.selected = make([]bool, 0, chunk.InitialCapacity) if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { innerColTypes := lhsColTypes @@ -124,7 +124,7 @@ type baseJoiner struct { conditions []expression.Expression defaultInner chunk.Row outerIsRight bool - mutRow chunk.MutRow + shadowRow chunk.MutRow selected []bool maxChunkSize int } @@ -142,14 +142,14 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -// makeJoinRow combines inner, outer row into mutRow. -// combines will uses shadow copy inner and outer row data to mutRow. +// makeJoinRow combines inner, outer row into shadowRow. +// combines will uses shadow copy inner and outer row data to shadowRow. func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { if !isRightJoin { inner, outer = outer, inner } - j.mutRow.ShadowCopyPartialRow(0, inner) - j.mutRow.ShadowCopyPartialRow(inner.Len(), outer) + j.shadowRow.ShallowCopyPartialRow(0, inner) + j.shadowRow.ShallowCopyPartialRow(inner.Len(), outer) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -185,7 +185,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(j.outerIsRight, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -219,7 +219,7 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(j.outerIsRight, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -254,7 +254,7 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(false, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -296,7 +296,7 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(false, inner, outer) - matched, err := expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err := expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -376,13 +376,13 @@ func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inne for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { j.makeJoinRow(isRight, inner, outer) - matched, err := expression.EvalBool(j.ctx, j.conditions, j.mutRow.ToRow()) + matched, err := expression.FilterRow(j.ctx, j.conditions, j.shadowRow.ToRow()) if err != nil { return false, errors.Trace(err) } if matched { match = true - outChk.AppendRow(j.mutRow.ToRow()) + outChk.AppendRow(j.shadowRow.ToRow()) } } return match, nil diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 887eb2463c3a3..4ed9c81664a1e 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -263,3 +263,26 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch } return selected, nil } + +// FilterRow applies a list of filters to a row. +func FilterRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { + selected := true + for _, filter := range filters { + isTypeInt := filter.GetType().EvalType() == types.ETInt + if isTypeInt { + filterResult, isNull, err := filter.EvalInt(ctx, row) + if err != nil { + return false, errors.Trace(err) + } + selected = selected && !isNull && (filterResult != 0) + } else { + // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. + bVal, err := EvalBool(ctx, []Expression{filter}, row) + if err != nil { + return false, errors.Trace(err) + } + selected = selected && bVal + } + } + return selected, nil +} diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index 62d94d8203311..e6a3c5410b444 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -347,8 +347,8 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { col.offsets[1] = int32(dataLen) } -// ShadowCopyPartialRow shadow copies the data of `row` to MutRow. -func (mr MutRow) ShadowCopyPartialRow(colIdx int, row Row) { +// ShallowCopyPartialRow shadow copies the data of `row` to MutRow. +func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) { chk := mr.c for i, rowCol := range row.c.columns { chkCol := chk.columns[colIdx+i] diff --git a/util/chunk/mutrow_test.go b/util/chunk/mutrow_test.go index b9cae59889bcc..d2880859c6eae 100644 --- a/util/chunk/mutrow_test.go +++ b/util/chunk/mutrow_test.go @@ -144,7 +144,7 @@ func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { mutRow := MutRowFromTypes(colTypes) row := MutRowFromValues("abc", 123, types.ZeroTimestamp).ToRow() - mutRow.ShadowCopyPartialRow(0, row) + mutRow.ShallowCopyPartialRow(0, row) c.Assert(row.GetString(0), check.Equals, mutRow.ToRow().GetString(0)) c.Assert(row.GetInt64(1), check.Equals, mutRow.ToRow().GetInt64(1)) c.Assert(row.GetTime(2), check.DeepEquals, mutRow.ToRow().GetTime(2)) @@ -179,7 +179,7 @@ func BenchmarkMutRowShadowCopyPartialRow(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < rowsNum; j++ { - mutRow.ShadowCopyPartialRow(0, row) + mutRow.ShallowCopyPartialRow(0, row) } } } From e9ef7ddeb6890a2c50554328d31687296aae7926 Mon Sep 17 00:00:00 2001 From: crazycs Date: Sat, 25 Aug 2018 00:29:39 +0800 Subject: [PATCH 36/41] optimize append num --- executor/joiner.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/executor/joiner.go b/executor/joiner.go index ba5fc5fbe4943..1848e340511ea 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -373,7 +373,7 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inners chunk.Iterator, outChk *chunk.Chunk) (bool, error) { match := false numToAppend := j.maxChunkSize - outChk.NumRows() - for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { + for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner = inners.Next() { j.makeJoinRow(isRight, inner, outer) matched, err := expression.FilterRow(j.ctx, j.conditions, j.shadowRow.ToRow()) @@ -382,6 +382,7 @@ func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inne } if matched { match = true + numToAppend-- outChk.AppendRow(j.shadowRow.ToRow()) } } From 21b54174d555e7b7c7be220cecf293654e60188d Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 27 Aug 2018 08:04:06 +0800 Subject: [PATCH 37/41] remove shadown copy on inner, leftOut, rightOut join, vectorized filter batch copy may be better. --- executor/joiner.go | 80 +++++++++++++++++++++++++----------- expression/chunk_executor.go | 23 ----------- util/chunk/chunk.go | 2 +- util/chunk/mutrow_test.go | 2 +- 4 files changed, 58 insertions(+), 49 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 1848e340511ea..1690496ba9902 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -92,6 +92,7 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, colTypes = append(colTypes, lhsColTypes...) colTypes = append(colTypes, rhsColTypes...) base.shadowRow = chunk.MutRowFromTypes(colTypes) + base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) base.selected = make([]bool, 0, chunk.InitialCapacity) if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { innerColTypes := lhsColTypes @@ -124,6 +125,7 @@ type baseJoiner struct { conditions []expression.Expression defaultInner chunk.Row outerIsRight bool + chk *chunk.Chunk shadowRow chunk.MutRow selected []bool maxChunkSize int @@ -328,7 +330,24 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk if inners.Len() == 0 { return false, nil } - return j.tryToMatchInnerAndOuter(false, outer, inners, chk) + j.chk.Reset() + chkForJoin := j.chk + if len(j.conditions) == 0 { + chkForJoin = chk + } + + numToAppend := j.maxChunkSize - chk.NumRows() + for ; inners.Current() != inners.End() && numToAppend > 0; numToAppend-- { + j.makeJoinRowToChunk(chkForJoin, outer, inners.Current()) + inners.Next() + } + if len(j.conditions) == 0 { + return true, nil + } + + // reach here, chkForJoin is j.chk + matched, err := j.filter(chkForJoin, chk) + return matched, errors.Trace(err) } func (j *leftOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -346,7 +365,24 @@ func (j *rightOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, ch return false, nil } - return j.tryToMatchInnerAndOuter(true, outer, inners, chk) + j.chk.Reset() + chkForJoin := j.chk + if len(j.conditions) == 0 { + chkForJoin = chk + } + + numToAppend := j.maxChunkSize - chk.NumRows() + for ; inners.Current() != inners.End() && numToAppend > 0; numToAppend-- { + j.makeJoinRowToChunk(chkForJoin, inners.Current(), outer) + inners.Next() + } + if len(j.conditions) == 0 { + return true, nil + } + + // reach here, chkForJoin is j.chk + matched, err := j.filter(chkForJoin, chk) + return matched, errors.Trace(err) } func (j *rightOuterJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { @@ -363,30 +399,26 @@ func (j *innerJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *ch if inners.Len() == 0 { return false, nil } - - return j.tryToMatchInnerAndOuter(j.outerIsRight, outer, inners, chk) -} - -// tryToMatchInnerAndOuter does 2 things: -// 1. Combine outer and inner row to join row. -// 2. Check whether the join row matches the join conditions, if so, append it to the `outChk`. -func (j *baseJoiner) tryToMatchInnerAndOuter(isRight bool, outer chunk.Row, inners chunk.Iterator, outChk *chunk.Chunk) (bool, error) { - match := false - numToAppend := j.maxChunkSize - outChk.NumRows() - for inner := inners.Current(); inner != inners.End() && numToAppend > 0; inner = inners.Next() { - j.makeJoinRow(isRight, inner, outer) - - matched, err := expression.FilterRow(j.ctx, j.conditions, j.shadowRow.ToRow()) - if err != nil { - return false, errors.Trace(err) - } - if matched { - match = true - numToAppend-- - outChk.AppendRow(j.shadowRow.ToRow()) + j.chk.Reset() + chkForJoin := j.chk + if len(j.conditions) == 0 { + chkForJoin = chk + } + inner, numToAppend := inners.Current(), j.maxChunkSize-chk.NumRows() + for ; inner != inners.End() && numToAppend > 0; inner, numToAppend = inners.Next(), numToAppend-1 { + if j.outerIsRight { + j.makeJoinRowToChunk(chkForJoin, inner, outer) + } else { + j.makeJoinRowToChunk(chkForJoin, outer, inner) } } - return match, nil + if len(j.conditions) == 0 { + return true, nil + } + + // reach here, chkForJoin is j.chk + matched, err := j.filter(chkForJoin, chk) + return matched, errors.Trace(err) } func (j *innerJoiner) onMissMatch(outer chunk.Row, chk *chunk.Chunk) { diff --git a/expression/chunk_executor.go b/expression/chunk_executor.go index 4ed9c81664a1e..887eb2463c3a3 100644 --- a/expression/chunk_executor.go +++ b/expression/chunk_executor.go @@ -263,26 +263,3 @@ func VectorizedFilter(ctx sessionctx.Context, filters []Expression, iterator *ch } return selected, nil } - -// FilterRow applies a list of filters to a row. -func FilterRow(ctx sessionctx.Context, filters []Expression, row chunk.Row) (bool, error) { - selected := true - for _, filter := range filters { - isTypeInt := filter.GetType().EvalType() == types.ETInt - if isTypeInt { - filterResult, isNull, err := filter.EvalInt(ctx, row) - if err != nil { - return false, errors.Trace(err) - } - selected = selected && !isNull && (filterResult != 0) - } else { - // TODO: should rewrite the filter to `cast(expr as SIGNED) != 0` and always use `EvalInt`. - bVal, err := EvalBool(ctx, []Expression{filter}, row) - if err != nil { - return false, errors.Trace(err) - } - selected = selected && bVal - } - } - return selected, nil -} diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index a0920bb45dd6f..ccdcdc7219d8c 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -182,7 +182,7 @@ func (c *Chunk) Append(other *Chunk, begin, end int) { c.numVirtualRows += end - begin } -// TruncateTo truncates rows from tail to head in a Chunk to "rowsNum" rows. +// TruncateTo truncates rows from tail to head in a Chunk to "numRows" rows. func (c *Chunk) TruncateTo(numRows int) { for _, col := range c.columns { if col.isFixed() { diff --git a/util/chunk/mutrow_test.go b/util/chunk/mutrow_test.go index d2880859c6eae..50a19961ac04b 100644 --- a/util/chunk/mutrow_test.go +++ b/util/chunk/mutrow_test.go @@ -15,13 +15,13 @@ package chunk import ( "testing" + "time" "github.com/pingcap/check" "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" - "time" ) func (s *testChunkSuite) TestMutRow(c *check.C) { From 0aadbf688412aeab6e7031b2efbb10b70eb1a696 Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 27 Aug 2018 14:15:39 +0800 Subject: [PATCH 38/41] address comment --- executor/joiner.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 1690496ba9902..5ae3ac2dca98b 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -91,8 +91,6 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, colTypes := make([]*types.FieldType, 0, len(lhsColTypes)+len(rhsColTypes)) colTypes = append(colTypes, lhsColTypes...) colTypes = append(colTypes, rhsColTypes...) - base.shadowRow = chunk.MutRowFromTypes(colTypes) - base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) base.selected = make([]bool, 0, chunk.InitialCapacity) if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { innerColTypes := lhsColTypes @@ -103,18 +101,25 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, } switch joinType { case plan.SemiJoin: + base.shadowRow = chunk.MutRowFromTypes(colTypes) return &semiJoiner{base} case plan.AntiSemiJoin: + base.shadowRow = chunk.MutRowFromTypes(colTypes) return &antiSemiJoiner{base} case plan.LeftOuterSemiJoin: + base.shadowRow = chunk.MutRowFromTypes(colTypes) return &leftOuterSemiJoiner{base} case plan.AntiLeftOuterSemiJoin: + base.shadowRow = chunk.MutRowFromTypes(colTypes) return &antiLeftOuterSemiJoiner{base} case plan.LeftOuterJoin: + base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) return &leftOuterJoiner{base} case plan.RightOuterJoin: + base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) return &rightOuterJoiner{base} case plan.InnerJoin: + base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) return &innerJoiner{base} } panic("unsupported join type in func newJoiner()") @@ -144,8 +149,7 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -// makeJoinRow combines inner, outer row into shadowRow. -// combines will uses shadow copy inner and outer row data to shadowRow. +// makeJoinRow shallow copies `inner` and `outer` into `shallowRow`. func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { if !isRightJoin { inner, outer = outer, inner From 0de20636a5eeb76b58d7eff7f6b6f4bafcdc517a Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 27 Aug 2018 15:19:06 +0800 Subject: [PATCH 39/41] address comment --- executor/joiner.go | 22 +++++++++++----------- util/chunk/mutrow.go | 2 +- util/chunk/mutrow_test.go | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 5ae3ac2dca98b..9ec55421c95d4 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -101,16 +101,16 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, } switch joinType { case plan.SemiJoin: - base.shadowRow = chunk.MutRowFromTypes(colTypes) + base.shallowRow = chunk.MutRowFromTypes(colTypes) return &semiJoiner{base} case plan.AntiSemiJoin: - base.shadowRow = chunk.MutRowFromTypes(colTypes) + base.shallowRow = chunk.MutRowFromTypes(colTypes) return &antiSemiJoiner{base} case plan.LeftOuterSemiJoin: - base.shadowRow = chunk.MutRowFromTypes(colTypes) + base.shallowRow = chunk.MutRowFromTypes(colTypes) return &leftOuterSemiJoiner{base} case plan.AntiLeftOuterSemiJoin: - base.shadowRow = chunk.MutRowFromTypes(colTypes) + base.shallowRow = chunk.MutRowFromTypes(colTypes) return &antiLeftOuterSemiJoiner{base} case plan.LeftOuterJoin: base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) @@ -131,7 +131,7 @@ type baseJoiner struct { defaultInner chunk.Row outerIsRight bool chk *chunk.Chunk - shadowRow chunk.MutRow + shallowRow chunk.MutRow selected []bool maxChunkSize int } @@ -154,8 +154,8 @@ func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { if !isRightJoin { inner, outer = outer, inner } - j.shadowRow.ShallowCopyPartialRow(0, inner) - j.shadowRow.ShallowCopyPartialRow(inner.Len(), outer) + j.shallowRow.ShallowCopyPartialRow(0, inner) + j.shallowRow.ShallowCopyPartialRow(inner.Len(), outer) } func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { @@ -191,7 +191,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(j.outerIsRight, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -225,7 +225,7 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(j.outerIsRight, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -260,7 +260,7 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(false, inner, outer) - matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) + matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { return false, errors.Trace(err) } @@ -302,7 +302,7 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { j.makeJoinRow(false, inner, outer) - matched, err := expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) + matched, err := expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { return false, errors.Trace(err) } diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index e6a3c5410b444..276ee7c560620 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -347,7 +347,7 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { col.offsets[1] = int32(dataLen) } -// ShallowCopyPartialRow shadow copies the data of `row` to MutRow. +// ShallowCopyPartialRow shallow copies the data of `row` to MutRow. func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) { chk := mr.c for i, rowCol := range row.c.columns { diff --git a/util/chunk/mutrow_test.go b/util/chunk/mutrow_test.go index 50a19961ac04b..bf2e925c7fb41 100644 --- a/util/chunk/mutrow_test.go +++ b/util/chunk/mutrow_test.go @@ -136,7 +136,7 @@ func BenchmarkMutRowFromValues(b *testing.B) { } } -func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { +func (s *testChunkSuite) TestMutRowShallowCopyPartialRow(c *check.C) { colTypes := make([]*types.FieldType, 0, 3) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) @@ -165,7 +165,7 @@ func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { var rowsNum = 1024 -func BenchmarkMutRowShadowCopyPartialRow(b *testing.B) { +func BenchmarkMutRowShallowCopyPartialRow(b *testing.B) { b.ReportAllocs() colTypes := make([]*types.FieldType, 0, 8) colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) From c681658c2dc8f14589ebf99129ecece247922ebf Mon Sep 17 00:00:00 2001 From: crazycs Date: Tue, 28 Aug 2018 12:20:01 +0800 Subject: [PATCH 40/41] address comment --- executor/joiner.go | 12 ++++++------ util/chunk/mutrow.go | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/executor/joiner.go b/executor/joiner.go index 9ec55421c95d4..870669268d064 100644 --- a/executor/joiner.go +++ b/executor/joiner.go @@ -149,8 +149,8 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { chk.AppendPartialRow(lhs.Len(), rhs) } -// makeJoinRow shallow copies `inner` and `outer` into `shallowRow`. -func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { +// makeShallowJoinRow shallow copies `inner` and `outer` into `shallowRow`. +func (j *baseJoiner) makeShallowJoinRow(isRightJoin bool, inner, outer chunk.Row) { if !isRightJoin { inner, outer = outer, inner } @@ -189,7 +189,7 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.makeJoinRow(j.outerIsRight, inner, outer) + j.makeShallowJoinRow(j.outerIsRight, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { @@ -223,7 +223,7 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.makeJoinRow(j.outerIsRight, inner, outer) + j.makeShallowJoinRow(j.outerIsRight, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { @@ -258,7 +258,7 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.makeJoinRow(false, inner, outer) + j.makeShallowJoinRow(false, inner, outer) matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { @@ -300,7 +300,7 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera } for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { - j.makeJoinRow(false, inner, outer) + j.makeShallowJoinRow(false, inner, outer) matched, err := expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow()) if err != nil { diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index 276ee7c560620..1eba29a2f9e5c 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -349,23 +349,23 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { // ShallowCopyPartialRow shallow copies the data of `row` to MutRow. func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) { - chk := mr.c - for i, rowCol := range row.c.columns { - chkCol := chk.columns[colIdx+i] - if !rowCol.isNull(row.idx) { - chkCol.nullBitmap[0] = 1 + for i, srcCol := range row.c.columns { + dstCol := mr.c.columns[colIdx+i] + if !srcCol.isNull(row.idx) { + // MutRow only contains one row, so we can directly set the whole byte. + dstCol.nullBitmap[0] = 1 } else { - chkCol.nullBitmap[0] = 0 + dstCol.nullBitmap[0] = 0 } - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) + if srcCol.isFixed() { + elemLen := len(srcCol.elemBuf) offset := row.idx * elemLen - chkCol.data = rowCol.data[offset : offset+elemLen] + dstCol.data = srcCol.data[offset : offset+elemLen] } else { - start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] - chkCol.data = rowCol.data[start:end] - chkCol.offsets[1] = int32(len(chkCol.data)) + start, end := srcCol.offsets[row.idx], srcCol.offsets[row.idx+1] + dstCol.data = srcCol.data[start:end] + dstCol.offsets[1] = int32(len(dstCol.data)) } } } From 1e8a9f029003627eb483a2274f82d0f95468ea46 Mon Sep 17 00:00:00 2001 From: crazycs Date: Wed, 29 Aug 2018 13:55:36 +0800 Subject: [PATCH 41/41] update test after merge --- util/chunk/chunk_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/chunk/chunk_test.go b/util/chunk/chunk_test.go index 28cfe99da53ca..f5158ca4b4da2 100644 --- a/util/chunk/chunk_test.go +++ b/util/chunk/chunk_test.go @@ -262,9 +262,9 @@ func newChunkWithInitCap(cap int, elemLen ...int) *Chunk { chk := &Chunk{} for _, l := range elemLen { if l > 0 { - chk.addFixedLenColumn(l, cap) + chk.columns = append(chk.columns, newFixedLenColumn(l, cap)) } else { - chk.addVarLenColumn(cap) + chk.columns = append(chk.columns, newVarLenColumn(cap, nil)) } } return chk