Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

executor/join : use shallow copy for join. #7433

Merged
merged 49 commits into from
Aug 29, 2018
Merged
Show file tree
Hide file tree
Changes from 48 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
a576cc0
iterator copy init
crazycs520 Aug 19, 2018
79b74e7
fix iterator bug
crazycs520 Aug 19, 2018
d9fe98b
fix nullmap index out of range and add test
crazycs520 Aug 19, 2018
8fa2ad8
refine code
crazycs520 Aug 19, 2018
33ae5a0
refine code
crazycs520 Aug 19, 2018
255c64b
remove iterator copy and use back to pre rows
crazycs520 Aug 19, 2018
0973373
checkout joiner.go file
crazycs520 Aug 19, 2018
055a41a
add check to bench
crazycs520 Aug 20, 2018
c5eeff9
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 20, 2018
e7c6c64
iterator only once
crazycs520 Aug 21, 2018
ce8eb4f
add appendMultiSameNullBitmap
crazycs520 Aug 21, 2018
499b6f9
field by field only one line 2X
crazycs520 Aug 22, 2018
2a295a1
refine column copy
crazycs520 Aug 22, 2018
9d82447
refine column copy
crazycs520 Aug 22, 2018
ef85948
add shadow copy to join and move code
crazycs520 Aug 22, 2018
45b4631
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 22, 2018
5bf279f
rename function
crazycs520 Aug 22, 2018
9690506
add comment
crazycs520 Aug 22, 2018
8db639f
add shadow copy to inner join
crazycs520 Aug 22, 2018
7a55ff5
refine code
crazycs520 Aug 22, 2018
05c1273
add shadow copy to all join
crazycs520 Aug 22, 2018
66a133c
remove redundancy code
crazycs520 Aug 22, 2018
dadb047
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 22, 2018
b4192e4
remove column copy and redundancy code
crazycs520 Aug 23, 2018
4096997
address comment
crazycs520 Aug 23, 2018
b802941
add mutchunk
crazycs520 Aug 23, 2018
c5cfdf1
address comment
crazycs520 Aug 23, 2018
947f9d4
use mutRow instead of mut chunk.
crazycs520 Aug 23, 2018
24ab90e
address comment
crazycs520 Aug 23, 2018
2b8d896
refine code
crazycs520 Aug 23, 2018
3f82d2b
address comment
crazycs520 Aug 23, 2018
604e49d
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 23, 2018
e5f4cbe
address comment
crazycs520 Aug 23, 2018
abbc2c9
address comment
crazycs520 Aug 24, 2018
e1dd31d
address comment and add test to mutRow_test
crazycs520 Aug 24, 2018
593b31c
remove chunk_copy_test.go
crazycs520 Aug 24, 2018
3a6fbb7
refine code
crazycs520 Aug 24, 2018
600fdc3
refine test
crazycs520 Aug 24, 2018
f4fbd70
refine test and code
crazycs520 Aug 24, 2018
23eaf1e
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 24, 2018
e9ef7dd
optimize append num
crazycs520 Aug 24, 2018
21b5417
remove shadown copy on inner, leftOut, rightOut join, vectorized filt…
crazycs520 Aug 27, 2018
0aadbf6
address comment
crazycs520 Aug 27, 2018
0de2063
address comment
crazycs520 Aug 27, 2018
c681658
address comment
crazycs520 Aug 28, 2018
f8ccdf2
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 28, 2018
c7b2301
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 29, 2018
1e8a9f0
update test after merge
crazycs520 Aug 29, 2018
b939b3b
Merge branch 'master' into column-copy
XuHuaiyu Aug 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 25 additions & 22 deletions executor/joiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType,
colTypes := make([]*types.FieldType, 0, len(lhsColTypes)+len(rhsColTypes))
colTypes = append(colTypes, lhsColTypes...)
colTypes = append(colTypes, rhsColTypes...)
base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize)
base.selected = make([]bool, 0, chunk.InitialCapacity)
if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin {
innerColTypes := lhsColTypes
Expand All @@ -102,18 +101,25 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType,
}
switch joinType {
case plan.SemiJoin:
base.shallowRow = chunk.MutRowFromTypes(colTypes)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not change it on line 94 directly, so we don't repeat it multiple times?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, and AntiLeftOuterSemiJoin need shallowRow.

return &semiJoiner{base}
case plan.AntiSemiJoin:
base.shallowRow = chunk.MutRowFromTypes(colTypes)
return &antiSemiJoiner{base}
case plan.LeftOuterSemiJoin:
base.shallowRow = chunk.MutRowFromTypes(colTypes)
return &leftOuterSemiJoiner{base}
case plan.AntiLeftOuterSemiJoin:
base.shallowRow = chunk.MutRowFromTypes(colTypes)
return &antiLeftOuterSemiJoiner{base}
case plan.LeftOuterJoin:
base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize)
return &leftOuterJoiner{base}
case plan.RightOuterJoin:
base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize)
return &rightOuterJoiner{base}
case plan.InnerJoin:
base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize)
return &innerJoiner{base}
}
panic("unsupported join type in func newJoiner()")
Expand All @@ -125,6 +131,7 @@ type baseJoiner struct {
defaultInner chunk.Row
outerIsRight bool
chk *chunk.Chunk
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be removed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. Inner join, left outer join, and right outer join use chk to do a deep copy; deep copy + vectorized filter + batch copy has better performance.

shallowRow chunk.MutRow
selected []bool
maxChunkSize int
}
Expand All @@ -142,6 +149,15 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) {
chk.AppendPartialRow(lhs.Len(), rhs)
}

// makeShallowJoinRow fills `shallowRow` with shallow copies of `inner` and
// `outer`. When isRightJoin is true the inner row is placed first and the
// outer row after it; otherwise the outer row is placed first.
func (j *baseJoiner) makeShallowJoinRow(isRightJoin bool, inner, outer chunk.Row) {
	// Decide which row occupies the leading columns of the joined row.
	lhs, rhs := outer, inner
	if isRightJoin {
		lhs, rhs = inner, outer
	}
	j.shallowRow.ShallowCopyPartialRow(0, lhs)
	// The second row starts right after the last column of the first one.
	j.shallowRow.ShallowCopyPartialRow(lhs.Len(), rhs)
}

func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) {
j.selected, err = expression.VectorizedFilter(j.ctx, j.conditions, chunk.NewIterator4Chunk(input), j.selected)
if err != nil {
Expand Down Expand Up @@ -173,14 +189,9 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu
}

for inner := inners.Current(); inner != inners.End(); inner = inners.Next() {
j.chk.Reset()
if j.outerIsRight {
j.makeJoinRowToChunk(j.chk, inner, outer)
} else {
j.makeJoinRowToChunk(j.chk, outer, inner)
}
j.makeShallowJoinRow(j.outerIsRight, inner, outer)

matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0))
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow())
if err != nil {
return false, errors.Trace(err)
}
Expand Down Expand Up @@ -212,14 +223,9 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk
}

for inner := inners.Current(); inner != inners.End(); inner = inners.Next() {
j.chk.Reset()
if j.outerIsRight {
j.makeJoinRowToChunk(j.chk, inner, outer)
} else {
j.makeJoinRowToChunk(j.chk, outer, inner)
}
j.makeShallowJoinRow(j.outerIsRight, inner, outer)

matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0))
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow())
if err != nil {
return false, errors.Trace(err)
}
Expand Down Expand Up @@ -252,10 +258,9 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator,
}

for inner := inners.Current(); inner != inners.End(); inner = inners.Next() {
j.chk.Reset()
j.makeJoinRowToChunk(j.chk, outer, inner)
j.makeShallowJoinRow(false, inner, outer)

matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0))
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow())
if err != nil {
return false, errors.Trace(err)
}
Expand Down Expand Up @@ -295,10 +300,9 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera
}

for inner := inners.Current(); inner != inners.End(); inner = inners.Next() {
j.chk.Reset()
j.makeJoinRowToChunk(j.chk, outer, inner)
matched, err := expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0))
j.makeShallowJoinRow(false, inner, outer)

matched, err := expression.EvalBool(j.ctx, j.conditions, j.shallowRow.ToRow())
if err != nil {
return false, errors.Trace(err)
}
Expand Down Expand Up @@ -330,7 +334,6 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk
if inners.Len() == 0 {
return false, nil
}

j.chk.Reset()
chkForJoin := j.chk
if len(j.conditions) == 0 {
Expand Down
12 changes: 12 additions & 0 deletions util/chunk/chunk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,18 @@ func newChunk(elemLen ...int) *Chunk {
return chk
}

// newChunkWithInitCap builds a Chunk with one column per entry of elemLen,
// passing `cap` to each column constructor. A positive entry creates a
// fixed-length column with that element length; any other value creates a
// variable-length column.
func newChunkWithInitCap(cap int, elemLen ...int) *Chunk {
	chk := new(Chunk)
	for _, width := range elemLen {
		switch {
		case width > 0:
			chk.columns = append(chk.columns, newFixedLenColumn(width, cap))
		default:
			chk.columns = append(chk.columns, newVarLenColumn(cap, nil))
		}
	}
	return chk
}

var allTypes = []*types.FieldType{
types.NewFieldType(mysql.TypeTiny),
types.NewFieldType(mysql.TypeShort),
Expand Down
23 changes: 23 additions & 0 deletions util/chunk/mutrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,26 @@ func setMutRowJSON(col *column, j json.BinaryJSON) {
copy(col.data[1:], j.Value)
col.offsets[1] = int32(dataLen)
}

// ShallowCopyPartialRow makes the columns of MutRow, starting at `colIdx`,
// refer to the data of `row`. No bytes are copied: each destination column's
// data slice is re-pointed at the matching sub-slice of the source column, so
// the MutRow aliases the source buffers.
func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) {
	for i, srcCol := range row.c.columns {
		dstCol := mr.c.columns[colIdx+i]

		// MutRow only contains one row, so the whole bitmap byte can be
		// overwritten directly: 1 for a non-null value, 0 for null.
		var notNull byte
		if !srcCol.isNull(row.idx) {
			notNull = 1
		}
		dstCol.nullBitmap[0] = notNull

		if !srcCol.isFixed() {
			// Variable-length column: the value lives between two offsets.
			start, end := srcCol.offsets[row.idx], srcCol.offsets[row.idx+1]
			dstCol.data = srcCol.data[start:end]
			dstCol.offsets[1] = int32(len(dstCol.data))
			continue
		}

		// Fixed-length column: the value is the row.idx-th element.
		elemLen := len(srcCol.elemBuf)
		offset := row.idx * elemLen
		dstCol.data = srcCol.data[offset : offset+elemLen]
	}
}
62 changes: 62 additions & 0 deletions util/chunk/mutrow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package chunk

import (
"testing"
"time"

"github.com/pingcap/check"
"github.com/pingcap/tidb/mysql"
Expand Down Expand Up @@ -134,3 +135,64 @@ func BenchmarkMutRowFromValues(b *testing.B) {
MutRowFromValues(values)
}
}

// TestMutRowShallowCopyPartialRow checks that a MutRow filled through
// ShallowCopyPartialRow reads back the same values as the source row, both
// right after the copy and after the source chunk has been reset and refilled.
func (s *testChunkSuite) TestMutRowShallowCopyPartialRow(c *check.C) {
	colTypes := []*types.FieldType{
		{Tp: mysql.TypeVarString},
		{Tp: mysql.TypeLonglong},
		{Tp: mysql.TypeTimestamp},
	}

	dst := MutRowFromTypes(colTypes)
	src := MutRowFromValues("abc", 123, types.ZeroTimestamp).ToRow()
	dst.ShallowCopyPartialRow(0, src)
	c.Assert(src.GetString(0), check.Equals, dst.ToRow().GetString(0))
	c.Assert(src.GetInt64(1), check.Equals, dst.ToRow().GetInt64(1))
	c.Assert(src.GetTime(2), check.DeepEquals, dst.ToRow().GetTime(2))

	// Rewrite the source row in place without copying again. The assertions
	// below expect dst to observe the new values, presumably because the
	// shallow copy still shares the source buffers (confirm against Reset).
	src.c.Reset()
	strDatum := types.NewStringDatum("dfg")
	src.c.AppendDatum(0, &strDatum)
	intDatum := types.NewIntDatum(567)
	src.c.AppendDatum(1, &intDatum)
	timeDatum := types.NewTimeDatum(types.Time{Time: types.FromGoTime(time.Now()), Fsp: 6, Type: mysql.TypeTimestamp})
	src.c.AppendDatum(2, &timeDatum)

	c.Assert(timeDatum.GetMysqlTime(), check.DeepEquals, dst.ToRow().GetTime(2))
	c.Assert(src.GetString(0), check.Equals, dst.ToRow().GetString(0))
	c.Assert(src.GetInt64(1), check.Equals, dst.ToRow().GetInt64(1))
	c.Assert(src.GetTime(2), check.DeepEquals, dst.ToRow().GetTime(2))
}

// rowsNum is the row count used by the benchmarks in this file: the number of
// inner-loop iterations per benchmark iteration and the initial chunk capacity.
var rowsNum = 1024

func BenchmarkMutRowShallowCopyPartialRow(b *testing.B) {
b.ReportAllocs()
colTypes := make([]*types.FieldType, 0, 8)
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString})
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString})
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong})
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong})
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeDatetime})

mutRow := MutRowFromTypes(colTypes)
row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

b.ResetTimer() before this loop.

for j := 0; j < rowsNum; j++ {
mutRow.ShallowCopyPartialRow(0, row)
}
}
}

func BenchmarkChunkAppendPartialRow(b *testing.B) {
b.ReportAllocs()
chk := newChunkWithInitCap(rowsNum, 0, 0, 8, 8, 16)
row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

chk.Reset()
for j := 0; j < rowsNum; j++ {
chk.AppendPartialRow(0, row)
}
}
}