Skip to content

Commit

Permalink
optimize encoder and adjust some config (pingcap#338)
Browse files Browse the repository at this point in the history
* optimize local backend

* update some config

* update batch size

* fix typo in config.toml and tidy go.mod

* update tools failpoint

* fix session

* update test config

* reset default batch size for not-local backend

* reset batch-size for example toml
  • Loading branch information
glorv committed Jun 28, 2020
1 parent cd6210d commit 4af19d5
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 39 deletions.
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@ require (
github.com/joho/sqltocsv v0.0.0-20190824231449-5650f27fd5b6
github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect
github.com/onsi/ginkgo v1.13.0 // indirect
github.com/pingcap/br v0.0.0-20200521085655-53201addd4ad
github.com/pingcap/br v0.0.0-20200617120402-56e151ad8b67
github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712
github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011
github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53
github.com/pingcap/kvproto v0.0.0-20200518112156-d4aeb467de29
github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c
github.com/pingcap/kvproto v0.0.0-20200608081027-d02a6f65e956
github.com/pingcap/log v0.0.0-20200511115504-543df19646ad
github.com/pingcap/parser v0.0.0-20200522094936-3b720a0512a6
github.com/pingcap/parser v0.0.0-20200623082809-b74301ac298b
github.com/pingcap/pd/v4 v4.0.0-rc.2.0.20200520083007-2c251bd8f181
github.com/pingcap/tidb v1.1.0-beta.0.20200527030457-572bba0499e1
github.com/pingcap/tidb v1.1.0-beta.0.20200624071801-127ad504b29b
github.com/pingcap/tidb-tools v4.0.1+incompatible
github.com/pingcap/tipb v0.0.0-20200615034523-dcfcea0b5965
github.com/prometheus/client_golang v1.5.1
github.com/prometheus/client_model v0.2.0
github.com/satori/go.uuid v1.2.0
Expand Down
43 changes: 41 additions & 2 deletions go.sum

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion lightning/backend/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ func (local *local) WriteToTiKV(
}
}
count = 0
pairs = pairs[:0]
bytesBuf.reset()
firstLoop = false
}
Expand Down
21 changes: 18 additions & 3 deletions lightning/backend/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ package backend

import (
"context"
"fmt"
"strconv"

"github.com/pingcap/tidb/sessionctx"

"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"

"github.com/pingcap/tidb-lightning/lightning/common"
Expand Down Expand Up @@ -97,6 +99,8 @@ type session struct {
sessionctx.Context
txn transaction
vars *variable.SessionVars
// currently, we only set `CommonAddRecordCtx`
values map[fmt.Stringer]interface{}
}

// SessionOptions is the initial configuration of the session.
Expand All @@ -123,8 +127,9 @@ func newSession(options *SessionOptions) *session {
vars.TxnCtx = nil

s := &session{
txn: transaction{},
vars: vars,
txn: transaction{},
vars: vars,
values: make(map[fmt.Stringer]interface{}, 1),
}

return s
Expand All @@ -146,5 +151,15 @@ func (se *session) GetSessionVars() *variable.SessionVars {
return se.vars
}

// SetValue records value under key in the session's value map, replacing
// anything previously stored for the same key.
func (se *session) SetValue(key fmt.Stringer, value interface{}) {
	store := se.values
	store[key] = value
}

// Value looks up key in the session's value map and returns what was stored
// for it, or nil when no entry exists for key.
func (se *session) Value(key fmt.Stringer) interface{} {
	if stored, found := se.values[key]; found {
		return stored
	}
	return nil
}

// StmtAddDirtyTableOP implements the sessionctx.Context interface.
// This implementation has an empty body: op, physicalID and handle are all
// ignored and no state is modified.
func (se *session) StmtAddDirtyTableOP(op int, physicalID int64, handle kv.Handle) {}
13 changes: 10 additions & 3 deletions lightning/backend/sql2kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/parser/model"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/table/tables"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"go.uber.org/zap"
Expand All @@ -40,10 +42,15 @@ type tableKVEncoder struct {

// NewTableKVEncoder returns an Encoder for tbl backed by a session created
// from options. It also increments the "open" label of
// metric.KvEncoderCounter.
func NewTableKVEncoder(tbl table.Table, options *SessionOptions) Encoder {
metric.KvEncoderCounter.WithLabelValues("open").Inc()

se := newSession(options)
// Set CommonAddRecordCtx to session to reuse the slices and BufStore in AddRecord
// NOTE(review): the error returned by se.Txn is discarded here — confirm it
// cannot fail for this session type.
txn, _ := se.Txn(true)
store := kv.NewStagingBufferStore(txn)
// The record context is built from the table's column count and the staging store.
recordCtx := tables.NewCommonAddRecordCtx(len(tbl.Cols()), store)
tables.SetAddRecordCtx(se, recordCtx)
return &tableKVEncoder{
tbl: tbl,
// NOTE(review): the two `se:` entries below appear to be the removed and
// added versions of the same line in a rendered diff — confirm which one
// is live before editing.
se: newSession(options),
se: se,
}
}

Expand Down Expand Up @@ -175,7 +182,7 @@ func (kvcodec *tableKVEncoder) Encode(
if j >= 0 && j < len(row) {
value, err = table.CastValue(kvcodec.se, row[j], col.ToInfo(), false, false)
if err == nil {
value, err = col.HandleBadNull(value, kvcodec.se.vars.StmtCtx)
err = col.HandleBadNull(&value, kvcodec.se.vars.StmtCtx)
}
} else if isAutoIncCol {
// we still need a conversion, e.g. to catch overflow with a TINYINT column.
Expand Down
14 changes: 10 additions & 4 deletions lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb-lightning/lightning/common"
"github.com/pingcap/tidb-lightning/lightning/log"
"github.com/pingcap/tidb-tools/pkg/table-filter"
filter "github.com/pingcap/tidb-tools/pkg/table-filter"
router "github.com/pingcap/tidb-tools/pkg/table-router"
tidbcfg "github.com/pingcap/tidb/config"
"go.uber.org/zap"
Expand Down Expand Up @@ -264,7 +264,7 @@ func NewConfig() *Config {
Backend: BackendImporter,
OnDuplicate: ReplaceOnDup,
MaxKVPairs: 32,
SendKVPairs: 100000,
SendKVPairs: 32768,
RegionSplitSize: SplitRegionSize,
},
PostRestore: PostRestore{
Expand Down Expand Up @@ -406,7 +406,7 @@ func (cfg *Config) Adjust() error {
cfg.App.TableConcurrency = 6
}
if cfg.TikvImporter.RangeConcurrency == 0 {
cfg.TikvImporter.RangeConcurrency = 32
cfg.TikvImporter.RangeConcurrency = 16
}
if cfg.TikvImporter.RegionSplitSize == 0 {
cfg.TikvImporter.RegionSplitSize = SplitRegionSize
Expand Down Expand Up @@ -504,7 +504,13 @@ func (cfg *Config) Adjust() error {

// handle mydumper
if cfg.Mydumper.BatchSize <= 0 {
cfg.Mydumper.BatchSize = 100 * _G
// a smaller batch size can improve performance by about 5% in local mode.
if cfg.TikvImporter.Backend == BackendLocal {
cfg.Mydumper.BatchSize = 10 * _G
} else {
cfg.Mydumper.BatchSize = 100 * _G
}

}
if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 {
cfg.Mydumper.BatchImportRatio = 0.75
Expand Down
3 changes: 3 additions & 0 deletions tests/local_backend/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ table-concurrency = 1
enable = true
driver = "file"

[tikv-importer]
send-kv-pairs = 2

[mydumper]
batch-size = 50 # force splitting the data into 4 batches
9 changes: 5 additions & 4 deletions tidb-lightning.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,20 @@ addr = "127.0.0.1:8287"
# the TiKV region size to avoid further region splitting. The default value is 96 MiB.
#region-split-size = 100_663_296
# batch size (in key-value pairs) used when writing to TiKV
#send-kv-pairs = 100000
# local storage directory used in "local" backend. Putting this directory on a different disk from `data-source-dir`
# can make lightning run faster.
#send-kv-pairs = 32768
# local storage directory used in "local" backend.
#sorted-kv-dir = ""
# range-concurrency controls the maximum number of concurrent ingests while writing to TiKV. It can affect the network traffic.
# The default value can make full use of a 10Gib bandwidth network; if the network bandwidth is higher, you can increase
# this value to gain better performance. A larger value will also increase the memory usage slightly.
#range-concurrency = 16

[mydumper]
# block size of file reading
read-block-size = 65536 # Byte (default = 64 KB)
# minimum size (in terms of source data file) of each batch of import.
# Lightning will split a large table into multiple engine files according to this size.
batch-size = 107_374_182_400 # Byte (default = 100 GiB)
#batch-size = 107_374_182_400 # Byte (default = 100 GiB)

# Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be
# imported nearly the same time, and this will create a queue and this wastes resources. Therefore,
Expand Down
8 changes: 1 addition & 7 deletions tools/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,9 @@ module github.com/pingcap/tidb/_tools
go 1.12

require (
github.com/go-playground/overalls v0.0.0-20180201144345-22ec1a223b7c // indirect
github.com/golangci/golangci-lint v1.24.0
github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf // indirect
github.com/mgechev/revive v1.0.2
github.com/nicksnyder/go-i18n v1.10.0 // indirect
github.com/pelletier/go-toml v1.3.0 // indirect
github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53
github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c
github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd
github.com/yookoala/realpath v1.0.0 // indirect
gopkg.in/alecthomas/gometalinter.v2 v2.0.12 // indirect
gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c // indirect
)
13 changes: 3 additions & 10 deletions tools/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8=
github.com/go-playground/overalls v0.0.0-20180201144345-22ec1a223b7c/go.mod h1:UqxAgEOt89sCiXlrc/ycnx00LVvUO/eS8tMUkWX4R7w=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-toolsmith/astcast v1.0.0 h1:JojxlmI6STnFVG9yOImLeGREv8W2ocNUM+iOhR6jE7g=
Expand Down Expand Up @@ -117,7 +116,6 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/shlex v0.0.0-20181106134648-c34317bd91bf/go.mod h1:RpwtwJQFrIEPstU94h88MWPXP2ektJZ8cZ0YntAmXiE=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
Expand Down Expand Up @@ -189,7 +187,6 @@ github.com/mozilla/tls-observatory v0.0.0-20190404164649-a3c1b6cfecfd/go.mod h1:
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nbutton23/zxcvbn-go v0.0.0-20180912185939-ae427f1e4c1d h1:AREM5mwr4u1ORQBMvzfzBgpsctsbQikCVpvC+tX285E=
github.com/nbutton23/zxcvbn-go v0.0.0-20180912185939-ae427f1e4c1d/go.mod h1:o96djdrsSGy3AWPyBgZMAGfxZNfgntdJG+11KU4QvbU=
github.com/nicksnyder/go-i18n v1.10.0/go.mod h1:HrK7VCrbOvQoUAQ7Vpy7i87N7JZZZ7R2xBGjv0j365Q=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
Expand All @@ -206,10 +203,8 @@ github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FI
github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ=
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/failpoint v0.0.0-20200210140405-f8f9fb234798 h1:6DMbRqPI1qzQ8N1xc3+nKY8IxSACd9VqQKkRVvbyoIg=
github.com/pingcap/failpoint v0.0.0-20200210140405-f8f9fb234798/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI=
github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53 h1:8sC8OLinmaw24xLeeJlYBFvUBsOiOYBtNqTuVOTnynQ=
github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk=
github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c h1:cm0zAj+Tab94mp4OH+VoLJiSNQvZO4pWDGJ8KEk2a0c=
github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down Expand Up @@ -274,6 +269,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
Expand All @@ -296,7 +292,6 @@ github.com/valyala/quicktemplate v1.2.0/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOV
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yookoala/realpath v1.0.0/go.mod h1:gJJMA9wuX7AcqLy1+ffPatSCySA1FQ2S8Ya9AIoYBpE=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
Expand Down Expand Up @@ -375,9 +370,7 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
gopkg.in/alecthomas/gometalinter.v2 v2.0.12/go.mod h1:NDRytsqEZyolNuAgTzJkZMkSQM7FIKyzVzGhjB/qfYo=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mod h1:3HH7i1SgMqlzxCcBmUHW657sD4Kvv9sC3HpL3YukzwA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
Expand Down

0 comments on commit 4af19d5

Please sign in to comment.