@@ -489,9 +489,9 @@ type LoadDataInfo struct {
489
489
columns []* table.Column
490
490
}
491
491
492
- // SetBatchCount sets the number of rows to insert in a batch.
493
- func (e * LoadDataInfo ) SetBatchCount (limit int64 ) {
494
- e .insertVal .batchRows = limit
492
+ // SetMaxRowsInBatch sets the max number of rows to insert in a batch.
493
+ func (e * LoadDataInfo ) SetMaxRowsInBatch (limit uint64 ) {
494
+ e .insertVal .maxRowsInBatch = limit
495
495
}
496
496
497
497
// getValidData returns prevData and curData that start from the starting symbol.
@@ -606,6 +606,7 @@ func (e *LoadDataInfo) InsertData(prevData, curData []byte) ([]byte, bool, error
606
606
isEOF = true
607
607
prevData , curData = curData , prevData
608
608
}
609
+ rows := make ([][]types.Datum , 0 , e .insertVal .maxRowsInBatch )
609
610
for len (curData ) > 0 {
610
611
line , curData , hasStarting = e .getLine (prevData , curData )
611
612
prevData = nil
@@ -631,15 +632,22 @@ func (e *LoadDataInfo) InsertData(prevData, curData []byte) ([]byte, bool, error
631
632
if err != nil {
632
633
return nil , false , errors .Trace (err )
633
634
}
634
- e . insertData (cols )
635
- e .insertVal .currRow ++
636
- if e .insertVal .batchRows != 0 && e .insertVal .currRow % e .insertVal .batchRows == 0 {
635
+ rows = append ( rows , e . colsToRow (cols ) )
636
+ e .insertVal .rowCount ++
637
+ if e .insertVal .maxRowsInBatch != 0 && e .insertVal .rowCount % e .insertVal .maxRowsInBatch == 0 {
637
638
reachLimit = true
638
639
log .Infof ("This insert rows has reached the batch %d, current total rows %d" ,
639
- e .insertVal .batchRows , e .insertVal .currRow )
640
+ e .insertVal .maxRowsInBatch , e .insertVal .rowCount )
640
641
break
641
642
}
642
643
}
644
+ rows , err := batchMarkDupRows (e .Ctx , e .Table , rows )
645
+ if err != nil {
646
+ return nil , reachLimit , errors .Trace (err )
647
+ }
648
+ for _ , row := range rows {
649
+ e .insertData (row )
650
+ }
643
651
if e .insertVal .lastInsertID != 0 {
644
652
e .insertVal .ctx .GetSessionVars ().SetLastInsertID (e .insertVal .lastInsertID )
645
653
}
@@ -715,7 +723,7 @@ func escapeChar(c byte) byte {
715
723
return c
716
724
}
717
725
718
- func (e * LoadDataInfo ) insertData (cols []string ) {
726
+ func (e * LoadDataInfo ) colsToRow (cols []string ) types. DatumRow {
719
727
for i := 0 ; i < len (e .row ); i ++ {
720
728
if i >= len (cols ) {
721
729
e .row [i ].SetString ("" )
@@ -727,9 +735,16 @@ func (e *LoadDataInfo) insertData(cols []string) {
727
735
if err != nil {
728
736
warnLog := fmt .Sprintf ("Load Data: insert data:%v failed:%v" , e .row , errors .ErrorStack (err ))
729
737
e .insertVal .handleLoadDataWarnings (err , warnLog )
738
+ return nil
739
+ }
740
+ return row
741
+ }
742
+
743
+ func (e * LoadDataInfo ) insertData (row types.DatumRow ) {
744
+ if row == nil {
730
745
return
731
746
}
732
- _ , err = e .Table .AddRecord (e .insertVal .ctx , row , false )
747
+ _ , err : = e .Table .AddRecord (e .insertVal .ctx , row , false )
733
748
if err != nil {
734
749
warnLog := fmt .Sprintf ("Load Data: insert data:%v failed:%v" , row , errors .ErrorStack (err ))
735
750
e .insertVal .handleLoadDataWarnings (err , warnLog )
@@ -817,8 +832,8 @@ type defaultVal struct {
817
832
type InsertValues struct {
818
833
baseExecutor
819
834
820
- currRow int64
821
- batchRows int64
835
+ rowCount uint64
836
+ maxRowsInBatch uint64
822
837
lastInsertID uint64
823
838
needFillDefaultValues bool
824
839
@@ -869,7 +884,7 @@ func (e *InsertExec) exec(goCtx goctx.Context, rows [][]types.Datum) (Row, error
869
884
// Using BatchGet in insert ignore to mark rows as duplicated before we add records to the table.
870
885
if e .IgnoreErr {
871
886
var err error
872
- rows , err = e . batchMarkDupRows (rows )
887
+ rows , err = batchMarkDupRows (e . ctx , e . Table , rows )
873
888
if err != nil {
874
889
return nil , errors .Trace (err )
875
890
}
@@ -928,12 +943,12 @@ type keyWithDupError struct {
928
943
dupErr error
929
944
}
930
945
931
- func ( e * InsertExec ) getRecordIDs ( rows [][]types.Datum ) ([]int64 , error ) {
946
+ func getRecordIDs ( ctx context. Context , t table. Table , rows [][]types.Datum ) ([]int64 , error ) {
932
947
recordIDs := make ([]int64 , 0 , len (rows ))
933
- if e . Table .Meta ().PKIsHandle {
948
+ if t .Meta ().PKIsHandle {
934
949
var handleCol * table.Column
935
- for _ , col := range e . Table .Cols () {
936
- if col .IsPKHandleColumn (e . Table .Meta ()) {
950
+ for _ , col := range t .Cols () {
951
+ if col .IsPKHandleColumn (t .Meta ()) {
937
952
handleCol = col
938
953
break
939
954
}
@@ -943,7 +958,7 @@ func (e *InsertExec) getRecordIDs(rows [][]types.Datum) ([]int64, error) {
943
958
}
944
959
} else {
945
960
for range rows {
946
- recordID , err := e . Table . AllocAutoID (e . ctx )
961
+ recordID , err := t . AllocAutoID (ctx )
947
962
if err != nil {
948
963
return nil , errors .Trace (err )
949
964
}
@@ -955,30 +970,30 @@ func (e *InsertExec) getRecordIDs(rows [][]types.Datum) ([]int64, error) {
955
970
956
971
// getKeysNeedCheck gets keys converted from to-be-inserted rows to record keys and unique index keys,
957
972
// which need to be checked whether they are duplicate keys.
958
- func ( e * InsertExec ) getKeysNeedCheck ( rows [][]types.Datum ) ([][]keyWithDupError , error ) {
973
+ func getKeysNeedCheck ( ctx context. Context , t table. Table , rows [][]types.Datum ) ([][]keyWithDupError , error ) {
959
974
nUnique := 0
960
- for _ , v := range e . Table .WritableIndices () {
975
+ for _ , v := range t .WritableIndices () {
961
976
if v .Meta ().Unique {
962
977
nUnique ++
963
978
}
964
979
}
965
980
rowWithKeys := make ([][]keyWithDupError , 0 , len (rows ))
966
981
967
982
// get recordIDs
968
- recordIDs , err := e . getRecordIDs (rows )
983
+ recordIDs , err := getRecordIDs (ctx , t , rows )
969
984
if err != nil {
970
985
return nil , errors .Trace (err )
971
986
}
972
987
973
988
for i , row := range rows {
974
989
keysWithErr := make ([]keyWithDupError , 0 , nUnique + 1 )
975
990
// append record keys and errors
976
- if e . Table .Meta ().PKIsHandle {
977
- keysWithErr = append (keysWithErr , keyWithDupError {e . Table .RecordKey (recordIDs [i ]), kv .ErrKeyExists .FastGen ("Duplicate entry '%d' for key 'PRIMARY'" , recordIDs [i ])})
991
+ if t .Meta ().PKIsHandle {
992
+ keysWithErr = append (keysWithErr , keyWithDupError {t .RecordKey (recordIDs [i ]), kv .ErrKeyExists .FastGen ("Duplicate entry '%d' for key 'PRIMARY'" , recordIDs [i ])})
978
993
}
979
994
980
995
// append unique keys and errors
981
- for _ , v := range e . Table .WritableIndices () {
996
+ for _ , v := range t .WritableIndices () {
982
997
if ! v .Meta ().Unique {
983
998
continue
984
999
}
@@ -989,7 +1004,7 @@ func (e *InsertExec) getKeysNeedCheck(rows [][]types.Datum) ([][]keyWithDupError
989
1004
}
990
1005
var key []byte
991
1006
var distinct bool
992
- key , distinct , err = v .GenIndexKey (e . ctx .GetSessionVars ().StmtCtx ,
1007
+ key , distinct , err = v .GenIndexKey (ctx .GetSessionVars ().StmtCtx ,
993
1008
colVals , recordIDs [i ], nil )
994
1009
if err != nil {
995
1010
return nil , errors .Trace (err )
@@ -1007,9 +1022,9 @@ func (e *InsertExec) getKeysNeedCheck(rows [][]types.Datum) ([][]keyWithDupError
1007
1022
// batchMarkDupRows marks rows with duplicate errors as nil.
1008
1023
// All duplicate rows were marked and appended as duplicate warnings
1009
1024
// to the statement context in batch.
1010
- func ( e * InsertExec ) batchMarkDupRows ( rows [][]types.Datum ) ([][]types.Datum , error ) {
1025
+ func batchMarkDupRows ( ctx context. Context , t table. Table , rows [][]types.Datum ) ([][]types.Datum , error ) {
1011
1026
// get keys need to be checked
1012
- rowWithKeys , err := e . getKeysNeedCheck (rows )
1027
+ rowWithKeys , err := getKeysNeedCheck (ctx , t , rows )
1013
1028
1014
1029
// batch get values
1015
1030
nKeys := 0
@@ -1022,7 +1037,7 @@ func (e *InsertExec) batchMarkDupRows(rows [][]types.Datum) ([][]types.Datum, er
1022
1037
batchKeys = append (batchKeys , k .key )
1023
1038
}
1024
1039
}
1025
- values , err := e . ctx .Txn ().GetSnapshot ().BatchGet (batchKeys )
1040
+ values , err := ctx .Txn ().GetSnapshot ().BatchGet (batchKeys )
1026
1041
if err != nil {
1027
1042
return nil , errors .Trace (err )
1028
1043
}
@@ -1033,7 +1048,7 @@ func (e *InsertExec) batchMarkDupRows(rows [][]types.Datum) ([][]types.Datum, er
1033
1048
if _ , found := values [string (k .key )]; found {
1034
1049
// If duplicate keys were found in BatchGet, mark row = nil.
1035
1050
rows [i ] = nil
1036
- e . ctx .GetSessionVars ().StmtCtx .AppendWarning (k .dupErr )
1051
+ ctx .GetSessionVars ().StmtCtx .AppendWarning (k .dupErr )
1037
1052
break
1038
1053
}
1039
1054
}
@@ -1048,7 +1063,7 @@ func (e *InsertExec) batchMarkDupRows(rows [][]types.Datum) ([][]types.Datum, er
1048
1063
}
1049
1064
1050
1065
// this statement has already been checked
1051
- e . ctx .GetSessionVars ().StmtCtx .BatchCheck = true
1066
+ ctx .GetSessionVars ().StmtCtx .BatchCheck = true
1052
1067
return rows , nil
1053
1068
}
1054
1069
@@ -1240,7 +1255,7 @@ func (e *InsertValues) getRows(cols []*table.Column, ignoreErr bool) (rows [][]t
1240
1255
if err = e .checkValueCount (length , len (list ), len (e .GenColumns ), i , cols ); err != nil {
1241
1256
return nil , errors .Trace (err )
1242
1257
}
1243
- e .currRow = int64 (i )
1258
+ e .rowCount = uint64 (i )
1244
1259
rows [i ], err = e .getRow (cols , list , ignoreErr )
1245
1260
if err != nil {
1246
1261
return nil , errors .Trace (err )
@@ -1320,7 +1335,7 @@ func (e *InsertValues) getRowsSelect(goCtx goctx.Context, cols []*table.Column,
1320
1335
if innerRow == nil {
1321
1336
break
1322
1337
}
1323
- e .currRow = int64 (len (rows ))
1338
+ e .rowCount = uint64 (len (rows ))
1324
1339
row , err := e .fillRowData (cols , innerRow , ignoreErr )
1325
1340
if err != nil {
1326
1341
return nil , errors .Trace (err )
@@ -1350,7 +1365,7 @@ func (e *InsertValues) getRowsSelectChunk(goCtx goctx.Context, cols []*table.Col
1350
1365
1351
1366
for innerChunkRow := chk .Begin (); innerChunkRow != chk .End (); innerChunkRow = innerChunkRow .Next () {
1352
1367
innerRow := innerChunkRow .GetDatumRow (fields )
1353
- e .currRow = int64 (len (rows ))
1368
+ e .rowCount = uint64 (len (rows ))
1354
1369
row , err := e .fillRowData (cols , innerRow , ignoreErr )
1355
1370
if err != nil {
1356
1371
return nil , errors .Trace (err )
@@ -1519,7 +1534,7 @@ func (e *InsertValues) adjustAutoIncrementDatum(row []types.Datum, i int, c *tab
1519
1534
return errors .Trace (err )
1520
1535
}
1521
1536
// It's compatible with mysql. So it sets last insert id to the first row.
1522
- if e .currRow == 0 {
1537
+ if e .rowCount == 0 {
1523
1538
e .lastInsertID = uint64 (recordID )
1524
1539
}
1525
1540
}
0 commit comments