Skip to content

Commit 459917c

Browse files
authored
executor: fix load data panic if the data is broken at escape character (#30868) (#31773)
close #31589
1 parent d3a3830 commit 459917c

File tree

2 files changed

+29
-109
lines changed

2 files changed

+29
-109
lines changed

executor/load_data.go

Lines changed: 27 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -363,65 +363,20 @@ func (e *LoadDataInfo) SetMaxRowsInBatch(limit uint64) {
363363
e.curBatchCnt = 0
364364
}
365365

366-
// getValidData returns prevData and curData that starts from starting symbol.
367-
// If the data doesn't have starting symbol, prevData is nil and curData is curData[len(curData)-startingLen+1:].
368-
// If curData size less than startingLen, curData is returned directly.
369-
func (e *LoadDataInfo) getValidData(prevData, curData []byte) ([]byte, []byte) {
370-
startingLen := len(e.LinesInfo.Starting)
371-
if startingLen == 0 {
372-
return prevData, curData
373-
}
374-
375-
prevLen := len(prevData)
376-
if prevLen > 0 {
377-
// starting symbol in the prevData
378-
idx := strings.Index(string(hack.String(prevData)), e.LinesInfo.Starting)
379-
if idx != -1 {
380-
return prevData[idx:], curData
381-
}
382-
383-
// starting symbol in the middle of prevData and curData
384-
restStart := curData
385-
if len(curData) >= startingLen {
386-
restStart = curData[:startingLen-1]
387-
}
388-
prevData = append(prevData, restStart...)
389-
idx = strings.Index(string(hack.String(prevData)), e.LinesInfo.Starting)
390-
if idx != -1 {
391-
return prevData[idx:prevLen], curData
392-
}
393-
}
394-
395-
// starting symbol in the curData
366+
// getValidData returns curData starting from the starting symbol.
367+
// If the data doesn't contain the starting symbol, it returns curData[len(curData)-startingLen+1:] and false.
368+
func (e *LoadDataInfo) getValidData(curData []byte) ([]byte, bool) {
396369
idx := strings.Index(string(hack.String(curData)), e.LinesInfo.Starting)
397-
if idx != -1 {
398-
return nil, curData[idx:]
370+
if idx == -1 {
371+
return curData[len(curData)-len(e.LinesInfo.Starting)+1:], false
399372
}
400373

401-
// no starting symbol
402-
if len(curData) >= startingLen {
403-
curData = curData[len(curData)-startingLen+1:]
404-
}
405-
return nil, curData
406-
}
407-
408-
func (e *LoadDataInfo) isInQuoter(bs []byte) bool {
409-
inQuoter := false
410-
for i := 0; i < len(bs); i++ {
411-
switch bs[i] {
412-
case e.FieldsInfo.Enclosed:
413-
inQuoter = !inQuoter
414-
case e.FieldsInfo.Escaped:
415-
i++
416-
default:
417-
}
418-
}
419-
return inQuoter
374+
return curData[idx:], true
420375
}
421376

422377
// indexOfTerminator returns the index of the terminator, or -1 if no terminator is found.
423378
// Normally, the field terminator and line terminator are short, so we just use a brute-force algorithm.
424-
func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int {
379+
func (e *LoadDataInfo) indexOfTerminator(bs []byte) int {
425380
fieldTerm := []byte(e.FieldsInfo.Terminated)
426381
fieldTermLen := len(fieldTerm)
427382
lineTerm := []byte(e.LinesInfo.Terminated)
@@ -462,15 +417,13 @@ func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int {
462417
inQuoter := false
463418
loop:
464419
for i := 0; i < len(bs); i++ {
465-
if atFieldStart && bs[i] == e.FieldsInfo.Enclosed {
466-
if !isInQuoter {
467-
inQuoter = true
468-
}
420+
if atFieldStart && e.FieldsInfo.Enclosed != byte(0) && bs[i] == e.FieldsInfo.Enclosed {
421+
inQuoter = !inQuoter
469422
atFieldStart = false
470423
continue
471424
}
472425
restLen := len(bs) - i - 1
473-
if inQuoter && bs[i] == e.FieldsInfo.Enclosed {
426+
if inQuoter && e.FieldsInfo.Enclosed != byte(0) && bs[i] == e.FieldsInfo.Enclosed {
474427
// Look ahead to see whether this is the end of a line or a field.
475428
switch cmpTerm(restLen, bs[i+1:]) {
476429
case lineTermType:
@@ -508,67 +461,32 @@ loop:
508461
// getLine returns a line, the remaining data after that line, and a bool value.
509462
// The bool is true if the data has the starting symbol, and false otherwise.
510463
func (e *LoadDataInfo) getLine(prevData, curData []byte, ignore bool) ([]byte, []byte, bool) {
511-
startingLen := len(e.LinesInfo.Starting)
512-
prevData, curData = e.getValidData(prevData, curData)
513-
if prevData == nil && len(curData) < startingLen {
514-
return nil, curData, false
515-
}
516-
inquotor := e.isInQuoter(prevData)
517-
prevLen := len(prevData)
518-
terminatedLen := len(e.LinesInfo.Terminated)
519-
curStartIdx := 0
520-
if prevLen < startingLen {
521-
curStartIdx = startingLen - prevLen
522-
}
523-
endIdx := -1
524-
if len(curData) >= curStartIdx {
525-
if ignore {
526-
endIdx = strings.Index(string(hack.String(curData[curStartIdx:])), e.LinesInfo.Terminated)
527-
} else {
528-
endIdx = e.indexOfTerminator(curData[curStartIdx:], inquotor)
529-
}
530-
}
531-
if endIdx == -1 {
532-
// no terminated symbol
533-
if len(prevData) == 0 {
534-
return nil, curData, true
535-
}
536-
537-
// terminated symbol in the middle of prevData and curData
464+
if prevData != nil {
538465
curData = append(prevData, curData...)
539-
if ignore {
540-
endIdx = strings.Index(string(hack.String(curData[startingLen:])), e.LinesInfo.Terminated)
541-
} else {
542-
endIdx = e.indexOfTerminator(curData[startingLen:], inquotor)
466+
}
467+
startLen := len(e.LinesInfo.Starting)
468+
if startLen != 0 {
469+
if len(curData) < startLen {
470+
return nil, curData, false
543471
}
544-
if endIdx != -1 {
545-
nextDataIdx := startingLen + endIdx + terminatedLen
546-
return curData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true
472+
var ok bool
473+
curData, ok = e.getValidData(curData)
474+
if !ok {
475+
return nil, curData, false
547476
}
548-
// no terminated symbol
549-
return nil, curData, true
550-
}
551-
552-
// terminated symbol in the curData
553-
nextDataIdx := curStartIdx + endIdx + terminatedLen
554-
if len(prevData) == 0 {
555-
return curData[curStartIdx : curStartIdx+endIdx], curData[nextDataIdx:], true
556477
}
557-
558-
// terminated symbol in the curData
559-
prevData = append(prevData, curData[:nextDataIdx]...)
478+
var endIdx int
560479
if ignore {
561-
endIdx = strings.Index(string(hack.String(prevData[startingLen:])), e.LinesInfo.Terminated)
480+
endIdx = strings.Index(string(hack.String(curData[startLen:])), e.LinesInfo.Terminated)
562481
} else {
563-
endIdx = e.indexOfTerminator(prevData[startingLen:], inquotor)
482+
endIdx = e.indexOfTerminator(curData[startLen:])
564483
}
565-
if endIdx >= prevLen {
566-
return prevData[startingLen : startingLen+endIdx], curData[nextDataIdx:], true
484+
485+
if endIdx == -1 {
486+
return nil, curData, true
567487
}
568488

569-
// terminated symbol in the middle of prevData and curData
570-
lineLen := startingLen + endIdx + terminatedLen
571-
return prevData[startingLen : startingLen+endIdx], curData[lineLen-prevLen:], true
489+
return curData[startLen : startLen+endIdx], curData[startLen+endIdx+len(e.LinesInfo.Terminated):], true
572490
}
573491

574492
// InsertData inserts data into specified table according to the specified format.

executor/write_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,8 @@ func (s *testSuite4) TestLoadDataEscape(c *C) {
23262326
{nil, []byte("7\trtn0ZbN\n"), []string{"7|" + string([]byte{'r', 't', 'n', '0', 'Z', 'b', 'N'})}, nil, trivialMsg},
23272327
{nil, []byte("8\trtn0Zb\\N\n"), []string{"8|" + string([]byte{'r', 't', 'n', '0', 'Z', 'b', 'N'})}, nil, trivialMsg},
23282328
{nil, []byte("9\ttab\\ tab\n"), []string{"9|tab tab"}, nil, trivialMsg},
2329+
// data broken at escape character.
2330+
{[]byte("1\ta string\\"), []byte("\n1\n"), []string{"1|a string\n1"}, nil, trivialMsg},
23292331
}
23302332
deleteSQL := "delete from load_data_test"
23312333
selectSQL := "select * from load_data_test;"

0 commit comments

Comments
 (0)