@@ -363,65 +363,20 @@ func (e *LoadDataInfo) SetMaxRowsInBatch(limit uint64) {
363
363
e .curBatchCnt = 0
364
364
}
365
365
366
- // getValidData returns prevData and curData that starts from starting symbol.
367
- // If the data doesn't have starting symbol, prevData is nil and curData is curData[len(curData)-startingLen+1:].
368
- // If curData size less than startingLen, curData is returned directly.
369
- func (e * LoadDataInfo ) getValidData (prevData , curData []byte ) ([]byte , []byte ) {
370
- startingLen := len (e .LinesInfo .Starting )
371
- if startingLen == 0 {
372
- return prevData , curData
373
- }
374
-
375
- prevLen := len (prevData )
376
- if prevLen > 0 {
377
- // starting symbol in the prevData
378
- idx := strings .Index (string (hack .String (prevData )), e .LinesInfo .Starting )
379
- if idx != - 1 {
380
- return prevData [idx :], curData
381
- }
382
-
383
- // starting symbol in the middle of prevData and curData
384
- restStart := curData
385
- if len (curData ) >= startingLen {
386
- restStart = curData [:startingLen - 1 ]
387
- }
388
- prevData = append (prevData , restStart ... )
389
- idx = strings .Index (string (hack .String (prevData )), e .LinesInfo .Starting )
390
- if idx != - 1 {
391
- return prevData [idx :prevLen ], curData
392
- }
393
- }
394
-
395
- // starting symbol in the curData
366
+ // getValidData returns curData starting from the first occurrence of the starting symbol, and true.
367
+ // If the data doesn't contain the starting symbol, it returns curData[len(curData)-startingLen+1:] (the trailing startingLen-1 bytes) and false.
368
+ func (e * LoadDataInfo ) getValidData (curData []byte ) ([]byte , bool ) {
396
369
idx := strings .Index (string (hack .String (curData )), e .LinesInfo .Starting )
397
- if idx ! = - 1 {
398
- return nil , curData [idx :]
370
+ if idx = = - 1 {
371
+ return curData [len ( curData ) - len ( e . LinesInfo . Starting ) + 1 :], false
399
372
}
400
373
401
- // no starting symbol
402
- if len (curData ) >= startingLen {
403
- curData = curData [len (curData )- startingLen + 1 :]
404
- }
405
- return nil , curData
406
- }
407
-
408
- func (e * LoadDataInfo ) isInQuoter (bs []byte ) bool {
409
- inQuoter := false
410
- for i := 0 ; i < len (bs ); i ++ {
411
- switch bs [i ] {
412
- case e .FieldsInfo .Enclosed :
413
- inQuoter = ! inQuoter
414
- case e .FieldsInfo .Escaped :
415
- i ++
416
- default :
417
- }
418
- }
419
- return inQuoter
374
+ return curData [idx :], true
420
375
}
421
376
422
377
// indexOfTerminator returns the index of the terminator, or -1 if there is none.
423
378
// Normally the field terminator and line terminator are short, so we just use a brute-force algorithm.
424
- func (e * LoadDataInfo ) indexOfTerminator (bs []byte , isInQuoter bool ) int {
379
+ func (e * LoadDataInfo ) indexOfTerminator (bs []byte ) int {
425
380
fieldTerm := []byte (e .FieldsInfo .Terminated )
426
381
fieldTermLen := len (fieldTerm )
427
382
lineTerm := []byte (e .LinesInfo .Terminated )
@@ -462,15 +417,13 @@ func (e *LoadDataInfo) indexOfTerminator(bs []byte, isInQuoter bool) int {
462
417
inQuoter := false
463
418
loop:
464
419
for i := 0 ; i < len (bs ); i ++ {
465
- if atFieldStart && bs [i ] == e .FieldsInfo .Enclosed {
466
- if ! isInQuoter {
467
- inQuoter = true
468
- }
420
+ if atFieldStart && e .FieldsInfo .Enclosed != byte (0 ) && bs [i ] == e .FieldsInfo .Enclosed {
421
+ inQuoter = ! inQuoter
469
422
atFieldStart = false
470
423
continue
471
424
}
472
425
restLen := len (bs ) - i - 1
473
- if inQuoter && bs [i ] == e .FieldsInfo .Enclosed {
426
+ if inQuoter && e . FieldsInfo . Enclosed != byte ( 0 ) && bs [i ] == e .FieldsInfo .Enclosed {
474
427
// look ahead to see if it is end of line or field.
475
428
switch cmpTerm (restLen , bs [i + 1 :]) {
476
429
case lineTermType :
@@ -508,67 +461,32 @@ loop:
508
461
// getLine returns a line, the data remaining after that line, and a bool value.
509
462
// The bool is true if the data has the starting symbol; otherwise it is false.
510
463
func (e * LoadDataInfo ) getLine (prevData , curData []byte , ignore bool ) ([]byte , []byte , bool ) {
511
- startingLen := len (e .LinesInfo .Starting )
512
- prevData , curData = e .getValidData (prevData , curData )
513
- if prevData == nil && len (curData ) < startingLen {
514
- return nil , curData , false
515
- }
516
- inquotor := e .isInQuoter (prevData )
517
- prevLen := len (prevData )
518
- terminatedLen := len (e .LinesInfo .Terminated )
519
- curStartIdx := 0
520
- if prevLen < startingLen {
521
- curStartIdx = startingLen - prevLen
522
- }
523
- endIdx := - 1
524
- if len (curData ) >= curStartIdx {
525
- if ignore {
526
- endIdx = strings .Index (string (hack .String (curData [curStartIdx :])), e .LinesInfo .Terminated )
527
- } else {
528
- endIdx = e .indexOfTerminator (curData [curStartIdx :], inquotor )
529
- }
530
- }
531
- if endIdx == - 1 {
532
- // no terminated symbol
533
- if len (prevData ) == 0 {
534
- return nil , curData , true
535
- }
536
-
537
- // terminated symbol in the middle of prevData and curData
464
+ if prevData != nil {
538
465
curData = append (prevData , curData ... )
539
- if ignore {
540
- endIdx = strings .Index (string (hack .String (curData [startingLen :])), e .LinesInfo .Terminated )
541
- } else {
542
- endIdx = e .indexOfTerminator (curData [startingLen :], inquotor )
466
+ }
467
+ startLen := len (e .LinesInfo .Starting )
468
+ if startLen != 0 {
469
+ if len (curData ) < startLen {
470
+ return nil , curData , false
543
471
}
544
- if endIdx != - 1 {
545
- nextDataIdx := startingLen + endIdx + terminatedLen
546
- return curData [startingLen : startingLen + endIdx ], curData [nextDataIdx :], true
472
+ var ok bool
473
+ curData , ok = e .getValidData (curData )
474
+ if ! ok {
475
+ return nil , curData , false
547
476
}
548
- // no terminated symbol
549
- return nil , curData , true
550
- }
551
-
552
- // terminated symbol in the curData
553
- nextDataIdx := curStartIdx + endIdx + terminatedLen
554
- if len (prevData ) == 0 {
555
- return curData [curStartIdx : curStartIdx + endIdx ], curData [nextDataIdx :], true
556
477
}
557
-
558
- // terminated symbol in the curData
559
- prevData = append (prevData , curData [:nextDataIdx ]... )
478
+ var endIdx int
560
479
if ignore {
561
- endIdx = strings .Index (string (hack .String (prevData [ startingLen :])), e .LinesInfo .Terminated )
480
+ endIdx = strings .Index (string (hack .String (curData [ startLen :])), e .LinesInfo .Terminated )
562
481
} else {
563
- endIdx = e .indexOfTerminator (prevData [ startingLen :], inquotor )
482
+ endIdx = e .indexOfTerminator (curData [ startLen :] )
564
483
}
565
- if endIdx >= prevLen {
566
- return prevData [startingLen : startingLen + endIdx ], curData [nextDataIdx :], true
484
+
485
+ if endIdx == - 1 {
486
+ return nil , curData , true
567
487
}
568
488
569
- // terminated symbol in the middle of prevData and curData
570
- lineLen := startingLen + endIdx + terminatedLen
571
- return prevData [startingLen : startingLen + endIdx ], curData [lineLen - prevLen :], true
489
+ return curData [startLen : startLen + endIdx ], curData [startLen + endIdx + len (e .LinesInfo .Terminated ):], true
572
490
}
573
491
574
492
// InsertData inserts data into specified table according to the specified format.
0 commit comments