Skip to content

Commit 441a913

Browse files
authored
executor: Optimize slow log parsing's splitByColon function (#54630) (#55798)
close #54538
1 parent 02e85b2 commit 441a913

File tree

2 files changed

+121
-24
lines changed

2 files changed

+121
-24
lines changed

executor/slow_query.go

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"io"
2222
"os"
2323
"path/filepath"
24-
"regexp"
2524
"runtime"
2625
"sort"
2726
"strconv"
@@ -47,6 +46,7 @@ import (
4746
"github.com/pingcap/tidb/util/execdetails"
4847
"github.com/pingcap/tidb/util/hack"
4948
"github.com/pingcap/tidb/util/logutil"
49+
"github.com/pingcap/tidb/util/mathutil"
5050
"github.com/pingcap/tidb/util/memory"
5151
"github.com/pingcap/tidb/util/plancodec"
5252
"go.uber.org/zap"
@@ -526,32 +526,99 @@ func getLineIndex(offset offset, index int) int {
526526
return fileLine
527527
}
528528

529-
// kvSplitRegex: it was just for split "field: value field: value..."
530-
var kvSplitRegex = regexp.MustCompile(`\w+: `)
529+
// findMatchedRightBracket returns the index of the right bracket that matches
// the left bracket at line[leftBracketIdx].
//
// Only "[]" and "{}" pairs are recognized. While scanning, only brackets of the
// same kind as the opening one are counted, so "[]" nests inside "[]" and "{}"
// nests inside "{}"; brackets of the other kind are treated as ordinary bytes.
//
// It returns -1 when:
//   - leftBracketIdx is not a valid index into line,
//   - line[leftBracketIdx] is neither '[' nor '{',
//   - no matching right bracket is found before the end of line, or
//   - the matching right bracket is followed by a byte other than ' '
//     (it must either end the line or be followed by a space).
func findMatchedRightBracket(line string, leftBracketIdx int) int {
	// Guard the index so that invalid input yields -1 instead of a panic.
	if leftBracketIdx < 0 || leftBracketIdx >= len(line) {
		return -1
	}

	leftBracket := line[leftBracketIdx]
	var rightBracket byte
	switch leftBracket {
	case '[':
		rightBracket = ']'
	case '{':
		rightBracket = '}'
	default:
		return -1
	}

	// depth is the number of currently open brackets. The scan starts on the
	// opening bracket itself, so depth is at least 1 before any right bracket is
	// seen and the function returns as soon as it drops back to 0; it can never
	// become negative.
	depth := 0
	for i := leftBracketIdx; i < len(line); i++ {
		switch line[i] {
		case leftBracket:
			depth++
		case rightBracket:
			depth--
			if depth > 0 {
				continue
			}
			// The closing bracket must end the value: either it is the last
			// byte of the line or it is followed by a space separator.
			if i+1 < len(line) && line[i+1] != ' ' {
				return -1
			}
			return i
		}
	}
	return -1
}
566+
567+
// isLetterOrNumeric reports whether b is an ASCII letter ('A'-'Z', 'a'-'z')
// or an ASCII digit ('0'-'9').
func isLetterOrNumeric(b byte) bool {
	switch {
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	default:
		return false
	}
}
531570

532571
// splitByColon split a line like "field: value field: value..."
572+
// Note:
573+
// 1. field string's first character can only be ASCII letters or digits, and can't contain ':'
574+
// 2. value string may be surrounded by brackets, allowed brackets includes "[]" and "{}", like {key: value,{key: value}}
575+
// "[]" can only be nested inside "[]"; "{}" can only be nested inside "{}"
576+
// 3. value string can't contain ' ' character unless it is inside brackets
533577
func splitByColon(line string) (fields []string, values []string) {
534-
matches := kvSplitRegex.FindAllStringIndex(line, -1)
535-
fields = make([]string, 0, len(matches))
536-
values = make([]string, 0, len(matches))
537-
538-
beg := 0
539-
end := 0
540-
for _, match := range matches {
541-
// trim ": "
542-
fields = append(fields, line[match[0]:match[1]-2])
543-
544-
end = match[0]
545-
if beg != 0 {
546-
// trim " "
547-
values = append(values, line[beg:end-1])
548-
}
549-
beg = match[1]
578+
fields = make([]string, 0, 1)
579+
values = make([]string, 0, 1)
580+
581+
lineLength := len(line)
582+
parseKey := true
583+
start := 0
584+
errMsg := ""
585+
for current := 0; current < lineLength; {
586+
if parseKey {
587+
// Find key start
588+
for current < lineLength && !isLetterOrNumeric(line[current]) {
589+
current++
590+
}
591+
start = current
592+
if current >= lineLength {
593+
break
594+
}
595+
for current < lineLength && line[current] != ':' {
596+
current++
597+
}
598+
fields = append(fields, line[start:current])
599+
parseKey = false
600+
current += 2 // bypass ": "
601+
} else {
602+
start = current
603+
if current < lineLength && (line[current] == '{' || line[current] == '[') {
604+
rBraceIdx := findMatchedRightBracket(line, current)
605+
if rBraceIdx == -1 {
606+
errMsg = "Braces matched error"
607+
break
608+
}
609+
current = rBraceIdx + 1
610+
} else {
611+
for current < lineLength && line[current] != ' ' {
612+
current++
613+
}
614+
}
615+
values = append(values, line[start:mathutil.Min(current, len(line))])
616+
parseKey = true
617+
}
550618
}
551-
552-
if end != len(line) {
553-
// " " does not exist in the end
554-
values = append(values, line[beg:])
619+
if len(errMsg) > 0 {
620+
logutil.BgLogger().Warn("slow query parse slow log error", zap.String("Error", errMsg), zap.String("Log", line))
621+
return nil, nil
555622
}
556623
return fields, values
557624
}

executor/slow_query_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,8 @@ func TestSplitbyColon(t *testing.T) {
498498
},
499499
{
500500
"123a",
501-
[]string{},
502501
[]string{"123a"},
502+
[]string{},
503503
},
504504
{
505505
"1a: 2b",
@@ -516,6 +516,36 @@ func TestSplitbyColon(t *testing.T) {
516516
[]string{"1a", "4d"},
517517
[]string{"[2b,3c]", "5e"},
518518
},
519+
{
520+
"1a: [2b,[3c: 3cc]] 4d: 5e",
521+
[]string{"1a", "4d"},
522+
[]string{"[2b,[3c: 3cc]]", "5e"},
523+
},
524+
{
525+
"1a: {2b 3c} 4d: 5e",
526+
[]string{"1a", "4d"},
527+
[]string{"{2b 3c}", "5e"},
528+
},
529+
{
530+
"1a: {2b,3c} 4d: 5e",
531+
[]string{"1a", "4d"},
532+
[]string{"{2b,3c}", "5e"},
533+
},
534+
{
535+
"1a: {2b,{3c: 3cc}} 4d: 5e",
536+
[]string{"1a", "4d"},
537+
[]string{"{2b,{3c: 3cc}}", "5e"},
538+
},
539+
{
540+
"1a: {{{2b,{3c: 3cc}} 4d: 5e",
541+
nil,
542+
nil,
543+
},
544+
{
545+
"1a: [2b,[3c: 3cc]]]] 4d: 5e",
546+
nil,
547+
nil,
548+
},
519549
{
520550

521551
"Time: 2021-09-08T14:39:54.506967433+08:00",

0 commit comments

Comments
 (0)