Skip to content

Commit e71b6c3

Browse files
authored
executor: Optimize slow log parsing's splitByColon function (#54630) (#55795)
close #54538
1 parent 7497458 commit e71b6c3

File tree

2 files changed

+120
-24
lines changed

2 files changed

+120
-24
lines changed

pkg/executor/slow_query.go

Lines changed: 89 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"io"
2222
"os"
2323
"path/filepath"
24-
"regexp"
2524
"runtime"
2625
"slices"
2726
"strconv"
@@ -502,32 +501,99 @@ func getLineIndex(offset offset, index int) int {
502501
return fileLine
503502
}
504503

505-
// kvSplitRegex: it was just for split "field: value field: value..."
506-
var kvSplitRegex = regexp.MustCompile(`\w+: `)
504+
// findMatchedRightBracket returns the index of the right bracket that matches
// the left bracket at line[leftBracketIdx].
//
// Only '[' (closed by ']') and '{' (closed by '}') are recognized, and only
// brackets of the same kind as the opening one are counted while scanning, so
// "[]" nests inside "[]" and "{}" nests inside "{}".
//
// It returns -1 when:
//   - leftBracketIdx is not a valid index into line,
//   - line[leftBracketIdx] is neither '[' nor '{',
//   - the end of line is reached before the opening bracket is closed, or
//   - the matching right bracket is followed by a byte other than ' '
//     (a matching bracket at the very end of line is accepted).
func findMatchedRightBracket(line string, leftBracketIdx int) int {
	// Guard the index so that invalid input yields -1 instead of a panic.
	if leftBracketIdx < 0 || leftBracketIdx >= len(line) {
		return -1
	}

	leftBracket := line[leftBracketIdx]
	var rightBracket byte
	switch leftBracket {
	case '[':
		rightBracket = ']'
	case '{':
		rightBracket = '}'
	default:
		return -1
	}

	// depth is the number of currently open brackets of the tracked kind.
	// The scan starts on the opening bracket itself, so depth becomes 1 on
	// the first iteration and can never drop below 0 before we return.
	depth := 0
	for i := leftBracketIdx; i < len(line); i++ {
		switch line[i] {
		case leftBracket:
			depth++
		case rightBracket:
			depth--
			if depth > 0 {
				continue
			}
			// The outermost bracket is closed here; it must end the line or
			// be followed by a space to count as a complete value.
			if i+1 < len(line) && line[i+1] != ' ' {
				return -1
			}
			return i
		}
	}
	return -1
}
541+
542+
// isLetterOrNumeric reports whether b is an ASCII letter ('A'-'Z', 'a'-'z')
// or an ASCII decimal digit ('0'-'9').
func isLetterOrNumeric(b byte) bool {
	switch {
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	default:
		return false
	}
}
507545

508546
// splitByColon split a line like "field: value field: value..."
547+
// Note:
548+
// 1. field string's first character can only be ASCII letters or digits, and can't contain ':'
549+
// 2. value string may be surrounded by brackets, allowed brackets includes "[]" and "{}", like {key: value,{key: value}}
550+
// "[]" can only be nested inside "[]"; "{}" can only be nested inside "{}"
551+
// 3. value string can't contain ' ' character unless it is inside brackets
509552
func splitByColon(line string) (fields []string, values []string) {
510-
matches := kvSplitRegex.FindAllStringIndex(line, -1)
511-
fields = make([]string, 0, len(matches))
512-
values = make([]string, 0, len(matches))
513-
514-
beg := 0
515-
end := 0
516-
for _, match := range matches {
517-
// trim ": "
518-
fields = append(fields, line[match[0]:match[1]-2])
519-
520-
end = match[0]
521-
if beg != 0 {
522-
// trim " "
523-
values = append(values, line[beg:end-1])
524-
}
525-
beg = match[1]
553+
fields = make([]string, 0, 1)
554+
values = make([]string, 0, 1)
555+
556+
lineLength := len(line)
557+
parseKey := true
558+
start := 0
559+
errMsg := ""
560+
for current := 0; current < lineLength; {
561+
if parseKey {
562+
// Find key start
563+
for current < lineLength && !isLetterOrNumeric(line[current]) {
564+
current++
565+
}
566+
start = current
567+
if current >= lineLength {
568+
break
569+
}
570+
for current < lineLength && line[current] != ':' {
571+
current++
572+
}
573+
fields = append(fields, line[start:current])
574+
parseKey = false
575+
current += 2 // bypass ": "
576+
} else {
577+
start = current
578+
if current < lineLength && (line[current] == '{' || line[current] == '[') {
579+
rBraceIdx := findMatchedRightBracket(line, current)
580+
if rBraceIdx == -1 {
581+
errMsg = "Braces matched error"
582+
break
583+
}
584+
current = rBraceIdx + 1
585+
} else {
586+
for current < lineLength && line[current] != ' ' {
587+
current++
588+
}
589+
}
590+
values = append(values, line[start:min(current, len(line))])
591+
parseKey = true
592+
}
526593
}
527-
528-
if end != len(line) {
529-
// " " does not exist in the end
530-
values = append(values, line[beg:])
594+
if len(errMsg) > 0 {
595+
logutil.BgLogger().Warn("slow query parse slow log error", zap.String("Error", errMsg), zap.String("Log", line))
596+
return nil, nil
531597
}
532598
return fields, values
533599
}

pkg/executor/slow_query_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,8 @@ func TestSplitbyColon(t *testing.T) {
508508
},
509509
{
510510
"123a",
511-
[]string{},
512511
[]string{"123a"},
512+
[]string{},
513513
},
514514
{
515515
"1a: 2b",
@@ -526,6 +526,36 @@ func TestSplitbyColon(t *testing.T) {
526526
[]string{"1a", "4d"},
527527
[]string{"[2b,3c]", "5e"},
528528
},
529+
{
530+
"1a: [2b,[3c: 3cc]] 4d: 5e",
531+
[]string{"1a", "4d"},
532+
[]string{"[2b,[3c: 3cc]]", "5e"},
533+
},
534+
{
535+
"1a: {2b 3c} 4d: 5e",
536+
[]string{"1a", "4d"},
537+
[]string{"{2b 3c}", "5e"},
538+
},
539+
{
540+
"1a: {2b,3c} 4d: 5e",
541+
[]string{"1a", "4d"},
542+
[]string{"{2b,3c}", "5e"},
543+
},
544+
{
545+
"1a: {2b,{3c: 3cc}} 4d: 5e",
546+
[]string{"1a", "4d"},
547+
[]string{"{2b,{3c: 3cc}}", "5e"},
548+
},
549+
{
550+
"1a: {{{2b,{3c: 3cc}} 4d: 5e",
551+
nil,
552+
nil,
553+
},
554+
{
555+
"1a: [2b,[3c: 3cc]]]] 4d: 5e",
556+
nil,
557+
nil,
558+
},
529559
{
530560

531561
"Time: 2021-09-08T14:39:54.506967433+08:00",

0 commit comments

Comments
 (0)