Skip to content

Commit 5315279

Browse files
authored
executor: Optimize slow log parsing's splitByColon function (#54630) (#55794)
close #54538
1 parent 083a1bf commit 5315279

File tree

2 files changed

+120
-24
lines changed

2 files changed

+120
-24
lines changed

pkg/executor/slow_query.go

Lines changed: 89 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"io"
2323
"os"
2424
"path/filepath"
25-
"regexp"
2625
"runtime"
2726
"slices"
2827
"strconv"
@@ -523,32 +522,99 @@ func getLineIndex(offset offset, index int) int {
523522
return fileLine
524523
}
525524

526-
// kvSplitRegex: it was just for split "field: value field: value..."
527-
var kvSplitRegex = regexp.MustCompile(`\w+: `)
525+
// findMatchedRightBracket returns the index of the right bracket that matches
// the left bracket at line[leftBracketIdx].
//
// Only '[' and '{' are accepted as the opening bracket, and only brackets of
// that same kind are counted while scanning, so a '{' inside "[...]" (or the
// reverse) is treated as ordinary text. The matching right bracket must be
// either the last byte of line or immediately followed by ' '.
//
// It returns -1 when leftBracketIdx is out of range, when line[leftBracketIdx]
// is neither '[' nor '{', when no matching right bracket exists, or when the
// matching right bracket is followed by a byte other than ' '.
func findMatchedRightBracket(line string, leftBracketIdx int) int {
	// Guard the index so that invalid input yields -1 instead of a panic.
	if leftBracketIdx < 0 || leftBracketIdx >= len(line) {
		return -1
	}

	leftBracket := line[leftBracketIdx]
	var rightBracket byte
	switch leftBracket {
	case '[':
		rightBracket = ']'
	case '{':
		rightBracket = '}'
	default:
		return -1
	}

	// The scan starts on the left bracket itself, so depth is at least 1
	// before any right bracket is seen and can never become negative.
	depth := 0
	for i := leftBracketIdx; i < len(line); i++ {
		switch line[i] {
		case leftBracket:
			depth++
		case rightBracket:
			depth--
			if depth > 0 {
				continue
			}
			// The outermost bracket is closed: it must end the value, i.e.
			// be the last byte or be followed by the ' ' separator.
			if i+1 < len(line) && line[i+1] != ' ' {
				return -1
			}
			return i
		}
	}
	return -1
}
562+
563+
// isLetterOrNumeric reports whether b is an ASCII letter ('a'-'z', 'A'-'Z')
// or an ASCII decimal digit ('0'-'9').
func isLetterOrNumeric(b byte) bool {
	switch {
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	default:
		return false
	}
}
528566

529567
// splitByColon split a line like "field: value field: value..."
568+
// Note:
569+
// 1. field string's first character can only be ASCII letters or digits, and can't contain ':'
570+
// 2. value string may be surrounded by brackets, allowed brackets includes "[]" and "{}", like {key: value,{key: value}}
571+
// "[]" can only be nested inside "[]"; "{}" can only be nested inside "{}"
572+
// 3. value string can't contain ' ' character unless it is inside brackets
530573
func splitByColon(line string) (fields []string, values []string) {
531-
matches := kvSplitRegex.FindAllStringIndex(line, -1)
532-
fields = make([]string, 0, len(matches))
533-
values = make([]string, 0, len(matches))
534-
535-
beg := 0
536-
end := 0
537-
for _, match := range matches {
538-
// trim ": "
539-
fields = append(fields, line[match[0]:match[1]-2])
540-
541-
end = match[0]
542-
if beg != 0 {
543-
// trim " "
544-
values = append(values, line[beg:end-1])
545-
}
546-
beg = match[1]
574+
fields = make([]string, 0, 1)
575+
values = make([]string, 0, 1)
576+
577+
lineLength := len(line)
578+
parseKey := true
579+
start := 0
580+
errMsg := ""
581+
for current := 0; current < lineLength; {
582+
if parseKey {
583+
// Find key start
584+
for current < lineLength && !isLetterOrNumeric(line[current]) {
585+
current++
586+
}
587+
start = current
588+
if current >= lineLength {
589+
break
590+
}
591+
for current < lineLength && line[current] != ':' {
592+
current++
593+
}
594+
fields = append(fields, line[start:current])
595+
parseKey = false
596+
current += 2 // bypass ": "
597+
} else {
598+
start = current
599+
if current < lineLength && (line[current] == '{' || line[current] == '[') {
600+
rBraceIdx := findMatchedRightBracket(line, current)
601+
if rBraceIdx == -1 {
602+
errMsg = "Braces matched error"
603+
break
604+
}
605+
current = rBraceIdx + 1
606+
} else {
607+
for current < lineLength && line[current] != ' ' {
608+
current++
609+
}
610+
}
611+
values = append(values, line[start:min(current, len(line))])
612+
parseKey = true
613+
}
547614
}
548-
549-
if end != len(line) {
550-
// " " does not exist in the end
551-
values = append(values, line[beg:])
615+
if len(errMsg) > 0 {
616+
logutil.BgLogger().Warn("slow query parse slow log error", zap.String("Error", errMsg), zap.String("Log", line))
617+
return nil, nil
552618
}
553619
return fields, values
554620
}

pkg/executor/slow_query_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ func TestSplitbyColon(t *testing.T) {
532532
},
533533
{
534534
"123a",
535-
[]string{},
536535
[]string{"123a"},
536+
[]string{},
537537
},
538538
{
539539
"1a: 2b",
@@ -550,6 +550,36 @@ func TestSplitbyColon(t *testing.T) {
550550
[]string{"1a", "4d"},
551551
[]string{"[2b,3c]", "5e"},
552552
},
553+
{
554+
"1a: [2b,[3c: 3cc]] 4d: 5e",
555+
[]string{"1a", "4d"},
556+
[]string{"[2b,[3c: 3cc]]", "5e"},
557+
},
558+
{
559+
"1a: {2b 3c} 4d: 5e",
560+
[]string{"1a", "4d"},
561+
[]string{"{2b 3c}", "5e"},
562+
},
563+
{
564+
"1a: {2b,3c} 4d: 5e",
565+
[]string{"1a", "4d"},
566+
[]string{"{2b,3c}", "5e"},
567+
},
568+
{
569+
"1a: {2b,{3c: 3cc}} 4d: 5e",
570+
[]string{"1a", "4d"},
571+
[]string{"{2b,{3c: 3cc}}", "5e"},
572+
},
573+
{
574+
"1a: {{{2b,{3c: 3cc}} 4d: 5e",
575+
nil,
576+
nil,
577+
},
578+
{
579+
"1a: [2b,[3c: 3cc]]]] 4d: 5e",
580+
nil,
581+
nil,
582+
},
553583
{
554584

555585
"Time: 2021-09-08T14:39:54.506967433+08:00",

0 commit comments

Comments
 (0)