Skip to content

Commit 2f84b4b

Browse files
authored
executor: Optimize slow log parsing's splitByColon function (#54630) (#55796)
close #54538
1 parent ff9162f commit 2f84b4b

File tree

2 files changed

+121
-24
lines changed

2 files changed

+121
-24
lines changed

executor/slow_query.go

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"io"
2222
"os"
2323
"path/filepath"
24-
"regexp"
2524
"runtime"
2625
"strconv"
2726
"strings"
@@ -46,6 +45,7 @@ import (
4645
"github.com/pingcap/tidb/util/execdetails"
4746
"github.com/pingcap/tidb/util/hack"
4847
"github.com/pingcap/tidb/util/logutil"
48+
"github.com/pingcap/tidb/util/mathutil"
4949
"github.com/pingcap/tidb/util/memory"
5050
"github.com/pingcap/tidb/util/plancodec"
5151
"go.uber.org/zap"
@@ -502,32 +502,99 @@ func getLineIndex(offset offset, index int) int {
502502
return fileLine
503503
}
504504

505-
// kvSplitRegex: it was just for split "field: value field: value..."
506-
var kvSplitRegex = regexp.MustCompile(`\w+: `)
505+
// findMatchedRightBracket returns the index of the right bracket that matches
// the left bracket at line[leftBracketIdx].
//
// Only "[]" and "{}" pairs are recognized. While scanning, only brackets of the
// same kind as the opening one are counted, so "[]" nests inside "[]" and "{}"
// nests inside "{}"; brackets of the other kind are treated as ordinary bytes.
// The matching right bracket must be either the last byte of line or be
// immediately followed by a space.
//
// It returns -1 when:
//   - leftBracketIdx is not a valid index into line,
//   - line[leftBracketIdx] is neither '[' nor '{',
//   - the brackets are unbalanced (no matching right bracket is found), or
//   - the matching right bracket is followed by a byte other than ' '.
func findMatchedRightBracket(line string, leftBracketIdx int) int {
	// Guard the index so that malformed callers get -1 instead of a panic.
	if leftBracketIdx < 0 || leftBracketIdx >= len(line) {
		return -1
	}

	leftBracket := line[leftBracketIdx]
	var rightBracket byte
	switch leftBracket {
	case '[':
		rightBracket = ']'
	case '{':
		rightBracket = '}'
	default:
		return -1
	}

	// depth is the number of currently open brackets. The scan starts on the
	// left bracket itself, so depth is at least 1 before any right bracket is
	// seen and can never become negative.
	depth := 0
	for i := leftBracketIdx; i < len(line); i++ {
		switch line[i] {
		case leftBracket:
			depth++
		case rightBracket:
			depth--
			if depth == 0 {
				// A bracketed value must end the line or be followed by the
				// space that separates it from the next field.
				if i+1 < len(line) && line[i+1] != ' ' {
					return -1
				}
				return i
			}
		}
	}
	return -1
}
542+
543+
// isLetterOrNumeric reports whether b is an ASCII digit ('0'-'9') or an ASCII
// letter ('a'-'z', 'A'-'Z'). Every other byte, including '_' and bytes >= 0x80,
// yields false.
func isLetterOrNumeric(b byte) bool {
	switch {
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	default:
		return false
	}
}
507546

508547
// splitByColon split a line like "field: value field: value..."
548+
// Note:
549+
// 1. field string's first character can only be ASCII letters or digits, and can't contain ':'
550+
// 2. value string may be surrounded by brackets, allowed brackets includes "[]" and "{}", like {key: value,{key: value}}
551+
// "[]" can only be nested inside "[]"; "{}" can only be nested inside "{}"
552+
// 3. value string can't contain ' ' character unless it is inside brackets
509553
func splitByColon(line string) (fields []string, values []string) {
510-
matches := kvSplitRegex.FindAllStringIndex(line, -1)
511-
fields = make([]string, 0, len(matches))
512-
values = make([]string, 0, len(matches))
513-
514-
beg := 0
515-
end := 0
516-
for _, match := range matches {
517-
// trim ": "
518-
fields = append(fields, line[match[0]:match[1]-2])
519-
520-
end = match[0]
521-
if beg != 0 {
522-
// trim " "
523-
values = append(values, line[beg:end-1])
524-
}
525-
beg = match[1]
554+
fields = make([]string, 0, 1)
555+
values = make([]string, 0, 1)
556+
557+
lineLength := len(line)
558+
parseKey := true
559+
start := 0
560+
errMsg := ""
561+
for current := 0; current < lineLength; {
562+
if parseKey {
563+
// Find key start
564+
for current < lineLength && !isLetterOrNumeric(line[current]) {
565+
current++
566+
}
567+
start = current
568+
if current >= lineLength {
569+
break
570+
}
571+
for current < lineLength && line[current] != ':' {
572+
current++
573+
}
574+
fields = append(fields, line[start:current])
575+
parseKey = false
576+
current += 2 // bypass ": "
577+
} else {
578+
start = current
579+
if current < lineLength && (line[current] == '{' || line[current] == '[') {
580+
rBraceIdx := findMatchedRightBracket(line, current)
581+
if rBraceIdx == -1 {
582+
errMsg = "Braces matched error"
583+
break
584+
}
585+
current = rBraceIdx + 1
586+
} else {
587+
for current < lineLength && line[current] != ' ' {
588+
current++
589+
}
590+
}
591+
values = append(values, line[start:mathutil.Min(current, len(line))])
592+
parseKey = true
593+
}
526594
}
527-
528-
if end != len(line) {
529-
// " " does not exist in the end
530-
values = append(values, line[beg:])
595+
if len(errMsg) > 0 {
596+
logutil.BgLogger().Warn("slow query parse slow log error", zap.String("Error", errMsg), zap.String("Log", line))
597+
return nil, nil
531598
}
532599
return fields, values
533600
}

executor/slow_query_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,8 @@ func TestSplitbyColon(t *testing.T) {
503503
},
504504
{
505505
"123a",
506-
[]string{},
507506
[]string{"123a"},
507+
[]string{},
508508
},
509509
{
510510
"1a: 2b",
@@ -521,6 +521,36 @@ func TestSplitbyColon(t *testing.T) {
521521
[]string{"1a", "4d"},
522522
[]string{"[2b,3c]", "5e"},
523523
},
524+
{
525+
"1a: [2b,[3c: 3cc]] 4d: 5e",
526+
[]string{"1a", "4d"},
527+
[]string{"[2b,[3c: 3cc]]", "5e"},
528+
},
529+
{
530+
"1a: {2b 3c} 4d: 5e",
531+
[]string{"1a", "4d"},
532+
[]string{"{2b 3c}", "5e"},
533+
},
534+
{
535+
"1a: {2b,3c} 4d: 5e",
536+
[]string{"1a", "4d"},
537+
[]string{"{2b,3c}", "5e"},
538+
},
539+
{
540+
"1a: {2b,{3c: 3cc}} 4d: 5e",
541+
[]string{"1a", "4d"},
542+
[]string{"{2b,{3c: 3cc}}", "5e"},
543+
},
544+
{
545+
"1a: {{{2b,{3c: 3cc}} 4d: 5e",
546+
nil,
547+
nil,
548+
},
549+
{
550+
"1a: [2b,[3c: 3cc]]]] 4d: 5e",
551+
nil,
552+
nil,
553+
},
524554
{
525555

526556
"Time: 2021-09-08T14:39:54.506967433+08:00",

0 commit comments

Comments
 (0)