Skip to content

Commit 062b861

Browse files
authored
executor: Optimize slow log parsing's splitByColon function (#54630) (#55797)
close #54538
1 parent fa1fa8e commit 062b861

File tree

2 files changed

+121
-24
lines changed

2 files changed

+121
-24
lines changed

executor/slow_query.go

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"io"
2222
"os"
2323
"path/filepath"
24-
"regexp"
2524
"runtime"
2625
"strconv"
2726
"strings"
@@ -47,6 +46,7 @@ import (
4746
"github.com/pingcap/tidb/util/execdetails"
4847
"github.com/pingcap/tidb/util/hack"
4948
"github.com/pingcap/tidb/util/logutil"
49+
"github.com/pingcap/tidb/util/mathutil"
5050
"github.com/pingcap/tidb/util/memory"
5151
"github.com/pingcap/tidb/util/plancodec"
5252
"go.uber.org/zap"
@@ -527,32 +527,99 @@ func getLineIndex(offset offset, index int) int {
527527
return fileLine
528528
}
529529

530-
// kvSplitRegex: it was just for split "field: value field: value..."
531-
var kvSplitRegex = regexp.MustCompile(`\w+: `)
530+
// findMatchedRightBracket returns the index of the right bracket that matches
// the left bracket at line[leftBracketIdx].
//
// Only '[' and '{' are accepted as left brackets. Nesting is tracked for the
// same bracket kind only: while matching "[...]" any '{' or '}' bytes are
// treated as ordinary content, and vice versa.
//
// The matching right bracket must either be the last byte of line or be
// followed by a ' ' byte.
//
// It returns -1 if leftBracketIdx is out of range, if line[leftBracketIdx] is
// not '[' or '{', if no matching right bracket is found, or if the matching
// right bracket is followed by a byte other than ' '.
func findMatchedRightBracket(line string, leftBracketIdx int) int {
	// Guard the index so that invalid input yields -1 instead of a panic.
	if leftBracketIdx < 0 || leftBracketIdx >= len(line) {
		return -1
	}

	leftBracket := line[leftBracketIdx]
	var rightBracket byte
	switch leftBracket {
	case '[':
		rightBracket = ']'
	case '{':
		rightBracket = '}'
	default:
		return -1
	}

	// depth counts currently open brackets; the first byte visited is the
	// left bracket itself, so depth is 1 after the first iteration.
	depth := 0
	for i := leftBracketIdx; i < len(line); i++ {
		switch line[i] {
		case leftBracket:
			depth++
		case rightBracket:
			depth--
			if depth > 0 {
				continue
			}
			// The outermost bracket is closed: it is only a valid value
			// terminator at end of line or when followed by a space.
			if i+1 < len(line) && line[i+1] != ' ' {
				return -1
			}
			return i
		}
	}
	return -1
}
567+
568+
// isLetterOrNumeric reports whether b is an ASCII letter ('A'-'Z', 'a'-'z')
// or an ASCII digit ('0'-'9'). Every other byte, including '_' and any byte
// >= 0x80, yields false.
func isLetterOrNumeric(b byte) bool {
	switch {
	case 'A' <= b && b <= 'Z',
		'a' <= b && b <= 'z',
		'0' <= b && b <= '9':
		return true
	}
	return false
}
532571

533572
// splitByColon split a line like "field: value field: value..."
573+
// Note:
574+
// 1. field string's first character can only be ASCII letters or digits, and can't contain ':'
575+
// 2. value string may be surrounded by brackets, allowed brackets includes "[]" and "{}", like {key: value,{key: value}}
576+
// "[]" can only be nested inside "[]"; "{}" can only be nested inside "{}"
577+
// 3. value string can't contain ' ' character unless it is inside brackets
534578
func splitByColon(line string) (fields []string, values []string) {
535-
matches := kvSplitRegex.FindAllStringIndex(line, -1)
536-
fields = make([]string, 0, len(matches))
537-
values = make([]string, 0, len(matches))
538-
539-
beg := 0
540-
end := 0
541-
for _, match := range matches {
542-
// trim ": "
543-
fields = append(fields, line[match[0]:match[1]-2])
544-
545-
end = match[0]
546-
if beg != 0 {
547-
// trim " "
548-
values = append(values, line[beg:end-1])
549-
}
550-
beg = match[1]
579+
fields = make([]string, 0, 1)
580+
values = make([]string, 0, 1)
581+
582+
lineLength := len(line)
583+
parseKey := true
584+
start := 0
585+
errMsg := ""
586+
for current := 0; current < lineLength; {
587+
if parseKey {
588+
// Find key start
589+
for current < lineLength && !isLetterOrNumeric(line[current]) {
590+
current++
591+
}
592+
start = current
593+
if current >= lineLength {
594+
break
595+
}
596+
for current < lineLength && line[current] != ':' {
597+
current++
598+
}
599+
fields = append(fields, line[start:current])
600+
parseKey = false
601+
current += 2 // bypass ": "
602+
} else {
603+
start = current
604+
if current < lineLength && (line[current] == '{' || line[current] == '[') {
605+
rBraceIdx := findMatchedRightBracket(line, current)
606+
if rBraceIdx == -1 {
607+
errMsg = "Braces matched error"
608+
break
609+
}
610+
current = rBraceIdx + 1
611+
} else {
612+
for current < lineLength && line[current] != ' ' {
613+
current++
614+
}
615+
}
616+
values = append(values, line[start:mathutil.Min(current, len(line))])
617+
parseKey = true
618+
}
551619
}
552-
553-
if end != len(line) {
554-
// " " does not exist in the end
555-
values = append(values, line[beg:])
620+
if len(errMsg) > 0 {
621+
logutil.BgLogger().Warn("slow query parse slow log error", zap.String("Error", errMsg), zap.String("Log", line))
622+
return nil, nil
556623
}
557624
return fields, values
558625
}

executor/slow_query_test.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,8 +498,8 @@ func TestSplitbyColon(t *testing.T) {
498498
},
499499
{
500500
"123a",
501-
[]string{},
502501
[]string{"123a"},
502+
[]string{},
503503
},
504504
{
505505
"1a: 2b",
@@ -516,6 +516,36 @@ func TestSplitbyColon(t *testing.T) {
516516
[]string{"1a", "4d"},
517517
[]string{"[2b,3c]", "5e"},
518518
},
519+
{
520+
"1a: [2b,[3c: 3cc]] 4d: 5e",
521+
[]string{"1a", "4d"},
522+
[]string{"[2b,[3c: 3cc]]", "5e"},
523+
},
524+
{
525+
"1a: {2b 3c} 4d: 5e",
526+
[]string{"1a", "4d"},
527+
[]string{"{2b 3c}", "5e"},
528+
},
529+
{
530+
"1a: {2b,3c} 4d: 5e",
531+
[]string{"1a", "4d"},
532+
[]string{"{2b,3c}", "5e"},
533+
},
534+
{
535+
"1a: {2b,{3c: 3cc}} 4d: 5e",
536+
[]string{"1a", "4d"},
537+
[]string{"{2b,{3c: 3cc}}", "5e"},
538+
},
539+
{
540+
"1a: {{{2b,{3c: 3cc}} 4d: 5e",
541+
nil,
542+
nil,
543+
},
544+
{
545+
"1a: [2b,[3c: 3cc]]]] 4d: 5e",
546+
nil,
547+
nil,
548+
},
519549
{
520550

521551
"Time: 2021-09-08T14:39:54.506967433+08:00",

0 commit comments

Comments
 (0)