Skip to content

Commit aa9b6f4

Browse files
author
yibin
authored
util: add tiflash wait time in execution summary (#57058)
ref pingcap/tiflash#9489, close #57059
1 parent 8832684 commit aa9b6f4

File tree

5 files changed

+119
-26
lines changed

5 files changed

+119
-26
lines changed

DEPS.bzl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5828,13 +5828,13 @@ def go_deps():
58285828
name = "com_github_pingcap_tipb",
58295829
build_file_proto_mode = "disable_global",
58305830
importpath = "github.com/pingcap/tipb",
5831-
sha256 = "b39e154272ba36d145c6049947a012a76be740b32a44a46d7253caa145c56cc9",
5832-
strip_prefix = "github.com/pingcap/[email protected]20241008083645-0bcddae67837",
5831+
sha256 = "1b707429b5b938a05b250b5770be2a6aa243d6a4983d23b01bbca164e86b3e3c",
5832+
strip_prefix = "github.com/pingcap/[email protected]20241022082558-0607513e7fa4",
58335833
urls = [
5834-
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
5835-
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
5836-
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
5837-
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241008083645-0bcddae67837.zip",
5834+
"http://bazel-cache.pingcap.net:8080/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
5835+
"http://ats.apps.svc/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
5836+
"https://cache.hawkingrei.com/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
5837+
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/pingcap/tipb/com_github_pingcap_tipb-v0.0.0-20241022082558-0607513e7fa4.zip",
58385838
],
58395839
)
58405840
go_repository(

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ require (
9191
github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d
9292
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5
9393
github.com/pingcap/tidb/pkg/parser v0.0.0-20211011031125-9b13dc409c5e
94-
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837
94+
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4
9595
github.com/prometheus/client_golang v1.20.5
9696
github.com/prometheus/client_model v0.6.1
9797
github.com/prometheus/common v0.57.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,8 @@ github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d h1:y3EueKVfVykdpTyfU
680680
github.com/pingcap/log v1.1.1-0.20240314023424-862ccc32f18d/go.mod h1:ORfBOFp1eteu2odzsyaxI+b8TzJwgjwyQcGhI+9SfEA=
681681
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeAhmOQHNPRRogMYxP13V7BkYw3ZsoSfE=
682682
github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530=
683-
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837 h1:tyIymn821fB8gUmqafdvLlcFkVOpgyJXImoYJ8n9oJE=
684-
github.com/pingcap/tipb v0.0.0-20241008083645-0bcddae67837/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
683+
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4 h1:wvaUybJT0fUReCDcFtV3CEvMuI9iu+G7IW72tbSlil4=
684+
github.com/pingcap/tipb v0.0.0-20241022082558-0607513e7fa4/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs=
685685
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
686686
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
687687
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=

pkg/util/execdetails/execdetails.go

Lines changed: 96 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ type basicCopRuntimeStats struct {
555555
procTimes Percentile[Duration]
556556
// executor extra infos
557557
tiflashScanContext TiFlashScanContext
558+
tiflashWaitSummary TiFlashWaitSummary
558559
}
559560

560561
type canGetFloat64 interface {
@@ -682,7 +683,10 @@ func (p *Percentile[valueType]) Sum() float64 {
682683
// String implements the RuntimeStats interface.
683684
func (e *basicCopRuntimeStats) String() string {
684685
if e.storeType == "tiflash" {
685-
return fmt.Sprintf("time:%v, loops:%d, threads:%d, ", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads) + e.tiflashScanContext.String()
686+
if e.tiflashWaitSummary.CanBeIgnored() {
687+
return fmt.Sprintf("time:%v, loops:%d, threads:%d, %s", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads, e.tiflashScanContext.String())
688+
}
689+
return fmt.Sprintf("time:%v, loops:%d, threads:%d, %s, %s", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load(), e.threads, e.tiflashWaitSummary.String(), e.tiflashScanContext.String())
686690
}
687691
return fmt.Sprintf("time:%v, loops:%d", FormatDuration(time.Duration(e.consume.Load())), e.loop.Load())
688692
}
@@ -700,6 +704,7 @@ func (e *basicCopRuntimeStats) Clone() RuntimeStats {
700704
stats.consume.Store(e.consume.Load())
701705
stats.rows.Store(e.rows.Load())
702706
stats.tiflashScanContext = e.tiflashScanContext.Clone()
707+
stats.tiflashWaitSummary = e.tiflashWaitSummary.Clone()
703708
return stats
704709
}
705710

@@ -720,6 +725,7 @@ func (e *basicCopRuntimeStats) Merge(rs RuntimeStats) {
720725
e.procTimes.MergePercentile(&tmp.procTimes)
721726
}
722727
e.tiflashScanContext.Merge(tmp.tiflashScanContext)
728+
e.tiflashWaitSummary.Merge(tmp.tiflashWaitSummary)
723729
}
724730

725731
// Tp implements the RuntimeStats interface.
@@ -799,7 +805,14 @@ func (crs *CopRuntimeStats) RecordOneCopTask(address string, summary *tipb.Execu
799805
totalVectorIdxReadVecTimeMs: summary.GetTiflashScanContext().GetTotalVectorIdxReadVecTimeMs(),
800806
totalVectorIdxReadOthersTimeMs: summary.GetTiflashScanContext().GetTotalVectorIdxReadOthersTimeMs(),
801807
},
808+
tiflashWaitSummary: TiFlashWaitSummary{
809+
executionTime: *summary.TimeProcessedNs,
810+
minTSOWaitTime: summary.GetTiflashWaitSummary().GetMinTSOWaitNs(),
811+
pipelineBreakerWaitTime: summary.GetTiflashWaitSummary().GetPipelineBreakerWaitNs(),
812+
pipelineQueueWaitTime: summary.GetTiflashWaitSummary().GetPipelineQueueWaitNs(),
813+
},
802814
}
815+
803816
for _, instance := range summary.GetTiflashScanContext().GetRegionsOfInstance() {
804817
data.tiflashScanContext.regionsOfInstance[instance.GetInstanceId()] = instance.GetRegionNum()
805818
}
@@ -826,7 +839,7 @@ func (crs *CopRuntimeStats) GetTasks() (totalTasks int32) {
826839
}
827840

828841
// MergeBasicStats traverses basicCopRuntimeStats in the CopRuntimeStats and collects some useful information.
829-
func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], totalTime time.Duration, totalTasks, totalLoops, totalThreads int32, totalTiFlashScanContext TiFlashScanContext) {
842+
func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], totalTime time.Duration, totalTasks, totalLoops, totalThreads int32, totalTiFlashScanContext TiFlashScanContext, totalTiFlashWaitSummary TiFlashWaitSummary) {
830843
totalTiFlashScanContext = TiFlashScanContext{
831844
regionsOfInstance: make(map[string]uint64),
832845
}
@@ -836,6 +849,7 @@ func (crs *CopRuntimeStats) MergeBasicStats() (procTimes Percentile[Duration], t
836849
totalLoops += instanceStats.loop.Load()
837850
totalThreads += instanceStats.threads
838851
totalTiFlashScanContext.Merge(instanceStats.tiflashScanContext)
852+
totalTiFlashWaitSummary.Merge(instanceStats.tiflashWaitSummary)
839853
totalTasks += instanceStats.totalTasks
840854
}
841855
return
@@ -846,15 +860,18 @@ func (crs *CopRuntimeStats) String() string {
846860
return ""
847861
}
848862

849-
procTimes, totalTime, totalTasks, totalLoops, totalThreads, totalTiFlashScanContext := crs.MergeBasicStats()
863+
procTimes, totalTime, totalTasks, totalLoops, totalThreads, totalTiFlashScanContext, totalTiFlashWaitSummary := crs.MergeBasicStats()
850864
avgTime := time.Duration(totalTime.Nanoseconds() / int64(totalTasks))
851865
isTiFlashCop := crs.storeType == "tiflash"
852866

853867
buf := bytes.NewBuffer(make([]byte, 0, 16))
854868
{
855-
printTiFlashScanContext := func() {
869+
printTiFlashSpecificInfo := func() {
856870
if isTiFlashCop {
857871
fmt.Fprintf(buf, ", threads:%d}", totalThreads)
872+
if !totalTiFlashWaitSummary.CanBeIgnored() {
873+
buf.WriteString(", " + totalTiFlashWaitSummary.String())
874+
}
858875
if !totalTiFlashScanContext.Empty() {
859876
buf.WriteString(", " + totalTiFlashScanContext.String())
860877
}
@@ -864,12 +881,12 @@ func (crs *CopRuntimeStats) String() string {
864881
}
865882
if totalTasks == 1 {
866883
fmt.Fprintf(buf, "%v_task:{time:%v, loops:%d", crs.storeType, FormatDuration(time.Duration(procTimes.GetPercentile(0))), totalLoops)
867-
printTiFlashScanContext()
884+
printTiFlashSpecificInfo()
868885
} else {
869886
fmt.Fprintf(buf, "%v_task:{proc max:%v, min:%v, avg: %v, p80:%v, p95:%v, iters:%v, tasks:%v",
870887
crs.storeType, FormatDuration(time.Duration(procTimes.GetMax().GetFloat64())), FormatDuration(time.Duration(procTimes.GetMin().GetFloat64())), FormatDuration(avgTime),
871888
FormatDuration(time.Duration(procTimes.GetPercentile(0.8))), FormatDuration(time.Duration(procTimes.GetPercentile(0.95))), totalLoops, totalTasks)
872-
printTiFlashScanContext()
889+
printTiFlashSpecificInfo()
873890
}
874891
}
875892
if !isTiFlashCop {
@@ -1211,6 +1228,79 @@ func (context *TiFlashScanContext) Empty() bool {
12111228
return res
12121229
}
12131230

1231+
// TiFlashWaitSummary is used to express all kinds of wait information in tiflash
1232+
type TiFlashWaitSummary struct {
1233+
// keep execution time to do merge work, always record the wait time with largest execution time
1234+
executionTime uint64
1235+
minTSOWaitTime uint64
1236+
pipelineBreakerWaitTime uint64
1237+
pipelineQueueWaitTime uint64
1238+
}
1239+
1240+
// Clone implements the deep copy of * TiFlashWaitSummary
1241+
func (waitSummary *TiFlashWaitSummary) Clone() TiFlashWaitSummary {
1242+
newSummary := TiFlashWaitSummary{
1243+
executionTime: waitSummary.executionTime,
1244+
minTSOWaitTime: waitSummary.minTSOWaitTime,
1245+
pipelineBreakerWaitTime: waitSummary.pipelineBreakerWaitTime,
1246+
pipelineQueueWaitTime: waitSummary.pipelineQueueWaitTime,
1247+
}
1248+
return newSummary
1249+
}
1250+
1251+
// String dumps TiFlashWaitSummary info as string
1252+
func (waitSummary *TiFlashWaitSummary) String() string {
1253+
if waitSummary.CanBeIgnored() {
1254+
return ""
1255+
}
1256+
buf := bytes.NewBuffer(make([]byte, 0, 32))
1257+
buf.WriteString("tiflash_wait: {")
1258+
empty := true
1259+
if waitSummary.minTSOWaitTime >= uint64(time.Millisecond) {
1260+
buf.WriteString("minTSO_wait: ")
1261+
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.minTSOWaitTime).Milliseconds(), 10))
1262+
buf.WriteString("ms")
1263+
empty = false
1264+
}
1265+
if waitSummary.pipelineBreakerWaitTime >= uint64(time.Millisecond) {
1266+
if !empty {
1267+
buf.WriteString(", ")
1268+
}
1269+
buf.WriteString("pipeline_breaker_wait: ")
1270+
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.pipelineBreakerWaitTime).Milliseconds(), 10))
1271+
buf.WriteString("ms")
1272+
empty = false
1273+
}
1274+
if waitSummary.pipelineQueueWaitTime >= uint64(time.Millisecond) {
1275+
if !empty {
1276+
buf.WriteString(", ")
1277+
}
1278+
buf.WriteString("pipeline_queue_wait: ")
1279+
buf.WriteString(strconv.FormatInt(time.Duration(waitSummary.pipelineQueueWaitTime).Milliseconds(), 10))
1280+
buf.WriteString("ms")
1281+
}
1282+
buf.WriteString("}")
1283+
return buf.String()
1284+
}
1285+
1286+
// Merge make sum to merge the information in TiFlashWaitSummary
1287+
func (waitSummary *TiFlashWaitSummary) Merge(other TiFlashWaitSummary) {
1288+
if waitSummary.executionTime < other.executionTime {
1289+
waitSummary.executionTime = other.executionTime
1290+
waitSummary.minTSOWaitTime = other.minTSOWaitTime
1291+
waitSummary.pipelineBreakerWaitTime = other.pipelineBreakerWaitTime
1292+
waitSummary.pipelineQueueWaitTime = other.pipelineQueueWaitTime
1293+
}
1294+
}
1295+
1296+
// CanBeIgnored check whether TiFlashWaitSummary can be ignored, not all tidb executors have significant tiflash wait summary
1297+
func (waitSummary *TiFlashWaitSummary) CanBeIgnored() bool {
1298+
res := waitSummary.minTSOWaitTime < uint64(time.Millisecond) &&
1299+
waitSummary.pipelineBreakerWaitTime < uint64(time.Millisecond) &&
1300+
waitSummary.pipelineQueueWaitTime < uint64(time.Millisecond)
1301+
return res
1302+
}
1303+
12141304
// BasicRuntimeStats is the basic runtime stats.
12151305
type BasicRuntimeStats struct {
12161306
// executor's Next() called times.

0 commit comments

Comments
 (0)