From 8632e7e5d333274bdb468351dc711168ebede84c Mon Sep 17 00:00:00 2001 From: Michi Mutsuzaki Date: Fri, 18 Jul 2025 21:05:20 +0000 Subject: [PATCH] bugtool: Collect pprof CPU profile Collect a pprof CPU profile as part of the bugtool output. This makes the bugtool command 30 seconds slower, but I think it's still worth it to always collect a CPU profile. Having access to a CPU profile is immensely valuable for troubleshooting performance issues, and we often end up manually running 'gops pprof-cpu' to collect it anyway. Bump the timeout for the "Test Tetragon with a different tracing-policy-dir" step in Packages e2e Tests from 30 seconds to 60 seconds. This step runs the tetra bugtool command. Signed-off-by: Michi Mutsuzaki --- .github/workflows/packages-e2e-tests.yaml | 4 +-- pkg/bugtool/bugtool.go | 33 ++++++++++++++++------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/.github/workflows/packages-e2e-tests.yaml b/.github/workflows/packages-e2e-tests.yaml index f10d9b03fc8..a286d2dd47d 100644 --- a/.github/workflows/packages-e2e-tests.yaml +++ b/.github/workflows/packages-e2e-tests.yaml @@ -156,7 +156,7 @@ jobs: - name: Test Tetragon with a different tracing-policy-dir uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 with: - timeout_seconds: 30 + timeout_seconds: 60 max_attempts: 5 retry_wait_seconds: 5 retry_on: error @@ -165,7 +165,7 @@ jobs: sudo tetra status sudo grep "tetra" /var/log/tetragon/tetragon.log sudo tetra tracingpolicy list | grep bpf - - sudo tetra bugtool 2>&1 | grep "Successfully dumped gops pprof-heap" - + sudo tetra bugtool 2>&1 | grep "Successfully dumped gops pprof.*profile=heap" - - name: Uninstall Tetragon Tarball run: | diff --git a/pkg/bugtool/bugtool.go b/pkg/bugtool/bugtool.go index b6ae66d5447..b4618bc0f8b 100644 --- a/pkg/bugtool/bugtool.go +++ b/pkg/bugtool/bugtool.go @@ -517,7 +517,7 @@ func (s *bugtoolInfo) addBpftoolInfo(tarWriter *tar.Writer) { s.execCmd(tarWriter, "bpftool-cgroups.json", 
s.info.BpfToolPath, "cgroup", "tree", "-j") } -func (s *bugtoolInfo) getPProf(tarWriter *tar.Writer, file string) error { +func (s *bugtoolInfo) getPProf(tarWriter *tar.Writer, file string, gopsSignal byte) error { if s.info.GopsAddr == "" { s.multiLog.Info("Skipping gops dump info as daemon is running without gops, use --gops-address to enable gops") return nil @@ -531,15 +531,15 @@ func (s *bugtoolInfo) getPProf(tarWriter *tar.Writer, file string) error { return err } - buf := []byte{gopssignal.HeapProfile} + buf := []byte{gopsSignal} if _, err := conn.Write(buf); err != nil { - s.multiLog.WithField("gops-address", s.info.GopsAddr).WithError(err).Warn("Failed to send gops pprof-heap command") + s.multiLog.WithField("gops-address", s.info.GopsAddr).WithField("file", file).WithError(err).Warn("Failed to send gops pprof command") return err } buff := new(bytes.Buffer) if _, err = buff.ReadFrom(conn); err != nil { - s.multiLog.WithField("gops-address", s.info.GopsAddr).WithError(err).Warn("Failed reading gops pprof-heap response") + s.multiLog.WithField("gops-address", s.info.GopsAddr).WithField("file", file).WithError(err).Warn("Failed reading gops pprof response") } return s.tarAddBuff(tarWriter, file, buff) } @@ -566,11 +566,26 @@ func (s *bugtoolInfo) addGopsInfo(tarWriter *tar.Writer) { s.execCmd(tarWriter, "gops.stack", s.info.GopsPath, "stack", s.info.GopsAddr) s.execCmd(tarWriter, "gops.stats", s.info.GopsPath, "stats", s.info.GopsAddr) s.execCmd(tarWriter, "gops.memstats", s.info.GopsPath, "memstats", s.info.GopsAddr) - err = s.getPProf(tarWriter, "gops.pprof-heap") - if err != nil { - s.multiLog.WithField("gops-address", s.info.GopsAddr).WithField("gops-path", s.info.GopsPath).WithError(err).Warn("Failed to dump gops pprof-heap") - } else { - s.multiLog.WithField("gops-address", s.info.GopsAddr).WithField("gops-path", s.info.GopsPath).Info("Successfully dumped gops pprof-heap") + profiles := map[string]byte{ + "cpu": gopssignal.CPUProfile, + "heap": 
gopssignal.HeapProfile, + } + for name, signal := range profiles { + err = s.getPProf(tarWriter, "gops.pprof-"+name, signal) + if err != nil { + s.multiLog. + WithField("gops-address", s.info.GopsAddr). + WithField("gops-path", s.info.GopsPath). + WithField("profile", name). + WithError(err). + Warn("Failed to dump gops pprof") + } else { + s.multiLog. + WithField("gops-address", s.info.GopsAddr). + WithField("gops-path", s.info.GopsPath). + WithField("profile", name). + Info("Successfully dumped gops pprof") + } } }