Skip to content

Commit 4d5e69e

Browse files
committed
Add extra telemetry to monitor failures
1 parent d715a75 commit 4d5e69e

File tree

3 files changed

+6
-0
lines changed

3 files changed

+6
-0
lines changed

policy/handler.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ import (
1212
"sync"
1313
"time"
1414

15+
metrics "github.com/armon/go-metrics"
1516
"github.com/google/go-cmp/cmp"
1617
hclog "github.com/hashicorp/go-hclog"
1718
"github.com/hashicorp/go-multierror"
@@ -228,6 +229,7 @@ func (h *Handler) handleTick(ctx context.Context, policy *sdk.ScalingPolicy) (*s
228229

229230
status, err := target.Status(policy.Target.Config)
230231
if err != nil {
232+
metrics.IncrCounter([]string{"policy", "target_status", "failure_count"}, 1)
231233
h.log.Warn("failed to get target status", "error", err)
232234
return nil, err
233235
}

policy/manager.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ func (m *Manager) monitorPolicies(ctx context.Context, evalCh chan<- *sdk.Scalin
116116

117117
case err := <-m.policyIDsErrCh:
118118
m.log.Error("encountered an error monitoring policy IDs", "error", err)
119+
metrics.IncrCounter([]string{"policy", "manager", "failure_count"}, 1)
119120
if isUnrecoverableError(err) {
120121
return err
121122
}

policyeval/base_worker.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ func (w *BaseWorker) handlePolicy(ctx context.Context, eval *sdk.ScalingEvaluati
181181
"on_check_error", eval.Policy.OnCheckError,
182182
"error", err)
183183

184+
metrics.IncrCounterWithLabels([]string{"policy", "run_check", "failure_count"}, 1, []metrics.Label{{Name: "check", Value: checkEval.Check.Name}})
185+
184186
// Define how to handle error.
185187
// Use check behaviour if set or fail iff the policy is set to fail.
186188
switch checkEval.Check.OnError {
@@ -287,6 +289,7 @@ func (w *BaseWorker) handlePolicy(ctx context.Context, eval *sdk.ScalingEvaluati
287289

288290
err = w.scaleTarget(logger, target, eval.Policy, *winner.action, currentStatus)
289291
if err != nil {
292+
metrics.IncrCounter([]string{"target", "scale", "failure_count"}, 1)
290293
return err
291294
}
292295

0 commit comments

Comments
 (0)