@@ -4,10 +4,12 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/coder/quartz"
 	"github.com/go-kit/log"
+	"github.com/go-kit/log/level"
 	"github.com/grafana/dskit/ring"
 	"github.com/grafana/dskit/services"
 	"github.com/prometheus/client_golang/prometheus"
@@ -25,6 +27,11 @@ const (
 	// Ring
 	RingKey  = "ingest-limits"
 	RingName = "ingest-limits"
+
+	// The maximum amount of time to wait to join the consumer group and be
+	// assigned some partitions before giving up and going ready in
+	// [Service.CheckReady].
+	partitionReadinessWaitAssignPeriod = 30 * time.Second
 )
 
 // Service is a service that manages stream metadata limits.
@@ -47,6 +54,11 @@ type Service struct {
 	// Metrics.
 	streamEvictionsTotal *prometheus.CounterVec
 
+	// Readiness check, see [Service.CheckReady].
+	partitionReadinessPassed          bool
+	partitionReadinessMtx             sync.Mutex
+	partitionReadinessWaitAssignSince time.Time
+
 	// Used for tests.
 	clock quartz.Clock
 }
@@ -180,10 +192,55 @@ func (s *Service) CheckReady(ctx context.Context) error {
 	if s.State() != services.Running {
 		return fmt.Errorf("service is not running: %v", s.State())
 	}
-	err := s.lifecycler.CheckReady(ctx)
-	if err != nil {
+	if err := s.lifecycler.CheckReady(ctx); err != nil {
 		return fmt.Errorf("lifecycler not ready: %w", err)
 	}
+	s.partitionReadinessMtx.Lock()
+	defer s.partitionReadinessMtx.Unlock()
+	// We are ready when all of our assigned partitions have replayed the
+	// last active window records. This is referred to as partition readiness.
+	// Once we have passed partition readiness we never check it again as
+	// otherwise the service could become unready during a partition rebalance.
+	if !s.partitionReadinessPassed {
+		if s.partitionManager.Count() == 0 {
+			// If partition readiness, once passed, is never checked again,
+			// we can assume that the service has recently started and is
+			// trying to become ready for the first time. If we do not have
+			// any assigned partitions we should wait some time in case we
+			// eventually get assigned some partitions, and if not, we give
+			// up and become ready to guarantee liveness.
+			return s.checkPartitionsAssigned(ctx)
+		}
+		return s.checkPartitionsReady(ctx)
+	}
+	return nil
+}
+
+// checkPartitionsAssigned checks if we either have been assigned some
+// partitions or the wait assign period has elapsed. It must not be called
+// without a lock on partitionReadinessMtx.
+func (s *Service) checkPartitionsAssigned(_ context.Context) error {
+	if s.partitionReadinessWaitAssignSince == (time.Time{}) {
+		s.partitionReadinessWaitAssignSince = s.clock.Now()
+	}
+	if s.clock.Since(s.partitionReadinessWaitAssignSince) < partitionReadinessWaitAssignPeriod {
+		return errors.New("waiting to be assigned some partitions")
+	}
+	level.Warn(s.logger).Log("msg", "no partitions assigned, going ready")
+	s.partitionReadinessPassed = true
+	return nil
+}
+
+// checkPartitionsReady checks if all our assigned partitions are ready.
+// It must not be called without a lock on partitionReadinessMtx.
+func (s *Service) checkPartitionsReady(_ context.Context) error {
+	// If we lose our assigned partitions while replaying them we want to
+	// wait another complete wait assign period.
+	s.partitionReadinessWaitAssignSince = time.Time{}
+	if !s.partitionManager.CheckReady() {
+		return errors.New("partitions are not ready")
+	}
+	s.partitionReadinessPassed = true
 	return nil
 }
 
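Because the readiness latch reads time from an injected quartz.Clock rather than the time package, the wait-then-give-up behaviour can be exercised in tests without real sleeps. The sketch below is a standalone illustration of that pattern, not the actual Loki code: the tracker type, its assigned/replayed callbacks, and the test names are hypothetical stand-ins for the Service and its partition manager, and it assumes quartz's documented mock API (quartz.NewMock, Advance).

// readiness_sketch.go (illustrative only)
package readiness

import (
	"errors"
	"sync"
	"time"

	"github.com/coder/quartz"
)

const waitAssignPeriod = 30 * time.Second

// tracker is a minimal stand-in for the latch added in this change: it goes
// ready once its partitions have replayed, or after waitAssignPeriod if no
// partitions were ever assigned.
type tracker struct {
	mtx             sync.Mutex
	passed          bool
	waitAssignSince time.Time
	clock           quartz.Clock
	assigned        func() int  // stand-in for partitionManager.Count()
	replayed        func() bool // stand-in for partitionManager.CheckReady()
}

func (t *tracker) checkReady() error {
	t.mtx.Lock()
	defer t.mtx.Unlock()
	if t.passed {
		return nil // never re-checked once passed
	}
	if t.assigned() == 0 {
		if t.waitAssignSince.IsZero() {
			t.waitAssignSince = t.clock.Now() // start the wait-assign window
		}
		if t.clock.Since(t.waitAssignSince) < waitAssignPeriod {
			return errors.New("waiting to be assigned some partitions")
		}
		t.passed = true // give up and go ready to guarantee liveness
		return nil
	}
	t.waitAssignSince = time.Time{} // assignments arrived; reset the window
	if !t.replayed() {
		return errors.New("partitions are not ready")
	}
	t.passed = true
	return nil
}

A test can then drive the clock past the wait-assign period deterministically (again assuming the quartz mock API):

// readiness_sketch_test.go (illustrative only)
package readiness

import (
	"testing"
	"time"

	"github.com/coder/quartz"
)

func TestGoesReadyAfterWaitAssignPeriod(t *testing.T) {
	clk := quartz.NewMock(t)
	tr := &tracker{
		clock:    clk,
		assigned: func() int { return 0 },
		replayed: func() bool { return false },
	}
	// The first check opens the wait-assign window and reports not ready.
	if err := tr.checkReady(); err == nil {
		t.Fatal("expected not ready while waiting for assignment")
	}
	// Move the mock clock past the window; the next check latches ready.
	clk.Advance(waitAssignPeriod + time.Second)
	if err := tr.checkReady(); err != nil {
		t.Fatalf("expected ready after wait period, got %v", err)
	}
	// Readiness is a one-way latch: subsequent checks stay ready.
	if err := tr.checkReady(); err != nil {
		t.Fatalf("expected readiness to stick, got %v", err)
	}
}

This is also why the diff keeps clock quartz.Clock as a field marked "Used for tests": production code passes a real clock, while tests control time without flaky sleeps.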