feat: limits-frontend failover to other zones #17408
@@ -2,6 +2,9 @@ package frontend
 
 import (
 	"context"
+	"slices"
+	"sort"
+	"strings"
 
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
@@ -19,6 +22,11 @@ const (
 
 var (
 	LimitsRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil)
+
+	// defaultZoneCmp compares two zones using [strings.Compare].
+	defaultZoneCmp = func(a, b string) int {
+		return strings.Compare(a, b)
+	}
 )
 
 // ringGatherer uses a ring to find limits instances.
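
Note: the comparator is deliberately a field rather than a hard-coded sort, so production can plug in a randomized order to spread reads across zones while tests pin a stable order. As a hedged sketch (the helper below is illustrative only, not part of this PR), a test could overwrite the field with a reverse-lexicographic comparator after constructing the gatherer:

	// testZoneCmp is a hypothetical comparator a test might assign to
	// ringGatherer.zoneCmp to force a deterministic, reverse-lexicographic
	// zone order (e.g. "zone-c" is queried before "zone-a").
	testZoneCmp := func(a, b string) int {
		return strings.Compare(b, a)
	}
	g.zoneCmp = testZoneCmp // g is a *ringGatherer under test
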
@@ -28,6 +36,7 @@ type ringGatherer struct {
 	pool                    *ring_client.Pool
 	numPartitions           int
 	assignedPartitionsCache cache[string, *proto.GetAssignedPartitionsResponse]
+	zoneCmp                 func(a, b string) int
 }
 
 // newRingGatherer returns a new ringGatherer.
@@ -44,10 +53,11 @@ func newRingGatherer(
 		pool:                    pool,
 		numPartitions:           numPartitions,
 		assignedPartitionsCache: assignedPartitionsCache,
+		zoneCmp:                 defaultZoneCmp,
 	}
 }
 
-// ExceedsLimits implements the [exceedsLimitsGatherer] interface.
+// exceedsLimits implements the [exceedsLimitsGatherer] interface.
 func (g *ringGatherer) exceedsLimits(ctx context.Context, req *proto.ExceedsLimitsRequest) ([]*proto.ExceedsLimitsResponse, error) {
 	if len(req.Streams) == 0 {
 		return nil, nil
@@ -56,50 +66,111 @@ func (g *ringGatherer) exceedsLimits(ctx context.Context, req *proto.ExceedsLimi
 	if err != nil {
 		return nil, err
 	}
-	partitionConsumers, err := g.getPartitionConsumers(ctx, rs.Instances)
+	// Get the partition consumers for each zone.
+	zonesPartitions, err := g.getZoneAwarePartitionConsumers(ctx, rs.Instances)
 	if err != nil {
 		return nil, err
 	}
-	ownedStreams := make(map[string][]*proto.StreamMetadata)
-	for _, s := range req.Streams {
-		partition := int32(s.StreamHash % uint64(g.numPartitions))
-		addr, ok := partitionConsumers[partition]
+	// In practice we want zones to be queried in random order to spread
+	// reads. However, in tests we want a deterministic order so test cases
+	// are stable and reproducible. Having a custom sort func supports both
+	// use cases as zoneCmp can be switched out in tests.
+	zonesToQuery := make([]string, 0, len(zonesPartitions))
+	for zone := range zonesPartitions {
+		zonesToQuery = append(zonesToQuery, zone)
+	}
+	slices.SortFunc(zonesToQuery, g.zoneCmp)
+	// Make a copy of the streams from the request. We will prune this slice
+	// each time we receive the responses from a zone.
+	streams := make([]*proto.StreamMetadata, 0, len(req.Streams))
+	for _, stream := range req.Streams {
+		streams = append(streams, stream)
+	}
+	// Query each zone as ordered in zonesToQuery. If a zone answers all
+	// streams, the request is satisfied and there is no need to query
+	// subsequent zones. If a zone answers just a subset of streams
+	// (i.e. the instance that is consuming a partition is unavailable, or
+	// the partition that owns one or more streams does not have a consumer)
+	// then query the next zone for the remaining streams. We repeat this
+	// process until all streams have been queried or we have exhausted all
+	// zones.
+	responses := make([]*proto.ExceedsLimitsResponse, 0)
+	for _, zone := range zonesToQuery {
+		resps, answered, err := g.doExceedsLimitsRPCs(ctx, req.Tenant, streams, zonesPartitions[zone])
+		if err != nil {
+			continue
+		}
+		responses = append(responses, resps...)

Comment: nit.

Comment: We also need to delete the streams, can't just

Reply: Going to look at this in the next PR 👍

+		// Remove the answered streams from the slice. The slice of answered
+		// streams must be sorted so we can use sort.Search to subtract the
+		// two slices.
+		slices.Sort(answered)
+		streams = slices.DeleteFunc(streams, func(stream *proto.StreamMetadata) bool {
+			// see https://pkg.go.dev/sort#Search

Comment: This should be O(N + logN)

+			i := sort.Search(len(answered), func(i int) bool {
+				return answered[i] >= stream.StreamHash
+			})
+			return i < len(answered) && answered[i] == stream.StreamHash
+		})
+		// All streams have been checked against per-tenant limits.
+		if len(streams) == 0 {
+			break
+		}
+	}
+	// TODO(grobinson): In a subsequent change, I will figure out what to do
+	// about unanswered streams.

Comment: At present, unanswered streams are permitted. First I want to emit a metric to track this, and I will do this in a follow up PR to make the change easier to see.

+	return responses, nil
+}
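
Note: the complexity comment above points at an alternative to the sort-and-search subtraction: build a set of answered hashes once, then delete in a single pass. A hedged sketch of that variant using the same names as the diff — the PR itself keeps the sorted-slice approach:

	// Sketch only: O(len(answered) + len(streams)) subtraction via a set,
	// instead of sorting answered and binary-searching once per stream.
	answeredSet := make(map[uint64]struct{}, len(answered))
	for _, hash := range answered {
		answeredSet[hash] = struct{}{}
	}
	streams = slices.DeleteFunc(streams, func(stream *proto.StreamMetadata) bool {
		_, ok := answeredSet[stream.StreamHash]
		return ok
	})
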
+
+func (g *ringGatherer) doExceedsLimitsRPCs(ctx context.Context, tenant string, streams []*proto.StreamMetadata, partitions map[int32]string) ([]*proto.ExceedsLimitsResponse, []uint64, error) {
+	// For each stream, figure out which instance consumes its partition.
+	instancesForStreams := make(map[string][]*proto.StreamMetadata)
+	for _, stream := range streams {
+		partition := int32(stream.StreamHash % uint64(g.numPartitions))
+		addr, ok := partitions[partition]
 		if !ok {
 			// TODO(grobinson): Drop streams when ok is false.
 			level.Warn(g.logger).Log("msg", "no instance found for partition", "partition", partition)
 			continue
 		}
-		ownedStreams[addr] = append(ownedStreams[addr], s)
+		instancesForStreams[addr] = append(instancesForStreams[addr], stream)
 	}
 	errg, ctx := errgroup.WithContext(ctx)
-	responseCh := make(chan *proto.ExceedsLimitsResponse, len(ownedStreams))
-	for addr, streams := range ownedStreams {
+	responseCh := make(chan *proto.ExceedsLimitsResponse, len(instancesForStreams))
+	answeredCh := make(chan uint64, len(streams))
+	for addr, streams := range instancesForStreams {
 		errg.Go(func() error {
 			client, err := g.pool.GetClientFor(addr)
 			if err != nil {
 				level.Error(g.logger).Log("msg", "failed to get client for instance", "instance", addr, "err", err.Error())
-				return err
+				return nil
 			}
 			resp, err := client.(proto.IngestLimitsClient).ExceedsLimits(ctx, &proto.ExceedsLimitsRequest{
-				Tenant:  req.Tenant,
+				Tenant:  tenant,
 				Streams: streams,
 			})
 			if err != nil {
-				return err
+				level.Error(g.logger).Log("msg", "failed to check exceeds limits for instance", "instance", addr, "err", err.Error())
+				return nil
 			}
 			responseCh <- resp
+			for _, stream := range streams {
+				answeredCh <- stream.StreamHash
+			}
 			return nil
 		})
 	}
-	if err = errg.Wait(); err != nil {
-		return nil, err
-	}
+	_ = errg.Wait()
 	close(responseCh)
-	responses := make([]*proto.ExceedsLimitsResponse, 0, len(rs.Instances))
-	for resp := range responseCh {
-		responses = append(responses, resp)
+	close(answeredCh)
+	responses := make([]*proto.ExceedsLimitsResponse, 0, len(instancesForStreams))
+	for r := range responseCh {
+		responses = append(responses, r)
 	}
-	return responses, nil
+	answered := make([]uint64, 0, len(streams))
+	for streamHash := range answeredCh {
+		answered = append(answered, streamHash)
+	}
+	return responses, answered, nil
 }
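
Note one subtlety in the fan-out above: nothing drains responseCh or answeredCh until errg.Wait() returns, so both channels must be buffered for every send that can happen — which is why they are sized to len(instancesForStreams) and len(streams) respectively; unbuffered channels here would deadlock. A minimal standalone sketch of the same collect-after-Wait pattern (illustrative names, not from the PR):

	// Hypothetical illustration: buffer the channel for every possible
	// send, wait for all goroutines, then drain after close.
	var errg errgroup.Group
	out := make(chan uint64, len(streams)) // one slot per send
	for _, s := range streams {
		errg.Go(func() error {
			out <- s.StreamHash // never blocks thanks to the buffer
			return nil
		})
	}
	_ = errg.Wait() // all sends have completed once Wait returns
	close(out)
	for hash := range out {
		_ = hash // drain after close
	}
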
+
+type zonePartitionConsumersResult struct {

Comment: Double checking, no need for a mutex for these? No concurrent test cases?

Reply: No mutex required right now. Each test case uses a separate mock, and requests are executed in sequence.