Skip to content

Commit 46263ab

Browse files
authored
Move to kelindar/roaring (#35)
1 parent ce79974 commit 46263ab

File tree

4 files changed

+69
-68
lines changed

4 files changed

+69
-68
lines changed

go.mod

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,19 @@ go 1.24.2
55
toolchain go1.24.4
66

77
require (
8+
github.com/kelindar/roaring v0.0.4
89
github.com/kelindar/s3 v0.0.3
910
github.com/klauspost/compress v1.18.0
1011
github.com/stretchr/testify v1.9.0
11-
github.com/weaviate/sroar v0.0.10
1212
)
1313

1414
require (
1515
github.com/davecgh/go-spew v1.1.1 // indirect
16-
github.com/mschoch/smat v0.2.0 // indirect
17-
github.com/pkg/errors v0.9.1 // indirect
16+
github.com/kelindar/bitmap v1.5.3 // indirect
17+
github.com/kelindar/simd v1.1.2 // indirect
18+
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
1819
github.com/pmezard/go-difflib v1.0.0 // indirect
1920
golang.org/x/sync v0.15.0 // indirect
21+
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e // indirect
2022
gopkg.in/yaml.v3 v3.0.1 // indirect
2123
)

go.sum

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,25 @@
1-
github.com/RoaringBitmap/roaring v0.6.1 h1:O36Tdaj1Fi/zyr25shTHwlQPGdq53+u4WkM08AOEjiE=
2-
github.com/RoaringBitmap/roaring v0.6.1/go.mod h1:WZ83fjBF/7uBHi6QoFyfGL4+xuV4Qn+xFkm4+vSzrhE=
31
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
42
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3+
github.com/kelindar/bitmap v1.5.3 h1:/ty1SvbLE5ZKO4ToFNeXe3P3RrQsoj4a0x5gZNp5Vzo=
4+
github.com/kelindar/bitmap v1.5.3/go.mod h1:j3qZjxH9s4OtvsnFTP2bmPkjqil9Y2xQlxPYHexasEA=
5+
github.com/kelindar/roaring v0.0.4 h1:WB8EKtbO+RP3bnXVtaArLiTLQDOGkdsDRm5T+hw/nlE=
6+
github.com/kelindar/roaring v0.0.4/go.mod h1:rVjbn421GwJsYSGCg5hBK0A9Eth4SpdANUzOeDn5MNA=
57
github.com/kelindar/s3 v0.0.3 h1:izXVdKkH7faO1vM+qQ1zSA9Y6L/8C8p2CkF/FIrnUa0=
68
github.com/kelindar/s3 v0.0.3/go.mod h1:O2/uN3efPfCUVNmNPHNBY242Bm7LIu+RHQ7bDU5iUXs=
9+
github.com/kelindar/simd v1.1.2 h1:KduKb+M9cMY2HIH8S/cdJyD+5n5EGgq+Aeeleos55To=
10+
github.com/kelindar/simd v1.1.2/go.mod h1:inq4DFudC7W8L5fhxoeZflLRNpWSs0GNx6MlWFvuvr0=
711
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
812
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
9-
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
10-
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
11-
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
12-
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
13+
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
14+
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
1315
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
1416
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
1517
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
1618
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
17-
github.com/weaviate/sroar v0.0.10 h1:maCKGW39Vg0ZV7f1Y7nthj+ITqEICV1qaq8bPZBUsL4=
18-
github.com/weaviate/sroar v0.0.10/go.mod h1:I6HAMeJjGMDI8cuFDUK4TIRsy5Csn5RFncNkosyNgKE=
19-
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
20-
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
2119
golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8=
2220
golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
21+
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e h1:CsOuNlbOuf0mzxJIefr6Q4uAUetRUwZE4qt7VfzP+xo=
22+
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
2323
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
2424
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
2525
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

internal/buffer/buffer.go

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,27 +8,27 @@ import (
88
"slices"
99
"time"
1010

11+
"github.com/kelindar/roaring"
1112
"github.com/kelindar/tales/internal/codec"
12-
"github.com/weaviate/sroar"
1313
)
1414

1515
// Buffer represents the in-memory buffer for the current chunk.
1616
type Buffer struct {
17-
codec *codec.Codec // Codec for compression
18-
data []byte // Raw concatenated log entries
19-
index map[uint32]*sroar.Bitmap // Actor ID -> sequence IDs bitmap
20-
length int // Number of entries in buffer
21-
maxSize int // Maximum number of entries
22-
start time.Time // Start time of the buffer
23-
time [2]uint32 // Time bounds [min, max] (Unix seconds)
17+
codec *codec.Codec // Codec for compression
18+
data []byte // Raw concatenated log entries
19+
index map[uint32]*roaring.Bitmap // Actor ID -> sequence IDs bitmap
20+
length int // Number of entries in buffer
21+
maxSize int // Maximum number of entries
22+
start time.Time // Start time of the buffer
23+
time [2]uint32 // Time bounds [min, max] (Unix seconds)
2424
}
2525

2626
// New creates a new buffer with the specified maximum size.
2727
func New(maxSize int, codec *codec.Codec) *Buffer {
2828
return &Buffer{
2929
codec: codec,
3030
data: make([]byte, 0, maxSize*100), // Estimate ~100 bytes per entry
31-
index: make(map[uint32]*sroar.Bitmap),
31+
index: make(map[uint32]*roaring.Bitmap),
3232
length: 0,
3333
maxSize: maxSize,
3434
start: time.Now(),
@@ -65,10 +65,10 @@ func (b *Buffer) Add(entry codec.LogEntry, entryTime time.Time) bool {
6565
for actorID := range entry.Actors() {
6666
bitmap, ok := b.index[actorID]
6767
if !ok || bitmap == nil {
68-
bitmap = sroar.NewBitmap()
68+
bitmap = roaring.New()
6969
b.index[actorID] = bitmap
7070
}
71-
bitmap.Set(uint64(entry.ID()))
71+
bitmap.Set(entry.ID())
7272
}
7373

7474
return true
@@ -172,7 +172,8 @@ func (b *Buffer) Flush() (Flush, error) {
172172
if bm == nil {
173173
continue
174174
}
175-
bitmapData := bm.ToBuffer()
175+
176+
bitmapData := bm.ToBytes()
176177

177178
// Do not compress bitmap, just store raw bytes
178179
index = append(index, Index{
@@ -205,7 +206,7 @@ func (b *Buffer) reset() {
205206
b.time[0] = 0
206207
b.time[1] = 0
207208
for k := range b.index {
208-
b.index[k] = sroar.NewBitmap()
209+
b.index[k].Clear()
209210
}
210211
b.start = time.Now()
211212
}

tales_query.go

Lines changed: 40 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@ import (
99
"iter"
1010
"time"
1111

12+
"github.com/kelindar/roaring"
1213
"github.com/kelindar/tales/internal/codec"
1314
"github.com/kelindar/tales/internal/seq"
14-
"github.com/weaviate/sroar"
1515
)
1616

1717
// queryWarm queries the in-memory buffer for entries.
@@ -60,12 +60,13 @@ func (l *Service) queryDay(ctx context.Context, actors []uint32, day time.Time,
6060

6161
// For each chunk, load all relevant bitmaps and compute intersection
6262
for _, chunk := range meta.Chunks {
63-
// Skip chunks that don't overlap with query time range
6463
if !chunk.Between(fromSec, toSec) {
6564
continue
6665
}
66+
6767
chunkKey := keyOfChunk(seq.FormatDate(day), chunk.Offset())
68-
var index *sroar.Bitmap
68+
69+
var index *roaring.Bitmap
6970
for _, a := range actors {
7071
idx, ok := chunk.Actors[a]
7172
if !ok || uint32(idx[0]) < t0 || uint32(idx[0]) > t1 {
@@ -82,14 +83,14 @@ func (l *Service) queryDay(ctx context.Context, actors []uint32, day time.Time,
8283
switch {
8384
case index == nil:
8485
index = bitmap
85-
case !index.IsEmpty():
86+
case index.Count() > 0:
8687
index.And(bitmap)
8788
}
8889
}
8990

9091
// Query log section with intersection bitmap
91-
if index != nil && !index.IsEmpty() {
92-
if !l.queryChunk(ctx, chunkKey, chunk, *index, day, from, to, yield) {
92+
if index != nil && index.Count() > 0 {
93+
if !l.queryChunk(ctx, chunkKey, chunk, index, day, from, to, yield) {
9394
return false
9495
}
9596
}
@@ -99,7 +100,7 @@ func (l *Service) queryDay(ctx context.Context, actors []uint32, day time.Time,
99100
}
100101

101102
// loadBitmap downloads and decodes a single bitmap for a given index entry.
102-
func (l *Service) loadBitmap(ctx context.Context, key string, entry codec.IndexEntry) (*sroar.Bitmap, error) {
103+
func (l *Service) loadBitmap(ctx context.Context, key string, entry codec.IndexEntry) (*roaring.Bitmap, error) {
103104
i0 := int64(entry[1])
104105
i1 := i0 + int64(entry[2]) - 1
105106

@@ -109,27 +110,26 @@ func (l *Service) loadBitmap(ctx context.Context, key string, entry codec.IndexE
109110
}
110111

111112
// Bitmaps are stored uncompressed, so just deserialize
112-
bm := sroar.FromBuffer(data)
113+
bm := roaring.FromBytes(data)
113114
return bm, nil
114115
}
115116

116117
// queryChunk queries a specific log chunk for sequence IDs using an optimized bitmap iterator.
117118
// This function efficiently filters log entries by leveraging the sorted nature of both the log entries
118119
// and the bitmap, avoiding unnecessary bitmap lookups for entries that don't match.
119-
func (l *Service) queryChunk(ctx context.Context, chunkKey string, chunk codec.ChunkEntry, sids sroar.Bitmap, day, from, to time.Time, yield func(time.Time, string) bool) bool {
120-
if sids.IsEmpty() {
120+
func (l *Service) queryChunk(ctx context.Context, chunkKey string, chunk codec.ChunkEntry, sids *roaring.Bitmap, day, from, to time.Time, yield func(time.Time, string) bool) bool {
121+
if sids.Count() == 0 {
121122
return true
122123
}
123124

124-
entries, err := l.rangeChunks(ctx, chunkKey, chunk, &sids)
125+
entries, err := l.rangeChunks(ctx, chunkKey, chunk, sids)
125126
if err != nil {
126127
return true // Skip chunks that fail to process
127128
}
128129

129130
// Process only the filtered entries
130131
for entry := range entries {
131-
id := entry.ID()
132-
ts := seq.TimeOf(id, day)
132+
ts := entry.Time(day)
133133
if !ts.Before(from) && !ts.After(to) && !yield(ts, entry.Text()) {
134134
return false // Stop iteration
135135
}
@@ -140,8 +140,8 @@ func (l *Service) queryChunk(ctx context.Context, chunkKey string, chunk codec.C
140140

141141
// rangeChunks downloads the log section from a chunk file, decompresses it, and returns
142142
// an iterator over log entries that are filtered using an optimized bitmap iterator merge algorithm.
143-
func (l *Service) rangeChunks(ctx context.Context, chunkKey string, chunk codec.ChunkEntry, sids *sroar.Bitmap) (iter.Seq[codec.LogEntry], error) {
144-
if chunk.DataSize() == 0 || sids.IsEmpty() {
143+
func (l *Service) rangeChunks(ctx context.Context, chunkKey string, chunk codec.ChunkEntry, sids *roaring.Bitmap) (iter.Seq[codec.LogEntry], error) {
144+
if chunk.DataSize() == 0 || sids.Count() == 0 {
145145
return func(yield func(codec.LogEntry) bool) {}, nil // Empty iterator
146146
}
147147

@@ -159,36 +159,34 @@ func (l *Service) rangeChunks(ctx context.Context, chunkKey string, chunk codec.
159159
}
160160

161161
return func(yield func(codec.LogEntry) bool) {
162-
iter := sids.NewIterator()
163-
idx := iter.Next()
164-
for len(buffer) > 4 && idx != 0 {
165-
entry := codec.LogEntry(buffer)
166-
size := entry.Size()
167-
if size == 0 || uint32(len(buffer)) < size {
168-
return // Invalid size or not enough data, stop iteration
169-
}
170-
171-
// Advance bitmap iterator until we find a target >= current entry ID
172-
entryID := uint64(entry.ID())
173-
for idx != 0 && idx < entryID {
174-
idx = iter.Next()
175-
}
162+
sids.Range(func(sidToFind uint32) bool {
163+
for len(buffer) > 4 {
164+
entry := codec.LogEntry(buffer)
165+
size := entry.Size()
166+
if size == 0 || uint32(len(buffer)) < size {
167+
return false // Invalid size or not enough data, stop iteration
168+
}
176169

177-
// If we've exhausted all targets, we're done
178-
if idx == 0 {
179-
return
180-
}
170+
entryID := entry.ID()
171+
if entryID < sidToFind {
172+
buffer = buffer[size:] // Advance buffer
173+
continue // Continue scanning buffer for current sid
174+
}
181175

182-
// If current entry matches current target, yield it
183-
if entryID == idx {
184-
if !yield(entry[:size]) {
185-
return // Stop iteration if yield returns false
176+
// If we found the entry, yield it. If we overshot, the entry is not
177+
// in the buffer, so we just move to the next sid.
178+
if entryID == sidToFind {
179+
if !yield(entry[:size]) {
180+
return false // Stop iteration if yield returns false
181+
}
182+
buffer = buffer[size:] // Advance buffer
186183
}
187-
idx = iter.Next()
188-
}
189184

190-
// Always advance buffer after processing each entry
191-
buffer = buffer[size:]
192-
}
185+
// We either found the entry and yielded it, or we've overshot.
186+
// In both cases, we are done with this sid and should get the next one.
187+
return true // Continue to next sid
188+
}
189+
return false // Buffer is exhausted, stop iteration
190+
})
193191
}, nil
194192
}

0 commit comments

Comments
 (0)