Skip to content

Commit 8d78556

Browse files
authored
Merge pull request #131 from wildcatdb/pr-061825.1
- db.go minor replacement of options and better comment explainations…
2 parents 5b0d98d + aa3167e commit 8d78556

File tree

9 files changed

+174
-70
lines changed

9 files changed

+174
-70
lines changed

blockmanager/fsyncdata.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"syscall"
77
)
88

9+
// Fdatasync is a Linux-specific implementation of fdatasync.
910
func Fdatasync(fd uintptr) error {
1011
err := syscall.Fdatasync(int(fd))
1112
if err != nil {

blockmanager/fsyncdata_darwin.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"golang.org/x/sys/unix"
88
)
99

10+
// // Fdatasync is a Darwin-specific implementation of fdatasync.
1011
func Fdatasync(fd uintptr) error {
1112
// F_FULLFSYNC forces the drive to flush its buffers to stable storage.
1213
_, _, errno := unix.Syscall(

blockmanager/openfile.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"syscall"
88
)
99

10+
// OpenFile opens a file with the specified name and flags, returning a file handle.
1011
func OpenFile(name string, flags int, perm uint32) (uintptr, error) {
1112
fd, err := syscall.Open(name, flags, perm)
1213
if err != nil {
@@ -15,6 +16,7 @@ func OpenFile(name string, flags int, perm uint32) (uintptr, error) {
1516
return uintptr(fd), nil
1617
}
1718

19+
// NewFileFromFd creates a new os.File from a file descriptor handle and a name.
1820
func NewFileFromFd(handle uintptr, name string) *os.File {
1921
return os.NewFile(handle, name)
2022
}

blockmanager/openfile_windows.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"syscall"
99
)
1010

11+
// OpenFile opens a file with the specified name and flags, returning a file handle.
1112
func OpenFile(name string, flags int, perm uint32) (uintptr, error) {
1213
var access uint32
1314
var creation uint32
@@ -75,6 +76,7 @@ func OpenFile(name string, flags int, perm uint32) (uintptr, error) {
7576
return uintptr(handle), nil
7677
}
7778

79+
// NewFileFromFd creates a new os.File from a file descriptor handle and name.
7880
func NewFileFromFd(handle uintptr, name string) *os.File {
7981
return os.NewFile(handle, name)
8082
}

db.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ import (
3232
"unicode/utf8"
3333
)
3434

35-
// SyncOption is a block manager sync option that can be set by the user.
35+
// SyncOption is a block manager sync option that can be set for a *DB instance.
36+
// the sync option will be used for all block managers created by the *DB instance internally.
3637
type SyncOption int
3738

3839
const (
@@ -100,6 +101,7 @@ type Options struct {
100101
BlockManagerLRUAccesWeight float64 // Access weight for the LRU cache
101102
Permission os.FileMode // Permission for created files
102103
LogChannel chan string // Channel for logging
104+
STDOutLogging bool // Enable logging to standard output (default is false and if set, channel is ignored)
103105
BloomFilter bool // Enable Bloom filter for SSTables
104106
MaxCompactionConcurrency int // Maximum number of concurrent compactions
105107
CompactionCooldownPeriod time.Duration // Cooldown period for compaction
@@ -110,20 +112,19 @@ type Options struct {
110112
CompactionScoreCountWeight float64 // Weight for count-based score
111113
CompactionSizeTieredSimilarityRatio float64 // Similarity ratio for size-tiered compaction. For grouping SSTables that are "roughly the same size" together for compaction.
112114
CompactionActiveSSTReadWaitBackoff time.Duration // Backoff time for active SSTable read wait during compaction, to avoid busy waiting
113-
FlusherTickerInterval time.Duration // Interval for flusher ticker
115+
CompactionPartitionRatio float64 // How much to move back (0.6 = 60% of data). Used for last level compaction
116+
CompactionPartitionDistributionRatio float64 // How to split between L-1 and L-2 (0.7 = 70% to L-1, 30% to L-2)
114117
CompactorTickerInterval time.Duration // Interval for compactor ticker
118+
FlusherTickerInterval time.Duration // Interval for flusher ticker
115119
BloomFilterFPR float64 // False positive rate for Bloom filter
116120
WalAppendRetry int // Number of retries for WAL append
117121
WalAppendBackoff time.Duration // Backoff duration for WAL append
118122
SSTableBTreeOrder int // Order of the B-tree for SSTables
119-
STDOutLogging bool // Enable logging to standard output (default is false and if set, channel is ignored)
120123
MaxConcurrentTxns int // Maximum concurrent transactions (buffer size)
121124
TxnBeginRetry int // Number of retries for Begin() when buffer full
122125
TxnBeginBackoff time.Duration // Initial backoff duration for Begin() retries
123126
TxnBeginMaxBackoff time.Duration // Maximum backoff duration for Begin() retries
124127
RecoverUncommittedTxns bool // Whether to recover uncommitted transactions on startup
125-
CompactionPartitionRatio float64 // How much to move back (0.6 = 60% of data). Used for last level compaction
126-
CompactionPartitionDistributionRatio float64 // How to split between L-1 and L-2 (0.7 = 70% to L-1, 30% to L-2)
127128
}
128129

129130
// DB represents the main Wildcat structure
@@ -167,14 +168,16 @@ func Open(opts *Options) (*DB, error) {
167168
}
168169

169170
// Set default values for what options are not set
170-
opts.setDefaults()
171+
err := opts.setDefaults()
172+
if err != nil {
173+
return nil, err
174+
}
171175

172176
buff, err := buffer.New(opts.MaxConcurrentTxns)
173177
if err != nil {
174178
return nil, fmt.Errorf("failed to create transaction buffer: %w", err)
175179
}
176180

177-
// We create a db instance with the provided options
178181
db := &DB{
179182
lru: lru.New(int64(opts.BlockManagerLRUSize), opts.BlockManagerLRUEvictRatio, opts.BlockManagerLRUAccesWeight), // New block manager LRU cache
180183
wg: &sync.WaitGroup{}, // Wait group for background operations
@@ -185,6 +188,7 @@ func Open(opts *Options) (*DB, error) {
185188
}
186189

187190
// Initialize flusher and compactor
191+
// The flusher and compactor run in the background and handle flushing memtables to disk and compacting levels respectively.
188192
db.flusher = newFlusher(db)
189193
db.compactor = newCompactor(db)
190194

@@ -242,11 +246,9 @@ func Open(opts *Options) (*DB, error) {
242246

243247
level.sstables = atomic.Pointer[[]*SSTable]{} // Atomic pointer to SSTables in this level
244248
level.path = fmt.Sprintf("%s%s%d%s", db.opts.Directory, LevelPrefix, i+1, string(os.PathSeparator)) // Path for the level directory
245-
246-
// Set level
247249
levels[i] = level
248250

249-
db.log(fmt.Sprintf("Creating level %d with capacity %d bytes at path %s", level.id, level.capacity, level.path))
251+
db.log(fmt.Sprintf("Initializing level %d with capacity %d bytes at path %s", level.id, level.capacity, level.path))
250252

251253
// Create or ensure the level directory exists
252254
if err := os.MkdirAll(level.path, db.opts.Permission); err != nil {
@@ -275,7 +277,7 @@ func Open(opts *Options) (*DB, error) {
275277
db.wg.Add(1)
276278
go db.flusher.backgroundProcess()
277279

278-
// Start the compaction manager
280+
// Start the background compactor
279281
db.wg.Add(1)
280282
go db.compactor.backgroundProcess()
281283

@@ -284,7 +286,7 @@ func Open(opts *Options) (*DB, error) {
284286
}
285287

286288
// setDefaults checks and sets default values for db options
287-
func (opts *Options) setDefaults() {
289+
func (opts *Options) setDefaults() error {
288290
if opts.WriteBufferSize <= 0 {
289291
opts.WriteBufferSize = DefaultWriteBufferSize
290292
}
@@ -371,6 +373,12 @@ func (opts *Options) setDefaults() {
371373

372374
if opts.SSTableBTreeOrder <= 0 {
373375
opts.SSTableBTreeOrder = DefaultSSTableBTreeOrder
376+
} else {
377+
// Ensure the B-tree order is reasonable
378+
if opts.SSTableBTreeOrder < 2 {
379+
return fmt.Errorf("SSTable B-tree order must be at least 2, got %d", opts.SSTableBTreeOrder)
380+
}
381+
374382
}
375383

376384
if opts.CompactionSizeTieredSimilarityRatio <= 0 {
@@ -405,6 +413,8 @@ func (opts *Options) setDefaults() {
405413
opts.CompactionPartitionDistributionRatio = DefaultCompactionPartitionDistributionRatio
406414
}
407415

416+
return nil
417+
408418
}
409419

410420
// Close closes the database and all open resources

level.go

Lines changed: 61 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -91,22 +91,6 @@ func (l *Level) reopen() error {
9191
// Get file paths
9292
klogPath := fmt.Sprintf("%s%s%d%s", levelPath, SSTablePrefix, id, KLogExtension)
9393

94-
// Get file sizes to calculate SSTable size
95-
klogInfo, err := os.Stat(klogPath)
96-
if err != nil {
97-
l.db.log(fmt.Sprintf("Warning: Failed to stat KLog file for SSTable %d: %v - skipping", id, err))
98-
continue
99-
}
100-
101-
vlogInfo, err := os.Stat(vlogPath)
102-
if err != nil {
103-
l.db.log(fmt.Sprintf("Warning: Failed to stat VLog file for SSTable %d: %v - skipping", id, err))
104-
continue
105-
}
106-
107-
// Calculate total size from file system
108-
sstable.Size = klogInfo.Size() + vlogInfo.Size()
109-
11094
// Open the KLog file to try to get metadata from B-tree
11195
klogBm, err := blockmanager.Open(klogPath, os.O_RDONLY, l.db.opts.Permission, blockmanager.SyncOption(l.db.opts.SyncOption))
11296
if err != nil {
@@ -156,15 +140,16 @@ func (l *Level) reopen() error {
156140

157141
default:
158142
// This should not occur but if it does log a warning
143+
// The system will still function using the sstable but without proper metadata setting nil
159144
l.db.log(fmt.Sprintf("Warning: Unknown metadata type %T for SSTable %d - using file system metadata", extraMeta, id))
160145

161-
sstable.Min = []byte{}
162-
sstable.Max = []byte{}
146+
sstable.Min = nil
147+
sstable.Max = nil
163148
sstable.EntryCount = 0
164149
}
165150
} else {
166-
sstable.Min = []byte{}
167-
sstable.Max = []byte{}
151+
sstable.Min = nil
152+
sstable.Max = nil
168153
sstable.EntryCount = 0
169154
}
170155

@@ -229,37 +214,69 @@ func (l *Level) extractSSTableFromBSON(sstable *SSTable, doc primitive.D) {
229214
} else if level, ok := elem.Value.(int64); ok {
230215
sstable.Level = int(level)
231216
}
217+
case "timestamp":
218+
if timestamp, ok := elem.Value.(int64); ok {
219+
sstable.Timestamp = timestamp
220+
}
221+
case "bloomfilter":
222+
if l.db.opts.BloomFilter {
223+
if err := sstable.reconstructBloomFilter(); err != nil {
224+
l.db.log(fmt.Sprintf("Warning: Failed to reconstruct bloom filter for SSTable %d: %v", sstable.Id, err))
225+
}
226+
}
232227
}
233228
}
234229
}
235230

236231
// extractSSTableFromMap a helper method to extract SSTable metadata from map[string]interface{}
237232
func (l *Level) extractSSTableFromMap(sstable *SSTable, meta map[string]interface{}) {
238-
if id, ok := meta["id"].(int64); ok {
239-
sstable.Id = id
240-
}
241-
if minBytes, ok := meta["min"].([]byte); ok {
242-
sstable.Min = minBytes
243-
}
244-
if maxBytes, ok := meta["max"].([]byte); ok {
245-
sstable.Max = maxBytes
246-
}
247-
if size, ok := meta["size"].(int64); ok && size > 0 {
248-
sstable.Size = size
249-
}
250-
if count, ok := meta["entrycount"].(int); ok {
251-
sstable.EntryCount = count
252-
} else if count, ok := meta["entrycount"].(int32); ok {
253-
sstable.EntryCount = int(count)
254-
} else if count, ok := meta["entrycount"].(int64); ok {
255-
sstable.EntryCount = int(count)
233+
for key, value := range meta {
234+
switch key {
235+
case "id":
236+
if id, ok := value.(int64); ok {
237+
sstable.Id = id
238+
}
239+
case "min":
240+
if minBytes, ok := value.([]byte); ok {
241+
sstable.Min = minBytes
242+
}
243+
case "max":
244+
if maxBytes, ok := value.([]byte); ok {
245+
sstable.Max = maxBytes
246+
}
247+
case "size":
248+
if size, ok := value.(int64); ok && size > 0 {
249+
sstable.Size = size
250+
}
251+
case "entrycount":
252+
switch count := value.(type) {
253+
case int:
254+
sstable.EntryCount = count
255+
case int32:
256+
sstable.EntryCount = int(count)
257+
case int64:
258+
sstable.EntryCount = int(count)
259+
}
260+
case "level":
261+
switch level := value.(type) {
262+
case int:
263+
sstable.Level = level
264+
case int32:
265+
sstable.Level = int(level)
266+
case int64:
267+
sstable.Level = int(level)
268+
}
269+
case "timestamp":
270+
if timestamp, ok := value.(int64); ok {
271+
sstable.Timestamp = timestamp
272+
}
273+
}
256274
}
257-
if level, ok := meta["level"].(int); ok {
258-
sstable.Level = level
259-
} else if level, ok := meta["level"].(int32); ok {
260-
sstable.Level = int(level)
261-
} else if level, ok := meta["level"].(int64); ok {
262-
sstable.Level = int(level)
275+
276+
if l.db.opts.BloomFilter {
277+
if err := sstable.reconstructBloomFilter(); err != nil {
278+
l.db.log(fmt.Sprintf("Warning: Failed to reconstruct bloom filter for SSTable %d: %v", sstable.Id, err))
279+
}
263280
}
264281
}
265282

readme.md

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Wildcat is a high-performance embedded key-value database (or storage engine) wr
2323
- Automatic multi-threaded background compaction with configurable concurrency
2424
- ACID transaction support with configurable durability guarantees
2525
- Range, prefix, and full iteration support with bidirectional traversal
26-
- High transactional throughput per second with low latency due to lock-free and non-blocking design.
26+
- High transactional throughput per second with low latency due to lock-free and non-blocking design from memory to disk
2727
- Optional Bloom filters per SSTable for improved key lookup performance
2828
- Key-value separation optimization (`.klog` for keys, `.vlog` for values)
2929
- Tombstone-aware and version-aware compaction with retention based on active transaction read windows
@@ -134,7 +134,7 @@ The easiest way to interact with Wildcat is through the Update method, which han
134134
```go
135135
// Write a value
136136
err := db.Update(func(txn *wildcat.Txn) error {
137-
return txn.Put([]byte("hello"), []byte("world")) // Put update's existing key's values.
137+
return txn.Put([]byte("hello"), []byte("world")) // Put update's existing key's values
138138
})
139139
if err != nil {
140140
// Handle error
@@ -158,7 +158,6 @@ if err != nil {
158158
### Manual Transaction Management
159159
For more complex operations, you can manually manage transactions.
160160
```go
161-
// Begin a transaction
162161
txn, err := db.Begin()
163162
if err != nil {
164163
// Handle error
@@ -221,7 +220,6 @@ err := db.Update(func(txn *wildcat.Txn) error {
221220
```go
222221
// Perform batch operations
223222
for i := 0; i < 1000; i++ {
224-
// Begin a transaction
225223
txn, err := db.Begin()
226224
if err != nil {
227225
// Handle error
@@ -237,7 +235,6 @@ for i := 0; i < 1000; i++ {
237235
return
238236
}
239237

240-
// Commit the transaction
241238
err = txn.Commit()
242239
if err != nil {
243240
// Handle error
@@ -293,7 +290,6 @@ err := db.View(func(txn *wildcat.Txn) error {
293290
#### Range Iterator (bidirectional)
294291
```go
295292
err := db.View(func(txn *wildcat.Txn) error {
296-
// Create range iterator
297293
iter, err := txn.NewRangeIterator([]byte("start"), []byte("end"), true)
298294
if err != nil {
299295
return err
@@ -331,7 +327,6 @@ err := db.View(func(txn *wildcat.Txn) error {
331327
#### Prefix Iterator (bidirectional)
332328
```go
333329
err := db.View(func(txn *wildcat.Txn) error {
334-
// Create prefix iterator
335330
iter, err := txn.NewPrefixIterator([]byte("prefix"), true)
336331
if err != nil {
337332
return err

0 commit comments

Comments
 (0)