Skip to content

Commit 72aad85

Browse files
committed
feat: add IntrusiveStringSet
1 parent 777195b commit 72aad85

File tree

2 files changed

+101
-101
lines changed

2 files changed

+101
-101
lines changed

src/core/intrusive_string_set.h

Lines changed: 89 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -4,153 +4,160 @@
44

55
#pragma once
66

7+
#include <cassert>
78
#include <cstring>
89
#include <memory>
910
#include <string_view>
1011
#include <vector>
1112

13+
#include "base/hash.h"
14+
1215
namespace dfly {
1316

14-
class ISSEntry {
17+
class ISLEntry {
1518
public:
16-
ISSEntry(std::string_view key) {
17-
ISSEntry* next = nullptr;
19+
ISLEntry() = default;
20+
21+
ISLEntry(char* data) {
22+
data_ = data;
23+
}
24+
25+
operator bool() const {
26+
return data_;
27+
}
28+
29+
static ISLEntry Create(std::string_view key) {
30+
char* next = nullptr;
1831
uint32_t key_size = key.size();
1932

2033
auto size = sizeof(next) + sizeof(key_size) + key_size;
2134

22-
data_ = (char*)malloc(size);
35+
char* data = (char*)malloc(size);
2336

24-
std::memcpy(data_, &next, sizeof(next));
37+
std::memcpy(data, &next, sizeof(next));
2538

26-
auto* key_size_pos = data_ + sizeof(next);
39+
auto* key_size_pos = data + sizeof(next);
2740
std::memcpy(key_size_pos, &key_size, sizeof(key_size));
2841

2942
auto* key_pos = key_size_pos + sizeof(key_size);
3043
std::memcpy(key_pos, key.data(), key_size);
44+
45+
return ISLEntry(data);
46+
}
47+
48+
static void Destroy(ISLEntry entry) {
49+
free(entry.data_);
3150
}
3251

3352
std::string_view Key() const {
3453
return {GetKeyData(), GetKeySize()};
3554
}
3655

37-
ISSEntry* Next() const {
38-
ISSEntry* next = nullptr;
39-
std::memcpy(&next, data_, sizeof(next));
56+
ISLEntry Next() const {
57+
ISLEntry next;
58+
std::memcpy(&next.data_, data_, sizeof(next));
4059
return next;
4160
}
4261

43-
void SetNext(ISSEntry* next) {
62+
// TODO make this private (remove from the public interface)
63+
void SetNext(ISLEntry next) {
4464
std::memcpy(data_, &next, sizeof(next));
65+
next.data_ = nullptr;
4566
}
4667

4768
private:
4869
const char* GetKeyData() const {
49-
return data_ + sizeof(ISSEntry*) + sizeof(uint32_t);
70+
return data_ + sizeof(ISLEntry*) + sizeof(uint32_t);
5071
}
5172

5273
uint32_t GetKeySize() const {
5374
uint32_t size = 0;
54-
std::memcpy(&size, data_ + sizeof(ISSEntry*), sizeof(size));
75+
std::memcpy(&size, data_ + sizeof(ISLEntry*), sizeof(size));
5576
return size;
5677
}
5778

5879
// TODO consider using SDS strings or another approach
5980
// TODO add optimization for big keys
60-
// memory daya layout [ISSEntry*, key_size, key]
61-
char* data_;
81+
// memory data layout [ISLEntry*, key_size, key]
82+
char* data_ = nullptr;
6283
};
6384

64-
class ISMEntry {
85+
class IntrusiveStringList {
6586
public:
66-
ISMEntry(std::string_view key, std::string_view val) {
67-
ISMEntry* next = nullptr;
68-
uint32_t key_size = key.size();
69-
uint32_t val_size = val.size();
70-
71-
auto size = sizeof(next) + sizeof(key_size) + sizeof(val_size) + key_size + val_size;
72-
73-
data_ = (char*)malloc(size);
74-
75-
std::memcpy(data_, &next, sizeof(next));
76-
77-
auto* key_size_pos = data_ + sizeof(next);
78-
std::memcpy(key_size_pos, &key_size, sizeof(key_size));
79-
80-
auto* val_size_pos = key_size_pos + sizeof(key_size);
81-
std::memcpy(val_size_pos, &val_size, sizeof(val_size));
82-
83-
auto* key_pos = val_size_pos + sizeof(val_size);
84-
std::memcpy(key_pos, key.data(), key_size);
85-
86-
auto* val_pos = key_pos + key_size;
87-
std::memcpy(val_pos, val.data(), val_size);
87+
~IntrusiveStringList() {
88+
while (start_) {
89+
auto next = start_.Next();
90+
ISLEntry::Destroy(start_);
91+
start_ = next;
92+
}
8893
}
8994

90-
std::string_view Key() const {
91-
return {GetKeyData(), GetKeySize()};
95+
ISLEntry Emplace(std::string_view key) {
96+
auto e = ISLEntry::Create(key);
97+
e.SetNext(start_);
98+
start_ = e;
99+
return start_;
92100
}
93101

94-
std::string_view Val() const {
95-
return {GetValData(), GetValSize()};
102+
ISLEntry Find(std::string_view str) {
103+
auto it = start_;
104+
for (; it && it.Key() != str; it = it.Next())
105+
;
106+
return it;
96107
}
97108

98-
ISMEntry* Next() const {
99-
ISMEntry* next = nullptr;
100-
std::memcpy(&next, data_, sizeof(next));
101-
return next;
102-
}
103-
104-
void SetVal(std::string_view val) {
105-
// TODO add optimization for the same size key
106-
uint32_t val_size = val.size();
107-
auto new_size =
108-
sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t) + GetKeySize() + val_size;
109-
110-
data_ = (char*)realloc(data_, new_size);
109+
private:
110+
ISLEntry start_;
111+
};
111112

112-
auto* val_size_pos = data_ + sizeof(ISMEntry*) + sizeof(uint32_t);
113-
std::memcpy(val_size_pos, &val_size, sizeof(val_size));
113+
class IntrusiveStringSet {
114+
public:
115+
// TODO add TTL processing
116+
ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) {
117+
if (size_ >= entries_.size()) {
118+
Grow();
119+
}
120+
auto bucket_id = BucketId(Hash(str));
121+
auto& bucket = entries_[bucket_id];
114122

115-
auto* val_pos = val_size_pos + sizeof(val_size) + GetKeySize();
116-
std::memcpy(val_pos, val.data(), val_size);
117-
}
123+
if (auto existed_item = bucket.Find(str); existed_item) {
124+
// TODO consider a common implementation for key-value pairs
125+
return ISLEntry();
126+
}
118127

119-
void SetNext(ISMEntry* next) {
120-
std::memcpy(data_, &next, sizeof(next));
128+
return bucket.Emplace(str);
121129
}
122130

123131
private:
124-
const char* GetKeyData() const {
125-
return data_ + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t);
132+
std::uint32_t Capacity() const {
133+
return 1 << capacity_log_;
126134
}
127135

128-
uint32_t GetKeySize() const {
129-
uint32_t size = 0;
130-
std::memcpy(&size, data_ + sizeof(ISMEntry*), sizeof(size));
131-
return size;
132-
}
136+
void Grow() {
137+
++capacity_log_;
138+
entries_.resize(Capacity());
133139

134-
const char* GetValData() const {
135-
return GetKeyData() + GetKeySize();
140+
// TODO rehashing
136141
}
137142

138-
uint32_t GetValSize() const {
139-
uint32_t size = 0;
140-
std::memcpy(&size, data_ + sizeof(ISMEntry*) + sizeof(uint32_t), sizeof(size));
141-
return size;
143+
uint32_t BucketId(uint64_t hash) const {
144+
assert(capacity_log_ > 0);
145+
return hash >> (64 - capacity_log_);
142146
}
143147

144-
// TODO consider use SDS strings or other approach
145-
// TODO add optimization for big keys
146-
// memory daya layout [ISMEntry*, key_size, val_size, key, val]
147-
char* data_;
148-
};
148+
uint64_t Hash(std::string_view str) const {
149+
constexpr XXH64_hash_t kHashSeed = 24061983;
150+
return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
151+
}
149152

150-
template <class EntryT> class IntrusiveStringSet {
151-
public:
152153
private:
153-
std::vector<EntryT*> entries_;
154+
static constexpr size_t kMinSizeShift = 2;
155+
std::uint32_t capacity_log_ = 1;
156+
std::uint32_t size_ = 0; // number of elements in the set.
157+
158+
static_assert(sizeof(IntrusiveStringList) == sizeof(void*),
159+
"IntrusiveStringList should be just a pointer");
160+
std::vector<IntrusiveStringList> entries_;
154161
};
155162

156163
} // namespace dfly

src/core/intrusive_string_set_test.cc

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,24 @@ class IntrusiveStringSetTest : public ::testing::Test {
2525
}
2626
};
2727

28-
TEST_F(IntrusiveStringSetTest, ISSEntryTest) {
29-
ISSEntry test("0123456789");
28+
TEST_F(IntrusiveStringSetTest, IntrusiceStringListTest) {
29+
IntrusiveStringList isl;
30+
ISLEntry test = isl.Emplace("0123456789");
3031

3132
EXPECT_EQ(test.Key(), "0123456789"sv);
32-
EXPECT_EQ(test.Next(), nullptr);
33+
EXPECT_EQ(test.Next(), ISLEntry());
3334

34-
test.SetNext(&test);
35+
test = isl.Emplace("123456789");
3536

36-
EXPECT_EQ(test.Key(), "0123456789"sv);
37-
EXPECT_EQ(test.Next(), &test);
38-
}
39-
40-
TEST_F(IntrusiveStringSetTest, ISMEntryTest) {
41-
ISMEntry test("0123456789", "qwertyuiopasdfghjklzxcvbnm");
37+
EXPECT_EQ(test.Next().Key(), "0123456789"sv);
38+
EXPECT_EQ(test.Key(), "123456789"sv);
4239

43-
EXPECT_EQ(test.Key(), "0123456789"sv);
44-
EXPECT_EQ(test.Val(), "qwertyuiopasdfghjklzxcvbnm"sv);
45-
EXPECT_EQ(test.Next(), nullptr);
46-
47-
test.SetVal("QWERTYUIOPASDFGHJKLZXCVBNM");
48-
test.SetNext(&test);
40+
test = isl.Emplace("23456789");
4941

50-
EXPECT_EQ(test.Key(), "0123456789"sv);
51-
EXPECT_EQ(test.Val(), "QWERTYUIOPASDFGHJKLZXCVBNM"sv);
52-
EXPECT_EQ(test.Next(), &test);
42+
EXPECT_EQ(isl.Find("0123456789").Key(), "0123456789"sv);
43+
EXPECT_EQ(isl.Find("23456789").Key(), "23456789"sv);
44+
EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv);
45+
EXPECT_EQ(isl.Find("test"), ISLEntry());
5346
}
5447

5548
} // namespace dfly

0 commit comments

Comments
 (0)