Skip to content

Commit 71ac26c

Browse files
committed
feat: add current bucket id into hash (#5141)
1 parent 4da9dd9 commit 71ac26c

File tree

3 files changed

+40
-21
lines changed

3 files changed

+40
-21
lines changed

src/core/intrusive_string_list.h

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,26 @@ class ISLEntry {
8686
return (uptr() & kExtHashShiftedMask) >> kExtHashShift;
8787
}
8888

89-
bool CheckExtendedHash(uint64_t hash, uint32_t capacity_log) const {
90-
uint32_t ext_hash_shift = 64 - capacity_log - ext_hash_bit_size;
89+
bool CheckBucketAffiliation(uint32_t bucket_id, uint32_t capacity_log, uint32_t shift_log) const {
90+
uint32_t bucket_id_hash_part = capacity_log > shift_log ? shift_log : capacity_log;
91+
uint32_t bucket_mask = (1 << bucket_id_hash_part) - 1;
92+
bucket_id &= bucket_mask;
93+
uint32_t stored_bucket_id = GetExtendedHash() >> (ext_hash_bit_size - bucket_id_hash_part);
94+
return bucket_id == stored_bucket_id;
95+
}
96+
97+
bool CheckExtendedHash(uint64_t hash, uint32_t capacity_log, uint32_t shift_log) const {
98+
uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0;
99+
uint32_t ext_hash_shift = 64 - start_hash_bit - ext_hash_bit_size;
91100
uint64_t ext_hash = (hash >> ext_hash_shift) & kExtHashMask;
92101
return GetExtendedHash() == ext_hash;
93102
}
94103

95-
void SetExtendedHash(uint64_t hash, uint32_t capacity_log) {
96-
uint32_t ext_hash_shift = 64 - capacity_log - ext_hash_bit_size;
104+
// TODO rename to SetHash
105+
// shift_log determines how many bucket-id bits are kept in the stored hash, which identifies the bucket the element belongs to
106+
void SetExtendedHash(uint64_t hash, uint32_t capacity_log, uint32_t shift_log) {
107+
uint32_t start_hash_bit = capacity_log > shift_log ? capacity_log - shift_log : 0;
108+
uint32_t ext_hash_shift = 64 - start_hash_bit - ext_hash_bit_size;
97109
uint64_t ext_hash = ((hash >> ext_hash_shift) << kExtHashShift) & kExtHashShiftedMask;
98110
data_ = (char*)((uptr() & ~kExtHashShiftedMask) | ext_hash);
99111
}
@@ -396,14 +408,15 @@ class IntrusiveStringList {
396408

397409
// TODO: consider wrapping ISLEntry to prevent its use outside of the list
398410
IntrusiveStringList::iterator Find(std::string_view str, uint64_t hash, uint32_t capacity_log,
399-
uint32_t* set_size, uint32_t time_now = UINT32_MAX) {
411+
uint32_t shift_log, uint32_t* set_size,
412+
uint32_t time_now = UINT32_MAX) {
400413
auto entry = begin();
401414
for (; entry; ++entry) {
402415
if (entry.ExpireIfNeeded(time_now, &obj_malloc_used_)) {
403416
(*set_size)--;
404417
continue;
405418
}
406-
if (entry->CheckExtendedHash(hash, capacity_log) && entry->Key() == str)
419+
if (entry->CheckExtendedHash(hash, capacity_log, shift_log) && entry->Key() == str)
407420
break;
408421
}
409422
return entry;

src/core/intrusive_string_set.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ class IntrusiveStringSet {
152152
uint32_t ttl_sec = UINT32_MAX) {
153153
++size_;
154154
auto& entry = bucket->Emplace(str, ttl_sec);
155-
entry.SetExtendedHash(hash, capacity_log_);
155+
entry.SetExtendedHash(hash, capacity_log_, kShiftLog);
156156
return entry;
157157
}
158158

@@ -195,6 +195,7 @@ class IntrusiveStringSet {
195195

196196
using ItemCb = std::function<void(std::string_view)>;
197197

198+
// TODO fix with CheckExtendedHash
198199
uint32_t Scan(uint32_t cursor, const ItemCb& cb) {
199200
uint32_t entries_idx = cursor >> (32 - capacity_log_);
200201

@@ -306,7 +307,7 @@ class IntrusiveStringSet {
306307
uint64_t hash = Hash(entry.Key());
307308
auto bucket_id = BucketId(hash);
308309
auto& inserted_entry = entries_[bucket_id].Insert(entry.Release());
309-
inserted_entry.SetExtendedHash(hash, capacity_log_);
310+
inserted_entry.SetExtendedHash(hash, capacity_log_, kShiftLog);
310311
}
311312
}
312313
}
@@ -352,7 +353,7 @@ class IntrusiveStringSet {
352353
if (it->Empty()) {
353354
continue;
354355
}
355-
auto res = it->Find(str, hash, capacity_log_, &size_, time_now_);
356+
auto res = it->Find(str, hash, capacity_log_, kShiftLog, &size_, time_now_);
356357
if (res) {
357358
return {res, bucket_id};
358359
}
@@ -362,8 +363,9 @@ class IntrusiveStringSet {
362363
}
363364

364365
private:
365-
static constexpr std::uint32_t kMinCapacityLog = 3;
366-
static constexpr std::uint32_t kDisplacementSize = 16;
366+
static constexpr std::uint32_t kMinCapacityLog = 3; // TODO make template
367+
static constexpr std::uint32_t kShiftLog = 4; // TODO make template
368+
static constexpr std::uint32_t kDisplacementSize = 1 << kShiftLog;
367369
std::uint32_t capacity_log_ = 0;
368370
std::uint32_t size_ = 0; // number of elements in the set.
369371
std::uint32_t time_now_ = 0;

src/core/intrusive_string_set_test.cc

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,32 +120,36 @@ TEST_F(IntrusiveStringSetTest, HashCheckTest) {
120120
IntrusiveStringList isl;
121121
{
122122
ISLEntry& test = isl.Emplace("0123456789");
123-
test.SetExtendedHash(hash, 3);
123+
test.SetExtendedHash(hash, 3, 4);
124+
EXPECT_TRUE(test.CheckBucketAffiliation(7, 3, 4));
125+
EXPECT_FALSE(test.CheckBucketAffiliation(6, 3, 4));
126+
EXPECT_TRUE(test.CheckBucketAffiliation(7, 4, 3));
127+
EXPECT_FALSE(test.CheckBucketAffiliation(6, 4, 3));
124128
}
125129
{
126130
ISLEntry& test = isl.Emplace("123456789");
127-
test.SetExtendedHash(hash, 3);
131+
test.SetExtendedHash(hash, 3, 4);
128132
}
129133
{
130134
ISLEntry& test = isl.Emplace("23456789");
131-
test.SetExtendedHash(hash, 3);
135+
test.SetExtendedHash(hash, 3, 4);
132136
}
133137
{
134138
ISLEntry& test = isl.Emplace("3456789");
135-
test.SetExtendedHash(hash, 3);
139+
test.SetExtendedHash(hash, 3, 4);
136140
}
137141
{
138142
ISLEntry& test = isl.Emplace("456789");
139-
test.SetExtendedHash(hash, 3);
143+
test.SetExtendedHash(hash, 3, 4);
140144
}
141145

142146
uint32_t num_expired_fields = 0;
143147

144-
EXPECT_TRUE(isl.Find("0123456789", 0xFEDCBA9876543210ULL, 3, &num_expired_fields));
145-
EXPECT_TRUE(isl.Find("123456789", 0xFEDCBA9876543210ULL, 3, &num_expired_fields));
146-
EXPECT_TRUE(isl.Find("23456789", 0xFEDCBA9876543210ULL, 3, &num_expired_fields));
147-
EXPECT_TRUE(isl.Find("3456789", 0xFEDCBA9876543210ULL, 3, &num_expired_fields));
148-
EXPECT_TRUE(isl.Find("456789", 0xFEDCBA9876543210ULL, 3, &num_expired_fields));
148+
EXPECT_TRUE(isl.Find("0123456789", 0xFEDCBA9876543210ULL, 3, 4, &num_expired_fields));
149+
EXPECT_TRUE(isl.Find("123456789", 0xFEDCBA9876543210ULL, 3, 4, &num_expired_fields));
150+
EXPECT_TRUE(isl.Find("23456789", 0xFEDCBA9876543210ULL, 3, 4, &num_expired_fields));
151+
EXPECT_TRUE(isl.Find("3456789", 0xFEDCBA9876543210ULL, 3, 4, &num_expired_fields));
152+
EXPECT_TRUE(isl.Find("456789", 0xFEDCBA9876543210ULL, 3, 4, &num_expired_fields));
149153
}
150154

151155
TEST_F(IntrusiveStringSetTest, IntrusiveStringSetAddFindTest) {

0 commit comments

Comments
 (0)