Skip to content

Commit b335f10

Browse files
committed
feat: json paths in search
Signed-off-by: Vladislav Oleshko <[email protected]>
1 parent a743d75 commit b335f10

File tree

7 files changed

+102
-41
lines changed

7 files changed

+102
-41
lines changed

src/core/search/search.cc

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -264,34 +264,34 @@ struct BasicSearch {
264264
} // namespace
265265

266266
FieldIndices::FieldIndices(Schema schema) : schema_{move(schema)}, all_ids_{}, indices_{} {
267-
for (auto& [field, type] : schema_.fields) {
268-
switch (type) {
269-
case Schema::TAG:
270-
indices_[field] = make_unique<TagIndex>();
267+
for (const auto& [field_name, field_info] : schema_.fields) {
268+
switch (field_info.type) {
269+
case SchemaField::TAG:
270+
indices_[field_name] = make_unique<TagIndex>();
271271
break;
272-
case Schema::TEXT:
273-
indices_[field] = make_unique<TextIndex>();
272+
case SchemaField::TEXT:
273+
indices_[field_name] = make_unique<TextIndex>();
274274
break;
275-
case Schema::NUMERIC:
276-
indices_[field] = make_unique<NumericIndex>();
275+
case SchemaField::NUMERIC:
276+
indices_[field_name] = make_unique<NumericIndex>();
277277
break;
278-
case Schema::VECTOR:
279-
indices_[field] = make_unique<VectorIndex>();
278+
case SchemaField::VECTOR:
279+
indices_[field_name] = make_unique<VectorIndex>();
280280
break;
281281
}
282282
}
283283
}
284284

285285
void FieldIndices::Add(DocId doc, DocumentAccessor* access) {
286286
for (auto& [field, index] : indices_) {
287-
index->Add(doc, access, field);
287+
index->Add(doc, access, schema_.fields[field].identifier);
288288
}
289289
all_ids_.insert(upper_bound(all_ids_.begin(), all_ids_.end(), doc), doc);
290290
}
291291

292292
void FieldIndices::Remove(DocId doc, DocumentAccessor* access) {
293293
for (auto& [field, index] : indices_) {
294-
index->Remove(doc, access, field);
294+
index->Remove(doc, access, schema_.fields[field].identifier);
295295
}
296296
auto it = lower_bound(all_ids_.begin(), all_ids_.end(), doc);
297297
CHECK(it != all_ids_.end() && *it == doc);
@@ -305,10 +305,10 @@ BaseIndex* FieldIndices::GetIndex(string_view field) const {
305305

306306
std::vector<TextIndex*> FieldIndices::GetAllTextIndices() const {
307307
vector<TextIndex*> out;
308-
for (auto& [field, type] : schema_.fields) {
309-
if (type != Schema::TEXT)
308+
for (auto& [field_name, field_info] : schema_.fields) {
309+
if (field_info.type != SchemaField::TEXT)
310310
continue;
311-
auto* index = dynamic_cast<TextIndex*>(GetIndex(field));
311+
auto* index = dynamic_cast<TextIndex*>(GetIndex(field_name));
312312
DCHECK(index);
313313
out.push_back(index);
314314
}

src/core/search/search.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,15 @@ namespace dfly::search {
1919
struct AstNode;
2020
struct TextIndex;
2121

22-
struct Schema {
22+
struct SchemaField {
2323
enum FieldType { TAG, TEXT, NUMERIC, VECTOR };
2424

25-
absl::flat_hash_map<std::string, FieldType> fields;
25+
std::string identifier;
26+
FieldType type;
27+
};
28+
29+
struct Schema {
30+
absl::flat_hash_map<std::string, SchemaField> fields;
2631
};
2732

2833
// Collection of indices for all fields in schema

src/server/search/doc_accessors.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <absl/strings/str_join.h>
99

1010
#include <jsoncons/json.hpp>
11+
#include <jsoncons_ext/jsonpath/jsonpath.hpp>
1112

1213
#include "core/json_object.h"
1314
#include "core/search/search.h"
@@ -55,7 +56,7 @@ SearchDocData ListPackAccessor::Serialize(search::Schema schema) const {
5556
string_view v = container_utils::LpGetView(fptr, intbuf_[1].data());
5657
fptr = lpNext(lp_, fptr);
5758

58-
if (schema.fields.at(k) == search::Schema::VECTOR)
59+
if (schema.fields.at(k).type == search::SchemaField::VECTOR)
5960
out[k] = FtVectorToString(GetVector(k));
6061
else
6162
out[k] = v;
@@ -78,7 +79,7 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const {
7879
string_view k = SdsToSafeSv(kptr);
7980
string_view v = SdsToSafeSv(vptr);
8081

81-
if (schema.fields.at(k) == search::Schema::VECTOR)
82+
if (schema.fields.at(k).type == search::SchemaField::VECTOR)
8283
out[k] = FtVectorToString(GetVector(k));
8384
else
8485
out[k] = v;
@@ -88,7 +89,11 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const {
8889
}
8990

9091
string_view JsonAccessor::GetString(string_view active_field) const {
91-
buf_ = json_->get_value_or<string>(active_field, string{});
92+
error_code ec;
93+
auto path = jsoncons::jsonpath::make_expression<JsonType>(active_field, ec);
94+
DCHECK(!ec);
95+
96+
buf_ = path.evaluate(*json_).as<string>();
9297
return buf_;
9398
}
9499

src/server/search/doc_accessors.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ struct JsonAccessor : public BaseAccessor {
6363
SearchDocData Serialize(search::Schema schema) const override;
6464

6565
private:
66-
mutable std::string buf_;
6766
JsonType* json_;
67+
68+
mutable std::string buf_;
6869
};
6970

7071
// Get accessor for value

src/server/search/search_family.cc

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <atomic>
1111
#include <jsoncons/json.hpp>
12+
#include <jsoncons_ext/jsonpath/jsonpath.hpp>
1213
#include <variant>
1314
#include <vector>
1415

@@ -31,16 +32,17 @@ using namespace facade;
3132

3233
namespace {
3334

34-
const absl::flat_hash_map<string_view, search::Schema::FieldType> kSchemaTypes = {
35-
{"TAG"sv, search::Schema::TAG},
36-
{"TEXT"sv, search::Schema::TEXT},
37-
{"NUMERIC"sv, search::Schema::NUMERIC},
38-
{"VECTOR"sv, search::Schema::VECTOR}};
35+
const absl::flat_hash_map<string_view, search::SchemaField::FieldType> kSchemaTypes = {
36+
{"TAG"sv, search::SchemaField::TAG},
37+
{"TEXT"sv, search::SchemaField::TEXT},
38+
{"NUMERIC"sv, search::SchemaField::NUMERIC},
39+
{"VECTOR"sv, search::SchemaField::VECTOR}};
3940

4041
static const set<string_view> kIgnoredOptions = {"WEIGHT", "SEPARATOR", "TYPE", "DIM",
4142
"DISTANCE_METRIC"};
4243

43-
optional<search::Schema> ParseSchemaOrReply(CmdArgList args, ConnectionContext* cntx) {
44+
optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgList args,
45+
ConnectionContext* cntx) {
4446
search::Schema schema;
4547
for (size_t i = 0; i < args.size(); i++) {
4648
string_view field = ArgS(args, i);
@@ -49,6 +51,31 @@ optional<search::Schema> ParseSchemaOrReply(CmdArgList args, ConnectionContext*
4951
return nullopt;
5052
}
5153

54+
// Verify json path is correct
55+
if (type == DocIndex::JSON) {
56+
error_code ec;
57+
jsoncons::jsonpath::make_expression<JsonType>(field, ec);
58+
if (ec) {
59+
(*cntx)->SendError("Bad json path: " + string{field});
60+
return nullopt;
61+
}
62+
}
63+
64+
// AS [alias]
65+
string_view field_alias = field; // by default "alias" is same as identifier
66+
if (absl::AsciiStrToUpper(ArgS(args, i)) == "AS") {
67+
if (i++ >= args.size()) {
68+
(*cntx)->SendError("Expected attribute for field: " + string{field});
69+
return nullopt;
70+
}
71+
field_alias = ArgS(args, i);
72+
73+
if (i++ >= args.size()) {
74+
(*cntx)->SendError("No field type for field: " + string{field});
75+
return nullopt;
76+
}
77+
}
78+
5279
ToUpper(&args[i]);
5380
string_view type_str = ArgS(args, i);
5481
auto it = kSchemaTypes.find(type_str);
@@ -58,15 +85,15 @@ optional<search::Schema> ParseSchemaOrReply(CmdArgList args, ConnectionContext*
5885
}
5986

6087
// Skip {algorithm} {dim} flags
61-
if (it->second == search::Schema::VECTOR)
88+
if (it->second == search::SchemaField::VECTOR)
6289
i += 2;
6390

6491
// Skip all trailing ignored parameters
6592
while (i + 2 < args.size() && kIgnoredOptions.count(ArgS(args, i + 1)) > 0) {
6693
i += 2;
6794
}
6895

69-
schema.fields[field] = it->second;
96+
schema.fields[field_alias] = {string{field}, it->second};
7097
}
7198

7299
return schema;
@@ -174,7 +201,7 @@ void ReplyKnn(size_t knn_limit, const SearchParams& params, absl::Span<SearchRes
174201
}
175202
}
176203

177-
string_view GetSchemaTypeName(search::Schema::FieldType type) {
204+
string_view GetSchemaTypeName(search::SchemaField::FieldType type) {
178205
for (const auto& [iname, itype] : kSchemaTypes) {
179206
if (itype == type)
180207
return iname;
@@ -225,7 +252,7 @@ void SearchFamily::FtCreate(CmdArgList args, ConnectionContext* cntx) {
225252
if (i++ >= args.size())
226253
return (*cntx)->SendError("Empty schema");
227254

228-
auto schema = ParseSchemaOrReply(args.subspan(i), cntx);
255+
auto schema = ParseSchemaOrReply(index.type, args.subspan(i), cntx);
229256
if (!schema)
230257
return;
231258
index.schema = move(*schema);
@@ -278,7 +305,7 @@ void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
278305
if (num_notfound > 0u)
279306
return (*cntx)->SendError("Unknown index name");
280307

281-
DCHECK(infos.front().schema.fields == infos.back().schema.fields);
308+
DCHECK_EQ(infos.front().schema.fields.size(), infos.back().schema.fields.size());
282309

283310
size_t total_num_docs = 0;
284311
for (const auto& info : infos)
@@ -292,8 +319,10 @@ void SearchFamily::FtInfo(CmdArgList args, ConnectionContext* cntx) {
292319
(*cntx)->SendSimpleString("fields");
293320
const auto& fields = infos.front().schema.fields;
294321
(*cntx)->StartArray(fields.size());
295-
for (auto [field, type] : fields) {
296-
string_view reply[3] = {string_view{field}, "type"sv, GetSchemaTypeName(type)};
322+
for (const auto& [field_name, field_info] : fields) {
323+
string_view reply[6] = {"identifier", string_view{field_info.identifier},
324+
"attribute", string_view{field_name},
325+
"type"sv, GetSchemaTypeName(field_info.type)};
297326
(*cntx)->SendSimpleStrArr(reply);
298327
}
299328

src/server/search/search_family_test.cc

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,11 @@ TEST_F(SearchFamilyTest, InfoIndex) {
8989
}
9090

9191
auto info = Run({"ft.info", "idx-1"});
92-
EXPECT_THAT(info, RespArray(ElementsAre(
93-
_, _, "fields",
94-
RespArray(ElementsAre(RespArray(ElementsAre("name", "type", "TEXT")))),
95-
"num_docs", IntArg(15))));
92+
EXPECT_THAT(
93+
info, RespArray(ElementsAre(_, _, "fields",
94+
RespArray(ElementsAre(RespArray(ElementsAre(
95+
"identifier", "name", "attribute", "name", "type", "TEXT")))),
96+
"num_docs", IntArg(15))));
9697
}
9798

9899
TEST_F(SearchFamilyTest, Simple) {
@@ -132,7 +133,9 @@ TEST_F(SearchFamilyTest, Json) {
132133
Run({"json.set", "k2", ".", R"({"a": "another test", "b": "more details"})"});
133134
Run({"json.set", "k3", ".", R"({"a": "last test", "b": "secret details"})"});
134135

135-
EXPECT_EQ(Run({"ft.create", "i1", "on", "json", "schema", "a", "text", "b", "text"}), "OK");
136+
EXPECT_EQ(Run({"ft.create", "i1", "on", "json", "schema", "$.a", "as", "a", "text", "$.b", "as",
137+
"b", "text"}),
138+
"OK");
136139

137140
EXPECT_THAT(Run({"ft.search", "i1", "some|more"}), AreDocIds("k1", "k2"));
138141
EXPECT_THAT(Run({"ft.search", "i1", "some|more|secret"}), AreDocIds("k1", "k2", "k3"));
@@ -145,6 +148,18 @@ TEST_F(SearchFamilyTest, Json) {
145148
EXPECT_THAT(Run({"ft.search", "i1", "@a:small @b:secret"}), kNoResults);
146149
}
147150

151+
TEST_F(SearchFamilyTest, AttributesJsonPaths) {
152+
Run({"json.set", "k1", ".", R"( {"nested": {"value": "no"}} )"});
153+
Run({"json.set", "k2", ".", R"( {"nested": {"value": "yes"}} )"});
154+
Run({"json.set", "k3", ".", R"( {"nested": {"value": "maybe"}} )"});
155+
156+
EXPECT_EQ(
157+
Run({"ft.create", "i1", "on", "json", "schema", "$.nested.value", "as", "value", "text"}),
158+
"OK");
159+
160+
EXPECT_THAT(Run({"ft.search", "i1", "yes"}), AreDocIds("k2"));
161+
}
162+
148163
TEST_F(SearchFamilyTest, Tags) {
149164
Run({"hset", "d:1", "color", "red, green"});
150165
Run({"hset", "d:2", "color", "green, blue"});

tests/dragonfly/search_test.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,17 @@ async def test_management(async_client: aioredis.Redis):
101101

102102
i1info = await i1.info()
103103
assert i1info["num_docs"] == 10
104-
assert sorted(i1info["fields"]) == [["f1", "type", "TEXT"], ["f2", "type", "NUMERIC"]]
104+
assert sorted(i1info["fields"]) == [
105+
["identifier", "f1", "attribute", "f1", "type", "TEXT"],
106+
["identifier", "f2", "attribute", "f2", "type", "NUMERIC"],
107+
]
105108

106109
i2info = await i2.info()
107110
assert i2info["num_docs"] == 15
108-
assert sorted(i2info["fields"]) == [["f3", "type", "NUMERIC"], ["f4", "type", "TAG"]]
111+
assert sorted(i2info["fields"]) == [
112+
["identifier", "f3", "attribute", "f3", "type", "NUMERIC"],
113+
["identifier", "f4", "attribute", "f4", "type", "TAG"],
114+
]
109115

110116
await i1.dropindex()
111117
await i2.dropindex()

0 commit comments

Comments
 (0)