diff --git a/src/core/search/CMakeLists.txt b/src/core/search/CMakeLists.txt index adbcd1257a22..7c4b03d1b406 100644 --- a/src/core/search/CMakeLists.txt +++ b/src/core/search/CMakeLists.txt @@ -13,4 +13,4 @@ cxx_test(compressed_sorted_set_test query_parser LABELS DFLY) cxx_test(block_list_test query_parser LABELS DFLY) cxx_test(rax_tree_test redis_test_lib LABELS DFLY) cxx_test(search_parser_test query_parser LABELS DFLY) -cxx_test(search_test query_parser LABELS DFLY) +cxx_test(search_test redis_test_lib query_parser LABELS DFLY) diff --git a/src/core/search/ast_expr.cc b/src/core/search/ast_expr.cc index 767e8797877f..86c200c3e24f 100644 --- a/src/core/search/ast_expr.cc +++ b/src/core/search/ast_expr.cc @@ -16,7 +16,11 @@ using namespace std; namespace dfly::search { -AstTermNode::AstTermNode(string term) : term{term} { +AstTermNode::AstTermNode(string term) : term{std::move(term)} { +} + +AstPrefixNode::AstPrefixNode(string prefix) : prefix{std::move(prefix)} { + this->prefix.pop_back(); } AstRangeNode::AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl) diff --git a/src/core/search/ast_expr.h b/src/core/search/ast_expr.h index d302e599cc6e..ef1663375bae 100644 --- a/src/core/search/ast_expr.h +++ b/src/core/search/ast_expr.h @@ -24,11 +24,17 @@ struct AstStarNode {}; // Matches terms in text fields struct AstTermNode { - AstTermNode(std::string term); + explicit AstTermNode(std::string term); std::string term; }; +struct AstPrefixNode { + explicit AstPrefixNode(std::string prefix); + + std::string prefix; +}; + // Matches numeric range struct AstRangeNode { AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl); @@ -97,8 +103,8 @@ struct AstSortNode { }; using NodeVariants = - std::variant; + std::variant; struct AstNode : public NodeVariants { using variant::variant; diff --git a/src/core/search/indices.cc b/src/core/search/indices.cc index 01121d552eff..3f7939d8f649 100644 --- a/src/core/search/indices.cc +++ b/src/core/search/indices.cc 
@@ -123,6 +123,15 @@ const typename BaseStringIndex::Container* BaseStringIndex::Matching(strin return (it != entries_.end()) ? &it->second : nullptr; } +template +void BaseStringIndex::MatchingPrefix(std::string_view prefix, + absl::FunctionRef cb) const { + for (auto it = entries_.lower_bound(prefix); + it != entries_.end() && (*it).first.rfind(prefix, 0) == 0; ++it) { + cb(&(*it).second); + } +} + template typename BaseStringIndex::Container* BaseStringIndex::GetOrCreate(string_view word) { auto* mr = entries_.get_allocator().resource(); diff --git a/src/core/search/indices.h b/src/core/search/indices.h index 84bedd8eb3a1..61c8a6a01853 100644 --- a/src/core/search/indices.h +++ b/src/core/search/indices.h @@ -11,10 +11,12 @@ #include #include +#include "absl/functional/function_ref.h" #include "base/pmr/memory_resource.h" #include "core/search/base.h" #include "core/search/block_list.h" #include "core/search/compressed_sorted_set.h" +#include "core/search/rax_tree.h" // TODO: move core field definitions out of big header #include "core/search/search.h" @@ -51,37 +53,17 @@ template struct BaseStringIndex : public BaseIndex { // Pointer is valid as long as index is not mutated. Nullptr if not found const Container* Matching(std::string_view str) const; + // Iterate over all entries matching the prefix. + void MatchingPrefix(std::string_view prefix, absl::FunctionRef cb) const; + // Returns all the terms that appear as keys in the reverse index. 
std::vector GetTerms() const; protected: Container* GetOrCreate(std::string_view word); - struct PmrEqual { - using is_transparent = void; - bool operator()(const PMR_NS::string& lhs, const PMR_NS::string& rhs) const { - return lhs == rhs; - } - bool operator()(const PMR_NS::string& lhs, const std::string_view& rhs) const { - return lhs == rhs; - } - }; - - struct PmrHash { - using is_transparent = void; - size_t operator()(const std::string_view& sv) const { - return absl::Hash()(sv); - } - size_t operator()(const PMR_NS::string& pmrs) const { - return operator()(std::string_view{pmrs.data(), pmrs.size()}); - } - }; - bool case_sensitive_ = false; - - absl::flat_hash_map>> - entries_; + search::RaxTreeMap entries_; }; // Index for text fields. diff --git a/src/core/search/lexer.lex b/src/core/search/lexer.lex index 3fa6d1713992..4dd9936f6877 100644 --- a/src/core/search/lexer.lex +++ b/src/core/search/lexer.lex @@ -74,6 +74,7 @@ tag_val_char {term_char}|\\[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ] "$"{term_char}+ return ParseParam(str(), loc()); "@"{term_char}+ return Parser::make_FIELD(str(), loc()); +{term_char}+"*" return Parser::make_PREFIX(str(), loc()); {term_char}+ return Parser::make_TERM(str(), loc()); {tag_val_char}+ return make_TagVal(str(), loc()); diff --git a/src/core/search/parser.y b/src/core/search/parser.y index 42e5113b74e5..b831d85e2c0e 100644 --- a/src/core/search/parser.y +++ b/src/core/search/parser.y @@ -69,7 +69,7 @@ double toDouble(string_view src); // Needed 0 at the end to satisfy bison 3.5.1 %token YYEOF 0 -%token TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" +%token TERM "term" TAG_VAL "tag_val" PARAM "param" FIELD "field" PREFIX "prefix" %precedence TERM TAG_VAL %left OR_OP @@ -132,6 +132,7 @@ search_unary_expr: LPAREN search_expr RPAREN { $$ = std::move($2); } | NOT_OP search_unary_expr { $$ = AstNegateNode(std::move($2)); } | TERM { $$ = AstTermNode(std::move($1)); } + | PREFIX { $$ = AstPrefixNode(std::move($1)); } | 
UINT32 { $$ = AstTermNode(std::move($1)); } | FIELD COLON field_cond { $$ = AstFieldNode(std::move($1), std::move($3)); } diff --git a/src/core/search/rax_tree.h b/src/core/search/rax_tree.h index d21273aea4b0..267a07680692 100644 --- a/src/core/search/rax_tree.h +++ b/src/core/search/rax_tree.h @@ -1,10 +1,9 @@ #pragma once -#include - -#include +#include #include #include +#include #include "base/pmr/memory_resource.h" @@ -17,6 +16,7 @@ namespace dfly::search { // absl::flat_hash_map/std::unordered_map compatible tree map based on rax tree. // Allocates all objects on heap (with custom memory resource) as rax tree operates fully on // pointers. +// TODO: Add full support for polymorphic allocators, including rax trie node allocations template struct RaxTreeMap { struct FindIterator; @@ -87,7 +87,7 @@ template struct RaxTreeMap { }; public: - explicit RaxTreeMap(PMR_NS::memory_resource* mr) : tree_(raxNew()), mr_(mr) { + explicit RaxTreeMap(PMR_NS::memory_resource* mr) : tree_(raxNew()), alloc_(mr) { } size_t size() const { @@ -119,7 +119,12 @@ template struct RaxTreeMap { V* old = nullptr; raxRemove(tree_, to_key_ptr(it->first.data()), it->first.size(), reinterpret_cast(&old)); - mr_->deallocate(old, sizeof(V), alignof(V)); + alloc_.destroy(old); + alloc_.deallocate(old, 1); + } + + auto& get_allocator() const { + return alloc_; } private: @@ -128,7 +133,7 @@ template struct RaxTreeMap { } rax* tree_; - PMR_NS::memory_resource* mr_; + PMR_NS::polymorphic_allocator alloc_; }; template @@ -138,15 +143,14 @@ std::pair::FindIterator, bool> RaxTreeMap::try_emplace if (auto it = find(key); it) return {it, false}; - void* ptr = mr_->allocate(sizeof(V), alignof(V)); - V* data = new (ptr) V(std::forward(args)...); - assert(uint64_t(ptr) == uint64_t(data)); // we free by the latter + V* ptr = alloc_.allocate(1); + alloc_.construct(ptr, std::forward(args)...); V* old = nullptr; - raxInsert(tree_, to_key_ptr(key), key.size(), data, reinterpret_cast(&old)); + raxInsert(tree_, 
to_key_ptr(key), key.size(), ptr, reinterpret_cast(&old)); assert(old == nullptr); - auto it = std::make_optional(std::pair(key, *data)); + auto it = std::make_optional(std::pair(key, *ptr)); return std::make_pair(FindIterator{it}, true); } diff --git a/src/core/search/search.cc b/src/core/search/search.cc index 2ce14b97f2f2..a378cb80dc86 100644 --- a/src/core/search/search.cc +++ b/src/core/search/search.cc @@ -119,6 +119,7 @@ struct ProfileBuilder { Overloaded node_info{ [](monostate) -> string { return ""s; }, [](const AstTermNode& n) { return absl::StrCat("Term{", n.term, "}"); }, + [](const AstPrefixNode& n) { return absl::StrCat("Prefix{", n.prefix, "}"); }, [](const AstRangeNode& n) { return absl::StrCat("Range{", n.lo, "<>", n.hi, "}"); }, [](const AstLogicalNode& n) { auto op = n.op == AstLogicalNode::AND ? "and" : "or"; @@ -270,6 +271,28 @@ struct BasicSearch { return UnifyResults(GetSubResults(selected_indices, mapping), LogicOp::OR); } + IndexResult Search(const AstPrefixNode& node, string_view active_field) { + vector indices; + if (!active_field.empty()) { + if (auto* index = GetIndex(active_field); index) + indices = {index}; + else + return IndexResult{}; + } else { + indices = indices_->GetAllTextIndices(); + } + + auto mapping = [&node, this](TextIndex* index) { + IndexResult result{}; + index->MatchingPrefix(node.prefix, [&result, this](const auto* c) { + Merge(IndexResult{c}, &result, LogicOp::OR); + }); + return result; + }; + + return UnifyResults(GetSubResults(indices, mapping), LogicOp::OR); + } + // [range]: access field's numeric index IndexResult Search(const AstRangeNode& node, string_view active_field) { DCHECK(!active_field.empty()); diff --git a/src/core/search/search_parser_test.cc b/src/core/search/search_parser_test.cc index e32fc3fa3ef9..93baef9341ae 100644 --- a/src/core/search/search_parser_test.cc +++ b/src/core/search/search_parser_test.cc @@ -73,8 +73,10 @@ TEST_F(SearchParserTest, Scanner) { NEXT_EQ(TOK_TERM, string, "cd"); 
NEXT_TOK(TOK_YYEOF); - SetInput("(5a 6) "); + SetInput("*"); + NEXT_TOK(TOK_STAR); + SetInput("(5a 6) "); NEXT_TOK(TOK_LPAREN); NEXT_EQ(TOK_TERM, string, "5a"); NEXT_EQ(TOK_UINT32, string, "6"); @@ -151,6 +153,36 @@ TEST_F(SearchParserTest, Scanner) { NEXT_EQ(TOK_TAG_VAL, string, "blue]1#-"); NEXT_TOK(TOK_RCURLBR); + // Prefix simple + SetInput("pre*"); + NEXT_EQ(TOK_PREFIX, string, "pre*"); + + // TODO: uncomment when we support escaped terms + // Prefix escaped (redis doesn't support quoted prefix matches) + // SetInput("pre\\**"); + // NEXT_EQ(TOK_PREFIX, string, "pre*"); + + // Prefix in tag + SetInput("@color:{prefix*}"); + NEXT_EQ(TOK_FIELD, string, "@color"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_PREFIX, string, "prefix*"); + NEXT_TOK(TOK_RCURLBR); + + // Prefix escaped star + SetInput("@color:{\"prefix*\"}"); + NEXT_EQ(TOK_FIELD, string, "@color"); + NEXT_TOK(TOK_COLON); + NEXT_TOK(TOK_LCURLBR); + NEXT_EQ(TOK_TERM, string, "prefix*"); + NEXT_TOK(TOK_RCURLBR); + + // Prefix spaced with star + SetInput("pre *"); + NEXT_EQ(TOK_TERM, string, "pre"); + NEXT_TOK(TOK_STAR); + SetInput("почтальон Печкин"); NEXT_EQ(TOK_TERM, string, "почтальон"); NEXT_EQ(TOK_TERM, string, "Печкин"); @@ -172,6 +204,12 @@ TEST_F(SearchParserTest, Parse) { EXPECT_EQ(1, Parse(" foo:bar ")); EXPECT_EQ(1, Parse(" @foo:@bar ")); EXPECT_EQ(1, Parse(" @foo: ")); + + // We don't support suffix/any other position for now + EXPECT_EQ(1, Parse("*pre")); + EXPECT_EQ(1, Parse("*pre*")); + + EXPECT_EQ(1, Parse("pre***")); } TEST_F(SearchParserTest, ParseParams) { diff --git a/src/core/search/search_test.cc b/src/core/search/search_test.cc index a5bc5495acc9..f61eb37e7661 100644 --- a/src/core/search/search_test.cc +++ b/src/core/search/search_test.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,10 @@ #include "core/search/query_driver.h" #include "core/search/vector_utils.h" +extern "C" { +#include "redis/zmalloc.h" +} + namespace 
dfly { namespace search { @@ -80,6 +85,11 @@ Schema MakeSimpleSchema(initializer_list