1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 16 #define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <optional> 21 #include <string> 22 #include <unordered_map> 23 #include <unordered_set> 24 #include <vector> 25 26 #include "icing/text_classifier/lib3/utils/base/status.h" 27 #include "icing/text_classifier/lib3/utils/base/statusor.h" 28 #include "icing/monkey_test/monkey-test-util.h" 29 #include "icing/monkey_test/monkey-tokenized-document.h" 30 #include "icing/proto/document.pb.h" 31 #include "icing/proto/schema.pb.h" 32 #include "icing/proto/search.pb.h" 33 #include "icing/proto/term.pb.h" 34 #include "icing/store/document-id.h" 35 36 namespace icing { 37 namespace lib { 38 39 class InMemoryIcingSearchEngine { 40 public: 41 struct PickDocumentResult { 42 std::string name_space; 43 std::string uri; 44 // document is empty if and only if such (name_space, uri) is not alive 45 // in the in-memory icing. 46 std::optional<DocumentProto> document; 47 }; 48 InMemoryIcingSearchEngine(MonkeyTestRandomEngine * random)49 InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random) : random_(random) {} 50 GetNumAliveDocuments()51 uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); } 52 GetSchema()53 const SchemaProto *GetSchema() const { return schema_.get(); } 54 55 void SetSchema(SchemaProto &&schema); 56 57 // Randomly pick a document from the in-memory Icing for monkey testing. 58 // 59 // p_alive: chance of getting an alive document. 60 // p_all: chance of getting a document that has ever been "Put" before, 61 // including already "Delete"d documents. 62 // p_other: chance of getting a random namespace + uri that has never been 63 // "Put" before. 64 // 65 // p_alive, p_all, and p_other is required to be positive and sum to 1. 66 // Otherwise, they will be normalized to ensure this. 67 // 68 // Returns an instance of PickDocumentResult. 69 PickDocumentResult RandomPickDocument(float p_alive, float p_all, 70 float p_other) const; 71 72 // Puts the document into the in-memory Icing. If the (namespace, uri) pair 73 // already exists, the old document will be overwritten. 74 void Put(const MonkeyTokenizedDocument &document); 75 76 std::unordered_set<std::string> GetAllNamespaces() const; 77 78 // Deletes the Document specified by the given (namespace, uri) pair. 79 // 80 // Returns: 81 // OK on success 82 // NOT_FOUND if no document exists with namespace, uri 83 libtextclassifier3::Status Delete(const std::string &name_space, 84 const std::string &uri); 85 86 // Deletes all Documents belonging to the specified namespace. 87 // 88 // Returns: 89 // The number of deleted documents on success 90 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 91 libtextclassifier3::StatusOr<uint32_t> DeleteByNamespace( 92 const std::string &name_space); 93 94 // Deletes all Documents belonging to the specified type 95 // 96 // Returns: 97 // The number of deleted documents on success 98 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 99 libtextclassifier3::StatusOr<uint32_t> DeleteBySchemaType( 100 const std::string &schema_type); 101 102 // Deletes all Documents that match the query specified in search_spec. 103 // Currently, only the "query" and "term_match_type" fields are recognized by 104 // the in-memory Icing, and only single term queries with possible section 105 // restrictions are supported. 106 // 107 // Returns: 108 // The number of deleted documents on success 109 // INTERNAL_ERROR if there are inconsistencies in the in-memory Icing 110 libtextclassifier3::StatusOr<uint32_t> DeleteByQuery( 111 const SearchSpecProto &search_spec); 112 113 // Retrieves documents according to search_spec. 114 // Currently, only the "query" and "term_match_type" fields are recognized by 115 // the in-memory Icing, and only single term queries with possible section 116 // restrictions are supported. 117 libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search( 118 const SearchSpecProto &search_spec) const; 119 120 private: 121 // Does not own. 122 MonkeyTestRandomEngine *random_; 123 124 std::vector<MonkeyTokenizedDocument> documents_; 125 std::vector<DocumentId> existing_doc_ids_; 126 // A map from namespaces to uris and then from uris to internal document ids, 127 // which is used for fast lookups. 128 std::unordered_map<std::string, std::unordered_map<std::string, DocumentId>> 129 namespace_uri_docid_map; 130 131 std::unique_ptr<SchemaProto> schema_; 132 // A map that maps from (schema_type, property_name) to the corresponding 133 // PropertyConfigProto. 134 std::unordered_map< 135 std::string, std::unordered_map<std::string, const PropertyConfigProto &>> 136 property_config_map_; 137 138 // Finds and returns the internal document id for the document identified by 139 // the given key (namespace, uri) 140 // 141 // Returns: 142 // The document id found on success 143 // NOT_FOUND if the key doesn't exist or doc has been deleted 144 libtextclassifier3::StatusOr<DocumentId> InternalGet( 145 const std::string &name_space, const std::string &uri) const; 146 147 // A helper method for DeleteByQuery and Search to get matched internal doc 148 // ids. 149 libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch( 150 const SearchSpecProto &search_spec) const; 151 152 libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig( 153 const std::string &schema_type, const std::string &property_name) const; 154 155 libtextclassifier3::StatusOr<TermMatchType::Code> GetTermMatchType( 156 const std::string &schema_type, 157 const MonkeyTokenizedSection §ion) const; 158 159 libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery( 160 const MonkeyTokenizedDocument &document, const std::string &query, 161 TermMatchType::Code term_match_type) const; 162 }; 163 164 } // namespace lib 165 } // namespace icing 166 167 #endif // ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_ 168