1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 16 #define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <string_view> 22 23 #include "icing/text_classifier/lib3/utils/base/status.h" 24 #include "icing/text_classifier/lib3/utils/base/statusor.h" 25 #include "icing/file/persistent-storage.h" 26 #include "icing/index/iterator/doc-hit-info-iterator.h" 27 #include "icing/schema/schema-store.h" 28 #include "icing/schema/section.h" 29 #include "icing/store/document-id.h" 30 #include "icing/store/document-store.h" 31 32 namespace icing { 33 namespace lib { 34 35 template <typename T> 36 class NumericIndex : public PersistentStorage { 37 public: 38 using value_type = T; 39 40 // Editor class for batch adding new records into numeric index for a given 41 // property, DocumentId and SectionId. The caller should use BufferKey to 42 // buffer a key (calls several times for multiple keys) and finally call 43 // IndexAllBufferedKeys to batch add all buffered keys (with DocumentId + 44 // SectionId info, i.e. BasicHit) into numeric index. 45 // 46 // For example, there are values = [5, 1, 10, -100] in DocumentId = 5, 47 // SectionId = 1 (property "timestamp"). 48 // Then the client should call BufferKey(5), BufferKey(1), BufferKey(10), 49 // BufferKey(-100) first, and finally call IndexAllBufferedKeys once to batch 50 // add these records into numeric index. 51 class Editor { 52 public: Editor(std::string_view property_path,DocumentId document_id,SectionId section_id)53 explicit Editor(std::string_view property_path, DocumentId document_id, 54 SectionId section_id) 55 : property_path_(property_path), 56 document_id_(document_id), 57 section_id_(section_id) {} 58 59 virtual ~Editor() = default; 60 61 // Buffers a new key. 62 // 63 // Returns: 64 // - OK on success 65 // - Any other errors, depending on the actual implementation 66 virtual libtextclassifier3::Status BufferKey(T key) = 0; 67 68 // Adds all buffered keys into numeric index. 69 // 70 // Returns: 71 // - OK on success 72 // - Any other errors, depending on the actual implementation 73 virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0; 74 75 protected: 76 std::string property_path_; 77 DocumentId document_id_; 78 SectionId section_id_; 79 }; 80 81 // Iterator class for numeric index range query [key_lower, key_upper] 82 // (inclusive for both side) on a given property (see GetIterator). There are 83 // some basic requirements for implementation: 84 // - Iterates through all relevant doc hits. 85 // - Merges multiple SectionIds of doc hits with same DocumentId into a single 86 // SectionIdMask and constructs DocHitInfo. 87 // - Returns DocHitInfo in descending DocumentId order. 88 // 89 // For example, relevant doc hits (DocumentId, SectionId) are [(2, 0), (4, 3), 90 // (2, 1), (6, 2), (4, 2)]. Advance() and GetDocHitInfo() should return 91 // DocHitInfo(6, SectionIdMask(2)), DocHitInfo(4, SectionIdMask(2, 3)) and 92 // DocHitInfo(2, SectionIdMask(0, 1)). 93 class Iterator { 94 public: Iterator(T key_lower,T key_upper)95 explicit Iterator(T key_lower, T key_upper) 96 : key_lower_(key_lower), key_upper_(key_upper) {} 97 98 virtual ~Iterator() = default; 99 100 virtual libtextclassifier3::Status Advance() = 0; 101 102 virtual DocHitInfo GetDocHitInfo() const = 0; 103 104 virtual int32_t GetNumAdvanceCalls() const = 0; 105 106 virtual int32_t GetNumBlocksInspected() const = 0; 107 108 protected: 109 T key_lower_; 110 T key_upper_; 111 }; 112 113 virtual ~NumericIndex() = default; 114 115 // Returns an Editor instance for adding new records into numeric index for a 116 // given property, DocumentId and SectionId. See Editor for more details. 117 virtual std::unique_ptr<Editor> Edit(std::string_view property_path, 118 DocumentId document_id, 119 SectionId section_id) = 0; 120 121 // Returns a DocHitInfoIteratorNumeric (in DocHitInfoIterator interface type 122 // format) for iterating through all docs which have the specified (numeric) 123 // property contents in range [key_lower, key_upper]. 124 // 125 // In general, different numeric index implementations require different data 126 // iterator implementations, so class Iterator is an abstraction of the data 127 // iterator and DocHitInfoIteratorNumeric can work with any implementation of 128 // it. See Iterator and DocHitInfoIteratorNumeric for more details. 129 // 130 // Returns: 131 // - std::unique_ptr<DocHitInfoIterator> on success 132 // - NOT_FOUND_ERROR if there is no numeric index for property_path 133 // - INVALID_ARGUMENT_ERROR if key_lower > key_upper 134 // - Any other errors, depending on the actual implementation 135 virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> 136 GetIterator(std::string_view property_path, T key_lower, T key_upper, 137 const DocumentStore& document_store, 138 const SchemaStore& schema_store, 139 int64_t current_time_ms) const = 0; 140 141 // Reduces internal file sizes by reclaiming space and ids of deleted 142 // documents. Numeric index will convert all data (hits) to the new document 143 // ids and regenerate all index files. If all data in a property path are 144 // completely deleted, then the underlying storage must be discarded as well. 145 // 146 // - document_id_old_to_new: a map for converting old document id to new 147 // document id. 148 // - new_last_added_document_id: will be used to update the last added 149 // document id in the numeric index. 150 // 151 // Returns: 152 // - OK on success 153 // - Any other errors, depending on the actual implementation 154 virtual libtextclassifier3::Status Optimize( 155 const std::vector<DocumentId>& document_id_old_to_new, 156 DocumentId new_last_added_document_id) = 0; 157 158 // Clears all data in the integer index and set last_added_document_id to 159 // kInvalidDocumentId. 160 // 161 // Returns: 162 // - OK on success 163 // - Any other errors, depending on the actual implementation 164 virtual libtextclassifier3::Status Clear() = 0; 165 166 // Returns the largest document_id added to the index. Note that DocumentIds 167 // are always inserted in increasing order. 168 virtual DocumentId last_added_document_id() const = 0; 169 170 // Sets last_added_document_id to document_id so long as document_id > 171 // last_added_document_id() or last_added_document_id() is invalid. 172 virtual void set_last_added_document_id(DocumentId document_id) = 0; 173 174 // The number of individual indices that the NumericIndex has created to 175 // search over all indexed properties thus far. 176 virtual int num_property_indices() const = 0; 177 178 protected: NumericIndex(const Filesystem & filesystem,std::string && working_path,PersistentStorage::WorkingPathType working_path_type)179 explicit NumericIndex(const Filesystem& filesystem, 180 std::string&& working_path, 181 PersistentStorage::WorkingPathType working_path_type) 182 : PersistentStorage(filesystem, std::move(working_path), 183 working_path_type) {} 184 185 virtual libtextclassifier3::Status PersistStoragesToDisk( 186 bool force) override = 0; 187 188 virtual libtextclassifier3::Status PersistMetadataToDisk( 189 bool force) override = 0; 190 191 virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum( 192 bool force) override = 0; 193 194 virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum( 195 bool force) override = 0; 196 197 virtual Crcs& crcs() override = 0; 198 virtual const Crcs& crcs() const override = 0; 199 }; 200 201 } // namespace lib 202 } // namespace icing 203 204 #endif // ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 205