• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
16 #define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/file/persistent-storage.h"
26 #include "icing/index/iterator/doc-hit-info-iterator.h"
27 #include "icing/schema/schema-store.h"
28 #include "icing/schema/section.h"
29 #include "icing/store/document-id.h"
30 #include "icing/store/document-store.h"
31 
32 namespace icing {
33 namespace lib {
34 
35 template <typename T>
36 class NumericIndex : public PersistentStorage {
37  public:
38   using value_type = T;
39 
40   // Editor class for batch adding new records into numeric index for a given
41   // property, DocumentId and SectionId. The caller should use BufferKey to
42   // buffer a key (calls several times for multiple keys) and finally call
43   // IndexAllBufferedKeys to batch add all buffered keys (with DocumentId +
44   // SectionId info, i.e. BasicHit) into numeric index.
45   //
46   // For example, there are values = [5, 1, 10, -100] in DocumentId = 5,
47   // SectionId = 1 (property "timestamp").
48   // Then the client should call BufferKey(5), BufferKey(1), BufferKey(10),
49   // BufferKey(-100) first, and finally call IndexAllBufferedKeys once to batch
50   // add these records into numeric index.
51   class Editor {
52    public:
Editor(std::string_view property_path,DocumentId document_id,SectionId section_id)53     explicit Editor(std::string_view property_path, DocumentId document_id,
54                     SectionId section_id)
55         : property_path_(property_path),
56           document_id_(document_id),
57           section_id_(section_id) {}
58 
59     virtual ~Editor() = default;
60 
61     // Buffers a new key.
62     //
63     // Returns:
64     //   - OK on success
65     //   - Any other errors, depending on the actual implementation
66     virtual libtextclassifier3::Status BufferKey(T key) = 0;
67 
68     // Adds all buffered keys into numeric index.
69     //
70     // Returns:
71     //   - OK on success
72     //   - Any other errors, depending on the actual implementation
73     virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0;
74 
75    protected:
76     std::string property_path_;
77     DocumentId document_id_;
78     SectionId section_id_;
79   };
80 
81   // Iterator class for numeric index range query [key_lower, key_upper]
82   // (inclusive for both side) on a given property (see GetIterator). There are
83   // some basic requirements for implementation:
84   // - Iterates through all relevant doc hits.
85   // - Merges multiple SectionIds of doc hits with same DocumentId into a single
86   //   SectionIdMask and constructs DocHitInfo.
87   // - Returns DocHitInfo in descending DocumentId order.
88   //
89   // For example, relevant doc hits (DocumentId, SectionId) are [(2, 0), (4, 3),
90   // (2, 1), (6, 2), (4, 2)]. Advance() and GetDocHitInfo() should return
91   // DocHitInfo(6, SectionIdMask(2)), DocHitInfo(4, SectionIdMask(2, 3)) and
92   // DocHitInfo(2, SectionIdMask(0, 1)).
93   class Iterator {
94    public:
Iterator(T key_lower,T key_upper)95     explicit Iterator(T key_lower, T key_upper)
96         : key_lower_(key_lower), key_upper_(key_upper) {}
97 
98     virtual ~Iterator() = default;
99 
100     virtual libtextclassifier3::Status Advance() = 0;
101 
102     virtual DocHitInfo GetDocHitInfo() const = 0;
103 
104     virtual int32_t GetNumAdvanceCalls() const = 0;
105 
106     virtual int32_t GetNumBlocksInspected() const = 0;
107 
108    protected:
109     T key_lower_;
110     T key_upper_;
111   };
112 
113   virtual ~NumericIndex() = default;
114 
115   // Returns an Editor instance for adding new records into numeric index for a
116   // given property, DocumentId and SectionId. See Editor for more details.
117   virtual std::unique_ptr<Editor> Edit(std::string_view property_path,
118                                        DocumentId document_id,
119                                        SectionId section_id) = 0;
120 
121   // Returns a DocHitInfoIteratorNumeric (in DocHitInfoIterator interface type
122   // format) for iterating through all docs which have the specified (numeric)
123   // property contents in range [key_lower, key_upper].
124   //
125   // In general, different numeric index implementations require different data
126   // iterator implementations, so class Iterator is an abstraction of the data
127   // iterator and DocHitInfoIteratorNumeric can work with any implementation of
128   // it. See Iterator and DocHitInfoIteratorNumeric for more details.
129   //
130   // Returns:
131   //   - std::unique_ptr<DocHitInfoIterator> on success
132   //   - NOT_FOUND_ERROR if there is no numeric index for property_path
133   //   - INVALID_ARGUMENT_ERROR if key_lower > key_upper
134   //   - Any other errors, depending on the actual implementation
135   virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
136   GetIterator(std::string_view property_path, T key_lower, T key_upper,
137               const DocumentStore& document_store,
138               const SchemaStore& schema_store,
139               int64_t current_time_ms) const = 0;
140 
141   // Reduces internal file sizes by reclaiming space and ids of deleted
142   // documents. Numeric index will convert all data (hits) to the new document
143   // ids and regenerate all index files. If all data in a property path are
144   // completely deleted, then the underlying storage must be discarded as well.
145   //
146   // - document_id_old_to_new: a map for converting old document id to new
147   //   document id.
148   // - new_last_added_document_id: will be used to update the last added
149   //                               document id in the numeric index.
150   //
151   // Returns:
152   //   - OK on success
153   //   - Any other errors, depending on the actual implementation
154   virtual libtextclassifier3::Status Optimize(
155       const std::vector<DocumentId>& document_id_old_to_new,
156       DocumentId new_last_added_document_id) = 0;
157 
158   // Clears all data in the integer index and set last_added_document_id to
159   // kInvalidDocumentId.
160   //
161   // Returns:
162   //   - OK on success
163   //   - Any other errors, depending on the actual implementation
164   virtual libtextclassifier3::Status Clear() = 0;
165 
166   // Returns the largest document_id added to the index. Note that DocumentIds
167   // are always inserted in increasing order.
168   virtual DocumentId last_added_document_id() const = 0;
169 
170   // Sets last_added_document_id to document_id so long as document_id >
171   // last_added_document_id() or last_added_document_id() is invalid.
172   virtual void set_last_added_document_id(DocumentId document_id) = 0;
173 
174   // The number of individual indices that the NumericIndex has created to
175   // search over all indexed properties thus far.
176   virtual int num_property_indices() const = 0;
177 
178  protected:
NumericIndex(const Filesystem & filesystem,std::string && working_path,PersistentStorage::WorkingPathType working_path_type)179   explicit NumericIndex(const Filesystem& filesystem,
180                         std::string&& working_path,
181                         PersistentStorage::WorkingPathType working_path_type)
182       : PersistentStorage(filesystem, std::move(working_path),
183                           working_path_type) {}
184 
185   virtual libtextclassifier3::Status PersistStoragesToDisk(
186       bool force) override = 0;
187 
188   virtual libtextclassifier3::Status PersistMetadataToDisk(
189       bool force) override = 0;
190 
191   virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
192       bool force) override = 0;
193 
194   virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
195       bool force) override = 0;
196 
197   virtual Crcs& crcs() override = 0;
198   virtual const Crcs& crcs() const override = 0;
199 };
200 
201 }  // namespace lib
202 }  // namespace icing
203 
204 #endif  // ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
205