• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
16 #define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/file/file-backed-vector.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/file/persistent-storage.h"
31 #include "icing/join/doc-join-info.h"
32 #include "icing/join/qualified-id-join-index.h"
33 #include "icing/schema/joinable-property.h"
34 #include "icing/store/document-filter-data.h"
35 #include "icing/store/document-id.h"
36 #include "icing/store/key-mapper.h"
37 #include "icing/store/namespace-fingerprint-identifier.h"
38 #include "icing/store/namespace-id.h"
39 #include "icing/util/crc32.h"
40 
41 namespace icing {
42 namespace lib {
43 
44 // QualifiedIdJoinIndexImplV1: a class to maintain data mapping DocJoinInfo to
45 // joinable qualified ids and delete propagation info.
46 class QualifiedIdJoinIndexImplV1 : public QualifiedIdJoinIndex {
47  public:
48   struct Info {
49     static constexpr int32_t kMagic = 0x48cabdc6;
50 
51     int32_t magic;
52     DocumentId last_added_document_id;
53 
ComputeChecksumInfo54     Crc32 ComputeChecksum() const {
55       return Crc32(
56           std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
57     }
58   } __attribute__((packed));
59   static_assert(sizeof(Info) == 8, "");
60 
61   // Metadata file layout: <Crcs><Info>
62   static constexpr int32_t kCrcsMetadataBufferOffset = 0;
63   static constexpr int32_t kInfoMetadataBufferOffset =
64       static_cast<int32_t>(sizeof(Crcs));
65   static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
66   static_assert(kMetadataFileSize == 20, "");
67 
68   // Creates a QualifiedIdJoinIndexImplV1 instance to store qualified ids for
69   // future joining search. If any of the underlying file is missing, then
70   // delete the whole working_path and (re)initialize with new ones. Otherwise
71   // initialize and create the instance by existing files.
72   //
73   // filesystem: Object to make system level calls
74   // working_path: Specifies the working path for PersistentStorage.
75   //               QualifiedIdJoinIndexImplV1 uses working path as working
76   //               directory and all related files will be stored under this
77   //               directory. It takes full ownership and of working_path_,
78   //               including creation/deletion. It is the caller's
79   //               responsibility to specify correct working path and avoid
80   //               mixing different persistent storages together under the same
81   //               path. Also the caller has the ownership for the parent
82   //               directory of working_path_, and it is responsible for parent
83   //               directory creation/deletion. See PersistentStorage for more
84   //               details about the concept of working_path.
85   // pre_mapping_fbv: flag indicating whether memory map max possible file size
86   //                  for underlying FileBackedVector before growing the actual
87   //                  file size.
88   // use_persistent_hash_map: flag indicating whether use persistent hash map as
89   //                          the key mapper (if false, then fall back to
90   //                          dynamic trie key mapper).
91   //
92   // Returns:
93   //   - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
94   //                               checksum
95   //   - INTERNAL_ERROR on I/O errors
96   //   - Any KeyMapper errors
97   static libtextclassifier3::StatusOr<
98       std::unique_ptr<QualifiedIdJoinIndexImplV1>>
99   Create(const Filesystem& filesystem, std::string working_path,
100          bool pre_mapping_fbv, bool use_persistent_hash_map);
101 
102   // Delete copy and move constructor/assignment operator.
103   QualifiedIdJoinIndexImplV1(const QualifiedIdJoinIndexImplV1&) = delete;
104   QualifiedIdJoinIndexImplV1& operator=(const QualifiedIdJoinIndexImplV1&) =
105       delete;
106 
107   QualifiedIdJoinIndexImplV1(QualifiedIdJoinIndexImplV1&&) = delete;
108   QualifiedIdJoinIndexImplV1& operator=(QualifiedIdJoinIndexImplV1&&) = delete;
109 
110   ~QualifiedIdJoinIndexImplV1() override;
111 
112   // v2 only API. Returns UNIMPLEMENTED_ERROR.
Put(SchemaTypeId schema_type_id,JoinablePropertyId joinable_property_id,DocumentId document_id,std::vector<NamespaceFingerprintIdentifier> && ref_namespace_fingerprint_ids)113   libtextclassifier3::Status Put(SchemaTypeId schema_type_id,
114                                  JoinablePropertyId joinable_property_id,
115                                  DocumentId document_id,
116                                  std::vector<NamespaceFingerprintIdentifier>&&
117                                      ref_namespace_fingerprint_ids) override {
118     return absl_ports::UnimplementedError("This API is not supported in V2");
119   }
120 
121   // v2 only API. Returns UNIMPLEMENTED_ERROR.
122   libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
GetIterator(SchemaTypeId schema_type_id,JoinablePropertyId joinable_property_id)123   GetIterator(SchemaTypeId schema_type_id,
124               JoinablePropertyId joinable_property_id) const override {
125     return absl_ports::UnimplementedError("This API is not supported in V2");
126   }
127 
128   // Puts a new data into index: DocJoinInfo (DocumentId, JoinablePropertyId)
129   // references to ref_qualified_id_str (the identifier of another document).
130   //
131   // REQUIRES: ref_qualified_id_str contains no '\0'.
132   //
133   // Returns:
134   //   - OK on success
135   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
136   //   - Any KeyMapper errors
137   libtextclassifier3::Status Put(
138       const DocJoinInfo& doc_join_info,
139       std::string_view ref_qualified_id_str) override;
140 
141   // Gets the referenced document's qualified id string by DocJoinInfo.
142   //
143   // Returns:
144   //   - A qualified id string referenced by the given DocJoinInfo (DocumentId,
145   //     JoinablePropertyId) on success
146   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
147   //   - NOT_FOUND_ERROR if doc_join_info doesn't exist
148   //   - Any KeyMapper errors
149   libtextclassifier3::StatusOr<std::string_view> Get(
150       const DocJoinInfo& doc_join_info) const override;
151 
152   // Reduces internal file sizes by reclaiming space and ids of deleted
153   // documents. Qualified id type joinable index will convert all entries to the
154   // new document ids.
155   //
156   // - document_id_old_to_new: a map for converting old document id to new
157   //   document id.
158   // - namespace_id_old_to_new: a map for converting old namespace id to new
159   //   namespace id. It is unused in this implementation since we store raw
160   //   qualified id string (which contains raw namespace string).
161   // - new_last_added_document_id: will be used to update the last added
162   //                               document id in the qualified id type joinable
163   //                               index.
164   //
165   // Returns:
166   //   - OK on success
167   //   - INTERNAL_ERROR on I/O error. This could potentially leave the index in
168   //     an invalid state and the caller should handle it properly (e.g. discard
169   //     and rebuild)
170   libtextclassifier3::Status Optimize(
171       const std::vector<DocumentId>& document_id_old_to_new,
172       const std::vector<NamespaceId>& namespace_id_old_to_new,
173       DocumentId new_last_added_document_id) override;
174 
175   // Clears all data and set last_added_document_id to kInvalidDocumentId.
176   //
177   // Returns:
178   //   - OK on success
179   //   - INTERNAL_ERROR on I/O error
180   libtextclassifier3::Status Clear() override;
181 
is_v2()182   bool is_v2() const override { return false; }
183 
size()184   int32_t size() const override { return doc_join_info_mapper_->num_keys(); }
185 
empty()186   bool empty() const override { return size() == 0; }
187 
last_added_document_id()188   DocumentId last_added_document_id() const override {
189     return info().last_added_document_id;
190   }
191 
set_last_added_document_id(DocumentId document_id)192   void set_last_added_document_id(DocumentId document_id) override {
193     SetInfoDirty();
194 
195     Info& info_ref = info();
196     if (info_ref.last_added_document_id == kInvalidDocumentId ||
197         document_id > info_ref.last_added_document_id) {
198       info_ref.last_added_document_id = document_id;
199     }
200   }
201 
202  private:
QualifiedIdJoinIndexImplV1(const Filesystem & filesystem,std::string && working_path,std::unique_ptr<uint8_t[]> metadata_buffer,std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,std::unique_ptr<FileBackedVector<char>> qualified_id_storage,bool pre_mapping_fbv,bool use_persistent_hash_map)203   explicit QualifiedIdJoinIndexImplV1(
204       const Filesystem& filesystem, std::string&& working_path,
205       std::unique_ptr<uint8_t[]> metadata_buffer,
206       std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper,
207       std::unique_ptr<FileBackedVector<char>> qualified_id_storage,
208       bool pre_mapping_fbv, bool use_persistent_hash_map)
209       : QualifiedIdJoinIndex(filesystem, std::move(working_path)),
210         metadata_buffer_(std::move(metadata_buffer)),
211         doc_join_info_mapper_(std::move(doc_join_info_mapper)),
212         qualified_id_storage_(std::move(qualified_id_storage)),
213         pre_mapping_fbv_(pre_mapping_fbv),
214         use_persistent_hash_map_(use_persistent_hash_map),
215         is_info_dirty_(false),
216         is_storage_dirty_(false) {}
217 
218   static libtextclassifier3::StatusOr<
219       std::unique_ptr<QualifiedIdJoinIndexImplV1>>
220   InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
221                      bool pre_mapping_fbv, bool use_persistent_hash_map);
222 
223   static libtextclassifier3::StatusOr<
224       std::unique_ptr<QualifiedIdJoinIndexImplV1>>
225   InitializeExistingFiles(const Filesystem& filesystem,
226                           std::string&& working_path, bool pre_mapping_fbv,
227                           bool use_persistent_hash_map);
228 
229   // Transfers qualified id join index data from the current to new_index and
230   // convert to new document id according to document_id_old_to_new. It is a
231   // helper function for Optimize.
232   //
233   // Returns:
234   //   - OK on success
235   //   - INTERNAL_ERROR on I/O error
236   libtextclassifier3::Status TransferIndex(
237       const std::vector<DocumentId>& document_id_old_to_new,
238       QualifiedIdJoinIndexImplV1* new_index) const;
239 
240   // Flushes contents of metadata file.
241   //
242   // Returns:
243   //   - OK on success
244   //   - INTERNAL_ERROR on I/O error
245   libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
246 
247   // Flushes contents of all storages to underlying files.
248   //
249   // Returns:
250   //   - OK on success
251   //   - INTERNAL_ERROR on I/O error
252   libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
253 
254   // Computes and returns Info checksum.
255   //
256   // Returns:
257   //   - Crc of the Info on success
258   libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
259 
260   // Computes and returns all storages checksum.
261   //
262   // Returns:
263   //   - Crc of all storages on success
264   //   - INTERNAL_ERROR if any data inconsistency
265   libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
266       bool force) override;
267 
crcs()268   Crcs& crcs() override {
269     return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
270                                     kCrcsMetadataBufferOffset);
271   }
272 
crcs()273   const Crcs& crcs() const override {
274     return *reinterpret_cast<const Crcs*>(metadata_buffer_.get() +
275                                           kCrcsMetadataBufferOffset);
276   }
277 
info()278   Info& info() {
279     return *reinterpret_cast<Info*>(metadata_buffer_.get() +
280                                     kInfoMetadataBufferOffset);
281   }
282 
info()283   const Info& info() const {
284     return *reinterpret_cast<const Info*>(metadata_buffer_.get() +
285                                           kInfoMetadataBufferOffset);
286   }
287 
SetInfoDirty()288   void SetInfoDirty() { is_info_dirty_ = true; }
289   // When storage is dirty, we have to set info dirty as well. So just expose
290   // SetDirty to set both.
SetDirty()291   void SetDirty() {
292     is_info_dirty_ = true;
293     is_storage_dirty_ = true;
294   }
295 
is_info_dirty()296   bool is_info_dirty() const { return is_info_dirty_; }
is_storage_dirty()297   bool is_storage_dirty() const { return is_storage_dirty_; }
298 
299   // Metadata buffer
300   std::unique_ptr<uint8_t[]> metadata_buffer_;
301 
302   // Persistent KeyMapper for mapping (encoded) DocJoinInfo (DocumentId,
303   // JoinablePropertyId) to another referenced document's qualified id string
304   // index in qualified_id_storage_.
305   std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper_;
306 
307   // Storage for qualified id strings.
308   std::unique_ptr<FileBackedVector<char>> qualified_id_storage_;
309 
310   // TODO(b/268521214): add delete propagation storage
311 
312   // Flag indicating whether memory map max possible file size for underlying
313   // FileBackedVector before growing the actual file size.
314   bool pre_mapping_fbv_;
315 
316   // Flag indicating whether use persistent hash map as the key mapper (if
317   // false, then fall back to dynamic trie key mapper).
318   bool use_persistent_hash_map_;
319 
320   bool is_info_dirty_;
321   bool is_storage_dirty_;
322 };
323 
324 }  // namespace lib
325 }  // namespace icing
326 
327 #endif  // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_IMPL_V1_H_
328