• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
16 #define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
17 
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/absl_ports/str_join.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/legacy/index/icing-dynamic-trie.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/store/key-mapper.h"
34 #include "icing/util/crc32.h"
35 #include "icing/util/logging.h"
36 #include "icing/util/status-macros.h"
37 
38 namespace icing {
39 namespace lib {
40 
41 // File-backed mapping between the string key and a trivially copyable value
42 // type.
43 //
44 // DynamicTrieKeyMapper is thread-compatible
45 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
46 class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
47  public:
48   // Returns an initialized instance of DynamicTrieKeyMapper that can
49   // immediately handle read/write operations.
50   // Returns any encountered IO errors.
51   //
52   // base_dir : Base directory used to save all the files required to persist
53   //            DynamicTrieKeyMapper. If this base_dir was previously used to
54   //            create a DynamicTrieKeyMapper, then this existing data would be
55   //            loaded. Otherwise, an empty DynamicTrieKeyMapper would be
56   //            created.
57   // maximum_size_bytes : The maximum allowable size of the key mapper storage.
58   static libtextclassifier3::StatusOr<
59       std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
60   Create(const Filesystem& filesystem, std::string_view base_dir,
61          int maximum_size_bytes);
62 
63   // Deletes all the files associated with the DynamicTrieKeyMapper.
64   //
65   // base_dir : Base directory used to save all the files required to persist
66   //            DynamicTrieKeyMapper. Should be the same as passed into
67   //            Create().
68   //
69   // Returns
70   //   OK on success
71   //   INTERNAL_ERROR on I/O error
72   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
73                                            std::string_view base_dir);
74 
75   ~DynamicTrieKeyMapper() override = default;
76 
77   libtextclassifier3::Status Put(std::string_view key, T value) override;
78 
79   libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
80                                            T next_value) override;
81 
82   libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
83 
84   bool Delete(std::string_view key) override;
85 
86   std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
87       const override;
88 
num_keys()89   int32_t num_keys() const override { return trie_.size(); }
90 
91   libtextclassifier3::Status PersistToDisk() override;
92 
93   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
94 
95   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
96 
97   libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override;
98 
99  private:
100   class Iterator : public KeyMapper<T, Formatter>::Iterator {
101    public:
Iterator(const IcingDynamicTrie & trie)102     explicit Iterator(const IcingDynamicTrie& trie)
103         : itr_(trie, /*prefix=*/""), start_(true) {}
104 
105     ~Iterator() override = default;
106 
Advance()107     bool Advance() override {
108       if (start_) {
109         start_ = false;
110         return itr_.IsValid();
111       }
112       return itr_.Advance();
113     }
114 
GetKey()115     std::string_view GetKey() const override {
116       const char* key = itr_.GetKey();
117       return std::string_view(key);
118     }
119 
GetValue()120     T GetValue() const override {
121       T value;
122       memcpy(&value, itr_.GetValue(), sizeof(T));
123       return value;
124     }
125 
126    private:
127     IcingDynamicTrie::Iterator itr_;
128 
129     // TODO(b/241784804): remove this flag after changing IcingDynamicTrie to
130     //                    follow the common iterator pattern in our codebase.
131     bool start_;
132   };
133 
134   static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
135   static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
136 
137   // Use DynamicTrieKeyMapper::Create() to instantiate.
138   explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
139 
140   // Load any existing DynamicTrieKeyMapper data from disk, or creates a new
141   // instance of DynamicTrieKeyMapper on disk and gets ready to process
142   // read/write operations.
143   //
144   // Returns any encountered IO errors.
145   libtextclassifier3::Status Initialize(int maximum_size_bytes);
146 
147   const std::string file_prefix_;
148 
149   // TODO(adorokhine) Filesystem is a forked class that's available both in
150   // icing and icing namespaces. We will need icing::Filesystem in order
151   // to use IcingDynamicTrie. Filesystem class should be fully refactored
152   // to have a single definition across both namespaces. Such a class should
153   // use icing (and general google3) coding conventions and behave like
154   // a proper C++ class.
155   const IcingFilesystem icing_filesystem_;
156   IcingDynamicTrie trie_;
157 
158   static_assert(std::is_trivially_copyable<T>::value,
159                 "T must be trivially copyable");
160 };
161 
162 template <typename T, typename Formatter>
163 libtextclassifier3::StatusOr<
164     std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)165 DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
166                                            std::string_view base_dir,
167                                            int maximum_size_bytes) {
168   // We create a subdirectory since the trie creates and stores multiple files.
169   // This makes it easier to isolate the trie files away from other files that
170   // could potentially be in the same base_dir, and makes it easier to delete.
171   const std::string key_mapper_dir =
172       absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
173   if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
174     return absl_ports::InternalError(absl_ports::StrCat(
175         "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
176   }
177   auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
178       new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
179   ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
180   return mapper;
181 }
182 
183 template <typename T, typename Formatter>
Delete(const Filesystem & filesystem,std::string_view base_dir)184 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
185     const Filesystem& filesystem, std::string_view base_dir) {
186   std::string key_mapper_dir =
187       absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
188   if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
189     return absl_ports::InternalError(absl_ports::StrCat(
190         "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
191   }
192   return libtextclassifier3::Status::OK;
193 }
194 
195 template <typename T, typename Formatter>
DynamicTrieKeyMapper(std::string_view key_mapper_dir)196 DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
197     std::string_view key_mapper_dir)
198     : file_prefix_(
199           absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
200       trie_(file_prefix_,
201             IcingDynamicTrie::RuntimeOptions().set_storage_policy(
202                 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
203             &icing_filesystem_) {}
204 
205 template <typename T, typename Formatter>
Initialize(int maximum_size_bytes)206 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
207     int maximum_size_bytes) {
208   IcingDynamicTrie::Options options;
209   // Divide the max space between the three internal arrays: nodes, nexts and
210   // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
211   // MaxSuffixesSize is in units of bytes.
212   options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
213   options.max_nexts = options.max_nodes;
214   options.max_suffixes_size =
215       sizeof(IcingDynamicTrie::Node) * options.max_nodes;
216   options.value_size = sizeof(T);
217 
218   if (!trie_.CreateIfNotExist(options)) {
219     return absl_ports::InternalError(absl_ports::StrCat(
220         "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
221   }
222   if (!trie_.Init()) {
223     return absl_ports::InternalError(absl_ports::StrCat(
224         "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
225   }
226   return libtextclassifier3::Status::OK;
227 }
228 
229 template <typename T, typename Formatter>
GetOrPut(std::string_view key,T next_value)230 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
231     std::string_view key, T next_value) {
232   std::string string_key(key);
233   uint32_t value_index;
234   libtextclassifier3::Status status =
235       trie_.Insert(string_key.c_str(), &next_value, &value_index,
236                    /*replace=*/false);
237   if (!status.ok()) {
238     ICING_LOG(DBG) << "Unable to insert key " << string_key
239                    << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
240                    << status.error_message();
241     return status;
242   }
243   // This memory address could be unaligned since we're just grabbing the value
244   // from somewhere in the trie's suffix array. The suffix array is filled with
245   // chars, so the address might not be aligned to T values.
246   const T* unaligned_value =
247       static_cast<const T*>(trie_.GetValueAtIndex(value_index));
248 
249   // memcpy the value to ensure that the returned value here is in a T-aligned
250   // address
251   T aligned_value;
252   memcpy(&aligned_value, unaligned_value, sizeof(T));
253   return aligned_value;
254 }
255 
256 template <typename T, typename Formatter>
Put(std::string_view key,T value)257 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
258     std::string_view key, T value) {
259   std::string string_key(key);
260   libtextclassifier3::Status status = trie_.Insert(string_key.c_str(), &value);
261   if (!status.ok()) {
262     ICING_LOG(DBG) << "Unable to insert key " << string_key
263                    << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
264                    << status.error_message();
265     return status;
266   }
267   return libtextclassifier3::Status::OK;
268 }
269 
270 template <typename T, typename Formatter>
Get(std::string_view key)271 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
272     std::string_view key) const {
273   std::string string_key(key);
274   T value;
275   if (!trie_.Find(string_key.c_str(), &value)) {
276     return absl_ports::NotFoundError(
277         absl_ports::StrCat("Key not found ", Formatter()(string_key),
278                            " in DynamicTrieKeyMapper ", file_prefix_, "."));
279   }
280   return value;
281 }
282 
283 template <typename T, typename Formatter>
Delete(std::string_view key)284 bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
285   return trie_.Delete(key);
286 }
287 
288 template <typename T, typename Formatter>
289 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator>
GetIterator()290 DynamicTrieKeyMapper<T, Formatter>::GetIterator() const {
291   return std::make_unique<DynamicTrieKeyMapper<T, Formatter>::Iterator>(trie_);
292 }
293 
294 template <typename T, typename Formatter>
PersistToDisk()295 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
296   if (!trie_.Sync()) {
297     return absl_ports::InternalError(absl_ports::StrCat(
298         "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
299   }
300 
301   return libtextclassifier3::Status::OK;
302 }
303 
304 template <typename T, typename Formatter>
305 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()306 DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
307   int64_t size = trie_.GetDiskUsage();
308   if (size == IcingFilesystem::kBadFileSize || size < 0) {
309     return absl_ports::InternalError("Failed to get disk usage of key mapper");
310   }
311   return size;
312 }
313 
314 template <typename T, typename Formatter>
315 libtextclassifier3::StatusOr<int64_t>
GetElementsSize()316 DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
317   int64_t size = trie_.GetElementsSize();
318   if (size == IcingFilesystem::kBadFileSize || size < 0) {
319     return absl_ports::InternalError(
320         "Failed to get disk usage of elements in the key mapper");
321   }
322   return size;
323 }
324 
325 template <typename T, typename Formatter>
326 libtextclassifier3::StatusOr<Crc32>
ComputeChecksum()327 DynamicTrieKeyMapper<T, Formatter>::ComputeChecksum() {
328   return Crc32(trie_.UpdateCrc());
329 }
330 
331 }  // namespace lib
332 }  // namespace icing
333 
334 #endif  // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
335