1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
16 #define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
17
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/absl_ports/str_join.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/legacy/index/icing-dynamic-trie.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/store/key-mapper.h"
34 #include "icing/util/crc32.h"
35 #include "icing/util/logging.h"
36 #include "icing/util/status-macros.h"
37
38 namespace icing {
39 namespace lib {
40
41 // File-backed mapping between the string key and a trivially copyable value
42 // type.
43 //
44 // DynamicTrieKeyMapper is thread-compatible
45 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
46 class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
47 public:
48 // Returns an initialized instance of DynamicTrieKeyMapper that can
49 // immediately handle read/write operations.
50 // Returns any encountered IO errors.
51 //
52 // base_dir : Base directory used to save all the files required to persist
53 // DynamicTrieKeyMapper. If this base_dir was previously used to
54 // create a DynamicTrieKeyMapper, then this existing data would be
55 // loaded. Otherwise, an empty DynamicTrieKeyMapper would be
56 // created.
57 // maximum_size_bytes : The maximum allowable size of the key mapper storage.
58 static libtextclassifier3::StatusOr<
59 std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
60 Create(const Filesystem& filesystem, std::string_view base_dir,
61 int maximum_size_bytes);
62
63 // Deletes all the files associated with the DynamicTrieKeyMapper.
64 //
65 // base_dir : Base directory used to save all the files required to persist
66 // DynamicTrieKeyMapper. Should be the same as passed into
67 // Create().
68 //
69 // Returns
70 // OK on success
71 // INTERNAL_ERROR on I/O error
72 static libtextclassifier3::Status Delete(const Filesystem& filesystem,
73 std::string_view base_dir);
74
75 ~DynamicTrieKeyMapper() override = default;
76
77 libtextclassifier3::Status Put(std::string_view key, T value) override;
78
79 libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
80 T next_value) override;
81
82 libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
83
84 bool Delete(std::string_view key) override;
85
86 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
87 const override;
88
num_keys()89 int32_t num_keys() const override { return trie_.size(); }
90
91 libtextclassifier3::Status PersistToDisk() override;
92
93 libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
94
95 libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
96
97 libtextclassifier3::StatusOr<Crc32> ComputeChecksum() override;
98
99 private:
100 class Iterator : public KeyMapper<T, Formatter>::Iterator {
101 public:
Iterator(const IcingDynamicTrie & trie)102 explicit Iterator(const IcingDynamicTrie& trie)
103 : itr_(trie, /*prefix=*/""), start_(true) {}
104
105 ~Iterator() override = default;
106
Advance()107 bool Advance() override {
108 if (start_) {
109 start_ = false;
110 return itr_.IsValid();
111 }
112 return itr_.Advance();
113 }
114
GetKey()115 std::string_view GetKey() const override {
116 const char* key = itr_.GetKey();
117 return std::string_view(key);
118 }
119
GetValue()120 T GetValue() const override {
121 T value;
122 memcpy(&value, itr_.GetValue(), sizeof(T));
123 return value;
124 }
125
126 private:
127 IcingDynamicTrie::Iterator itr_;
128
129 // TODO(b/241784804): remove this flag after changing IcingDynamicTrie to
130 // follow the common iterator pattern in our codebase.
131 bool start_;
132 };
133
134 static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
135 static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
136
137 // Use DynamicTrieKeyMapper::Create() to instantiate.
138 explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
139
140 // Load any existing DynamicTrieKeyMapper data from disk, or creates a new
141 // instance of DynamicTrieKeyMapper on disk and gets ready to process
142 // read/write operations.
143 //
144 // Returns any encountered IO errors.
145 libtextclassifier3::Status Initialize(int maximum_size_bytes);
146
147 const std::string file_prefix_;
148
149 // TODO(adorokhine) Filesystem is a forked class that's available both in
150 // icing and icing namespaces. We will need icing::Filesystem in order
151 // to use IcingDynamicTrie. Filesystem class should be fully refactored
152 // to have a single definition across both namespaces. Such a class should
153 // use icing (and general google3) coding conventions and behave like
154 // a proper C++ class.
155 const IcingFilesystem icing_filesystem_;
156 IcingDynamicTrie trie_;
157
158 static_assert(std::is_trivially_copyable<T>::value,
159 "T must be trivially copyable");
160 };
161
162 template <typename T, typename Formatter>
163 libtextclassifier3::StatusOr<
164 std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)165 DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
166 std::string_view base_dir,
167 int maximum_size_bytes) {
168 // We create a subdirectory since the trie creates and stores multiple files.
169 // This makes it easier to isolate the trie files away from other files that
170 // could potentially be in the same base_dir, and makes it easier to delete.
171 const std::string key_mapper_dir =
172 absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
173 if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
174 return absl_ports::InternalError(absl_ports::StrCat(
175 "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
176 }
177 auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
178 new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
179 ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
180 return mapper;
181 }
182
183 template <typename T, typename Formatter>
Delete(const Filesystem & filesystem,std::string_view base_dir)184 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
185 const Filesystem& filesystem, std::string_view base_dir) {
186 std::string key_mapper_dir =
187 absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
188 if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
189 return absl_ports::InternalError(absl_ports::StrCat(
190 "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
191 }
192 return libtextclassifier3::Status::OK;
193 }
194
195 template <typename T, typename Formatter>
DynamicTrieKeyMapper(std::string_view key_mapper_dir)196 DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
197 std::string_view key_mapper_dir)
198 : file_prefix_(
199 absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
200 trie_(file_prefix_,
201 IcingDynamicTrie::RuntimeOptions().set_storage_policy(
202 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
203 &icing_filesystem_) {}
204
205 template <typename T, typename Formatter>
Initialize(int maximum_size_bytes)206 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
207 int maximum_size_bytes) {
208 IcingDynamicTrie::Options options;
209 // Divide the max space between the three internal arrays: nodes, nexts and
210 // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
211 // MaxSuffixesSize is in units of bytes.
212 options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
213 options.max_nexts = options.max_nodes;
214 options.max_suffixes_size =
215 sizeof(IcingDynamicTrie::Node) * options.max_nodes;
216 options.value_size = sizeof(T);
217
218 if (!trie_.CreateIfNotExist(options)) {
219 return absl_ports::InternalError(absl_ports::StrCat(
220 "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
221 }
222 if (!trie_.Init()) {
223 return absl_ports::InternalError(absl_ports::StrCat(
224 "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
225 }
226 return libtextclassifier3::Status::OK;
227 }
228
229 template <typename T, typename Formatter>
GetOrPut(std::string_view key,T next_value)230 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
231 std::string_view key, T next_value) {
232 std::string string_key(key);
233 uint32_t value_index;
234 libtextclassifier3::Status status =
235 trie_.Insert(string_key.c_str(), &next_value, &value_index,
236 /*replace=*/false);
237 if (!status.ok()) {
238 ICING_LOG(DBG) << "Unable to insert key " << string_key
239 << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
240 << status.error_message();
241 return status;
242 }
243 // This memory address could be unaligned since we're just grabbing the value
244 // from somewhere in the trie's suffix array. The suffix array is filled with
245 // chars, so the address might not be aligned to T values.
246 const T* unaligned_value =
247 static_cast<const T*>(trie_.GetValueAtIndex(value_index));
248
249 // memcpy the value to ensure that the returned value here is in a T-aligned
250 // address
251 T aligned_value;
252 memcpy(&aligned_value, unaligned_value, sizeof(T));
253 return aligned_value;
254 }
255
256 template <typename T, typename Formatter>
Put(std::string_view key,T value)257 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
258 std::string_view key, T value) {
259 std::string string_key(key);
260 libtextclassifier3::Status status = trie_.Insert(string_key.c_str(), &value);
261 if (!status.ok()) {
262 ICING_LOG(DBG) << "Unable to insert key " << string_key
263 << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
264 << status.error_message();
265 return status;
266 }
267 return libtextclassifier3::Status::OK;
268 }
269
270 template <typename T, typename Formatter>
Get(std::string_view key)271 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
272 std::string_view key) const {
273 std::string string_key(key);
274 T value;
275 if (!trie_.Find(string_key.c_str(), &value)) {
276 return absl_ports::NotFoundError(
277 absl_ports::StrCat("Key not found ", Formatter()(string_key),
278 " in DynamicTrieKeyMapper ", file_prefix_, "."));
279 }
280 return value;
281 }
282
283 template <typename T, typename Formatter>
Delete(std::string_view key)284 bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
285 return trie_.Delete(key);
286 }
287
288 template <typename T, typename Formatter>
289 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator>
GetIterator()290 DynamicTrieKeyMapper<T, Formatter>::GetIterator() const {
291 return std::make_unique<DynamicTrieKeyMapper<T, Formatter>::Iterator>(trie_);
292 }
293
294 template <typename T, typename Formatter>
PersistToDisk()295 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
296 if (!trie_.Sync()) {
297 return absl_ports::InternalError(absl_ports::StrCat(
298 "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
299 }
300
301 return libtextclassifier3::Status::OK;
302 }
303
304 template <typename T, typename Formatter>
305 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()306 DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
307 int64_t size = trie_.GetDiskUsage();
308 if (size == IcingFilesystem::kBadFileSize || size < 0) {
309 return absl_ports::InternalError("Failed to get disk usage of key mapper");
310 }
311 return size;
312 }
313
314 template <typename T, typename Formatter>
315 libtextclassifier3::StatusOr<int64_t>
GetElementsSize()316 DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
317 int64_t size = trie_.GetElementsSize();
318 if (size == IcingFilesystem::kBadFileSize || size < 0) {
319 return absl_ports::InternalError(
320 "Failed to get disk usage of elements in the key mapper");
321 }
322 return size;
323 }
324
325 template <typename T, typename Formatter>
326 libtextclassifier3::StatusOr<Crc32>
ComputeChecksum()327 DynamicTrieKeyMapper<T, Formatter>::ComputeChecksum() {
328 return Crc32(trie_.UpdateCrc());
329 }
330
331 } // namespace lib
332 } // namespace icing
333
334 #endif // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
335