1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/file-backed-vector.h"
16
17 #include <unistd.h>
18
19 #include <algorithm>
20 #include <cerrno>
21 #include <cstdint>
22 #include <limits>
23 #include <memory>
24 #include <string>
25 #include <string_view>
26 #include <vector>
27
28 #include "icing/text_classifier/lib3/utils/base/status.h"
29 #include "gmock/gmock.h"
30 #include "gtest/gtest.h"
31 #include "icing/file/filesystem.h"
32 #include "icing/file/memory-mapped-file.h"
33 #include "icing/file/mock-filesystem.h"
34 #include "icing/testing/common-matchers.h"
35 #include "icing/testing/tmp-directory.h"
36 #include "icing/util/crc32.h"
37 #include "icing/util/logging.h"
38
39 using ::testing::ElementsAre;
40 using ::testing::Eq;
41 using ::testing::IsTrue;
42 using ::testing::Lt;
43 using ::testing::Not;
44 using ::testing::Pointee;
45 using ::testing::SizeIs;
46
47 namespace icing {
48 namespace lib {
49
50 namespace {
51
52 class FileBackedVectorTest : public testing::Test {
53 protected:
SetUp()54 void SetUp() override {
55 file_path_ = GetTestTempDir() + "/test.array";
56 fd_ = filesystem_.OpenForWrite(file_path_.c_str());
57 ASSERT_NE(-1, fd_);
58 ASSERT_TRUE(filesystem_.Truncate(fd_, 0));
59 }
60
TearDown()61 void TearDown() override {
62 close(fd_);
63 filesystem_.DeleteFile(file_path_.c_str());
64 }
65
66 // Helper method to loop over some data and insert into the vector at some idx
67 template <typename T>
Insert(FileBackedVector<T> * vector,int32_t idx,const std::vector<T> & data)68 void Insert(FileBackedVector<T>* vector, int32_t idx,
69 const std::vector<T>& data) {
70 for (int i = 0; i < data.size(); ++i) {
71 ICING_ASSERT_OK(vector->Set(idx + i, data.at(i)));
72 }
73 }
74
Insert(FileBackedVector<char> * vector,int32_t idx,std::string data)75 void Insert(FileBackedVector<char>* vector, int32_t idx, std::string data) {
76 Insert(vector, idx, std::vector<char>(data.begin(), data.end()));
77 }
78
79 // Helper method to retrieve data from the beginning of the vector
80 template <typename T>
Get(FileBackedVector<T> * vector,int32_t idx,int32_t expected_len)81 std::vector<T> Get(FileBackedVector<T>* vector, int32_t idx,
82 int32_t expected_len) {
83 return std::vector<T>(vector->array() + idx,
84 vector->array() + idx + expected_len);
85 }
86
Get(FileBackedVector<char> * vector,int32_t expected_len)87 std::string_view Get(FileBackedVector<char>* vector, int32_t expected_len) {
88 return Get(vector, 0, expected_len);
89 }
90
Get(FileBackedVector<char> * vector,int32_t idx,int32_t expected_len)91 std::string_view Get(FileBackedVector<char>* vector, int32_t idx,
92 int32_t expected_len) {
93 return std::string_view(vector->array() + idx, expected_len);
94 }
95
filesystem() const96 const Filesystem& filesystem() const { return filesystem_; }
97
98 Filesystem filesystem_;
99 std::string file_path_;
100 int fd_;
101 };
102
// Create() must succeed on the freshly truncated file and again on the file
// left behind by a previous instance.
TEST_F(FileBackedVectorTest, Create) {
  constexpr MemoryMappedFile::Strategy kStrategy =
      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // First instance: initializes the file.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> first_instance,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
  }

  {
    // Second instance: opens the file the first instance left behind.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> second_instance,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
  }
}
120
// Create() is expected to report UNIMPLEMENTED for READ_WRITE_MANUAL_SYNC.
TEST_F(FileBackedVectorTest, CreateWithInvalidStrategy) {
  auto create_result = FileBackedVector<char>::Create(
      filesystem_, file_path_,
      MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC);
  EXPECT_THAT(create_result,
              StatusIs(libtextclassifier3::StatusCode::UNIMPLEMENTED));
}
128
// Exercises the max_file_size argument of Create(): values that are rejected,
// the smallest accepted value, and how it interacts with an existing file.
TEST_F(FileBackedVectorTest, CreateWithCustomMaxFileSize) {
  constexpr MemoryMappedFile::Strategy kStrategy =
      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
  int32_t header_bytes = FileBackedVector<char>::Header::kHeaderSize;

  // Rejected: a negative limit.
  EXPECT_THAT(
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                     /*max_file_size=*/-1),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Rejected: a limit smaller than the header.
  EXPECT_THAT(
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                     /*max_file_size=*/header_bytes - 1),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Rejected: a limit one byte short of header plus a single element.
  EXPECT_THAT(FileBackedVector<char>::Create(
                  filesystem_, file_path_, kStrategy,
                  /*max_file_size=*/header_bytes + sizeof(char) - 1),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  {
    // Accepted: a limit with room for exactly one element.
    ICING_ASSERT_OK_AND_ASSIGN(
        auto one_slot_vector,
        FileBackedVector<char>::Create(
            filesystem_, file_path_, kStrategy,
            /*max_file_size=*/header_bytes + sizeof(char) * 1));
    ICING_ASSERT_OK(one_slot_vector->Set(0, 'a'));
  }

  {
    // Reopening with a larger limit works (provided it does not exceed
    // kMaxFileSize), and the previously stored element is still there.
    ICING_ASSERT_OK_AND_ASSIGN(
        auto two_slot_vector,
        FileBackedVector<char>::Create(
            filesystem_, file_path_, kStrategy,
            /*max_file_size=*/header_bytes + sizeof(char) * 2));
    EXPECT_THAT(two_slot_vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
    ICING_ASSERT_OK(two_slot_vector->Set(1, 'b'));
  }

  // A limit below the current on-disk size is rejected even though the value
  // would be acceptable for a fresh file.
  int64_t size_on_disk = filesystem_.GetFileSize(file_path_.c_str());
  ASSERT_THAT(size_on_disk, Eq(header_bytes + sizeof(char) * 2));
  ASSERT_THAT(size_on_disk - 1, Not(Lt(header_bytes + sizeof(char))));
  EXPECT_THAT(
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                     /*max_file_size=*/size_on_disk - 1),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  {
    // A limit equal to the current on-disk size is accepted.
    ICING_ASSERT_OK_AND_ASSIGN(
        auto exact_fit_vector,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                       /*max_file_size=*/size_on_disk));
    EXPECT_THAT(exact_fit_vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
    EXPECT_THAT(exact_fit_vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
  }
}
193
// End-to-end scenario: write data, verify the checksum, corrupt and then
// repair the checksum stored in the header, reopen the file, and truncate.
TEST_F(FileBackedVectorTest, SimpleShared) {
  // Create a vector and add some data.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  std::string expected = "abcde";
  Insert(vector.get(), 0, expected);
  EXPECT_EQ(expected.length(), vector->num_elements());
  EXPECT_EQ(expected, Get(vector.get(), expected.length()));

  uint32_t good_crc_value = 1134899064U;
  const Crc32 good_crc(good_crc_value);
  // Explicit call to update the crc does update the value
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));

  // PersistToDisk does nothing bad.
  ICING_EXPECT_OK(vector->PersistToDisk());

  // Close out the old vector to ensure everything persists properly before we
  // reassign it
  vector.reset();

  // Byte offset of the checksum field inside the on-disk header.
  const auto checksum_offset =
      offsetof(FileBackedVector<char>::Header, vector_checksum);

  // Write a bad crc, this would be a mismatch compared to the computed crc of
  // the contents on reinitialization. The write itself must succeed, otherwise
  // the expectation below would be checking an unmodified file.
  uint32_t bad_crc_value = 123;
  ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), checksum_offset,
                                 &bad_crc_value, sizeof(bad_crc_value)));

  ASSERT_THAT(FileBackedVector<char>::Create(
                  filesystem_, file_path_,
                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Get it back into an ok state. Again, make sure the repair actually
  // reached the file before relying on it.
  ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), checksum_offset,
                                 &good_crc_value, sizeof(good_crc_value)));
  ICING_ASSERT_OK_AND_ASSIGN(
      vector, FileBackedVector<char>::Create(
                  filesystem_, file_path_,
                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  EXPECT_EQ(expected, Get(vector.get(), expected.length()));

  // Close out the old vector to ensure everything persists properly before we
  // reassign it
  vector.reset();

  // Can reinitialize it safely
  ICING_ASSERT_OK_AND_ASSIGN(
      vector, FileBackedVector<char>::Create(
                  filesystem_, file_path_,
                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  // Truncate the content
  ICING_EXPECT_OK(vector->TruncateTo(0));

  // Crc is cleared after truncation and reset to 0.
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
  EXPECT_EQ(0u, vector->num_elements());
}
260
// Get() returns each stored element and reports OUT_OF_RANGE for indices
// outside [0, num_elements).
TEST_F(FileBackedVectorTest, Get) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  // Nothing written yet, so the checksum is 0.
  EXPECT_THAT(char_vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  std::string contents = "abc";
  Insert(char_vector.get(), 0, contents);
  EXPECT_EQ(contents.length(), char_vector->num_elements());

  // Every in-range index yields the element that was written there.
  EXPECT_THAT(char_vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
  EXPECT_THAT(char_vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
  EXPECT_THAT(char_vector->Get(2), IsOkAndHolds(Pointee(Eq('c'))));

  // One past the end and a negative index are both rejected.
  EXPECT_THAT(char_vector->Get(3),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(char_vector->Get(-1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
285
// A ranged Set() that stays inside the current contents overwrites in place
// and leaves num_elements unchanged.
TEST_F(FileBackedVectorTest, SetWithoutGrowing) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  EXPECT_THAT(char_vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  // Seed the vector with five characters.
  std::string seed = "abcde";
  Insert(char_vector.get(), /*idx=*/0, seed);
  ASSERT_THAT(char_vector->num_elements(), Eq(seed.length()));
  ASSERT_THAT(Get(char_vector.get(), /*idx=*/0, /*expected_len=*/5),
              Eq(seed));

  // Overwrite indices 1..3 with 'z'.
  ICING_EXPECT_OK(char_vector->Set(/*idx=*/1, /*len=*/3, 'z'));
  EXPECT_THAT(char_vector->num_elements(), Eq(5));
  EXPECT_THAT(Get(char_vector.get(), /*idx=*/0, /*expected_len=*/5),
              Eq("azzze"));
}
305
// A ranged Set() that reaches past the current end extends the vector.
TEST_F(FileBackedVectorTest, SetWithGrowing) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  EXPECT_THAT(char_vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  // Seed the vector with five characters.
  std::string seed = "abcde";
  Insert(char_vector.get(), /*idx=*/0, seed);
  ASSERT_THAT(char_vector->num_elements(), Eq(seed.length()));
  ASSERT_THAT(Get(char_vector.get(), /*idx=*/0, /*expected_len=*/5),
              Eq(seed));

  // Writing 4 elements starting at index 3 covers indices 3..6, two past the
  // previous end.
  ICING_EXPECT_OK(char_vector->Set(/*idx=*/3, /*len=*/4, 'z'));
  EXPECT_THAT(char_vector->num_elements(), Eq(7));
  EXPECT_THAT(Get(char_vector.get(), /*idx=*/0, /*expected_len=*/7),
              Eq("abczzzz"));
}
325
// The ranged Set() rejects bad index/length combinations with OUT_OF_RANGE.
TEST_F(FileBackedVectorTest, SetInvalidArguments) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  // Negative length.
  EXPECT_THAT(char_vector->Set(/*idx=*/0, /*len=*/-1, 'z'),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  // Zero length.
  EXPECT_THAT(char_vector->Set(/*idx=*/0, /*len=*/0, 'z'),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  // Negative start index.
  EXPECT_THAT(char_vector->Set(/*idx=*/-1, /*len=*/2, 'z'),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  // idx + len does not fit in int32_t.
  EXPECT_THAT(
      char_vector->Set(/*idx=*/100,
                       /*len=*/std::numeric_limits<int32_t>::max(), 'z'),
      StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
344
// Writes made through a MutableView are visible via Get() on the vector.
TEST_F(FileBackedVectorTest, MutableView) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  // Fill with 1000 copies of 'a' and confirm the known checksum for that data.
  Insert(char_vector.get(), /*idx=*/0, std::string(1000, 'a'));
  EXPECT_THAT(char_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2620640643U)));

  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableView element_view,
                             char_vector->GetMutable(3));

  // First write through the view.
  element_view.Get() = 'b';
  EXPECT_THAT(char_vector->Get(3), IsOkAndHolds(Pointee(Eq('b'))));

  // Second write through the same view.
  element_view.Get() = 'c';
  EXPECT_THAT(char_vector->Get(3), IsOkAndHolds(Pointee(Eq('c'))));
}
364
// After a write through MutableView::Get(), ComputeChecksum() must agree with
// a checksum computed from scratch over the whole array.
TEST_F(FileBackedVectorTest, MutableViewShouldSetDirty) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  Insert(char_vector.get(), /*idx=*/0, std::string(1000, 'a'));
  EXPECT_THAT(char_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2620640643U)));

  // View over the vector's raw array, used as the reference for full
  // checksum recomputation below.
  std::string_view whole_array_view =
      std::string_view(char_vector->array(), char_vector->num_elements());

  // Recomputes the checksum of everything the view currently covers.
  const auto recompute_full_crc = [&whole_array_view]() {
    Crc32 from_scratch;
    from_scratch.Append(whole_array_view);
    return from_scratch;
  };

  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableView element_view,
                             char_vector->GetMutable(3));

  // First mutation through the non-const Get(): the incremental checksum must
  // pick the change up.
  element_view.Get() = 'b';
  ASSERT_THAT(char_vector->Get(3), IsOkAndHolds(Pointee(Eq('b'))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_first,
                             char_vector->ComputeChecksum());
  Crc32 expected_after_first = recompute_full_crc();
  EXPECT_THAT(crc_after_first, Eq(expected_after_first));

  // Second mutation of the same element must be picked up as well.
  element_view.Get() = 'c';
  ASSERT_THAT(char_vector->Get(3), IsOkAndHolds(Pointee(Eq('c'))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_second,
                             char_vector->ComputeChecksum());
  Crc32 expected_after_second = recompute_full_crc();
  EXPECT_THAT(crc_after_second, Eq(expected_after_second));
}
400
// Writes made through MutableArrayView::operator[] are visible both via the
// vector's Get() and via the view's data() pointer.
TEST_F(FileBackedVectorTest, MutableArrayView) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  // Fill with one hundred 1s and confirm the known checksum for that data.
  Insert(int_vector.get(), /*idx=*/0,
         std::vector<int>(/*count=*/100, /*value=*/1));
  EXPECT_THAT(int_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2494890115U)));

  constexpr int kViewStart = 5;
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<int>::MutableArrayView window,
                             int_vector->GetMutable(kViewStart, /*len=*/3));
  EXPECT_THAT(window, SizeIs(3));

  // Write all three slots first...
  const int new_values[] = {2, 3, 4};
  for (int slot = 0; slot < 3; ++slot) {
    window[slot] = new_values[slot];
  }

  // ...then check each slot through both access paths.
  for (int slot = 0; slot < 3; ++slot) {
    EXPECT_THAT(int_vector->Get(kViewStart + slot),
                IsOkAndHolds(Pointee(Eq(new_values[slot]))));
    EXPECT_THAT(window.data()[slot], Eq(new_values[slot]));
  }
}
430
// MutableArrayView::SetArray() copies a source array into the view at the
// given view-relative index.
TEST_F(FileBackedVectorTest, MutableArrayViewSetArray) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  Insert(int_vector.get(), /*idx=*/0,
         std::vector<int>(/*count=*/100, /*value=*/1));
  EXPECT_THAT(int_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2494890115U)));

  constexpr int kViewStart = 3;
  constexpr int kViewLength = 5;
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<int>::MutableArrayView window,
                             int_vector->GetMutable(kViewStart, kViewLength));

  // Three values written at the start of the view.
  std::vector<int> head_values{2, 3, 4};
  window.SetArray(/*idx=*/0, head_values.data(), head_values.size());
  EXPECT_THAT(Get(int_vector.get(), kViewStart, kViewLength),
              ElementsAre(2, 3, 4, 1, 1));

  // Two values written at view index 2, overwriting the earlier 4.
  std::vector<int> middle_values{5, 6};
  window.SetArray(/*idx=*/2, middle_values.data(), middle_values.size());
  EXPECT_THAT(Get(int_vector.get(), kViewStart, kViewLength),
              ElementsAre(2, 3, 5, 6, 1));
}
457
// SetArray() with arr_len == 0 is accepted and leaves the contents untouched.
TEST_F(FileBackedVectorTest, MutableArrayViewSetArrayWithZeroLength) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  Insert(int_vector.get(), /*idx=*/0,
         std::vector<int>(/*count=*/100, /*value=*/1));
  EXPECT_THAT(int_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2494890115U)));

  constexpr int kViewStart = 3;
  constexpr int kViewLength = 5;
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<int>::MutableArrayView window,
                             int_vector->GetMutable(kViewStart, kViewLength));

  // The source array is non-empty, but a length of 0 is passed, so nothing
  // should be copied.
  std::vector<int> ignored_values{2, 3};
  window.SetArray(/*idx=*/0, ignored_values.data(), /*arr_len=*/0);
  EXPECT_THAT(Get(int_vector.get(), kViewStart, kViewLength),
              ElementsAre(1, 1, 1, 1, 1));
}
480
// After each write through MutableArrayView::operator[], ComputeChecksum()
// must agree with a checksum computed from scratch over the whole array.
TEST_F(FileBackedVectorTest, MutableArrayViewIndexOperatorShouldSetDirty) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  Insert(int_vector.get(), /*idx=*/0,
         std::vector<int>(/*count=*/100, /*value=*/1));
  EXPECT_THAT(int_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2494890115U)));

  // Byte-level view over the vector's raw array; the reference checksums
  // below are computed from it.
  std::string_view raw_bytes(
      reinterpret_cast<const char*>(int_vector->array()),
      int_vector->num_elements() * sizeof(int));

  constexpr int kViewStart = 5;
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<int>::MutableArrayView window,
                             int_vector->GetMutable(kViewStart, /*len=*/3));

  // Slot 0.
  window[0] = 2;
  ASSERT_THAT(int_vector->Get(kViewStart + 0), IsOkAndHolds(Pointee(Eq(2))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_slot0,
                             int_vector->ComputeChecksum());
  EXPECT_THAT(crc_after_slot0, Eq(Crc32(raw_bytes)));

  // Slot 1.
  window[1] = 3;
  ASSERT_THAT(int_vector->Get(kViewStart + 1), IsOkAndHolds(Pointee(Eq(3))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_slot1,
                             int_vector->ComputeChecksum());
  EXPECT_THAT(crc_after_slot1, Eq(Crc32(raw_bytes)));

  // Slot 2.
  window[2] = 4;
  ASSERT_THAT(int_vector->Get(kViewStart + 2), IsOkAndHolds(Pointee(Eq(4))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_slot2,
                             int_vector->ComputeChecksum());
  EXPECT_THAT(crc_after_slot2, Eq(Crc32(raw_bytes)));

  // Slot 0 again: rewriting an already-modified slot must also be reflected
  // in the checksum.
  window[0] = 5;
  ASSERT_THAT(int_vector->Get(kViewStart + 0), IsOkAndHolds(Pointee(Eq(5))));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc_after_rewrite,
                             int_vector->ComputeChecksum());
  EXPECT_THAT(crc_after_rewrite, Eq(Crc32(raw_bytes)));
}
525
// After MutableArrayView::SetArray(), ComputeChecksum() must agree with a
// checksum computed from scratch over the whole array.
TEST_F(FileBackedVectorTest, MutableArrayViewSetArrayShouldSetDirty) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  Insert(int_vector.get(), /*idx=*/0,
         std::vector<int>(/*count=*/100, /*value=*/1));
  EXPECT_THAT(int_vector->ComputeChecksum(),
              IsOkAndHolds(Crc32(2494890115U)));

  // Byte-level view over the vector's raw array, used for the reference
  // checksum.
  std::string_view raw_bytes(
      reinterpret_cast<const char*>(int_vector->array()),
      int_vector->num_elements() * sizeof(int));

  constexpr int kViewStart = 3;
  constexpr int kViewLength = 5;
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<int>::MutableArrayView window,
                             int_vector->GetMutable(kViewStart, kViewLength));

  // Bulk-write three values, then compare incremental vs. full checksum.
  std::vector<int> new_values{2, 3, 4};
  window.SetArray(/*idx=*/0, new_values.data(), new_values.size());
  ASSERT_THAT(Get(int_vector.get(), kViewStart, kViewLength),
              ElementsAre(2, 3, 4, 1, 1));
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 incremental_crc,
                             int_vector->ComputeChecksum());
  EXPECT_THAT(incremental_crc, Eq(Crc32(raw_bytes)));
}
553
// Append() on an empty vector adds elements one at a time at the end.
TEST_F(FileBackedVectorTest, Append) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  ASSERT_THAT(char_vector->num_elements(), Eq(0));

  // First element lands at index 0.
  ICING_EXPECT_OK(char_vector->Append('a'));
  EXPECT_THAT(char_vector->num_elements(), Eq(1));
  EXPECT_THAT(char_vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));

  // Second element lands at index 1.
  ICING_EXPECT_OK(char_vector->Append('b'));
  EXPECT_THAT(char_vector->num_elements(), Eq(2));
  EXPECT_THAT(char_vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
}
570
// Append() places the new element right after the highest index written by a
// preceding Set().
TEST_F(FileBackedVectorTest, AppendAfterSet) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  ASSERT_THAT(char_vector->num_elements(), Eq(0));

  // Setting index 9 on an empty vector brings the size to 10.
  ICING_ASSERT_OK(char_vector->Set(9, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(10));

  // The appended element therefore ends up at index 10.
  ICING_EXPECT_OK(char_vector->Append('a'));
  EXPECT_THAT(char_vector->num_elements(), Eq(11));
  EXPECT_THAT(char_vector->Get(10), IsOkAndHolds(Pointee(Eq('a'))));
}
585
// Append() places the new element right after the truncation point.
TEST_F(FileBackedVectorTest, AppendAfterTruncate) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  // Start from 1000 elements.
  Insert(char_vector.get(), /*idx=*/0, std::string(1000, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(1000));

  // Shrink to 5 elements; the appended element then goes to index 5.
  ICING_ASSERT_OK(char_vector->TruncateTo(5));
  ICING_EXPECT_OK(char_vector->Append('a'));
  EXPECT_THAT(char_vector->num_elements(), Eq(6));
  EXPECT_THAT(char_vector->Get(5), IsOkAndHolds(Pointee(Eq('a'))));
}
600
// Append() reports OUT_OF_RANGE once the vector already holds as many elements
// as the custom max_file_size permits.
TEST_F(FileBackedVectorTest, AppendShouldFailIfExceedingMaxFileSize) {
  int32_t file_size_limit = (1 << 10) - 1;
  // Number of chars that fit after the header within the limit.
  int32_t element_capacity =
      (file_size_limit - FileBackedVector<char>::Header::kHeaderSize) /
      sizeof(char);

  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, file_size_limit));
  // Fill the vector up to capacity by setting its last permitted index.
  ICING_ASSERT_OK(char_vector->Set(element_capacity - 1, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(element_capacity));

  // No room is left for one more element.
  EXPECT_THAT(char_vector->Append('a'),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
618
// Allocate() on an empty vector grows it by the requested length and returns
// a writable view over the new elements.
TEST_F(FileBackedVectorTest, Allocate) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  ASSERT_THAT(char_vector->num_elements(), Eq(0));

  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableArrayView window,
                             char_vector->Allocate(3));
  EXPECT_THAT(char_vector->num_elements(), Eq(3));
  EXPECT_THAT(window, SizeIs(3));

  // Fill the allocated range and read it back from index 0.
  std::string payload = "abc";
  window.SetArray(/*idx=*/0, /*arr=*/payload.data(), /*arr_len=*/3);
  EXPECT_THAT(Get(char_vector.get(), /*idx=*/0, /*expected_len=*/3),
              Eq(payload));
}
636
// Allocate() hands out the range immediately after the highest index written
// by a preceding Set().
TEST_F(FileBackedVectorTest, AllocateAfterSet) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  ASSERT_THAT(char_vector->num_elements(), Eq(0));

  // Setting index 9 on an empty vector brings the size to 10.
  ICING_ASSERT_OK(char_vector->Set(9, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(10));

  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableArrayView window,
                             char_vector->Allocate(3));
  EXPECT_THAT(char_vector->num_elements(), Eq(13));
  EXPECT_THAT(window, SizeIs(3));

  // The allocated range starts at index 10.
  std::string payload = "abc";
  window.SetArray(/*idx=*/0, /*arr=*/payload.data(), /*arr_len=*/3);
  EXPECT_THAT(Get(char_vector.get(), /*idx=*/10, /*expected_len=*/3),
              Eq(payload));
}
656
// Allocate() hands out the range immediately after the truncation point.
TEST_F(FileBackedVectorTest, AllocateAfterTruncate) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  // Start from 1000 elements.
  Insert(char_vector.get(), /*idx=*/0, std::string(1000, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(1000));

  // Shrink to 5 elements, then allocate 3 more.
  ICING_ASSERT_OK(char_vector->TruncateTo(5));
  ICING_ASSERT_OK_AND_ASSIGN(FileBackedVector<char>::MutableArrayView window,
                             char_vector->Allocate(3));
  EXPECT_THAT(char_vector->num_elements(), Eq(8));

  // The allocated range starts at index 5.
  std::string payload = "abc";
  window.SetArray(/*idx=*/0, /*arr=*/payload.data(), /*arr_len=*/3);
  EXPECT_THAT(Get(char_vector.get(), /*idx=*/5, /*expected_len=*/3),
              Eq(payload));
}
675
// Allocate() rejects non-positive lengths with OUT_OF_RANGE and leaves the
// element count unchanged.
TEST_F(FileBackedVectorTest, AllocateInvalidLengthShouldFail) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  ASSERT_THAT(char_vector->num_elements(), Eq(0));

  // Negative length.
  EXPECT_THAT(char_vector->Allocate(-1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(char_vector->num_elements(), Eq(0));

  // Zero length.
  EXPECT_THAT(char_vector->Allocate(0),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(char_vector->num_elements(), Eq(0));
}
692
// Allocate() reports OUT_OF_RANGE when the request would push the vector past
// the custom max_file_size, and succeeds when it fits exactly.
TEST_F(FileBackedVectorTest, AllocateShouldFailIfExceedingMaxFileSize) {
  int32_t file_size_limit = (1 << 10) - 1;
  // Number of chars that fit after the header within the limit.
  int32_t element_capacity =
      (file_size_limit - FileBackedVector<char>::Header::kHeaderSize) /
      sizeof(char);

  ICING_ASSERT_OK_AND_ASSIGN(
      auto char_vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, file_size_limit));
  // Leave room for exactly two more elements.
  ICING_ASSERT_OK(char_vector->Set(element_capacity - 3, 'z'));
  ASSERT_THAT(char_vector->num_elements(), Eq(element_capacity - 2));

  // Three do not fit; two do.
  EXPECT_THAT(char_vector->Allocate(3),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(char_vector->Allocate(2), IsOk());
}
711
// Overwrites the vector in consecutive, non-overlapping chunks and checks at
// exponentially spaced points that the incrementally maintained checksum
// equals a checksum recomputed over the whole array.
TEST_F(FileBackedVectorTest, IncrementalCrc_NonOverlappingChanges) {
  // Unsigned to match the loop counters below and avoid signed/unsigned
  // comparisons.
  constexpr uint32_t kNumElements = 1000;
  constexpr uint32_t kIncrementalSize = 3;
  // Create an array with some data.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  Insert(vector.get(), 0, std::string(kNumElements, 'a'));
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));

  // Non-overlapping changes to the array, with increasing intervals
  // between updating the checksum. Validate by mapping another array on top.
  // Note: kNumElements is not a multiple of kIncrementalSize, so the final
  // chunk (starting at index 999) writes past index kNumElements - 1.
  uint32_t next_update = 2;
  for (uint32_t i = 0; i < kNumElements; i += kIncrementalSize) {
    Insert(vector.get(), i, std::string(kIncrementalSize, 'b'));

    if (i >= next_update) {
      ICING_ASSERT_OK_AND_ASSIGN(Crc32 incremental_crc,
                                 vector->ComputeChecksum());
      ICING_LOG(INFO) << "Now crc @" << incremental_crc.Get();

      // Reference value: checksum of the full current contents.
      Crc32 full_crc;
      std::string_view reconstructed_view =
          std::string_view(vector->array(), vector->num_elements());
      full_crc.Append(reconstructed_view);

      ASSERT_EQ(incremental_crc, full_crc);
      next_update *= 2;
    }
  }

  // Every original element must have been overwritten.
  for (uint32_t i = 0; i < kNumElements; ++i) {
    EXPECT_THAT(vector->Get(i), IsOkAndHolds(Pointee(Eq('b'))));
  }
}
750
// Overwrites the vector in chunks that overlap their predecessors and checks
// at exponentially spaced points that the incrementally maintained checksum
// equals a checksum recomputed over the whole array.
TEST_F(FileBackedVectorTest, IncrementalCrc_OverlappingChanges) {
  // Unsigned to match the loop counters below and avoid signed/unsigned
  // comparisons.
  constexpr uint32_t kNumElements = 1000;
  constexpr uint32_t kIncrementalSize = 3;
  // Create an array with some data.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> vector,
      FileBackedVector<char>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));

  Insert(vector.get(), 0, std::string(kNumElements, 'a'));
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));

  // Overlapping changes to the array, with increasing intervals
  // between updating the checksum. Validate by mapping another array on top.
  // Each chunk starts one index after the previous one, so consecutive chunks
  // share kIncrementalSize - 1 elements.
  uint32_t next_update = 2;
  for (uint32_t i = 0; i < kNumElements; i++) {
    Insert(vector.get(), i, std::string(kIncrementalSize, 'b'));

    if (i >= next_update) {
      ICING_ASSERT_OK_AND_ASSIGN(Crc32 incremental_crc,
                                 vector->ComputeChecksum());
      ICING_LOG(INFO) << "Now crc @" << incremental_crc.Get();

      // Reference value: checksum of the full current contents.
      Crc32 full_crc;
      std::string_view reconstructed_view =
          std::string_view(vector->array(), vector->num_elements());
      full_crc.Append(reconstructed_view);

      ASSERT_EQ(incremental_crc, full_crc);
      next_update *= 2;
    }
  }

  // Every original element must have been overwritten.
  for (uint32_t i = 0; i < kNumElements; ++i) {
    EXPECT_THAT(vector->Get(i), IsOkAndHolds(Pointee(Eq('b'))));
  }
}
788
// Set() at index INT32_MAX must fail cleanly with OUT_OF_RANGE.
TEST_F(FileBackedVectorTest, SetIntMaxShouldReturnOutOfRangeError) {
  ICING_ASSERT_OK_AND_ASSIGN(
      auto int_vector,
      FileBackedVector<int32_t>::Create(
          filesystem_, file_path_,
          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
  EXPECT_THAT(int_vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  // Edge case: Set() calls GrowIfNecessary(idx + 1), so the largest int32_t
  // index is the one value for which that "+ 1" needs care.
  constexpr int32_t kLargestIndex = std::numeric_limits<int32_t>::max();
  EXPECT_THAT(int_vector->Set(kLargestIndex, 1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
803
// Exercises growth up to a caller-supplied maximum file size and verifies the
// contents survive a persist/recreate cycle.
TEST_F(FileBackedVectorTest, Grow) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  const int32_t file_size_limit = (1 << 20) - 1;
  const int32_t header_bytes = FileBackedVector<int32_t>::Header::kHeaderSize;
  const int32_t element_bytes = static_cast<int32_t>(sizeof(int32_t));

  // The file size limit covers both the header and the elements, so the
  // largest element count is (file_size_limit - header_bytes) / element_bytes.
  //
  // The space left for elements is deliberately not a multiple of
  // element_bytes, which lets us check that the element count is derived with
  // a (math) floor rather than a ceil.
  const int32_t element_space = file_size_limit - header_bytes;
  ASSERT_THAT(element_space % element_bytes, Not(Eq(0)));
  const int32_t element_capacity = element_space / element_bytes;

  ASSERT_TRUE(filesystem_.Truncate(fd_, 0));

  // Create the vector with the size limit applied.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<int32_t>> vector,
      FileBackedVector<int32_t>::Create(filesystem_, file_path_, kStrategy,
                                        file_size_limit));
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  // element_capacity is the permitted number of elements, which makes the
  // valid indices 0 through element_capacity - 1.
  EXPECT_THAT(vector->Set(element_capacity, 1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(vector->Set(-1, 1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
  EXPECT_THAT(vector->Set(element_capacity - 1, 1), IsOk());

  // Fill the last five slots.
  const int32_t tail_start = element_capacity - 5;
  std::vector<int32_t> tail_values{1, 2, 3, 4, 5};
  Insert(vector.get(), tail_start, tail_values);

  // Crc works?
  const Crc32 expected_crc(650981917U);
  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(expected_crc));

  // PersistToDisk must succeed, and the data must still be there once the
  // vector has been recreated.
  ICING_EXPECT_OK(vector->PersistToDisk());

  // Drop the old instance first so everything is persisted before the
  // variable is reassigned.
  vector.reset();

  ICING_ASSERT_OK_AND_ASSIGN(
      vector, FileBackedVector<int32_t>::Create(filesystem_, file_path_,
                                                kStrategy, file_size_limit));

  EXPECT_THAT(Get(vector.get(), tail_start, tail_values.size()),
              Eq(tail_values));
}
859
// Verifies that the backing file grows in kGrowElements-sized steps.
TEST_F(FileBackedVectorTest, GrowsInChunks) {
  // This is the same value as FileBackedVector::kGrowElements
  constexpr int32_t kGrowElements = 1U << 14;  // 16K
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<int>> int_vector,
      FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

  // Initially the file holds only the header. Disk usage reports a single
  // allocated block, the one containing the header.
  int header_bytes = sizeof(FileBackedVector<char>::Header);
  int page_bytes = getpagesize();
  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_bytes));
  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_bytes));

  // The first insertion grows the file to kGrowElements elements. From here
  // on file size and disk usage should agree, because growing explicitly
  // allocates the number of blocks needed to accommodate the file.
  Insert(int_vector.get(), 0, {1});
  int expected_bytes = 1 * kGrowElements * sizeof(int);
  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(expected_bytes));
  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(expected_bytes));

  // A second element fits in the existing chunk, so nothing should change.
  Insert(int_vector.get(), 1, {2});
  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(expected_bytes));
  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(expected_bytes));

  // Inserting kGrowElements more elements forces one more chunk, making the
  // underlying file 2 * kGrowElements elements big.
  expected_bytes = 2 * kGrowElements * sizeof(int);
  Insert(int_vector.get(), 2, std::vector<int>(kGrowElements, 3));
  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(expected_bytes));
  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(expected_bytes));

  // Destroy/persist the contents.
  int_vector.reset();

  // Reinitialize
  ICING_ASSERT_OK_AND_ASSIGN(
      int_vector,
      FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

  // Reopening must not change the file size.
  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
              Eq(kGrowElements * 2 * sizeof(int)));
}
910
// Delete() succeeds with or without an existing file, removes the file, and
// leaves the path usable for a new vector.
TEST_F(FileBackedVectorTest, Delete) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  // Deleting is fine even when there is nothing there.
  ICING_EXPECT_OK(FileBackedVector<int64_t>::Delete(filesystem_, file_path_));

  // Create a vector and add some data.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> chars,
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));

  EXPECT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  const std::string payload = "abcde";
  Insert(chars.get(), 0, payload);
  ASSERT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(1134899064U)));
  ASSERT_EQ(payload.length(), chars->num_elements());

  // Release the vector first so that everything is persisted before the
  // underlying files are deleted.
  chars.reset();

  ICING_EXPECT_OK(FileBackedVector<int64_t>::Delete(filesystem_, file_path_));

  EXPECT_FALSE(filesystem_.FileExists(file_path_.data()));

  // Can successfully create again.
  ICING_ASSERT_OK_AND_ASSIGN(
      chars,
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
}
943
// Covers TruncateTo(): shrinking, the resulting checksums, and rejection of
// out-of-range lengths.
TEST_F(FileBackedVectorTest, TruncateTo) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> chars,
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
  EXPECT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  Insert(chars.get(), 0, "A");
  Insert(chars.get(), 1, "Z");

  EXPECT_EQ(2, chars->num_elements());
  EXPECT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(1658635950)));

  // Change 1 of the 2 elements. A 1/2 change ratio exceeds the partial crc
  // limit, so the next checksum call recomputes the checksum of the whole
  // vector.
  Insert(chars.get(), 1, "J");
  // Everything past the 1st element is dropped, so the full vector's checksum
  // will only include "J".
  ICING_EXPECT_OK(chars->TruncateTo(1));
  EXPECT_EQ(1, chars->num_elements());
  EXPECT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));

  // Truncating clears the checksum and resets it to 0
  ICING_EXPECT_OK(chars->TruncateTo(0));
  EXPECT_EQ(0, chars->num_elements());
  EXPECT_THAT(chars->ComputeChecksum(), IsOkAndHolds(Crc32(0)));

  // A length beyond the current end is rejected.
  EXPECT_THAT(chars->TruncateTo(100),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));

  // So is a negative length.
  EXPECT_THAT(chars->TruncateTo(-1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
981
// A truncated vector must still reopen cleanly with the reduced element
// count.
TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<float>> floats,
        FileBackedVector<float>::Create(filesystem_, file_path_, kStrategy));

    // Element 0 is 1.0; elements 1 through 4 are 2.0.
    ICING_ASSERT_OK(floats->Set(0, 1.0));
    for (int32_t idx = 1; idx <= 4; ++idx) {
      ICING_ASSERT_OK(floats->Set(idx, 2.0));
    }
  }  // Destroying the vector should trigger a checksum of the 5 elements

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<float>> floats,
        FileBackedVector<float>::Create(filesystem_, file_path_, kStrategy));

    EXPECT_EQ(5, floats->num_elements());
    ICING_EXPECT_OK(floats->TruncateTo(4));
    EXPECT_EQ(4, floats->num_elements());
  }  // Destroying the vector should update the checksum to 4 elements

  // Opening once more double checks that the checksum of the 4 remaining
  // elements matches what was previously saved.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<float>> floats,
        FileBackedVector<float>::Create(filesystem_, file_path_, kStrategy));

    EXPECT_EQ(floats->num_elements(), 4);
  }
}
1021
// Sort() orders exactly the requested half-open range and leaves the rest of
// the vector alone.
TEST_F(FileBackedVectorTest, Sort) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<int>> ints,
      FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

  // Populate indices 0..4 with 5, 4, 2, 3, 1.
  int32_t next_idx = 0;
  for (int value : {5, 4, 2, 3, 1}) {
    ICING_ASSERT_OK(ints->Set(next_idx, value));
    ++next_idx;
  }

  // Sort vector range [1, 4) (excluding 4).
  EXPECT_THAT(ints->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
  // Only indices 1..3 may have moved; indices 0 and 4 keep their values.
  EXPECT_THAT(ints->Get(0), IsOkAndHolds(Pointee(Eq(5))));
  EXPECT_THAT(ints->Get(1), IsOkAndHolds(Pointee(Eq(2))));
  EXPECT_THAT(ints->Get(2), IsOkAndHolds(Pointee(Eq(3))));
  EXPECT_THAT(ints->Get(3), IsOkAndHolds(Pointee(Eq(4))));
  EXPECT_THAT(ints->Get(4), IsOkAndHolds(Pointee(Eq(1))));

  // Sort again by end_idx = num_elements().
  EXPECT_THAT(ints->Sort(/*begin_idx=*/0, /*end_idx=*/ints->num_elements()),
              IsOk());
  EXPECT_THAT(ints->Get(0), IsOkAndHolds(Pointee(Eq(1))));
  EXPECT_THAT(ints->Get(1), IsOkAndHolds(Pointee(Eq(2))));
  EXPECT_THAT(ints->Get(2), IsOkAndHolds(Pointee(Eq(3))));
  EXPECT_THAT(ints->Get(3), IsOkAndHolds(Pointee(Eq(4))));
  EXPECT_THAT(ints->Get(4), IsOkAndHolds(Pointee(Eq(5))));
}
1052
// Sort() must report OUT_OF_RANGE for every malformed [begin_idx, end_idx)
// pair tried below.
TEST_F(FileBackedVectorTest, SortByInvalidIndexShouldReturnOutOfRangeError) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
  constexpr auto kOutOfRange = libtextclassifier3::StatusCode::OUT_OF_RANGE;

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<int>> ints,
      FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

  // Populate indices 0..4 with 5, 4, 2, 3, 1.
  int32_t next_idx = 0;
  for (int value : {5, 4, 2, 3, 1}) {
    ICING_ASSERT_OK(ints->Set(next_idx, value));
    ++next_idx;
  }

  // Negative begin_idx.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/-1, /*end_idx=*/4),
              StatusIs(kOutOfRange));
  // Negative end_idx.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/0, /*end_idx=*/-1),
              StatusIs(kOutOfRange));
  // begin_idx equal to end_idx.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/3, /*end_idx=*/3),
              StatusIs(kOutOfRange));
  // begin_idx greater than end_idx.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/3, /*end_idx=*/1),
              StatusIs(kOutOfRange));
  // begin_idx equal to the number of elements.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/5, /*end_idx=*/5),
              StatusIs(kOutOfRange));
  // end_idx greater than the number of elements.
  EXPECT_THAT(ints->Sort(/*begin_idx=*/3, /*end_idx=*/6),
              StatusIs(kOutOfRange));
}
1078
// After Sort(), the saved checksum must match the sorted contents, so the
// vector can be reopened without a checksum mismatch.
TEST_F(FileBackedVectorTest, SortShouldSetDirtyCorrectly) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<int>> ints,
        FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

    // Populate indices 0..4 with 5, 4, 2, 3, 1.
    int32_t next_idx = 0;
    for (int value : {5, 4, 2, 3, 1}) {
      ICING_ASSERT_OK(ints->Set(next_idx, value));
      ++next_idx;
    }
  }  // Destroying the vector should trigger a checksum of the 5 elements

  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<int>> ints,
        FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

    // Sort vector range [1, 4) (excluding 4).
    EXPECT_THAT(ints->Sort(/*begin_idx=*/1, /*end_idx=*/4), IsOk());
  }  // Destroying the vector should update the checksum

  // Reopening verifies that the checksum stored after sorting matches the
  // data, which tests the correctness of SetDirty() for sorted elements.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<int>> reopened,
      FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy));

  // Indices 1..3 are sorted; indices 0 and 4 keep their original values.
  EXPECT_THAT(reopened->Get(0), IsOkAndHolds(Pointee(Eq(5))));
  EXPECT_THAT(reopened->Get(1), IsOkAndHolds(Pointee(Eq(2))));
  EXPECT_THAT(reopened->Get(2), IsOkAndHolds(Pointee(Eq(3))));
  EXPECT_THAT(reopened->Get(3), IsOkAndHolds(Pointee(Eq(4))));
  EXPECT_THAT(reopened->Get(4), IsOkAndHolds(Pointee(Eq(1))));
}
1120
// Shows that an out-of-band write is only reflected in the checksum once the
// touched indices have been passed to SetDirty().
TEST_F(FileBackedVectorTest, SetDirty) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  // 1. Create a vector and add some data.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<FileBackedVector<char>> chars,
      FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
  Insert(chars.get(), 0, "abcd");

  // View over the vector's element array; reused below to checksum the
  // contents both before and after the overwrite.
  std::string_view contents(chars->array(), chars->num_elements());

  ICING_ASSERT_OK_AND_ASSIGN(Crc32 initial_crc, chars->ComputeChecksum());
  Crc32 reference_crc_before;
  reference_crc_before.Append(contents);
  EXPECT_THAT(initial_crc, Eq(reference_crc_before));

  // 2. Manually overwrite the values of the first two elements.
  const std::string replacement = "ef";
  ASSERT_THAT(
      filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
                         replacement.c_str(), replacement.length()),
      IsTrue());
  ASSERT_THAT(Get(chars.get(), 0, 4), Eq("efcd"));
  Crc32 reference_crc_after;
  reference_crc_after.Append(contents);
  ASSERT_THAT(reference_crc_before, Not(Eq(reference_crc_after)));

  // 3. Without calling SetDirty(), the checksum will be recomputed incorrectly.
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 stale_crc, chars->ComputeChecksum());
  EXPECT_THAT(stale_crc, Not(Eq(reference_crc_after)));

  // 4. Call SetDirty()
  chars->SetDirty(0);
  chars->SetDirty(1);

  // 5. With the correct indices marked dirty, the checksum should now be
  //    computed correctly.
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 refreshed_crc, chars->ComputeChecksum());
  EXPECT_THAT(refreshed_crc, Eq(reference_crc_after));
}
1162
// Create() must fail with INTERNAL when the existing file is too small to
// hold a complete header.
TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
  {
    // 1. Create a vector with a few elements.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> vector,
        FileBackedVector<char>::Create(
            filesystem_, file_path_,
            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
    Insert(vector.get(), 0, "A");
    Insert(vector.get(), 1, "Z");
    ASSERT_THAT(vector->PersistToDisk(), IsOk());
  }

  // 2. Shrink the file to be smaller than the header. The result is asserted
  //    so that a failed shrink is reported here, instead of surfacing later as
  //    a misleading failure of the Create() expectation below.
  ASSERT_TRUE(
      filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1));

  {
    // 3. Attempt to create the file and confirm that it fails.
    EXPECT_THAT(FileBackedVector<char>::Create(
                    filesystem_, file_path_,
                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
                StatusIs(libtextclassifier3::StatusCode::INTERNAL));
  }
}
1187
// Opening a file written as FileBackedVector<char> through
// FileBackedVector<int> must fail with INTERNAL.
TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write a small char vector and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
    Insert(chars.get(), 0, "A");
    Insert(chars.get(), 1, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  {
    // 2. Reopen the same file with a different element size and confirm that
    //    creation fails.
    EXPECT_THAT(
        FileBackedVector<int>::Create(filesystem_, file_path_, kStrategy),
        StatusIs(libtextclassifier3::StatusCode::INTERNAL));
  }
}
1210
// A header whose contents no longer match its stored checksum must make
// Create() fail with FAILED_PRECONDITION.
TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write a small vector and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
    Insert(chars.get(), 0, "A");
    Insert(chars.get(), 1, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  // 2. Change a header field while leaving the stored checksum untouched,
  //    which is similar to corruption of the header.
  FileBackedVector<char>::Header on_disk_header;
  ASSERT_THAT(filesystem_.PRead(fd_, &on_disk_header, sizeof(on_disk_header),
                                /*offset=*/0),
              IsTrue());
  on_disk_header.num_elements = 1;
  ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &on_disk_header,
                                 sizeof(on_disk_header)),
              IsTrue());

  {
    // 3. Opening a file whose header doesn't match its checksum must fail.
    EXPECT_THAT(
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy),
        StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
  }
}
1242
// A header that claims more elements than the file can hold must make
// Create() fail with INTERNAL, even when the header checksum is consistent.
TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write a small vector and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
    Insert(chars.get(), 0, "A");
    Insert(chars.get(), 1, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  // 2. Rewrite the header so that its element count is one more than what the
  //    underlying file has room for, and refresh the header checksum so that
  //    only the element count is inconsistent.
  FileBackedVector<char>::Header on_disk_header;
  ASSERT_THAT(filesystem_.PRead(fd_, &on_disk_header, sizeof(on_disk_header),
                                /*offset=*/0),
              IsTrue());
  int64_t total_bytes = filesystem_.GetFileSize(fd_);
  int64_t element_region_bytes = total_bytes - sizeof(on_disk_header);
  on_disk_header.num_elements = (element_region_bytes / sizeof(char)) + 1;
  on_disk_header.header_checksum = on_disk_header.CalculateHeaderChecksum();
  ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &on_disk_header,
                                 sizeof(on_disk_header)),
              IsTrue());

  {
    // 3. Opening a file whose num_elements is larger than the underlying file
    //    allows must fail.
    EXPECT_THAT(
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy),
        StatusIs(libtextclassifier3::StatusCode::INTERNAL));
  }
}
1277
// Element data that no longer matches the stored checksum must make Create()
// fail with FAILED_PRECONDITION.
TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write a small vector and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
    Insert(chars.get(), 0, "A");
    Insert(chars.get(), 1, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  // 2. Overwrite the first two elements directly in the file, right after the
  //    header.
  const std::string replacement = "BY";
  ASSERT_THAT(
      filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
                         replacement.c_str(), replacement.length()),
      IsTrue());

  {
    // 3. Opening a file whose elements don't match their checksum must fail.
    EXPECT_THAT(
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy),
        StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
  }
}
1307
// Baseline for the Init*Fails tests: an untouched, persisted file reopens
// without error.
TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write a small vector and persist it.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy));
    Insert(chars.get(), 0, "A");
    Insert(chars.get(), 1, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  {
    // 2. Reopen the file, whose header and elements region are both
    //    completely valid. This should succeed.
    EXPECT_THAT(
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy),
        IsOk());
  }
}
1330
// Reopening an existing file with a pre-mapping size smaller than the file
// must still initialize successfully and expose the stored contents.
TEST_F(FileBackedVectorTest, InitFromExistingFileShouldPreMapAtLeastFileSize) {
  constexpr auto kStrategy = MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;

  {
    // 1. Write two elements at indices 10000 and 10001 and persist them.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                       FileBackedVector<char>::kMaxFileSize));
    Insert(chars.get(), 10000, "A");
    Insert(chars.get(), 10001, "Z");
    ASSERT_THAT(chars->PersistToDisk(), IsOk());
  }

  {
    // 2. Reopen with pre_mapping_mmap_size < file_size. The whole file should
    //    still get pre-mapped, so that checksum verification passes during
    //    initialization and the correct contents can be read back.
    int64_t on_disk_bytes = filesystem_.GetFileSize(file_path_.c_str());
    int requested_pre_map_bytes = 10;
    ASSERT_THAT(requested_pre_map_bytes, Lt(on_disk_bytes));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<char>> chars,
        FileBackedVector<char>::Create(filesystem_, file_path_, kStrategy,
                                       FileBackedVector<char>::kMaxFileSize,
                                       requested_pre_map_bytes));
    EXPECT_THAT(Get(chars.get(), /*idx=*/10000, /*expected_len=*/2), Eq("AZ"));
  }
}
1361
1362 } // namespace
1363
1364 } // namespace lib
1365 } // namespace icing
1366