1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/legacy/index/icing-dynamic-trie.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <cstdio>
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/hash/farmhash.h"
27 #include "gmock/gmock.h"
28 #include "gtest/gtest.h"
29 #include "icing/legacy/core/icing-string-util.h"
30 #include "icing/legacy/index/icing-filesystem.h"
31 #include "icing/testing/common-matchers.h"
32 #include "icing/testing/random-string.h"
33 #include "icing/testing/tmp-directory.h"
34 #include "icing/util/logging.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 namespace {
40 
41 using testing::ContainerEq;
42 using testing::ElementsAre;
43 using testing::StrEq;
44 
45 constexpr std::string_view kKeys[] = {
46     "", "ab", "ac", "abd", "bac", "bb", "bacd", "abbb", "abcdefg",
47 };
48 constexpr uint32_t kNumKeys = ABSL_ARRAYSIZE(kKeys);
49 
50 class IcingDynamicTrieTest : public ::testing::Test {
51  protected:
52   // Output trie stats to stderr.
StatsDump(const IcingDynamicTrie & trie)53   static void StatsDump(const IcingDynamicTrie& trie) {
54     IcingDynamicTrie::Stats stats;
55     trie.CollectStats(&stats);
56     DLOG(INFO) << "Stats:\n" << stats.DumpStats(true);
57   }
58 
AddToTrie(IcingDynamicTrie * trie,uint32_t num_keys)59   static void AddToTrie(IcingDynamicTrie* trie, uint32_t num_keys) {
60     std::string key;
61     for (uint32_t i = 0; i < kNumKeys; i++) {
62       key.clear();
63       IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
64       ASSERT_THAT(trie->Insert(key.c_str(), &i), IsOk());
65     }
66   }
67 
CheckTrie(const IcingDynamicTrie & trie,uint32_t num_keys)68   static void CheckTrie(const IcingDynamicTrie& trie, uint32_t num_keys) {
69     std::string key;
70     for (uint32_t i = 0; i < kNumKeys; i++) {
71       key.clear();
72       IcingStringUtil::SStringAppendF(&key, 0, "%u+%010u", i % 2, i);
73       uint32_t val;
74       bool found = trie.Find(key.c_str(), &val);
75       EXPECT_TRUE(found);
76       EXPECT_EQ(i, val);
77     }
78   }
79 
PrintTrie(const IcingDynamicTrie & trie)80   static void PrintTrie(const IcingDynamicTrie& trie) {
81     std::vector<std::string> keys;
82     std::ostringstream os;
83     DLOG(INFO) << "Trie:\n";
84     trie.DumpTrie(&os, &keys);
85     DLOG(INFO) << os.str();
86   }
87 
SetUp()88   void SetUp() override {
89     trie_files_dir_ = GetTestTempDir() + "/trie_files";
90     trie_files_prefix_ = trie_files_dir_ + "/test_file_";
91   }
92 
TearDown()93   void TearDown() override {
94     IcingFilesystem filesystem;
95     filesystem.DeleteDirectoryRecursively(trie_files_dir_.c_str());
96   }
97 
98   std::string trie_files_dir_;
99   std::string trie_files_prefix_;
100 };
101 
RetrieveKeyValuePairs(IcingDynamicTrie::Iterator & term_iter)102 std::vector<std::pair<std::string, int>> RetrieveKeyValuePairs(
103     IcingDynamicTrie::Iterator& term_iter) {
104   std::vector<std::pair<std::string, int>> key_value;
105   for (; term_iter.IsValid(); term_iter.Advance()) {
106     uint32_t val;
107     memcpy(&val, term_iter.GetValue(), sizeof(val));
108     key_value.push_back(std::make_pair(term_iter.GetKey(), val));
109   }
110   return key_value;
111 }
112 
113 constexpr std::string_view kCommonEnglishWords[] = {
114     "that", "was",  "for",  "on",   "are",  "with",  "they", "be",    "at",
115     "one",  "have", "this", "from", "word", "but",   "what", "some",  "you",
116     "had",  "the",  "and",  "can",  "out",  "other", "were", "which", "their",
117     "time", "will", "how",  "said", "each", "tell",  "may",  "three"};
118 constexpr uint32_t kCommonEnglishWordArrayLen =
119     sizeof(kCommonEnglishWords) / sizeof(std::string_view);
120 
TEST_F(IcingDynamicTrieTest,Simple)121 TEST_F(IcingDynamicTrieTest, Simple) {
122   // Test simple key insertions.
123   IcingFilesystem filesystem;
124   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
125                         &filesystem);
126   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
127   ASSERT_TRUE(trie.Init());
128 
129   for (uint32_t i = 0; i < kNumKeys; i++) {
130     ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
131 
132     uint32_t val;
133     bool found = trie.Find(kKeys[i].data(), &val);
134     EXPECT_TRUE(found) << kKeys[i];
135     if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
136   }
137 
138   EXPECT_EQ(trie.size(), kNumKeys);
139 
140   StatsDump(trie);
141   std::vector<std::string> keys;
142   std::ostringstream os;
143   DLOG(INFO) << "Trie:\n";
144   trie.DumpTrie(&os, &keys);
145   DLOG(INFO) << os.str();
146   EXPECT_EQ(keys.size(), kNumKeys);
147 }
148 
TEST_F(IcingDynamicTrieTest,Init)149 TEST_F(IcingDynamicTrieTest, Init) {
150   // Test create/init behavior.
151   IcingFilesystem filesystem;
152   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
153                         &filesystem);
154   EXPECT_FALSE(trie.is_initialized());
155   EXPECT_FALSE(trie.Init());
156 
157   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
158   EXPECT_TRUE(trie.Init());
159   EXPECT_TRUE(trie.is_initialized());
160 }
161 
TEST_F(IcingDynamicTrieTest,Iterator)162 TEST_F(IcingDynamicTrieTest, Iterator) {
163   // Test iterator.
164   IcingFilesystem filesystem;
165   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
166                         &filesystem);
167   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
168   ASSERT_TRUE(trie.Init());
169 
170   for (uint32_t i = 0; i < kNumKeys; i++) {
171     ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
172   }
173 
174   // Should get the entire trie.
175   std::vector<std::pair<std::string, int>> exp_key_values = {
176       {"", 0},   {"ab", 1},  {"abbb", 7}, {"abcdefg", 8}, {"abd", 3},
177       {"ac", 2}, {"bac", 4}, {"bacd", 6}, {"bb", 5}};
178   IcingDynamicTrie::Iterator it_all(trie, "");
179   std::vector<std::pair<std::string, int>> key_values =
180       RetrieveKeyValuePairs(it_all);
181   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
182 
183   // Should get same results after calling Reset
184   it_all.Reset();
185   key_values = RetrieveKeyValuePairs(it_all);
186   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
187 
188   // Get everything under "a".
189   exp_key_values = {
190       {"ab", 1}, {"abbb", 7}, {"abcdefg", 8}, {"abd", 3}, {"ac", 2}};
191   IcingDynamicTrie::Iterator it1(trie, "a");
192   key_values = RetrieveKeyValuePairs(it1);
193   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
194 
195   // Should get same results after calling Reset
196   it1.Reset();
197   key_values = RetrieveKeyValuePairs(it1);
198   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
199 
200   // Now "b".
201   exp_key_values = {{"bac", 4}, {"bacd", 6}, {"bb", 5}};
202   IcingDynamicTrie::Iterator it2(trie, "b");
203   key_values = RetrieveKeyValuePairs(it2);
204   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
205 
206   // Should get same results after calling Reset
207   it2.Reset();
208   key_values = RetrieveKeyValuePairs(it2);
209   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
210 
211   // Get everything under "ab".
212   exp_key_values = {{"ab", 1}, {"abbb", 7}, {"abcdefg", 8}, {"abd", 3}};
213   IcingDynamicTrie::Iterator it3(trie, "ab");
214   key_values = RetrieveKeyValuePairs(it3);
215   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
216 
217   // Should get same results after calling Reset
218   it3.Reset();
219   key_values = RetrieveKeyValuePairs(it3);
220   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
221 
222   // Should match only one key exactly.
223   constexpr std::string_view kOneMatch[] = {
224       "abd",
225       "abcd",
226       "abcdef",
227       "abcdefg",
228   };
229   // With the following match:
230   constexpr std::string_view kOneMatchMatched[] = {
231       "abd",
232       "abcdefg",
233       "abcdefg",
234       "abcdefg",
235   };
236 
237   for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) {
238     IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data());
239     ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
240     EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
241     EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
242     EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
243 
244     // Should get same results after calling Reset
245     it_single.Reset();
246     ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
247     EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
248     EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
249     EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
250   }
251 
252   // Matches nothing.
253   constexpr std::string_view kNoMatch[] = {
254       "abbd",
255       "abcdeg",
256       "abcdefh",
257   };
258   for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) {
259     IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data());
260     EXPECT_FALSE(it_empty.IsValid());
261     it_empty.Reset();
262     EXPECT_FALSE(it_empty.IsValid());
263   }
264 
265   // Clear.
266   trie.Clear();
267   EXPECT_FALSE(IcingDynamicTrie::Iterator(trie, "").IsValid());
268   EXPECT_EQ(0u, trie.size());
269   EXPECT_EQ(1.0, trie.min_free_fraction());
270 }
271 
TEST_F(IcingDynamicTrieTest,IteratorReverse)272 TEST_F(IcingDynamicTrieTest, IteratorReverse) {
273   // Test iterator.
274   IcingFilesystem filesystem;
275   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
276                         &filesystem);
277   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
278   ASSERT_TRUE(trie.Init());
279 
280   for (uint32_t i = 0; i < kNumKeys; i++) {
281     ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
282   }
283 
284   // Should get the entire trie.
285   std::vector<std::pair<std::string, int>> exp_key_values = {
286       {"bb", 5},      {"bacd", 6}, {"bac", 4}, {"ac", 2}, {"abd", 3},
287       {"abcdefg", 8}, {"abbb", 7}, {"ab", 1},  {"", 0}};
288   IcingDynamicTrie::Iterator it_all(trie, "", /*reverse=*/true);
289   std::vector<std::pair<std::string, int>> key_values =
290       RetrieveKeyValuePairs(it_all);
291   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
292   it_all.Reset();
293   key_values = RetrieveKeyValuePairs(it_all);
294   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
295 
296   // Get everything under "a".
297   exp_key_values = {
298       {"ac", 2}, {"abd", 3}, {"abcdefg", 8}, {"abbb", 7}, {"ab", 1}};
299   IcingDynamicTrie::Iterator it1(trie, "a", /*reverse=*/true);
300   key_values = RetrieveKeyValuePairs(it1);
301   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
302 
303   // Should get same results after calling Reset
304   it1.Reset();
305   key_values = RetrieveKeyValuePairs(it1);
306   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
307 
308   // Now "b".
309   exp_key_values = {{"bb", 5}, {"bacd", 6}, {"bac", 4}};
310   IcingDynamicTrie::Iterator it2(trie, "b", /*reverse=*/true);
311   key_values = RetrieveKeyValuePairs(it2);
312   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
313 
314   // Should get same results after calling Reset
315   it2.Reset();
316   key_values = RetrieveKeyValuePairs(it2);
317   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
318 
319   // Get everything under "ab".
320   exp_key_values = {{"abd", 3}, {"abcdefg", 8}, {"abbb", 7}, {"ab", 1}};
321   IcingDynamicTrie::Iterator it3(trie, "ab", /*reverse=*/true);
322   key_values = RetrieveKeyValuePairs(it3);
323   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
324 
325   // Should get same results after calling Reset
326   it3.Reset();
327   key_values = RetrieveKeyValuePairs(it3);
328   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
329 
330   // Should match only one key exactly.
331   constexpr std::string_view kOneMatch[] = {
332       "abd",
333       "abcd",
334       "abcdef",
335       "abcdefg",
336   };
337   // With the following match:
338   constexpr std::string_view kOneMatchMatched[] = {
339       "abd",
340       "abcdefg",
341       "abcdefg",
342       "abcdefg",
343   };
344 
345   for (size_t k = 0; k < ABSL_ARRAYSIZE(kOneMatch); k++) {
346     IcingDynamicTrie::Iterator it_single(trie, kOneMatch[k].data(),
347                                          /*reverse=*/true);
348     ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
349     EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
350     EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
351     EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
352 
353     // Should get same results after calling Reset
354     it_single.Reset();
355     ASSERT_TRUE(it_single.IsValid()) << kOneMatch[k];
356     EXPECT_THAT(it_single.GetKey(), StrEq(kOneMatchMatched[k].data()));
357     EXPECT_FALSE(it_single.Advance()) << kOneMatch[k];
358     EXPECT_FALSE(it_single.IsValid()) << kOneMatch[k];
359   }
360 
361   // Matches nothing.
362   constexpr std::string_view kNoMatch[] = {
363       "abbd",
364       "abcdeg",
365       "abcdefh",
366   };
367   for (size_t k = 0; k < ABSL_ARRAYSIZE(kNoMatch); k++) {
368     IcingDynamicTrie::Iterator it_empty(trie, kNoMatch[k].data(),
369                                         /*reverse=*/true);
370     EXPECT_FALSE(it_empty.IsValid());
371     it_empty.Reset();
372     EXPECT_FALSE(it_empty.IsValid());
373   }
374 
375   // Clear.
376   trie.Clear();
377   EXPECT_FALSE(
378       IcingDynamicTrie::Iterator(trie, "", /*reverse=*/true).IsValid());
379   EXPECT_EQ(0u, trie.size());
380   EXPECT_EQ(1.0, trie.min_free_fraction());
381 }
382 
TEST_F(IcingDynamicTrieTest,IteratorLoadTest)383 TEST_F(IcingDynamicTrieTest, IteratorLoadTest) {
384   IcingFilesystem filesystem;
385   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
386                         &filesystem);
387   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
388   ASSERT_TRUE(trie.Init());
389 
390   std::default_random_engine random;
391   ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
392 
393   std::vector<std::pair<std::string, int>> exp_key_values;
394   // Randomly generate 1024 terms.
395   for (int i = 0; i < 1024; ++i) {
396     std::string term = RandomString("abcdefg", 5, &random) + std::to_string(i);
397     ASSERT_THAT(trie.Insert(term.c_str(), &i), IsOk());
398     exp_key_values.push_back(std::make_pair(term, i));
399   }
400   // Lexicographically sort the expected keys.
401   std::sort(exp_key_values.begin(), exp_key_values.end());
402 
403   // Check that the iterator works.
404   IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
405   std::vector<std::pair<std::string, int>> key_values =
406       RetrieveKeyValuePairs(term_iter);
407   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
408 
409   // Check that Reset works.
410   term_iter.Reset();
411   key_values = RetrieveKeyValuePairs(term_iter);
412   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
413 
414   std::reverse(exp_key_values.begin(), exp_key_values.end());
415   // Check that the reverse iterator works.
416   IcingDynamicTrie::Iterator term_iter_reverse(trie, /*prefix=*/"",
417                                                /*reverse=*/true);
418   key_values = RetrieveKeyValuePairs(term_iter_reverse);
419   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
420 
421   // Check that Reset works.
422   term_iter_reverse.Reset();
423   key_values = RetrieveKeyValuePairs(term_iter_reverse);
424   EXPECT_THAT(key_values, ContainerEq(exp_key_values));
425 }
426 
TEST_F(IcingDynamicTrieTest,Persistence)427 TEST_F(IcingDynamicTrieTest, Persistence) {
428   // Test persistence on the English dictionary.
429   IcingFilesystem filesystem;
430   {
431     // Test with a trie including strings in words. Test will fail if
432     // words are not unique.
433     IcingDynamicTrie trie(trie_files_prefix_,
434                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
435     EXPECT_FALSE(trie.Init());
436     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
437     ASSERT_TRUE(trie.Init());
438 
439     for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
440       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
441     }
442     // Explicitly omit sync.
443 
444     StatsDump(trie);
445   }
446 
447   {
448     IcingDynamicTrie trie(trie_files_prefix_,
449                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
450     ASSERT_TRUE(trie.Init());
451     EXPECT_EQ(0U, trie.size());
452 
453     for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
454       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
455     }
456     trie.Sync();
457 
458     StatsDump(trie);
459   }
460 
461   {
462     IcingDynamicTrie trie(trie_files_prefix_,
463                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
464     ASSERT_TRUE(trie.Init());
465 
466     // Make sure we can find everything with the right value.
467     uint32_t found_count = 0;
468     uint32_t matched_count = 0;
469     for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
470       uint32_t val;
471       bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
472       if (found) {
473         found_count++;
474         if (i == val) {
475           matched_count++;
476         }
477       }
478     }
479     EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
480     EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
481 
482     StatsDump(trie);
483   }
484 }
485 
TEST_F(IcingDynamicTrieTest,PersistenceShared)486 TEST_F(IcingDynamicTrieTest, PersistenceShared) {
487   // Test persistence on the English dictionary.
488   IcingFilesystem filesystem;
489   IcingDynamicTrie::RuntimeOptions ropt;
490 
491   {
492     // Test with a trie including strings in words. Test will fail if
493     // words are not unique.
494     ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
495     IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
496     EXPECT_FALSE(trie.Init());
497     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
498     ASSERT_TRUE(trie.Init());
499 
500     uint32_t next_reopen = kCommonEnglishWordArrayLen / 16;
501     for (uint32_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
502       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &i), IsOk());
503 
504       if (i == next_reopen) {
505         ASSERT_NE(0u, trie.UpdateCrc());
506         trie.Close();
507         ASSERT_TRUE(trie.Init());
508 
509         next_reopen += next_reopen / 2;
510       }
511     }
512     // Explicitly omit sync. Shared should automatically persist.
513 
514     StatsDump(trie);
515   }
516 
517   // Go back and forth between the two policies.
518   for (int i = 0; i < 5; i++) {
519     if (i % 2 == 0) {
520       DLOG(INFO) << "Opening with map shared";
521       ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
522     } else {
523       DLOG(INFO) << "Opening with explicit flush";
524       ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kExplicitFlush;
525     }
526     IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
527     ASSERT_TRUE(trie.Init());
528 
529     // Make sure we can find everything with the right value.
530     uint32_t found_count = 0;
531     uint32_t matched_count = 0;
532     for (size_t i = 0; i < kCommonEnglishWordArrayLen; i++) {
533       uint32_t val;
534       bool found = trie.Find(kCommonEnglishWords[i].data(), &val);
535       if (found) {
536         found_count++;
537         if (i == val) {
538           matched_count++;
539         }
540       }
541     }
542     EXPECT_EQ(found_count, kCommonEnglishWordArrayLen);
543     EXPECT_EQ(matched_count, kCommonEnglishWordArrayLen);
544 
545     StatsDump(trie);
546   }
547 
548   // Clear and re-open.
549   ropt.storage_policy = IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc;
550   IcingDynamicTrie trie(trie_files_prefix_, ropt, &filesystem);
551   ASSERT_TRUE(trie.Init());
552   trie.Clear();
553   trie.Close();
554   ASSERT_TRUE(trie.Init());
555 }
556 
TEST_F(IcingDynamicTrieTest,Sync)557 TEST_F(IcingDynamicTrieTest, Sync) {
558   IcingFilesystem filesystem;
559   {
560     IcingDynamicTrie trie(trie_files_prefix_,
561                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
562     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
563     ASSERT_TRUE(trie.Init());
564 
565     for (uint32_t i = 0; i < kNumKeys; i++) {
566       ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk());
567 
568       uint32_t val;
569       bool found = trie.Find(kKeys[i].data(), &val);
570       EXPECT_TRUE(found) << kKeys[i];
571       if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
572     }
573 
574     StatsDump(trie);
575     PrintTrie(trie);
576 
577     trie.Sync();
578 
579     for (uint32_t i = 0; i < kNumKeys; i++) {
580       uint32_t val;
581       bool found = trie.Find(kKeys[i].data(), &val);
582       EXPECT_TRUE(found) << kKeys[i];
583       if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
584     }
585   }
586 
587   {
588     IcingDynamicTrie trie(trie_files_prefix_,
589                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
590     ASSERT_TRUE(trie.Init());
591 
592     for (uint32_t i = 0; i < kNumKeys; i++) {
593       uint32_t val;
594       bool found = trie.Find(kKeys[i].data(), &val);
595       EXPECT_TRUE(found) << kKeys[i];
596       if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
597     }
598 
599     StatsDump(trie);
600     PrintTrie(trie);
601   }
602 }
603 
TEST_F(IcingDynamicTrieTest,LimitsZero)604 TEST_F(IcingDynamicTrieTest, LimitsZero) {
605   // Don't crash if we set limits to 0.
606   IcingFilesystem filesystem;
607   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
608                         &filesystem);
609   ASSERT_FALSE(trie.CreateIfNotExist(IcingDynamicTrie::Options(0, 0, 0, 0)));
610 }
611 
TEST_F(IcingDynamicTrieTest,LimitsSmall)612 TEST_F(IcingDynamicTrieTest, LimitsSmall) {
613   // Test limits with a few keys.
614   IcingFilesystem filesystem;
615   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
616                         &filesystem);
617   ASSERT_TRUE(trie.CreateIfNotExist(
618       IcingDynamicTrie::Options(10, 300, 30, sizeof(uint32_t))));
619   ASSERT_TRUE(trie.Init());
620 
621   ASSERT_LT(3U, kNumKeys);
622 
623   for (uint32_t i = 0; i < 3; i++) {
624     ASSERT_THAT(trie.Insert(kKeys[i].data(), &i), IsOk()) << i;
625 
626     uint32_t val;
627     bool found = trie.Find(kKeys[i].data(), &val);
628     EXPECT_TRUE(found) << kKeys[i];
629     if (found) EXPECT_EQ(i, val) << kKeys[i] << " " << val;
630   }
631 
632   uint32_t val = 3;
633   EXPECT_THAT(trie.Insert(kKeys[3].data(), &val),
634               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
635 
636   StatsDump(trie);
637   PrintTrie(trie);
638 }
639 
TEST_F(IcingDynamicTrieTest,DISABLEDFingerprintedKeys)640 TEST_F(IcingDynamicTrieTest, DISABLEDFingerprintedKeys) {
641   IcingFilesystem filesystem;
642   IcingDynamicTrie::Options options(4 << 20, 4 << 20, 20 << 20,
643                                     sizeof(uint32_t));
644   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
645                         &filesystem);
646   ASSERT_TRUE(trie.CreateIfNotExist(options));
647   ASSERT_TRUE(trie.Init());
648   IcingDynamicTrie triefp(trie_files_prefix_ + ".fps",
649                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
650   ASSERT_TRUE(triefp.CreateIfNotExist(options));
651   ASSERT_TRUE(triefp.Init());
652 
653   static const uint32_t kNumKeys = 1000000;
654   std::string key;
655   for (uint32_t i = 0; i < kNumKeys; i++) {
656     key.clear();
657     IcingStringUtil::SStringAppendF(
658         &key, 1000, "content://gmail-ls/account/conversation/%u/message/%u", i,
659         10 * i);
660     ASSERT_THAT(trie.Insert(key.c_str(), &i), IsOk());
661 
662     // Now compute a fingerprint.
663     uint64_t fpkey = tc3farmhash::Fingerprint64(key);
664 
665     // Convert to base255 since keys in trie cannot contain 0.
666     uint8_t fpkey_base255[9];
667     for (int j = 0; j < 8; j++) {
668       fpkey_base255[j] = (fpkey % 255) + 1;
669       fpkey /= 255;
670     }
671     fpkey_base255[8] = '\0';
672     ASSERT_THAT(triefp.Insert(reinterpret_cast<const char*>(fpkey_base255), &i),
673                 IsOk());
674 
675     // Sync periodically to gauge write locality.
676     if ((i + 1) % (kNumKeys / 10) == 0) {
677       DLOG(INFO) << "Trie sync";
678       trie.Sync();
679       DLOG(INFO) << "Trie fp sync";
680       triefp.Sync();
681     }
682   }
683 
684   DLOG(INFO) << "Trie stats";
685   StatsDump(trie);
686   DLOG(INFO) << "Trie fp stats";
687   StatsDump(triefp);
688 }
689 
TEST_F(IcingDynamicTrieTest,AddDups)690 TEST_F(IcingDynamicTrieTest, AddDups) {
691   IcingFilesystem filesystem;
692   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
693                         &filesystem);
694   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
695   ASSERT_TRUE(trie.Init());
696 
697   static const uint32_t kNumKeys = 5000;
698   AddToTrie(&trie, kNumKeys);
699   CheckTrie(trie, kNumKeys);
700 
701   DLOG(INFO) << "Trie stats";
702   StatsDump(trie);
703 
704   AddToTrie(&trie, kNumKeys);
705   CheckTrie(trie, kNumKeys);
706   DLOG(INFO) << "Trie stats";
707   StatsDump(trie);
708 }
709 
TEST_F(IcingDynamicTrieTest,Properties)710 TEST_F(IcingDynamicTrieTest, Properties) {
711   IcingFilesystem filesystem;
712   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
713                         &filesystem);
714   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
715   ASSERT_TRUE(trie.Init());
716 
717   static const uint32_t kOne = 1;
718   uint32_t val_idx;
719   trie.Insert("abcd", &kOne, &val_idx, false);
720   trie.SetProperty(val_idx, 0);
721   trie.SetProperty(val_idx, 3);
722 
723   {
724     IcingDynamicTrie::PropertyReader reader(trie, 3);
725     ASSERT_TRUE(reader.Exists());
726     EXPECT_TRUE(reader.HasProperty(val_idx));
727     EXPECT_FALSE(reader.HasProperty(1000));
728   }
729 
730   // Disappear after close.
731   trie.Close();
732   ASSERT_TRUE(trie.Init());
733   {
734     IcingDynamicTrie::PropertyReader reader(trie, 3);
735     EXPECT_FALSE(reader.HasProperty(val_idx));
736   }
737 
738   // Persist after sync.
739   trie.Insert("abcd", &kOne, &val_idx, false);
740   trie.SetProperty(val_idx, 1);
741   ASSERT_TRUE(trie.Sync());
742   trie.Close();
743   ASSERT_TRUE(trie.Init());
744 
745   uint32_t val;
746   ASSERT_TRUE(trie.Find("abcd", &val, &val_idx));
747   EXPECT_EQ(1u, val);
748   {
749     IcingDynamicTrie::PropertyReader reader(trie, 1);
750     EXPECT_TRUE(reader.HasProperty(val_idx));
751   }
752 
753   // Get all.
754   {
755     IcingDynamicTrie::PropertyReadersAll readers(trie);
756     ASSERT_EQ(4u, readers.size());
757     EXPECT_TRUE(readers.Exists(0));
758     EXPECT_TRUE(readers.Exists(1));
759     EXPECT_FALSE(readers.Exists(2));
760     EXPECT_TRUE(readers.Exists(3));
761   }
762 }
763 
TEST_F(IcingDynamicTrieTest,ClearSingleProperty)764 TEST_F(IcingDynamicTrieTest, ClearSingleProperty) {
765   IcingFilesystem filesystem;
766   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
767                         &filesystem);
768   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
769   ASSERT_TRUE(trie.Init());
770 
771   static const uint32_t kOne = 1;
772   uint32_t val_idx[3];
773   trie.Insert("abcd", &kOne, &val_idx[0], false);
774   trie.SetProperty(val_idx[0], 0);
775   trie.SetProperty(val_idx[0], 3);
776 
777   trie.Insert("efgh", &kOne, &val_idx[1], false);
778   trie.SetProperty(val_idx[1], 0);
779   trie.SetProperty(val_idx[1], 3);
780 
781   trie.Insert("ijkl", &kOne, &val_idx[2], false);
782   trie.SetProperty(val_idx[2], 0);
783   trie.SetProperty(val_idx[2], 3);
784 
785   {
786     IcingDynamicTrie::PropertyReadersAll readers(trie);
787     ASSERT_EQ(4u, readers.size());
788     EXPECT_TRUE(readers.Exists(0));
789     EXPECT_FALSE(readers.Exists(1));
790     EXPECT_FALSE(readers.Exists(2));
791     EXPECT_TRUE(readers.Exists(3));
792     for (size_t i = 0; i < readers.size(); i++) {
793       if (readers.Exists(i)) {
794         for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
795           EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
796         }
797       }
798     }
799   }
800 
801   EXPECT_TRUE(trie.ClearPropertyForAllValues(3));
802 
803   {
804     IcingDynamicTrie::PropertyReadersAll readers(trie);
805     ASSERT_EQ(4u, readers.size());
806     EXPECT_TRUE(readers.Exists(0));
807     EXPECT_FALSE(readers.Exists(1));
808     EXPECT_FALSE(readers.Exists(2));
809     // Clearing the property causes all values to be deleted.
810     EXPECT_FALSE(readers.Exists(3));
811     for (size_t i = 0; i < readers.size(); i++) {
812       for (size_t j = 0; j < sizeof(val_idx) / sizeof(uint32_t); ++j) {
813         if (i == 0) {
814           EXPECT_TRUE(readers.HasProperty(i, val_idx[j]));
815         } else {
816           EXPECT_FALSE(readers.HasProperty(i, val_idx[j]));
817         }
818       }
819     }
820   }
821 }
822 
TEST_F(IcingDynamicTrieTest,DeletionShouldWorkWhenRootIsLeaf)823 TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWhenRootIsLeaf) {
824   IcingFilesystem filesystem;
825   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
826                         &filesystem);
827   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
828   ASSERT_TRUE(trie.Init());
829 
830   // Inserts a key, the root is a leaf.
831   uint32_t value = 1;
832   ASSERT_THAT(trie.Insert("foo", &value), IsOk());
833   ASSERT_TRUE(trie.Find("foo", &value));
834 
835   // Deletes the key.
836   EXPECT_TRUE(trie.Delete("foo"));
837   EXPECT_FALSE(trie.Find("foo", &value));
838 }
839 
TEST_F(IcingDynamicTrieTest,DeletionShouldWorkWhenLastCharIsLeaf)840 TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWhenLastCharIsLeaf) {
841   IcingFilesystem filesystem;
842   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
843                         &filesystem);
844   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
845   ASSERT_TRUE(trie.Init());
846 
847   // Inserts "bar" and "ba", the trie structure looks like:
848   //       root
849   //         |
850   //         b
851   //         |
852   //         a
853   //        / \
854   //     null  r
855   uint32_t value = 1;
856   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
857   ASSERT_THAT(trie.Insert("ba", &value), IsOk());
858   ASSERT_TRUE(trie.Find("bar", &value));
859   ASSERT_TRUE(trie.Find("ba", &value));
860 
861   // Deletes "bar". "r" is a leaf node in the trie.
862   EXPECT_TRUE(trie.Delete("bar"));
863   EXPECT_FALSE(trie.Find("bar", &value));
864   EXPECT_TRUE(trie.Find("ba", &value));
865 }
866 
TEST_F(IcingDynamicTrieTest,DeletionShouldWorkWithTerminationNode)867 TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithTerminationNode) {
868   IcingFilesystem filesystem;
869   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
870                         &filesystem);
871   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
872   ASSERT_TRUE(trie.Init());
873 
874   // Inserts "bar" and "ba", the trie structure looks like:
875   //       root
876   //         |
877   //         b
878   //         |
879   //         a
880   //        / \
881   //     null  r
882   uint32_t value = 1;
883   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
884   ASSERT_THAT(trie.Insert("ba", &value), IsOk());
885   ASSERT_TRUE(trie.Find("bar", &value));
886   ASSERT_TRUE(trie.Find("ba", &value));
887 
888   // Deletes "ba" which is a key with termination node in the trie.
889   EXPECT_TRUE(trie.Delete("ba"));
890   EXPECT_FALSE(trie.Find("ba", &value));
891   EXPECT_TRUE(trie.Find("bar", &value));
892 }
893 
TEST_F(IcingDynamicTrieTest,DeletionShouldWorkWithMultipleNexts)894 TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithMultipleNexts) {
895   IcingFilesystem filesystem;
896   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
897                         &filesystem);
898   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
899   ASSERT_TRUE(trie.Init());
900 
901   // Inserts "ba", "bb", "bc", and "bd", the trie structure looks like:
902   //       root
903   //         |
904   //         b
905   //      / | | \
906   //     a  b c  d
907   uint32_t value = 1;
908   ASSERT_THAT(trie.Insert("ba", &value), IsOk());
909   ASSERT_THAT(trie.Insert("bb", &value), IsOk());
910   ASSERT_THAT(trie.Insert("bc", &value), IsOk());
911   ASSERT_THAT(trie.Insert("bd", &value), IsOk());
912   ASSERT_TRUE(trie.Find("ba", &value));
913   ASSERT_TRUE(trie.Find("bb", &value));
914   ASSERT_TRUE(trie.Find("bc", &value));
915   ASSERT_TRUE(trie.Find("bd", &value));
916 
917   // Deletes "bc".
918   EXPECT_TRUE(trie.Delete("bc"));
919   EXPECT_FALSE(trie.Find("bc", &value));
920   EXPECT_TRUE(trie.Find("ba", &value));
921   EXPECT_TRUE(trie.Find("bb", &value));
922   EXPECT_TRUE(trie.Find("bd", &value));
923 }
924 
TEST_F(IcingDynamicTrieTest,DeletionShouldWorkWithMultipleTrieBranches)925 TEST_F(IcingDynamicTrieTest, DeletionShouldWorkWithMultipleTrieBranches) {
926   IcingFilesystem filesystem;
927   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
928                         &filesystem);
929   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
930   ASSERT_TRUE(trie.Init());
931 
932   // Inserts "batter", "battle", and "bar", the trie structure looks like:
933   //       root
934   //         |
935   //         b
936   //         |
937   //         a
938   //        / \
939   //       t   r
940   //       |
941   //       t
942   //      / \
943   //     e   l
944   //     |   |
945   //     r   e
946   uint32_t value = 1;
947   ASSERT_THAT(trie.Insert("batter", &value), IsOk());
948   ASSERT_THAT(trie.Insert("battle", &value), IsOk());
949   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
950   ASSERT_TRUE(trie.Find("batter", &value));
951   ASSERT_TRUE(trie.Find("battle", &value));
952   ASSERT_TRUE(trie.Find("bar", &value));
953 
954   // Deletes "batter".
955   EXPECT_TRUE(trie.Delete("batter"));
956   EXPECT_FALSE(trie.Find("batter", &value));
957   EXPECT_TRUE(trie.Find("battle", &value));
958   EXPECT_TRUE(trie.Find("bar", &value));
959 }
960 
TEST_F(IcingDynamicTrieTest,InsertionShouldWorkAfterDeletion)961 TEST_F(IcingDynamicTrieTest, InsertionShouldWorkAfterDeletion) {
962   IcingFilesystem filesystem;
963   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
964                         &filesystem);
965   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
966   ASSERT_TRUE(trie.Init());
967 
968   // Inserts some keys.
969   uint32_t value = 1;
970   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
971   ASSERT_THAT(trie.Insert("bed", &value), IsOk());
972   ASSERT_THAT(trie.Insert("foo", &value), IsOk());
973 
974   // Deletes a key
975   ASSERT_TRUE(trie.Delete("bed"));
976   ASSERT_FALSE(trie.Find("bed", &value));
977 
978   // Inserts after deletion
979   ASSERT_THAT(trie.Insert("bed", &value), IsOk());
980   ASSERT_THAT(trie.Insert("bedroom", &value), IsOk());
981   EXPECT_TRUE(trie.Find("bed", &value));
982   EXPECT_TRUE(trie.Find("bedroom", &value));
983 }
984 
TEST_F(IcingDynamicTrieTest,IteratorShouldWorkAfterDeletion)985 TEST_F(IcingDynamicTrieTest, IteratorShouldWorkAfterDeletion) {
986   IcingFilesystem filesystem;
987   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
988                         &filesystem);
989   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
990   ASSERT_TRUE(trie.Init());
991 
992   // Inserts some keys.
993   uint32_t value = 1;
994   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
995   ASSERT_THAT(trie.Insert("bed", &value), IsOk());
996   ASSERT_THAT(trie.Insert("foo", &value), IsOk());
997 
998   // Deletes a key
999   ASSERT_TRUE(trie.Delete("bed"));
1000 
1001   // Iterates through all keys
1002   IcingDynamicTrie::Iterator iterator_all(trie, "");
1003   std::vector<std::string> results;
1004   for (; iterator_all.IsValid(); iterator_all.Advance()) {
1005     results.emplace_back(iterator_all.GetKey());
1006   }
1007   EXPECT_THAT(results, ElementsAre("bar", "foo"));
1008 
1009   // Iterates through keys that start with "b"
1010   IcingDynamicTrie::Iterator iterator_b(trie, "b");
1011   results.clear();
1012   for (; iterator_b.IsValid(); iterator_b.Advance()) {
1013     results.emplace_back(iterator_b.GetKey());
1014   }
1015   EXPECT_THAT(results, ElementsAre("bar"));
1016 }
1017 
TEST_F(IcingDynamicTrieTest,DeletingNonExistingKeyShouldReturnTrue)1018 TEST_F(IcingDynamicTrieTest, DeletingNonExistingKeyShouldReturnTrue) {
1019   IcingFilesystem filesystem;
1020   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1021                         &filesystem);
1022   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1023   ASSERT_TRUE(trie.Init());
1024 
1025   // Inserts some keys.
1026   uint32_t value = 1;
1027   ASSERT_THAT(trie.Insert("bar", &value), IsOk());
1028   ASSERT_THAT(trie.Insert("bed", &value), IsOk());
1029 
1030   // "ba" and bedroom are not keys in the trie.
1031   EXPECT_TRUE(trie.Delete("ba"));
1032   EXPECT_TRUE(trie.Delete("bedroom"));
1033 
1034   // The original keys are not affected.
1035   EXPECT_TRUE(trie.Find("bar", &value));
1036   EXPECT_TRUE(trie.Find("bed", &value));
1037 }
1038 
TEST_F(IcingDynamicTrieTest,DeletionResortsFullNextArray)1039 TEST_F(IcingDynamicTrieTest, DeletionResortsFullNextArray) {
1040   IcingFilesystem filesystem;
1041   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1042                         &filesystem);
1043   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1044   ASSERT_TRUE(trie.Init());
1045 
1046   uint32_t value = 1;
1047   // 'f' -> [ 'a', 'j', 'o', 'u' ]
1048   ASSERT_THAT(trie.Insert("foul", &value), IsOk());
1049   ASSERT_THAT(trie.Insert("far", &value), IsOk());
1050   ASSERT_THAT(trie.Insert("fudge", &value), IsOk());
1051   ASSERT_THAT(trie.Insert("fjord", &value), IsOk());
1052 
1053   // Delete the third child
1054   EXPECT_TRUE(trie.Delete("foul"));
1055 
1056   std::vector<std::string> remaining;
1057   for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
1058        term_iter.IsValid(); term_iter.Advance()) {
1059     remaining.push_back(term_iter.GetKey());
1060   }
1061   EXPECT_THAT(remaining, ElementsAre("far", "fjord", "fudge"));
1062 }
1063 
TEST_F(IcingDynamicTrieTest,DeletionResortsPartiallyFilledNextArray)1064 TEST_F(IcingDynamicTrieTest, DeletionResortsPartiallyFilledNextArray) {
1065   IcingFilesystem filesystem;
1066   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1067                         &filesystem);
1068   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1069   ASSERT_TRUE(trie.Init());
1070 
1071   uint32_t value = 1;
1072   // 'f' -> [ 'a', 'o', 'u', 0xFF ]
1073   ASSERT_THAT(trie.Insert("foul", &value), IsOk());
1074   ASSERT_THAT(trie.Insert("far", &value), IsOk());
1075   ASSERT_THAT(trie.Insert("fudge", &value), IsOk());
1076 
1077   // Delete the second child
1078   EXPECT_TRUE(trie.Delete("foul"));
1079 
1080   std::vector<std::string> remaining;
1081   for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
1082        term_iter.IsValid(); term_iter.Advance()) {
1083     remaining.push_back(term_iter.GetKey());
1084   }
1085   EXPECT_THAT(remaining, ElementsAre("far", "fudge"));
1086 }
1087 
TEST_F(IcingDynamicTrieTest,DeletionLoadTest)1088 TEST_F(IcingDynamicTrieTest, DeletionLoadTest) {
1089   IcingFilesystem filesystem;
1090   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1091                         &filesystem);
1092   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1093   ASSERT_TRUE(trie.Init());
1094 
1095   std::default_random_engine random;
1096   ICING_LOG(ERROR) << "Seed: " << std::default_random_engine::default_seed;
1097   std::vector<std::string> terms;
1098   uint32_t value;
1099   // Randomly generate 2048 terms.
1100   for (int i = 0; i < 2048; ++i) {
1101     terms.push_back(RandomString("abcdefg", 5, &random));
1102     ASSERT_THAT(trie.Insert(terms.back().c_str(), &value), IsOk());
1103   }
1104 
1105   // Randomly delete 1024 terms.
1106   std::unordered_set<std::string> exp_remaining(terms.begin(), terms.end());
1107   std::shuffle(terms.begin(), terms.end(), random);
1108   for (int i = 0; i < 1024; ++i) {
1109     exp_remaining.erase(terms[i]);
1110     ASSERT_TRUE(trie.Delete(terms[i].c_str()));
1111   }
1112 
1113   // Check that the iterator still works, and the remaining terms are correct.
1114   std::unordered_set<std::string> remaining;
1115   for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
1116        term_iter.IsValid(); term_iter.Advance()) {
1117     remaining.insert(term_iter.GetKey());
1118   }
1119   EXPECT_THAT(remaining, ContainerEq(exp_remaining));
1120 
1121   // Check that we can still insert terms after delete.
1122   for (int i = 0; i < 2048; ++i) {
1123     std::string term = RandomString("abcdefg", 5, &random);
1124     ASSERT_THAT(trie.Insert(term.c_str(), &value), IsOk());
1125     exp_remaining.insert(term);
1126   }
1127   remaining.clear();
1128   for (IcingDynamicTrie::Iterator term_iter(trie, /*prefix=*/"");
1129        term_iter.IsValid(); term_iter.Advance()) {
1130     remaining.insert(term_iter.GetKey());
1131   }
1132   EXPECT_THAT(remaining, ContainerEq(exp_remaining));
1133 }
1134 
1135 }  // namespace
1136 
1137 // The tests below are accessing private methods and fields of IcingDynamicTrie
1138 // so can't be in the anonymous namespace.
1139 
TEST_F(IcingDynamicTrieTest,TrieShouldRespectLimits)1140 TEST_F(IcingDynamicTrieTest, TrieShouldRespectLimits) {
1141   // Test limits on numbers of nodes, nexts, and suffixes size.
1142   IcingFilesystem filesystem;
1143 
1144   // These 3 numbers are the entities we need in order to insert all the test
1145   // words before the last one.
1146   uint32_t num_nodes_enough;
1147   uint32_t num_nexts_enough;
1148   uint32_t suffixes_size_enough;
1149 
1150   // First, try to fill the 3 numbers above.
1151   {
1152     IcingDynamicTrie trie(trie_files_prefix_,
1153                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
1154     ASSERT_TRUE(trie.Remove());
1155     // Creates a trie with enough numbers of nodes, nexts, and suffix file size.
1156     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
1157         /*max_nodes_in=*/1000, /*max_nexts_in=*/1000,
1158         /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
1159     ASSERT_TRUE(trie.Init());
1160 
1161     // Inserts all the test words before the last one.
1162     uint32_t value = 0;
1163     for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
1164       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
1165     }
1166 
1167     IcingDynamicTrieHeader header;
1168     trie.GetHeader(&header);
1169 
1170     // Before each insertion, it requires that there're (2 + 1 + key_length)
1171     // nodes left, so we need 8 nodes to insert the last word. +7 here will make
1172     // it just enough to insert the word before the last one.
1173     num_nodes_enough = header.num_nodes() + 7;
1174 
1175     // Before each insertion, it requires that there're (2 + 1 + key_length +
1176     // kMaxNextArraySize) nexts left, so we need (8 + kMaxNextArraySize) nexts
1177     // to insert the last word. (7 + kMaxNextArraySize) here will make it just
1178     // enough to insert the word before the last one.
1179     num_nexts_enough =
1180         header.num_nexts() + 7 + IcingDynamicTrie::kMaxNextArraySize;
1181 
1182     // Before each insertion, it requires that there're (1 + key_length +
1183     // value_size) bytes left for suffixes, so we need (6 + sizeof(uint32_t))
1184     // bytes to insert the last word. (5 + sizeof(uint32_t)) here will make it
1185     // just enough to insert the word before the last one.
1186     suffixes_size_enough = header.suffixes_size() + 5 + sizeof(uint32_t);
1187   }
1188 
1189   // Test a trie with just enough number of nodes.
1190   {
1191     IcingDynamicTrie trie(trie_files_prefix_,
1192                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
1193     ASSERT_TRUE(trie.Remove());
1194     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
1195         num_nodes_enough, /*max_nexts_in=*/1000,
1196         /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
1197     ASSERT_TRUE(trie.Init());
1198 
1199     // Inserts all the test words before the last one.
1200     uint32_t value = 0;
1201     for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
1202       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
1203     }
1204 
1205     // Fails to insert the last word because no enough nodes left.
1206     EXPECT_THAT(
1207         trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
1208                     &value),
1209         StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1210   }
1211 
1212   // Test a trie with just enough number of nexts.
1213   {
1214     IcingDynamicTrie trie(trie_files_prefix_,
1215                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
1216     ASSERT_TRUE(trie.Remove());
1217     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
1218         /*max_nodes_in=*/1000, num_nexts_enough,
1219         /*max_suffixes_size_in=*/1000, sizeof(uint32_t))));
1220     ASSERT_TRUE(trie.Init());
1221 
1222     // Inserts all the test words before the last one.
1223     uint32_t value = 0;
1224     for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
1225       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
1226     }
1227 
1228     // Fails to insert the last word because no enough nexts left.
1229     EXPECT_THAT(
1230         trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
1231                     &value),
1232         StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1233   }
1234 
1235   // Test a trie with just enough suffixes size.
1236   {
1237     IcingDynamicTrie trie(trie_files_prefix_,
1238                           IcingDynamicTrie::RuntimeOptions(), &filesystem);
1239     ASSERT_TRUE(trie.Remove());
1240     ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options(
1241         /*max_nodes_in=*/1000, /*max_nexts_in=*/1000, suffixes_size_enough,
1242         sizeof(uint32_t))));
1243     ASSERT_TRUE(trie.Init());
1244 
1245     // Inserts all the test words before the last one.
1246     uint32_t value = 0;
1247     for (size_t i = 0; i < kCommonEnglishWordArrayLen - 1; ++i) {
1248       ASSERT_THAT(trie.Insert(kCommonEnglishWords[i].data(), &value), IsOk());
1249     }
1250 
1251     // Fails to insert the last word because no enough space for more suffixes.
1252     EXPECT_THAT(
1253         trie.Insert(kCommonEnglishWords[kCommonEnglishWordArrayLen - 1].data(),
1254                     &value),
1255         StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
1256   }
1257 }
1258 
TEST_F(IcingDynamicTrieTest,SyncErrorRecovery)1259 TEST_F(IcingDynamicTrieTest, SyncErrorRecovery) {
1260   IcingFilesystem filesystem;
1261   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1262                         &filesystem);
1263   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1264   ASSERT_TRUE(trie.Init());
1265 
1266   static const uint32_t kNumKeys = 5000;
1267   AddToTrie(&trie, kNumKeys);
1268   CheckTrie(trie, kNumKeys);
1269 
1270   trie.Sync();
1271   trie.Close();
1272 
1273   // Reach into the file and set the value_size.
1274   ASSERT_TRUE(trie.Init());
1275   IcingDynamicTrieHeader hdr;
1276   trie.GetHeader(&hdr);
1277   hdr.set_value_size(hdr.value_size() + 123);
1278   trie.SetHeader(hdr);
1279   trie.Close();
1280 
1281   ASSERT_FALSE(trie.Init());
1282 }
1283 
TEST_F(IcingDynamicTrieTest,BitmapsClosedWhenInitFails)1284 TEST_F(IcingDynamicTrieTest, BitmapsClosedWhenInitFails) {
1285   // Create trie with one property.
1286   IcingFilesystem filesystem;
1287   IcingDynamicTrie trie(
1288       trie_files_prefix_,
1289       IcingDynamicTrie::RuntimeOptions().set_storage_policy(
1290           IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
1291       &filesystem);
1292   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1293   ASSERT_TRUE(trie.Init());
1294   ASSERT_TRUE(trie.deleted_bitmap_);
1295   trie.SetProperty(0, 0);
1296   ASSERT_EQ(1, trie.property_bitmaps_.size());
1297   ASSERT_TRUE(trie.property_bitmaps_[0]);
1298   trie.Close();
1299 
1300   // Intentionally corrupt deleted_bitmap file to make Init() fail.
1301   FILE* fp = fopen(trie.deleted_bitmap_filename_.c_str(), "r+");
1302   ASSERT_TRUE(fp);
1303   ASSERT_EQ(16, fwrite("################", 1, 16, fp));
1304   fclose(fp);
1305   ASSERT_FALSE(trie.Init());
1306 
1307   // Check that both the bitmap and the property files have been closed.
1308   ASSERT_FALSE(trie.deleted_bitmap_);
1309   ASSERT_EQ(0, trie.property_bitmaps_.size());
1310 }
1311 
TEST_F(IcingDynamicTrieTest,IsBranchingTermShouldWorkForExistingTerms)1312 TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForExistingTerms) {
1313   IcingFilesystem filesystem;
1314   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1315                         &filesystem);
1316   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1317   ASSERT_TRUE(trie.Init());
1318 
1319   uint32_t value = 1;
1320 
1321   ASSERT_THAT(trie.Insert("", &value), IsOk());
1322   EXPECT_FALSE(trie.IsBranchingTerm(""));
1323 
1324   ASSERT_THAT(trie.Insert("ab", &value), IsOk());
1325   EXPECT_FALSE(trie.IsBranchingTerm(""));
1326   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1327 
1328   ASSERT_THAT(trie.Insert("ac", &value), IsOk());
1329   // "" is a prefix of "ab" and "ac", but it is not a branching term.
1330   EXPECT_FALSE(trie.IsBranchingTerm(""));
1331   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1332   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1333 
1334   ASSERT_THAT(trie.Insert("ba", &value), IsOk());
1335   // "" now branches to "ba"
1336   EXPECT_TRUE(trie.IsBranchingTerm(""));
1337   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1338   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1339   EXPECT_FALSE(trie.IsBranchingTerm("ba"));
1340 
1341   ASSERT_THAT(trie.Insert("a", &value), IsOk());
1342   EXPECT_TRUE(trie.IsBranchingTerm(""));
1343   // "a" branches to "ab" and "ac"
1344   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1345   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1346   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1347   EXPECT_FALSE(trie.IsBranchingTerm("ba"));
1348 
1349   ASSERT_THAT(trie.Insert("abc", &value), IsOk());
1350   ASSERT_THAT(trie.Insert("acd", &value), IsOk());
1351   EXPECT_TRUE(trie.IsBranchingTerm(""));
1352   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1353   // "ab" is a prefix of "abc", but it is not a branching term.
1354   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1355   // "ac" is a prefix of "acd", but it is not a branching term.
1356   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1357   EXPECT_FALSE(trie.IsBranchingTerm("ba"));
1358   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1359   EXPECT_FALSE(trie.IsBranchingTerm("acd"));
1360 
1361   ASSERT_THAT(trie.Insert("abcd", &value), IsOk());
1362   EXPECT_TRUE(trie.IsBranchingTerm(""));
1363   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1364   // "ab" is a prefix of "abc" and "abcd", but it is not a branching term.
1365   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1366   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1367   EXPECT_FALSE(trie.IsBranchingTerm("ba"));
1368   // "abc" is a prefix of "abcd", but it is not a branching term.
1369   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1370   EXPECT_FALSE(trie.IsBranchingTerm("acd"));
1371   EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
1372 
1373   ASSERT_THAT(trie.Insert("abd", &value), IsOk());
1374   EXPECT_TRUE(trie.IsBranchingTerm(""));
1375   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1376   // "ab" branches to "abc" and "abd"
1377   EXPECT_TRUE(trie.IsBranchingTerm("ab"));
1378   EXPECT_FALSE(trie.IsBranchingTerm("ac"));
1379   EXPECT_FALSE(trie.IsBranchingTerm("ba"));
1380   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1381   EXPECT_FALSE(trie.IsBranchingTerm("acd"));
1382   EXPECT_FALSE(trie.IsBranchingTerm("abcd"));
1383   EXPECT_FALSE(trie.IsBranchingTerm("abd"));
1384 }
1385 
TEST_F(IcingDynamicTrieTest,IsBranchingTermShouldWorkForNonExistingTerms)1386 TEST_F(IcingDynamicTrieTest, IsBranchingTermShouldWorkForNonExistingTerms) {
1387   IcingFilesystem filesystem;
1388   IcingDynamicTrie trie(trie_files_prefix_, IcingDynamicTrie::RuntimeOptions(),
1389                         &filesystem);
1390   ASSERT_TRUE(trie.CreateIfNotExist(IcingDynamicTrie::Options()));
1391   ASSERT_TRUE(trie.Init());
1392 
1393   uint32_t value = 1;
1394 
1395   EXPECT_FALSE(trie.IsBranchingTerm(""));
1396   EXPECT_FALSE(trie.IsBranchingTerm("a"));
1397   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1398   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1399 
1400   ASSERT_THAT(trie.Insert("aa", &value), IsOk());
1401   EXPECT_FALSE(trie.IsBranchingTerm(""));
1402   EXPECT_FALSE(trie.IsBranchingTerm("a"));
1403   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1404   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1405 
1406   ASSERT_THAT(trie.Insert("ac", &value), IsOk());
1407   EXPECT_FALSE(trie.IsBranchingTerm(""));
1408   // "a" does not exist in the trie, but now it branches to "aa" and "ac".
1409   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1410   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1411   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1412 
1413   ASSERT_THAT(trie.Insert("ad", &value), IsOk());
1414   EXPECT_FALSE(trie.IsBranchingTerm(""));
1415   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1416   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1417   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1418 
1419   ASSERT_THAT(trie.Insert("abcd", &value), IsOk());
1420   EXPECT_FALSE(trie.IsBranchingTerm(""));
1421   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1422   EXPECT_FALSE(trie.IsBranchingTerm("ab"));
1423   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1424 
1425   ASSERT_THAT(trie.Insert("abd", &value), IsOk());
1426   EXPECT_FALSE(trie.IsBranchingTerm(""));
1427   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1428   // "ab" does not exist in the trie, but now it branches to "abcd" and "abd".
1429   EXPECT_TRUE(trie.IsBranchingTerm("ab"));
1430   EXPECT_FALSE(trie.IsBranchingTerm("abc"));
1431 
1432   ASSERT_THAT(trie.Insert("abce", &value), IsOk());
1433   EXPECT_FALSE(trie.IsBranchingTerm(""));
1434   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1435   EXPECT_TRUE(trie.IsBranchingTerm("ab"));
1436   // "abc" does not exist in the trie, but now it branches to "abcd" and "abce".
1437   EXPECT_TRUE(trie.IsBranchingTerm("abc"));
1438 
1439   ASSERT_THAT(trie.Insert("abc_suffix", &value), IsOk());
1440   EXPECT_FALSE(trie.IsBranchingTerm(""));
1441   EXPECT_TRUE(trie.IsBranchingTerm("a"));
1442   EXPECT_TRUE(trie.IsBranchingTerm("ab"));
1443   EXPECT_TRUE(trie.IsBranchingTerm("abc"));
1444   EXPECT_FALSE(trie.IsBranchingTerm("abc_s"));
1445   EXPECT_FALSE(trie.IsBranchingTerm("abc_su"));
1446   EXPECT_FALSE(trie.IsBranchingTerm("abc_suffi"));
1447 }
1448 
1449 }  // namespace lib
1450 }  // namespace icing
1451