• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // We store the trie in three areas: nodes, nexts and suffixes.
16 //
17 // Nodes contain an index to a children array (kept in nexts) or to
18 // suffixes (for leaf nodes). Nexts contain children arrays of
19 // different sizes. Each child entry has the matched char and an index
20 // back into the nodes. Leaf nodes index into suffixes instead of the
21 // nexts array. Each suffix is a NULL-terminated suffix off the trie,
22 // followed by a 4-byte value associated with that key.
23 //
24 // Allocation
25 //
26 // Nodes are allocated and never removed. Nexts contain arrays of
27 // sizes in power-of-2 increments, i.e. 1, 2, 4, ..., 256. When the
28 // number of children of a node increases, it is relocated to an array
29 // with the proper size. The (smaller) unused array is added to a free
30 // list. A free list is kept for each array size. Allocations happen
31 // from the free list first, and then from the end of the nexts
32 // array. Suffixes are never freed or compacted. If a node wants to
33 // refer to a smaller suffix, it moves the pointer forward and the
34 // characters before the new pointer are wasted.
35 //
36 // Keys can contain any character except '\0'. The '\0' char is
37 // special in that it specifies an end-of-key in the child array.
38 //
39 // Ideas to try:
40 //
41 // - Put suffix index in a Next instead of creating a leaf node.
42 // - Change allocation buckets to 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, ..., 256
43 // - Compact next array
44 // - GroupVarByte and delta-encode the next array
45 // - Collapse nodes with single children
46 //
47 // Persistence
48 //
49 // We persist the trie in a binary format such that resurrecting the
50 // trie is simply a few file reads. The file is laid out as such:
51 //
52 // - Header
53 // - Nodes
54 // - Nexts
55 // - Suffixes
56 //
57 // Each section is aligned to IcingMMapper::system_page_size(). The max
58 // requested value for each array is pre-allocated in the file. When
59 // we make modifications to the arrays, we set bits in a dirty bitmap
60 // of pages. No changes get written to disk until an explicit call to
61 // Flush. Then we only write the pages that have their dirty bit set.
62 
63 #include "icing/legacy/index/icing-dynamic-trie.h"
64 
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <inttypes.h>
68 #include <string.h>
69 #include <sys/mman.h>
70 #include <sys/stat.h>
71 #include <unistd.h>
72 
73 #include <algorithm>
74 #include <memory>
75 #include <utility>
76 
77 #include "icing/legacy/core/icing-packed-pod.h"
78 #include "icing/legacy/core/icing-string-util.h"
79 #include "icing/legacy/core/icing-timer.h"
80 #include "icing/legacy/index/icing-array-storage.h"
81 #include "icing/legacy/index/icing-filesystem.h"
82 #include "icing/legacy/index/icing-flash-bitmap.h"
83 #include "icing/legacy/index/icing-mmapper.h"
84 #include "icing/util/i18n-utils.h"
85 #include "icing/util/logging.h"
86 #include "icing/util/math-util.h"
87 
88 using std::inplace_merge;
89 using std::lower_bound;
90 using std::max;
91 using std::mismatch;
92 using std::pair;
93 using std::sort;
94 using std::vector;
95 
96 namespace icing {
97 namespace lib {
98 
99 namespace {
100 constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
101 constexpr uint32_t kInvalidNextIndex = ~0U;
102 
103 // Returns the number of valid nexts in the array.
GetValidNextsSize(IcingDynamicTrie::Next * next_array_start,int next_array_length)104 int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
105                       int next_array_length) {
106   int valid_nexts_length = 0;
107   for (; valid_nexts_length < next_array_length &&
108          next_array_start[valid_nexts_length].node_index() != kInvalidNodeIndex;
109        ++valid_nexts_length) {
110   }
111   return valid_nexts_length;
112 }
113 }  // namespace
114 
115 // Based on the bit field widths.
116 const uint32_t IcingDynamicTrie::Options::kMaxNodes = (1U << 24) - 1;
117 const uint32_t IcingDynamicTrie::Options::kMaxNexts = (1U << 27) - 1;
118 const uint32_t IcingDynamicTrie::Options::kMaxSuffixesSize = 1U << 27;
119 const uint32_t IcingDynamicTrie::Options::kMaxValueSize = 1U << 16;
120 
121 const uint32_t IcingDynamicTrie::kInvalidSuffixIndex = ~0U;
122 
123 const int IcingDynamicTrie::kMaxNextArraySize;
124 const int IcingDynamicTrie::kNumNextAllocationBuckets;
125 
126 const uint32_t IcingDynamicTrie::kMaxPropertyId;
127 
128 const uint32_t IcingDynamicTrie::kInvalidValueIndex;
129 
130 const uint32_t IcingDynamicTrie::kNoCrc;
131 
132 // Manages logical node candidates while searching for possible
133 // variant matches. Currently implemented as depth first search. The
134 // max stack depth is key length * variant fanout. Since max variant
135 // fanout is 3, we don't need to worry about blowup of the depth first
136 // search stack.
137 //
138 // Keeps track of original matched string (the string actually present
139 // in the trie) for every candidate.
140 class IcingDynamicTrie::CandidateSet {
141  public:
142   struct Candidate {
143     LogicalNode logical_node;
144     const char *key;
145     int matched_prefix_len;
146     std::string matched_span;
147 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate148     Candidate() {}
149 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate150     Candidate(const LogicalNode &logical_node_in, const char *key_in,
151               int matched_prefix_len_in, const char *matched_span_in,
152               int matched_span_len_in)
153         : logical_node(logical_node_in),
154           key(key_in),
155           matched_prefix_len(matched_prefix_len_in),
156           matched_span(matched_span_in, matched_span_len_in) {}
157 
matched_lenicing::lib::IcingDynamicTrie::CandidateSet::Candidate158     int matched_len() const { return matched_prefix_len + matched_span.size(); }
159   };
160 
CandidateSet(bool prefix)161   explicit CandidateSet(bool prefix) : prefix_(prefix) {}
162 
IsTerminal(const char * key,uint32_t value_index) const163   bool IsTerminal(const char *key, uint32_t value_index) const {
164     // Terminal match condition:
165     //
166     // 1. Key was entirely consumed.
167     // 2. The entire suffix was consumed (hence value index is
168     //    valid). OR, we are ok with prefix matches.
169     return *key == 0 && (value_index != kInvalidValueIndex || prefix_);
170   }
171 
172   // Push a terminal or non-terminal.
Push(const LogicalNode & logical_node,const char * key,uint32_t value_index,int matched_prefix_len,const char * matched_span,int matched_span_len)173   void Push(const LogicalNode &logical_node, const char *key,
174             uint32_t value_index, int matched_prefix_len,
175             const char *matched_span, int matched_span_len) {
176     if (!AddMatchIfTerminal(key, value_index, matched_span, matched_span_len)) {
177       PushNonTerminal(logical_node, key, matched_prefix_len, matched_span,
178                       matched_span_len);
179     }
180   }
181 
AddMatchIfTerminal(const char * key,uint32_t value_index,const char * matched_span,int matched_span_len)182   bool AddMatchIfTerminal(const char *key, uint32_t value_index,
183                           const char *matched_span, int matched_span_len) {
184     if (!IsTerminal(key, value_index)) {
185       return false;
186     }
187 
188     // Terminal match.
189     matches_.push_back(OriginalMatch());
190     OriginalMatch *match = &matches_.back();
191     match->value_index = value_index;
192     match->orig.reserve(cur_prefix_.size() + matched_span_len);
193     match->orig.append(cur_prefix_).append(matched_span, matched_span_len);
194     return true;
195   }
196 
197   // Push a definite non-terminal.
PushNonTerminal(const LogicalNode & logical_node,const char * key,int matched_prefix_len,const char * matched_span,int matched_span_len)198   void PushNonTerminal(const LogicalNode &logical_node, const char *key,
199                        int matched_prefix_len, const char *matched_span,
200                        int matched_span_len) {
201     candidates_.push_back(Candidate(logical_node, key, matched_prefix_len,
202                                     matched_span, matched_span_len));
203   }
204 
Pop(Candidate * candidate)205   void Pop(Candidate *candidate) {
206     *candidate = candidates_.back();
207     if (cur_prefix_.size() < candidate->matched_prefix_len) {
208       ICING_LOG(FATAL)
209           << "Length of current prefix is smaller than length of matched "
210              "prefer, there're inconsistencies in dynamic trie.";
211     }
212 
213     cur_prefix_.resize(candidate->matched_prefix_len);
214     cur_prefix_.append(candidate->matched_span);
215     candidates_.pop_back();
216   }
217 
empty() const218   bool empty() const { return candidates_.empty(); }
219 
Release(vector<OriginalMatch> * ret)220   void Release(vector<OriginalMatch> *ret) {
221     if (!empty()) {
222       ICING_LOG(FATAL) << "Candidate set not empty before releasing matches";
223     }
224 
225     ret->swap(matches_);
226 
227     cur_prefix_.clear();
228     candidates_.clear();
229     matches_.clear();
230   }
231 
232  private:
233   const bool prefix_;
234 
235   std::string cur_prefix_;
236   vector<Candidate> candidates_;
237 
238   vector<IcingDynamicTrie::OriginalMatch> matches_;
239 };
240 
241 // Options.
is_valid() const242 bool IcingDynamicTrie::Options::is_valid() const {
243   return max_nodes <= kMaxNodes && max_nodes > 0 && max_nexts <= kMaxNexts &&
244          max_nexts > 0 && max_suffixes_size <= kMaxSuffixesSize &&
245          max_suffixes_size > 0 && value_size <= kMaxValueSize;
246 }
247 
248 // IcingDynamicTrieStorage
249 class IcingDynamicTrie::IcingDynamicTrieStorage {
250  public:
251   IcingDynamicTrieStorage(const std::string &file_basename,
252                           const RuntimeOptions &runtime_options,
253                           const IcingFilesystem *filesystem);
254   ~IcingDynamicTrieStorage();
255 
is_initialized() const256   bool is_initialized() const { return hdr_mmapper_.is_valid(); }
257 
258   bool CreateIfNotExist(const Options &options);
259   bool Init();
260   static bool Remove(const std::string &file_basename,
261                      const IcingFilesystem &filesystem);
262   bool Sync();
263   uint64_t GetDiskUsage() const;
264 
265   // Returns the size of the elements held in the trie. This excludes the size
266   // of any internal metadata of the trie, e.g. the trie's header.
267   uint64_t GetElementsFileSize() const;
268 
269   void Warm();
270 
271   void Clear();
272 
empty() const273   bool empty() const { return hdr().num_nodes() == 0; }
274 
275   // Never cast off these consts when writing to the arrays. Always
276   // use the GetMutable* helpers above.
GetNode(uint32_t idx) const277   const Node *GetNode(uint32_t idx) const {
278     return &array_storage_[NODE].array_cast<Node>()[idx];
279   }
GetRootNode() const280   const Node *GetRootNode() const { return GetNode(0); }
GetNext(uint32_t idx,int child) const281   const Next *GetNext(uint32_t idx, int child) const {
282     return &array_storage_[NEXT].array_cast<Next>()[idx + child];
283   }
GetSuffix(uint32_t idx) const284   const char *GetSuffix(uint32_t idx) const {
285     return &array_storage_[SUFFIX].array_cast<char>()[idx];
286   }
287 
GetNodeIndex(const Node * node) const288   uint32_t GetNodeIndex(const Node *node) const { return node - GetNode(0); }
GetNextArrayIndex(const Next * next) const289   uint32_t GetNextArrayIndex(const Next *next) const {
290     return next - GetNext(0, 0);
291   }
GetSuffixIndex(const char * suffix) const292   uint32_t GetSuffixIndex(const char *suffix) const {
293     return suffix - GetSuffix(0);
294   }
295 
296   // By default, nodes_, nexts_ and suffixes_ are read-only. This
297   // returns a writable element or array within and sets
298   // dirty_pages_[array_type] as a side effect, assuming the mutable
299   // area will get written to.
300   Node *GetMutableNode(uint32_t idx);
301   Next *GetMutableNextArray(uint32_t idx, uint32_t len);
302   char *GetMutableSuffix(uint32_t idx, uint32_t len);
303 
304   // Update crcs based on current contents. Returns all_crc or kNoCrc.
305   uint32_t UpdateCrc();
306 
307   // Allocators.
308   uint32_t nodes_left() const;
309   uint32_t nexts_left() const;
310   uint32_t suffixes_left() const;
311 
312   // REQUIRES: nodes_left() > 0.
313   Node *AllocNode();
314   // REQUIRES: nexts_left() >= kMaxNextArraySize.
315   Next *AllocNextArray(int size);
316   void FreeNextArray(Next *next, int log2_size);
317   // REQUIRES: suffixes_left() >= strlen(suffix) + 1 + value_size()
318   uint32_t MakeSuffix(const char *suffix, const void *value,
319                       uint32_t *value_index);
320 
hdr() const321   const IcingDynamicTrieHeader &hdr() const { return hdr_.hdr; }
322 
value_size() const323   uint32_t value_size() const { return hdr().value_size(); }
324 
325   void FillDirtyPageStats(Stats *stats) const;
326 
inc_num_keys()327   void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); }
328 
329  private:
330   friend void IcingDynamicTrie::SetHeader(
331       const IcingDynamicTrieHeader &new_hdr);
332 
333   enum ArrayType { NODE, NEXT, SUFFIX, NUM_ARRAY_TYPES };
334 
335   // Returns all filenames that are part of the storage. First
336   // filename is the header and the rest correspond to ArrayType enum
337   // values.
338   static void GetFilenames(const std::string &file_basename,
339                            vector<std::string> *filenames);
340   static std::string GetHeaderFilename(const std::string &file_basename);
341 
342   uint32_t GetHeaderCrc() const;
343 
344   uint32_t GetAllCrc() const;
345 
346   uint32_t UpdateCrcInternal(bool write_hdr);
347 
348   // Initializes hdr_ with options and writes the resulting header to disk.
349   bool CreateNewHeader(IcingScopedFd sfd, const Options &options);
350   bool WriteHeader();
351 
352   // Header block. On-disk header block format is as follows:
353   //
354   // |serialized-header|pad|crcs|
355   // <--- system_page_size() --->
356 
357   // Wrapper for header protobuf.
358   class Header {
359     // Serialized format:
360     //
361     // magic(4)|size(4)|serialized hdr(size)
362     static const uint32_t kMagic;
363     // TODO(b/77482303) : Remove version from the IcingFlashBitmap header -
364     // magic makes it unnecessary.
365     static const uint32_t kCurVersion;
366 
367    public:
368     void Init(const Options &options);
369     bool Init(const uint8_t *buf, uint32_t buf_size);
Invalidate()370     void Invalidate() { hdr.Clear(); }
371     bool SerializeToArray(uint8_t *buf, uint32_t buf_size) const;
372     bool Verify();
373 
374     IcingDynamicTrieHeader hdr;
375   };
376 
377   std::string file_basename_;
378 
379   Header hdr_;
380 
381   IcingMMapper hdr_mmapper_;
382 
383   struct Crcs {
384     uint32_t all_crc;
385     uint32_t header_crc;
386     uint32_t array_crcs[NUM_ARRAY_TYPES];
387   };
388   Crcs *crcs_;
389 
serialized_header_max()390   static uint32_t serialized_header_max() {
391     return IcingMMapper::system_page_size() - sizeof(Crcs);
392   }
393 
394   RuntimeOptions runtime_options_;
395 
396   // Info kept about each array (NODE, NEXT, SUFFIX) to manage
397   // storage.
398   IcingScopedFd array_fds_[NUM_ARRAY_TYPES];
399   std::vector<IcingArrayStorage> array_storage_;
400   const IcingFilesystem *filesystem_;
401 };
402 
IcingDynamicTrieStorage(const std::string & file_basename,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)403 IcingDynamicTrie::IcingDynamicTrieStorage::IcingDynamicTrieStorage(
404     const std::string &file_basename, const RuntimeOptions &runtime_options,
405     const IcingFilesystem *filesystem)
406     : file_basename_(file_basename),
407       hdr_mmapper_(false, MAP_SHARED),
408       crcs_(nullptr),
409       runtime_options_(runtime_options),
410       array_storage_(NUM_ARRAY_TYPES, IcingArrayStorage(*filesystem)),
411       filesystem_(filesystem) {}
412 
~IcingDynamicTrieStorage()413 IcingDynamicTrie::IcingDynamicTrieStorage::~IcingDynamicTrieStorage() {
414   if (is_initialized()) {
415     for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
416       array_storage_[i].Reset();
417     }
418   }
419 }
420 
GetFilenames(const std::string & file_basename,vector<std::string> * filenames)421 void IcingDynamicTrie::IcingDynamicTrieStorage::GetFilenames(
422     const std::string &file_basename, vector<std::string> *filenames) {
423   const char *kArrayFilenameSuffixes[NUM_ARRAY_TYPES] = {
424       ".n",
425       ".x",
426       ".s",
427   };
428 
429   filenames->clear();
430   filenames->push_back(GetHeaderFilename(file_basename));
431   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
432     filenames->push_back(file_basename + kArrayFilenameSuffixes[i]);
433   }
434 }
435 
GetHeaderFilename(const std::string & file_basename)436 std::string IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderFilename(
437     const std::string &file_basename) {
438   constexpr char kHeaderFilenameSuffix[] = ".h";
439   return file_basename + kHeaderFilenameSuffix;
440 }
441 
Init()442 bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
443   bool init_crcs = false;
444   const bool map_shared =
445       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc;
446 
447   // Open files.
448   vector<std::string> filenames;
449   GetFilenames(file_basename_, &filenames);
450   for (size_t i = 0; i < filenames.size(); i++) {
451     uint64_t file_size = filesystem_->GetFileSize(filenames[i].c_str());
452     if (file_size == IcingFilesystem::kBadFileSize) {
453       goto failed;
454     }
455     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
456     if (!sfd.is_valid()) {
457       goto failed;
458     }
459     // The first filename is the header and the rest correspond to ArrayType
460     // enum values. The header's fd can be closed immediately after mmapping
461     // (see b/114830334). Other files' fds are tracked in array_fds_ for later
462     // closing.
463     if (i == 0) {
464       // Header.
465       if (file_size != IcingMMapper::system_page_size()) {
466         ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
467             "Trie hdr wrong size: %" PRIu64, file_size);
468         goto failed;
469       }
470 
471       // Open hdr.
472       hdr_mmapper_.Remap(sfd.get(), 0, IcingMMapper::system_page_size());
473       if (!hdr_mmapper_.is_valid()) {
474         ICING_LOG(ERROR) << "Trie map header failed";
475         goto failed;
476       }
477     } else {
478       array_fds_[i - 1] = std::move(sfd);
479     }
480   }
481 
482   // Point crcs_ to correct region.
483   crcs_ = reinterpret_cast<Crcs *>(hdr_mmapper_.address() +
484                                    serialized_header_max());
485   if (crcs_->header_crc == kNoCrc) {
486     // Create crcs.
487     crcs_->header_crc = GetHeaderCrc();
488 
489     // Do the same for the arrays.
490     init_crcs = true;
491   } else {
492     // Verify crc.
493     if (crcs_->header_crc != GetHeaderCrc()) {
494       ICING_LOG(ERROR) << "Trie header crc failed";
495       goto failed;
496     }
497   }
498 
499   // Deserialize and verify header.
500   if (!hdr_.Init(hdr_mmapper_.address(),
501                  IcingMMapper::system_page_size() - sizeof(Crcs)) ||
502       !hdr_.Verify()) {
503     ICING_LOG(ERROR) << "Trie reading header failed";
504     goto failed;
505   }
506 
507   // We have the header set up. Now read in the arrays.
508   if (!array_storage_[NODE].Init(array_fds_[NODE].get(), 0, map_shared,
509                                  sizeof(Node), hdr_.hdr.num_nodes(),
510                                  hdr_.hdr.max_nodes(), &crcs_->array_crcs[NODE],
511                                  init_crcs)) {
512     ICING_LOG(ERROR) << "Trie mmap node failed";
513     goto failed;
514   }
515 
516   if (!array_storage_[NEXT].Init(array_fds_[NEXT].get(), 0, map_shared,
517                                  sizeof(Next), hdr_.hdr.num_nexts(),
518                                  hdr_.hdr.max_nexts(), &crcs_->array_crcs[NEXT],
519                                  init_crcs)) {
520     ICING_LOG(ERROR) << "Trie mmap next failed";
521     goto failed;
522   }
523 
524   if (!array_storage_[SUFFIX].Init(array_fds_[SUFFIX].get(), 0, map_shared,
525                                    sizeof(char), hdr_.hdr.suffixes_size(),
526                                    hdr_.hdr.max_suffixes_size(),
527                                    &crcs_->array_crcs[SUFFIX], init_crcs)) {
528     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
529         "Trie mmap suffix failed");
530     goto failed;
531   }
532 
533   // Overall crc.
534   if (init_crcs) {
535     crcs_->all_crc = GetAllCrc();
536   } else {
537     // Verify crc.
538     if (crcs_->all_crc != GetAllCrc()) {
539       ICING_LOG(ERROR) << "Trie all crc failed";
540       goto failed;
541     }
542   }
543 
544   return true;
545 
546 failed:
547   crcs_ = nullptr;
548   hdr_mmapper_.Unmap();
549   hdr_.Invalidate();
550   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
551     array_storage_[i].Reset();
552     array_fds_[i].reset();
553   }
554 
555   return false;
556 }
557 
CreateIfNotExist(const Options & options)558 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateIfNotExist(
559     const Options &options) {
560   vector<std::string> filenames;
561   GetFilenames(file_basename_, &filenames);
562 
563   // Check already exists. Just header file check is enough.
564   if (filesystem_->FileExists(filenames[0].c_str())) {
565     return true;
566   }
567 
568   // Ensure the storage directory exists
569   std::string storage_dir = filesystem_->GetDirname(filenames[0].c_str());
570   if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
571     return false;
572   }
573 
574   // Create files.
575   for (size_t i = 0; i < filenames.size(); i++) {
576     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
577     if (!sfd.is_valid()) {
578       Remove(file_basename_, *filesystem_);
579       return false;
580     }
581 
582     if (i == 0) {
583       if (!CreateNewHeader(std::move(sfd), options)) {
584         ICING_LOG(ERROR) << "Serialize trie header failed";
585         Remove(file_basename_, *filesystem_);
586         return false;
587       }
588     } else {
589       // Crcs are automatically kNoCrc so they will be initialized
590       // upon first call to Init.
591       if (!filesystem_->Truncate(*sfd, 0)) {
592         Remove(file_basename_, *filesystem_);
593         return false;
594       }
595     }
596   }
597   return true;
598 }
599 
CreateNewHeader(IcingScopedFd sfd,const Options & options)600 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateNewHeader(
601     IcingScopedFd sfd, const Options &options) {
602   ICING_VLOG(1) << "Creating header with write+sync";
603   hdr_.Init(options);
604   auto buf = std::make_unique<uint8_t[]>(IcingMMapper::system_page_size());
605   // serialized_header_max must be less than system_page_size so we don't
606   // overflow buf when serializing the header.
607   if (serialized_header_max() > IcingMMapper::system_page_size()) {
608     ICING_LOG(FATAL) << "serialized_header_max exceeds system page size";
609   }
610 
611   return hdr_.SerializeToArray(buf.get(), serialized_header_max()) &&
612          filesystem_->Write(sfd.get(), buf.get(),
613                             IcingMMapper::system_page_size()) &&
614          filesystem_->DataSync(sfd.get());
615 }
616 
Remove(const std::string & file_basename,const IcingFilesystem & filesystem)617 bool IcingDynamicTrie::IcingDynamicTrieStorage::Remove(
618     const std::string &file_basename, const IcingFilesystem &filesystem) {
619   bool success = true;
620   vector<std::string> files;
621   GetFilenames(file_basename, &files);
622   for (size_t i = 0; i < files.size(); i++) {
623     if (!filesystem.DeleteFile(files[i].c_str())) {
624       success = false;
625     }
626   }
627   return success;
628 }
629 
Warm()630 void IcingDynamicTrie::IcingDynamicTrieStorage::Warm() {
631   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
632     array_storage_[i].Warm();
633   }
634 }
635 
Clear()636 void IcingDynamicTrie::IcingDynamicTrieStorage::Clear() {
637   if (!is_initialized()) {
638     ICING_LOG(FATAL) << "DynamicTrie not initialized";
639   }
640 
641   // Clear header.
642   hdr_.hdr.set_num_nodes(0);
643   hdr_.hdr.set_num_nexts(0);
644   hdr_.hdr.set_suffixes_size(0);
645   for (int i = 0; i < hdr_.hdr.free_lists_size(); i++) {
646     hdr_.hdr.set_free_lists(i, kInvalidNextIndex);
647   }
648   hdr_.hdr.set_num_keys(0);
649 
650   // Clear array storage.
651   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
652     array_storage_[i].Clear();
653   }
654 
655   // Copy to persistence.
656   WriteHeader();
657 }
658 
Sync()659 bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
660   if (!is_initialized()) {
661     ICING_LOG(FATAL) << "DynamicTrie not initialized";
662   }
663 
664   uint32_t total_flushed = 0;
665   bool success = true;
666 
667   // Sync all array types.
668   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
669     total_flushed += array_storage_[i].Sync();
670     if (!filesystem_->DataSync(array_fds_[i].get())) {
671       ICING_LOG(ERROR) << "Unable to sync data for flushing";
672       success = false;
673     }
674   }
675 
676   if (!WriteHeader()) {
677     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
678         "Flushing trie header failed: %s", strerror(errno));
679     success = false;
680   }
681 
682   // Need to update CRCs before we sync the header mmap.
683   UpdateCrcInternal(false);
684 
685   // Sync header.
686   if (!hdr_mmapper_.Sync()) {
687     ICING_LOG(ERROR) << "Unable to sync trie header for flushing";
688     success = false;
689   }
690 
691   if (total_flushed > 0) {
692     ICING_VLOG(1) << IcingStringUtil::StringPrintf("Flushing %u pages of trie",
693                                                    total_flushed);
694   }
695 
696   return success;
697 }
698 
GetDiskUsage() const699 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetDiskUsage() const {
700   // Trie files themselves.
701   uint64_t total = 0;
702   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
703     IcingFilesystem::IncrementByOrSetInvalid(
704         filesystem_->GetDiskUsage(array_fds_[i].get()), &total);
705   }
706 
707   // Header.
708   std::string header_filename = GetHeaderFilename(file_basename_);
709   IcingFilesystem::IncrementByOrSetInvalid(
710       filesystem_->GetFileDiskUsage(header_filename.c_str()), &total);
711 
712   return total;
713 }
714 
GetElementsFileSize() const715 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetElementsFileSize()
716     const {
717   // Trie files themselves, exclude size of the header. These arrays are dense,
718   // not sparse, so use file size for more accurate numbers.
719   uint64_t total = 0;
720   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
721     IcingFilesystem::IncrementByOrSetInvalid(
722         filesystem_->GetFileSize(array_fds_[i].get()), &total);
723   }
724   return total;
725 }
726 
AllocNode()727 IcingDynamicTrie::Node *IcingDynamicTrie::IcingDynamicTrieStorage::AllocNode() {
728   if (nodes_left() == 0) {
729     ICING_LOG(FATAL) << "No allocated nodes left";
730   }
731 
732   hdr_.hdr.set_num_nodes(hdr_.hdr.num_nodes() + 1);
733   return GetMutableNode(hdr_.hdr.num_nodes() - 1);
734 }
735 
736 IcingDynamicTrie::Next *
AllocNextArray(int size)737 IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
738   if (size > kMaxNextArraySize) {
739     ICING_LOG(FATAL) << "Array size exceeds the max 'next' array size";
740   }
741 
742   if (nexts_left() < static_cast<uint32_t>(kMaxNextArraySize)) {
743     ICING_LOG(FATAL) << "'next' buffer not enough";
744   }
745 
746   // Compute ceil(log2(size)).
747   int log2_size = 0;
748   while ((1 << log2_size) < size) log2_size++;
749   // Note: size <= aligned_size <= kMaxNextArraySize
750   int aligned_size = 1 << log2_size;
751 
752   // Look in free list.
753   Next *ret;
754   if (hdr_.hdr.free_lists(log2_size) != kInvalidNextIndex) {
755     ret = GetMutableNextArray(hdr_.hdr.free_lists(log2_size), aligned_size);
756     uint32_t next_link = ret->next_index();
757     if (next_link != kInvalidNextIndex && next_link >= hdr_.hdr.max_nexts()) {
758       ICING_LOG(FATAL) << "'next' index is out of range";
759     }
760     hdr_.hdr.set_free_lists(log2_size, next_link);
761   } else {
762     // Allocate a new one.
763     ret = GetMutableNextArray(hdr_.hdr.num_nexts(), aligned_size);
764     hdr_.hdr.set_num_nexts(hdr_.hdr.num_nexts() + aligned_size);
765   }
766 
767   // Fill with char 0xff so we are sorted properly.
768   for (int i = 0; i < aligned_size; i++) {
769     ret[i].set_val(0xff);
770     ret[i].set_node_index(kInvalidNodeIndex);
771   }
772   return ret;
773 }
774 
FreeNextArray(Next * next,int log2_size)775 void IcingDynamicTrie::IcingDynamicTrieStorage::FreeNextArray(Next *next,
776                                                               int log2_size) {
777   if (GetNextArrayIndex(next) + (1 << log2_size) > hdr_.hdr.max_nexts()) {
778     ICING_LOG(FATAL) << "'next' array is out of range";
779   }
780 
781   // Put it in free list.
782   next->set_next_index(hdr_.hdr.free_lists(log2_size));
783   hdr_.hdr.set_free_lists(log2_size, GetNextArrayIndex(next));
784 }
785 
MakeSuffix(const char * suffix,const void * value,uint32_t * value_index)786 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::MakeSuffix(
787     const char *suffix, const void *value, uint32_t *value_index) {
788   int suffix_len = strlen(suffix);
789   if (suffixes_left() < suffix_len + 1 + value_size()) {
790     ICING_LOG(FATAL) << "'suffix' buffer not enough";
791   }
792 
793   char *start =
794       GetMutableSuffix(hdr_.hdr.suffixes_size(), suffix_len + 1 + value_size());
795   memcpy(start, suffix, suffix_len + 1);
796   memcpy(start + suffix_len + 1, value, value_size());
797   if (value_index) *value_index = GetSuffixIndex(start + suffix_len + 1);
798   hdr_.hdr.set_suffixes_size(hdr_.hdr.suffixes_size() + suffix_len + 1 +
799                              value_size());
800 
801   return GetSuffixIndex(start);
802 }
803 
GetHeaderCrc() const804 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderCrc() const {
805   return IcingStringUtil::UpdateCrc32(
806       0, reinterpret_cast<const char *>(hdr_mmapper_.address()),
807       serialized_header_max());
808 }
809 
GetAllCrc() const810 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetAllCrc() const {
811   // Append array crcs to header crc.
812   return IcingStringUtil::UpdateCrc32(
813       crcs_->header_crc, reinterpret_cast<const char *>(crcs_->array_crcs),
814       sizeof(crcs_->array_crcs));
815 }
816 
UpdateCrc()817 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrc() {
818   return UpdateCrcInternal(true);
819 }
820 
UpdateCrcInternal(bool write_hdr)821 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrcInternal(
822     bool write_hdr) {
823   if (write_hdr && !WriteHeader()) {
824     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
825         "Flushing trie header failed: %s", strerror(errno));
826   }
827 
828   crcs_->header_crc = GetHeaderCrc();
829 
830   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
831     array_storage_[i].UpdateCrc();
832   }
833 
834   crcs_->all_crc = GetAllCrc();
835 
836   return crcs_->all_crc;
837 }
838 
WriteHeader()839 bool IcingDynamicTrie::IcingDynamicTrieStorage::WriteHeader() {
840   return hdr_.SerializeToArray(hdr_mmapper_.address(), serialized_header_max());
841 }
842 
843 IcingDynamicTrie::Node *
GetMutableNode(uint32_t idx)844 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNode(uint32_t idx) {
845   return array_storage_[NODE].GetMutableMem<Node>(idx, 1);
846 }
847 
848 IcingDynamicTrie::Next *
GetMutableNextArray(uint32_t idx,uint32_t len)849 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNextArray(uint32_t idx,
850                                                                uint32_t len) {
851   return array_storage_[NEXT].GetMutableMem<Next>(idx, len);
852 }
853 
GetMutableSuffix(uint32_t idx,uint32_t len)854 char *IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableSuffix(
855     uint32_t idx, uint32_t len) {
856   return array_storage_[SUFFIX].GetMutableMem<char>(idx, len);
857 }
858 
859 // Header functions.
860 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kMagic =
861     0x6dfba6ae;
862 // For future revisions, this should be synced with global index version.
863 // See comments on Upgrade() in native-index-impl.h for versioning.
864 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kCurVersion =
865     4;
866 
Init(const IcingDynamicTrie::Options & options)867 void IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
868     const IcingDynamicTrie::Options &options) {
869   hdr.Clear();
870 
871   hdr.set_version(kCurVersion);
872   hdr.set_max_nodes(options.max_nodes);
873   hdr.set_max_nexts(options.max_nexts);
874   hdr.set_max_suffixes_size(options.max_suffixes_size);
875   hdr.set_value_size(options.value_size);
876 
877   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
878     hdr.add_free_lists(kInvalidNextIndex);
879   }
880 }
881 
Init(const uint8_t * buf,uint32_t buf_size)882 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
883     const uint8_t *buf, uint32_t buf_size) {
884   // Check magic and length.
885   if (buf_size <= sizeof(kMagic) + sizeof(uint32_t)) {
886     ICING_LOG(ERROR) << "Trie header too short";
887     return false;
888   }
889 
890   uint32_t magic;
891   memcpy(&magic, buf, sizeof(magic));
892   if (magic != kMagic) {
893     ICING_LOG(ERROR) << "Trie header magic mismatch";
894     return false;
895   }
896   uint32_t len;
897   memcpy(&len, buf + sizeof(magic), sizeof(len));
898   if (len > buf_size - sizeof(magic) - sizeof(len)) {
899     ICING_LOG(ERROR) << "Trie header too short";
900     return false;
901   }
902 
903   return hdr.ParseFromArray(buf + sizeof(magic) + sizeof(len), len);
904 }
905 
SerializeToArray(uint8_t * buf,uint32_t buf_size) const906 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
907     uint8_t *buf, uint32_t buf_size) const {
908   uint32_t size = hdr.ByteSizeLong();
909   if (size + sizeof(kMagic) + sizeof(uint32_t) > buf_size) return false;
910   memcpy(buf, &kMagic, sizeof(kMagic));
911   memcpy(buf + sizeof(kMagic), &size, sizeof(uint32_t));
912   hdr.SerializeWithCachedSizesToArray(buf + sizeof(kMagic) + sizeof(uint32_t));
913   return true;
914 }
915 
Verify()916 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Verify() {
917   // Check version.
918   if (hdr.version() != kCurVersion) {
919     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
920         "Trie version %u mismatch", hdr.version());
921     return false;
922   }
923 
924   // Check that indices in hdr are within bounds. Note that this is
925   // not a comprehensive integrity check for the entire trie.
926   if (hdr.num_nodes() > hdr.max_nodes() || hdr.num_nexts() > hdr.max_nexts() ||
927       hdr.suffixes_size() > hdr.max_suffixes_size() ||
928       hdr.value_size() >= hdr.max_suffixes_size()) {
929     ICING_LOG(ERROR) << "Trie header array size out of bounds";
930     return false;
931   }
932 
933   if (hdr.free_lists_size() != kNumNextAllocationBuckets) {
934     ICING_LOG(ERROR) << "Bad number of free lists";
935     return false;
936   }
937 
938   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
939     if (hdr.free_lists(i) != kInvalidNextIndex &&
940         hdr.free_lists(i) >= hdr.max_nexts()) {
941       ICING_LOG(ERROR) << "Free list index out of bounds";
942       return false;
943     }
944   }
945 
946   return true;
947 }
948 
nodes_left() const949 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nodes_left() const {
950   return hdr_.hdr.max_nodes() - hdr_.hdr.num_nodes();
951 }
952 
nexts_left() const953 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nexts_left() const {
954   return hdr_.hdr.max_nexts() - hdr_.hdr.num_nexts();
955 }
956 
suffixes_left() const957 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const {
958   return hdr_.hdr.max_suffixes_size() - hdr_.hdr.suffixes_size();
959 }
960 
FillDirtyPageStats(Stats * stats) const961 void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats(
962     Stats *stats) const {
963   stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages();
964   stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages();
965   stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages();
966 }
967 
968 // Dumper.
969 class IcingDynamicTrie::Dumper {
970  public:
Dumper(const IcingDynamicTrie & trie)971   explicit Dumper(const IcingDynamicTrie &trie)
972       : all_props_(trie), del_prop_(trie), storage_(trie.storage_.get()) {}
973 
Dump(std::ostream * pretty_print,vector<std::string> * keys) const974   void Dump(std::ostream *pretty_print, vector<std::string> *keys) const {
975     if (storage_->empty()) {
976       *pretty_print << "(empty)\n";
977     } else {
978       DumpNodeRecursive("", *storage_->GetRootNode(), 0, pretty_print, keys);
979     }
980   }
981 
982  private:
SuffixToValueAsString(const char * suffix) const983   std::string SuffixToValueAsString(const char *suffix) const {
984     int suffix_len = strlen(suffix);
985     std::string ret;
986     ret.reserve(storage_->value_size() * 2);
987     for (uint32_t i = 0; i < storage_->value_size(); i++) {
988       IcingStringUtil::SStringAppendF(&ret, 10, "%02x",
989                                       suffix[suffix_len + 1 + i]);
990     }
991 
992     // Now dump set properties.
993     uint32_t value_index = storage_->GetSuffixIndex(suffix + suffix_len + 1);
994     if (del_prop_.HasProperty(value_index)) {
995       ret += " (deleted)";
996     }
997     ret += " [";
998     for (size_t i = 0; i < all_props_.size(); i++) {
999       if (all_props_.HasProperty(i, value_index)) {
1000         IcingStringUtil::SStringAppendF(&ret, 10, "%zu", i);
1001       }
1002     }
1003     ret += ']';
1004 
1005     return ret;
1006   }
1007 
1008   // Inputs:
1009   //   prefix - the key prefix of the current node (so we can rebuild the key)
1010   //   node - the node we're at
1011   //   level - how many levels deep we are in the trie
1012   //   ret - the stream to pretty print to
1013   //   keys - the keys encountered are appended to this
DumpNodeRecursive(const std::string & prefix,const Node & node,int level,std::ostream * ret,vector<std::string> * keys) const1014   void DumpNodeRecursive(const std::string &prefix, const Node &node, int level,
1015                          std::ostream *ret, vector<std::string> *keys) const {
1016     if (node.is_leaf()) {
1017       // Dump suffix and value.
1018       for (int i = 0; i < level; i++) {
1019         *ret << ' ';
1020       }
1021       const char *suffix = storage_->GetSuffix(node.next_index());
1022       *ret << suffix;
1023       *ret << ' ';
1024       *ret << SuffixToValueAsString(suffix);
1025       *ret << '\n';
1026       keys->push_back(prefix + suffix);
1027     } else {
1028       // Go through each child (next) node. Print char and recursively
1029       // print trie underneath.
1030       for (uint32_t i = 0; i < (1U << node.log2_num_children()); i++) {
1031         const Next &next = *storage_->GetNext(node.next_index(), i);
1032         if (next.node_index() == kInvalidNodeIndex) break;
1033         for (int j = 0; j < level; j++) {
1034           *ret << ' ';
1035         }
1036         std::string new_prefix = prefix;
1037         if (next.val()) {
1038           *ret << static_cast<char>(next.val());
1039           new_prefix += next.val();
1040         } else {
1041           *ret << "null";
1042         }
1043         *ret << '\n';
1044         DumpNodeRecursive(new_prefix, *storage_->GetNode(next.node_index()),
1045                           level + 1, ret, keys);
1046       }
1047     }
1048   }
1049 
1050   PropertyReadersAll all_props_;
1051   PropertyDeletedReader del_prop_;
1052   const IcingDynamicTrie::IcingDynamicTrieStorage *storage_;
1053 };
1054 
1055 // IcingDynamicTrie.
IcingDynamicTrie(const std::string & filename_base,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)1056 IcingDynamicTrie::IcingDynamicTrie(const std::string &filename_base,
1057                                    const RuntimeOptions &runtime_options,
1058                                    const IcingFilesystem *filesystem)
1059     : IIcingStorage(),
1060       filename_base_(filename_base),
1061       is_initialized_(false),
1062       runtime_options_(runtime_options),
1063       storage_(nullptr),
1064       property_bitmaps_prefix_(filename_base_ + ".prop."),
1065       deleted_bitmap_filename_(filename_base_ + ".deleted"),
1066       deleted_bitmap_(nullptr),
1067       filesystem_(filesystem) {}
1068 
~IcingDynamicTrie()1069 IcingDynamicTrie::~IcingDynamicTrie() { Close(); }
1070 
Init()1071 bool IcingDynamicTrie::Init() {
1072   if (is_initialized_) return true;
1073 
1074   if (storage_ != nullptr) {
1075     ICING_LOG(FATAL) << "Storage is not null before initialization";
1076   }
1077 
1078   storage_ = std::make_unique<IcingDynamicTrieStorage>(
1079       filename_base_, runtime_options_, filesystem_);
1080   if (!storage_->Init() || !InitPropertyBitmaps()) {
1081     storage_.reset();
1082     return false;
1083   }
1084   is_initialized_ = true;
1085   return true;
1086 }
1087 
CreateIfNotExist(const Options & options)1088 bool IcingDynamicTrie::CreateIfNotExist(const Options &options) {
1089   // Initialized means exists.
1090   if (is_initialized_) return true;
1091 
1092   if (!options.is_valid()) {
1093     ICING_LOG(ERROR) << "Trie options invalid";
1094     return false;
1095   }
1096 
1097   auto storage = std::make_unique<IcingDynamicTrieStorage>(
1098       filename_base_, runtime_options_, filesystem_);
1099   return storage->CreateIfNotExist(options);
1100 }
1101 
Close()1102 void IcingDynamicTrie::Close() {
1103   if (!is_initialized_) return;
1104 
1105   UpdateCrc();
1106 
1107   storage_.reset();
1108   property_bitmaps_.clear();
1109   deleted_bitmap_.reset();
1110   is_initialized_ = false;
1111 }
1112 
Remove()1113 bool IcingDynamicTrie::Remove() {
1114   if (is_initialized()) {
1115     Close();
1116   }
1117 
1118   bool success = true;
1119 
1120   // Remove storage files.
1121   if (!IcingDynamicTrieStorage::Remove(filename_base_, *filesystem_)) {
1122     success = false;
1123   }
1124 
1125   // Also remove property bitmaps.
1126   vector<std::string> files;
1127   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1128                                      &files)) {
1129     return false;
1130   }
1131   for (size_t i = 0; i < files.size(); i++) {
1132     if (!filesystem_->DeleteFile(files[i].c_str())) success = false;
1133   }
1134   // And deleted bitmap.
1135   if (!filesystem_->DeleteFile(deleted_bitmap_filename_.c_str()))
1136     success = false;
1137 
1138   return success;
1139 }
1140 
Sync()1141 bool IcingDynamicTrie::Sync() {
1142   if (!is_initialized_) {
1143     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1144   }
1145 
1146   bool success = true;
1147   IcingTimer timer;
1148 
1149   // Sync property bitmaps.
1150   for (size_t i = 0; i < property_bitmaps_.size(); i++) {
1151     if (property_bitmaps_[i]) {
1152       if (!property_bitmaps_[i]->Sync()) success = false;
1153     }
1154   }
1155   if (!deleted_bitmap_->Sync()) success = false;
1156 
1157   // Sync storage.
1158   if (!storage_->Sync()) success = false;
1159 
1160   Warm();
1161 
1162   ICING_VLOG(1) << IcingStringUtil::StringPrintf(
1163       "Syncing dynamic trie %s took %.3fms", filename_base_.c_str(),
1164       timer.Elapsed() * 1000.);
1165 
1166   return success;
1167 }
1168 
GetDiskUsage() const1169 uint64_t IcingDynamicTrie::GetDiskUsage() const {
1170   uint64_t total = 0;
1171   // Property bitmaps.
1172   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1173                                            &total);
1174 
1175   for (auto &bitmap : property_bitmaps_) {
1176     if (bitmap == nullptr) continue;
1177     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1178   }
1179 
1180   // Storage.
1181   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetDiskUsage(), &total);
1182   return total;
1183 }
1184 
GetElementsSize() const1185 uint64_t IcingDynamicTrie::GetElementsSize() const {
1186   uint64_t total = 0;
1187 
1188   // Bitmaps are sparsely populated, so disk usage is more accurate for those.
1189   // Property bitmaps.
1190   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1191                                            &total);
1192   // The deleted bitmap is always initially grown to kGrowSize, whether there
1193   // are elements or not. So even if there are no elements in the trie, we'll
1194   // still have the bitmap of size kGrowSize, so subtract that from the size of
1195   // the trie's elements.
1196   total -= IcingFlashBitmap::kGrowSize;
1197 
1198   for (auto &bitmap : property_bitmaps_) {
1199     if (bitmap == nullptr) continue;
1200     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1201   }
1202 
1203   // Storage. We can use file size here since the storage files aren't sparse.
1204   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetElementsFileSize(),
1205                                            &total);
1206   return total;
1207 }
1208 
OpenAndInitBitmap(const std::string & filename,bool verify,const IcingFilesystem * filesystem)1209 std::unique_ptr<IcingFlashBitmap> IcingDynamicTrie::OpenAndInitBitmap(
1210     const std::string &filename, bool verify,
1211     const IcingFilesystem *filesystem) {
1212   auto bitmap = std::make_unique<IcingFlashBitmap>(filename, filesystem);
1213   if (!bitmap->Init() || (verify && !bitmap->Verify())) {
1214     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Init of %s failed",
1215                                                       filename.c_str());
1216     return nullptr;
1217   }
1218   return bitmap;
1219 }
1220 
InitPropertyBitmaps()1221 bool IcingDynamicTrie::InitPropertyBitmaps() {
1222   // Only called on init.
1223   if (!property_bitmaps_.empty()) {
1224     ICING_LOG(FATAL) << "Property bitmaps not empty before initialization";
1225   }
1226 
1227   if (deleted_bitmap_ != nullptr) {
1228     ICING_LOG(FATAL) << "Deleted bitmap not null before initialization";
1229   }
1230 
1231   // Truncate property bitmap files at current value index. Last value
1232   // is at suffixes_size - value_size(). We want to clear everything
1233   // after that.
1234   uint64_t truncate_idx =
1235       storage_->hdr().suffixes_size() > 0
1236           ? ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size() -
1237                                             value_size()) +
1238                 1
1239           : 0;
1240 
1241   // Discover property bitmaps by scanning the dir.
1242   vector<std::string> files;
1243   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1244                                      &files)) {
1245     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
1246         "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
1247     goto failed;
1248   }
1249   for (size_t i = 0; i < files.size(); i++) {
1250     // Decode property id from filename.
1251     size_t property_id_start_idx = files[i].rfind('.');
1252     if (property_id_start_idx == std::string::npos) {
1253       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
1254                                                         files[i].c_str());
1255       continue;
1256     }
1257     property_id_start_idx++;  // skip dot
1258     char *end;
1259     uint32_t property_id =
1260         strtol(files[i].c_str() + property_id_start_idx, &end, 10);  // NOLINT
1261     if (!end || end != (files[i].c_str() + files[i].size())) {
1262       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
1263                                                         files[i].c_str());
1264       continue;
1265     }
1266     std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
1267         files[i],
1268         runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1269         filesystem_);
1270     if (!bitmap) {
1271       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
1272           "Open prop bitmap failed: %s", files[i].c_str());
1273       goto failed;
1274     }
1275     bitmap->Truncate(truncate_idx);
1276     if (property_id >= property_bitmaps_.size()) {
1277       property_bitmaps_.resize(property_id + 1);
1278     }
1279     property_bitmaps_[property_id] = std::move(bitmap);
1280   }
1281 
1282   deleted_bitmap_ = OpenAndInitBitmap(
1283       deleted_bitmap_filename_,
1284       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1285       filesystem_);
1286   if (!deleted_bitmap_) {
1287     goto failed;
1288   }
1289   deleted_bitmap_->Truncate(truncate_idx);
1290 
1291   return true;
1292 
1293 failed:
1294   property_bitmaps_.clear();
1295   deleted_bitmap_.reset();
1296   return false;
1297 }
1298 
Warm() const1299 void IcingDynamicTrie::Warm() const {
1300   if (!is_initialized()) {
1301     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1302   }
1303 
1304   return storage_->Warm();
1305 }
1306 
OnSleep()1307 void IcingDynamicTrie::OnSleep() {
1308   if (!is_initialized()) {
1309     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1310   }
1311 
1312   // Update crcs so we can verify when we come back.
1313   UpdateCrc();
1314 }
1315 
~NewValueMap()1316 IcingDynamicTrie::NewValueMap::~NewValueMap() {}
1317 
Compact(const NewValueMap & old_tvi_to_new_value,IcingDynamicTrie * out,std::unordered_map<uint32_t,uint32_t> * old_to_new_tvi) const1318 bool IcingDynamicTrie::Compact(
1319     const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
1320     std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const {
1321   if (old_to_new_tvi == nullptr) {
1322     ICING_LOG(ERROR) << "TVI is null";
1323   }
1324 
1325   if (!is_initialized()) {
1326     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1327   }
1328 
1329   PropertyReadersAll prop_readers(*this);
1330 
1331   old_to_new_tvi->clear();
1332   old_to_new_tvi->rehash(size() * 2);
1333 
1334   for (Iterator it_all(*this, ""); it_all.IsValid(); it_all.Advance()) {
1335     uint32_t value_index = it_all.GetValueIndex();
1336     const void *new_value = old_tvi_to_new_value.GetNewValue(value_index);
1337     if (!new_value) continue;
1338 
1339     uint32_t new_value_index;
1340     if (!out->Insert(it_all.GetKey(), new_value, &new_value_index, false)) {
1341       return false;
1342     }
1343 
1344     old_to_new_tvi->insert({value_index, new_value_index});
1345 
1346     // Copy properties.
1347     for (size_t i = 0; i < prop_readers.size(); i++) {
1348       if (prop_readers.HasProperty(i, value_index)) {
1349         if (!out->SetProperty(new_value_index, i)) {
1350           // Ouch. We need to bail.
1351           return false;
1352         }
1353       }
1354     }
1355   }
1356 
1357   return true;
1358 }
1359 
size() const1360 uint32_t IcingDynamicTrie::size() const {
1361   if (!is_initialized()) {
1362     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1363   }
1364   return storage_->hdr().num_keys();
1365 }
1366 
CollectStatsRecursive(const Node & node,Stats * stats) const1367 void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
1368                                              Stats *stats) const {
1369   if (node.is_leaf()) {
1370     stats->num_leaves++;
1371     const char *suffix = storage_->GetSuffix(node.next_index());
1372     stats->suffixes_used += strlen(suffix) + 1 + value_size();
1373     if (!suffix[0]) {
1374       stats->null_suffixes++;
1375     }
1376   } else {
1377     stats->num_intermediates++;
1378     uint32_t i = 0;
1379     for (; i < (1U << node.log2_num_children()); i++) {
1380       const Next &next = *storage_->GetNext(node.next_index(), i);
1381       if (next.node_index() == kInvalidNodeIndex) break;
1382       CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats);
1383     }
1384 
1385     // At least one valid node in each next array
1386     if (i == 0) {
1387       ICING_LOG(FATAL) << "No valid node in 'next' array";
1388     }
1389 
1390     stats->child_counts[i - 1]++;
1391     stats->wasted[node.log2_num_children()] +=
1392         (1 << node.log2_num_children()) - i;
1393     stats->total_wasted += (1 << node.log2_num_children()) - i;
1394   }
1395 }
1396 
CollectStats(Stats * stats) const1397 void IcingDynamicTrie::CollectStats(Stats *stats) const {
1398   if (!is_initialized()) {
1399     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1400   }
1401 
1402   memset(stats, 0, sizeof(*stats));
1403 
1404   stats->num_keys = storage_->hdr().num_keys();
1405   stats->num_nodes = storage_->hdr().num_nodes();
1406   stats->max_nodes = storage_->hdr().max_nodes();
1407   stats->num_nexts = storage_->hdr().num_nexts();
1408   stats->max_nexts = storage_->hdr().max_nexts();
1409   stats->suffixes_size = storage_->hdr().suffixes_size();
1410   stats->max_suffixes_size = storage_->hdr().max_suffixes_size();
1411 
1412   // Stats collected from traversing the trie.
1413   if (!storage_->empty()) {
1414     CollectStatsRecursive(*storage_->GetRootNode(), stats);
1415   }
1416 
1417   // Free-list stats.
1418   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1419     for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex;
1420          cur = storage_->GetNext(cur, 0)->next_index()) {
1421       stats->num_free[i]++;
1422     }
1423     stats->total_free += stats->num_free[i] * (1 << i);
1424   }
1425 
1426   // Dirty page counts.
1427   storage_->FillDirtyPageStats(stats);
1428 }
1429 
DumpStats(int verbosity) const1430 std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
1431   std::string ret;
1432   IcingStringUtil::SStringAppendF(
1433       &ret, 0,
1434       "Keys %u "
1435       "Nodes (%u/%u) %.3f%% "
1436       "Nexts (%u/%u) %.3f%% "
1437       "Suffixes (%u/%u) %.3f%%\n",
1438       num_keys, num_nodes, max_nodes,
1439       100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts,
1440       100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size,
1441       max_suffixes_size,
1442       100. * math_util::SafeDivide(suffixes_size, max_suffixes_size));
1443 
1444   if (verbosity > 0) {
1445     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1446       if (num_free[i] > 0) {
1447         IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i,
1448                                         num_free[i]);
1449       }
1450     }
1451     IcingStringUtil::SStringAppendF(
1452         &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts,
1453         100. * math_util::SafeDivide(total_free, num_nexts));
1454 
1455     for (int i = 0; i < 256; i++) {
1456       if (child_counts[i] > 0) {
1457         IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1,
1458                                         child_counts[i]);
1459       }
1460     }
1461     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1462       IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i,
1463                                       wasted[i]);
1464     }
1465     IcingStringUtil::SStringAppendF(
1466         &ret, 0,
1467         "Wasted total: %u\n"
1468         "Num intermediates %u num leaves %u "
1469         "suffixes used %u null %u\n"
1470         "Total next frag: %.3f%%\n",
1471         total_wasted, num_intermediates, num_leaves, suffixes_used,
1472         null_suffixes,
1473         100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
1474   }
1475   IcingStringUtil::SStringAppendF(
1476       &ret, 0, "Memory usage: %zu/%zu bytes\n",
1477       num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size,
1478       max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size);
1479 
1480   IcingStringUtil::SStringAppendF(
1481       &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n",
1482       dirty_pages_nodes,
1483       math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1,
1484                             getpagesize()),
1485       dirty_pages_nexts,
1486       math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1,
1487                             getpagesize()),
1488       dirty_pages_suffixes,
1489       math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize()));
1490 
1491   return ret;
1492 }
1493 
DumpTrie(std::ostream * pretty_print,vector<std::string> * keys) const1494 void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print,
1495                                 vector<std::string> *keys) const {
1496   if (!is_initialized()) {
1497     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1498   }
1499 
1500   Dumper dumper(*this);
1501   dumper.Dump(pretty_print, keys);
1502 }
1503 
Clear()1504 void IcingDynamicTrie::Clear() {
1505   if (!is_initialized()) {
1506     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1507   }
1508 
1509   storage_->Clear();
1510   for (auto &bitmap : property_bitmaps_) {
1511     if (bitmap) {
1512       bitmap->Delete();
1513       bitmap.reset();
1514     }
1515   }
1516   deleted_bitmap_->Truncate(0);
1517 }
1518 
ClearSuffixAndValue(uint32_t suffix_value_index)1519 bool IcingDynamicTrie::ClearSuffixAndValue(uint32_t suffix_value_index) {
1520   // The size 1 below is for a '\0' between the suffix and the value.
1521   size_t suffix_and_value_length =
1522       strlen(this->storage_->GetSuffix(suffix_value_index)) + 1 +
1523       this->value_size();
1524   char *mutable_suffix_and_value = this->storage_->GetMutableSuffix(
1525       suffix_value_index, suffix_and_value_length);
1526 
1527   if (mutable_suffix_and_value == nullptr) {
1528     return false;
1529   }
1530 
1531   memset(mutable_suffix_and_value, 0, suffix_and_value_length);
1532   return true;
1533 }
1534 
ResetNext(uint32_t next_index)1535 bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
1536   Next *mutable_next =
1537       this->storage_->GetMutableNextArray(next_index, /*len=*/1);
1538 
1539   if (mutable_next == nullptr) {
1540     return false;
1541   }
1542 
1543   mutable_next->set_val(0);
1544   mutable_next->set_node_index(kInvalidNodeIndex);
1545   return true;
1546 }
1547 
SortNextArray(const Node * node)1548 bool IcingDynamicTrie::SortNextArray(const Node *node) {
1549   if (node == nullptr) {
1550     // Nothing to sort, return success directly.
1551     return true;
1552   }
1553 
1554   uint32_t next_array_buffer_size = 1u << node->log2_num_children();
1555   Next *next_array_start = this->storage_->GetMutableNextArray(
1556       node->next_index(), next_array_buffer_size);
1557 
1558   if (next_array_start == nullptr) {
1559     return false;
1560   }
1561 
1562   std::sort(next_array_start, next_array_start + next_array_buffer_size - 1);
1563   return true;
1564 }
1565 
Insert(const char * key,const void * value,uint32_t * value_index,bool replace,bool * pnew_key)1566 bool IcingDynamicTrie::Insert(const char *key, const void *value,
1567                               uint32_t *value_index, bool replace,
1568                               bool *pnew_key) {
1569   if (!is_initialized()) {
1570     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1571   }
1572 
1573   if (pnew_key) *pnew_key = false;
1574 
1575   // Find out ahead of time whether things will fit. A conservative
1576   // check based on allocations made below.
1577   //
1578   // IMPORTANT: This needs to be updated if the alloc patterns below
1579   // change.
1580   size_t key_len = strlen(key);
1581   if (!(storage_->nodes_left() >= 2 + key_len + 1 &&
1582         storage_->nexts_left() >= 2 + key_len + 1 + kMaxNextArraySize &&
1583         storage_->suffixes_left() >= key_len + 1 + value_size())) {
1584     // No more space left.
1585     return false;
1586   }
1587 
1588   uint32_t best_node_index;
1589   int key_offset;
1590   FindBestNode(key, &best_node_index, &key_offset, false);
1591 
1592   // A negative key_offset indicates that storage_ is empty
1593   if (key_offset < 0) {
1594     // First key.
1595     if (!storage_->empty()) {
1596       ICING_LOG(FATAL) << "Key offset is negative but storage is not empty, "
1597                           "there're inconsistencies in dynamic trie.";
1598     }
1599     Node *node = storage_->AllocNode();
1600     node->set_next_index(storage_->MakeSuffix(key, value, value_index));
1601     node->set_is_leaf(true);
1602     node->set_log2_num_children(0);
1603   } else if (storage_->GetNode(best_node_index)->is_leaf()) {
1604     // Prefix in the trie. Split at leaf.
1605     Node *split_node = storage_->GetMutableNode(best_node_index);
1606     const char *prev_suffix = storage_->GetSuffix(split_node->next_index());
1607 
1608     // Find the common prefix length.
1609     const char *prev_suffix_cur = prev_suffix;
1610     const char *key_cur = key + key_offset;
1611     while (*prev_suffix_cur && *prev_suffix_cur == *key_cur) {
1612       prev_suffix_cur++;
1613       key_cur++;
1614     }
1615 
1616     // Equal strings?
1617     if (*prev_suffix_cur == 0 && *key_cur == 0) {
1618       // Update value if replace == true and return.
1619       if (value_index) {
1620         *value_index = storage_->GetSuffixIndex(prev_suffix_cur + 1);
1621       }
1622       if (replace) {
1623         char *mutable_prev_suffix_cur = storage_->GetMutableSuffix(
1624             storage_->GetSuffixIndex(prev_suffix_cur + 1), value_size());
1625         memcpy(mutable_prev_suffix_cur, value, value_size());
1626       }
1627       return true;
1628     }
1629 
1630     if (*prev_suffix_cur == *key_cur) {
1631       ICING_LOG(FATAL) << "The suffix cursor and key cursor should diverge "
1632                           "after finding the common prefix.";
1633     }
1634 
1635     // Create single-branch children for the common prefix
1636     // length. After the loop, split_node points to the node that
1637     // will have more than 1 char.
1638     int common_len = prev_suffix_cur - prev_suffix;
1639     for (int i = 0; i < common_len; i++) {
1640       // Create a single-branch child node.
1641       Next *split_next = storage_->AllocNextArray(1);
1642       split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1643       split_node->set_is_leaf(false);
1644       split_node->set_log2_num_children(0);
1645       Node *child_node = storage_->AllocNode();
1646       split_next[0].set_val(*(prev_suffix + i));
1647       split_next[0].set_node_index(storage_->GetNodeIndex(child_node));
1648 
1649       split_node = child_node;
1650     }
1651 
1652     // Fill a split.
1653     Next *split_next = storage_->AllocNextArray(2);
1654     split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1655     split_node->set_is_leaf(false);
1656     split_node->set_log2_num_children(1);
1657     Node *prev_suffix_node = storage_->AllocNode();
1658     Node *key_node = storage_->AllocNode();
1659     split_next[0].set_val(*(prev_suffix + common_len));
1660     split_next[0].set_node_index(storage_->GetNodeIndex(prev_suffix_node));
1661     if (*(prev_suffix + common_len)) {
1662       uint32_t next_index =
1663           storage_->GetSuffixIndex(prev_suffix + common_len) + 1;
1664       prev_suffix_node->set_next_index(next_index);
1665     } else {
1666       uint32_t next_index = storage_->GetSuffixIndex(prev_suffix + common_len);
1667       prev_suffix_node->set_next_index(next_index);
1668     }
1669     prev_suffix_node->set_is_leaf(true);
1670     prev_suffix_node->set_log2_num_children(0);
1671     split_next[1].set_val(*(key + key_offset + common_len));
1672     split_next[1].set_node_index(storage_->GetNodeIndex(key_node));
1673     if (*(key + key_offset + common_len)) {
1674       uint32_t next_index = storage_->MakeSuffix(
1675           key + key_offset + common_len + 1, value, value_index);
1676       key_node->set_next_index(next_index);
1677     } else {
1678       uint32_t next_index = storage_->MakeSuffix(key + key_offset + common_len,
1679                                                  value, value_index);
1680       key_node->set_next_index(next_index);
1681     }
1682     key_node->set_is_leaf(true);
1683     key_node->set_log2_num_children(0);
1684 
1685     std::sort(split_next, split_next + 2);
1686   } else {
1687     // Insert into intermediate node.
1688     const Node *best_node = storage_->GetNode(best_node_index);
1689 
1690     // Add our value as a node + suffix.
1691     Node *new_leaf_node = storage_->AllocNode();
1692     if (*(key + key_offset)) {
1693       uint32_t next_index =
1694           storage_->MakeSuffix(key + key_offset + 1, value, value_index);
1695       new_leaf_node->set_next_index(next_index);
1696     } else {
1697       uint32_t next_index =
1698           storage_->MakeSuffix(key + key_offset, value, value_index);
1699       new_leaf_node->set_next_index(next_index);
1700     }
1701     new_leaf_node->set_is_leaf(true);
1702     new_leaf_node->set_log2_num_children(0);
1703 
1704     // Figure out the real length of the existing next array.
1705     uint32_t next_array_buffer_size = 1u << best_node->log2_num_children();
1706     Next *cur_next = storage_->GetMutableNextArray(best_node->next_index(),
1707                                                    next_array_buffer_size);
1708     int next_len = GetValidNextsSize(cur_next, next_array_buffer_size);
1709     Next *new_next = cur_next;
1710     if (next_len == (next_array_buffer_size)) {
1711       // Allocate a new, larger, array.
1712       new_next = storage_->AllocNextArray(next_len + 1);
1713       memcpy(new_next, cur_next, sizeof(Next) * next_len);
1714     }
1715 
1716     // Write a link to our new leaf node and sort.
1717     new_next[next_len].set_val(*(key + key_offset));
1718     new_next[next_len].set_node_index(storage_->GetNodeIndex(new_leaf_node));
1719     inplace_merge(new_next, new_next + next_len, new_next + next_len + 1);
1720     next_len++;
1721 
1722     // If this was new, update the parent node and free the old next
1723     // array.
1724     if (new_next != cur_next) {
1725       Node *mutable_best_node =
1726           storage_->GetMutableNode(storage_->GetNodeIndex(best_node));
1727       mutable_best_node->set_next_index(storage_->GetNextArrayIndex(new_next));
1728       mutable_best_node->set_is_leaf(false);
1729       uint8_t log2_num_children = mutable_best_node->log2_num_children();
1730 
1731       // 8 == log2(256)
1732       if (log2_num_children >= 8) {
1733         ICING_LOG(FATAL) << "Number of children exceeds the max allowed size";
1734       }
1735 
1736       mutable_best_node->set_log2_num_children(log2_num_children + 1);
1737 
1738       storage_->FreeNextArray(cur_next,
1739                               mutable_best_node->log2_num_children() - 1);
1740     }
1741   }
1742 
1743   // We added a new key.
1744   storage_->inc_num_keys();
1745 
1746   if (pnew_key) *pnew_key = true;
1747   return true;
1748 }
1749 
GetValueAtIndex(uint32_t value_index) const1750 const void *IcingDynamicTrie::GetValueAtIndex(uint32_t value_index) const {
1751   if (!is_initialized()) {
1752     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1753   }
1754 
1755   return static_cast<const void *>(storage_->GetSuffix(value_index));
1756 }
1757 
SetValueAtIndex(uint32_t value_index,const void * value)1758 void IcingDynamicTrie::SetValueAtIndex(uint32_t value_index,
1759                                        const void *value) {
1760   if (!is_initialized()) {
1761     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1762   }
1763 
1764   if (value_index > storage_->hdr().max_suffixes_size() - value_size()) {
1765     ICING_LOG(FATAL) << "Value index is out of range";
1766   }
1767 
1768   memcpy(storage_->GetMutableSuffix(value_index, value_size()), value,
1769          value_size());
1770 }
1771 
Find(const char * key,void * value,uint32_t * value_index) const1772 bool IcingDynamicTrie::Find(const char *key, void *value,
1773                             uint32_t *value_index) const {
1774   if (!is_initialized()) {
1775     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1776   }
1777 
1778   uint32_t best_node_index;
1779   int key_offset;
1780   FindBestNode(key, &best_node_index, &key_offset, false);
1781 
1782   const Node *best_node = storage_->GetNode(best_node_index);
1783   if (key_offset >= 0 && best_node->is_leaf() &&
1784       !strcmp(key + key_offset, storage_->GetSuffix(best_node->next_index()))) {
1785     uint32_t vidx = best_node->next_index() +
1786                     strlen(storage_->GetSuffix(best_node->next_index())) + 1;
1787     if (value_index) *value_index = vidx;
1788     if (value) memcpy(value, storage_->GetSuffix(vidx), value_size());
1789     return true;
1790   } else {
1791     return false;
1792   }
1793 }
1794 
Iterator(const IcingDynamicTrie & trie,const char * prefix)1795 IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
1796                                      const char *prefix)
1797     : cur_key_(prefix),
1798       cur_suffix_(nullptr),
1799       cur_suffix_len_(0),
1800       single_leaf_match_(false),
1801       trie_(trie) {
1802   if (!trie.is_initialized()) {
1803     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1804   }
1805 
1806   Reset();
1807 }
1808 
LeftBranchToLeaf(uint32_t node_index)1809 void IcingDynamicTrie::Iterator::LeftBranchToLeaf(uint32_t node_index) {
1810   // Go down the trie, following the left-most child until we hit a
1811   // leaf. Push to stack and cur_key nodes and chars as we go.
1812   for (; !trie_.storage_->GetNode(node_index)->is_leaf();
1813        node_index =
1814            trie_.storage_
1815                ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
1816                ->node_index()) {
1817     branch_stack_.push_back(Branch(node_index));
1818     cur_key_.push_back(
1819         trie_.storage_
1820             ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
1821             ->val());
1822   }
1823 
1824   // We're at a leaf.
1825   cur_suffix_ = trie_.storage_->GetSuffix(
1826       trie_.storage_->GetNode(node_index)->next_index());
1827   cur_suffix_len_ = strlen(cur_suffix_);
1828   cur_key_.append(cur_suffix_, cur_suffix_len_);
1829 }
1830 
Reset()1831 void IcingDynamicTrie::Iterator::Reset() {
1832   size_t strip_len = branch_stack_.size() + cur_suffix_len_;
1833 
1834   if (cur_key_.size() < strip_len) {
1835     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1836                         "size, there're inconsistencies in dynamic trie";
1837   }
1838 
1839   // Trim back cur_key_ to original prefix.
1840   cur_key_.resize(cur_key_.size() - strip_len);
1841   cur_suffix_ = nullptr;
1842   cur_suffix_len_ = 0;
1843   single_leaf_match_ = false;
1844   branch_stack_.clear();
1845 
1846   // Nothing to do with an empty trie.
1847   if (trie_.storage_->empty()) return;
1848 
1849   // Find node matching prefix.
1850   uint32_t node_index;
1851   int key_offset;
1852   trie_.FindBestNode(cur_key_.c_str(), &node_index, &key_offset, true);
1853 
1854   // Two cases/states:
1855   //
1856   // - Found an intermediate node. If we matched all of prefix
1857   //   (cur_key_), LeftBranchToLeaf.
1858   //
1859   // - Found a leaf node, which is the ONLY matching key for this
1860   //   prefix. Check that suffix matches the prefix. Then we set
1861   //   single_leaf_match_ = true and apply different logic for
1862   //   Advance.
1863   if (key_offset < 0) {
1864     // A negative key_offset indicates that trie_.storage_ is empty
1865     ICING_LOG(FATAL) << "Trie storage is empty";
1866   }
1867 
1868   const Node *best_node = trie_.storage_->GetNode(node_index);
1869   if (best_node->is_leaf() &&
1870       !strncmp(cur_key_.c_str() + key_offset,
1871                trie_.storage_->GetSuffix(best_node->next_index()),
1872                cur_key_.size() - key_offset)) {
1873     // Copy the entire suffix into the current key.
1874     cur_key_.resize(key_offset);
1875     cur_key_.append(trie_.storage_->GetSuffix(best_node->next_index()));
1876     cur_suffix_ = trie_.storage_->GetSuffix(best_node->next_index());
1877     cur_suffix_len_ = strlen(cur_suffix_);
1878     single_leaf_match_ = true;
1879   } else if (static_cast<size_t>(key_offset) == cur_key_.size()) {
1880     LeftBranchToLeaf(node_index);
1881   }
1882 }
1883 
Advance()1884 bool IcingDynamicTrie::Iterator::Advance() {
1885   if (!IsValid()) return false;
1886   if (single_leaf_match_) {
1887     // If we only have an exact match, the Advance logic does not
1888     // apply. Invalidate the iterator and return.
1889     cur_suffix_ = nullptr;
1890     cur_suffix_len_ = 0;
1891     return false;
1892   }
1893 
1894   if (cur_key_.size() < (branch_stack_.size() + cur_suffix_len_)) {
1895     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1896                         "size, there're inconsistencies in dynamic trie";
1897   }
1898 
1899   // Move up from the current leaf.
1900   cur_key_.resize(cur_key_.size() - cur_suffix_len_);
1901   cur_suffix_ = nullptr;
1902   cur_suffix_len_ = 0;
1903 
1904   while (!branch_stack_.empty()) {
1905     Branch *branch = &branch_stack_.back();
1906     const Node *node = trie_.storage_->GetNode(branch->node_idx);
1907     branch->child_idx++;
1908     if (branch->child_idx < (1 << node->log2_num_children()) &&
1909         trie_.storage_->GetNext(node->next_index(), branch->child_idx)
1910                 ->node_index() != kInvalidNodeIndex) {
1911       // Successfully incremented to the next child. Update the char
1912       // value at this depth.
1913       cur_key_[cur_key_.size() - 1] =
1914           trie_.storage_->GetNext(node->next_index(), branch->child_idx)->val();
1915       // We successfully found a sub-trie to explore.
1916       LeftBranchToLeaf(
1917           trie_.storage_->GetNext(node->next_index(), branch->child_idx)
1918               ->node_index());
1919       return true;
1920     }
1921     branch_stack_.pop_back();
1922     cur_key_.resize(cur_key_.size() - 1);
1923   }
1924 
1925   // Un-wound the entire stack. We are done.
1926   return false;
1927 }
1928 
IsValid() const1929 bool IcingDynamicTrie::Iterator::IsValid() const {
1930   return cur_suffix_ != nullptr;
1931 }
1932 
GetKey() const1933 const char *IcingDynamicTrie::Iterator::GetKey() const {
1934   // cur_key_ can have a NULL in it so cur_key_ can be wrong but
1935   // cur_key_.c_str() is always right.
1936   return IsValid() ? cur_key_.c_str() : nullptr;
1937 }
1938 
GetValue() const1939 const void *IcingDynamicTrie::Iterator::GetValue() const {
1940   if (!IsValid()) return nullptr;
1941 
1942   return static_cast<const void *>(cur_suffix_ + cur_suffix_len_ + 1);
1943 }
1944 
GetValueIndex() const1945 uint32_t IcingDynamicTrie::Iterator::GetValueIndex() const {
1946   if (!IsValid()) return kInvalidSuffixIndex;
1947 
1948   return trie_.storage_->GetSuffixIndex(cur_suffix_ + cur_suffix_len_ + 1);
1949 }
1950 
LeftBranchToUtf8End()1951 void IcingDynamicTrie::Utf8Iterator::LeftBranchToUtf8End() {
1952   if (cur_len_ <= 0) {
1953     ICING_LOG(FATAL) << "Invalid UTF-8 character length";
1954   }
1955 
1956   if (branch_end_ - branch_stack_ != cur_len_) {
1957     ICING_LOG(FATAL) << "Depth from first visited node to last visited node "
1958                         "doesn't match the current UTF-8 character length";
1959   }
1960 
1961   // Use branch at top of stack to determine where to follow.
1962   const Branch &branch = *(branch_end_ - 1);
1963   const Node *node = trie_.storage_->GetNode(branch.child->node_index());
1964 
1965   // If we start with non-ascii, take all left branches while there is
1966   // a continuation byte.
1967   if (!i18n_utils::IsAscii(cur_[cur_len_ - 1])) {
1968     while (!node->is_leaf()) {
1969       if (cur_len_ >= U8_MAX_LENGTH) break;
1970 
1971       InitBranch(branch_end_, node, 0);
1972       // When we are looking to complete a utf8 char, skip 0s.
1973       if (branch_end_->child->val() == 0) {
1974         // Check if we already have a valid cur_.
1975         cur_[cur_len_] = 0;
1976         UChar32 uchar32 = i18n_utils::GetUChar32At(cur_, cur_len_, 0);
1977         if (uchar32 == i18n_utils::kInvalidUChar32 &&
1978             node->log2_num_children() > 0) {
1979           branch_end_->child++;
1980         } else {
1981           // Good termination. Just break.
1982           break;
1983         }
1984       }
1985 
1986       if (!IcingStringUtil::IsContinuationByte(branch_end_->child->val()))
1987         break;
1988 
1989       cur_[cur_len_++] = branch_end_->child->val();
1990       node = trie_.storage_->GetNode(branch_end_->child->node_index());
1991       branch_end_++;
1992     }
1993 
1994     cur_logical_node_.node = node;
1995 
1996     // Maybe go into suffixes and set suffix_offset.
1997     if (node->is_leaf()) {
1998       GoIntoSuffix(node);
1999     } else {
2000       cur_logical_node_.suffix_offset = 0;
2001     }
2002   } else {  // ascii
2003     cur_logical_node_.node = node;
2004     cur_logical_node_.suffix_offset = 0;
2005   }
2006 
2007   // NULL-terminate.
2008   cur_[cur_len_] = 0;
2009 }
2010 
GoIntoSuffix(const Node * node)2011 void IcingDynamicTrie::Utf8Iterator::GoIntoSuffix(const Node *node) {
2012   const char *suffix = trie_.storage_->GetSuffix(node->next_index());
2013   const char *cur_suffix;
2014   for (cur_suffix = suffix; cur_len_ < U8_MAX_LENGTH &&
2015                             IcingStringUtil::IsContinuationByte(*cur_suffix);
2016        cur_suffix++) {
2017     cur_[cur_len_++] = *cur_suffix;
2018   }
2019   cur_logical_node_.suffix_offset = cur_suffix - suffix;
2020 }
2021 
Reset()2022 void IcingDynamicTrie::Utf8Iterator::Reset() {
2023   cur_[0] = 0;
2024   cur_len_ = 0;
2025   branch_end_ = branch_stack_;
2026 
2027   if (start_node_) {
2028     // Take the first char node's children.
2029     const Next *next = trie_.storage_->GetNext(start_node_->next_index(), 0);
2030     branch_end_->node = start_node_;
2031     branch_end_->child_end = next + (1 << start_node_->log2_num_children());
2032     if (next->val() == 0) {
2033       // Skip any nulls at this position. We don't return empty string
2034       // as an iteration.
2035       next++;
2036     }
2037     branch_end_->child = next;
2038     cur_[cur_len_++] = next->val();
2039     branch_end_++;
2040 
2041     // Will NULL-terminate cur_.
2042     LeftBranchToUtf8End();
2043   } else {
2044     // Nothing to return.
2045     cur_logical_node_.node = nullptr;
2046     cur_logical_node_.suffix_offset = 0;
2047   }
2048 }
2049 
Advance()2050 bool IcingDynamicTrie::Utf8Iterator::Advance() {
2051   if (!IsValid()) return false;
2052 
2053   // Clip to branch.
2054   cur_len_ = branch_end_ - branch_stack_;
2055 
2056   while (branch_end_ > branch_stack_) {
2057     Branch *branch = branch_end_ - 1;
2058     branch->child++;
2059     if (!branch->IsFinished()) {
2060       // Successfully incremented to the next child. Update the char
2061       // value at this depth.
2062       cur_[cur_len_ - 1] = branch->child->val();
2063 
2064       // We successfully found a sub-trie to explore.
2065       LeftBranchToUtf8End();
2066       return true;
2067     }
2068     cur_len_--;
2069     branch_end_--;
2070   }
2071 
2072   // Un-wound the entire stack. We are done.
2073   return false;
2074 }
2075 
InitBranch(Branch * branch,const Node * start,char key_char)2076 void IcingDynamicTrie::Utf8Iterator::InitBranch(Branch *branch,
2077                                                 const Node *start,
2078                                                 char key_char) {
2079   branch->node = start;
2080   branch->child = trie_.storage_->GetNext(start->next_index(), 0);
2081   branch->child_end = branch->child + (1 << start->log2_num_children());
2082   if (key_char) {
2083     branch->child =
2084         trie_.LowerBound(branch->child, branch->child_end, key_char);
2085   }
2086 }
2087 
IsFinished()2088 bool IcingDynamicTrie::Utf8Iterator::Branch::IsFinished() {
2089   return child >= child_end || child->node_index() == kInvalidNodeIndex;
2090 }
2091 
IsValid() const2092 bool IcingDynamicTrie::Utf8Iterator::IsValid() const { return cur_len_ > 0; }
2093 
GetNextByChar(const Node * node,uint8_t key_char) const2094 const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
2095     const Node *node, uint8_t key_char) const {
2096   const Next *next_start = storage_->GetNext(node->next_index(), 0);
2097   const Next *next_end = next_start + (1 << node->log2_num_children());
2098 
2099   const Next *found = LowerBound(next_start, next_end, key_char);
2100   if (found >= next_end || found->val() != key_char ||
2101       found->node_index() == kInvalidNodeIndex) {
2102     return nullptr;
2103   }
2104 
2105   return found;
2106 }
2107 
LowerBound(const Next * start,const Next * end,uint8_t key_char) const2108 const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
2109     const Next *start, const Next *end, uint8_t key_char) const {
2110   // Above this value will use binary search instead of linear
2111   // search. 16 was chosen from running some benchmarks with
2112   // different values.
2113   static const uint32_t kBinarySearchCutoff = 16;
2114 
2115   if (end - start >= kBinarySearchCutoff) {
2116     // Binary search.
2117     Next key_next(key_char, 0);
2118     return lower_bound(start, end, key_next);
2119   } else {
2120     // Linear search.
2121     const Next *found;
2122     for (found = start; found < end; found++) {
2123       if (found->val() >= key_char) {
2124         // Should have gotten match.
2125         break;
2126       }
2127     }
2128     return found;
2129   }
2130 }
2131 
FindBestNode(const char * key,uint32_t * best_node_index,int * key_offset,bool prefix,bool utf8) const2132 void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
2133                                     int *key_offset, bool prefix,
2134                                     bool utf8) const {
2135   // Find the best node such that:
2136   //
2137   // - If key is NOT in the trie, key[0..key_offset) is a prefix to
2138   //   everything under best_node_index.
2139   //
2140   // - If key is in the trie, best_node_index is the leaf that points
2141   //   to the key suffix and key_offset == strlen(key).
2142   //
2143   // If prefix is true, when key is both in the trie AND a prefix
2144   // (e.g. "ab" and "abc" are in the trie), we return the intermediate
2145   // node with key as the prefix as opposed to the exactly matching
2146   // leaf node.
2147   if (storage_->empty()) {
2148     *best_node_index = 0;
2149     *key_offset = -1;
2150     return;
2151   }
2152 
2153   const Node *cur_node = storage_->GetRootNode();
2154   const char *cur_key = key;
2155   const Node *utf8_node = cur_node;
2156   const char *utf8_key = cur_key;
2157   while (!cur_node->is_leaf()) {
2158     const Next *found = GetNextByChar(cur_node, *cur_key);
2159     if (!found) break;
2160 
2161     if (prefix && found->val() == 0) {
2162       break;
2163     }
2164 
2165     cur_node = storage_->GetNode(found->node_index());
2166 
2167     // End of key.
2168     if (*cur_key == 0) {
2169       break;
2170     }
2171     cur_key++;
2172 
2173     if (utf8 && i18n_utils::IsLeadUtf8Byte(*cur_key)) {
2174       utf8_node = cur_node;
2175       utf8_key = cur_key;
2176     }
2177   }
2178 
2179   if (utf8) {
2180     // Rewind.
2181     cur_node = utf8_node;
2182     cur_key = utf8_key;
2183   }
2184 
2185   *best_node_index = storage_->GetNodeIndex(cur_node);
2186   *key_offset = reinterpret_cast<const char *>(cur_key) - key;
2187 }
2188 
FindNewBranchingPrefixLength(const char * key,bool utf8) const2189 int IcingDynamicTrie::FindNewBranchingPrefixLength(const char *key,
2190                                                    bool utf8) const {
2191   if (storage_->empty()) {
2192     return kNoBranchFound;
2193   }
2194 
2195   uint32_t best_node_index;
2196   int key_offset;
2197   FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true, utf8);
2198   const Node *cur_node = storage_->GetNode(best_node_index);
2199   const char *cur_key = key + key_offset;
2200   if (cur_node->is_leaf()) {
2201     // Prefix in the trie. Split at leaf.
2202     const char *prev_suffix = storage_->GetSuffix(cur_node->next_index());
2203     while (*prev_suffix != '\0' && *prev_suffix == *cur_key) {
2204       prev_suffix++;
2205       cur_key++;
2206     }
2207 
2208     // Equal strings? No branching.
2209     if (*prev_suffix == '\0' && *cur_key == '\0') {
2210       return kNoBranchFound;
2211     }
2212 
2213     if (utf8) {
2214       // Rewind to utf8 boundary.
2215       size_t offset = i18n_utils::SafeTruncateUtf8Length(key, cur_key - key);
2216       cur_key = key + offset;
2217     }
2218 
2219     return cur_key - key;
2220   } else if (cur_node->log2_num_children() == 0) {
2221     // Intermediate node going from no branching to branching.
2222     return cur_key - key;
2223   }
2224 
2225   // If we've reached this point, then we're already at a branch point. So there
2226   // is no *new* branch point.
2227   return kNoBranchFound;
2228 }
2229 
FindBranchingPrefixLengths(const char * key,bool utf8) const2230 std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
2231                                                               bool utf8) const {
2232   std::vector<int> prefix_lengths;
2233 
2234   if (storage_->empty()) {
2235     return prefix_lengths;
2236   }
2237 
2238   const Node *cur_node = storage_->GetRootNode();
2239   const char *cur_key = key;
2240   while (*cur_key && !cur_node->is_leaf()) {
2241     // Branching prefix?
2242     if (cur_node->log2_num_children() > 0) {
2243       int len = cur_key - key;
2244       if (utf8) {
2245         // Do not cut mid-utf8. Walk up to utf8 boundary.
2246         len = i18n_utils::SafeTruncateUtf8Length(key, len);
2247         if (prefix_lengths.empty() || len != prefix_lengths.back()) {
2248           prefix_lengths.push_back(len);
2249         }
2250       } else {
2251         prefix_lengths.push_back(len);
2252       }
2253     }
2254 
2255     // Move to next.
2256     const Next *found = GetNextByChar(cur_node, *cur_key);
2257     if (found == nullptr) {
2258       break;
2259     }
2260     cur_node = storage_->GetNode(found->node_index());
2261 
2262     ++cur_key;
2263   }
2264   return prefix_lengths;
2265 }
2266 
GetDebugInfo(int verbosity,std::string * out) const2267 void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
2268   Stats stats;
2269   CollectStats(&stats);
2270   out->append(stats.DumpStats(verbosity));
2271 
2272   // Property files.
2273   vector<std::string> files;
2274   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
2275                                      &files)) {
2276     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
2277         "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
2278     return;
2279   }
2280   for (size_t i = 0; i < files.size(); i++) {
2281     IcingStringUtil::SStringAppendF(
2282         out, 1000, "Prop file %s size %" PRIu64 "\n",
2283         filesystem_->GetBasename(files[i].c_str()).c_str(),
2284         filesystem_->GetFileSize(files[i].c_str()));
2285   }
2286   IcingStringUtil::SStringAppendF(
2287       out, 1000, "Deleted file %s size %" PRIu64 "\n",
2288       filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(),
2289       filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
2290 }
2291 
min_free_fraction() const2292 double IcingDynamicTrie::min_free_fraction() const {
2293   if (!is_initialized()) {
2294     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2295   }
2296 
2297   return 1.0 - max(max(static_cast<double>(storage_->hdr().num_nodes()) /
2298                            storage_->hdr().max_nodes(),
2299                        static_cast<double>(storage_->hdr().num_nexts()) /
2300                            storage_->hdr().max_nexts()),
2301                    static_cast<double>(storage_->hdr().suffixes_size()) /
2302                        storage_->hdr().max_suffixes_size());
2303 }
2304 
value_size() const2305 uint32_t IcingDynamicTrie::value_size() const {
2306   return storage_->hdr().value_size();
2307 }
2308 
max_value_index() const2309 uint32_t IcingDynamicTrie::max_value_index() const {
2310   return storage_->hdr().max_suffixes_size();
2311 }
2312 
UpdateCrc()2313 uint32_t IcingDynamicTrie::UpdateCrc() {
2314   if (!is_initialized()) {
2315     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2316   }
2317 
2318   if (runtime_options_.storage_policy != RuntimeOptions::kMapSharedWithCrc) {
2319     return kNoCrc;
2320   }
2321 
2322   // Combine storage crc with property bitmap crcs.
2323   uint32_t crc = storage_->UpdateCrc();
2324 
2325   // Update crcs on bitmaps.
2326   for (size_t i = 0; i < property_bitmaps_.size(); ++i) {
2327     if (property_bitmaps_[i]) {
2328       // Combine property id with the bitmap crc.
2329       uint64_t this_crc = property_bitmaps_[i]->UpdateCrc();
2330       this_crc = (this_crc << 32) | i;
2331       crc = IcingStringUtil::UpdateCrc32(
2332           crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
2333     }
2334   }
2335   uint32_t this_crc = deleted_bitmap_->UpdateCrc();
2336   crc = IcingStringUtil::UpdateCrc32(
2337       crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
2338 
2339   return crc;
2340 }
2341 
OpenOrCreatePropertyBitmap(uint32_t property_id)2342 IcingFlashBitmap *IcingDynamicTrie::OpenOrCreatePropertyBitmap(
2343     uint32_t property_id) {
2344   if (!is_initialized()) {
2345     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2346   }
2347 
2348   if (property_id > kMaxPropertyId) {
2349     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
2350         "Property id %u out of range", property_id);
2351     return nullptr;
2352   }
2353 
2354   if (property_id >= property_bitmaps_.size()) {
2355     property_bitmaps_.resize(property_id + 1);
2356   }
2357   if (!property_bitmaps_[property_id]) {
2358     std::string filename;
2359     IcingStringUtil::SStringAppendF(
2360         &filename, property_bitmaps_prefix_.size() + 10, "%s%u",
2361         property_bitmaps_prefix_.c_str(), property_id);
2362     property_bitmaps_[property_id] =
2363         OpenAndInitBitmap(filename, false, filesystem_);
2364   }
2365   return property_bitmaps_[property_id].get();
2366 }
2367 
SetProperty(uint32_t value_index,uint32_t property_id)2368 bool IcingDynamicTrie::SetProperty(uint32_t value_index, uint32_t property_id) {
2369   IcingFlashBitmap *bitmap = OpenOrCreatePropertyBitmap(property_id);
2370   if (!bitmap) {
2371     return false;
2372   }
2373   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2374 
2375   // Also clear deleted bit.
2376   return bitmap->SetBit(idx, true) && deleted_bitmap_->SetBit(idx, false);
2377 }
2378 
ClearProperty(uint32_t value_index,uint32_t property_id)2379 bool IcingDynamicTrie::ClearProperty(uint32_t value_index,
2380                                      uint32_t property_id) {
2381   if (property_id >= property_bitmaps_.size() ||
2382       !property_bitmaps_[property_id]) {
2383     // No bitmap is ok for clearing.
2384     return true;
2385   }
2386 
2387   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2388   return property_bitmaps_[property_id]->SetBit(idx, false);
2389 }
2390 
SetDeleted(uint32_t value_index)2391 bool IcingDynamicTrie::SetDeleted(uint32_t value_index) {
2392   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2393   return deleted_bitmap_->SetBit(idx, true);
2394 }
2395 
ClearDeleted(uint32_t value_index)2396 bool IcingDynamicTrie::ClearDeleted(uint32_t value_index) {
2397   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2398   return deleted_bitmap_->SetBit(idx, false);
2399 }
2400 
2401 // Steps:
2402 // 1. Find the key in the trie.
2403 // 2. Remove the suffix and the value.
2404 // 3. Reset the nexts that point to the nodes to be removed.
2405 // 4. Sort any next array if needed.
Delete(const std::string_view key)2406 bool IcingDynamicTrie::Delete(const std::string_view key) {
2407   if (!is_initialized()) {
2408     ICING_LOG(ERROR) << "DynamicTrie not initialized";
2409     return false;
2410   }
2411 
2412   if (storage_->empty()) {
2413     // Nothing to delete.
2414     return true;
2415   }
2416 
2417   // Tries to find the key in the trie, starting from the root.
2418   const Node *current_node = storage_->GetRootNode();
2419 
2420   // The node after which we start to remove data.
2421   const Node *last_multichild_node = nullptr;
2422 
2423   // While visiting the trie nodes, we store the indices of Nexts that point
2424   // to all the nodes after last_multichild_node. Those nodes must be
2425   // consecutive and all have only one child. Resetting those Nexts means that
2426   // we remove the data of the key.
2427   std::vector<uint32_t> nexts_to_reset;
2428   nexts_to_reset.reserve(key.length());
2429 
2430   // Iterates through chars in the key, finds nodes in the trie until a leaf
2431   // node is reached. The max number of loops is key.length() + 1 because we
2432   // start from the root.
2433   for (size_t i = 0; i <= key.length(); ++i) {
2434     if (current_node->is_leaf()) {
2435       // Leaf node, now check the suffix.
2436       if (key.substr(i) != storage_->GetSuffix(current_node->next_index())) {
2437         // Key does not exist in the trie, nothing to delete.
2438         return true;
2439       }
2440       // Otherwise, key is found.
2441       break;
2442     }
2443 
2444     // Finds the next char.
2445     const Next *next;
2446     if (i == key.length()) {
2447       // When we're at the end of the key, the next char is the termination char
2448       // '\0'.
2449       next = GetNextByChar(current_node, '\0');
2450     } else {
2451       next = GetNextByChar(current_node, key[i]);
2452     }
2453 
2454     if (next == nullptr) {
2455       // Key does not exist in the trie, nothing to delete.
2456       return true;
2457     }
2458 
2459     // Checks the real size of next array.
2460     uint32_t next_array_buffer_size = 1u << current_node->log2_num_children();
2461     Next *next_array_start = storage_->GetMutableNextArray(
2462         current_node->next_index(), next_array_buffer_size);
2463     int valid_next_array_size =
2464         GetValidNextsSize(next_array_start, next_array_buffer_size);
2465     if (valid_next_array_size == 0) {
2466       // Key does not exist in the trie, nothing to delete.
2467       // This shouldn't happen, but we put a sanity check here in case something
2468       // is wrong.
2469       return true;
2470     } else if (valid_next_array_size == 1) {
2471       // Single-child branch will be deleted.
2472       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2473     } else {
2474       // We see a new node with multiple children, all the previously seen nodes
2475       // shouldn't be removed.
2476       last_multichild_node = current_node;
2477       nexts_to_reset.clear();
2478       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2479     }
2480 
2481     // Updates current_node.
2482     current_node = storage_->GetNode(next->node_index());
2483   }
2484   // Now we've found the key in the trie.
2485 
2486   ClearSuffixAndValue(current_node->next_index());
2487 
2488   // Resets nexts to remove key information.
2489   for (uint32_t next_index : nexts_to_reset) {
2490     ResetNext(next_index);
2491   }
2492   SortNextArray(last_multichild_node);
2493 
2494   return true;
2495 }
2496 
ClearPropertyForAllValues(uint32_t property_id)2497 bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
2498   if (!is_initialized()) {
2499     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2500   }
2501 
2502   PropertyReadersAll readers(*this);
2503   if (!readers.Exists(property_id)) {
2504     ICING_VLOG(1) << IcingStringUtil::StringPrintf(
2505         "Properties for id %u don't exist", property_id);
2506     return true;
2507   }
2508 
2509   // Mark values that have no other properties set as as deleted.
2510   uint64_t max_idx =
2511       ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size());
2512   // TODO(vishwajith) Inefficient to do this bit by bit, should be word by
2513   // word. Removing a corpus is likely rare enough that this is low priority.
2514   for (uint64_t i = 0; i < max_idx; ++i) {
2515     // See if the bit is set in our property map.
2516     if (readers.IsPropertyUnique(property_id, i)) {
2517       deleted_bitmap_->SetBit(i, true);
2518     }
2519   }
2520 
2521   // Now delete the bitmap file for this property.
2522   std::unique_ptr<IcingFlashBitmap> bitmap(
2523       std::move(property_bitmaps_[property_id]));
2524   // bitmap cannot be null here, because then readers.Exists(property_id) would
2525   // have returned false earlier, and we wouldn't get here.
2526   if (bitmap == nullptr) {
2527     ICING_LOG(ERROR) << "Property bitmap is null";
2528     return false;
2529   }
2530 
2531   return bitmap->Delete();
2532 }
2533 
Exists() const2534 bool IcingDynamicTrie::PropertyReaderBase::Exists() const {
2535   return bitmap_ != nullptr;
2536 }
2537 
HasProperty(uint32_t value_index) const2538 bool IcingDynamicTrie::PropertyReaderBase::HasProperty(
2539     uint32_t value_index) const {
2540   return bitmap_ &&
2541          bitmap_->GetBit(trie_.ValueIndexToPropertyBitmapIndex(value_index));
2542 }
2543 
PropertyReaderBase(const IcingDynamicTrie & trie,bool deleted,uint32_t property_id)2544 IcingDynamicTrie::PropertyReaderBase::PropertyReaderBase(
2545     const IcingDynamicTrie &trie, bool deleted, uint32_t property_id)
2546     : trie_(trie) {
2547   if (!trie.is_initialized()) {
2548     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2549   }
2550 
2551   if (deleted) {
2552     bitmap_ = trie.deleted_bitmap_.get();
2553   } else if (property_id < trie.property_bitmaps_.size()) {
2554     bitmap_ = trie.property_bitmaps_[property_id].get();
2555   } else {
2556     bitmap_ = nullptr;
2557   }
2558 }
2559 
PropertyReadersAll(const IcingDynamicTrie & trie)2560 IcingDynamicTrie::PropertyReadersAll::PropertyReadersAll(
2561     const IcingDynamicTrie &trie)
2562     : trie_(trie) {
2563   if (!trie.is_initialized()) {
2564     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2565   }
2566 }
2567 
Exists(uint32_t property_id) const2568 bool IcingDynamicTrie::PropertyReadersAll::Exists(uint32_t property_id) const {
2569   return property_id < trie_.property_bitmaps_.size() &&
2570          trie_.property_bitmaps_[property_id];
2571 }
2572 
HasProperty(uint32_t property_id,uint32_t value_index) const2573 bool IcingDynamicTrie::PropertyReadersAll::HasProperty(
2574     uint32_t property_id, uint32_t value_index) const {
2575   return property_id < trie_.property_bitmaps_.size() &&
2576          trie_.property_bitmaps_[property_id] &&
2577          trie_.property_bitmaps_[property_id]->GetBit(
2578              trie_.ValueIndexToPropertyBitmapIndex(value_index));
2579 }
2580 
IsPropertyUnique(uint32_t property_id,uint32_t value_index) const2581 bool IcingDynamicTrie::PropertyReadersAll::IsPropertyUnique(
2582     uint32_t property_id, uint32_t value_index) const {
2583   uint32_t idx = trie_.ValueIndexToPropertyBitmapIndex(value_index);
2584 
2585   // First check that value is set for the requested id.
2586   if (property_id >= trie_.property_bitmaps_.size() ||
2587       !trie_.property_bitmaps_[property_id] ||
2588       !trie_.property_bitmaps_[property_id]->GetBit(idx)) {
2589     return false;
2590   }
2591 
2592   // Now check that the value is not set for the rest.
2593   for (size_t i = 0; i < trie_.property_bitmaps_.size(); ++i) {
2594     if (i == property_id) {
2595       continue;
2596     }
2597     if (trie_.property_bitmaps_[i] && trie_.property_bitmaps_[i]->GetBit(idx)) {
2598       return false;
2599     }
2600   }
2601   return true;
2602 }
2603 
size() const2604 size_t IcingDynamicTrie::PropertyReadersAll::size() const {
2605   return trie_.property_bitmaps_.size();
2606 }
2607 
ValueIndexToPropertyBitmapIndex(uint32_t value_index) const2608 uint64_t IcingDynamicTrie::ValueIndexToPropertyBitmapIndex(
2609     uint32_t value_index) const {
2610   // We know that value indices are separated by at least 1 +
2611   // value_size() bytes (for the null terminator and the value).
2612   return value_index / (value_size() + 1);
2613 }
2614 
2615 // Testing hooks.
GetHeader(IcingDynamicTrieHeader * hdr) const2616 void IcingDynamicTrie::GetHeader(IcingDynamicTrieHeader *hdr) const {
2617   if (!is_initialized()) {
2618     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2619   }
2620 
2621   *hdr = storage_->hdr();
2622 }
2623 
SetHeader(const IcingDynamicTrieHeader & new_hdr)2624 void IcingDynamicTrie::SetHeader(const IcingDynamicTrieHeader &new_hdr) {
2625   if (!is_initialized()) {
2626     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2627   }
2628 
2629   storage_->hdr_.hdr = new_hdr;
2630   storage_->WriteHeader();
2631 }
2632 
2633 }  // namespace lib
2634 }  // namespace icing
2635