• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // We store the trie in three areas: nodes, nexts and suffixes.
16 //
17 // Nodes contain an index to a children array (kept in nexts) or to
18 // suffixes (for leaf nodes). Nexts contain children arrays of
19 // different sizes. Each child entry has the matched char and an index
20 // back into the nodes. Leaf nodes index into suffixes instead of the
21 // nexts array. Each suffix is a NULL-terminated suffix off the trie,
22 // followed by a 4-byte value associated with that key.
23 //
24 // Allocation
25 //
26 // Nodes are allocated and never removed. Nexts contain arrays of
27 // sizes in power-of-2 increments, i.e. 1, 2, 4, ..., 256. When the
28 // number of children of a node increases, it is relocated to an array
29 // with the proper size. The (smaller) unused array is added to a free
30 // list. A free list is kept for each array size. Allocations happen
31 // from the free list first, and then from the end of the nexts
32 // array. Suffixes are never freed or compacted. If a node wants to
33 // refer to a smaller suffix, it moves the pointer forward and the
34 // characters before the new pointer are wasted.
35 //
36 // Keys can contain any character except '\0'. The '\0' char is
37 // special in that it specifies an end-of-key in the child array.
38 //
39 // Ideas to try:
40 //
41 // - Put suffix index in a Next instead of creating a leaf node.
42 // - Change allocation buckets to 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, ..., 256
43 // - Compact next array
44 // - GroupVarByte and delta-encode the next array
45 // - Collapse nodes with single children
46 //
47 // Persistence
48 //
49 // We persist the trie in a binary format such that resurrecting the
50 // trie is simply a few file reads. The file is laid out as such:
51 //
52 // - Header
53 // - Nodes
54 // - Nexts
55 // - Suffixes
56 //
57 // Each section is aligned to IcingMMapper::system_page_size(). The max
58 // requested value for each array is pre-allocated in the file. When
59 // we make modifications to the arrays, we set bits in a dirty bitmap
60 // of pages. No changes get written to disk until an explicit call to
61 // Flush. Then we only write the pages that have their dirty bit set.
62 
63 #include "icing/legacy/index/icing-dynamic-trie.h"
64 
65 #include <fcntl.h>
66 #include <sys/mman.h>
67 #include <sys/stat.h>
68 #include <unistd.h>
69 
70 #include <algorithm>
71 #include <cerrno>
72 #include <cinttypes>
73 #include <cstdint>
74 #include <cstring>
75 #include <memory>
76 #include <utility>
77 
78 #include "icing/legacy/core/icing-packed-pod.h"
79 #include "icing/legacy/core/icing-string-util.h"
80 #include "icing/legacy/core/icing-timer.h"
81 #include "icing/legacy/index/icing-array-storage.h"
82 #include "icing/legacy/index/icing-filesystem.h"
83 #include "icing/legacy/index/icing-flash-bitmap.h"
84 #include "icing/legacy/index/icing-mmapper.h"
85 #include "icing/util/i18n-utils.h"
86 #include "icing/util/logging.h"
87 #include "icing/util/math-util.h"
88 
89 using std::inplace_merge;
90 using std::lower_bound;
91 using std::max;
92 using std::mismatch;
93 using std::pair;
94 using std::sort;
95 using std::vector;
96 
97 namespace icing {
98 namespace lib {
99 
100 namespace {
101 constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
102 constexpr uint32_t kInvalidNextIndex = ~0U;
103 
104 // Returns the number of valid nexts in the array.
GetValidNextsSize(IcingDynamicTrie::Next * next_array_start,int next_array_length)105 int GetValidNextsSize(IcingDynamicTrie::Next *next_array_start,
106                       int next_array_length) {
107   int valid_nexts_length = 0;
108   for (; valid_nexts_length < next_array_length &&
109          next_array_start[valid_nexts_length].node_index() != kInvalidNodeIndex;
110        ++valid_nexts_length) {
111   }
112   return valid_nexts_length;
113 }
114 }  // namespace
115 
116 // Based on the bit field widths.
117 const uint32_t IcingDynamicTrie::Options::kMaxNodes = (1U << 24) - 1;
118 const uint32_t IcingDynamicTrie::Options::kMaxNexts = (1U << 27) - 1;
119 const uint32_t IcingDynamicTrie::Options::kMaxSuffixesSize = 1U << 27;
120 const uint32_t IcingDynamicTrie::Options::kMaxValueSize = 1U << 16;
121 
122 const uint32_t IcingDynamicTrie::kInvalidSuffixIndex = ~0U;
123 
124 const int IcingDynamicTrie::kMaxNextArraySize;
125 const int IcingDynamicTrie::kNumNextAllocationBuckets;
126 
127 const uint32_t IcingDynamicTrie::kMaxPropertyId;
128 
129 const uint32_t IcingDynamicTrie::kInvalidValueIndex;
130 
131 const uint32_t IcingDynamicTrie::kNoCrc;
132 
133 // Manages logical node candidates while searching for possible
134 // variant matches. Currently implemented as depth first search. The
135 // max stack depth is key length * variant fanout. Since max variant
136 // fanout is 3, we don't need to worry about blowup of the depth first
137 // search stack.
138 //
139 // Keeps track of original matched string (the string actually present
140 // in the trie) for every candidate.
141 class IcingDynamicTrie::CandidateSet {
142  public:
143   struct Candidate {
144     LogicalNode logical_node;
145     const char *key;
146     int matched_prefix_len;
147     std::string matched_span;
148 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate149     Candidate() {}
150 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate151     Candidate(const LogicalNode &logical_node_in, const char *key_in,
152               int matched_prefix_len_in, const char *matched_span_in,
153               int matched_span_len_in)
154         : logical_node(logical_node_in),
155           key(key_in),
156           matched_prefix_len(matched_prefix_len_in),
157           matched_span(matched_span_in, matched_span_len_in) {}
158 
matched_lenicing::lib::IcingDynamicTrie::CandidateSet::Candidate159     int matched_len() const { return matched_prefix_len + matched_span.size(); }
160   };
161 
CandidateSet(bool prefix)162   explicit CandidateSet(bool prefix) : prefix_(prefix) {}
163 
IsTerminal(const char * key,uint32_t value_index) const164   bool IsTerminal(const char *key, uint32_t value_index) const {
165     // Terminal match condition:
166     //
167     // 1. Key was entirely consumed.
168     // 2. The entire suffix was consumed (hence value index is
169     //    valid). OR, we are ok with prefix matches.
170     return *key == 0 && (value_index != kInvalidValueIndex || prefix_);
171   }
172 
173   // Push a terminal or non-terminal.
Push(const LogicalNode & logical_node,const char * key,uint32_t value_index,int matched_prefix_len,const char * matched_span,int matched_span_len)174   void Push(const LogicalNode &logical_node, const char *key,
175             uint32_t value_index, int matched_prefix_len,
176             const char *matched_span, int matched_span_len) {
177     if (!AddMatchIfTerminal(key, value_index, matched_span, matched_span_len)) {
178       PushNonTerminal(logical_node, key, matched_prefix_len, matched_span,
179                       matched_span_len);
180     }
181   }
182 
AddMatchIfTerminal(const char * key,uint32_t value_index,const char * matched_span,int matched_span_len)183   bool AddMatchIfTerminal(const char *key, uint32_t value_index,
184                           const char *matched_span, int matched_span_len) {
185     if (!IsTerminal(key, value_index)) {
186       return false;
187     }
188 
189     // Terminal match.
190     matches_.push_back(OriginalMatch());
191     OriginalMatch *match = &matches_.back();
192     match->value_index = value_index;
193     match->orig.reserve(cur_prefix_.size() + matched_span_len);
194     match->orig.append(cur_prefix_).append(matched_span, matched_span_len);
195     return true;
196   }
197 
198   // Push a definite non-terminal.
PushNonTerminal(const LogicalNode & logical_node,const char * key,int matched_prefix_len,const char * matched_span,int matched_span_len)199   void PushNonTerminal(const LogicalNode &logical_node, const char *key,
200                        int matched_prefix_len, const char *matched_span,
201                        int matched_span_len) {
202     candidates_.push_back(Candidate(logical_node, key, matched_prefix_len,
203                                     matched_span, matched_span_len));
204   }
205 
Pop(Candidate * candidate)206   void Pop(Candidate *candidate) {
207     *candidate = candidates_.back();
208     if (cur_prefix_.size() < candidate->matched_prefix_len) {
209       ICING_LOG(FATAL)
210           << "Length of current prefix is smaller than length of matched "
211              "prefer, there're inconsistencies in dynamic trie.";
212     }
213 
214     cur_prefix_.resize(candidate->matched_prefix_len);
215     cur_prefix_.append(candidate->matched_span);
216     candidates_.pop_back();
217   }
218 
empty() const219   bool empty() const { return candidates_.empty(); }
220 
Release(vector<OriginalMatch> * ret)221   void Release(vector<OriginalMatch> *ret) {
222     if (!empty()) {
223       ICING_LOG(FATAL) << "Candidate set not empty before releasing matches";
224     }
225 
226     ret->swap(matches_);
227 
228     cur_prefix_.clear();
229     candidates_.clear();
230     matches_.clear();
231   }
232 
233  private:
234   const bool prefix_;
235 
236   std::string cur_prefix_;
237   vector<Candidate> candidates_;
238 
239   vector<IcingDynamicTrie::OriginalMatch> matches_;
240 };
241 
242 // Options.
is_valid() const243 bool IcingDynamicTrie::Options::is_valid() const {
244   return max_nodes <= kMaxNodes && max_nodes > 0 && max_nexts <= kMaxNexts &&
245          max_nexts > 0 && max_suffixes_size <= kMaxSuffixesSize &&
246          max_suffixes_size > 0 && value_size <= kMaxValueSize;
247 }
248 
249 // IcingDynamicTrieStorage
250 class IcingDynamicTrie::IcingDynamicTrieStorage {
251  public:
252   IcingDynamicTrieStorage(const std::string &file_basename,
253                           const RuntimeOptions &runtime_options,
254                           const IcingFilesystem *filesystem);
255   ~IcingDynamicTrieStorage();
256 
is_initialized() const257   bool is_initialized() const { return hdr_mmapper_.is_valid(); }
258 
259   bool CreateIfNotExist(const Options &options);
260   bool Init();
261   static bool Remove(const std::string &file_basename,
262                      const IcingFilesystem &filesystem);
263   bool Sync();
264   uint64_t GetDiskUsage() const;
265 
266   // Returns the size of the elements held in the trie. This excludes the size
267   // of any internal metadata of the trie, e.g. the trie's header.
268   uint64_t GetElementsFileSize() const;
269 
270   void Warm();
271 
272   void Clear();
273 
empty() const274   bool empty() const { return hdr().num_nodes() == 0; }
275 
276   // Never cast off these consts when writing to the arrays. Always
277   // use the GetMutable* helpers above.
GetNode(uint32_t idx) const278   const Node *GetNode(uint32_t idx) const {
279     return &array_storage_[NODE].array_cast<Node>()[idx];
280   }
GetRootNode() const281   const Node *GetRootNode() const { return GetNode(0); }
GetNext(uint32_t idx,int child) const282   const Next *GetNext(uint32_t idx, int child) const {
283     return &array_storage_[NEXT].array_cast<Next>()[idx + child];
284   }
GetSuffix(uint32_t idx) const285   const char *GetSuffix(uint32_t idx) const {
286     return &array_storage_[SUFFIX].array_cast<char>()[idx];
287   }
288 
GetNodeIndex(const Node * node) const289   uint32_t GetNodeIndex(const Node *node) const { return node - GetNode(0); }
GetNextArrayIndex(const Next * next) const290   uint32_t GetNextArrayIndex(const Next *next) const {
291     return next - GetNext(0, 0);
292   }
GetSuffixIndex(const char * suffix) const293   uint32_t GetSuffixIndex(const char *suffix) const {
294     return suffix - GetSuffix(0);
295   }
296 
297   // By default, nodes_, nexts_ and suffixes_ are read-only. This
298   // returns a writable element or array within and sets
299   // dirty_pages_[array_type] as a side effect, assuming the mutable
300   // area will get written to.
301   Node *GetMutableNode(uint32_t idx);
302   Next *GetMutableNextArray(uint32_t idx, uint32_t len);
303   char *GetMutableSuffix(uint32_t idx, uint32_t len);
304 
305   // Update crcs based on current contents. Returns all_crc or kNoCrc.
306   uint32_t UpdateCrc();
307 
308   // Allocators.
309   uint32_t nodes_left() const;
310   uint32_t nexts_left() const;
311   uint32_t suffixes_left() const;
312 
313   // REQUIRES: nodes_left() > 0.
314   Node *AllocNode();
315   // REQUIRES: nexts_left() >= kMaxNextArraySize.
316   Next *AllocNextArray(int size);
317   void FreeNextArray(Next *next, int log2_size);
318   // REQUIRES: suffixes_left() >= strlen(suffix) + 1 + value_size()
319   uint32_t MakeSuffix(const char *suffix, const void *value,
320                       uint32_t *value_index);
321 
hdr() const322   const IcingDynamicTrieHeader &hdr() const { return hdr_.hdr; }
323 
value_size() const324   uint32_t value_size() const { return hdr().value_size(); }
325 
326   void FillDirtyPageStats(Stats *stats) const;
327 
inc_num_keys()328   void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); }
329 
330  private:
331   friend void IcingDynamicTrie::SetHeader(
332       const IcingDynamicTrieHeader &new_hdr);
333 
334   enum ArrayType { NODE, NEXT, SUFFIX, NUM_ARRAY_TYPES };
335 
336   // Returns all filenames that are part of the storage. First
337   // filename is the header and the rest correspond to ArrayType enum
338   // values.
339   static void GetFilenames(const std::string &file_basename,
340                            vector<std::string> *filenames);
341   static std::string GetHeaderFilename(const std::string &file_basename);
342 
343   uint32_t GetHeaderCrc() const;
344 
345   uint32_t GetAllCrc() const;
346 
347   uint32_t UpdateCrcInternal(bool write_hdr);
348 
349   // Initializes hdr_ with options and writes the resulting header to disk.
350   bool CreateNewHeader(IcingScopedFd sfd, const Options &options);
351   bool WriteHeader();
352 
353   // Header block. On-disk header block format is as follows:
354   //
355   // |serialized-header|pad|crcs|
356   // <--- system_page_size() --->
357 
358   // Wrapper for header protobuf.
359   class Header {
360     // Serialized format:
361     //
362     // magic(4)|size(4)|serialized hdr(size)
363     static const uint32_t kMagic;
364     // TODO(b/77482303) : Remove version from the IcingFlashBitmap header -
365     // magic makes it unnecessary.
366     static const uint32_t kCurVersion;
367 
368    public:
369     void Init(const Options &options);
370     bool Init(const uint8_t *buf, uint32_t buf_size);
Invalidate()371     void Invalidate() { hdr.Clear(); }
372     bool SerializeToArray(uint8_t *buf, uint32_t buf_size) const;
373     bool Verify();
374 
375     IcingDynamicTrieHeader hdr;
376   };
377 
378   std::string file_basename_;
379 
380   Header hdr_;
381 
382   IcingMMapper hdr_mmapper_;
383 
384   struct Crcs {
385     uint32_t all_crc;
386     uint32_t header_crc;
387     uint32_t array_crcs[NUM_ARRAY_TYPES];
388   };
389   Crcs *crcs_;
390 
serialized_header_max()391   static uint32_t serialized_header_max() {
392     return IcingMMapper::system_page_size() - sizeof(Crcs);
393   }
394 
395   RuntimeOptions runtime_options_;
396 
397   // Info kept about each array (NODE, NEXT, SUFFIX) to manage
398   // storage.
399   IcingScopedFd array_fds_[NUM_ARRAY_TYPES];
400   std::vector<IcingArrayStorage> array_storage_;
401 
402   // Legacy file system. Switch to use the new Filesystem class instead.
403   const IcingFilesystem *filesystem_;
404 };
405 
IcingDynamicTrieStorage(const std::string & file_basename,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)406 IcingDynamicTrie::IcingDynamicTrieStorage::IcingDynamicTrieStorage(
407     const std::string &file_basename, const RuntimeOptions &runtime_options,
408     const IcingFilesystem *filesystem)
409     : file_basename_(file_basename),
410       hdr_mmapper_(false, MAP_SHARED),
411       crcs_(nullptr),
412       runtime_options_(runtime_options),
413       array_storage_(NUM_ARRAY_TYPES, IcingArrayStorage(*filesystem)),
414       filesystem_(filesystem) {}
415 
~IcingDynamicTrieStorage()416 IcingDynamicTrie::IcingDynamicTrieStorage::~IcingDynamicTrieStorage() {
417   if (is_initialized()) {
418     for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
419       array_storage_[i].Reset();
420     }
421   }
422 }
423 
GetFilenames(const std::string & file_basename,vector<std::string> * filenames)424 void IcingDynamicTrie::IcingDynamicTrieStorage::GetFilenames(
425     const std::string &file_basename, vector<std::string> *filenames) {
426   const char *kArrayFilenameSuffixes[NUM_ARRAY_TYPES] = {
427       ".n",
428       ".x",
429       ".s",
430   };
431 
432   filenames->clear();
433   filenames->push_back(GetHeaderFilename(file_basename));
434   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
435     filenames->push_back(file_basename + kArrayFilenameSuffixes[i]);
436   }
437 }
438 
GetHeaderFilename(const std::string & file_basename)439 std::string IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderFilename(
440     const std::string &file_basename) {
441   constexpr char kHeaderFilenameSuffix[] = ".h";
442   return file_basename + kHeaderFilenameSuffix;
443 }
444 
Init()445 bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
446   bool init_crcs = false;
447   const bool map_shared =
448       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc;
449 
450   // Open files.
451   vector<std::string> filenames;
452   GetFilenames(file_basename_, &filenames);
453   for (size_t i = 0; i < filenames.size(); i++) {
454     uint64_t file_size = filesystem_->GetFileSize(filenames[i].c_str());
455     if (file_size == IcingFilesystem::kBadFileSize) {
456       goto failed;
457     }
458     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
459     if (!sfd.is_valid()) {
460       goto failed;
461     }
462     // The first filename is the header and the rest correspond to ArrayType
463     // enum values. The header's fd can be closed immediately after mmapping
464     // (see b/114830334). Other files' fds are tracked in array_fds_ for later
465     // closing.
466     if (i == 0) {
467       // Header.
468       if (file_size != IcingMMapper::system_page_size()) {
469         ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
470             "Trie hdr wrong size: %" PRIu64, file_size);
471         goto failed;
472       }
473 
474       // Open hdr.
475       hdr_mmapper_.Remap(sfd.get(), 0, IcingMMapper::system_page_size());
476       if (!hdr_mmapper_.is_valid()) {
477         ICING_LOG(ERROR) << "Trie map header failed";
478         goto failed;
479       }
480     } else {
481       array_fds_[i - 1] = std::move(sfd);
482     }
483   }
484 
485   // Point crcs_ to correct region.
486   crcs_ = reinterpret_cast<Crcs *>(hdr_mmapper_.address() +
487                                    serialized_header_max());
488   if (crcs_->header_crc == kNoCrc) {
489     // Create crcs.
490     crcs_->header_crc = GetHeaderCrc();
491 
492     // Do the same for the arrays.
493     init_crcs = true;
494   } else {
495     // Verify crc.
496     if (crcs_->header_crc != GetHeaderCrc()) {
497       ICING_LOG(ERROR) << "Trie header crc failed";
498       goto failed;
499     }
500   }
501 
502   // Deserialize and verify header.
503   if (!hdr_.Init(hdr_mmapper_.address(),
504                  IcingMMapper::system_page_size() - sizeof(Crcs)) ||
505       !hdr_.Verify()) {
506     ICING_LOG(ERROR) << "Trie reading header failed";
507     goto failed;
508   }
509 
510   // We have the header set up. Now read in the arrays.
511   if (!array_storage_[NODE].Init(array_fds_[NODE].get(), 0, map_shared,
512                                  sizeof(Node), hdr_.hdr.num_nodes(),
513                                  hdr_.hdr.max_nodes(), &crcs_->array_crcs[NODE],
514                                  init_crcs)) {
515     ICING_LOG(ERROR) << "Trie mmap node failed";
516     goto failed;
517   }
518 
519   if (!array_storage_[NEXT].Init(array_fds_[NEXT].get(), 0, map_shared,
520                                  sizeof(Next), hdr_.hdr.num_nexts(),
521                                  hdr_.hdr.max_nexts(), &crcs_->array_crcs[NEXT],
522                                  init_crcs)) {
523     ICING_LOG(ERROR) << "Trie mmap next failed";
524     goto failed;
525   }
526 
527   if (!array_storage_[SUFFIX].Init(array_fds_[SUFFIX].get(), 0, map_shared,
528                                    sizeof(char), hdr_.hdr.suffixes_size(),
529                                    hdr_.hdr.max_suffixes_size(),
530                                    &crcs_->array_crcs[SUFFIX], init_crcs)) {
531     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
532         "Trie mmap suffix failed");
533     goto failed;
534   }
535 
536   // Overall crc.
537   if (init_crcs) {
538     crcs_->all_crc = GetAllCrc();
539   } else {
540     // Verify crc.
541     if (crcs_->all_crc != GetAllCrc()) {
542       ICING_LOG(ERROR) << "Trie all crc failed";
543       goto failed;
544     }
545   }
546 
547   return true;
548 
549 failed:
550   crcs_ = nullptr;
551   hdr_mmapper_.Unmap();
552   hdr_.Invalidate();
553   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
554     array_storage_[i].Reset();
555     array_fds_[i].reset();
556   }
557 
558   return false;
559 }
560 
CreateIfNotExist(const Options & options)561 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateIfNotExist(
562     const Options &options) {
563   vector<std::string> filenames;
564   GetFilenames(file_basename_, &filenames);
565 
566   // Check already exists. Just header file check is enough.
567   if (filesystem_->FileExists(filenames[0].c_str())) {
568     return true;
569   }
570 
571   // Ensure the storage directory exists
572   std::string storage_dir = filesystem_->GetDirname(filenames[0].c_str());
573   if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
574     return false;
575   }
576 
577   // Create files.
578   for (size_t i = 0; i < filenames.size(); i++) {
579     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
580     if (!sfd.is_valid()) {
581       Remove(file_basename_, *filesystem_);
582       return false;
583     }
584 
585     if (i == 0) {
586       if (!CreateNewHeader(std::move(sfd), options)) {
587         ICING_LOG(ERROR) << "Serialize trie header failed";
588         Remove(file_basename_, *filesystem_);
589         return false;
590       }
591     } else {
592       // Crcs are automatically kNoCrc so they will be initialized
593       // upon first call to Init.
594       if (!filesystem_->Truncate(*sfd, 0)) {
595         Remove(file_basename_, *filesystem_);
596         return false;
597       }
598     }
599   }
600   return true;
601 }
602 
CreateNewHeader(IcingScopedFd sfd,const Options & options)603 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateNewHeader(
604     IcingScopedFd sfd, const Options &options) {
605   ICING_VLOG(1) << "Creating header with write+sync";
606   hdr_.Init(options);
607   auto buf = std::make_unique<uint8_t[]>(IcingMMapper::system_page_size());
608   // serialized_header_max must be less than system_page_size so we don't
609   // overflow buf when serializing the header.
610   if (serialized_header_max() > IcingMMapper::system_page_size()) {
611     ICING_LOG(FATAL) << "serialized_header_max exceeds system page size";
612   }
613 
614   return hdr_.SerializeToArray(buf.get(), serialized_header_max()) &&
615          filesystem_->Write(sfd.get(), buf.get(),
616                             IcingMMapper::system_page_size()) &&
617          filesystem_->DataSync(sfd.get());
618 }
619 
Remove(const std::string & file_basename,const IcingFilesystem & filesystem)620 bool IcingDynamicTrie::IcingDynamicTrieStorage::Remove(
621     const std::string &file_basename, const IcingFilesystem &filesystem) {
622   bool success = true;
623   vector<std::string> files;
624   GetFilenames(file_basename, &files);
625   for (size_t i = 0; i < files.size(); i++) {
626     if (!filesystem.DeleteFile(files[i].c_str())) {
627       success = false;
628     }
629   }
630   return success;
631 }
632 
Warm()633 void IcingDynamicTrie::IcingDynamicTrieStorage::Warm() {
634   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
635     array_storage_[i].Warm();
636   }
637 }
638 
Clear()639 void IcingDynamicTrie::IcingDynamicTrieStorage::Clear() {
640   if (!is_initialized()) {
641     ICING_LOG(FATAL) << "DynamicTrie not initialized";
642   }
643 
644   // Clear header.
645   hdr_.hdr.set_num_nodes(0);
646   hdr_.hdr.set_num_nexts(0);
647   hdr_.hdr.set_suffixes_size(0);
648   for (int i = 0; i < hdr_.hdr.free_lists_size(); i++) {
649     hdr_.hdr.set_free_lists(i, kInvalidNextIndex);
650   }
651   hdr_.hdr.set_num_keys(0);
652 
653   // Clear array storage.
654   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
655     array_storage_[i].Clear();
656   }
657 
658   // Copy to persistence.
659   WriteHeader();
660 }
661 
Sync()662 bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
663   if (!is_initialized()) {
664     ICING_LOG(FATAL) << "DynamicTrie not initialized";
665   }
666 
667   uint32_t total_flushed = 0;
668   bool success = true;
669 
670   // Sync all array types.
671   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
672     total_flushed += array_storage_[i].Sync();
673     if (!filesystem_->DataSync(array_fds_[i].get())) {
674       ICING_LOG(ERROR) << "Unable to sync data for flushing";
675       success = false;
676     }
677   }
678 
679   if (!WriteHeader()) {
680     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
681         "Flushing trie header failed: %s", strerror(errno));
682     success = false;
683   }
684 
685   // Need to update CRCs before we sync the header mmap.
686   UpdateCrcInternal(false);
687 
688   // Sync header.
689   if (!hdr_mmapper_.Sync()) {
690     ICING_LOG(ERROR) << "Unable to sync trie header for flushing";
691     success = false;
692   }
693 
694   if (total_flushed > 0) {
695     ICING_VLOG(1) << IcingStringUtil::StringPrintf("Flushing %u pages of trie",
696                                                    total_flushed);
697   }
698 
699   return success;
700 }
701 
GetDiskUsage() const702 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetDiskUsage() const {
703   // Trie files themselves.
704   uint64_t total = 0;
705   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
706     IcingFilesystem::IncrementByOrSetInvalid(
707         filesystem_->GetDiskUsage(array_fds_[i].get()), &total);
708   }
709 
710   // Header.
711   std::string header_filename = GetHeaderFilename(file_basename_);
712   IcingFilesystem::IncrementByOrSetInvalid(
713       filesystem_->GetFileDiskUsage(header_filename.c_str()), &total);
714 
715   return total;
716 }
717 
GetElementsFileSize() const718 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetElementsFileSize()
719     const {
720   // Trie files themselves, exclude size of the header. These arrays are dense,
721   // not sparse, so use file size for more accurate numbers.
722   uint64_t total = 0;
723   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
724     IcingFilesystem::IncrementByOrSetInvalid(
725         filesystem_->GetFileSize(array_fds_[i].get()), &total);
726   }
727   return total;
728 }
729 
AllocNode()730 IcingDynamicTrie::Node *IcingDynamicTrie::IcingDynamicTrieStorage::AllocNode() {
731   if (nodes_left() == 0) {
732     ICING_LOG(FATAL) << "No allocated nodes left";
733   }
734 
735   hdr_.hdr.set_num_nodes(hdr_.hdr.num_nodes() + 1);
736   return GetMutableNode(hdr_.hdr.num_nodes() - 1);
737 }
738 
739 IcingDynamicTrie::Next *
AllocNextArray(int size)740 IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
741   if (size > kMaxNextArraySize) {
742     ICING_LOG(FATAL) << "Array size exceeds the max 'next' array size";
743   }
744 
745   if (nexts_left() < static_cast<uint32_t>(kMaxNextArraySize)) {
746     ICING_LOG(FATAL) << "'next' buffer not enough";
747   }
748 
749   // Compute ceil(log2(size)).
750   int log2_size = 0;
751   while ((1 << log2_size) < size) log2_size++;
752   // Note: size <= aligned_size <= kMaxNextArraySize
753   int aligned_size = 1 << log2_size;
754 
755   // Look in free list.
756   Next *ret;
757   if (hdr_.hdr.free_lists(log2_size) != kInvalidNextIndex) {
758     ret = GetMutableNextArray(hdr_.hdr.free_lists(log2_size), aligned_size);
759     uint32_t next_link = ret->next_index();
760     if (next_link != kInvalidNextIndex && next_link >= hdr_.hdr.max_nexts()) {
761       ICING_LOG(FATAL) << "'next' index is out of range";
762     }
763     hdr_.hdr.set_free_lists(log2_size, next_link);
764   } else {
765     // Allocate a new one.
766     ret = GetMutableNextArray(hdr_.hdr.num_nexts(), aligned_size);
767     hdr_.hdr.set_num_nexts(hdr_.hdr.num_nexts() + aligned_size);
768   }
769 
770   // Fill with char 0xff so we are sorted properly.
771   for (int i = 0; i < aligned_size; i++) {
772     ret[i].set_val(0xff);
773     ret[i].set_node_index(kInvalidNodeIndex);
774   }
775   return ret;
776 }
777 
FreeNextArray(Next * next,int log2_size)778 void IcingDynamicTrie::IcingDynamicTrieStorage::FreeNextArray(Next *next,
779                                                               int log2_size) {
780   if (GetNextArrayIndex(next) + (1 << log2_size) > hdr_.hdr.max_nexts()) {
781     ICING_LOG(FATAL) << "'next' array is out of range";
782   }
783 
784   // Put it in free list.
785   next->set_next_index(hdr_.hdr.free_lists(log2_size));
786   hdr_.hdr.set_free_lists(log2_size, GetNextArrayIndex(next));
787 }
788 
MakeSuffix(const char * suffix,const void * value,uint32_t * value_index)789 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::MakeSuffix(
790     const char *suffix, const void *value, uint32_t *value_index) {
791   int suffix_len = strlen(suffix);
792   if (suffixes_left() < suffix_len + 1 + value_size()) {
793     ICING_LOG(FATAL) << "'suffix' buffer not enough";
794   }
795 
796   char *start =
797       GetMutableSuffix(hdr_.hdr.suffixes_size(), suffix_len + 1 + value_size());
798   memcpy(start, suffix, suffix_len + 1);
799   memcpy(start + suffix_len + 1, value, value_size());
800   if (value_index) *value_index = GetSuffixIndex(start + suffix_len + 1);
801   hdr_.hdr.set_suffixes_size(hdr_.hdr.suffixes_size() + suffix_len + 1 +
802                              value_size());
803 
804   return GetSuffixIndex(start);
805 }
806 
GetHeaderCrc() const807 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderCrc() const {
808   return IcingStringUtil::UpdateCrc32(
809       0, reinterpret_cast<const char *>(hdr_mmapper_.address()),
810       serialized_header_max());
811 }
812 
GetAllCrc() const813 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetAllCrc() const {
814   // Append array crcs to header crc.
815   return IcingStringUtil::UpdateCrc32(
816       crcs_->header_crc, reinterpret_cast<const char *>(crcs_->array_crcs),
817       sizeof(crcs_->array_crcs));
818 }
819 
UpdateCrc()820 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrc() {
821   return UpdateCrcInternal(true);
822 }
823 
UpdateCrcInternal(bool write_hdr)824 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrcInternal(
825     bool write_hdr) {
826   if (write_hdr && !WriteHeader()) {
827     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
828         "Flushing trie header failed: %s", strerror(errno));
829   }
830 
831   crcs_->header_crc = GetHeaderCrc();
832 
833   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
834     array_storage_[i].UpdateCrc();
835   }
836 
837   crcs_->all_crc = GetAllCrc();
838 
839   return crcs_->all_crc;
840 }
841 
WriteHeader()842 bool IcingDynamicTrie::IcingDynamicTrieStorage::WriteHeader() {
843   return hdr_.SerializeToArray(hdr_mmapper_.address(), serialized_header_max());
844 }
845 
846 IcingDynamicTrie::Node *
GetMutableNode(uint32_t idx)847 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNode(uint32_t idx) {
848   return array_storage_[NODE].GetMutableMem<Node>(idx, 1);
849 }
850 
851 IcingDynamicTrie::Next *
GetMutableNextArray(uint32_t idx,uint32_t len)852 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNextArray(uint32_t idx,
853                                                                uint32_t len) {
854   return array_storage_[NEXT].GetMutableMem<Next>(idx, len);
855 }
856 
GetMutableSuffix(uint32_t idx,uint32_t len)857 char *IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableSuffix(
858     uint32_t idx, uint32_t len) {
859   return array_storage_[SUFFIX].GetMutableMem<char>(idx, len);
860 }
861 
862 // Header functions.
863 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kMagic =
864     0x6dfba6ae;
865 // For future revisions, this should be synced with global index version.
866 // See comments on Upgrade() in native-index-impl.h for versioning.
867 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kCurVersion =
868     4;
869 
Init(const IcingDynamicTrie::Options & options)870 void IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
871     const IcingDynamicTrie::Options &options) {
872   hdr.Clear();
873 
874   hdr.set_version(kCurVersion);
875   hdr.set_max_nodes(options.max_nodes);
876   hdr.set_max_nexts(options.max_nexts);
877   hdr.set_max_suffixes_size(options.max_suffixes_size);
878   hdr.set_value_size(options.value_size);
879 
880   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
881     hdr.add_free_lists(kInvalidNextIndex);
882   }
883 }
884 
Init(const uint8_t * buf,uint32_t buf_size)885 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
886     const uint8_t *buf, uint32_t buf_size) {
887   // Check magic and length.
888   if (buf_size <= sizeof(kMagic) + sizeof(uint32_t)) {
889     ICING_LOG(ERROR) << "Trie header too short";
890     return false;
891   }
892 
893   uint32_t magic;
894   memcpy(&magic, buf, sizeof(magic));
895   if (magic != kMagic) {
896     ICING_LOG(ERROR) << "Trie header magic mismatch";
897     return false;
898   }
899   uint32_t len;
900   memcpy(&len, buf + sizeof(magic), sizeof(len));
901   if (len > buf_size - sizeof(magic) - sizeof(len)) {
902     ICING_LOG(ERROR) << "Trie header too short";
903     return false;
904   }
905 
906   return hdr.ParseFromArray(buf + sizeof(magic) + sizeof(len), len);
907 }
908 
SerializeToArray(uint8_t * buf,uint32_t buf_size) const909 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
910     uint8_t *buf, uint32_t buf_size) const {
911   uint32_t size = hdr.ByteSizeLong();
912   if (size + sizeof(kMagic) + sizeof(uint32_t) > buf_size) return false;
913   memcpy(buf, &kMagic, sizeof(kMagic));
914   memcpy(buf + sizeof(kMagic), &size, sizeof(uint32_t));
915   hdr.SerializeWithCachedSizesToArray(buf + sizeof(kMagic) + sizeof(uint32_t));
916   return true;
917 }
918 
Verify()919 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Verify() {
920   // Check version.
921   if (hdr.version() != kCurVersion) {
922     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
923         "Trie version %u mismatch", hdr.version());
924     return false;
925   }
926 
927   // Check that indices in hdr are within bounds. Note that this is
928   // not a comprehensive integrity check for the entire trie.
929   if (hdr.num_nodes() > hdr.max_nodes() || hdr.num_nexts() > hdr.max_nexts() ||
930       hdr.suffixes_size() > hdr.max_suffixes_size() ||
931       hdr.value_size() >= hdr.max_suffixes_size()) {
932     ICING_LOG(ERROR) << "Trie header array size out of bounds";
933     return false;
934   }
935 
936   if (hdr.free_lists_size() != kNumNextAllocationBuckets) {
937     ICING_LOG(ERROR) << "Bad number of free lists";
938     return false;
939   }
940 
941   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
942     if (hdr.free_lists(i) != kInvalidNextIndex &&
943         hdr.free_lists(i) >= hdr.max_nexts()) {
944       ICING_LOG(ERROR) << "Free list index out of bounds";
945       return false;
946     }
947   }
948 
949   return true;
950 }
951 
nodes_left() const952 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nodes_left() const {
953   return hdr_.hdr.max_nodes() - hdr_.hdr.num_nodes();
954 }
955 
nexts_left() const956 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nexts_left() const {
957   return hdr_.hdr.max_nexts() - hdr_.hdr.num_nexts();
958 }
959 
suffixes_left() const960 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const {
961   return hdr_.hdr.max_suffixes_size() - hdr_.hdr.suffixes_size();
962 }
963 
FillDirtyPageStats(Stats * stats) const964 void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats(
965     Stats *stats) const {
966   stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages();
967   stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages();
968   stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages();
969 }
970 
971 // Dumper.
972 class IcingDynamicTrie::Dumper {
973  public:
Dumper(const IcingDynamicTrie & trie)974   explicit Dumper(const IcingDynamicTrie &trie)
975       : all_props_(trie), del_prop_(trie), storage_(trie.storage_.get()) {}
976 
Dump(std::ostream * pretty_print,vector<std::string> * keys) const977   void Dump(std::ostream *pretty_print, vector<std::string> *keys) const {
978     if (storage_->empty()) {
979       *pretty_print << "(empty)\n";
980     } else {
981       DumpNodeRecursive("", *storage_->GetRootNode(), 0, pretty_print, keys);
982     }
983   }
984 
985  private:
SuffixToValueAsString(const char * suffix) const986   std::string SuffixToValueAsString(const char *suffix) const {
987     int suffix_len = strlen(suffix);
988     std::string ret;
989     ret.reserve(storage_->value_size() * 2);
990     for (uint32_t i = 0; i < storage_->value_size(); i++) {
991       IcingStringUtil::SStringAppendF(&ret, 10, "%02x",
992                                       suffix[suffix_len + 1 + i]);
993     }
994 
995     // Now dump set properties.
996     uint32_t value_index = storage_->GetSuffixIndex(suffix + suffix_len + 1);
997     if (del_prop_.HasProperty(value_index)) {
998       ret += " (deleted)";
999     }
1000     ret += " [";
1001     for (size_t i = 0; i < all_props_.size(); i++) {
1002       if (all_props_.HasProperty(i, value_index)) {
1003         IcingStringUtil::SStringAppendF(&ret, 10, "%zu", i);
1004       }
1005     }
1006     ret += ']';
1007 
1008     return ret;
1009   }
1010 
1011   // Inputs:
1012   //   prefix - the key prefix of the current node (so we can rebuild the key)
1013   //   node - the node we're at
1014   //   level - how many levels deep we are in the trie
1015   //   ret - the stream to pretty print to
1016   //   keys - the keys encountered are appended to this
DumpNodeRecursive(const std::string & prefix,const Node & node,int level,std::ostream * ret,vector<std::string> * keys) const1017   void DumpNodeRecursive(const std::string &prefix, const Node &node, int level,
1018                          std::ostream *ret, vector<std::string> *keys) const {
1019     if (node.is_leaf()) {
1020       // Dump suffix and value.
1021       for (int i = 0; i < level; i++) {
1022         *ret << ' ';
1023       }
1024       const char *suffix = storage_->GetSuffix(node.next_index());
1025       *ret << suffix;
1026       *ret << ' ';
1027       *ret << SuffixToValueAsString(suffix);
1028       *ret << '\n';
1029       keys->push_back(prefix + suffix);
1030     } else {
1031       // Go through each child (next) node. Print char and recursively
1032       // print trie underneath.
1033       for (uint32_t i = 0; i < (1U << node.log2_num_children()); i++) {
1034         const Next &next = *storage_->GetNext(node.next_index(), i);
1035         if (next.node_index() == kInvalidNodeIndex) break;
1036         for (int j = 0; j < level; j++) {
1037           *ret << ' ';
1038         }
1039         std::string new_prefix = prefix;
1040         if (next.val()) {
1041           *ret << static_cast<char>(next.val());
1042           new_prefix += next.val();
1043         } else {
1044           *ret << "null";
1045         }
1046         *ret << '\n';
1047         DumpNodeRecursive(new_prefix, *storage_->GetNode(next.node_index()),
1048                           level + 1, ret, keys);
1049       }
1050     }
1051   }
1052 
1053   PropertyReadersAll all_props_;
1054   PropertyDeletedReader del_prop_;
1055   const IcingDynamicTrie::IcingDynamicTrieStorage *storage_;
1056 };
1057 
1058 // IcingDynamicTrie.
IcingDynamicTrie(const std::string & filename_base,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)1059 IcingDynamicTrie::IcingDynamicTrie(const std::string &filename_base,
1060                                    const RuntimeOptions &runtime_options,
1061                                    const IcingFilesystem *filesystem)
1062     : IIcingStorage(),
1063       filename_base_(filename_base),
1064       is_initialized_(false),
1065       runtime_options_(runtime_options),
1066       storage_(nullptr),
1067       property_bitmaps_prefix_(filename_base_ + ".prop."),
1068       deleted_bitmap_filename_(filename_base_ + ".deleted"),
1069       deleted_bitmap_(nullptr),
1070       filesystem_(filesystem) {}
1071 
~IcingDynamicTrie()1072 IcingDynamicTrie::~IcingDynamicTrie() { Close(); }
1073 
Init()1074 bool IcingDynamicTrie::Init() {
1075   if (is_initialized_) return true;
1076 
1077   if (storage_ != nullptr) {
1078     ICING_LOG(FATAL) << "Storage is not null before initialization";
1079   }
1080 
1081   storage_ = std::make_unique<IcingDynamicTrieStorage>(
1082       filename_base_, runtime_options_, filesystem_);
1083   if (!storage_->Init() || !InitPropertyBitmaps()) {
1084     storage_.reset();
1085     return false;
1086   }
1087   is_initialized_ = true;
1088   return true;
1089 }
1090 
CreateIfNotExist(const Options & options)1091 bool IcingDynamicTrie::CreateIfNotExist(const Options &options) {
1092   // Initialized means exists.
1093   if (is_initialized_) return true;
1094 
1095   if (!options.is_valid()) {
1096     ICING_LOG(ERROR) << "Trie options invalid";
1097     return false;
1098   }
1099 
1100   auto storage = std::make_unique<IcingDynamicTrieStorage>(
1101       filename_base_, runtime_options_, filesystem_);
1102   return storage->CreateIfNotExist(options);
1103 }
1104 
Close()1105 void IcingDynamicTrie::Close() {
1106   if (!is_initialized_) return;
1107 
1108   UpdateCrc();
1109 
1110   storage_.reset();
1111   property_bitmaps_.clear();
1112   deleted_bitmap_.reset();
1113   is_initialized_ = false;
1114 }
1115 
Remove()1116 bool IcingDynamicTrie::Remove() {
1117   if (is_initialized()) {
1118     Close();
1119   }
1120 
1121   bool success = true;
1122 
1123   // Remove storage files.
1124   if (!IcingDynamicTrieStorage::Remove(filename_base_, *filesystem_)) {
1125     success = false;
1126   }
1127 
1128   // Also remove property bitmaps.
1129   vector<std::string> files;
1130   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1131                                      &files)) {
1132     return false;
1133   }
1134   for (size_t i = 0; i < files.size(); i++) {
1135     if (!filesystem_->DeleteFile(files[i].c_str())) success = false;
1136   }
1137   // And deleted bitmap.
1138   if (!filesystem_->DeleteFile(deleted_bitmap_filename_.c_str()))
1139     success = false;
1140 
1141   return success;
1142 }
1143 
Sync()1144 bool IcingDynamicTrie::Sync() {
1145   if (!is_initialized_) {
1146     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1147   }
1148 
1149   bool success = true;
1150   IcingTimer timer;
1151 
1152   // Sync property bitmaps.
1153   for (size_t i = 0; i < property_bitmaps_.size(); i++) {
1154     if (property_bitmaps_[i]) {
1155       if (!property_bitmaps_[i]->Sync()) success = false;
1156     }
1157   }
1158   if (!deleted_bitmap_->Sync()) success = false;
1159 
1160   // Sync storage.
1161   if (!storage_->Sync()) success = false;
1162 
1163   Warm();
1164 
1165   ICING_VLOG(1) << IcingStringUtil::StringPrintf(
1166       "Syncing dynamic trie %s took %.3fms", filename_base_.c_str(),
1167       timer.Elapsed() * 1000.);
1168 
1169   return success;
1170 }
1171 
GetDiskUsage() const1172 uint64_t IcingDynamicTrie::GetDiskUsage() const {
1173   uint64_t total = 0;
1174   // Property bitmaps.
1175   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1176                                            &total);
1177 
1178   for (auto &bitmap : property_bitmaps_) {
1179     if (bitmap == nullptr) continue;
1180     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1181   }
1182 
1183   // Storage.
1184   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetDiskUsage(), &total);
1185   return total;
1186 }
1187 
GetElementsSize() const1188 uint64_t IcingDynamicTrie::GetElementsSize() const {
1189   uint64_t total = 0;
1190 
1191   // Bitmaps are sparsely populated, so disk usage is more accurate for those.
1192   // Property bitmaps.
1193   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1194                                            &total);
1195   // The deleted bitmap is always initially grown to kGrowSize, whether there
1196   // are elements or not. So even if there are no elements in the trie, we'll
1197   // still have the bitmap of size kGrowSize, so subtract that from the size of
1198   // the trie's elements.
1199   total -= IcingFlashBitmap::kGrowSize;
1200 
1201   for (auto &bitmap : property_bitmaps_) {
1202     if (bitmap == nullptr) continue;
1203     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1204   }
1205 
1206   // Storage. We can use file size here since the storage files aren't sparse.
1207   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetElementsFileSize(),
1208                                            &total);
1209   return total;
1210 }
1211 
OpenAndInitBitmap(const std::string & filename,bool verify,const IcingFilesystem * filesystem)1212 std::unique_ptr<IcingFlashBitmap> IcingDynamicTrie::OpenAndInitBitmap(
1213     const std::string &filename, bool verify,
1214     const IcingFilesystem *filesystem) {
1215   auto bitmap = std::make_unique<IcingFlashBitmap>(filename, filesystem);
1216   if (!bitmap->Init() || (verify && !bitmap->Verify())) {
1217     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Init of %s failed",
1218                                                       filename.c_str());
1219     return nullptr;
1220   }
1221   return bitmap;
1222 }
1223 
InitPropertyBitmaps()1224 bool IcingDynamicTrie::InitPropertyBitmaps() {
1225   // Only called on init.
1226   if (!property_bitmaps_.empty()) {
1227     ICING_LOG(FATAL) << "Property bitmaps not empty before initialization";
1228   }
1229 
1230   if (deleted_bitmap_ != nullptr) {
1231     ICING_LOG(FATAL) << "Deleted bitmap not null before initialization";
1232   }
1233 
1234   // Truncate property bitmap files at current value index. Last value
1235   // is at suffixes_size - value_size(). We want to clear everything
1236   // after that.
1237   uint64_t truncate_idx =
1238       storage_->hdr().suffixes_size() > 0
1239           ? ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size() -
1240                                             value_size()) +
1241                 1
1242           : 0;
1243 
1244   // Discover property bitmaps by scanning the dir.
1245   vector<std::string> files;
1246   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1247                                      &files)) {
1248     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
1249         "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
1250     goto failed;
1251   }
1252   for (size_t i = 0; i < files.size(); i++) {
1253     // Decode property id from filename.
1254     size_t property_id_start_idx = files[i].rfind('.');
1255     if (property_id_start_idx == std::string::npos) {
1256       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
1257                                                         files[i].c_str());
1258       continue;
1259     }
1260     property_id_start_idx++;  // skip dot
1261     char *end;
1262     uint32_t property_id =
1263         strtol(files[i].c_str() + property_id_start_idx, &end, 10);  // NOLINT
1264     if (!end || end != (files[i].c_str() + files[i].size())) {
1265       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
1266                                                         files[i].c_str());
1267       continue;
1268     }
1269     std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
1270         files[i],
1271         runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1272         filesystem_);
1273     if (!bitmap) {
1274       ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
1275           "Open prop bitmap failed: %s", files[i].c_str());
1276       goto failed;
1277     }
1278     bitmap->Truncate(truncate_idx);
1279     if (property_id >= property_bitmaps_.size()) {
1280       property_bitmaps_.resize(property_id + 1);
1281     }
1282     property_bitmaps_[property_id] = std::move(bitmap);
1283   }
1284 
1285   deleted_bitmap_ = OpenAndInitBitmap(
1286       deleted_bitmap_filename_,
1287       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1288       filesystem_);
1289   if (!deleted_bitmap_) {
1290     goto failed;
1291   }
1292   deleted_bitmap_->Truncate(truncate_idx);
1293 
1294   return true;
1295 
1296 failed:
1297   property_bitmaps_.clear();
1298   deleted_bitmap_.reset();
1299   return false;
1300 }
1301 
Warm() const1302 void IcingDynamicTrie::Warm() const {
1303   if (!is_initialized()) {
1304     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1305   }
1306 
1307   return storage_->Warm();
1308 }
1309 
OnSleep()1310 void IcingDynamicTrie::OnSleep() {
1311   if (!is_initialized()) {
1312     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1313   }
1314 
1315   // Update crcs so we can verify when we come back.
1316   UpdateCrc();
1317 }
1318 
~NewValueMap()1319 IcingDynamicTrie::NewValueMap::~NewValueMap() {}
1320 
Compact(const NewValueMap & old_tvi_to_new_value,IcingDynamicTrie * out,std::unordered_map<uint32_t,uint32_t> * old_to_new_tvi) const1321 bool IcingDynamicTrie::Compact(
1322     const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
1323     std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const {
1324   if (old_to_new_tvi == nullptr) {
1325     ICING_LOG(ERROR) << "TVI is null";
1326   }
1327 
1328   if (!is_initialized()) {
1329     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1330   }
1331 
1332   PropertyReadersAll prop_readers(*this);
1333 
1334   old_to_new_tvi->clear();
1335   old_to_new_tvi->rehash(size() * 2);
1336 
1337   for (Iterator it_all(*this, ""); it_all.IsValid(); it_all.Advance()) {
1338     uint32_t value_index = it_all.GetValueIndex();
1339     const void *new_value = old_tvi_to_new_value.GetNewValue(value_index);
1340     if (!new_value) continue;
1341 
1342     uint32_t new_value_index;
1343     if (!out->Insert(it_all.GetKey(), new_value, &new_value_index, false)) {
1344       return false;
1345     }
1346 
1347     old_to_new_tvi->insert({value_index, new_value_index});
1348 
1349     // Copy properties.
1350     for (size_t i = 0; i < prop_readers.size(); i++) {
1351       if (prop_readers.HasProperty(i, value_index)) {
1352         if (!out->SetProperty(new_value_index, i)) {
1353           // Ouch. We need to bail.
1354           return false;
1355         }
1356       }
1357     }
1358   }
1359 
1360   return true;
1361 }
1362 
size() const1363 uint32_t IcingDynamicTrie::size() const {
1364   if (!is_initialized()) {
1365     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1366   }
1367   return storage_->hdr().num_keys();
1368 }
1369 
CollectStatsRecursive(const Node & node,Stats * stats,uint32_t depth) const1370 void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats,
1371                                              uint32_t depth) const {
1372   if (node.is_leaf()) {
1373     stats->num_leaves++;
1374     stats->sum_depth += depth;
1375     stats->max_depth = max(stats->max_depth, depth);
1376     const char *suffix = storage_->GetSuffix(node.next_index());
1377     stats->suffixes_used += strlen(suffix) + 1 + value_size();
1378     if (!suffix[0]) {
1379       stats->null_suffixes++;
1380     }
1381   } else {
1382     stats->num_intermediates++;
1383     uint32_t i = 0;
1384     for (; i < (1U << node.log2_num_children()); i++) {
1385       const Next &next = *storage_->GetNext(node.next_index(), i);
1386       if (next.node_index() == kInvalidNodeIndex) break;
1387       CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats,
1388                             depth + 1);
1389     }
1390 
1391     // At least one valid node in each next array
1392     if (i == 0) {
1393       ICING_LOG(FATAL) << "No valid node in 'next' array";
1394     }
1395     stats->sum_children += i;
1396     stats->max_children = max(stats->max_children, i);
1397 
1398     stats->child_counts[i - 1]++;
1399     stats->wasted[node.log2_num_children()] +=
1400         (1 << node.log2_num_children()) - i;
1401     stats->total_wasted += (1 << node.log2_num_children()) - i;
1402   }
1403 }
1404 
CollectStats(Stats * stats) const1405 void IcingDynamicTrie::CollectStats(Stats *stats) const {
1406   if (!is_initialized()) {
1407     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1408   }
1409 
1410   memset(stats, 0, sizeof(*stats));
1411 
1412   stats->num_keys = storage_->hdr().num_keys();
1413   stats->num_nodes = storage_->hdr().num_nodes();
1414   stats->max_nodes = storage_->hdr().max_nodes();
1415   stats->num_nexts = storage_->hdr().num_nexts();
1416   stats->max_nexts = storage_->hdr().max_nexts();
1417   stats->suffixes_size = storage_->hdr().suffixes_size();
1418   stats->max_suffixes_size = storage_->hdr().max_suffixes_size();
1419 
1420   // Stats collected from traversing the trie.
1421   if (!storage_->empty()) {
1422     CollectStatsRecursive(*storage_->GetRootNode(), stats);
1423   }
1424 
1425   // Free-list stats.
1426   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1427     for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex;
1428          cur = storage_->GetNext(cur, 0)->next_index()) {
1429       stats->num_free[i]++;
1430     }
1431     stats->total_free += stats->num_free[i] * (1 << i);
1432   }
1433 
1434   // Dirty page counts.
1435   storage_->FillDirtyPageStats(stats);
1436 }
1437 
DumpStats(int verbosity) const1438 std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
1439   std::string ret;
1440   IcingStringUtil::SStringAppendF(
1441       &ret, 0,
1442       "Keys %u "
1443       "Nodes (%u/%u) %.3f%% "
1444       "Nexts (%u/%u) %.3f%% "
1445       "Suffixes (%u/%u) %.3f%%\n",
1446       num_keys, num_nodes, max_nodes,
1447       100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts,
1448       100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size,
1449       max_suffixes_size,
1450       100. * math_util::SafeDivide(suffixes_size, max_suffixes_size));
1451 
1452   if (verbosity > 0) {
1453     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1454       if (num_free[i] > 0) {
1455         IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i,
1456                                         num_free[i]);
1457       }
1458     }
1459     IcingStringUtil::SStringAppendF(
1460         &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts,
1461         100. * math_util::SafeDivide(total_free, num_nexts));
1462 
1463     for (int i = 0; i < 256; i++) {
1464       if (child_counts[i] > 0) {
1465         IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1,
1466                                         child_counts[i]);
1467       }
1468     }
1469     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1470       IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i,
1471                                       wasted[i]);
1472     }
1473     IcingStringUtil::SStringAppendF(
1474         &ret, 0,
1475         "Wasted total: %u\n"
1476         "Num intermediates %u num leaves %u "
1477         "suffixes used %u null %u\n"
1478         "avg and max children for intermediates: %.3f, %u\n"
1479         "avg and max depth for leaves: %.3f, %u\n"
1480         "Total next frag: %.3f%%\n",
1481         total_wasted, num_intermediates, num_leaves, suffixes_used,
1482         null_suffixes, 1. * sum_children / num_intermediates, max_children,
1483         1. * sum_depth / num_leaves, max_depth,
1484         100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
1485   }
1486   IcingStringUtil::SStringAppendF(
1487       &ret, 0, "Memory usage: %zu/%zu bytes\n",
1488       num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size,
1489       max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size);
1490 
1491   IcingStringUtil::SStringAppendF(
1492       &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n",
1493       dirty_pages_nodes,
1494       math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1,
1495                             getpagesize()),
1496       dirty_pages_nexts,
1497       math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1,
1498                             getpagesize()),
1499       dirty_pages_suffixes,
1500       math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize()));
1501 
1502   return ret;
1503 }
1504 
DumpTrie(std::ostream * pretty_print,vector<std::string> * keys) const1505 void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print,
1506                                 vector<std::string> *keys) const {
1507   if (!is_initialized()) {
1508     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1509   }
1510 
1511   Dumper dumper(*this);
1512   dumper.Dump(pretty_print, keys);
1513 }
1514 
Clear()1515 void IcingDynamicTrie::Clear() {
1516   if (!is_initialized()) {
1517     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1518   }
1519 
1520   storage_->Clear();
1521   for (auto &bitmap : property_bitmaps_) {
1522     if (bitmap) {
1523       bitmap->Delete();
1524       bitmap.reset();
1525     }
1526   }
1527   deleted_bitmap_->Truncate(0);
1528 }
1529 
ClearSuffixAndValue(uint32_t suffix_value_index)1530 bool IcingDynamicTrie::ClearSuffixAndValue(uint32_t suffix_value_index) {
1531   // The size 1 below is for a '\0' between the suffix and the value.
1532   size_t suffix_and_value_length =
1533       strlen(this->storage_->GetSuffix(suffix_value_index)) + 1 +
1534       this->value_size();
1535   char *mutable_suffix_and_value = this->storage_->GetMutableSuffix(
1536       suffix_value_index, suffix_and_value_length);
1537 
1538   if (mutable_suffix_and_value == nullptr) {
1539     return false;
1540   }
1541 
1542   memset(mutable_suffix_and_value, 0, suffix_and_value_length);
1543   return true;
1544 }
1545 
ResetNext(uint32_t next_index)1546 bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
1547   Next *mutable_next =
1548       this->storage_->GetMutableNextArray(next_index, /*len=*/1);
1549 
1550   if (mutable_next == nullptr) {
1551     return false;
1552   }
1553 
1554   mutable_next->set_val(0);
1555   mutable_next->set_node_index(kInvalidNodeIndex);
1556   return true;
1557 }
1558 
SortNextArray(const Node * node)1559 bool IcingDynamicTrie::SortNextArray(const Node *node) {
1560   if (node == nullptr) {
1561     // Nothing to sort, return success directly.
1562     return true;
1563   }
1564 
1565   uint32_t next_array_buffer_size = 1u << node->log2_num_children();
1566   Next *next_array_start = this->storage_->GetMutableNextArray(
1567       node->next_index(), next_array_buffer_size);
1568 
1569   if (next_array_start == nullptr) {
1570     return false;
1571   }
1572 
1573   std::sort(next_array_start, next_array_start + next_array_buffer_size - 1);
1574   return true;
1575 }
1576 
Insert(const char * key,const void * value,uint32_t * value_index,bool replace,bool * pnew_key)1577 bool IcingDynamicTrie::Insert(const char *key, const void *value,
1578                               uint32_t *value_index, bool replace,
1579                               bool *pnew_key) {
1580   if (!is_initialized()) {
1581     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1582   }
1583 
1584   if (pnew_key) *pnew_key = false;
1585 
1586   // Find out ahead of time whether things will fit. A conservative
1587   // check based on allocations made below.
1588   //
1589   // IMPORTANT: This needs to be updated if the alloc patterns below
1590   // change.
1591   size_t key_len = strlen(key);
1592   if (!(storage_->nodes_left() >= 2 + key_len + 1 &&
1593         storage_->nexts_left() >= 2 + key_len + 1 + kMaxNextArraySize &&
1594         storage_->suffixes_left() >= key_len + 1 + value_size())) {
1595     // No more space left.
1596     return false;
1597   }
1598 
1599   uint32_t best_node_index;
1600   int key_offset;
1601   FindBestNode(key, &best_node_index, &key_offset, false);
1602 
1603   // A negative key_offset indicates that storage_ is empty
1604   if (key_offset < 0) {
1605     // First key.
1606     if (!storage_->empty()) {
1607       ICING_LOG(FATAL) << "Key offset is negative but storage is not empty, "
1608                           "there're inconsistencies in dynamic trie.";
1609     }
1610     Node *node = storage_->AllocNode();
1611     node->set_next_index(storage_->MakeSuffix(key, value, value_index));
1612     node->set_is_leaf(true);
1613     node->set_log2_num_children(0);
1614   } else if (storage_->GetNode(best_node_index)->is_leaf()) {
1615     // Prefix in the trie. Split at leaf.
1616     Node *split_node = storage_->GetMutableNode(best_node_index);
1617     const char *prev_suffix = storage_->GetSuffix(split_node->next_index());
1618 
1619     // Find the common prefix length.
1620     const char *prev_suffix_cur = prev_suffix;
1621     const char *key_cur = key + key_offset;
1622     while (*prev_suffix_cur && *prev_suffix_cur == *key_cur) {
1623       prev_suffix_cur++;
1624       key_cur++;
1625     }
1626 
1627     // Equal strings?
1628     if (*prev_suffix_cur == 0 && *key_cur == 0) {
1629       // Update value if replace == true and return.
1630       if (value_index) {
1631         *value_index = storage_->GetSuffixIndex(prev_suffix_cur + 1);
1632       }
1633       if (replace) {
1634         char *mutable_prev_suffix_cur = storage_->GetMutableSuffix(
1635             storage_->GetSuffixIndex(prev_suffix_cur + 1), value_size());
1636         memcpy(mutable_prev_suffix_cur, value, value_size());
1637       }
1638       return true;
1639     }
1640 
1641     if (*prev_suffix_cur == *key_cur) {
1642       ICING_LOG(FATAL) << "The suffix cursor and key cursor should diverge "
1643                           "after finding the common prefix.";
1644     }
1645 
1646     // Create single-branch children for the common prefix
1647     // length. After the loop, split_node points to the node that
1648     // will have more than 1 char.
1649     int common_len = prev_suffix_cur - prev_suffix;
1650     for (int i = 0; i < common_len; i++) {
1651       // Create a single-branch child node.
1652       Next *split_next = storage_->AllocNextArray(1);
1653       split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1654       split_node->set_is_leaf(false);
1655       split_node->set_log2_num_children(0);
1656       Node *child_node = storage_->AllocNode();
1657       split_next[0].set_val(*(prev_suffix + i));
1658       split_next[0].set_node_index(storage_->GetNodeIndex(child_node));
1659 
1660       split_node = child_node;
1661     }
1662 
1663     // Fill a split.
1664     Next *split_next = storage_->AllocNextArray(2);
1665     split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1666     split_node->set_is_leaf(false);
1667     split_node->set_log2_num_children(1);
1668     Node *prev_suffix_node = storage_->AllocNode();
1669     Node *key_node = storage_->AllocNode();
1670     split_next[0].set_val(*(prev_suffix + common_len));
1671     split_next[0].set_node_index(storage_->GetNodeIndex(prev_suffix_node));
1672     if (*(prev_suffix + common_len)) {
1673       uint32_t next_index =
1674           storage_->GetSuffixIndex(prev_suffix + common_len) + 1;
1675       prev_suffix_node->set_next_index(next_index);
1676     } else {
1677       uint32_t next_index = storage_->GetSuffixIndex(prev_suffix + common_len);
1678       prev_suffix_node->set_next_index(next_index);
1679     }
1680     prev_suffix_node->set_is_leaf(true);
1681     prev_suffix_node->set_log2_num_children(0);
1682     split_next[1].set_val(*(key + key_offset + common_len));
1683     split_next[1].set_node_index(storage_->GetNodeIndex(key_node));
1684     if (*(key + key_offset + common_len)) {
1685       uint32_t next_index = storage_->MakeSuffix(
1686           key + key_offset + common_len + 1, value, value_index);
1687       key_node->set_next_index(next_index);
1688     } else {
1689       uint32_t next_index = storage_->MakeSuffix(key + key_offset + common_len,
1690                                                  value, value_index);
1691       key_node->set_next_index(next_index);
1692     }
1693     key_node->set_is_leaf(true);
1694     key_node->set_log2_num_children(0);
1695 
1696     std::sort(split_next, split_next + 2);
1697   } else {
1698     // Insert into intermediate node.
1699     const Node *best_node = storage_->GetNode(best_node_index);
1700 
1701     // Add our value as a node + suffix.
1702     Node *new_leaf_node = storage_->AllocNode();
1703     if (*(key + key_offset)) {
1704       uint32_t next_index =
1705           storage_->MakeSuffix(key + key_offset + 1, value, value_index);
1706       new_leaf_node->set_next_index(next_index);
1707     } else {
1708       uint32_t next_index =
1709           storage_->MakeSuffix(key + key_offset, value, value_index);
1710       new_leaf_node->set_next_index(next_index);
1711     }
1712     new_leaf_node->set_is_leaf(true);
1713     new_leaf_node->set_log2_num_children(0);
1714 
1715     // Figure out the real length of the existing next array.
1716     uint32_t next_array_buffer_size = 1u << best_node->log2_num_children();
1717     Next *cur_next = storage_->GetMutableNextArray(best_node->next_index(),
1718                                                    next_array_buffer_size);
1719     int next_len = GetValidNextsSize(cur_next, next_array_buffer_size);
1720     Next *new_next = cur_next;
1721     if (next_len == (next_array_buffer_size)) {
1722       // Allocate a new, larger, array.
1723       new_next = storage_->AllocNextArray(next_len + 1);
1724       memcpy(new_next, cur_next, sizeof(Next) * next_len);
1725     }
1726 
1727     // Write a link to our new leaf node and sort.
1728     new_next[next_len].set_val(*(key + key_offset));
1729     new_next[next_len].set_node_index(storage_->GetNodeIndex(new_leaf_node));
1730     inplace_merge(new_next, new_next + next_len, new_next + next_len + 1);
1731     next_len++;
1732 
1733     // If this was new, update the parent node and free the old next
1734     // array.
1735     if (new_next != cur_next) {
1736       Node *mutable_best_node =
1737           storage_->GetMutableNode(storage_->GetNodeIndex(best_node));
1738       mutable_best_node->set_next_index(storage_->GetNextArrayIndex(new_next));
1739       mutable_best_node->set_is_leaf(false);
1740       uint8_t log2_num_children = mutable_best_node->log2_num_children();
1741 
1742       // 8 == log2(256)
1743       if (log2_num_children >= 8) {
1744         ICING_LOG(FATAL) << "Number of children exceeds the max allowed size";
1745       }
1746 
1747       mutable_best_node->set_log2_num_children(log2_num_children + 1);
1748 
1749       storage_->FreeNextArray(cur_next,
1750                               mutable_best_node->log2_num_children() - 1);
1751     }
1752   }
1753 
1754   // We added a new key.
1755   storage_->inc_num_keys();
1756 
1757   if (pnew_key) *pnew_key = true;
1758   return true;
1759 }
1760 
GetValueAtIndex(uint32_t value_index) const1761 const void *IcingDynamicTrie::GetValueAtIndex(uint32_t value_index) const {
1762   if (!is_initialized()) {
1763     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1764   }
1765 
1766   return static_cast<const void *>(storage_->GetSuffix(value_index));
1767 }
1768 
SetValueAtIndex(uint32_t value_index,const void * value)1769 void IcingDynamicTrie::SetValueAtIndex(uint32_t value_index,
1770                                        const void *value) {
1771   if (!is_initialized()) {
1772     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1773   }
1774 
1775   if (value_index > storage_->hdr().max_suffixes_size() - value_size()) {
1776     ICING_LOG(FATAL) << "Value index is out of range";
1777   }
1778 
1779   memcpy(storage_->GetMutableSuffix(value_index, value_size()), value,
1780          value_size());
1781 }
1782 
Find(const char * key,void * value,uint32_t * value_index) const1783 bool IcingDynamicTrie::Find(const char *key, void *value,
1784                             uint32_t *value_index) const {
1785   if (!is_initialized()) {
1786     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1787   }
1788 
1789   uint32_t best_node_index;
1790   int key_offset;
1791   FindBestNode(key, &best_node_index, &key_offset, false);
1792 
1793   const Node *best_node = storage_->GetNode(best_node_index);
1794   if (key_offset >= 0 && best_node->is_leaf() &&
1795       !strcmp(key + key_offset, storage_->GetSuffix(best_node->next_index()))) {
1796     uint32_t vidx = best_node->next_index() +
1797                     strlen(storage_->GetSuffix(best_node->next_index())) + 1;
1798     if (value_index) *value_index = vidx;
1799     if (value) memcpy(value, storage_->GetSuffix(vidx), value_size());
1800     return true;
1801   } else {
1802     return false;
1803   }
1804 }
1805 
Iterator(const IcingDynamicTrie & trie,const char * prefix)1806 IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
1807                                      const char *prefix)
1808     : cur_key_(prefix),
1809       cur_suffix_(nullptr),
1810       cur_suffix_len_(0),
1811       single_leaf_match_(false),
1812       trie_(trie) {
1813   if (!trie.is_initialized()) {
1814     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1815   }
1816 
1817   Reset();
1818 }
1819 
LeftBranchToLeaf(uint32_t node_index)1820 void IcingDynamicTrie::Iterator::LeftBranchToLeaf(uint32_t node_index) {
1821   // Go down the trie, following the left-most child until we hit a
1822   // leaf. Push to stack and cur_key nodes and chars as we go.
1823   for (; !trie_.storage_->GetNode(node_index)->is_leaf();
1824        node_index =
1825            trie_.storage_
1826                ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
1827                ->node_index()) {
1828     branch_stack_.push_back(Branch(node_index));
1829     cur_key_.push_back(
1830         trie_.storage_
1831             ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
1832             ->val());
1833   }
1834 
1835   // We're at a leaf.
1836   cur_suffix_ = trie_.storage_->GetSuffix(
1837       trie_.storage_->GetNode(node_index)->next_index());
1838   cur_suffix_len_ = strlen(cur_suffix_);
1839   cur_key_.append(cur_suffix_, cur_suffix_len_);
1840 }
1841 
Reset()1842 void IcingDynamicTrie::Iterator::Reset() {
1843   size_t strip_len = branch_stack_.size() + cur_suffix_len_;
1844 
1845   if (cur_key_.size() < strip_len) {
1846     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1847                         "size, there're inconsistencies in dynamic trie";
1848   }
1849 
1850   // Trim back cur_key_ to original prefix.
1851   cur_key_.resize(cur_key_.size() - strip_len);
1852   cur_suffix_ = nullptr;
1853   cur_suffix_len_ = 0;
1854   single_leaf_match_ = false;
1855   branch_stack_.clear();
1856 
1857   // Nothing to do with an empty trie.
1858   if (trie_.storage_->empty()) return;
1859 
1860   // Find node matching prefix.
1861   uint32_t node_index;
1862   int key_offset;
1863   trie_.FindBestNode(cur_key_.c_str(), &node_index, &key_offset, true);
1864 
1865   // Two cases/states:
1866   //
1867   // - Found an intermediate node. If we matched all of prefix
1868   //   (cur_key_), LeftBranchToLeaf.
1869   //
1870   // - Found a leaf node, which is the ONLY matching key for this
1871   //   prefix. Check that suffix matches the prefix. Then we set
1872   //   single_leaf_match_ = true and apply different logic for
1873   //   Advance.
1874   if (key_offset < 0) {
1875     // A negative key_offset indicates that trie_.storage_ is empty
1876     ICING_LOG(FATAL) << "Trie storage is empty";
1877   }
1878 
1879   const Node *best_node = trie_.storage_->GetNode(node_index);
1880   if (best_node->is_leaf() &&
1881       !strncmp(cur_key_.c_str() + key_offset,
1882                trie_.storage_->GetSuffix(best_node->next_index()),
1883                cur_key_.size() - key_offset)) {
1884     // Copy the entire suffix into the current key.
1885     cur_key_.resize(key_offset);
1886     cur_key_.append(trie_.storage_->GetSuffix(best_node->next_index()));
1887     cur_suffix_ = trie_.storage_->GetSuffix(best_node->next_index());
1888     cur_suffix_len_ = strlen(cur_suffix_);
1889     single_leaf_match_ = true;
1890   } else if (static_cast<size_t>(key_offset) == cur_key_.size()) {
1891     LeftBranchToLeaf(node_index);
1892   }
1893 }
1894 
Advance()1895 bool IcingDynamicTrie::Iterator::Advance() {
1896   if (!IsValid()) return false;
1897   if (single_leaf_match_) {
1898     // If we only have an exact match, the Advance logic does not
1899     // apply. Invalidate the iterator and return.
1900     cur_suffix_ = nullptr;
1901     cur_suffix_len_ = 0;
1902     return false;
1903   }
1904 
1905   if (cur_key_.size() < (branch_stack_.size() + cur_suffix_len_)) {
1906     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1907                         "size, there're inconsistencies in dynamic trie";
1908   }
1909 
1910   // Move up from the current leaf.
1911   cur_key_.resize(cur_key_.size() - cur_suffix_len_);
1912   cur_suffix_ = nullptr;
1913   cur_suffix_len_ = 0;
1914 
1915   while (!branch_stack_.empty()) {
1916     Branch *branch = &branch_stack_.back();
1917     const Node *node = trie_.storage_->GetNode(branch->node_idx);
1918     branch->child_idx++;
1919     if (branch->child_idx < (1 << node->log2_num_children()) &&
1920         trie_.storage_->GetNext(node->next_index(), branch->child_idx)
1921                 ->node_index() != kInvalidNodeIndex) {
1922       // Successfully incremented to the next child. Update the char
1923       // value at this depth.
1924       cur_key_[cur_key_.size() - 1] =
1925           trie_.storage_->GetNext(node->next_index(), branch->child_idx)->val();
1926       // We successfully found a sub-trie to explore.
1927       LeftBranchToLeaf(
1928           trie_.storage_->GetNext(node->next_index(), branch->child_idx)
1929               ->node_index());
1930       return true;
1931     }
1932     branch_stack_.pop_back();
1933     cur_key_.resize(cur_key_.size() - 1);
1934   }
1935 
1936   // Un-wound the entire stack. We are done.
1937   return false;
1938 }
1939 
IsValid() const1940 bool IcingDynamicTrie::Iterator::IsValid() const {
1941   return cur_suffix_ != nullptr;
1942 }
1943 
GetKey() const1944 const char *IcingDynamicTrie::Iterator::GetKey() const {
1945   // cur_key_ can have a NULL in it so cur_key_ can be wrong but
1946   // cur_key_.c_str() is always right.
1947   return IsValid() ? cur_key_.c_str() : nullptr;
1948 }
1949 
GetValue() const1950 const void *IcingDynamicTrie::Iterator::GetValue() const {
1951   if (!IsValid()) return nullptr;
1952 
1953   return static_cast<const void *>(cur_suffix_ + cur_suffix_len_ + 1);
1954 }
1955 
GetValueIndex() const1956 uint32_t IcingDynamicTrie::Iterator::GetValueIndex() const {
1957   if (!IsValid()) return kInvalidSuffixIndex;
1958 
1959   return trie_.storage_->GetSuffixIndex(cur_suffix_ + cur_suffix_len_ + 1);
1960 }
1961 
LeftBranchToUtf8End()1962 void IcingDynamicTrie::Utf8Iterator::LeftBranchToUtf8End() {
1963   if (cur_len_ <= 0) {
1964     ICING_LOG(FATAL) << "Invalid UTF-8 character length";
1965   }
1966 
1967   if (branch_end_ - branch_stack_ != cur_len_) {
1968     ICING_LOG(FATAL) << "Depth from first visited node to last visited node "
1969                         "doesn't match the current UTF-8 character length";
1970   }
1971 
1972   // Use branch at top of stack to determine where to follow.
1973   const Branch &branch = *(branch_end_ - 1);
1974   const Node *node = trie_.storage_->GetNode(branch.child->node_index());
1975 
1976   // If we start with non-ascii, take all left branches while there is
1977   // a continuation byte.
1978   if (!i18n_utils::IsAscii(cur_[cur_len_ - 1])) {
1979     while (!node->is_leaf()) {
1980       if (cur_len_ >= U8_MAX_LENGTH) break;
1981 
1982       InitBranch(branch_end_, node, 0);
1983       // When we are looking to complete a utf8 char, skip 0s.
1984       if (branch_end_->child->val() == 0) {
1985         // Check if we already have a valid cur_.
1986         cur_[cur_len_] = 0;
1987         UChar32 uchar32 = i18n_utils::GetUChar32At(cur_, cur_len_, 0);
1988         if (uchar32 == i18n_utils::kInvalidUChar32 &&
1989             node->log2_num_children() > 0) {
1990           branch_end_->child++;
1991         } else {
1992           // Good termination. Just break.
1993           break;
1994         }
1995       }
1996 
1997       if (!IcingStringUtil::IsContinuationByte(branch_end_->child->val()))
1998         break;
1999 
2000       cur_[cur_len_++] = branch_end_->child->val();
2001       node = trie_.storage_->GetNode(branch_end_->child->node_index());
2002       branch_end_++;
2003     }
2004 
2005     cur_logical_node_.node = node;
2006 
2007     // Maybe go into suffixes and set suffix_offset.
2008     if (node->is_leaf()) {
2009       GoIntoSuffix(node);
2010     } else {
2011       cur_logical_node_.suffix_offset = 0;
2012     }
2013   } else {  // ascii
2014     cur_logical_node_.node = node;
2015     cur_logical_node_.suffix_offset = 0;
2016   }
2017 
2018   // NULL-terminate.
2019   cur_[cur_len_] = 0;
2020 }
2021 
GoIntoSuffix(const Node * node)2022 void IcingDynamicTrie::Utf8Iterator::GoIntoSuffix(const Node *node) {
2023   const char *suffix = trie_.storage_->GetSuffix(node->next_index());
2024   const char *cur_suffix;
2025   for (cur_suffix = suffix; cur_len_ < U8_MAX_LENGTH &&
2026                             IcingStringUtil::IsContinuationByte(*cur_suffix);
2027        cur_suffix++) {
2028     cur_[cur_len_++] = *cur_suffix;
2029   }
2030   cur_logical_node_.suffix_offset = cur_suffix - suffix;
2031 }
2032 
Reset()2033 void IcingDynamicTrie::Utf8Iterator::Reset() {
2034   cur_[0] = 0;
2035   cur_len_ = 0;
2036   branch_end_ = branch_stack_;
2037 
2038   if (start_node_) {
2039     // Take the first char node's children.
2040     const Next *next = trie_.storage_->GetNext(start_node_->next_index(), 0);
2041     branch_end_->node = start_node_;
2042     branch_end_->child_end = next + (1 << start_node_->log2_num_children());
2043     if (next->val() == 0) {
2044       // Skip any nulls at this position. We don't return empty string
2045       // as an iteration.
2046       next++;
2047     }
2048     branch_end_->child = next;
2049     cur_[cur_len_++] = next->val();
2050     branch_end_++;
2051 
2052     // Will NULL-terminate cur_.
2053     LeftBranchToUtf8End();
2054   } else {
2055     // Nothing to return.
2056     cur_logical_node_.node = nullptr;
2057     cur_logical_node_.suffix_offset = 0;
2058   }
2059 }
2060 
Advance()2061 bool IcingDynamicTrie::Utf8Iterator::Advance() {
2062   if (!IsValid()) return false;
2063 
2064   // Clip to branch.
2065   cur_len_ = branch_end_ - branch_stack_;
2066 
2067   while (branch_end_ > branch_stack_) {
2068     Branch *branch = branch_end_ - 1;
2069     branch->child++;
2070     if (!branch->IsFinished()) {
2071       // Successfully incremented to the next child. Update the char
2072       // value at this depth.
2073       cur_[cur_len_ - 1] = branch->child->val();
2074 
2075       // We successfully found a sub-trie to explore.
2076       LeftBranchToUtf8End();
2077       return true;
2078     }
2079     cur_len_--;
2080     branch_end_--;
2081   }
2082 
2083   // Un-wound the entire stack. We are done.
2084   return false;
2085 }
2086 
InitBranch(Branch * branch,const Node * start,char key_char)2087 void IcingDynamicTrie::Utf8Iterator::InitBranch(Branch *branch,
2088                                                 const Node *start,
2089                                                 char key_char) {
2090   branch->node = start;
2091   branch->child = trie_.storage_->GetNext(start->next_index(), 0);
2092   branch->child_end = branch->child + (1 << start->log2_num_children());
2093   if (key_char) {
2094     branch->child =
2095         trie_.LowerBound(branch->child, branch->child_end, key_char);
2096   }
2097 }
2098 
IsFinished()2099 bool IcingDynamicTrie::Utf8Iterator::Branch::IsFinished() {
2100   return child >= child_end || child->node_index() == kInvalidNodeIndex;
2101 }
2102 
IsValid() const2103 bool IcingDynamicTrie::Utf8Iterator::IsValid() const { return cur_len_ > 0; }
2104 
GetNextByChar(const Node * node,uint8_t key_char) const2105 const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
2106     const Node *node, uint8_t key_char) const {
2107   const Next *next_start = storage_->GetNext(node->next_index(), 0);
2108   const Next *next_end = next_start + (1 << node->log2_num_children());
2109 
2110   const Next *found = LowerBound(next_start, next_end, key_char);
2111   if (found >= next_end || found->val() != key_char ||
2112       found->node_index() == kInvalidNodeIndex) {
2113     return nullptr;
2114   }
2115 
2116   return found;
2117 }
2118 
LowerBound(const Next * start,const Next * end,uint8_t key_char) const2119 const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
2120     const Next *start, const Next *end, uint8_t key_char) const {
2121   // Above this value will use binary search instead of linear
2122   // search. 16 was chosen from running some benchmarks with
2123   // different values.
2124   static const uint32_t kBinarySearchCutoff = 16;
2125 
2126   if (end - start >= kBinarySearchCutoff) {
2127     // Binary search.
2128     Next key_next(key_char, 0);
2129     return lower_bound(start, end, key_next);
2130   } else {
2131     // Linear search.
2132     const Next *found;
2133     for (found = start; found < end; found++) {
2134       if (found->val() >= key_char) {
2135         // Should have gotten match.
2136         break;
2137       }
2138     }
2139     return found;
2140   }
2141 }
2142 
FindBestNode(const char * key,uint32_t * best_node_index,int * key_offset,bool prefix,bool utf8) const2143 void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
2144                                     int *key_offset, bool prefix,
2145                                     bool utf8) const {
2146   // Find the best node such that:
2147   //
2148   // - If key is NOT in the trie, key[0..key_offset) is a prefix to
2149   //   everything under best_node_index.
2150   //
2151   // - If key is in the trie, best_node_index is the leaf that points
2152   //   to the key suffix and key_offset == strlen(key).
2153   //
2154   // If prefix is true, when key is both in the trie AND a prefix
2155   // (e.g. "ab" and "abc" are in the trie), we return the intermediate
2156   // node with key as the prefix as opposed to the exactly matching
2157   // leaf node.
2158   if (storage_->empty()) {
2159     *best_node_index = 0;
2160     *key_offset = -1;
2161     return;
2162   }
2163 
2164   const Node *cur_node = storage_->GetRootNode();
2165   const char *cur_key = key;
2166   const Node *utf8_node = cur_node;
2167   const char *utf8_key = cur_key;
2168   while (!cur_node->is_leaf()) {
2169     const Next *found = GetNextByChar(cur_node, *cur_key);
2170     if (!found) break;
2171 
2172     if (prefix && found->val() == 0) {
2173       break;
2174     }
2175 
2176     cur_node = storage_->GetNode(found->node_index());
2177 
2178     // End of key.
2179     if (*cur_key == 0) {
2180       break;
2181     }
2182     cur_key++;
2183 
2184     if (utf8 && i18n_utils::IsLeadUtf8Byte(*cur_key)) {
2185       utf8_node = cur_node;
2186       utf8_key = cur_key;
2187     }
2188   }
2189 
2190   if (utf8) {
2191     // Rewind.
2192     cur_node = utf8_node;
2193     cur_key = utf8_key;
2194   }
2195 
2196   *best_node_index = storage_->GetNodeIndex(cur_node);
2197   *key_offset = reinterpret_cast<const char *>(cur_key) - key;
2198 }
2199 
FindNewBranchingPrefixLength(const char * key,bool utf8) const2200 int IcingDynamicTrie::FindNewBranchingPrefixLength(const char *key,
2201                                                    bool utf8) const {
2202   if (storage_->empty()) {
2203     return kNoBranchFound;
2204   }
2205 
2206   uint32_t best_node_index;
2207   int key_offset;
2208   FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true, utf8);
2209   const Node *cur_node = storage_->GetNode(best_node_index);
2210   const char *cur_key = key + key_offset;
2211   if (cur_node->is_leaf()) {
2212     // Prefix in the trie. Split at leaf.
2213     const char *prev_suffix = storage_->GetSuffix(cur_node->next_index());
2214     while (*prev_suffix != '\0' && *prev_suffix == *cur_key) {
2215       prev_suffix++;
2216       cur_key++;
2217     }
2218 
2219     // Equal strings? No branching.
2220     if (*prev_suffix == '\0' && *cur_key == '\0') {
2221       return kNoBranchFound;
2222     }
2223 
2224     if (utf8) {
2225       // Rewind to utf8 boundary.
2226       size_t offset = i18n_utils::SafeTruncateUtf8Length(key, cur_key - key);
2227       cur_key = key + offset;
2228     }
2229 
2230     return cur_key - key;
2231   } else if (cur_node->log2_num_children() == 0) {
2232     // Intermediate node going from no branching to branching.
2233     return cur_key - key;
2234   }
2235 
2236   // If we've reached this point, then we're already at a branch point. So there
2237   // is no *new* branch point.
2238   return kNoBranchFound;
2239 }
2240 
FindBranchingPrefixLengths(const char * key,bool utf8) const2241 std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(const char *key,
2242                                                               bool utf8) const {
2243   std::vector<int> prefix_lengths;
2244 
2245   if (storage_->empty()) {
2246     return prefix_lengths;
2247   }
2248 
2249   const Node *cur_node = storage_->GetRootNode();
2250   const char *cur_key = key;
2251   while (*cur_key && !cur_node->is_leaf()) {
2252     // Branching prefix?
2253     if (cur_node->log2_num_children() > 0) {
2254       int len = cur_key - key;
2255       if (utf8) {
2256         // Do not cut mid-utf8. Walk up to utf8 boundary.
2257         len = i18n_utils::SafeTruncateUtf8Length(key, len);
2258         if (prefix_lengths.empty() || len != prefix_lengths.back()) {
2259           prefix_lengths.push_back(len);
2260         }
2261       } else {
2262         prefix_lengths.push_back(len);
2263       }
2264     }
2265 
2266     // Move to next.
2267     const Next *found = GetNextByChar(cur_node, *cur_key);
2268     if (found == nullptr) {
2269       break;
2270     }
2271     cur_node = storage_->GetNode(found->node_index());
2272 
2273     ++cur_key;
2274   }
2275   return prefix_lengths;
2276 }
2277 
GetDebugInfo(int verbosity,std::string * out) const2278 void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
2279   Stats stats;
2280   CollectStats(&stats);
2281   out->append(stats.DumpStats(verbosity));
2282 
2283   // Property files.
2284   vector<std::string> files;
2285   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
2286                                      &files)) {
2287     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
2288         "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
2289     return;
2290   }
2291   for (size_t i = 0; i < files.size(); i++) {
2292     IcingStringUtil::SStringAppendF(
2293         out, 1000, "Prop file %s size %" PRIu64 "\n",
2294         filesystem_->GetBasename(files[i].c_str()).c_str(),
2295         filesystem_->GetFileSize(files[i].c_str()));
2296   }
2297   IcingStringUtil::SStringAppendF(
2298       out, 1000, "Deleted file %s size %" PRIu64 "\n",
2299       filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(),
2300       filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
2301 }
2302 
min_free_fraction() const2303 double IcingDynamicTrie::min_free_fraction() const {
2304   if (!is_initialized()) {
2305     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2306   }
2307 
2308   return 1.0 - max(max(static_cast<double>(storage_->hdr().num_nodes()) /
2309                            storage_->hdr().max_nodes(),
2310                        static_cast<double>(storage_->hdr().num_nexts()) /
2311                            storage_->hdr().max_nexts()),
2312                    static_cast<double>(storage_->hdr().suffixes_size()) /
2313                        storage_->hdr().max_suffixes_size());
2314 }
2315 
value_size() const2316 uint32_t IcingDynamicTrie::value_size() const {
2317   return storage_->hdr().value_size();
2318 }
2319 
max_value_index() const2320 uint32_t IcingDynamicTrie::max_value_index() const {
2321   return storage_->hdr().max_suffixes_size();
2322 }
2323 
UpdateCrc()2324 uint32_t IcingDynamicTrie::UpdateCrc() {
2325   if (!is_initialized()) {
2326     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2327   }
2328 
2329   if (runtime_options_.storage_policy != RuntimeOptions::kMapSharedWithCrc) {
2330     return kNoCrc;
2331   }
2332 
2333   // Combine storage crc with property bitmap crcs.
2334   uint32_t crc = storage_->UpdateCrc();
2335 
2336   // Update crcs on bitmaps.
2337   for (size_t i = 0; i < property_bitmaps_.size(); ++i) {
2338     if (property_bitmaps_[i]) {
2339       // Combine property id with the bitmap crc.
2340       uint64_t this_crc = property_bitmaps_[i]->UpdateCrc();
2341       this_crc = (this_crc << 32) | i;
2342       crc = IcingStringUtil::UpdateCrc32(
2343           crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
2344     }
2345   }
2346   uint32_t this_crc = deleted_bitmap_->UpdateCrc();
2347   crc = IcingStringUtil::UpdateCrc32(
2348       crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
2349 
2350   return crc;
2351 }
2352 
OpenOrCreatePropertyBitmap(uint32_t property_id)2353 IcingFlashBitmap *IcingDynamicTrie::OpenOrCreatePropertyBitmap(
2354     uint32_t property_id) {
2355   if (!is_initialized()) {
2356     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2357   }
2358 
2359   if (property_id > kMaxPropertyId) {
2360     ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
2361         "Property id %u out of range", property_id);
2362     return nullptr;
2363   }
2364 
2365   if (property_id >= property_bitmaps_.size()) {
2366     property_bitmaps_.resize(property_id + 1);
2367   }
2368   if (!property_bitmaps_[property_id]) {
2369     std::string filename;
2370     IcingStringUtil::SStringAppendF(
2371         &filename, property_bitmaps_prefix_.size() + 10, "%s%u",
2372         property_bitmaps_prefix_.c_str(), property_id);
2373     property_bitmaps_[property_id] =
2374         OpenAndInitBitmap(filename, false, filesystem_);
2375   }
2376   return property_bitmaps_[property_id].get();
2377 }
2378 
SetProperty(uint32_t value_index,uint32_t property_id)2379 bool IcingDynamicTrie::SetProperty(uint32_t value_index, uint32_t property_id) {
2380   IcingFlashBitmap *bitmap = OpenOrCreatePropertyBitmap(property_id);
2381   if (!bitmap) {
2382     return false;
2383   }
2384   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2385 
2386   // Also clear deleted bit.
2387   return bitmap->SetBit(idx, true) && deleted_bitmap_->SetBit(idx, false);
2388 }
2389 
ClearProperty(uint32_t value_index,uint32_t property_id)2390 bool IcingDynamicTrie::ClearProperty(uint32_t value_index,
2391                                      uint32_t property_id) {
2392   if (property_id >= property_bitmaps_.size() ||
2393       !property_bitmaps_[property_id]) {
2394     // No bitmap is ok for clearing.
2395     return true;
2396   }
2397 
2398   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2399   return property_bitmaps_[property_id]->SetBit(idx, false);
2400 }
2401 
SetDeleted(uint32_t value_index)2402 bool IcingDynamicTrie::SetDeleted(uint32_t value_index) {
2403   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2404   return deleted_bitmap_->SetBit(idx, true);
2405 }
2406 
ClearDeleted(uint32_t value_index)2407 bool IcingDynamicTrie::ClearDeleted(uint32_t value_index) {
2408   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2409   return deleted_bitmap_->SetBit(idx, false);
2410 }
2411 
2412 // Steps:
2413 // 1. Find the key in the trie.
2414 // 2. Remove the suffix and the value.
2415 // 3. Reset the nexts that point to the nodes to be removed.
2416 // 4. Sort any next array if needed.
Delete(const std::string_view key)2417 bool IcingDynamicTrie::Delete(const std::string_view key) {
2418   if (!is_initialized()) {
2419     ICING_LOG(ERROR) << "DynamicTrie not initialized";
2420     return false;
2421   }
2422 
2423   if (storage_->empty()) {
2424     // Nothing to delete.
2425     return true;
2426   }
2427 
2428   // Tries to find the key in the trie, starting from the root.
2429   const Node *current_node = storage_->GetRootNode();
2430 
2431   // The node after which we start to remove data.
2432   const Node *last_multichild_node = nullptr;
2433 
2434   // While visiting the trie nodes, we store the indices of Nexts that point
2435   // to all the nodes after last_multichild_node. Those nodes must be
2436   // consecutive and all have only one child. Resetting those Nexts means that
2437   // we remove the data of the key.
2438   std::vector<uint32_t> nexts_to_reset;
2439   nexts_to_reset.reserve(key.length());
2440 
2441   // Iterates through chars in the key, finds nodes in the trie until a leaf
2442   // node is reached. The max number of loops is key.length() + 1 because we
2443   // start from the root.
2444   for (size_t i = 0; i <= key.length(); ++i) {
2445     if (current_node->is_leaf()) {
2446       // Leaf node, now check the suffix.
2447       if (key.substr(i) != storage_->GetSuffix(current_node->next_index())) {
2448         // Key does not exist in the trie, nothing to delete.
2449         return true;
2450       }
2451       // Otherwise, key is found.
2452       break;
2453     }
2454 
2455     // Finds the next char.
2456     const Next *next;
2457     if (i == key.length()) {
2458       // When we're at the end of the key, the next char is the termination char
2459       // '\0'.
2460       next = GetNextByChar(current_node, '\0');
2461     } else {
2462       next = GetNextByChar(current_node, key[i]);
2463     }
2464 
2465     if (next == nullptr) {
2466       // Key does not exist in the trie, nothing to delete.
2467       return true;
2468     }
2469 
2470     // Checks the real size of next array.
2471     uint32_t next_array_buffer_size = 1u << current_node->log2_num_children();
2472     Next *next_array_start = storage_->GetMutableNextArray(
2473         current_node->next_index(), next_array_buffer_size);
2474     int valid_next_array_size =
2475         GetValidNextsSize(next_array_start, next_array_buffer_size);
2476     if (valid_next_array_size == 0) {
2477       // Key does not exist in the trie, nothing to delete.
2478       // This shouldn't happen, but we put a sanity check here in case something
2479       // is wrong.
2480       return true;
2481     } else if (valid_next_array_size == 1) {
2482       // Single-child branch will be deleted.
2483       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2484     } else {
2485       // We see a new node with multiple children, all the previously seen nodes
2486       // shouldn't be removed.
2487       last_multichild_node = current_node;
2488       nexts_to_reset.clear();
2489       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2490     }
2491 
2492     // Updates current_node.
2493     current_node = storage_->GetNode(next->node_index());
2494   }
2495   // Now we've found the key in the trie.
2496 
2497   ClearSuffixAndValue(current_node->next_index());
2498 
2499   // Resets nexts to remove key information.
2500   for (uint32_t next_index : nexts_to_reset) {
2501     ResetNext(next_index);
2502   }
2503   SortNextArray(last_multichild_node);
2504 
2505   return true;
2506 }
2507 
ClearPropertyForAllValues(uint32_t property_id)2508 bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
2509   if (!is_initialized()) {
2510     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2511   }
2512 
2513   PropertyReadersAll readers(*this);
2514   if (!readers.Exists(property_id)) {
2515     ICING_VLOG(1) << IcingStringUtil::StringPrintf(
2516         "Properties for id %u don't exist", property_id);
2517     return true;
2518   }
2519 
2520   // Mark values that have no other properties set as as deleted.
2521   uint64_t max_idx =
2522       ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size());
2523   // TODO(vishwajith) Inefficient to do this bit by bit, should be word by
2524   // word. Removing a corpus is likely rare enough that this is low priority.
2525   for (uint64_t i = 0; i < max_idx; ++i) {
2526     // See if the bit is set in our property map.
2527     if (readers.IsPropertyUnique(property_id, i)) {
2528       deleted_bitmap_->SetBit(i, true);
2529     }
2530   }
2531 
2532   // Now delete the bitmap file for this property.
2533   std::unique_ptr<IcingFlashBitmap> bitmap(
2534       std::move(property_bitmaps_[property_id]));
2535   // bitmap cannot be null here, because then readers.Exists(property_id) would
2536   // have returned false earlier, and we wouldn't get here.
2537   if (bitmap == nullptr) {
2538     ICING_LOG(ERROR) << "Property bitmap is null";
2539     return false;
2540   }
2541 
2542   return bitmap->Delete();
2543 }
2544 
Exists() const2545 bool IcingDynamicTrie::PropertyReaderBase::Exists() const {
2546   return bitmap_ != nullptr;
2547 }
2548 
HasProperty(uint32_t value_index) const2549 bool IcingDynamicTrie::PropertyReaderBase::HasProperty(
2550     uint32_t value_index) const {
2551   return bitmap_ &&
2552          bitmap_->GetBit(trie_.ValueIndexToPropertyBitmapIndex(value_index));
2553 }
2554 
PropertyReaderBase(const IcingDynamicTrie & trie,bool deleted,uint32_t property_id)2555 IcingDynamicTrie::PropertyReaderBase::PropertyReaderBase(
2556     const IcingDynamicTrie &trie, bool deleted, uint32_t property_id)
2557     : trie_(trie) {
2558   if (!trie.is_initialized()) {
2559     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2560   }
2561 
2562   if (deleted) {
2563     bitmap_ = trie.deleted_bitmap_.get();
2564   } else if (property_id < trie.property_bitmaps_.size()) {
2565     bitmap_ = trie.property_bitmaps_[property_id].get();
2566   } else {
2567     bitmap_ = nullptr;
2568   }
2569 }
2570 
PropertyReadersAll(const IcingDynamicTrie & trie)2571 IcingDynamicTrie::PropertyReadersAll::PropertyReadersAll(
2572     const IcingDynamicTrie &trie)
2573     : trie_(trie) {
2574   if (!trie.is_initialized()) {
2575     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2576   }
2577 }
2578 
Exists(uint32_t property_id) const2579 bool IcingDynamicTrie::PropertyReadersAll::Exists(uint32_t property_id) const {
2580   return property_id < trie_.property_bitmaps_.size() &&
2581          trie_.property_bitmaps_[property_id];
2582 }
2583 
HasProperty(uint32_t property_id,uint32_t value_index) const2584 bool IcingDynamicTrie::PropertyReadersAll::HasProperty(
2585     uint32_t property_id, uint32_t value_index) const {
2586   return property_id < trie_.property_bitmaps_.size() &&
2587          trie_.property_bitmaps_[property_id] &&
2588          trie_.property_bitmaps_[property_id]->GetBit(
2589              trie_.ValueIndexToPropertyBitmapIndex(value_index));
2590 }
2591 
IsPropertyUnique(uint32_t property_id,uint32_t value_index) const2592 bool IcingDynamicTrie::PropertyReadersAll::IsPropertyUnique(
2593     uint32_t property_id, uint32_t value_index) const {
2594   uint32_t idx = trie_.ValueIndexToPropertyBitmapIndex(value_index);
2595 
2596   // First check that value is set for the requested id.
2597   if (property_id >= trie_.property_bitmaps_.size() ||
2598       !trie_.property_bitmaps_[property_id] ||
2599       !trie_.property_bitmaps_[property_id]->GetBit(idx)) {
2600     return false;
2601   }
2602 
2603   // Now check that the value is not set for the rest.
2604   for (size_t i = 0; i < trie_.property_bitmaps_.size(); ++i) {
2605     if (i == property_id) {
2606       continue;
2607     }
2608     if (trie_.property_bitmaps_[i] && trie_.property_bitmaps_[i]->GetBit(idx)) {
2609       return false;
2610     }
2611   }
2612   return true;
2613 }
2614 
size() const2615 size_t IcingDynamicTrie::PropertyReadersAll::size() const {
2616   return trie_.property_bitmaps_.size();
2617 }
2618 
ValueIndexToPropertyBitmapIndex(uint32_t value_index) const2619 uint64_t IcingDynamicTrie::ValueIndexToPropertyBitmapIndex(
2620     uint32_t value_index) const {
2621   // We know that value indices are separated by at least 1 +
2622   // value_size() bytes (for the null terminator and the value).
2623   return value_index / (value_size() + 1);
2624 }
2625 
2626 // Testing hooks.
GetHeader(IcingDynamicTrieHeader * hdr) const2627 void IcingDynamicTrie::GetHeader(IcingDynamicTrieHeader *hdr) const {
2628   if (!is_initialized()) {
2629     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2630   }
2631 
2632   *hdr = storage_->hdr();
2633 }
2634 
SetHeader(const IcingDynamicTrieHeader & new_hdr)2635 void IcingDynamicTrie::SetHeader(const IcingDynamicTrieHeader &new_hdr) {
2636   if (!is_initialized()) {
2637     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2638   }
2639 
2640   storage_->hdr_.hdr = new_hdr;
2641   storage_->WriteHeader();
2642 }
2643 
2644 }  // namespace lib
2645 }  // namespace icing
2646