• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Copyright 2012 Google Inc. All Rights Reserved.
16 // Author: ulas@google.com (Ulas Kirazci)
17 //
18 // A disk-backed array.
19 
20 #ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
21 #define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
22 
23 #include <cstdint>
24 #include <string>
25 #include <vector>
26 
27 #include "icing/legacy/index/icing-filesystem.h"
28 #include "icing/legacy/index/icing-mmapper.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 class IcingArrayStorage {
34  public:
35   explicit IcingArrayStorage(const IcingFilesystem &filesystem);
36   ~IcingArrayStorage();
37 
38   // Mmap a disk-backed array at fd_offset in fd. fd is owned by the
39   // caller and must be kept valid.
40   //
41   // If map_shared is true, changes to GetMutableMem immediately apply
42   // to the backing store. Otherwise changes are kept private until an
43   // explicit call to Flush.
44   //
45   // Each element in the array is elt_size bytes and the array is
46   // valid up to num_elts. max_num_elts is the max that the array is
47   // allowed to grow to.
48   //
49   // If crc_ptr is not NULL, explicit calls to UpdateCrc keep the crc
50   // of the array in *crc_ptr.
51   //
52   // If init_crc is true, the crc of the array is recomputed and
53   // written into crc_ptr. Else, the crc of the array is checked
54   // against the current value in crc_ptr and Init fails if the crc
55   // does not match.
56   //
57   // REQUIRES: !is_initialized()
58   bool Init(int fd, size_t fd_offset, bool map_shared, uint32_t elt_size,
59             uint32_t num_elts, uint32_t max_num_elts, uint32_t *crc_ptr,
60             bool init_crc);
61 
62   // Undo Init. Make is_initialized() == false.
63   void Reset();
64 
is_initialized()65   bool is_initialized() const { return mmapper_ != nullptr; }
66 
67   // Attempt to swap into RAM.
68   void Warm() const;
69 
70   // Make array empty again.
71   void Clear();
72 
73   // Intent to write memory at (elt_idx, elt_idx + elt_len). Returns
74   // NULL if file cannot be grown to accommodate that offset.
75   template <class T>
GetMutableMem(uint32_t elt_idx,uint32_t elt_len)76   T *GetMutableMem(uint32_t elt_idx, uint32_t elt_len) {
77     return static_cast<T *>(GetMutableMemInternal(elt_idx, elt_len));
78   }
79 
80   // Resizes to first elt_len elements.
81   // REQUIRES: elt_len <= num_elts()
82   void Truncate(uint32_t len);
83 
84   // Push changes to crc into crc_ptr. No effect if crc_ptr is NULL.
85   void UpdateCrc();
86 
87   // Write and sync dirty pages to fd starting at offset. Returns
88   // number of pages synced.
89   uint32_t Sync();
90 
91   // Accessors.
array()92   const uint8_t *array() const { return mmapper_->address(); }
93   template <class T>
array_cast()94   const T *array_cast() const {
95     return reinterpret_cast<const T *>(array());
96   }
num_elts()97   uint32_t num_elts() const { return cur_num_; }
max_num_elts()98   uint32_t max_num_elts() const { return max_num_; }
max_size()99   uint32_t max_size() const { return max_num_elts() * elt_size_; }
100 
101   // For stats.
num_dirty_pages()102   uint32_t num_dirty_pages() const {
103     uint32_t num = 0;
104     for (size_t i = 0; i < dirty_pages_.size(); i++) {
105       if (dirty_pages_[i]) num++;
106     }
107     return num;
108   }
109 
110  private:
111   // We track partial updates to the array for CRC updating. This
112   // requires extra memory to keep track of original buffers but
113   // allows for much faster CRC re-computation. This is the frac limit
114   // of byte len after which we will discard recorded changes and
115   // recompute the entire CRC instead.
116   static const uint32_t kPartialCrcLimitDiv;  // 10 means limit is 1/10
117 
118   // Grow file by at least this many elts if array is growable.
119   static const size_t kGrowElts;
120 
121   // A change record (somebody called GetMutableMem on this
122   // region). We only keep changes <= changes_end_.
123   struct Change {
ChangeChange124     Change(uint32_t o, uint32_t l) : elt_offset(o), elt_len(l) {}
125 
126     uint32_t elt_offset;
127     uint32_t elt_len;
128   };
129   static_assert(8 == sizeof(Change), "sizeof(Change) != 8");
130   static_assert(4 == alignof(Change), "alignof(Change) != 4");
131 
132   void *GetMutableMemInternal(uint32_t elt_idx, uint32_t elt_len);
133 
134   bool GrowIfNecessary(uint32_t num_elts);
135 
136   int fd_;
137   size_t fd_offset_;
138   bool map_shared_;
139   IcingMMapper *mmapper_;
140 
141   // Size of an element in the array.
142   uint32_t elt_size_;
143 
144   // In bytes.
145   uint32_t cur_num_;       // cur boundary of written elts
146   uint32_t changes_end_;   // cur_num_ at last call to UpdateCrc
147   uint32_t max_num_;       // size of array in elts
148   uint32_t capacity_num_;  // num elts that can be accommodated by file size
149 
150   uint32_t *crc_ptr_;
151 
152   // Changes that have happened since the last update
153   // (between [0, changes_end_)).
154   std::vector<Change> changes_;
155   std::string saved_orig_buf_;
156 
157   // Keep track of all pages we touched so we can write them back to
158   // disk.
159   std::vector<bool> dirty_pages_;
160 
161   const IcingFilesystem &filesystem_;
162 };
163 
164 }  // namespace lib
165 }  // namespace icing
166 
167 #endif  // ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
168