• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Copyright 2012 Google Inc. All Rights Reserved.
16 // Author: ulas@google.com (Ulas Kirazci)
17 //
18 // A disk-backed array.
19 
20 #ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
21 #define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
22 
23 #include <stdint.h>
24 
25 #include <string>
26 #include <vector>
27 
28 #include "icing/legacy/index/icing-filesystem.h"
29 #include "icing/legacy/index/icing-mmapper.h"
30 
31 namespace icing {
32 namespace lib {
33 
34 class IcingArrayStorage {
35  public:
36   explicit IcingArrayStorage(const IcingFilesystem &filesystem);
37   ~IcingArrayStorage();
38 
39   // Mmap a disk-backed array at fd_offset in fd. fd is owned by the
40   // caller and must be kept valid.
41   //
42   // If map_shared is true, changes to GetMutableMem immediately apply
43   // to the backing store. Otherwise changes are kept private until an
44   // explicit call to Flush.
45   //
46   // Each element in the array is elt_size bytes and the array is
47   // valid up to num_elts. max_num_elts is the max that the array is
48   // allowed to grow to.
49   //
50   // If crc_ptr is not NULL, explicit calls to UpdateCrc keep the crc
51   // of the array in *crc_ptr.
52   //
53   // If init_crc is true, the crc of the array is recomputed and
54   // written into crc_ptr. Else, the crc of the array is checked
55   // against the current value in crc_ptr and Init fails if the crc
56   // does not match.
57   //
58   // REQUIRES: !is_initialized()
59   bool Init(int fd, size_t fd_offset, bool map_shared, uint32_t elt_size,
60             uint32_t num_elts, uint32_t max_num_elts, uint32_t *crc_ptr,
61             bool init_crc);
62 
63   // Undo Init. Make is_initialized() == false.
64   void Reset();
65 
is_initialized()66   bool is_initialized() const { return mmapper_ != nullptr; }
67 
68   // Attempt to swap into RAM.
69   void Warm() const;
70 
71   // Make array empty again.
72   void Clear();
73 
74   // Intent to write memory at (elt_idx, elt_idx + elt_len). Returns
75   // NULL if file cannot be grown to accommodate that offset.
76   template <class T>
GetMutableMem(uint32_t elt_idx,uint32_t elt_len)77   T *GetMutableMem(uint32_t elt_idx, uint32_t elt_len) {
78     return static_cast<T *>(GetMutableMemInternal(elt_idx, elt_len));
79   }
80 
81   // Resizes to first elt_len elements.
82   // REQUIRES: elt_len <= num_elts()
83   void Truncate(uint32_t len);
84 
85   // Push changes to crc into crc_ptr. No effect if crc_ptr is NULL.
86   void UpdateCrc();
87 
88   // Write and sync dirty pages to fd starting at offset. Returns
89   // number of pages synced.
90   uint32_t Sync();
91 
92   // Accessors.
array()93   const uint8_t *array() const { return mmapper_->address(); }
94   template <class T>
array_cast()95   const T *array_cast() const {
96     return reinterpret_cast<const T *>(array());
97   }
num_elts()98   uint32_t num_elts() const { return cur_num_; }
max_num_elts()99   uint32_t max_num_elts() const { return max_num_; }
max_size()100   uint32_t max_size() const { return max_num_elts() * elt_size_; }
101 
102   // For stats.
num_dirty_pages()103   uint32_t num_dirty_pages() const {
104     uint32_t num = 0;
105     for (size_t i = 0; i < dirty_pages_.size(); i++) {
106       if (dirty_pages_[i]) num++;
107     }
108     return num;
109   }
110 
111  private:
112   // We track partial updates to the array for CRC updating. This
113   // requires extra memory to keep track of original buffers but
114   // allows for much faster CRC re-computation. This is the frac limit
115   // of byte len after which we will discard recorded changes and
116   // recompute the entire CRC instead.
117   static const uint32_t kPartialCrcLimitDiv;  // 10 means limit is 1/10
118 
119   // Grow file by at least this many elts if array is growable.
120   static const size_t kGrowElts;
121 
122   // A change record (somebody called GetMutableMem on this
123   // region). We only keep changes <= changes_end_.
124   struct Change {
ChangeChange125     Change(uint32_t o, uint32_t l) : elt_offset(o), elt_len(l) {}
126 
127     uint32_t elt_offset;
128     uint32_t elt_len;
129   };
130   static_assert(8 == sizeof(Change), "sizeof(Change) != 8");
131   static_assert(4 == alignof(Change), "alignof(Change) != 4");
132 
133   void *GetMutableMemInternal(uint32_t elt_idx, uint32_t elt_len);
134 
135   bool GrowIfNecessary(uint32_t num_elts);
136 
137   int fd_;
138   size_t fd_offset_;
139   bool map_shared_;
140   IcingMMapper *mmapper_;
141 
142   // Size of an element in the array.
143   uint32_t elt_size_;
144 
145   // In bytes.
146   uint32_t cur_num_;       // cur boundary of written elts
147   uint32_t changes_end_;   // cur_num_ at last call to UpdateCrc
148   uint32_t max_num_;       // size of array in elts
149   uint32_t capacity_num_;  // num elts that can be accommodated by file size
150 
151   uint32_t *crc_ptr_;
152 
153   // Changes that have happened since the last update
154   // (between [0, changes_end_)).
155   std::vector<Change> changes_;
156   std::string saved_orig_buf_;
157 
158   // Keep track of all pages we touched so we can write them back to
159   // disk.
160   std::vector<bool> dirty_pages_;
161 
162   const IcingFilesystem &filesystem_;
163 };
164 
165 }  // namespace lib
166 }  // namespace icing
167 
168 #endif  // ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
169