• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
16 #define ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
17 
18 #include <sys/types.h>
19 
20 #include <cstdint>
21 #include <memory>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/file/filesystem.h"
26 #include "icing/file/posting_list/posting-list-common.h"
27 #include "icing/file/posting_list/posting-list-used.h"
28 #include "icing/legacy/index/icing-bit-util.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 // This class is used to manage I/O to a single flash block and to manage the
34 // division of that flash block into PostingLists. It provides an interface to
35 // allocate, free and read posting lists. Note that IndexBlock is stateless:
36 // - Any changes to block header will be synced to disk before the method
37 //   returns.
38 // - Any posting list allocation/freeing will be synced to disk before the
39 //   method returns.
40 // - When getting an allocated posting list, it PReads the contents from disk to
41 //   a buffer and transfer the ownership to PostingListUsed. Any changes to
42 //   PostingListUsed will not be visible to other instances until calling
43 //   WritePostingListToDisk.
44 //
45 // An IndexBlock contains a small header and an array of fixed-size posting list
46 // buffers. Initially, all posting lists are chained in a singly-linked free
47 // list.
48 //
49 // When we want to get a new PostingList from an IndexBlock, we just pull one
50 // off the free list. When the user wants to return the PostingList to the free
51 // pool, we prepend it to the free list.
52 //
53 // Read-write the same block is NOT thread safe. If we try to read-write the
54 // same block at the same time (either by the same or different IndexBlock
55 // instances), then it causes race condition and the behavior is undefined.
56 class IndexBlock {
57  public:
58   // What is the maximum posting list size in bytes that can be stored in this
59   // block.
CalculateMaxPostingListBytes(uint32_t block_size_in_bytes,uint32_t data_type_bytes)60   static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes,
61                                                uint32_t data_type_bytes) {
62     return (block_size_in_bytes - sizeof(BlockHeader)) / data_type_bytes *
63            data_type_bytes;
64   }
65 
66   // Creates an IndexBlock to reference the previously used region of the file
67   // descriptor starting at block_file_offset with size block_size.
68   //
69   // - serializer: for reading/writing posting list. Also some additional
70   //               information (e.g. data size) should be provided by the
71   //               serializer.
72   // - fd: a valid file descriptor opened for write by the caller.
73   // - block_file_offset: absolute offset of the file (fd).
74   // - block_size: byte size of this block.
75   //
76   // Unlike CreateFromUninitializedRegion, a pre-existing index block has
77   // already determined and written posting list bytes into block header, so it
78   // will be read from block header and the caller doesn't have to provide.
79   //
80   // RETURNS:
81   //   - A valid IndexBlock instance on success
82   //   - INVALID_ARGUMENT_ERROR
83   //     - If block_size is too small for even just the BlockHeader
84   //     - If the posting list size stored in the region is not a valid posting
85   //       list size (e.g. exceeds max_posting_list_bytes(size))
86   //   - INTERNAL_ERROR on I/O error
87   static libtextclassifier3::StatusOr<IndexBlock>
88   CreateFromPreexistingIndexBlockRegion(const Filesystem* filesystem,
89                                         PostingListSerializer* serializer,
90                                         int fd, off_t block_file_offset,
91                                         uint32_t block_size);
92 
93   // Creates an IndexBlock to reference an uninitialized region of the file
94   // descriptor starting at block_file_offset with size block_size. The
95   // IndexBlock will initialize the region to be an empty IndexBlock with
96   // posting lists of size posting_list_bytes.
97   //
98   // - serializer: for reading/writing posting list. Also some additional
99   //               information (e.g. data size) should be provided by the
100   //               serializer.
101   // - fd: a valid file descriptor opened for write by the caller.
102   // - block_file_offset: absolute offset of the file (fd).
103   // - block_size: byte size of this block.
104   // - posting_list_bytes: byte size of all posting lists in this block. This
105   //   information will be written into block header.
106   //
107   // RETURNS:
108   //   - A valid IndexBlock instance on success
109   //   - INVALID_ARGUMENT_ERROR
110   //     - If block_size is too small for even just the BlockHeader
111   //     - If the posting list size stored in the region is not a valid posting
112   //       list size (e.g. exceeds max_posting_list_bytes(size))
113   //   - INTERNAL_ERROR on I/O error
114   static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
115       const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
116       off_t block_file_offset, uint32_t block_size,
117       uint32_t posting_list_bytes);
118 
119   IndexBlock(const IndexBlock&) = delete;
120   IndexBlock& operator=(const IndexBlock&) = delete;
121   IndexBlock(IndexBlock&&) = default;
122   IndexBlock& operator=(IndexBlock&&) = default;
123 
124   ~IndexBlock() = default;
125 
126   struct PostingListAndBlockInfo {
127     PostingListUsed posting_list_used;
128     PostingListIndex posting_list_index;
129 
130     uint32_t next_block_index;
131 
132     // Flag indicating if there are any free posting lists available after this
133     // allocation request.
134     bool has_free_posting_lists;
135 
PostingListAndBlockInfoPostingListAndBlockInfo136     explicit PostingListAndBlockInfo(PostingListUsed&& posting_list_used_in,
137                                      PostingListIndex posting_list_index_in,
138                                      uint32_t next_block_index_in,
139                                      bool has_free_posting_lists_in)
140         : posting_list_used(std::move(posting_list_used_in)),
141           posting_list_index(posting_list_index_in),
142           next_block_index(next_block_index_in),
143           has_free_posting_lists(has_free_posting_lists_in) {}
144   };
145 
146   // PReads existing posting list content at posting_list_index, instantiates a
147   // PostingListUsed, and returns it with some additional index block info.
148   //
149   // RETURNS:
150   //   - A valid PostingListAndBlockInfo on success
151   //   - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
152   //     >= max_num_posting_lists()
153   //   - INTERNAL_ERROR on I/O error
154   libtextclassifier3::StatusOr<PostingListAndBlockInfo> GetAllocatedPostingList(
155       PostingListIndex posting_list_index);
156 
157   // Allocates a PostingListUsed in the IndexBlock, initializes the content
158   // (by serializer), and returns the initialized PostingListUsed instance,
159   // PostingListIndex, and some additional index block info.
160   //
161   // RETURNS:
162   //   - A valid PostingListAndBlockInfo instance on success
163   //   - RESOURCE_EXHAUSTED_ERROR if there is already no free posting list
164   //     available, i.e. !HasFreePostingLists()
165   //   - INTERNAL_ERROR on I/O error
166   libtextclassifier3::StatusOr<PostingListAndBlockInfo> AllocatePostingList();
167 
168   // Frees a posting list at posting_list_index, adds it into the free list
169   // chain and updates block header. Both changes on posting list free and
170   // header will be synced to disk.
171   //
172   // It is considered an error to "double-free" a posting list. You should never
173   // call FreePostingList(index) with the same index twice, unless that index
174   // was returned by an intervening AllocatePostingList() call.
175   //
176   // Ex.
177   //   PostingListIndex index = block.AllocatePostingList();
178   //   DoSomething(block.GetAllocatedPostingList(index));
179   //   block.FreePostingList(index);
180   //   block.FreePostingList(index);  // Argh! What are you doing?!
181   //   ...
182   //   PostingListIndex index = block.AllocatePostingList();
183   //   DoSomething(block.GetAllocatedPostingList(index));
184   //   block.FreePostingList(index);
185   //   index = block.AllocatePostingList();
186   //   DoSomethingElse(block.GetAllocatedPostingList(index));
187   //   // A-Ok! We called AllocatePostingList() since the last FreePostingList()
188   //   // call.
189   //   block.FreePostingList(index);
190   //
191   // RETURNS:
192   //   - OK on success
193   //   - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index
194   //     >= max_num_posting_lists()
195   //   - INTERNAL_ERROR on I/O error
196   libtextclassifier3::Status FreePostingList(
197       PostingListIndex posting_list_index);
198 
199   // Writes back an allocated posting list (PostingListUsed) at
200   // posting_list_index to disk.
201   //
202   // RETURNS:
203   //   - OK on success
204   //   - INVALID_ARGUMENT_ERROR
205   //     - If posting_list_index < 0 or posting_list_index >=
206   //       max_num_posting_lists()
207   //     - If posting_list_used.size_in_bytes() != posting_list_bytes_
208   //   - INTERNAL_ERROR on I/O error
209   libtextclassifier3::Status WritePostingListToDisk(
210       const PostingListUsed& posting_list_used,
211       PostingListIndex posting_list_index);
212 
213   // PReads to get the index of next block from block header. Blocks can be
214   // chained, and the interpretation of the chaining is up to the caller.
215   //
216   // RETURNS:
217   //   - Next block index on success
218   //   - INTERNAL_ERROR on I/O error
219   libtextclassifier3::StatusOr<uint32_t> GetNextBlockIndex() const;
220 
221   // PWrites block header to set the index of next block.
222   //
223   // RETURNS:
224   //   - OK on success
225   //   - INTERNAL_ERROR on I/O error
226   libtextclassifier3::Status SetNextBlockIndex(uint32_t next_block_index);
227 
228   // PReads to get whether or not there are available posting lists in the free
229   // list.
230   //
231   // RETURNS:
232   //   - A bool value on success
233   //   - INTERNAL_ERROR on I/O error
234   libtextclassifier3::StatusOr<bool> HasFreePostingLists() const;
235 
236   // Retrieves the size (in bytes) of the posting lists in this IndexBlock.
posting_list_bytes()237   uint32_t posting_list_bytes() const { return posting_list_bytes_; }
238 
239   // Retrieves maximum number of posting lists in the block.
max_num_posting_lists()240   uint32_t max_num_posting_lists() const {
241     return total_posting_lists_bytes() / posting_list_bytes_;
242   }
243 
244   // Retrieves number of bits required to store the largest PostingListIndex in
245   // this block.
posting_list_index_bits()246   int posting_list_index_bits() const {
247     return BitsToStore(max_num_posting_lists());
248   }
249 
250  private:
251   struct BlockHeader {
252     // Index of the next block if this block is being chained or part of a free
253     // list.
254     uint32_t next_block_index;
255 
256     // Index to the first PostingListFree in the IndexBlock. This is the start
257     // of the free list.
258     PostingListIndex free_list_posting_list_index;
259 
260     // The size of each posting list in the IndexBlock. This value will be
261     // initialized when calling CreateFromUninitializedRegion once and remain
262     // unchanged.
263     uint32_t posting_list_bytes;
264   };
265 
266   // Assumes that fd has been opened for write.
IndexBlock(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size_in_bytes,uint32_t posting_list_bytes)267   explicit IndexBlock(const Filesystem* filesystem,
268                       PostingListSerializer* serializer, int fd,
269                       off_t block_file_offset, uint32_t block_size_in_bytes,
270                       uint32_t posting_list_bytes)
271       : filesystem_(filesystem),
272         serializer_(serializer),
273         fd_(fd),
274         block_file_offset_(block_file_offset),
275         block_size_in_bytes_(block_size_in_bytes),
276         posting_list_bytes_(posting_list_bytes) {}
277 
278   // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
279   // all posting lists to the free list.
280   //
281   // RETURNS:
282   //   - OK on success
283   //   - INTERNAL_ERROR on I/O error
284   libtextclassifier3::Status Reset();
285 
286   // Frees a posting list at posting_list_index, adds it into the free list
287   // chain and updates (sets) the given block header instance.
288   //
289   // - This function is served to avoid redundant block header PWrite when
290   //   freeing multiple posting lists.
291   // - The caller should provide a BlockHeader instance for updating the free
292   //   list chain, and finally sync it to disk.
293   //
294   // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
295   //
296   // RETURNS:
297   //   - OK on success
298   //   - INTERNAL_ERROR on I/O error
299   libtextclassifier3::Status FreePostingListImpl(
300       BlockHeader& header, PostingListIndex posting_list_index);
301 
302   // PReads block header from the file.
303   //
304   // RETURNS:
305   //   - A BlockHeader instance on success
306   //   - INTERNAL_ERROR on I/O error
307   libtextclassifier3::StatusOr<BlockHeader> ReadHeader() const;
308 
309   // PReads posting list content at posting_list_index. Note that it can be a
310   // freed or allocated posting list.
311   //
312   // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
313   //
314   // RETURNS:
315   //   - A data buffer with size = posting_list_bytes_ on success
316   //   - INTERNAL_ERROR on I/O error
317   libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>> ReadPostingList(
318       PostingListIndex posting_list_index) const;
319 
320   // PWrites block header to the file.
321   //
322   // RETURNS:
323   //   - OK on success
324   //   - INTERNAL_ERROR on I/O error
325   libtextclassifier3::Status WriteHeader(const BlockHeader& header);
326 
327   // PWrites posting list content at posting_list_index. Note that it can be a
328   // freed or allocated posting list.
329   //
330   // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() and size of
331   //   posting_list_buffer is posting_list_bytes_.
332   //
333   // RETURNS:
334   //   - OK on success
335   //   - INTERNAL_ERROR on I/O error
336   libtextclassifier3::Status WritePostingList(
337       PostingListIndex posting_list_index, const uint8_t* posting_list_buffer);
338 
339   // Retrieves the absolute file (fd) offset of a posting list at
340   // posting_list_index.
341   //
342   // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes()
get_posting_list_file_offset(PostingListIndex posting_list_index)343   off_t get_posting_list_file_offset(
344       PostingListIndex posting_list_index) const {
345     return block_file_offset_ + sizeof(BlockHeader) +
346            posting_list_bytes_ * posting_list_index;
347   }
348 
349   // Retrieves the byte size in the block available for posting lists (excluding
350   // the size of block header).
total_posting_lists_bytes()351   uint32_t total_posting_lists_bytes() const {
352     return block_size_in_bytes_ - sizeof(BlockHeader);
353   }
354 
355   const Filesystem* filesystem_;  // Does not own.
356 
357   PostingListSerializer* serializer_;  // Does not own.
358 
359   int fd_;  // Does not own.
360 
361   off_t block_file_offset_;
362   uint32_t block_size_in_bytes_;
363   uint32_t posting_list_bytes_;
364 };
365 
366 }  // namespace lib
367 }  // namespace icing
368 
369 #endif  // ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_
370