1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_ 16 #define ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_ 17 18 #include <sys/types.h> 19 20 #include <cstdint> 21 #include <memory> 22 23 #include "icing/text_classifier/lib3/utils/base/status.h" 24 #include "icing/text_classifier/lib3/utils/base/statusor.h" 25 #include "icing/file/filesystem.h" 26 #include "icing/file/posting_list/posting-list-common.h" 27 #include "icing/file/posting_list/posting-list-used.h" 28 #include "icing/legacy/index/icing-bit-util.h" 29 30 namespace icing { 31 namespace lib { 32 33 // This class is used to manage I/O to a single flash block and to manage the 34 // division of that flash block into PostingLists. It provides an interface to 35 // allocate, free and read posting lists. Note that IndexBlock is stateless: 36 // - Any changes to block header will be synced to disk before the method 37 // returns. 38 // - Any posting list allocation/freeing will be synced to disk before the 39 // method returns. 40 // - When getting an allocated posting list, it PReads the contents from disk to 41 // a buffer and transfer the ownership to PostingListUsed. Any changes to 42 // PostingListUsed will not be visible to other instances until calling 43 // WritePostingListToDisk. 44 // 45 // An IndexBlock contains a small header and an array of fixed-size posting list 46 // buffers. Initially, all posting lists are chained in a singly-linked free 47 // list. 48 // 49 // When we want to get a new PostingList from an IndexBlock, we just pull one 50 // off the free list. When the user wants to return the PostingList to the free 51 // pool, we prepend it to the free list. 52 // 53 // Read-write the same block is NOT thread safe. If we try to read-write the 54 // same block at the same time (either by the same or different IndexBlock 55 // instances), then it causes race condition and the behavior is undefined. 56 class IndexBlock { 57 public: 58 // What is the maximum posting list size in bytes that can be stored in this 59 // block. CalculateMaxPostingListBytes(uint32_t block_size_in_bytes,uint32_t data_type_bytes)60 static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes, 61 uint32_t data_type_bytes) { 62 return (block_size_in_bytes - sizeof(BlockHeader)) / data_type_bytes * 63 data_type_bytes; 64 } 65 66 // Creates an IndexBlock to reference the previously used region of the file 67 // descriptor starting at block_file_offset with size block_size. 68 // 69 // - serializer: for reading/writing posting list. Also some additional 70 // information (e.g. data size) should be provided by the 71 // serializer. 72 // - fd: a valid file descriptor opened for write by the caller. 73 // - block_file_offset: absolute offset of the file (fd). 74 // - block_size: byte size of this block. 75 // 76 // Unlike CreateFromUninitializedRegion, a pre-existing index block has 77 // already determined and written posting list bytes into block header, so it 78 // will be read from block header and the caller doesn't have to provide. 79 // 80 // RETURNS: 81 // - A valid IndexBlock instance on success 82 // - INVALID_ARGUMENT_ERROR 83 // - If block_size is too small for even just the BlockHeader 84 // - If the posting list size stored in the region is not a valid posting 85 // list size (e.g. exceeds max_posting_list_bytes(size)) 86 // - INTERNAL_ERROR on I/O error 87 static libtextclassifier3::StatusOr<IndexBlock> 88 CreateFromPreexistingIndexBlockRegion(const Filesystem* filesystem, 89 PostingListSerializer* serializer, 90 int fd, off_t block_file_offset, 91 uint32_t block_size); 92 93 // Creates an IndexBlock to reference an uninitialized region of the file 94 // descriptor starting at block_file_offset with size block_size. The 95 // IndexBlock will initialize the region to be an empty IndexBlock with 96 // posting lists of size posting_list_bytes. 97 // 98 // - serializer: for reading/writing posting list. Also some additional 99 // information (e.g. data size) should be provided by the 100 // serializer. 101 // - fd: a valid file descriptor opened for write by the caller. 102 // - block_file_offset: absolute offset of the file (fd). 103 // - block_size: byte size of this block. 104 // - posting_list_bytes: byte size of all posting lists in this block. This 105 // information will be written into block header. 106 // 107 // RETURNS: 108 // - A valid IndexBlock instance on success 109 // - INVALID_ARGUMENT_ERROR 110 // - If block_size is too small for even just the BlockHeader 111 // - If the posting list size stored in the region is not a valid posting 112 // list size (e.g. exceeds max_posting_list_bytes(size)) 113 // - INTERNAL_ERROR on I/O error 114 static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion( 115 const Filesystem* filesystem, PostingListSerializer* serializer, int fd, 116 off_t block_file_offset, uint32_t block_size, 117 uint32_t posting_list_bytes); 118 119 IndexBlock(const IndexBlock&) = delete; 120 IndexBlock& operator=(const IndexBlock&) = delete; 121 IndexBlock(IndexBlock&&) = default; 122 IndexBlock& operator=(IndexBlock&&) = default; 123 124 ~IndexBlock() = default; 125 126 struct PostingListAndBlockInfo { 127 PostingListUsed posting_list_used; 128 PostingListIndex posting_list_index; 129 130 uint32_t next_block_index; 131 132 // Flag indicating if there are any free posting lists available after this 133 // allocation request. 134 bool has_free_posting_lists; 135 PostingListAndBlockInfoPostingListAndBlockInfo136 explicit PostingListAndBlockInfo(PostingListUsed&& posting_list_used_in, 137 PostingListIndex posting_list_index_in, 138 uint32_t next_block_index_in, 139 bool has_free_posting_lists_in) 140 : posting_list_used(std::move(posting_list_used_in)), 141 posting_list_index(posting_list_index_in), 142 next_block_index(next_block_index_in), 143 has_free_posting_lists(has_free_posting_lists_in) {} 144 }; 145 146 // PReads existing posting list content at posting_list_index, instantiates a 147 // PostingListUsed, and returns it with some additional index block info. 148 // 149 // RETURNS: 150 // - A valid PostingListAndBlockInfo on success 151 // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index 152 // >= max_num_posting_lists() 153 // - INTERNAL_ERROR on I/O error 154 libtextclassifier3::StatusOr<PostingListAndBlockInfo> GetAllocatedPostingList( 155 PostingListIndex posting_list_index); 156 157 // Allocates a PostingListUsed in the IndexBlock, initializes the content 158 // (by serializer), and returns the initialized PostingListUsed instance, 159 // PostingListIndex, and some additional index block info. 160 // 161 // RETURNS: 162 // - A valid PostingListAndBlockInfo instance on success 163 // - RESOURCE_EXHAUSTED_ERROR if there is already no free posting list 164 // available, i.e. !HasFreePostingLists() 165 // - INTERNAL_ERROR on I/O error 166 libtextclassifier3::StatusOr<PostingListAndBlockInfo> AllocatePostingList(); 167 168 // Frees a posting list at posting_list_index, adds it into the free list 169 // chain and updates block header. Both changes on posting list free and 170 // header will be synced to disk. 171 // 172 // It is considered an error to "double-free" a posting list. You should never 173 // call FreePostingList(index) with the same index twice, unless that index 174 // was returned by an intervening AllocatePostingList() call. 175 // 176 // Ex. 177 // PostingListIndex index = block.AllocatePostingList(); 178 // DoSomething(block.GetAllocatedPostingList(index)); 179 // block.FreePostingList(index); 180 // block.FreePostingList(index); // Argh! What are you doing?! 181 // ... 182 // PostingListIndex index = block.AllocatePostingList(); 183 // DoSomething(block.GetAllocatedPostingList(index)); 184 // block.FreePostingList(index); 185 // index = block.AllocatePostingList(); 186 // DoSomethingElse(block.GetAllocatedPostingList(index)); 187 // // A-Ok! We called AllocatePostingList() since the last FreePostingList() 188 // // call. 189 // block.FreePostingList(index); 190 // 191 // RETURNS: 192 // - OK on success 193 // - INVALID_ARGUMENT_ERROR if posting_list_index < 0 or posting_list_index 194 // >= max_num_posting_lists() 195 // - INTERNAL_ERROR on I/O error 196 libtextclassifier3::Status FreePostingList( 197 PostingListIndex posting_list_index); 198 199 // Writes back an allocated posting list (PostingListUsed) at 200 // posting_list_index to disk. 201 // 202 // RETURNS: 203 // - OK on success 204 // - INVALID_ARGUMENT_ERROR 205 // - If posting_list_index < 0 or posting_list_index >= 206 // max_num_posting_lists() 207 // - If posting_list_used.size_in_bytes() != posting_list_bytes_ 208 // - INTERNAL_ERROR on I/O error 209 libtextclassifier3::Status WritePostingListToDisk( 210 const PostingListUsed& posting_list_used, 211 PostingListIndex posting_list_index); 212 213 // PReads to get the index of next block from block header. Blocks can be 214 // chained, and the interpretation of the chaining is up to the caller. 215 // 216 // RETURNS: 217 // - Next block index on success 218 // - INTERNAL_ERROR on I/O error 219 libtextclassifier3::StatusOr<uint32_t> GetNextBlockIndex() const; 220 221 // PWrites block header to set the index of next block. 222 // 223 // RETURNS: 224 // - OK on success 225 // - INTERNAL_ERROR on I/O error 226 libtextclassifier3::Status SetNextBlockIndex(uint32_t next_block_index); 227 228 // PReads to get whether or not there are available posting lists in the free 229 // list. 230 // 231 // RETURNS: 232 // - A bool value on success 233 // - INTERNAL_ERROR on I/O error 234 libtextclassifier3::StatusOr<bool> HasFreePostingLists() const; 235 236 // Retrieves the size (in bytes) of the posting lists in this IndexBlock. posting_list_bytes()237 uint32_t posting_list_bytes() const { return posting_list_bytes_; } 238 239 // Retrieves maximum number of posting lists in the block. max_num_posting_lists()240 uint32_t max_num_posting_lists() const { 241 return total_posting_lists_bytes() / posting_list_bytes_; 242 } 243 244 // Retrieves number of bits required to store the largest PostingListIndex in 245 // this block. posting_list_index_bits()246 int posting_list_index_bits() const { 247 return BitsToStore(max_num_posting_lists()); 248 } 249 250 private: 251 struct BlockHeader { 252 // Index of the next block if this block is being chained or part of a free 253 // list. 254 uint32_t next_block_index; 255 256 // Index to the first PostingListFree in the IndexBlock. This is the start 257 // of the free list. 258 PostingListIndex free_list_posting_list_index; 259 260 // The size of each posting list in the IndexBlock. This value will be 261 // initialized when calling CreateFromUninitializedRegion once and remain 262 // unchanged. 263 uint32_t posting_list_bytes; 264 }; 265 266 // Assumes that fd has been opened for write. IndexBlock(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size_in_bytes,uint32_t posting_list_bytes)267 explicit IndexBlock(const Filesystem* filesystem, 268 PostingListSerializer* serializer, int fd, 269 off_t block_file_offset, uint32_t block_size_in_bytes, 270 uint32_t posting_list_bytes) 271 : filesystem_(filesystem), 272 serializer_(serializer), 273 fd_(fd), 274 block_file_offset_(block_file_offset), 275 block_size_in_bytes_(block_size_in_bytes), 276 posting_list_bytes_(posting_list_bytes) {} 277 278 // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds 279 // all posting lists to the free list. 280 // 281 // RETURNS: 282 // - OK on success 283 // - INTERNAL_ERROR on I/O error 284 libtextclassifier3::Status Reset(); 285 286 // Frees a posting list at posting_list_index, adds it into the free list 287 // chain and updates (sets) the given block header instance. 288 // 289 // - This function is served to avoid redundant block header PWrite when 290 // freeing multiple posting lists. 291 // - The caller should provide a BlockHeader instance for updating the free 292 // list chain, and finally sync it to disk. 293 // 294 // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() 295 // 296 // RETURNS: 297 // - OK on success 298 // - INTERNAL_ERROR on I/O error 299 libtextclassifier3::Status FreePostingListImpl( 300 BlockHeader& header, PostingListIndex posting_list_index); 301 302 // PReads block header from the file. 303 // 304 // RETURNS: 305 // - A BlockHeader instance on success 306 // - INTERNAL_ERROR on I/O error 307 libtextclassifier3::StatusOr<BlockHeader> ReadHeader() const; 308 309 // PReads posting list content at posting_list_index. Note that it can be a 310 // freed or allocated posting list. 311 // 312 // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() 313 // 314 // RETURNS: 315 // - A data buffer with size = posting_list_bytes_ on success 316 // - INTERNAL_ERROR on I/O error 317 libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>> ReadPostingList( 318 PostingListIndex posting_list_index) const; 319 320 // PWrites block header to the file. 321 // 322 // RETURNS: 323 // - OK on success 324 // - INTERNAL_ERROR on I/O error 325 libtextclassifier3::Status WriteHeader(const BlockHeader& header); 326 327 // PWrites posting list content at posting_list_index. Note that it can be a 328 // freed or allocated posting list. 329 // 330 // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() and size of 331 // posting_list_buffer is posting_list_bytes_. 332 // 333 // RETURNS: 334 // - OK on success 335 // - INTERNAL_ERROR on I/O error 336 libtextclassifier3::Status WritePostingList( 337 PostingListIndex posting_list_index, const uint8_t* posting_list_buffer); 338 339 // Retrieves the absolute file (fd) offset of a posting list at 340 // posting_list_index. 341 // 342 // REQUIRES: 0 <= posting_list_index < max_posting_list_bytes() get_posting_list_file_offset(PostingListIndex posting_list_index)343 off_t get_posting_list_file_offset( 344 PostingListIndex posting_list_index) const { 345 return block_file_offset_ + sizeof(BlockHeader) + 346 posting_list_bytes_ * posting_list_index; 347 } 348 349 // Retrieves the byte size in the block available for posting lists (excluding 350 // the size of block header). total_posting_lists_bytes()351 uint32_t total_posting_lists_bytes() const { 352 return block_size_in_bytes_ - sizeof(BlockHeader); 353 } 354 355 const Filesystem* filesystem_; // Does not own. 356 357 PostingListSerializer* serializer_; // Does not own. 358 359 int fd_; // Does not own. 360 361 off_t block_file_offset_; 362 uint32_t block_size_in_bytes_; 363 uint32_t posting_list_bytes_; 364 }; 365 366 } // namespace lib 367 } // namespace icing 368 369 #endif // ICING_FILE_POSTING_LIST_INDEX_BLOCK_H_ 370