• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/main/index-block.h"
16 
17 #include <algorithm>
18 #include <cinttypes>
19 #include <limits>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/absl_ports/canonical_errors.h"
23 #include "icing/file/memory-mapped-file.h"
24 #include "icing/index/main/posting-list-free.h"
25 #include "icing/index/main/posting-list-utils.h"
26 #include "icing/legacy/core/icing-string-util.h"
27 #include "icing/util/math-util.h"
28 #include "icing/util/status-macros.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 namespace {
34 
ValidatePostingListBytes(uint32_t posting_list_bytes,uint32_t block_size)35 libtextclassifier3::Status ValidatePostingListBytes(uint32_t posting_list_bytes,
36                                                     uint32_t block_size) {
37   if (posting_list_bytes >
38           IndexBlock::CalculateMaxPostingListBytes(block_size) ||
39       !posting_list_utils::IsValidPostingListSize(posting_list_bytes)) {
40     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
41         "Requested posting list size %d is illegal for a flash block with max "
42         "posting list size of %d",
43         posting_list_bytes,
44         IndexBlock::CalculateMaxPostingListBytes(block_size)));
45   }
46   return libtextclassifier3::Status::OK;
47 }
48 
49 }  // namespace
50 
ApproximateFullPostingListHitsForBlock(uint32_t block_size,int posting_list_index_bits)51 uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock(
52     uint32_t block_size, int posting_list_index_bits) {
53   // Assume 50% compressed and most don't have term frequencies.
54   uint32_t bytes_per_hit = sizeof(Hit::Value) / 2;
55   return (block_size - sizeof(BlockHeader)) /
56          ((1u << posting_list_index_bits) * bytes_per_hit);
57 }
58 
59 libtextclassifier3::StatusOr<IndexBlock>
CreateFromPreexistingIndexBlockRegion(const Filesystem & filesystem,std::string_view file_path,off_t offset,uint32_t block_size)60 IndexBlock::CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
61                                                   std::string_view file_path,
62                                                   off_t offset,
63                                                   uint32_t block_size) {
64   if (block_size < sizeof(BlockHeader)) {
65     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
66         "Provided block_size %d is too small to fit even the BlockHeader!",
67         block_size));
68   }
69   MemoryMappedFile mmapped_file(
70       filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
71   ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
72   IndexBlock block(std::move(mmapped_file));
73   ICING_RETURN_IF_ERROR(
74       ValidatePostingListBytes(block.get_posting_list_bytes(), block_size));
75   return block;
76 }
77 
78 libtextclassifier3::StatusOr<IndexBlock>
CreateFromUninitializedRegion(const Filesystem & filesystem,std::string_view file_path,off_t offset,uint32_t block_size,uint32_t posting_list_bytes)79 IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem,
80                                           std::string_view file_path,
81                                           off_t offset, uint32_t block_size,
82                                           uint32_t posting_list_bytes) {
83   if (block_size < sizeof(BlockHeader)) {
84     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
85         "Provided block_size %d is too small to fit even the BlockHeader!",
86         block_size));
87   }
88   ICING_RETURN_IF_ERROR(
89       ValidatePostingListBytes(posting_list_bytes, block_size));
90   MemoryMappedFile mmapped_file(
91       filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
92   ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
93   IndexBlock block(std::move(mmapped_file));
94   // Safe to ignore the return value of Reset. Reset returns an error if
95   // posting_list_bytes is invalid, but this function ensures that
96   // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes
97   // above.
98   block.Reset(posting_list_bytes);
99   return block;
100 }
101 
IndexBlock(MemoryMappedFile mmapped_block)102 IndexBlock::IndexBlock(MemoryMappedFile mmapped_block)
103     : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())),
104       posting_lists_start_ptr_(mmapped_block.mutable_region() +
105                                sizeof(BlockHeader)),
106       block_size_in_bytes_(mmapped_block.region_size()),
107       mmapped_block_(
108           std::make_unique<MemoryMappedFile>(std::move(mmapped_block))) {}
109 
Reset(int posting_list_bytes)110 libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) {
111   ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
112       posting_list_bytes, mmapped_block_->region_size()));
113   header_->free_list_posting_list_index = kInvalidPostingListIndex;
114   header_->next_block_index = kInvalidBlockIndex;
115   header_->posting_list_bytes = posting_list_bytes;
116 
117   // Starting with the last posting list, prepend each posting list to the free
118   // list. At the end, the beginning of the free list should be the first
119   // posting list.
120   for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
121        posting_list_index >= 0; --posting_list_index) {
122     // Adding the posting list at posting_list_index to the free list will
123     // modify both the posting list and also
124     // header_->free_list_posting_list_index.
125     FreePostingList(posting_list_index);
126   }
127   return libtextclassifier3::Status::OK;
128 }
129 
130 libtextclassifier3::StatusOr<PostingListUsed>
GetAllocatedPostingList(PostingListIndex posting_list_index)131 IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
132   if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
133     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
134         "Cannot get posting list with index %d in IndexBlock with only %d "
135         "posting lists.",
136         posting_list_index, max_num_posting_lists()));
137   }
138   return PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
139       get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
140 }
141 
142 libtextclassifier3::StatusOr<PostingListIndex>
AllocatePostingList()143 IndexBlock::AllocatePostingList() {
144   if (!has_free_posting_lists()) {
145     return absl_ports::ResourceExhaustedError(
146         "No available posting lists to allocate.");
147   }
148 
149   // Pull one off the free list.
150   PostingListIndex posting_list_index = header_->free_list_posting_list_index;
151 
152   // We know at this point that posting_list_bytes will return a valid pl size
153   // (because an already initialized IndexBlock instance can't have an invalid
154   // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will
155   // always return OK and ValueOrDie is safe to call.
156   auto posting_list_or =
157       PostingListFree::CreateFromPreexistingPostingListFreeRegion(
158           get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
159   PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
160 
161   header_->free_list_posting_list_index = plfree.get_next_posting_list_index();
162   if (header_->free_list_posting_list_index != kInvalidPostingListIndex &&
163       header_->free_list_posting_list_index >= max_num_posting_lists()) {
164     ICING_LOG(ERROR)
165         << "Free Posting List points to an invalid posting list index!";
166     header_->free_list_posting_list_index = kInvalidPostingListIndex;
167   }
168 
169   // Make it a used posting list.
170   PostingListUsed::CreateFromUnitializedRegion(
171       get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
172   return posting_list_index;
173 }
174 
FreePostingList(PostingListIndex posting_list_index)175 void IndexBlock::FreePostingList(PostingListIndex posting_list_index) {
176   if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
177     ICING_LOG(ERROR) << "Cannot free posting list with index "
178                      << posting_list_index << " in IndexBlock with only "
179                      << max_num_posting_lists() << " posting lists.";
180     return;
181   }
182 
183   // We know at this point that posting_list_bytes will return a valid pl size.
184   // So CreateFromUninitializedRegion will always return OK and ValueOrDie is
185   // safe to call.
186   auto posting_list_or = PostingListFree::CreateFromUnitializedRegion(
187       get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
188   PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
189 
190   // Put at the head of the list.
191   plfree.set_next_posting_list_index(header_->free_list_posting_list_index);
192   header_->free_list_posting_list_index = posting_list_index;
193 }
194 
get_posting_list_ptr(PostingListIndex posting_list_index)195 char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) {
196   return posting_lists_start_ptr_ +
197          get_posting_list_bytes() * posting_list_index;
198 }
199 
200 }  // namespace lib
201 }  // namespace icing
202