1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/posting_list/index-block.h"
16
17 #include <sys/types.h>
18
19 #include <cstdint>
20 #include <memory>
21
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/file/posting_list/posting-list-common.h"
27 #include "icing/file/posting_list/posting-list-free.h"
28 #include "icing/file/posting_list/posting-list-used.h"
29 #include "icing/file/posting_list/posting-list-utils.h"
30 #include "icing/legacy/core/icing-string-util.h"
31 #include "icing/util/logging.h"
32 #include "icing/util/status-macros.h"
33
34 namespace icing {
35 namespace lib {
36
37 namespace {
38
ValidatePostingListBytes(PostingListSerializer * serializer,uint32_t posting_list_bytes,uint32_t block_size)39 libtextclassifier3::Status ValidatePostingListBytes(
40 PostingListSerializer* serializer, uint32_t posting_list_bytes,
41 uint32_t block_size) {
42 if (posting_list_bytes > IndexBlock::CalculateMaxPostingListBytes(
43 block_size, serializer->GetDataTypeBytes()) ||
44 !posting_list_utils::IsValidPostingListSize(
45 posting_list_bytes, serializer->GetDataTypeBytes(),
46 serializer->GetMinPostingListSize())) {
47 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
48 "Requested posting list size %d is illegal for a flash block with max "
49 "posting list size of %d",
50 posting_list_bytes,
51 IndexBlock::CalculateMaxPostingListBytes(
52 block_size, serializer->GetDataTypeBytes())));
53 }
54 return libtextclassifier3::Status::OK;
55 }
56
57 } // namespace
58
59 /* static */ libtextclassifier3::StatusOr<IndexBlock>
CreateFromPreexistingIndexBlockRegion(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size)60 IndexBlock::CreateFromPreexistingIndexBlockRegion(
61 const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
62 off_t block_file_offset, uint32_t block_size) {
63 if (block_size < sizeof(BlockHeader)) {
64 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
65 "Provided block_size %d is too small to fit even the BlockHeader!",
66 block_size));
67 }
68
69 BlockHeader header;
70 if (!filesystem->PRead(fd, &header, sizeof(BlockHeader), block_file_offset)) {
71 return absl_ports::InternalError("PRead block header error");
72 }
73
74 ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
75 serializer, header.posting_list_bytes, block_size));
76
77 return IndexBlock(filesystem, serializer, fd, block_file_offset, block_size,
78 header.posting_list_bytes);
79 }
80
81 /* static */ libtextclassifier3::StatusOr<IndexBlock>
CreateFromUninitializedRegion(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size,uint32_t posting_list_bytes)82 IndexBlock::CreateFromUninitializedRegion(const Filesystem* filesystem,
83 PostingListSerializer* serializer,
84 int fd, off_t block_file_offset,
85 uint32_t block_size,
86 uint32_t posting_list_bytes) {
87 if (block_size < sizeof(BlockHeader)) {
88 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
89 "Provided block_size %d is too small to fit even the BlockHeader!",
90 block_size));
91 }
92
93 ICING_RETURN_IF_ERROR(
94 ValidatePostingListBytes(serializer, posting_list_bytes, block_size));
95 IndexBlock block(filesystem, serializer, fd, block_file_offset, block_size,
96 posting_list_bytes);
97 ICING_RETURN_IF_ERROR(block.Reset());
98
99 return block;
100 }
101
102 libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
GetAllocatedPostingList(PostingListIndex posting_list_index)103 IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
104 if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
105 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
106 "Cannot get posting list with index %d in IndexBlock with only %d "
107 "posting lists.",
108 posting_list_index, max_num_posting_lists()));
109 }
110
111 // Read out the header from disk.
112 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
113
114 // Read out the allocated posting list from disk.
115 ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
116 ReadPostingList(posting_list_index));
117
118 ICING_ASSIGN_OR_RETURN(
119 PostingListUsed pl_used,
120 PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
121 serializer_, std::move(posting_list_buffer), posting_list_bytes_));
122 return PostingListAndBlockInfo(
123 std::move(pl_used), posting_list_index, header.next_block_index,
124 /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
125 kInvalidPostingListIndex);
126 }
127
128 libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
AllocatePostingList()129 IndexBlock::AllocatePostingList() {
130 // Read out the header from disk.
131 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
132
133 if (header.free_list_posting_list_index == kInvalidPostingListIndex) {
134 return absl_ports::ResourceExhaustedError(
135 "No available posting lists to allocate.");
136 }
137
138 // Pull one off the free list.
139 PostingListIndex posting_list_index = header.free_list_posting_list_index;
140
141 // Read out the posting list from disk.
142 ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
143 ReadPostingList(posting_list_index));
144 // Step 1: get the next (chained) free posting list index and set it to block
145 // header.
146 ICING_ASSIGN_OR_RETURN(
147 PostingListFree pl_free,
148 PostingListFree::CreateFromPreexistingPostingListFreeRegion(
149 posting_list_buffer.get(), posting_list_bytes_,
150 serializer_->GetDataTypeBytes(),
151 serializer_->GetMinPostingListSize()));
152 header.free_list_posting_list_index = pl_free.get_next_posting_list_index();
153 if (header.free_list_posting_list_index != kInvalidPostingListIndex &&
154 header.free_list_posting_list_index >= max_num_posting_lists()) {
155 ICING_LOG(ERROR)
156 << "Free Posting List points to an invalid posting list index!";
157 header.free_list_posting_list_index = kInvalidPostingListIndex;
158 }
159
160 // Step 2: create PostingListUsed instance. The original content in the above
161 // posting_list_buffer is not important now because
162 // PostingListUsed::CreateFromUnitializedRegion will wipe it out, and
163 // we only need to sync it to disk after initializing.
164 ICING_ASSIGN_OR_RETURN(PostingListUsed pl_used,
165 PostingListUsed::CreateFromUnitializedRegion(
166 serializer_, posting_list_bytes_));
167
168 // Sync the initialized posting list (overwrite the original content of
169 // PostingListFree) and header to disk.
170 ICING_RETURN_IF_ERROR(
171 WritePostingList(posting_list_index, pl_used.posting_list_buffer()));
172 ICING_RETURN_IF_ERROR(WriteHeader(header));
173
174 return PostingListAndBlockInfo(
175 std::move(pl_used), posting_list_index, header.next_block_index,
176 /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
177 kInvalidPostingListIndex);
178 }
179
FreePostingList(PostingListIndex posting_list_index)180 libtextclassifier3::Status IndexBlock::FreePostingList(
181 PostingListIndex posting_list_index) {
182 if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
183 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
184 "Cannot free posting list with index %d in IndexBlock with only %d "
185 "posting lists.",
186 posting_list_index, max_num_posting_lists()));
187 }
188
189 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
190 ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
191 ICING_RETURN_IF_ERROR(WriteHeader(header));
192 return libtextclassifier3::Status::OK;
193 }
194
WritePostingListToDisk(const PostingListUsed & posting_list_used,PostingListIndex posting_list_index)195 libtextclassifier3::Status IndexBlock::WritePostingListToDisk(
196 const PostingListUsed& posting_list_used,
197 PostingListIndex posting_list_index) {
198 if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
199 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
200 "Cannot write posting list with index %d in IndexBlock with only %d "
201 "posting lists.",
202 posting_list_index, max_num_posting_lists()));
203 }
204
205 if (posting_list_used.size_in_bytes() != posting_list_bytes_) {
206 return absl_ports::InvalidArgumentError(
207 "Cannot write posting list into a block with different posting list "
208 "bytes");
209 }
210
211 if (!posting_list_used.is_dirty()) {
212 return libtextclassifier3::Status::OK;
213 }
214
215 // Write the allocated posting list to disk.
216 return WritePostingList(posting_list_index,
217 posting_list_used.posting_list_buffer());
218 }
219
GetNextBlockIndex() const220 libtextclassifier3::StatusOr<uint32_t> IndexBlock::GetNextBlockIndex() const {
221 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
222 return header.next_block_index;
223 }
224
SetNextBlockIndex(uint32_t next_block_index)225 libtextclassifier3::Status IndexBlock::SetNextBlockIndex(
226 uint32_t next_block_index) {
227 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
228 header.next_block_index = next_block_index;
229 ICING_RETURN_IF_ERROR(WriteHeader(header));
230
231 return libtextclassifier3::Status::OK;
232 }
233
HasFreePostingLists() const234 libtextclassifier3::StatusOr<bool> IndexBlock::HasFreePostingLists() const {
235 ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
236 return header.free_list_posting_list_index != kInvalidPostingListIndex;
237 }
238
Reset()239 libtextclassifier3::Status IndexBlock::Reset() {
240 BlockHeader header;
241 header.free_list_posting_list_index = kInvalidPostingListIndex;
242 header.next_block_index = kInvalidBlockIndex;
243 header.posting_list_bytes = posting_list_bytes_;
244
245 // Starting with the last posting list, prepend each posting list to the free
246 // list. At the end, the beginning of the free list should be the first
247 // posting list.
248 for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
249 posting_list_index >= 0; --posting_list_index) {
250 // Adding the posting list at posting_list_index to the free list will
251 // modify both the posting list and also
252 // header.free_list_posting_list_index.
253 ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
254 }
255
256 // Sync the header to disk.
257 ICING_RETURN_IF_ERROR(WriteHeader(header));
258
259 return libtextclassifier3::Status::OK;
260 }
261
FreePostingListImpl(BlockHeader & header,PostingListIndex posting_list_index)262 libtextclassifier3::Status IndexBlock::FreePostingListImpl(
263 BlockHeader& header, PostingListIndex posting_list_index) {
264 // Read out the posting list from disk.
265 ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
266 ReadPostingList(posting_list_index));
267
268 ICING_ASSIGN_OR_RETURN(PostingListFree plfree,
269 PostingListFree::CreateFromUnitializedRegion(
270 posting_list_buffer.get(), posting_list_bytes(),
271 serializer_->GetDataTypeBytes(),
272 serializer_->GetMinPostingListSize()));
273
274 // Put at the head of the list.
275 plfree.set_next_posting_list_index(header.free_list_posting_list_index);
276 header.free_list_posting_list_index = posting_list_index;
277
278 // Sync the posting list to disk.
279 ICING_RETURN_IF_ERROR(
280 WritePostingList(posting_list_index, posting_list_buffer.get()));
281 return libtextclassifier3::Status::OK;
282 }
283
ReadHeader() const284 libtextclassifier3::StatusOr<IndexBlock::BlockHeader> IndexBlock::ReadHeader()
285 const {
286 BlockHeader header;
287 if (!filesystem_->PRead(fd_, &header, sizeof(BlockHeader),
288 block_file_offset_)) {
289 return absl_ports::InternalError(
290 absl_ports::StrCat("PRead block header error: ", strerror(errno)));
291 }
292 if (header.posting_list_bytes != posting_list_bytes_) {
293 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
294 "Inconsistent posting list bytes between block header (%d) and class "
295 "instance (%d)",
296 header.posting_list_bytes, posting_list_bytes_));
297 }
298 return header;
299 }
300
301 libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>>
ReadPostingList(PostingListIndex posting_list_index) const302 IndexBlock::ReadPostingList(PostingListIndex posting_list_index) const {
303 auto posting_list_buffer = std::make_unique<uint8_t[]>(posting_list_bytes_);
304 if (!filesystem_->PRead(fd_, posting_list_buffer.get(), posting_list_bytes_,
305 get_posting_list_file_offset(posting_list_index))) {
306 return absl_ports::InternalError(
307 absl_ports::StrCat("PRead posting list error: ", strerror(errno)));
308 }
309 return posting_list_buffer;
310 }
311
WriteHeader(const BlockHeader & header)312 libtextclassifier3::Status IndexBlock::WriteHeader(const BlockHeader& header) {
313 if (!filesystem_->PWrite(fd_, block_file_offset_, &header,
314 sizeof(BlockHeader))) {
315 return absl_ports::InternalError(
316 absl_ports::StrCat("PWrite block header error: ", strerror(errno)));
317 }
318 return libtextclassifier3::Status::OK;
319 }
320
WritePostingList(PostingListIndex posting_list_index,const uint8_t * posting_list_buffer)321 libtextclassifier3::Status IndexBlock::WritePostingList(
322 PostingListIndex posting_list_index, const uint8_t* posting_list_buffer) {
323 if (!filesystem_->PWrite(fd_,
324 get_posting_list_file_offset(posting_list_index),
325 posting_list_buffer, posting_list_bytes_)) {
326 return absl_ports::InternalError(
327 absl_ports::StrCat("PWrite posting list error: ", strerror(errno)));
328 }
329 return libtextclassifier3::Status::OK;
330 }
331
332 } // namespace lib
333 } // namespace icing
334