• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/posting_list/index-block.h"
16 
17 #include <sys/types.h>
18 
19 #include <cstdint>
20 #include <memory>
21 
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/file/posting_list/posting-list-common.h"
27 #include "icing/file/posting_list/posting-list-free.h"
28 #include "icing/file/posting_list/posting-list-used.h"
29 #include "icing/file/posting_list/posting-list-utils.h"
30 #include "icing/legacy/core/icing-string-util.h"
31 #include "icing/util/logging.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 namespace {
38 
ValidatePostingListBytes(PostingListSerializer * serializer,uint32_t posting_list_bytes,uint32_t block_size)39 libtextclassifier3::Status ValidatePostingListBytes(
40     PostingListSerializer* serializer, uint32_t posting_list_bytes,
41     uint32_t block_size) {
42   if (posting_list_bytes > IndexBlock::CalculateMaxPostingListBytes(
43                                block_size, serializer->GetDataTypeBytes()) ||
44       !posting_list_utils::IsValidPostingListSize(
45           posting_list_bytes, serializer->GetDataTypeBytes(),
46           serializer->GetMinPostingListSize())) {
47     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
48         "Requested posting list size %d is illegal for a flash block with max "
49         "posting list size of %d",
50         posting_list_bytes,
51         IndexBlock::CalculateMaxPostingListBytes(
52             block_size, serializer->GetDataTypeBytes())));
53   }
54   return libtextclassifier3::Status::OK;
55 }
56 
57 }  // namespace
58 
59 /* static */ libtextclassifier3::StatusOr<IndexBlock>
CreateFromPreexistingIndexBlockRegion(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size)60 IndexBlock::CreateFromPreexistingIndexBlockRegion(
61     const Filesystem* filesystem, PostingListSerializer* serializer, int fd,
62     off_t block_file_offset, uint32_t block_size) {
63   if (block_size < sizeof(BlockHeader)) {
64     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
65         "Provided block_size %d is too small to fit even the BlockHeader!",
66         block_size));
67   }
68 
69   BlockHeader header;
70   if (!filesystem->PRead(fd, &header, sizeof(BlockHeader), block_file_offset)) {
71     return absl_ports::InternalError("PRead block header error");
72   }
73 
74   ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
75       serializer, header.posting_list_bytes, block_size));
76 
77   return IndexBlock(filesystem, serializer, fd, block_file_offset, block_size,
78                     header.posting_list_bytes);
79 }
80 
81 /* static */ libtextclassifier3::StatusOr<IndexBlock>
CreateFromUninitializedRegion(const Filesystem * filesystem,PostingListSerializer * serializer,int fd,off_t block_file_offset,uint32_t block_size,uint32_t posting_list_bytes)82 IndexBlock::CreateFromUninitializedRegion(const Filesystem* filesystem,
83                                           PostingListSerializer* serializer,
84                                           int fd, off_t block_file_offset,
85                                           uint32_t block_size,
86                                           uint32_t posting_list_bytes) {
87   if (block_size < sizeof(BlockHeader)) {
88     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
89         "Provided block_size %d is too small to fit even the BlockHeader!",
90         block_size));
91   }
92 
93   ICING_RETURN_IF_ERROR(
94       ValidatePostingListBytes(serializer, posting_list_bytes, block_size));
95   IndexBlock block(filesystem, serializer, fd, block_file_offset, block_size,
96                    posting_list_bytes);
97   ICING_RETURN_IF_ERROR(block.Reset());
98 
99   return block;
100 }
101 
102 libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
GetAllocatedPostingList(PostingListIndex posting_list_index)103 IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
104   if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
105     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
106         "Cannot get posting list with index %d in IndexBlock with only %d "
107         "posting lists.",
108         posting_list_index, max_num_posting_lists()));
109   }
110 
111   // Read out the header from disk.
112   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
113 
114   // Read out the allocated posting list from disk.
115   ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
116                          ReadPostingList(posting_list_index));
117 
118   ICING_ASSIGN_OR_RETURN(
119       PostingListUsed pl_used,
120       PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
121           serializer_, std::move(posting_list_buffer), posting_list_bytes_));
122   return PostingListAndBlockInfo(
123       std::move(pl_used), posting_list_index, header.next_block_index,
124       /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
125           kInvalidPostingListIndex);
126 }
127 
128 libtextclassifier3::StatusOr<IndexBlock::PostingListAndBlockInfo>
AllocatePostingList()129 IndexBlock::AllocatePostingList() {
130   // Read out the header from disk.
131   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
132 
133   if (header.free_list_posting_list_index == kInvalidPostingListIndex) {
134     return absl_ports::ResourceExhaustedError(
135         "No available posting lists to allocate.");
136   }
137 
138   // Pull one off the free list.
139   PostingListIndex posting_list_index = header.free_list_posting_list_index;
140 
141   // Read out the posting list from disk.
142   ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
143                          ReadPostingList(posting_list_index));
144   // Step 1: get the next (chained) free posting list index and set it to block
145   //         header.
146   ICING_ASSIGN_OR_RETURN(
147       PostingListFree pl_free,
148       PostingListFree::CreateFromPreexistingPostingListFreeRegion(
149           posting_list_buffer.get(), posting_list_bytes_,
150           serializer_->GetDataTypeBytes(),
151           serializer_->GetMinPostingListSize()));
152   header.free_list_posting_list_index = pl_free.get_next_posting_list_index();
153   if (header.free_list_posting_list_index != kInvalidPostingListIndex &&
154       header.free_list_posting_list_index >= max_num_posting_lists()) {
155     ICING_LOG(ERROR)
156         << "Free Posting List points to an invalid posting list index!";
157     header.free_list_posting_list_index = kInvalidPostingListIndex;
158   }
159 
160   // Step 2: create PostingListUsed instance. The original content in the above
161   //         posting_list_buffer is not important now because
162   //         PostingListUsed::CreateFromUnitializedRegion will wipe it out, and
163   //         we only need to sync it to disk after initializing.
164   ICING_ASSIGN_OR_RETURN(PostingListUsed pl_used,
165                          PostingListUsed::CreateFromUnitializedRegion(
166                              serializer_, posting_list_bytes_));
167 
168   // Sync the initialized posting list (overwrite the original content of
169   // PostingListFree) and header to disk.
170   ICING_RETURN_IF_ERROR(
171       WritePostingList(posting_list_index, pl_used.posting_list_buffer()));
172   ICING_RETURN_IF_ERROR(WriteHeader(header));
173 
174   return PostingListAndBlockInfo(
175       std::move(pl_used), posting_list_index, header.next_block_index,
176       /*has_free_posting_lists_in=*/header.free_list_posting_list_index !=
177           kInvalidPostingListIndex);
178 }
179 
FreePostingList(PostingListIndex posting_list_index)180 libtextclassifier3::Status IndexBlock::FreePostingList(
181     PostingListIndex posting_list_index) {
182   if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
183     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
184         "Cannot free posting list with index %d in IndexBlock with only %d "
185         "posting lists.",
186         posting_list_index, max_num_posting_lists()));
187   }
188 
189   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
190   ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
191   ICING_RETURN_IF_ERROR(WriteHeader(header));
192   return libtextclassifier3::Status::OK;
193 }
194 
WritePostingListToDisk(const PostingListUsed & posting_list_used,PostingListIndex posting_list_index)195 libtextclassifier3::Status IndexBlock::WritePostingListToDisk(
196     const PostingListUsed& posting_list_used,
197     PostingListIndex posting_list_index) {
198   if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
199     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
200         "Cannot write posting list with index %d in IndexBlock with only %d "
201         "posting lists.",
202         posting_list_index, max_num_posting_lists()));
203   }
204 
205   if (posting_list_used.size_in_bytes() != posting_list_bytes_) {
206     return absl_ports::InvalidArgumentError(
207         "Cannot write posting list into a block with different posting list "
208         "bytes");
209   }
210 
211   if (!posting_list_used.is_dirty()) {
212     return libtextclassifier3::Status::OK;
213   }
214 
215   // Write the allocated posting list to disk.
216   return WritePostingList(posting_list_index,
217                           posting_list_used.posting_list_buffer());
218 }
219 
GetNextBlockIndex() const220 libtextclassifier3::StatusOr<uint32_t> IndexBlock::GetNextBlockIndex() const {
221   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
222   return header.next_block_index;
223 }
224 
SetNextBlockIndex(uint32_t next_block_index)225 libtextclassifier3::Status IndexBlock::SetNextBlockIndex(
226     uint32_t next_block_index) {
227   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
228   header.next_block_index = next_block_index;
229   ICING_RETURN_IF_ERROR(WriteHeader(header));
230 
231   return libtextclassifier3::Status::OK;
232 }
233 
HasFreePostingLists() const234 libtextclassifier3::StatusOr<bool> IndexBlock::HasFreePostingLists() const {
235   ICING_ASSIGN_OR_RETURN(BlockHeader header, ReadHeader());
236   return header.free_list_posting_list_index != kInvalidPostingListIndex;
237 }
238 
Reset()239 libtextclassifier3::Status IndexBlock::Reset() {
240   BlockHeader header;
241   header.free_list_posting_list_index = kInvalidPostingListIndex;
242   header.next_block_index = kInvalidBlockIndex;
243   header.posting_list_bytes = posting_list_bytes_;
244 
245   // Starting with the last posting list, prepend each posting list to the free
246   // list. At the end, the beginning of the free list should be the first
247   // posting list.
248   for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
249        posting_list_index >= 0; --posting_list_index) {
250     // Adding the posting list at posting_list_index to the free list will
251     // modify both the posting list and also
252     // header.free_list_posting_list_index.
253     ICING_RETURN_IF_ERROR(FreePostingListImpl(header, posting_list_index));
254   }
255 
256   // Sync the header to disk.
257   ICING_RETURN_IF_ERROR(WriteHeader(header));
258 
259   return libtextclassifier3::Status::OK;
260 }
261 
FreePostingListImpl(BlockHeader & header,PostingListIndex posting_list_index)262 libtextclassifier3::Status IndexBlock::FreePostingListImpl(
263     BlockHeader& header, PostingListIndex posting_list_index) {
264   // Read out the posting list from disk.
265   ICING_ASSIGN_OR_RETURN(std::unique_ptr<uint8_t[]> posting_list_buffer,
266                          ReadPostingList(posting_list_index));
267 
268   ICING_ASSIGN_OR_RETURN(PostingListFree plfree,
269                          PostingListFree::CreateFromUnitializedRegion(
270                              posting_list_buffer.get(), posting_list_bytes(),
271                              serializer_->GetDataTypeBytes(),
272                              serializer_->GetMinPostingListSize()));
273 
274   // Put at the head of the list.
275   plfree.set_next_posting_list_index(header.free_list_posting_list_index);
276   header.free_list_posting_list_index = posting_list_index;
277 
278   // Sync the posting list to disk.
279   ICING_RETURN_IF_ERROR(
280       WritePostingList(posting_list_index, posting_list_buffer.get()));
281   return libtextclassifier3::Status::OK;
282 }
283 
ReadHeader() const284 libtextclassifier3::StatusOr<IndexBlock::BlockHeader> IndexBlock::ReadHeader()
285     const {
286   BlockHeader header;
287   if (!filesystem_->PRead(fd_, &header, sizeof(BlockHeader),
288                           block_file_offset_)) {
289     return absl_ports::InternalError(
290         absl_ports::StrCat("PRead block header error: ", strerror(errno)));
291   }
292   if (header.posting_list_bytes != posting_list_bytes_) {
293     return absl_ports::InternalError(IcingStringUtil::StringPrintf(
294         "Inconsistent posting list bytes between block header (%d) and class "
295         "instance (%d)",
296         header.posting_list_bytes, posting_list_bytes_));
297   }
298   return header;
299 }
300 
301 libtextclassifier3::StatusOr<std::unique_ptr<uint8_t[]>>
ReadPostingList(PostingListIndex posting_list_index) const302 IndexBlock::ReadPostingList(PostingListIndex posting_list_index) const {
303   auto posting_list_buffer = std::make_unique<uint8_t[]>(posting_list_bytes_);
304   if (!filesystem_->PRead(fd_, posting_list_buffer.get(), posting_list_bytes_,
305                           get_posting_list_file_offset(posting_list_index))) {
306     return absl_ports::InternalError(
307         absl_ports::StrCat("PRead posting list error: ", strerror(errno)));
308   }
309   return posting_list_buffer;
310 }
311 
WriteHeader(const BlockHeader & header)312 libtextclassifier3::Status IndexBlock::WriteHeader(const BlockHeader& header) {
313   if (!filesystem_->PWrite(fd_, block_file_offset_, &header,
314                            sizeof(BlockHeader))) {
315     return absl_ports::InternalError(
316         absl_ports::StrCat("PWrite block header error: ", strerror(errno)));
317   }
318   return libtextclassifier3::Status::OK;
319 }
320 
WritePostingList(PostingListIndex posting_list_index,const uint8_t * posting_list_buffer)321 libtextclassifier3::Status IndexBlock::WritePostingList(
322     PostingListIndex posting_list_index, const uint8_t* posting_list_buffer) {
323   if (!filesystem_->PWrite(fd_,
324                            get_posting_list_file_offset(posting_list_index),
325                            posting_list_buffer, posting_list_bytes_)) {
326     return absl_ports::InternalError(
327         absl_ports::StrCat("PWrite posting list error: ", strerror(errno)));
328   }
329   return libtextclassifier3::Status::OK;
330 }
331 
332 }  // namespace lib
333 }  // namespace icing
334