1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/main/index-block.h"
16
17 #include <algorithm>
18 #include <cinttypes>
19 #include <limits>
20
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/absl_ports/canonical_errors.h"
23 #include "icing/file/memory-mapped-file.h"
24 #include "icing/index/main/posting-list-free.h"
25 #include "icing/index/main/posting-list-utils.h"
26 #include "icing/legacy/core/icing-string-util.h"
27 #include "icing/util/math-util.h"
28 #include "icing/util/status-macros.h"
29
30 namespace icing {
31 namespace lib {
32
33 namespace {
34
ValidatePostingListBytes(uint32_t posting_list_bytes,uint32_t block_size)35 libtextclassifier3::Status ValidatePostingListBytes(uint32_t posting_list_bytes,
36 uint32_t block_size) {
37 if (posting_list_bytes >
38 IndexBlock::CalculateMaxPostingListBytes(block_size) ||
39 !posting_list_utils::IsValidPostingListSize(posting_list_bytes)) {
40 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
41 "Requested posting list size %d is illegal for a flash block with max "
42 "posting list size of %d",
43 posting_list_bytes,
44 IndexBlock::CalculateMaxPostingListBytes(block_size)));
45 }
46 return libtextclassifier3::Status::OK;
47 }
48
49 } // namespace
50
ApproximateFullPostingListHitsForBlock(uint32_t block_size,int posting_list_index_bits)51 uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock(
52 uint32_t block_size, int posting_list_index_bits) {
53 // Assume 50% compressed and most don't have term frequencies.
54 uint32_t bytes_per_hit = sizeof(Hit::Value) / 2;
55 return (block_size - sizeof(BlockHeader)) /
56 ((1u << posting_list_index_bits) * bytes_per_hit);
57 }
58
59 libtextclassifier3::StatusOr<IndexBlock>
CreateFromPreexistingIndexBlockRegion(const Filesystem & filesystem,std::string_view file_path,off_t offset,uint32_t block_size)60 IndexBlock::CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
61 std::string_view file_path,
62 off_t offset,
63 uint32_t block_size) {
64 if (block_size < sizeof(BlockHeader)) {
65 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
66 "Provided block_size %d is too small to fit even the BlockHeader!",
67 block_size));
68 }
69 MemoryMappedFile mmapped_file(
70 filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
71 ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
72 IndexBlock block(std::move(mmapped_file));
73 ICING_RETURN_IF_ERROR(
74 ValidatePostingListBytes(block.get_posting_list_bytes(), block_size));
75 return block;
76 }
77
78 libtextclassifier3::StatusOr<IndexBlock>
CreateFromUninitializedRegion(const Filesystem & filesystem,std::string_view file_path,off_t offset,uint32_t block_size,uint32_t posting_list_bytes)79 IndexBlock::CreateFromUninitializedRegion(const Filesystem& filesystem,
80 std::string_view file_path,
81 off_t offset, uint32_t block_size,
82 uint32_t posting_list_bytes) {
83 if (block_size < sizeof(BlockHeader)) {
84 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
85 "Provided block_size %d is too small to fit even the BlockHeader!",
86 block_size));
87 }
88 ICING_RETURN_IF_ERROR(
89 ValidatePostingListBytes(posting_list_bytes, block_size));
90 MemoryMappedFile mmapped_file(
91 filesystem, file_path, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
92 ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, block_size));
93 IndexBlock block(std::move(mmapped_file));
94 // Safe to ignore the return value of Reset. Reset returns an error if
95 // posting_list_bytes is invalid, but this function ensures that
96 // posting_list_bytes is valid thanks to the call to ValidatePostingListBytes
97 // above.
98 block.Reset(posting_list_bytes);
99 return block;
100 }
101
IndexBlock(MemoryMappedFile mmapped_block)102 IndexBlock::IndexBlock(MemoryMappedFile mmapped_block)
103 : header_(reinterpret_cast<BlockHeader*>(mmapped_block.mutable_region())),
104 posting_lists_start_ptr_(mmapped_block.mutable_region() +
105 sizeof(BlockHeader)),
106 block_size_in_bytes_(mmapped_block.region_size()),
107 mmapped_block_(
108 std::make_unique<MemoryMappedFile>(std::move(mmapped_block))) {}
109
Reset(int posting_list_bytes)110 libtextclassifier3::Status IndexBlock::Reset(int posting_list_bytes) {
111 ICING_RETURN_IF_ERROR(ValidatePostingListBytes(
112 posting_list_bytes, mmapped_block_->region_size()));
113 header_->free_list_posting_list_index = kInvalidPostingListIndex;
114 header_->next_block_index = kInvalidBlockIndex;
115 header_->posting_list_bytes = posting_list_bytes;
116
117 // Starting with the last posting list, prepend each posting list to the free
118 // list. At the end, the beginning of the free list should be the first
119 // posting list.
120 for (PostingListIndex posting_list_index = max_num_posting_lists() - 1;
121 posting_list_index >= 0; --posting_list_index) {
122 // Adding the posting list at posting_list_index to the free list will
123 // modify both the posting list and also
124 // header_->free_list_posting_list_index.
125 FreePostingList(posting_list_index);
126 }
127 return libtextclassifier3::Status::OK;
128 }
129
130 libtextclassifier3::StatusOr<PostingListUsed>
GetAllocatedPostingList(PostingListIndex posting_list_index)131 IndexBlock::GetAllocatedPostingList(PostingListIndex posting_list_index) {
132 if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
133 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
134 "Cannot get posting list with index %d in IndexBlock with only %d "
135 "posting lists.",
136 posting_list_index, max_num_posting_lists()));
137 }
138 return PostingListUsed::CreateFromPreexistingPostingListUsedRegion(
139 get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
140 }
141
142 libtextclassifier3::StatusOr<PostingListIndex>
AllocatePostingList()143 IndexBlock::AllocatePostingList() {
144 if (!has_free_posting_lists()) {
145 return absl_ports::ResourceExhaustedError(
146 "No available posting lists to allocate.");
147 }
148
149 // Pull one off the free list.
150 PostingListIndex posting_list_index = header_->free_list_posting_list_index;
151
152 // We know at this point that posting_list_bytes will return a valid pl size
153 // (because an already initialized IndexBlock instance can't have an invalid
154 // posting_list_bytes). So CreateFromPreexistingPostingListFreeRegion will
155 // always return OK and ValueOrDie is safe to call.
156 auto posting_list_or =
157 PostingListFree::CreateFromPreexistingPostingListFreeRegion(
158 get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
159 PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
160
161 header_->free_list_posting_list_index = plfree.get_next_posting_list_index();
162 if (header_->free_list_posting_list_index != kInvalidPostingListIndex &&
163 header_->free_list_posting_list_index >= max_num_posting_lists()) {
164 ICING_LOG(ERROR)
165 << "Free Posting List points to an invalid posting list index!";
166 header_->free_list_posting_list_index = kInvalidPostingListIndex;
167 }
168
169 // Make it a used posting list.
170 PostingListUsed::CreateFromUnitializedRegion(
171 get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
172 return posting_list_index;
173 }
174
FreePostingList(PostingListIndex posting_list_index)175 void IndexBlock::FreePostingList(PostingListIndex posting_list_index) {
176 if (posting_list_index >= max_num_posting_lists() || posting_list_index < 0) {
177 ICING_LOG(ERROR) << "Cannot free posting list with index "
178 << posting_list_index << " in IndexBlock with only "
179 << max_num_posting_lists() << " posting lists.";
180 return;
181 }
182
183 // We know at this point that posting_list_bytes will return a valid pl size.
184 // So CreateFromUninitializedRegion will always return OK and ValueOrDie is
185 // safe to call.
186 auto posting_list_or = PostingListFree::CreateFromUnitializedRegion(
187 get_posting_list_ptr(posting_list_index), get_posting_list_bytes());
188 PostingListFree plfree = std::move(posting_list_or).ValueOrDie();
189
190 // Put at the head of the list.
191 plfree.set_next_posting_list_index(header_->free_list_posting_list_index);
192 header_->free_list_posting_list_index = posting_list_index;
193 }
194
get_posting_list_ptr(PostingListIndex posting_list_index)195 char* IndexBlock::get_posting_list_ptr(PostingListIndex posting_list_index) {
196 return posting_lists_start_ptr_ +
197 get_posting_list_bytes() * posting_list_index;
198 }
199
200 } // namespace lib
201 } // namespace icing
202