1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/embed/posting-list-embedding-hit-accessor.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <utility>
20 #include <vector>
21
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/file/posting_list/flash-index-storage.h"
26 #include "icing/file/posting_list/posting-list-common.h"
27 #include "icing/file/posting_list/posting-list-identifier.h"
28 #include "icing/file/posting_list/posting-list-used.h"
29 #include "icing/index/embed/embedding-hit.h"
30 #include "icing/index/embed/posting-list-embedding-hit-serializer.h"
31 #include "icing/legacy/index/icing-bit-util.h"
32 #include "icing/util/status-macros.h"
33
34 namespace icing {
35 namespace lib {
36
37 libtextclassifier3::StatusOr<std::unique_ptr<PostingListEmbeddingHitAccessor>>
Create(FlashIndexStorage * storage,PostingListEmbeddingHitSerializer * serializer)38 PostingListEmbeddingHitAccessor::Create(
39 FlashIndexStorage *storage, PostingListEmbeddingHitSerializer *serializer) {
40 uint32_t max_posting_list_bytes = storage->max_posting_list_bytes();
41 ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
42 PostingListUsed::CreateFromUnitializedRegion(
43 serializer, max_posting_list_bytes));
44 return std::unique_ptr<PostingListEmbeddingHitAccessor>(
45 new PostingListEmbeddingHitAccessor(storage, serializer,
46 std::move(in_memory_posting_list)));
47 }
48
49 libtextclassifier3::StatusOr<std::unique_ptr<PostingListEmbeddingHitAccessor>>
CreateFromExisting(FlashIndexStorage * storage,PostingListEmbeddingHitSerializer * serializer,PostingListIdentifier existing_posting_list_id)50 PostingListEmbeddingHitAccessor::CreateFromExisting(
51 FlashIndexStorage *storage, PostingListEmbeddingHitSerializer *serializer,
52 PostingListIdentifier existing_posting_list_id) {
53 // Our in_memory_posting_list_ will start as empty.
54 ICING_ASSIGN_OR_RETURN(
55 std::unique_ptr<PostingListEmbeddingHitAccessor> pl_accessor,
56 Create(storage, serializer));
57 ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
58 storage->GetPostingList(existing_posting_list_id));
59 pl_accessor->preexisting_posting_list_ =
60 std::make_unique<PostingListHolder>(std::move(holder));
61 return pl_accessor;
62 }
63
64 // Returns the next batch of hits for the provided posting list.
65 libtextclassifier3::StatusOr<std::vector<EmbeddingHit>>
GetNextHitsBatch()66 PostingListEmbeddingHitAccessor::GetNextHitsBatch() {
67 if (preexisting_posting_list_ == nullptr) {
68 if (has_reached_posting_list_chain_end_) {
69 return std::vector<EmbeddingHit>();
70 }
71 return absl_ports::FailedPreconditionError(
72 "Cannot retrieve hits from a PostingListEmbeddingHitAccessor that was "
73 "not created from a preexisting posting list.");
74 }
75 ICING_ASSIGN_OR_RETURN(
76 std::vector<EmbeddingHit> batch,
77 serializer_->GetHits(&preexisting_posting_list_->posting_list));
78 uint32_t next_block_index = kInvalidBlockIndex;
79 // Posting lists will only be chained when they are max-sized, in which case
80 // next_block_index will point to the next block for the next posting list.
81 // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
82 // to the next free list block, which is not relevant here.
83 if (preexisting_posting_list_->posting_list.size_in_bytes() ==
84 storage_->max_posting_list_bytes()) {
85 next_block_index = preexisting_posting_list_->next_block_index;
86 }
87
88 if (next_block_index != kInvalidBlockIndex) {
89 // Since we only have to deal with next block for max-sized posting list
90 // block, max_num_posting_lists is 1 and posting_list_index_bits is
91 // BitsToStore(1).
92 PostingListIdentifier next_posting_list_id(
93 next_block_index, /*posting_list_index=*/0,
94 /*posting_list_index_bits=*/BitsToStore(1));
95 ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
96 storage_->GetPostingList(next_posting_list_id));
97 preexisting_posting_list_ =
98 std::make_unique<PostingListHolder>(std::move(holder));
99 } else {
100 has_reached_posting_list_chain_end_ = true;
101 preexisting_posting_list_.reset();
102 }
103 return batch;
104 }
105
PrependHit(const EmbeddingHit & hit)106 libtextclassifier3::Status PostingListEmbeddingHitAccessor::PrependHit(
107 const EmbeddingHit &hit) {
108 PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
109 ? preexisting_posting_list_->posting_list
110 : in_memory_posting_list_;
111 libtextclassifier3::Status status = serializer_->PrependHit(&active_pl, hit);
112 if (!absl_ports::IsResourceExhausted(status)) {
113 return status;
114 }
115 // There is no more room to add hits to this current posting list! Therefore,
116 // we need to either move those hits to a larger posting list or flush this
117 // posting list and create another max-sized posting list in the chain.
118 if (preexisting_posting_list_ != nullptr) {
119 ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
120 } else {
121 ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
122 }
123
124 // Re-add hit. Should always fit since we just cleared
125 // in_memory_posting_list_. It's fine to explicitly reference
126 // in_memory_posting_list_ here because there's no way of reaching this line
127 // while preexisting_posting_list_ is still in use.
128 return serializer_->PrependHit(&in_memory_posting_list_, hit);
129 }
130
131 } // namespace lib
132 } // namespace icing
133