• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/embed/posting-list-embedding-hit-accessor.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <utility>
20 #include <vector>
21 
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/file/posting_list/flash-index-storage.h"
26 #include "icing/file/posting_list/posting-list-common.h"
27 #include "icing/file/posting_list/posting-list-identifier.h"
28 #include "icing/file/posting_list/posting-list-used.h"
29 #include "icing/index/embed/embedding-hit.h"
30 #include "icing/index/embed/posting-list-embedding-hit-serializer.h"
31 #include "icing/legacy/index/icing-bit-util.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 libtextclassifier3::StatusOr<std::unique_ptr<PostingListEmbeddingHitAccessor>>
Create(FlashIndexStorage * storage,PostingListEmbeddingHitSerializer * serializer)38 PostingListEmbeddingHitAccessor::Create(
39     FlashIndexStorage *storage, PostingListEmbeddingHitSerializer *serializer) {
40   uint32_t max_posting_list_bytes = storage->max_posting_list_bytes();
41   ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
42                          PostingListUsed::CreateFromUnitializedRegion(
43                              serializer, max_posting_list_bytes));
44   return std::unique_ptr<PostingListEmbeddingHitAccessor>(
45       new PostingListEmbeddingHitAccessor(storage, serializer,
46                                           std::move(in_memory_posting_list)));
47 }
48 
49 libtextclassifier3::StatusOr<std::unique_ptr<PostingListEmbeddingHitAccessor>>
CreateFromExisting(FlashIndexStorage * storage,PostingListEmbeddingHitSerializer * serializer,PostingListIdentifier existing_posting_list_id)50 PostingListEmbeddingHitAccessor::CreateFromExisting(
51     FlashIndexStorage *storage, PostingListEmbeddingHitSerializer *serializer,
52     PostingListIdentifier existing_posting_list_id) {
53   // Our in_memory_posting_list_ will start as empty.
54   ICING_ASSIGN_OR_RETURN(
55       std::unique_ptr<PostingListEmbeddingHitAccessor> pl_accessor,
56       Create(storage, serializer));
57   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
58                          storage->GetPostingList(existing_posting_list_id));
59   pl_accessor->preexisting_posting_list_ =
60       std::make_unique<PostingListHolder>(std::move(holder));
61   return pl_accessor;
62 }
63 
64 // Returns the next batch of hits for the provided posting list.
65 libtextclassifier3::StatusOr<std::vector<EmbeddingHit>>
GetNextHitsBatch()66 PostingListEmbeddingHitAccessor::GetNextHitsBatch() {
67   if (preexisting_posting_list_ == nullptr) {
68     if (has_reached_posting_list_chain_end_) {
69       return std::vector<EmbeddingHit>();
70     }
71     return absl_ports::FailedPreconditionError(
72         "Cannot retrieve hits from a PostingListEmbeddingHitAccessor that was "
73         "not created from a preexisting posting list.");
74   }
75   ICING_ASSIGN_OR_RETURN(
76       std::vector<EmbeddingHit> batch,
77       serializer_->GetHits(&preexisting_posting_list_->posting_list));
78   uint32_t next_block_index = kInvalidBlockIndex;
79   // Posting lists will only be chained when they are max-sized, in which case
80   // next_block_index will point to the next block for the next posting list.
81   // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
82   // to the next free list block, which is not relevant here.
83   if (preexisting_posting_list_->posting_list.size_in_bytes() ==
84       storage_->max_posting_list_bytes()) {
85     next_block_index = preexisting_posting_list_->next_block_index;
86   }
87 
88   if (next_block_index != kInvalidBlockIndex) {
89     // Since we only have to deal with next block for max-sized posting list
90     // block, max_num_posting_lists is 1 and posting_list_index_bits is
91     // BitsToStore(1).
92     PostingListIdentifier next_posting_list_id(
93         next_block_index, /*posting_list_index=*/0,
94         /*posting_list_index_bits=*/BitsToStore(1));
95     ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
96                            storage_->GetPostingList(next_posting_list_id));
97     preexisting_posting_list_ =
98         std::make_unique<PostingListHolder>(std::move(holder));
99   } else {
100     has_reached_posting_list_chain_end_ = true;
101     preexisting_posting_list_.reset();
102   }
103   return batch;
104 }
105 
PrependHit(const EmbeddingHit & hit)106 libtextclassifier3::Status PostingListEmbeddingHitAccessor::PrependHit(
107     const EmbeddingHit &hit) {
108   PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
109                                    ? preexisting_posting_list_->posting_list
110                                    : in_memory_posting_list_;
111   libtextclassifier3::Status status = serializer_->PrependHit(&active_pl, hit);
112   if (!absl_ports::IsResourceExhausted(status)) {
113     return status;
114   }
115   // There is no more room to add hits to this current posting list! Therefore,
116   // we need to either move those hits to a larger posting list or flush this
117   // posting list and create another max-sized posting list in the chain.
118   if (preexisting_posting_list_ != nullptr) {
119     ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
120   } else {
121     ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
122   }
123 
124   // Re-add hit. Should always fit since we just cleared
125   // in_memory_posting_list_. It's fine to explicitly reference
126   // in_memory_posting_list_ here because there's no way of reaching this line
127   // while preexisting_posting_list_ is still in use.
128   return serializer_->PrependHit(&in_memory_posting_list_, hit);
129 }
130 
131 }  // namespace lib
132 }  // namespace icing
133