1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
16
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/index/hit/doc-hit-info.h"
28 #include "icing/index/iterator/doc-hit-info-iterator.h"
29 #include "icing/schema/section.h"
30 #include "icing/store/document-id.h"
31 #include "icing/util/status-macros.h"
32
33 namespace icing {
34 namespace lib {
35
36 namespace {
37
38 // When combining ANDed iterators, n-ary operator has better performance when
39 // number of operands > 3 according to benchmark cl/243720660
40 inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
41
42 // The minimum number of iterators needed to construct a And iterator. The And
43 // constructor currently takes 2 iterators.
44 inline constexpr int kMinBinaryIterators = 2;
45
46 } // namespace
47
CreateAndIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)48 std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
49 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
50 if (iterators.size() == 1) {
51 return std::move(iterators.at(0));
52 }
53
54 std::unique_ptr<DocHitInfoIterator> iterator;
55 if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
56 iterators.size() >= kMinBinaryIterators) {
57 // Accumulate the iterators that need to be ANDed together.
58 iterator = std::move(iterators.at(iterators.size() - 1));
59 for (int i = iterators.size() - 2; i >= 0; --i) {
60 std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
61 iterator = std::make_unique<DocHitInfoIteratorAnd>(
62 /*short_it=*/std::move(iterators[i]),
63 /*long_it=*/std::move(temp_iterator));
64 }
65 } else {
66 // If the vector is too small, the AndNary iterator can handle it and return
67 // an error on the Advance call
68 iterator =
69 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
70 }
71
72 return iterator;
73 }
74
DocHitInfoIteratorAnd(std::unique_ptr<DocHitInfoIterator> short_it,std::unique_ptr<DocHitInfoIterator> long_it)75 DocHitInfoIteratorAnd::DocHitInfoIteratorAnd(
76 std::unique_ptr<DocHitInfoIterator> short_it,
77 std::unique_ptr<DocHitInfoIterator> long_it)
78 : short_(std::move(short_it)), long_(std::move(long_it)) {}
79
Advance()80 libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
81 // Advance on short first
82 if (!short_->Advance().ok()) {
83 // Didn't find anything for the first iterator, reset to invalid values and
84 // return.
85 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
86 hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
87 return absl_ports::ResourceExhaustedError(
88 "No more DocHitInfos in iterator");
89 }
90 DocumentId short_doc_id = short_->doc_hit_info().document_id();
91
92 // Then AdvanceTo on long
93 ICING_ASSIGN_OR_RETURN(DocumentId long_doc_id,
94 AdvanceTo(long_.get(), short_doc_id));
95
96 // Now try to align DocHitInfos by moving one or the other.
97 while (short_doc_id != long_doc_id) {
98 if (short_doc_id > long_doc_id) {
99 ICING_ASSIGN_OR_RETURN(short_doc_id,
100 AdvanceTo(short_.get(), long_doc_id));
101 } else {
102 ICING_ASSIGN_OR_RETURN(long_doc_id, AdvanceTo(long_.get(), short_doc_id));
103 }
104 }
105
106 // Guaranteed that short_doc_id and long_doc_id match now
107 doc_hit_info_ = short_->doc_hit_info();
108 doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
109 hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() &
110 long_->hit_intersect_section_ids_mask();
111 return libtextclassifier3::Status::OK;
112 }
113
114 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()115 DocHitInfoIteratorAnd::TrimRightMostNode() && {
116 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
117 std::move(*long_).TrimRightMostNode());
118 if (trimmed_long.iterator_ == nullptr) {
119 trimmed_long.iterator_ = std::move(short_);
120 } else {
121 trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
122 std::move(short_), std::move(trimmed_long.iterator_));
123 }
124 return trimmed_long;
125 }
126
GetNumBlocksInspected() const127 int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
128 return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
129 }
130
GetNumLeafAdvanceCalls() const131 int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const {
132 return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls();
133 }
134
ToString() const135 std::string DocHitInfoIteratorAnd::ToString() const {
136 return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
137 ")");
138 }
139
DocHitInfoIteratorAndNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)140 DocHitInfoIteratorAndNary::DocHitInfoIteratorAndNary(
141 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
142 : iterators_(std::move(iterators)) {}
143
Advance()144 libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
145 if (iterators_.size() < 2) {
146 return absl_ports::InvalidArgumentError(
147 "Not enough iterators to AND together");
148 }
149
150 // Advance on the first iterator to get a potential hit
151 if (!iterators_.at(0)->Advance().ok()) {
152 // Didn't find anything for the first iterator, reset to invalid values and
153 // return
154 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
155 hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
156 return absl_ports::ResourceExhaustedError(
157 "No more DocHitInfos in iterator");
158 }
159 DocumentId potential_document_id =
160 iterators_.at(0)->doc_hit_info().document_id();
161
162 // Our goal is to find the next document_id that exists on all the iterators
163 // by advancing the iterators one by one. We start with some
164 // "potential_document_id", check if it actually matches the above goal. If
165 // yes, return. If not, find the next best "potential" and repeat till we hit
166 // the end.
167
168 // Has the current potential_document_id been found in all the iterators?
169 bool found_document_id = false;
170 while (!found_document_id) {
171 for (auto& iterator : iterators_) {
172 if (iterator->doc_hit_info().document_id() > potential_document_id) {
173 // Advance the current iterator until it's equal to or smaller than the
174 // potential hit doc id
175 DocumentId unused;
176 ICING_ASSIGN_OR_RETURN(
177 unused, AdvanceTo(iterator.get(), potential_document_id));
178 (void)unused; // Silence unused warning.
179 }
180
181 if (iterator->doc_hit_info().document_id() == potential_document_id) {
182 // The potential hit got matched on the iterators so far
183 found_document_id = true;
184 continue;
185 } else if (iterator->doc_hit_info().document_id() <
186 potential_document_id) {
187 // This iterator doesn't have potential_document_id as we've gone past
188 // it already. Use the current document_id as the new
189 // "potential_document_id" and start checking all iterators again.
190 found_document_id = false;
191 potential_document_id = iterator->doc_hit_info().document_id();
192 break;
193 }
194 }
195 }
196
197 // Found a DocumentId which exists in all the iterators
198 doc_hit_info_ = iterators_.at(0)->doc_hit_info();
199 hit_intersect_section_ids_mask_ =
200 iterators_.at(0)->hit_intersect_section_ids_mask();
201
202 for (size_t i = 1; i < iterators_.size(); i++) {
203 doc_hit_info_.MergeSectionsFrom(
204 iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
205 hit_intersect_section_ids_mask_ &=
206 iterators_.at(i)->hit_intersect_section_ids_mask();
207 }
208 return libtextclassifier3::Status::OK;
209 }
210
211 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()212 DocHitInfoIteratorAndNary::TrimRightMostNode() && {
213 ICING_ASSIGN_OR_RETURN(
214 TrimmedNode trimmed_right,
215 std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
216 if (trimmed_right.iterator_ == nullptr) {
217 if (iterators_.size() > 2) {
218 iterators_.pop_back();
219 trimmed_right.iterator_ =
220 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
221 } else if (iterators_.size() == 2) {
222 trimmed_right.iterator_ = std::move(iterators_.at(0));
223 }
224 } else {
225 iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
226 trimmed_right.iterator_ =
227 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
228 }
229 return trimmed_right;
230 }
231
GetNumBlocksInspected() const232 int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
233 int32_t blockCount = 0;
234 for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
235 blockCount += iter->GetNumBlocksInspected();
236 }
237 return blockCount;
238 }
239
GetNumLeafAdvanceCalls() const240 int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const {
241 int32_t leafCount = 0;
242 for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
243 leafCount += iter->GetNumLeafAdvanceCalls();
244 }
245 return leafCount;
246 }
247
ToString() const248 std::string DocHitInfoIteratorAndNary::ToString() const {
249 std::string ret = "(";
250
251 for (int i = 0; i < iterators_.size(); ++i) {
252 if (i == iterators_.size() - 1) {
253 // Last element in vector
254 absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), ")");
255 } else {
256 absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), " AND ");
257 }
258 }
259
260 return ret;
261 }
262
263 } // namespace lib
264 } // namespace icing
265