• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/index/hit/doc-hit-info.h"
28 #include "icing/index/iterator/doc-hit-info-iterator.h"
29 #include "icing/schema/section.h"
30 #include "icing/store/document-id.h"
31 #include "icing/util/status-macros.h"
32 
33 namespace icing {
34 namespace lib {
35 
36 namespace {
37 
38 // When combining ANDed iterators, n-ary operator has better performance when
39 // number of operands > 3 according to benchmark cl/243720660
40 inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
41 
42 // The minimum number of iterators needed to construct a And iterator. The And
43 // constructor currently takes 2 iterators.
44 inline constexpr int kMinBinaryIterators = 2;
45 
46 }  // namespace
47 
CreateAndIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)48 std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
49     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
50   if (iterators.size() == 1) {
51     return std::move(iterators.at(0));
52   }
53 
54   std::unique_ptr<DocHitInfoIterator> iterator;
55   if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
56       iterators.size() >= kMinBinaryIterators) {
57     // Accumulate the iterators that need to be ANDed together.
58     iterator = std::move(iterators.at(iterators.size() - 1));
59     for (int i = iterators.size() - 2; i >= 0; --i) {
60       std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
61       iterator = std::make_unique<DocHitInfoIteratorAnd>(
62           /*short_it=*/std::move(iterators[i]),
63           /*long_it=*/std::move(temp_iterator));
64     }
65   } else {
66     // If the vector is too small, the AndNary iterator can handle it and return
67     // an error on the Advance call
68     iterator =
69         std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
70   }
71 
72   return iterator;
73 }
74 
DocHitInfoIteratorAnd(std::unique_ptr<DocHitInfoIterator> short_it,std::unique_ptr<DocHitInfoIterator> long_it)75 DocHitInfoIteratorAnd::DocHitInfoIteratorAnd(
76     std::unique_ptr<DocHitInfoIterator> short_it,
77     std::unique_ptr<DocHitInfoIterator> long_it)
78     : short_(std::move(short_it)), long_(std::move(long_it)) {}
79 
Advance()80 libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
81   // Advance on short first
82   if (!short_->Advance().ok()) {
83     // Didn't find anything for the first iterator, reset to invalid values and
84     // return.
85     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
86     hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
87     return absl_ports::ResourceExhaustedError(
88         "No more DocHitInfos in iterator");
89   }
90   DocumentId short_doc_id = short_->doc_hit_info().document_id();
91 
92   // Then AdvanceTo on long
93   ICING_ASSIGN_OR_RETURN(DocumentId long_doc_id,
94                          AdvanceTo(long_.get(), short_doc_id));
95 
96   // Now try to align DocHitInfos by moving one or the other.
97   while (short_doc_id != long_doc_id) {
98     if (short_doc_id > long_doc_id) {
99       ICING_ASSIGN_OR_RETURN(short_doc_id,
100                              AdvanceTo(short_.get(), long_doc_id));
101     } else {
102       ICING_ASSIGN_OR_RETURN(long_doc_id, AdvanceTo(long_.get(), short_doc_id));
103     }
104   }
105 
106   // Guaranteed that short_doc_id and long_doc_id match now
107   doc_hit_info_ = short_->doc_hit_info();
108   doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
109   hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() &
110                                     long_->hit_intersect_section_ids_mask();
111   return libtextclassifier3::Status::OK;
112 }
113 
114 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()115 DocHitInfoIteratorAnd::TrimRightMostNode() && {
116   ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
117                          std::move(*long_).TrimRightMostNode());
118   if (trimmed_long.iterator_ == nullptr) {
119     trimmed_long.iterator_ = std::move(short_);
120   } else {
121     trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
122         std::move(short_), std::move(trimmed_long.iterator_));
123   }
124   return trimmed_long;
125 }
126 
GetNumBlocksInspected() const127 int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
128   return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
129 }
130 
GetNumLeafAdvanceCalls() const131 int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const {
132   return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls();
133 }
134 
ToString() const135 std::string DocHitInfoIteratorAnd::ToString() const {
136   return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
137                             ")");
138 }
139 
DocHitInfoIteratorAndNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)140 DocHitInfoIteratorAndNary::DocHitInfoIteratorAndNary(
141     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
142     : iterators_(std::move(iterators)) {}
143 
Advance()144 libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
145   if (iterators_.size() < 2) {
146     return absl_ports::InvalidArgumentError(
147         "Not enough iterators to AND together");
148   }
149 
150   // Advance on the first iterator to get a potential hit
151   if (!iterators_.at(0)->Advance().ok()) {
152     // Didn't find anything for the first iterator, reset to invalid values and
153     // return
154     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
155     hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
156     return absl_ports::ResourceExhaustedError(
157         "No more DocHitInfos in iterator");
158   }
159   DocumentId potential_document_id =
160       iterators_.at(0)->doc_hit_info().document_id();
161 
162   // Our goal is to find the next document_id that exists on all the iterators
163   // by advancing the iterators one by one. We start with some
164   // "potential_document_id", check if it actually matches the above goal. If
165   // yes, return. If not, find the next best "potential" and repeat till we hit
166   // the end.
167 
168   // Has the current potential_document_id been found in all the iterators?
169   bool found_document_id = false;
170   while (!found_document_id) {
171     for (auto& iterator : iterators_) {
172       if (iterator->doc_hit_info().document_id() > potential_document_id) {
173         // Advance the current iterator until it's equal to or smaller than the
174         // potential hit doc id
175         DocumentId unused;
176         ICING_ASSIGN_OR_RETURN(
177             unused, AdvanceTo(iterator.get(), potential_document_id));
178         (void)unused;  // Silence unused warning.
179       }
180 
181       if (iterator->doc_hit_info().document_id() == potential_document_id) {
182         // The potential hit got matched on the iterators so far
183         found_document_id = true;
184         continue;
185       } else if (iterator->doc_hit_info().document_id() <
186                  potential_document_id) {
187         // This iterator doesn't have potential_document_id as we've gone past
188         // it already. Use the current document_id as the new
189         // "potential_document_id" and start checking all iterators again.
190         found_document_id = false;
191         potential_document_id = iterator->doc_hit_info().document_id();
192         break;
193       }
194     }
195   }
196 
197   // Found a DocumentId which exists in all the iterators
198   doc_hit_info_ = iterators_.at(0)->doc_hit_info();
199   hit_intersect_section_ids_mask_ =
200       iterators_.at(0)->hit_intersect_section_ids_mask();
201 
202   for (size_t i = 1; i < iterators_.size(); i++) {
203     doc_hit_info_.MergeSectionsFrom(
204         iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
205     hit_intersect_section_ids_mask_ &=
206         iterators_.at(i)->hit_intersect_section_ids_mask();
207   }
208   return libtextclassifier3::Status::OK;
209 }
210 
211 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()212 DocHitInfoIteratorAndNary::TrimRightMostNode() && {
213   ICING_ASSIGN_OR_RETURN(
214       TrimmedNode trimmed_right,
215       std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
216   if (trimmed_right.iterator_ == nullptr) {
217     if (iterators_.size() > 2) {
218       iterators_.pop_back();
219       trimmed_right.iterator_ =
220           std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
221     } else if (iterators_.size() == 2) {
222       trimmed_right.iterator_ = std::move(iterators_.at(0));
223     }
224   } else {
225     iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
226     trimmed_right.iterator_ =
227         std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
228   }
229   return trimmed_right;
230 }
231 
GetNumBlocksInspected() const232 int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
233   int32_t blockCount = 0;
234   for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
235     blockCount += iter->GetNumBlocksInspected();
236   }
237   return blockCount;
238 }
239 
GetNumLeafAdvanceCalls() const240 int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const {
241   int32_t leafCount = 0;
242   for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
243     leafCount += iter->GetNumLeafAdvanceCalls();
244   }
245   return leafCount;
246 }
247 
ToString() const248 std::string DocHitInfoIteratorAndNary::ToString() const {
249   std::string ret = "(";
250 
251   for (int i = 0; i < iterators_.size(); ++i) {
252     if (i == iterators_.size() - 1) {
253       // Last element in vector
254       absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), ")");
255     } else {
256       absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), " AND ");
257     }
258   }
259 
260   return ret;
261 }
262 
263 }  // namespace lib
264 }  // namespace icing
265