1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/scoring/ranker.h"
16
17 #include <algorithm>
18 #include <vector>
19
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/scoring/scored-document-hit.h"
23 #include "icing/util/logging.h"
24
25 namespace icing {
26 namespace lib {
27
28 namespace {
29 // For all the heap manipulations in this file, we use a vector to represent the
30 // heap. The element at index 0 is the root node. For any node at index i, its
31 // left child node is at 2 * i + 1, its right child node is at 2 * i + 2.
32
33 // Helper function to wrap the heapify algorithm, it heapifies the target
34 // subtree node in place.
Heapify(std::vector<ScoredDocumentHit> * scored_document_hits,int target_subtree_root_index,const ScoredDocumentHitComparator & scored_document_hit_comparator)35 void Heapify(
36 std::vector<ScoredDocumentHit>* scored_document_hits,
37 int target_subtree_root_index,
38 const ScoredDocumentHitComparator& scored_document_hit_comparator) {
39 const int heap_size = scored_document_hits->size();
40 if (target_subtree_root_index >= heap_size) {
41 return;
42 }
43
44 // Initializes subtree root as the current best node.
45 int best = target_subtree_root_index;
46 // If we represent a heap in an array/vector, indices of left and right
47 // children can be calculated.
48 const int left = target_subtree_root_index * 2 + 1;
49 const int right = target_subtree_root_index * 2 + 2;
50
51 // If left child is better than current best
52 if (left < heap_size &&
53 scored_document_hit_comparator(scored_document_hits->at(left),
54 scored_document_hits->at(best))) {
55 best = left;
56 }
57
58 // If right child is better than current best
59 if (right < heap_size &&
60 scored_document_hit_comparator(scored_document_hits->at(right),
61 scored_document_hits->at(best))) {
62 best = right;
63 }
64
65 // If the best is not the subtree root, swap and continue heapifying the lower
66 // level subtree
67 if (best != target_subtree_root_index) {
68 std::swap(scored_document_hits->at(best),
69 scored_document_hits->at(target_subtree_root_index));
70 Heapify(scored_document_hits, best, scored_document_hit_comparator);
71 }
72 }
73
74 // Helper function to extract the root from the heap. The heap structure will be
75 // maintained.
76 //
77 // Returns:
78 // The current root element on success
79 // RESOURCE_EXHAUSTED_ERROR if heap is empty
PopRoot(std::vector<ScoredDocumentHit> * scored_document_hits_heap,const ScoredDocumentHitComparator & scored_document_hit_comparator)80 libtextclassifier3::StatusOr<ScoredDocumentHit> PopRoot(
81 std::vector<ScoredDocumentHit>* scored_document_hits_heap,
82 const ScoredDocumentHitComparator& scored_document_hit_comparator) {
83 if (scored_document_hits_heap->empty()) {
84 // An invalid ScoredDocumentHit
85 return absl_ports::ResourceExhaustedError("Heap is empty");
86 }
87
88 // Steps to extract root from heap:
89 // 1. copy out root
90 ScoredDocumentHit root = scored_document_hits_heap->at(0);
91 const size_t last_node_index = scored_document_hits_heap->size() - 1;
92 // 2. swap root and the last node
93 std::swap(scored_document_hits_heap->at(0),
94 scored_document_hits_heap->at(last_node_index));
95 // 3. remove last node
96 scored_document_hits_heap->pop_back();
97 // 4. heapify root
98 Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
99 scored_document_hit_comparator);
100 return root;
101 }
102
103 } // namespace
104
BuildHeapInPlace(std::vector<ScoredDocumentHit> * scored_document_hits,const ScoredDocumentHitComparator & scored_document_hit_comparator)105 void BuildHeapInPlace(
106 std::vector<ScoredDocumentHit>* scored_document_hits,
107 const ScoredDocumentHitComparator& scored_document_hit_comparator) {
108 const int heap_size = scored_document_hits->size();
109 // Since we use a vector to represent the heap, [size / 2 - 1] is the index
110 // of the parent node of the last node.
111 for (int subtree_root_index = heap_size / 2 - 1; subtree_root_index >= 0;
112 subtree_root_index--) {
113 Heapify(scored_document_hits, subtree_root_index,
114 scored_document_hit_comparator);
115 }
116 }
117
PopTopResultsFromHeap(std::vector<ScoredDocumentHit> * scored_document_hits_heap,int num_results,const ScoredDocumentHitComparator & scored_document_hit_comparator)118 std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
119 std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
120 const ScoredDocumentHitComparator& scored_document_hit_comparator) {
121 std::vector<ScoredDocumentHit> scored_document_hit_result;
122 int result_size = std::min(
123 num_results, static_cast<int>(scored_document_hits_heap->size()));
124 while (result_size-- > 0) {
125 libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or =
126 PopRoot(scored_document_hits_heap, scored_document_hit_comparator);
127 if (next_best_document_hit_or.ok()) {
128 scored_document_hit_result.push_back(
129 std::move(next_best_document_hit_or).ValueOrDie());
130 } else {
131 ICING_VLOG(1) << next_best_document_hit_or.status().error_message();
132 }
133 }
134 return scored_document_hit_result;
135 }
136
137 } // namespace lib
138 } // namespace icing
139