• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/scoring/ranker.h"
16 
17 #include <algorithm>
18 #include <vector>
19 
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/scoring/scored-document-hit.h"
23 #include "icing/util/logging.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 namespace {
29 // For all the heap manipulations in this file, we use a vector to represent the
30 // heap. The element at index 0 is the root node. For any node at index i, its
31 // left child node is at 2 * i + 1, its right child node is at 2 * i + 2.
32 
33 // Helper function to wrap the heapify algorithm, it heapifies the target
34 // subtree node in place.
Heapify(std::vector<ScoredDocumentHit> * scored_document_hits,int target_subtree_root_index,const ScoredDocumentHitComparator & scored_document_hit_comparator)35 void Heapify(
36     std::vector<ScoredDocumentHit>* scored_document_hits,
37     int target_subtree_root_index,
38     const ScoredDocumentHitComparator& scored_document_hit_comparator) {
39   const int heap_size = scored_document_hits->size();
40   if (target_subtree_root_index >= heap_size) {
41     return;
42   }
43 
44   // Initializes subtree root as the current best node.
45   int best = target_subtree_root_index;
46   // If we represent a heap in an array/vector, indices of left and right
47   // children can be calculated.
48   const int left = target_subtree_root_index * 2 + 1;
49   const int right = target_subtree_root_index * 2 + 2;
50 
51   // If left child is better than current best
52   if (left < heap_size &&
53       scored_document_hit_comparator(scored_document_hits->at(left),
54                                      scored_document_hits->at(best))) {
55     best = left;
56   }
57 
58   // If right child is better than current best
59   if (right < heap_size &&
60       scored_document_hit_comparator(scored_document_hits->at(right),
61                                      scored_document_hits->at(best))) {
62     best = right;
63   }
64 
65   // If the best is not the subtree root, swap and continue heapifying the lower
66   // level subtree
67   if (best != target_subtree_root_index) {
68     std::swap(scored_document_hits->at(best),
69               scored_document_hits->at(target_subtree_root_index));
70     Heapify(scored_document_hits, best, scored_document_hit_comparator);
71   }
72 }
73 
74 // Helper function to extract the root from the heap. The heap structure will be
75 // maintained.
76 //
77 // Returns:
78 //   The current root element on success
79 //   RESOURCE_EXHAUSTED_ERROR if heap is empty
PopRoot(std::vector<ScoredDocumentHit> * scored_document_hits_heap,const ScoredDocumentHitComparator & scored_document_hit_comparator)80 libtextclassifier3::StatusOr<ScoredDocumentHit> PopRoot(
81     std::vector<ScoredDocumentHit>* scored_document_hits_heap,
82     const ScoredDocumentHitComparator& scored_document_hit_comparator) {
83   if (scored_document_hits_heap->empty()) {
84     // An invalid ScoredDocumentHit
85     return absl_ports::ResourceExhaustedError("Heap is empty");
86   }
87 
88   // Steps to extract root from heap:
89   // 1. copy out root
90   ScoredDocumentHit root = scored_document_hits_heap->at(0);
91   const size_t last_node_index = scored_document_hits_heap->size() - 1;
92   // 2. swap root and the last node
93   std::swap(scored_document_hits_heap->at(0),
94             scored_document_hits_heap->at(last_node_index));
95   // 3. remove last node
96   scored_document_hits_heap->pop_back();
97   // 4. heapify root
98   Heapify(scored_document_hits_heap, /*target_subtree_root_index=*/0,
99           scored_document_hit_comparator);
100   return root;
101 }
102 
103 }  // namespace
104 
BuildHeapInPlace(std::vector<ScoredDocumentHit> * scored_document_hits,const ScoredDocumentHitComparator & scored_document_hit_comparator)105 void BuildHeapInPlace(
106     std::vector<ScoredDocumentHit>* scored_document_hits,
107     const ScoredDocumentHitComparator& scored_document_hit_comparator) {
108   const int heap_size = scored_document_hits->size();
109   // Since we use a vector to represent the heap, [size / 2 - 1] is the index
110   // of the parent node of the last node.
111   for (int subtree_root_index = heap_size / 2 - 1; subtree_root_index >= 0;
112        subtree_root_index--) {
113     Heapify(scored_document_hits, subtree_root_index,
114             scored_document_hit_comparator);
115   }
116 }
117 
PopTopResultsFromHeap(std::vector<ScoredDocumentHit> * scored_document_hits_heap,int num_results,const ScoredDocumentHitComparator & scored_document_hit_comparator)118 std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
119     std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
120     const ScoredDocumentHitComparator& scored_document_hit_comparator) {
121   std::vector<ScoredDocumentHit> scored_document_hit_result;
122   int result_size = std::min(
123       num_results, static_cast<int>(scored_document_hits_heap->size()));
124   while (result_size-- > 0) {
125     libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or =
126         PopRoot(scored_document_hits_heap, scored_document_hit_comparator);
127     if (next_best_document_hit_or.ok()) {
128       scored_document_hit_result.push_back(
129           std::move(next_best_document_hit_or).ValueOrDie());
130     } else {
131       ICING_VLOG(1) << next_best_document_hit_or.status().error_message();
132     }
133   }
134   return scored_document_hit_result;
135 }
136 
137 }  // namespace lib
138 }  // namespace icing
139