1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/hit/hit.h"
16 
17 #include "icing/store/document-id.h"
18 #include "icing/util/bit-util.h"
19 
20 namespace icing {
21 namespace lib {
22 
23 namespace {
24 
25 enum FlagOffset {
26   // This hit, whether exact or not, came from a prefixed section and will
27   // need to be backfilled into branching posting lists if/when those are
28   // created.
29   kInPrefixSection = 0,
30   // This hit represents a prefix of a longer term. If exact matches are
31   // required, then this hit should be ignored.
32   kPrefixHit = 1,
33   // Whether or not the hit has a term_frequency  other than
34   // kDefaultTermFrequency.
35   kHasTermFrequency = 2,
36   kNumFlags = 3,
37 };
38 
39 static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <
40                   sizeof(Hit::Value) * 8,
41               "Hit::kInvalidValue contains risky value and we should have at "
42               "least one unused bit to avoid potential bugs. Please follow the "
43               "process mentioned in hit.h to correct the value of "
44               "Hit::kInvalidValue and remove this static_assert afterwards.");
45 
46 static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
47                   sizeof(Hit::Value) * 8,
48               "HitOverflow");
49 static_assert(kDocumentIdBits == 22, "");
50 static_assert(kSectionIdBits == 6, "");
51 static_assert(kNumFlags == 3, "");
52 
InvertDocumentId(DocumentId document_id)53 inline DocumentId InvertDocumentId(DocumentId document_id) {
54   static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1),
55                 "(kMaxDocumentId + 1) must not overflow.");
56   static_assert(
57       (kMaxDocumentId + 1) < (1U << kDocumentIdBits),
58       "(kMaxDocumentId + 1) must also fit in kDocumentIdBits wide bitfield");
59   // Invert the document_id value. +1 is added so the resulting range is [1,
60   // kMaxDocumentId + 1].
61   return (kMaxDocumentId + 1) - document_id;
62 }
63 
64 }  // namespace
65 
BasicHit(SectionId section_id,DocumentId document_id)66 BasicHit::BasicHit(SectionId section_id, DocumentId document_id) {
67   // Values are stored so that when sorted, they appear in document_id
68   // descending, section_id ascending, order. So inverted document_id appears in
69   // the most significant bits, followed by (uninverted) section_id.
70   Value temp_value = 0;
71   bit_util::BitfieldSet(/*new_value=*/InvertDocumentId(document_id),
72                         /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits,
73                         /*value_out=*/&temp_value);
74   bit_util::BitfieldSet(/*new_value=*/section_id, /*lsb_offset=*/0,
75                         /*len=*/kSectionIdBits, /*value_out=*/&temp_value);
76   value_ = temp_value;
77 }
78 
document_id() const79 DocumentId BasicHit::document_id() const {
80   DocumentId inverted_document_id = bit_util::BitfieldGet(
81       value_, /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits);
82   // Undo the document_id inversion.
83   return InvertDocumentId(inverted_document_id);
84 }
85 
section_id() const86 SectionId BasicHit::section_id() const {
87   return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
88                                /*len=*/kSectionIdBits);
89 }
90 
Hit(SectionId section_id,DocumentId document_id,Hit::TermFrequency term_frequency,bool is_in_prefix_section,bool is_prefix_hit)91 Hit::Hit(SectionId section_id, DocumentId document_id,
92          Hit::TermFrequency term_frequency, bool is_in_prefix_section,
93          bool is_prefix_hit)
94     : term_frequency_(term_frequency) {
95   // Values are stored so that when sorted, they appear in document_id
96   // descending, section_id ascending, order. Also, all else being
97   // equal, non-prefix hits sort before prefix hits. So inverted
98   // document_id appears in the most significant bits, followed by
99   // (uninverted) section_id.
100   Value temp_value = 0;
101   bit_util::BitfieldSet(InvertDocumentId(document_id),
102                         kSectionIdBits + kNumFlags, kDocumentIdBits,
103                         &temp_value);
104   bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
105   bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
106                         kHasTermFrequency, /*len=*/1, &temp_value);
107   bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value);
108   bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection,
109                         /*len=*/1, &temp_value);
110   value_ = temp_value;
111 }
112 
document_id() const113 DocumentId Hit::document_id() const {
114   DocumentId inverted_document_id = bit_util::BitfieldGet(
115       value(), kSectionIdBits + kNumFlags, kDocumentIdBits);
116   // Undo the document_id inversion.
117   return InvertDocumentId(inverted_document_id);
118 }
119 
section_id() const120 SectionId Hit::section_id() const {
121   return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits);
122 }
123 
has_term_frequency() const124 bool Hit::has_term_frequency() const {
125   return bit_util::BitfieldGet(value(), kHasTermFrequency, 1);
126 }
127 
is_prefix_hit() const128 bool Hit::is_prefix_hit() const {
129   return bit_util::BitfieldGet(value(), kPrefixHit, 1);
130 }
131 
is_in_prefix_section() const132 bool Hit::is_in_prefix_section() const {
133   return bit_util::BitfieldGet(value(), kInPrefixSection, 1);
134 }
135 
TranslateHit(Hit old_hit,DocumentId new_document_id)136 Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) {
137   return Hit(old_hit.section_id(), new_document_id, old_hit.term_frequency(),
138              old_hit.is_in_prefix_section(), old_hit.is_prefix_hit());
139 }
140 
operator ()(const Hit & hit1,const Hit & hit2) const141 bool Hit::EqualsDocumentIdAndSectionId::operator()(const Hit& hit1,
142                                                    const Hit& hit2) const {
143   return (hit1.value() >> kNumFlags) == (hit2.value() >> kNumFlags);
144 }
145 
146 }  // namespace lib
147 }  // namespace icing
148