1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_HIT_HIT_H_ 16 #define ICING_INDEX_HIT_HIT_H_ 17 18 #include <cstdint> 19 #include <limits> 20 21 #include "icing/legacy/core/icing-packed-pod.h" 22 #include "icing/schema/section.h" 23 #include "icing/store/document-id.h" 24 25 namespace icing { 26 namespace lib { 27 28 // Hit is a specific encoding that refers to content within a document. A hit 29 // consists of: 30 // - a DocumentId 31 // - a SectionId 32 // referring to the document and section that the hit corresponds to, as well as 33 // metadata about the hit: 34 // - whether the Hit has a TermFrequency other than the default value 35 // - whether the Hit does not appear exactly in the document, but instead 36 // represents a term that is a prefix of a term in the document 37 // - whether the Hit came from a section that has prefix expansion enabled 38 // and a term frequency for the hit. 39 // The hit is the most basic unit of the index and, when grouped together by 40 // term, can be used to encode what terms appear in what documents. 41 class Hit { 42 public: 43 // The datatype used to encode Hit information: the document_id, section_id 44 // and the has_term_frequency, prefix hit and in prefix section flags. 45 using Value = uint32_t; 46 47 // WARNING: Changing this value will invalidate any pre-existing posting lists 48 // on user devices. 49 static constexpr Value kInvalidValue = std::numeric_limits<Value>::max(); 50 // Docs are sorted in reverse, and 0 is never used as the inverted 51 // DocumentId (because it is the inverse of kInvalidValue), so it is always 52 // the max in a descending sort. 53 static constexpr Value kMaxDocumentIdSortValue = 0; 54 55 // The Term Frequency of a Hit. 56 using TermFrequency = uint8_t; 57 // Max TermFrequency is 255. 58 static constexpr TermFrequency kMaxTermFrequency = 59 std::numeric_limits<TermFrequency>::max(); 60 static constexpr TermFrequency kDefaultTermFrequency = 1; 61 static constexpr TermFrequency kNoTermFrequency = 0; 62 63 explicit Hit(Value value = kInvalidValue, 64 TermFrequency term_frequency = kDefaultTermFrequency) value_(value)65 : value_(value), term_frequency_(term_frequency) {} 66 Hit(SectionId section_id, DocumentId document_id, 67 TermFrequency term_frequency, bool is_in_prefix_section = false, 68 bool is_prefix_hit = false); 69 is_valid()70 bool is_valid() const { return value() != kInvalidValue; } value()71 Value value() const { return value_; } 72 DocumentId document_id() const; 73 SectionId section_id() const; 74 // Whether or not the hit contains a valid term frequency. 75 bool has_term_frequency() const; term_frequency()76 TermFrequency term_frequency() const { return term_frequency_; } 77 bool is_prefix_hit() const; 78 bool is_in_prefix_section() const; 79 80 bool operator<(const Hit& h2) const { return value() < h2.value(); } 81 bool operator==(const Hit& h2) const { return value() == h2.value(); } 82 83 struct EqualsDocumentIdAndSectionId { 84 bool operator()(const Hit& hit1, const Hit& hit2) const; 85 }; 86 87 private: 88 // Value and TermFrequency must be in this order. 89 // Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags. 90 Value value_; 91 TermFrequency term_frequency_; 92 } __attribute__((packed)); 93 static_assert(sizeof(Hit) == 5, ""); 94 // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions. 95 static_assert(icing_is_packed_pod<Hit>::value, "go/icing-ubsan"); 96 97 } // namespace lib 98 } // namespace icing 99 100 #endif // ICING_INDEX_HIT_HIT_H_ 101