• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_HIT_HIT_H_
16 #define ICING_INDEX_HIT_HIT_H_
17 
18 #include <cstdint>
19 #include <limits>
20 
21 #include "icing/legacy/core/icing-packed-pod.h"
22 #include "icing/schema/section.h"
23 #include "icing/store/document-id.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 // Hit is a specific encoding that refers to content within a document. A hit
29 // consists of:
30 // - a DocumentId
31 // - a SectionId
32 // referring to the document and section that the hit corresponds to, as well as
33 // metadata about the hit:
34 // - whether the Hit has a TermFrequency other than the default value
35 // - whether the Hit does not appear exactly in the document, but instead
36 //   represents a term that is a prefix of a term in the document
37 // - whether the Hit came from a section that has prefix expansion enabled
38 // and a term frequency for the hit.
39 // The hit is the most basic unit of the index and, when grouped together by
40 // term, can be used to encode what terms appear in what documents.
41 class Hit {
42  public:
43   // The datatype used to encode Hit information: the document_id, section_id
44   // and the has_term_frequency, prefix hit and in prefix section flags.
45   using Value = uint32_t;
46 
47   // WARNING: Changing this value will invalidate any pre-existing posting lists
48   // on user devices.
49   static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
50   // Docs are sorted in reverse, and 0 is never used as the inverted
51   // DocumentId (because it is the inverse of kInvalidValue), so it is always
52   // the max in a descending sort.
53   static constexpr Value kMaxDocumentIdSortValue = 0;
54 
55   // The Term Frequency of a Hit.
56   using TermFrequency = uint8_t;
57   // Max TermFrequency is 255.
58   static constexpr TermFrequency kMaxTermFrequency =
59       std::numeric_limits<TermFrequency>::max();
60   static constexpr TermFrequency kDefaultTermFrequency = 1;
61   static constexpr TermFrequency kNoTermFrequency = 0;
62 
63   explicit Hit(Value value = kInvalidValue,
64                TermFrequency term_frequency = kDefaultTermFrequency)
value_(value)65       : value_(value), term_frequency_(term_frequency) {}
66   Hit(SectionId section_id, DocumentId document_id,
67       TermFrequency term_frequency, bool is_in_prefix_section = false,
68       bool is_prefix_hit = false);
69 
is_valid()70   bool is_valid() const { return value() != kInvalidValue; }
value()71   Value value() const { return value_; }
72   DocumentId document_id() const;
73   SectionId section_id() const;
74   // Whether or not the hit contains a valid term frequency.
75   bool has_term_frequency() const;
term_frequency()76   TermFrequency term_frequency() const { return term_frequency_; }
77   bool is_prefix_hit() const;
78   bool is_in_prefix_section() const;
79 
80   bool operator<(const Hit& h2) const { return value() < h2.value(); }
81   bool operator==(const Hit& h2) const { return value() == h2.value(); }
82 
83   struct EqualsDocumentIdAndSectionId {
84     bool operator()(const Hit& hit1, const Hit& hit2) const;
85   };
86 
87  private:
88   // Value and TermFrequency must be in this order.
89   // Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags.
90   Value value_;
91   TermFrequency term_frequency_;
92 } __attribute__((packed));
93 static_assert(sizeof(Hit) == 5, "");
94 // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
95 static_assert(icing_is_packed_pod<Hit>::value, "go/icing-ubsan");
96 
97 }  // namespace lib
98 }  // namespace icing
99 
100 #endif  // ICING_INDEX_HIT_HIT_H_
101