• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SECTION_H_
16 #define ICING_SCHEMA_SECTION_H_
17 
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/proto/schema.pb.h"
25 #include "icing/proto/term.pb.h"
26 
27 namespace icing {
28 namespace lib {
29 
// Integer type used to identify a section.
using SectionId = int8_t;

// A section id uses 6 bits, which yields 64 distinct values.
inline constexpr int kSectionIdBits = 6;
inline constexpr SectionId kTotalNumSections = 1 << kSectionIdBits;

// Bounds (both inclusive) of the section id range.
inline constexpr SectionId kMinSectionId = 0;
inline constexpr SectionId kMaxSectionId = kTotalNumSections - 1;

// Equal to kTotalNumSections, i.e. one past kMaxSectionId.
inline constexpr SectionId kInvalidSectionId = kTotalNumSections;

// Prior versions of Icing only supported 16 indexed properties.
inline constexpr SectionId kOldTotalNumSections = 16;
IsSectionIdValid(SectionId section_id)39 constexpr bool IsSectionIdValid(SectionId section_id) {
40   return section_id >= kMinSectionId && section_id <= kMaxSectionId;
41 }
42 
// Integer type used for masks over section ids.
using SectionIdMask = int64_t;
// Mask with no bit set.
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
// Mask with every bit set (bitwise complement of the empty mask).
inline constexpr SectionIdMask kSectionIdMaskAll = ~kSectionIdMaskNone;
46 
47 static_assert(kSectionIdBits < 8 * sizeof(SectionId),
48               "Cannot exhaust all bits of SectionId since it is a signed "
49               "integer and the most significant bit should be preserved.");
50 
51 static_assert(
52     kMaxSectionId < 8 * sizeof(SectionIdMask),
53     "SectionIdMask is not large enough to represent all section values!");
54 
55 struct SectionMetadata {
56   // Dot-joined property names, representing the location of section inside an
57   // document. E.g. "property1.property2"
58   std::string path;
59 
60   // A unique id of property within a type config
61   SectionId id;
62 
63   // Indexable data type of this section. E.g. STRING, INT64.
64   PropertyConfigProto::DataType::Code data_type;
65 
66   // How strings should be tokenized. It is invalid for a string section
67   // (data_type == 'STRING') to have tokenizer == 'NONE'.
68   StringIndexingConfig::TokenizerType::Code tokenizer;
69 
70   // How tokens in a string section should be matched.
71   //
72   // TermMatchType::UNKNOWN:
73   //   Terms will not match anything
74   //
75   // TermMatchType::PREFIX:
76   //   Terms will be stored as a prefix match, "fool" matches "foo" and "fool"
77   //
78   // TermMatchType::EXACT_ONLY:
79   //   Terms will be only stored as an exact match, "fool" only matches "fool"
80   TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
81 
82   // How tokens in a numeric section should be matched.
83   //
84   // NumericMatchType::UNKNOWN:
85   //   Contents will not match anything. It is invalid for a numeric section
86   //   (data_type == 'INT64') to have numeric_match_type == 'UNKNOWN'.
87   //
88   // NumericMatchType::RANGE:
89   //   Contents will be matched by a range query.
90   IntegerIndexingConfig::NumericMatchType::Code numeric_match_type;
91 
SectionMetadataSectionMetadata92   explicit SectionMetadata(
93       SectionId id_in, PropertyConfigProto::DataType::Code data_type_in,
94       StringIndexingConfig::TokenizerType::Code tokenizer,
95       TermMatchType::Code term_match_type_in,
96       IntegerIndexingConfig::NumericMatchType::Code numeric_match_type_in,
97       std::string&& path_in)
98       : path(std::move(path_in)),
99         id(id_in),
100         data_type(data_type_in),
101         tokenizer(tokenizer),
102         term_match_type(term_match_type_in),
103         numeric_match_type(numeric_match_type_in) {}
104 
105   SectionMetadata(const SectionMetadata& other) = default;
106   SectionMetadata& operator=(const SectionMetadata& other) = default;
107 
108   SectionMetadata(SectionMetadata&& other) = default;
109   SectionMetadata& operator=(SectionMetadata&& other) = default;
110 
111   bool operator==(const SectionMetadata& rhs) const {
112     return path == rhs.path && id == rhs.id && data_type == rhs.data_type &&
113            tokenizer == rhs.tokenizer &&
114            term_match_type == rhs.term_match_type &&
115            numeric_match_type == rhs.numeric_match_type;
116   }
117 };
118 
119 // Section is an icing internal concept similar to document property but with
120 // extra metadata. The content can be a value or the combination of repeated
121 // values of a property, and the type of content is specified by template.
122 //
123 // Current supported types:
124 // - std::string_view (PropertyConfigProto::DataType::STRING)
125 // - int64_t (PropertyConfigProto::DataType::INT64)
126 template <typename T>
127 struct Section {
128   SectionMetadata metadata;
129   std::vector<T> content;
130 
SectionSection131   explicit Section(SectionMetadata&& metadata_in, std::vector<T>&& content_in)
132       : metadata(std::move(metadata_in)), content(std::move(content_in)) {}
133 
data_typeSection134   PropertyConfigProto::DataType::Code data_type() const {
135     return metadata.data_type;
136   }
137 };
138 
139 // Groups of different type sections. Callers can access sections with types
140 // they want and avoid going through non-desired ones.
141 //
142 // REQUIRES: lifecycle of the property must be longer than this object, since we
143 //   use std::string_view for extracting its string_values.
144 struct SectionGroup {
145   std::vector<Section<std::string_view>> string_sections;
146   std::vector<Section<int64_t>> integer_sections;
147 };
148 
149 }  // namespace lib
150 }  // namespace icing
151 
152 #endif  // ICING_SCHEMA_SECTION_H_
153