• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/schema/section-manager.h"
16 
17 #include <algorithm>
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/legacy/core/icing-string-util.h"
28 #include "icing/proto/document.pb.h"
29 #include "icing/proto/schema.pb.h"
30 #include "icing/proto/term.pb.h"
31 #include "icing/schema/property-util.h"
32 #include "icing/schema/section.h"
33 #include "icing/store/document-filter-data.h"
34 #include "icing/store/key-mapper.h"
35 #include "icing/util/status-macros.h"
36 
37 namespace icing {
38 namespace lib {
39 
40 namespace {
41 
42 // Helper function to append a new section metadata
AppendNewSectionMetadata(std::vector<SectionMetadata> * metadata_list,std::string && concatenated_path,const PropertyConfigProto & property_config)43 libtextclassifier3::Status AppendNewSectionMetadata(
44     std::vector<SectionMetadata>* metadata_list,
45     std::string&& concatenated_path,
46     const PropertyConfigProto& property_config) {
47   // Validates next section id, makes sure that section id is the same as the
48   // list index so that we could find any section metadata by id in O(1) later.
49   SectionId new_section_id = static_cast<SectionId>(metadata_list->size());
50   if (!IsSectionIdValid(new_section_id)) {
51     // Max number of sections reached
52     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
53         "Too many properties to be indexed, max number of properties "
54         "allowed: %d",
55         kMaxSectionId - kMinSectionId + 1));
56   }
57 
58   // Creates section metadata
59   metadata_list->push_back(SectionMetadata(
60       new_section_id, property_config.data_type(),
61       property_config.string_indexing_config().tokenizer_type(),
62       property_config.string_indexing_config().term_match_type(),
63       property_config.integer_indexing_config().numeric_match_type(),
64       property_config.embedding_indexing_config().embedding_indexing_type(),
65       std::move(concatenated_path)));
66   return libtextclassifier3::Status::OK;
67 }
68 
69 template <typename T>
AppendSection(SectionMetadata section_metadata,libtextclassifier3::StatusOr<std::vector<T>> && section_content_or,std::vector<Section<T>> & sections_out)70 void AppendSection(
71     SectionMetadata section_metadata,
72     libtextclassifier3::StatusOr<std::vector<T>>&& section_content_or,
73     std::vector<Section<T>>& sections_out) {
74   if (!section_content_or.ok()) {
75     return;
76   }
77 
78   std::vector<T> section_content = std::move(section_content_or).ValueOrDie();
79   if (!section_content.empty()) {
80     // Adds to result vector if section is found in document
81     sections_out.emplace_back(std::move(section_metadata),
82                               std::move(section_content));
83   }
84 }
85 
86 }  // namespace
87 
88 libtextclassifier3::Status
ProcessSchemaTypePropertyConfig(SchemaTypeId schema_type_id,const PropertyConfigProto & property_config,std::string && property_path)89 SectionManager::Builder::ProcessSchemaTypePropertyConfig(
90     SchemaTypeId schema_type_id, const PropertyConfigProto& property_config,
91     std::string&& property_path) {
92   if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
93     return absl_ports::InvalidArgumentError("Invalid schema type id");
94   }
95 
96   // We don't need to check if the property is indexable. This method will
97   // only be called properties that should consume sectionIds, even if the
98   // property's indexing configuration itself is not indexable.
99   // This would be the case for unknown and non-indexable property paths that
100   // are defined in the indexable_nested_properties_list.
101   ICING_RETURN_IF_ERROR(
102       AppendNewSectionMetadata(&section_metadata_cache_[schema_type_id],
103                                std::move(property_path), property_config));
104   return libtextclassifier3::Status::OK;
105 }
106 
107 libtextclassifier3::StatusOr<const SectionMetadata*>
GetSectionMetadata(SchemaTypeId schema_type_id,SectionId section_id) const108 SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
109                                    SectionId section_id) const {
110   if (schema_type_id < 0 || schema_type_id >= section_metadata_cache_.size()) {
111     return absl_ports::InvalidArgumentError("Invalid schema type id");
112   }
113   if (!IsSectionIdValid(section_id)) {
114     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
115         "Section id %d is greater than the max value %d", section_id,
116         kMaxSectionId));
117   }
118 
119   const std::vector<SectionMetadata>& section_metadatas =
120       section_metadata_cache_[schema_type_id];
121   if (section_id >= section_metadatas.size()) {
122     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
123         "Section with id %d doesn't exist in type config with id %d",
124         section_id, schema_type_id));
125   }
126 
127   // The index of metadata list is the same as the section id, so we can use
128   // section id as the index.
129   return &section_metadatas[section_id];
130 }
131 
ExtractSections(const DocumentProto & document) const132 libtextclassifier3::StatusOr<SectionGroup> SectionManager::ExtractSections(
133     const DocumentProto& document) const {
134   ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
135                          GetMetadataList(document.schema()));
136   SectionGroup section_group;
137   for (const SectionMetadata& section_metadata : *metadata_list) {
138     switch (section_metadata.data_type) {
139       case PropertyConfigProto::DataType::STRING: {
140         if (section_metadata.term_match_type == TermMatchType::UNKNOWN ||
141             section_metadata.tokenizer ==
142                 StringIndexingConfig::TokenizerType::NONE) {
143           // Skip if term-match type is UNKNOWN, or if the tokenizer-type is
144           // NONE.
145           break;
146         }
147         AppendSection(
148             section_metadata,
149             property_util::ExtractPropertyValuesFromDocument<std::string_view>(
150                 document, section_metadata.path),
151             section_group.string_sections);
152         break;
153       }
154       case PropertyConfigProto::DataType::INT64: {
155         if (section_metadata.numeric_match_type ==
156             IntegerIndexingConfig::NumericMatchType::UNKNOWN) {
157           // Skip if numeric-match type is UNKNOWN.
158           break;
159         }
160         AppendSection(section_metadata,
161                       property_util::ExtractPropertyValuesFromDocument<int64_t>(
162                           document, section_metadata.path),
163                       section_group.integer_sections);
164         break;
165       }
166       case PropertyConfigProto::DataType::VECTOR: {
167         if (section_metadata.embedding_indexing_type ==
168             EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN) {
169           // Skip if embedding indexing type is UNKNOWN.
170           break;
171         }
172         AppendSection(
173             section_metadata,
174             property_util::ExtractPropertyValuesFromDocument<
175                 PropertyProto::VectorProto>(document, section_metadata.path),
176             section_group.vector_sections);
177         break;
178       }
179       default: {
180         // Skip other data types.
181         break;
182       }
183     }
184   }
185   return section_group;
186 }
187 
188 libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
GetMetadataList(const std::string & type_config_name) const189 SectionManager::GetMetadataList(const std::string& type_config_name) const {
190   ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
191                          schema_type_mapper_.Get(type_config_name));
192   return &section_metadata_cache_.at(schema_type_id);
193 }
194 
195 }  // namespace lib
196 }  // namespace icing
197