1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/util/document-validator.h"
16
17 #include <cstdint>
18 #include <unordered_set>
19
20 #include "icing/text_classifier/lib3/utils/base/status.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/schema/schema-util.h"
23 #include "icing/util/status-macros.h"
24
25 namespace icing {
26 namespace lib {
27
28 using PropertyConfigMap =
29 std::unordered_map<std::string_view, const PropertyConfigProto*>;
30
DocumentValidator(const SchemaStore * schema_store)31 DocumentValidator::DocumentValidator(const SchemaStore* schema_store)
32 : schema_store_(schema_store) {}
33
Validate(const DocumentProto & document,int depth)34 libtextclassifier3::Status DocumentValidator::Validate(
35 const DocumentProto& document, int depth) {
36 if (document.namespace_().empty()) {
37 return absl_ports::InvalidArgumentError("Field 'namespace' is empty.");
38 }
39
40 // Only require a non-empty uri on top-level documents.
41 if (depth == 0 && document.uri().empty()) {
42 return absl_ports::InvalidArgumentError("Field 'uri' is empty.");
43 }
44
45 if (document.schema().empty()) {
46 return absl_ports::InvalidArgumentError(
47 absl_ports::StrCat("Field 'schema' is empty for key: (",
48 document.namespace_(), ", ", document.uri(), ")."));
49 }
50
51 if (document.score() < 0) {
52 return absl_ports::InvalidArgumentError("Field 'score' is negative.");
53 }
54
55 if (document.creation_timestamp_ms() < 0) {
56 return absl_ports::InvalidArgumentError(
57 "Field 'creation_timestamp_ms' is negative.");
58 }
59
60 if (document.ttl_ms() < 0) {
61 return absl_ports::InvalidArgumentError("Field 'ttl_ms' is negative.");
62 }
63
64 // TODO(b/144458732): Implement a more robust version of
65 // ICING_ASSIGN_OR_RETURN that can support error logging.
66 auto type_config_or = schema_store_->GetSchemaTypeConfig(document.schema());
67 if (!type_config_or.ok()) {
68 ICING_LOG(ERROR) << type_config_or.status().error_message()
69 << "Error while validating document ("
70 << document.namespace_() << ", " << document.uri() << ")";
71 return type_config_or.status();
72 }
73 const SchemaTypeConfigProto* type_config =
74 std::move(type_config_or).ValueOrDie();
75
76 int32_t num_required_properties_actual = 0;
77 SchemaUtil::ParsedPropertyConfigs parsed_property_configs =
78 SchemaUtil::ParsePropertyConfigs(*type_config);
79 std::unordered_set<std::string_view> unique_properties;
80
81 for (const PropertyProto& property : document.properties()) {
82 if (property.name().empty()) {
83 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
84 "Field 'name' is empty in PropertyProto for key: (",
85 document.namespace_(), ", ", document.uri(), ")."));
86 }
87
88 if (!unique_properties.insert(property.name()).second) {
89 // Failed to insert because of duplicate property name
90 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
91 "Property name '", property.name(), "' already exists for key: (",
92 document.namespace_(), ", ", document.uri(), ")."));
93 }
94
95 const auto& property_iter =
96 parsed_property_configs.property_config_map.find(property.name());
97 if (property_iter == parsed_property_configs.property_config_map.end()) {
98 return absl_ports::NotFoundError(absl_ports::StrCat(
99 "Property config '", property.name(), "' not found for key: (",
100 document.namespace_(), ", ", document.uri(),
101 ") of type: ", document.schema(), "."));
102 }
103 const PropertyConfigProto& property_config = *property_iter->second;
104
105 // Get the property value size according to data type.
106 int value_size = 0;
107 if (property_config.data_type() == PropertyConfigProto::DataType::STRING) {
108 value_size = property.string_values_size();
109 } else if (property_config.data_type() ==
110 PropertyConfigProto::DataType::INT64) {
111 value_size = property.int64_values_size();
112 } else if (property_config.data_type() ==
113 PropertyConfigProto::DataType::DOUBLE) {
114 value_size = property.double_values_size();
115 } else if (property_config.data_type() ==
116 PropertyConfigProto::DataType::BOOLEAN) {
117 value_size = property.boolean_values_size();
118 } else if (property_config.data_type() ==
119 PropertyConfigProto::DataType::BYTES) {
120 value_size = property.bytes_values_size();
121 } else if (property_config.data_type() ==
122 PropertyConfigProto::DataType::DOCUMENT) {
123 value_size = property.document_values_size();
124 }
125
126 if (property_config.cardinality() ==
127 PropertyConfigProto::Cardinality::OPTIONAL) {
128 if (value_size != 0 && value_size != 1) {
129 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
130 "Property '%s' is optional but %d elements are "
131 "found for key: (%s, %s).",
132 property.name().c_str(), value_size, document.namespace_().c_str(),
133 document.uri().c_str()));
134 }
135 } else if (property_config.cardinality() ==
136 PropertyConfigProto::Cardinality::REQUIRED) {
137 if (value_size != 1) {
138 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
139 "Property '%s' with only 1 value is required but "
140 "%d elements are found for key: (%s, %s).",
141 property.name().c_str(), value_size, document.namespace_().c_str(),
142 document.uri().c_str()));
143 }
144 num_required_properties_actual++;
145 }
146
147 // We put the validation for nested DocumentProto at last separately
148 // because it takes longer time to run. If any of the previous validations
149 // fail, we don't need to validate the extra documents.
150 if (property_config.data_type() ==
151 PropertyConfigProto::DataType::DOCUMENT) {
152 const std::string_view nested_type_expected =
153 property_config.schema_type();
154 for (const DocumentProto& nested_document : property.document_values()) {
155 if (nested_type_expected.compare(nested_document.schema()) != 0) {
156 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
157 "Property '", property.name(), "' should have type '",
158 nested_type_expected,
159 "' but actual "
160 "value has type '",
161 nested_document.schema(), "' for key: (", document.namespace_(),
162 ", ", document.uri(), ")."));
163 }
164 ICING_RETURN_IF_ERROR(Validate(nested_document, depth + 1));
165 }
166 }
167 }
168 if (num_required_properties_actual <
169 parsed_property_configs.num_required_properties) {
170 return absl_ports::InvalidArgumentError(
171 absl_ports::StrCat("One or more required fields missing for key: (",
172 document.namespace_(), ", ", document.uri(), ")."));
173 }
174 return libtextclassifier3::Status::OK;
175 }
176
177 } // namespace lib
178 } // namespace icing
179