1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/schema/schema-util.h"
16
17 #include <algorithm>
18 #include <cstdint>
19 #include <queue>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26
27 #include "icing/text_classifier/lib3/utils/base/status.h"
28 #include "icing/absl_ports/annotate.h"
29 #include "icing/absl_ports/canonical_errors.h"
30 #include "icing/absl_ports/str_cat.h"
31 #include "icing/absl_ports/str_join.h"
32 #include "icing/proto/schema.pb.h"
33 #include "icing/proto/term.pb.h"
34 #include "icing/util/logging.h"
35 #include "icing/util/status-macros.h"
36
37 namespace icing {
38 namespace lib {
39
40 namespace {
41
ArePropertiesEqual(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)42 bool ArePropertiesEqual(const PropertyConfigProto& old_property,
43 const PropertyConfigProto& new_property) {
44 return old_property.property_name() == new_property.property_name() &&
45 old_property.data_type() == new_property.data_type() &&
46 old_property.schema_type() == new_property.schema_type() &&
47 old_property.cardinality() == new_property.cardinality() &&
48 old_property.string_indexing_config().term_match_type() ==
49 new_property.string_indexing_config().term_match_type() &&
50 old_property.string_indexing_config().tokenizer_type() ==
51 new_property.string_indexing_config().tokenizer_type() &&
52 old_property.document_indexing_config().index_nested_properties() ==
53 new_property.document_indexing_config().index_nested_properties();
54 }
55
IsCardinalityCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)56 bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
57 const PropertyConfigProto& new_property) {
58 if (old_property.cardinality() < new_property.cardinality()) {
59 // We allow a new, less restrictive cardinality (i.e. a REQUIRED field
60 // can become REPEATED or OPTIONAL, but not the other way around).
61 ICING_VLOG(1) << absl_ports::StrCat(
62 "Cardinality is more restrictive than before ",
63 PropertyConfigProto::Cardinality::Code_Name(old_property.cardinality()),
64 "->",
65 PropertyConfigProto::Cardinality::Code_Name(
66 new_property.cardinality()));
67 return false;
68 }
69 return true;
70 }
71
IsDataTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)72 bool IsDataTypeCompatible(const PropertyConfigProto& old_property,
73 const PropertyConfigProto& new_property) {
74 if (old_property.data_type() != new_property.data_type()) {
75 // TODO(cassiewang): Maybe we can be a bit looser with this, e.g. we just
76 // string cast an int64_t to a string. But for now, we'll stick with
77 // simplistics.
78 ICING_VLOG(1) << absl_ports::StrCat(
79 "Data type ",
80 PropertyConfigProto::DataType::Code_Name(old_property.data_type()),
81 "->",
82 PropertyConfigProto::DataType::Code_Name(new_property.data_type()));
83 return false;
84 }
85 return true;
86 }
87
IsSchemaTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)88 bool IsSchemaTypeCompatible(const PropertyConfigProto& old_property,
89 const PropertyConfigProto& new_property) {
90 if (old_property.schema_type() != new_property.schema_type()) {
91 ICING_VLOG(1) << absl_ports::StrCat("Schema type ",
92 old_property.schema_type(), "->",
93 new_property.schema_type());
94 return false;
95 }
96 return true;
97 }
98
IsPropertyCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)99 bool IsPropertyCompatible(const PropertyConfigProto& old_property,
100 const PropertyConfigProto& new_property) {
101 return IsDataTypeCompatible(old_property, new_property) &&
102 IsSchemaTypeCompatible(old_property, new_property) &&
103 IsCardinalityCompatible(old_property, new_property);
104 }
105
IsTermMatchTypeCompatible(const StringIndexingConfig & old_indexed,const StringIndexingConfig & new_indexed)106 bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
107 const StringIndexingConfig& new_indexed) {
108 return old_indexed.term_match_type() == new_indexed.term_match_type() &&
109 old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
110 }
111
IsIntegerNumericMatchTypeCompatible(const IntegerIndexingConfig & old_indexed,const IntegerIndexingConfig & new_indexed)112 bool IsIntegerNumericMatchTypeCompatible(
113 const IntegerIndexingConfig& old_indexed,
114 const IntegerIndexingConfig& new_indexed) {
115 return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
116 }
117
IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig & old_indexed,const EmbeddingIndexingConfig & new_indexed)118 bool IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig& old_indexed,
119 const EmbeddingIndexingConfig& new_indexed) {
120 return old_indexed.embedding_indexing_type() ==
121 new_indexed.embedding_indexing_type();
122 }
123
IsDocumentIndexingCompatible(const DocumentIndexingConfig & old_indexed,const DocumentIndexingConfig & new_indexed)124 bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
125 const DocumentIndexingConfig& new_indexed) {
126 // TODO(b/265304217): This could mark the new schema as incompatible and
127 // generate some unnecessary index rebuilds if the two schemas have an
128 // equivalent set of indexed properties, but changed the way that it is
129 // declared.
130 if (old_indexed.index_nested_properties() !=
131 new_indexed.index_nested_properties()) {
132 return false;
133 }
134
135 if (old_indexed.indexable_nested_properties_list().size() !=
136 new_indexed.indexable_nested_properties_list().size()) {
137 return false;
138 }
139
140 std::unordered_set<std::string_view> old_indexable_nested_properies_set(
141 old_indexed.indexable_nested_properties_list().begin(),
142 old_indexed.indexable_nested_properties_list().end());
143 for (const auto& property : new_indexed.indexable_nested_properties_list()) {
144 if (old_indexable_nested_properies_set.find(property) ==
145 old_indexable_nested_properies_set.end()) {
146 return false;
147 }
148 }
149 return true;
150 }
151
AddIncompatibleChangeToDelta(std::unordered_set<std::string> & incompatible_delta,const SchemaTypeConfigProto & old_type_config,const SchemaUtil::DependentMap & new_schema_dependent_map,const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map)152 void AddIncompatibleChangeToDelta(
153 std::unordered_set<std::string>& incompatible_delta,
154 const SchemaTypeConfigProto& old_type_config,
155 const SchemaUtil::DependentMap& new_schema_dependent_map,
156 const SchemaUtil::TypeConfigMap& old_type_config_map,
157 const SchemaUtil::TypeConfigMap& new_type_config_map) {
158 // If this type is incompatible, then every type that depends on it might
159 // also be incompatible. Use the dependent map to mark those ones as
160 // incompatible too.
161 incompatible_delta.insert(old_type_config.schema_type());
162 auto dependent_types_itr =
163 new_schema_dependent_map.find(old_type_config.schema_type());
164 if (dependent_types_itr != new_schema_dependent_map.end()) {
165 for (const auto& [dependent_type, _] : dependent_types_itr->second) {
166 // The types from new_schema that depend on the current
167 // old_type_config may not present in old_schema.
168 // Those types will be listed at schema_delta.schema_types_new
169 // instead.
170 std::string dependent_type_str(dependent_type);
171 if (old_type_config_map.find(dependent_type_str) !=
172 old_type_config_map.end()) {
173 incompatible_delta.insert(std::move(dependent_type_str));
174 }
175 }
176 }
177 }
178
179 // Returns if C1 <= C2 based on the following rule, where C1 and C2 are
180 // cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
181 //
182 // Rule: REQUIRED < OPTIONAL < REPEATED
CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,PropertyConfigProto::Cardinality::Code C2)183 bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
184 PropertyConfigProto::Cardinality::Code C2) {
185 if (C1 == C2) {
186 return true;
187 }
188 if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
189 return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
190 C2 == PropertyConfigProto::Cardinality::REPEATED;
191 }
192 if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
193 return C2 == PropertyConfigProto::Cardinality::REPEATED;
194 }
195 return false;
196 }
197
198 // Check if set1 is a subset of set2.
199 template <typename T>
IsSubset(const std::unordered_set<T> & set1,const std::unordered_set<T> & set2)200 bool IsSubset(const std::unordered_set<T>& set1,
201 const std::unordered_set<T>& set2) {
202 for (const auto& item : set1) {
203 if (set2.find(item) == set2.end()) {
204 return false;
205 }
206 }
207 return true;
208 }
209
210 } // namespace
211
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_types_map,const std::unordered_set<std::string_view> & joinable_types,std::string_view type,bool path_contains_joinable_property,SchemaUtil::DependentMap * expanded_nested_types_map,std::unordered_map<std::string_view,bool> && pending_expansion_paths_indexable,std::unordered_set<std::string_view> * sink_types)212 libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
213 const SchemaUtil::DependentMap& direct_nested_types_map,
214 const std::unordered_set<std::string_view>& joinable_types,
215 std::string_view type, bool path_contains_joinable_property,
216 SchemaUtil::DependentMap* expanded_nested_types_map,
217 std::unordered_map<std::string_view, bool>&&
218 pending_expansion_paths_indexable,
219 std::unordered_set<std::string_view>* sink_types) {
220 // TODO(b/280698121): Implement optimizations to this code to avoid reentering
221 // a node after it's already been expanded.
222
223 auto itr = direct_nested_types_map.find(type);
224 if (itr == direct_nested_types_map.end()) {
225 // It's a sink node. Just return.
226 sink_types->insert(type);
227 return libtextclassifier3::Status::OK;
228 }
229 std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
230 expanded_relations;
231
232 // Add all of the adjacent outgoing relations.
233 expanded_relations.reserve(itr->second.size());
234 expanded_relations.insert(itr->second.begin(), itr->second.end());
235
236 // Iterate through each adjacent outgoing relation and add their indirect
237 // outgoing relations.
238 for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
239 // Make a copy of pending_expansion_paths_indexable for every iteration.
240 std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
241 pending_expansion_paths_indexable);
242
243 // 1. Check the nested indexable config of the edge (type -> adjacent_type),
244 // and the joinable config of the current path up to adjacent_type.
245 //
246 // The nested indexable config is true if any of the PropertyConfigProtos
247 // representing the connecting edge has index_nested_properties=true.
248 bool is_edge_nested_indexable = std::any_of(
249 adjacent_property_protos.begin(), adjacent_property_protos.end(),
250 [](const PropertyConfigProto* property_config) {
251 return property_config->document_indexing_config()
252 .index_nested_properties();
253 });
254 // TODO(b/265304217): change this once we add joinable_properties_list.
255 // Check if addition of the new edge (type->adjacent_type) makes the path
256 // joinable.
257 bool new_path_contains_joinable_property =
258 joinable_types.count(type) > 0 || path_contains_joinable_property;
259 // Set is_nested_indexable field for the current edge
260 pending_expansion_paths_copy[type] = is_edge_nested_indexable;
261
262 // If is_edge_nested_indexable=false, then all paths to adjacent_type
263 // currently in the pending_expansions map are also not nested indexable.
264 if (!is_edge_nested_indexable) {
265 for (auto& pending_expansion : pending_expansion_paths_copy) {
266 pending_expansion.second = false;
267 }
268 }
269
270 // 2. Check if we're in the middle of expanding this type - IOW
271 // there's a cycle!
272 //
273 // This cycle is not allowed if either:
274 // 1. The cycle starting at adjacent_type is nested indexable, OR
275 // 2. The current path contains a joinable property.
276 auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
277 if (adjacent_itr != pending_expansion_paths_copy.end()) {
278 if (adjacent_itr->second || new_path_contains_joinable_property) {
279 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
280 "Invalid cycle detected in type configs. '", type,
281 "' references itself and is nested-indexable or nested-joinable."));
282 }
283 // The cycle is allowed and there's no need to keep iterating the loop.
284 // Move on to the next adjacent value.
285 continue;
286 }
287
288 // 3. Expand this type as needed.
289 ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
290 direct_nested_types_map, joinable_types, adjacent_type,
291 new_path_contains_joinable_property, expanded_nested_types_map,
292 std::move(pending_expansion_paths_copy), sink_types));
293 if (sink_types->count(adjacent_type) > 0) {
294 // "adjacent" is a sink node. Just skip to the next.
295 continue;
296 }
297
298 // 4. "adjacent" has been fully expanded. Add all of its transitive
299 // outgoing relations to this type's transitive outgoing relations.
300 auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
301 for (const auto& [transitive_reachable, _] :
302 adjacent_expanded_itr->second) {
303 // Insert a transitive reachable node `transitive_reachable` for `type` if
304 // it wasn't previously reachable.
305 // Since there is no direct edge between `type` and `transitive_reachable`
306 // we insert an empty vector into the dependent map.
307 expanded_relations.insert({transitive_reachable, {}});
308 }
309 }
310 for (const auto& kvp : expanded_relations) {
311 expanded_nested_types_map->operator[](type).insert(kvp);
312 }
313 return libtextclassifier3::Status::OK;
314 }
315
316 template <typename T>
CalculateAcyclicTransitiveRelations(const SchemaUtil::TypeRelationMap<T> & direct_relation_map,std::string_view type,SchemaUtil::TypeRelationMap<T> * expanded_relation_map,std::unordered_set<std::string_view> * pending_expansions,std::unordered_set<std::string_view> * sink_types)317 libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
318 const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
319 std::string_view type,
320 SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
321 std::unordered_set<std::string_view>* pending_expansions,
322 std::unordered_set<std::string_view>* sink_types) {
323 auto expanded_itr = expanded_relation_map->find(type);
324 if (expanded_itr != expanded_relation_map->end()) {
325 // We've already expanded this type. Just return.
326 return libtextclassifier3::Status::OK;
327 }
328 auto itr = direct_relation_map.find(type);
329 if (itr == direct_relation_map.end()) {
330 // It's a sink node. Just return.
331 sink_types->insert(type);
332 return libtextclassifier3::Status::OK;
333 }
334 pending_expansions->insert(type);
335 std::unordered_map<std::string_view, T> expanded_relations;
336
337 // Add all of the adjacent outgoing relations.
338 expanded_relations.reserve(itr->second.size());
339 expanded_relations.insert(itr->second.begin(), itr->second.end());
340
341 // Iterate through each adjacent outgoing relation and add their indirect
342 // outgoing relations.
343 for (const auto& [adjacent, _] : itr->second) {
344 // 1. Check if we're in the middle of expanding this type - IOW there's a
345 // cycle!
346 if (pending_expansions->count(adjacent) > 0) {
347 return absl_ports::InvalidArgumentError(
348 absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
349 "' references or inherits from itself."));
350 }
351
352 // 2. Expand this type as needed.
353 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
354 direct_relation_map, adjacent, expanded_relation_map,
355 pending_expansions, sink_types));
356 if (sink_types->count(adjacent) > 0) {
357 // "adjacent" is a sink node. Just skip to the next.
358 continue;
359 }
360
361 // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
362 // relations to this type's transitive outgoing relations.
363 auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
364 for (const auto& [transitive_reachable, _] :
365 adjacent_expanded_itr->second) {
366 // Insert a transitive reachable node `transitive_reachable` for `type`.
367 // Also since there is no direct edge between `type` and
368 // `transitive_reachable`, the direct edge is initialized by default.
369 expanded_relations.insert({transitive_reachable, T()});
370 }
371 }
372 expanded_relation_map->insert({type, std::move(expanded_relations)});
373 pending_expansions->erase(type);
374 return libtextclassifier3::Status::OK;
375 }
376
377 // Calculate and return the expanded nested-type map from
378 // direct_nested_type_map. This expands the direct_nested_type_map to also
379 // include indirect nested-type relations.
380 //
381 // Ex. Suppose we have the following relations in direct_nested_type_map.
382 //
383 // C -> B (Schema type B has a document property of type C)
384 // B -> A (Schema type A has a document property of type B)
385 //
386 // Then, this function would expand the map by adding C -> A to the map.
387 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_type_map,const std::unordered_set<std::string_view> & joinable_types,bool allow_circular_schema_definitions)388 CalculateTransitiveNestedTypeRelations(
389 const SchemaUtil::DependentMap& direct_nested_type_map,
390 const std::unordered_set<std::string_view>& joinable_types,
391 bool allow_circular_schema_definitions) {
392 SchemaUtil::DependentMap expanded_nested_type_map;
393 // Types that have no outgoing relations.
394 std::unordered_set<std::string_view> sink_types;
395
396 if (allow_circular_schema_definitions) {
397 // Map of nodes that are pending expansion -> whether the path from each key
398 // node to the 'current' node is nested_indexable.
399 // A copy of this map is made for each new node that we expand.
400 std::unordered_map<std::string_view, bool>
401 pending_expansion_paths_indexable;
402 for (const auto& kvp : direct_nested_type_map) {
403 ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
404 direct_nested_type_map, joinable_types, kvp.first,
405 /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
406 std::unordered_map<std::string_view, bool>(
407 pending_expansion_paths_indexable),
408 &sink_types));
409 }
410 } else {
411 // If allow_circular_schema_definitions is false, then fallback to the old
412 // way of detecting cycles.
413 // Types that we are expanding.
414 std::unordered_set<std::string_view> pending_expansions;
415 for (const auto& kvp : direct_nested_type_map) {
416 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
417 direct_nested_type_map, kvp.first, &expanded_nested_type_map,
418 &pending_expansions, &sink_types));
419 }
420 }
421 return expanded_nested_type_map;
422 }
423
424 // Calculate and return the expanded inheritance map from
425 // direct_nested_type_map. This expands the direct_inheritance_map to also
426 // include indirect inheritance relations.
427 //
428 // Ex. Suppose we have the following relations in direct_inheritance_map.
429 //
430 // C -> B (Schema type C is B's parent_type )
431 // B -> A (Schema type B is A's parent_type)
432 //
433 // Then, this function would expand the map by adding C -> A to the map.
434 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
CalculateTransitiveInheritanceRelations(const SchemaUtil::InheritanceMap & direct_inheritance_map)435 CalculateTransitiveInheritanceRelations(
436 const SchemaUtil::InheritanceMap& direct_inheritance_map) {
437 SchemaUtil::InheritanceMap expanded_inheritance_map;
438
439 // Types that we are expanding.
440 std::unordered_set<std::string_view> pending_expansions;
441
442 // Types that have no outgoing relation.
443 std::unordered_set<std::string_view> sink_types;
444 for (const auto& kvp : direct_inheritance_map) {
445 ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
446 direct_inheritance_map, kvp.first, &expanded_inheritance_map,
447 &pending_expansions, &sink_types));
448 }
449 return expanded_inheritance_map;
450 }
451
452 // Builds a transitive dependent map. Types with no dependents will not be
453 // present in the map as keys.
454 //
455 // Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
456 // type B and B has a property of type C. C and D only have non-document
457 // properties.
458 //
459 // The transitive dependent map for this schema would be:
460 // C -> A, B (both A and B depend on C)
461 // B -> A (A depends on B)
462 //
463 // A and D will not be present in the map as keys because no type depends on
464 // them.
465 //
466 // RETURNS:
467 // On success, a transitive dependent map of all types in the schema.
468 // INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
469 // ALREADY_EXISTS if a schema type is specified more than once in the schema
470 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
BuildTransitiveDependentGraph(const SchemaProto & schema,bool allow_circular_schema_definitions)471 BuildTransitiveDependentGraph(const SchemaProto& schema,
472 bool allow_circular_schema_definitions) {
473 // We expand the nested-type dependent map and inheritance map differently
474 // when calculating transitive relations. These two types of relations also
475 // should not be transitive so we keep these as separate maps.
476 //
477 // e.g. For schema type A, B and C, B depends on A through inheritance, and
478 // C depends on B by having a property with type B, we will have the two
479 // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
480 // in the map.
481 SchemaUtil::DependentMap direct_nested_type_map;
482 SchemaUtil::InheritanceMap direct_inheritance_map;
483
484 // Set of schema types that have at least one joinable property.
485 std::unordered_set<std::string_view> joinable_types;
486
487 // Add all first-order dependents.
488 std::unordered_set<std::string_view> known_types;
489 std::unordered_set<std::string_view> unknown_types;
490 for (const auto& type_config : schema.types()) {
491 std::string_view schema_type(type_config.schema_type());
492 if (known_types.count(schema_type) > 0) {
493 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
494 "Field 'schema_type' '", schema_type, "' is already defined"));
495 }
496 known_types.insert(schema_type);
497 unknown_types.erase(schema_type);
498 // Insert inheritance relations into the inheritance map.
499 for (std::string_view parent_schema_type : type_config.parent_types()) {
500 if (known_types.count(parent_schema_type) == 0) {
501 unknown_types.insert(parent_schema_type);
502 }
503 direct_inheritance_map[parent_schema_type][schema_type] = true;
504 }
505 for (const auto& property_config : type_config.properties()) {
506 if (property_config.joinable_config().value_type() !=
507 JoinableConfig::ValueType::NONE) {
508 joinable_types.insert(schema_type);
509 }
510 // Insert nested-type relations into the nested-type map.
511 if (property_config.data_type() ==
512 PropertyConfigProto::DataType::DOCUMENT) {
513 // Need to know what schema_type these Document properties should be
514 // validated against
515 std::string_view property_schema_type(property_config.schema_type());
516 if (known_types.count(property_schema_type) == 0) {
517 unknown_types.insert(property_schema_type);
518 }
519 direct_nested_type_map[property_schema_type][schema_type].push_back(
520 &property_config);
521 }
522 }
523 }
524 if (!unknown_types.empty()) {
525 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
526 "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
527 }
528
529 // Merge two expanded maps into a single dependent_map, without making
530 // inheritance and nested-type relations transitive.
531 ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
532 CalculateTransitiveNestedTypeRelations(
533 direct_nested_type_map, joinable_types,
534 allow_circular_schema_definitions));
535 ICING_ASSIGN_OR_RETURN(
536 SchemaUtil::InheritanceMap expanded_inheritance_map,
537 CalculateTransitiveInheritanceRelations(direct_inheritance_map));
538 for (const auto& [parent_type, inheritance_relation] :
539 expanded_inheritance_map) {
540 // Insert the parent_type into the dependent map if it is not present
541 // already.
542 merged_dependent_map.insert({parent_type, {}});
543 for (const auto& [child_type, _] : inheritance_relation) {
544 // Insert the child_type into parent_type's dependent map if it's not
545 // present already, in which case the value will be an empty vector.
546 merged_dependent_map[parent_type].insert({child_type, {}});
547 }
548 }
549 return merged_dependent_map;
550 }
551
552 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
BuildTransitiveInheritanceGraph(const SchemaProto & schema)553 SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
554 SchemaUtil::InheritanceMap direct_inheritance_map;
555 for (const auto& type_config : schema.types()) {
556 for (std::string_view parent_schema_type : type_config.parent_types()) {
557 direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
558 true;
559 }
560 }
561 return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
562 }
563
Validate(const SchemaProto & schema,bool allow_circular_schema_definitions)564 libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
565 const SchemaProto& schema, bool allow_circular_schema_definitions) {
566 // 1. Build the dependent map. This will detect any cycles, non-existent or
567 // duplicate types in the schema.
568 ICING_ASSIGN_OR_RETURN(
569 SchemaUtil::DependentMap dependent_map,
570 BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
571
572 // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
573 // already.
574 std::unordered_set<std::string_view> known_property_names;
575
576 // Tracks PropertyConfigs containing joinable properties.
577 std::unordered_set<std::string_view> schema_types_with_joinable_property;
578
579 // 2. Validate the properties of each type.
580 for (const auto& type_config : schema.types()) {
581 std::string_view schema_type(type_config.schema_type());
582 ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
583
584 // We only care about properties being unique within one type_config
585 known_property_names.clear();
586
587 for (const auto& property_config : type_config.properties()) {
588 std::string_view property_name(property_config.property_name());
589 ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
590
591 // Property names must be unique
592 if (!known_property_names.insert(property_name).second) {
593 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
594 "Field 'property_name' '", property_name,
595 "' is already defined for schema '", schema_type, "'"));
596 }
597
598 auto data_type = property_config.data_type();
599 ICING_RETURN_IF_ERROR(
600 ValidateDataType(data_type, schema_type, property_name));
601
602 if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
603 // Need to know what schema_type these Document properties should be
604 // validated against
605 std::string_view property_schema_type(property_config.schema_type());
606 libtextclassifier3::Status validated_status =
607 ValidateSchemaType(property_schema_type);
608 if (!validated_status.ok()) {
609 return absl_ports::Annotate(
610 validated_status,
611 absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
612 "data_types in schema property '",
613 schema_type, ".", property_name, "'"));
614 }
615
616 ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
617 property_config.document_indexing_config(), schema_type,
618 property_name));
619 }
620
621 ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
622 schema_type, property_name));
623
624 if (data_type == PropertyConfigProto::DataType::STRING) {
625 ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
626 property_config.string_indexing_config(), data_type, schema_type,
627 property_name));
628 }
629
630 ICING_RETURN_IF_ERROR(ValidateJoinableConfig(
631 property_config.joinable_config(), data_type,
632 property_config.cardinality(), schema_type, property_name));
633 if (property_config.joinable_config().value_type() !=
634 JoinableConfig::ValueType::NONE) {
635 schema_types_with_joinable_property.insert(schema_type);
636 }
637 }
638 }
639
640 // BFS traverse the dependent graph to make sure that no nested levels
641 // (properties with DOCUMENT data type) have REPEATED cardinality while
642 // depending on schema types with joinable property.
643 std::queue<std::string_view> frontier;
644 for (const auto& schema_type : schema_types_with_joinable_property) {
645 frontier.push(schema_type);
646 }
647 std::unordered_set<std::string_view> traversed =
648 std::move(schema_types_with_joinable_property);
649 while (!frontier.empty()) {
650 std::string_view schema_type = frontier.front();
651 frontier.pop();
652
653 const auto it = dependent_map.find(schema_type);
654 if (it == dependent_map.end()) {
655 continue;
656 }
657
658 // Check every type that has a property of type schema_type.
659 for (const auto& [next_schema_type, property_configs] : it->second) {
660 // Check all properties in "next_schema_type" that are of type
661 // "schema_type".
662 for (const PropertyConfigProto* property_config : property_configs) {
663 if (property_config != nullptr &&
664 property_config->cardinality() ==
665 PropertyConfigProto::Cardinality::REPEATED) {
666 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
667 "Schema type '", next_schema_type,
668 "' cannot have REPEATED nested document property '",
669 property_config->property_name(),
670 "' while connecting to some joinable properties"));
671 }
672 }
673
674 if (traversed.count(next_schema_type) == 0) {
675 traversed.insert(next_schema_type);
676 frontier.push(next_schema_type);
677 }
678 }
679 }
680
681 // Verify that every child type's property set has included all compatible
682 // properties from parent types.
683 ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
684 return dependent_map;
685 }
686
ValidateSchemaType(std::string_view schema_type)687 libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
688 std::string_view schema_type) {
689 // Require a schema_type
690 if (schema_type.empty()) {
691 return absl_ports::InvalidArgumentError(
692 "Field 'schema_type' cannot be empty.");
693 }
694
695 return libtextclassifier3::Status::OK;
696 }
697
ValidatePropertyName(std::string_view property_name,std::string_view schema_type)698 libtextclassifier3::Status SchemaUtil::ValidatePropertyName(
699 std::string_view property_name, std::string_view schema_type) {
700 // Require a property_name
701 if (property_name.empty()) {
702 return absl_ports::InvalidArgumentError(
703 absl_ports::StrCat("Field 'property_name' for schema '", schema_type,
704 "' cannot be empty."));
705 }
706
707 // Only support alphanumeric values.
708 for (char c : property_name) {
709 if (!std::isalnum(c)) {
710 return absl_ports::InvalidArgumentError(
711 absl_ports::StrCat("Field 'property_name' '", property_name,
712 "' can only contain alphanumeric characters."));
713 }
714 }
715
716 return libtextclassifier3::Status::OK;
717 }
718
ValidateDataType(PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)719 libtextclassifier3::Status SchemaUtil::ValidateDataType(
720 PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
721 std::string_view property_name) {
722 // UNKNOWN is the default enum value and should only be used for backwards
723 // compatibility
724 if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
725 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
726 "Field 'data_type' cannot be UNKNOWN for schema property '",
727 schema_type, ".", property_name, "'"));
728 }
729
730 return libtextclassifier3::Status::OK;
731 }
732
ValidateCardinality(PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)733 libtextclassifier3::Status SchemaUtil::ValidateCardinality(
734 PropertyConfigProto::Cardinality::Code cardinality,
735 std::string_view schema_type, std::string_view property_name) {
736 // UNKNOWN is the default enum value and should only be used for backwards
737 // compatibility
738 if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
739 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
740 "Field 'cardinality' cannot be UNKNOWN for schema property '",
741 schema_type, ".", property_name, "'"));
742 }
743
744 return libtextclassifier3::Status::OK;
745 }
746
ValidateStringIndexingConfig(const StringIndexingConfig & config,PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)747 libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
748 const StringIndexingConfig& config,
749 PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
750 std::string_view property_name) {
751 if (config.term_match_type() == TermMatchType::UNKNOWN &&
752 config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
753 // They set a tokenizer type, but no term match type.
754 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
755 "Indexed string property '", schema_type, ".", property_name,
756 "' cannot have a term match type UNKNOWN"));
757 }
758
759 if (config.term_match_type() != TermMatchType::UNKNOWN &&
760 config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
761 // They set a term match type, but no tokenizer type
762 return absl_ports::InvalidArgumentError(
763 absl_ports::StrCat("Indexed string property '", property_name,
764 "' cannot have a tokenizer type of NONE"));
765 }
766
767 return libtextclassifier3::Status::OK;
768 }
769
ValidateJoinableConfig(const JoinableConfig & config,PropertyConfigProto::DataType::Code data_type,PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)770 libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
771 const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
772 PropertyConfigProto::Cardinality::Code cardinality,
773 std::string_view schema_type, std::string_view property_name) {
774 if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
775 if (data_type != PropertyConfigProto::DataType::STRING) {
776 return absl_ports::InvalidArgumentError(
777 absl_ports::StrCat("Qualified id joinable property '", property_name,
778 "' is required to have STRING data type"));
779 }
780
781 if (cardinality == PropertyConfigProto::Cardinality::REPEATED) {
782 return absl_ports::InvalidArgumentError(
783 absl_ports::StrCat("Qualified id joinable property '", property_name,
784 "' cannot have REPEATED cardinality"));
785 }
786 }
787
788 if (config.propagate_delete() &&
789 config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
790 return absl_ports::InvalidArgumentError(
791 absl_ports::StrCat("Field 'property_name' '", property_name,
792 "' is required to have QUALIFIED_ID joinable "
793 "value type with delete propagation enabled"));
794 }
795
796 return libtextclassifier3::Status::OK;
797 }
798
ValidateDocumentIndexingConfig(const DocumentIndexingConfig & config,std::string_view schema_type,std::string_view property_name)799 libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
800 const DocumentIndexingConfig& config, std::string_view schema_type,
801 std::string_view property_name) {
802 if (!config.indexable_nested_properties_list().empty() &&
803 config.index_nested_properties()) {
804 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
805 "DocumentIndexingConfig.index_nested_properties is required to be "
806 "false when providing a non-empty indexable_nested_properties_list "
807 "for property '",
808 schema_type, ".", property_name, "'"));
809 }
810 return libtextclassifier3::Status::OK;
811 }
812
IsIndexedProperty(const PropertyConfigProto & property_config)813 /* static */ bool SchemaUtil::IsIndexedProperty(
814 const PropertyConfigProto& property_config) {
815 switch (property_config.data_type()) {
816 case PropertyConfigProto::DataType::STRING:
817 return property_config.string_indexing_config().term_match_type() !=
818 TermMatchType::UNKNOWN &&
819 property_config.string_indexing_config().tokenizer_type() !=
820 StringIndexingConfig::TokenizerType::NONE;
821 case PropertyConfigProto::DataType::INT64:
822 return property_config.integer_indexing_config().numeric_match_type() !=
823 IntegerIndexingConfig::NumericMatchType::UNKNOWN;
824 case PropertyConfigProto::DataType::DOCUMENT:
825 // A document property is considered indexed if it has
826 // index_nested_properties=true, or a non-empty
827 // indexable_nested_properties_list.
828 return property_config.document_indexing_config()
829 .index_nested_properties() ||
830 !property_config.document_indexing_config()
831 .indexable_nested_properties_list()
832 .empty();
833 case PropertyConfigProto::DataType::VECTOR:
834 return property_config.embedding_indexing_config()
835 .embedding_indexing_type() !=
836 EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN;
837 case PropertyConfigProto::DataType::UNKNOWN:
838 case PropertyConfigProto::DataType::DOUBLE:
839 case PropertyConfigProto::DataType::BOOLEAN:
840 case PropertyConfigProto::DataType::BYTES:
841 return false;
842 }
843 }
844
IsParent(const SchemaUtil::InheritanceMap & inheritance_map,std::string_view parent_type,std::string_view child_type)845 bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
846 std::string_view parent_type,
847 std::string_view child_type) {
848 auto iter = inheritance_map.find(parent_type);
849 if (iter == inheritance_map.end()) {
850 return false;
851 }
852 return iter->second.count(child_type) > 0;
853 }
854
IsInheritedPropertyCompatible(const SchemaUtil::InheritanceMap & inheritance_map,const PropertyConfigProto & child_property_config,const PropertyConfigProto & parent_property_config)855 bool SchemaUtil::IsInheritedPropertyCompatible(
856 const SchemaUtil::InheritanceMap& inheritance_map,
857 const PropertyConfigProto& child_property_config,
858 const PropertyConfigProto& parent_property_config) {
859 // Check if child_property_config->cardinality() <=
860 // parent_property_config->cardinality().
861 // Subtype may require a stricter cardinality, but cannot loosen cardinality
862 // requirements.
863 if (!CardinalityLessThanEq(child_property_config.cardinality(),
864 parent_property_config.cardinality())) {
865 return false;
866 }
867
868 // Now we can assume T1 and T2 are not nullptr, and cardinality check passes.
869 if (child_property_config.data_type() !=
870 PropertyConfigProto::DataType::DOCUMENT ||
871 parent_property_config.data_type() !=
872 PropertyConfigProto::DataType::DOCUMENT) {
873 return child_property_config.data_type() ==
874 parent_property_config.data_type();
875 }
876
877 // Now we can assume T1 and T2 are both document type.
878 return child_property_config.schema_type() ==
879 parent_property_config.schema_type() ||
880 IsParent(inheritance_map, parent_property_config.schema_type(),
881 child_property_config.schema_type());
882 }
883
ValidateInheritedProperties(const SchemaProto & schema)884 libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
885 const SchemaProto& schema) {
886 // Create a inheritance map
887 ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
888 BuildTransitiveInheritanceGraph(schema));
889
890 // Create a map that maps from type name to property names, and then from
891 // property names to PropertyConfigProto.
892 std::unordered_map<
893 std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
894 property_map;
895 for (const SchemaTypeConfigProto& type_config : schema.types()) {
896 // Skipping building entries for types without any child or parent, since
897 // such entry will never be used.
898 if (type_config.parent_types().empty() &&
899 inheritance_map.count(type_config.schema_type()) == 0) {
900 continue;
901 }
902 auto& curr_property_map = property_map[type_config.schema_type()];
903 for (const PropertyConfigProto& property_config :
904 type_config.properties()) {
905 curr_property_map[property_config.property_name()] = &property_config;
906 }
907 }
908
909 // Validate child properties.
910 for (const SchemaTypeConfigProto& type_config : schema.types()) {
911 const std::string& child_type_name = type_config.schema_type();
912 auto& child_property_map = property_map[child_type_name];
913
914 for (const std::string& parent_type_name : type_config.parent_types()) {
915 auto& parent_property_map = property_map[parent_type_name];
916
917 for (const auto& [property_name, parent_property_config] :
918 parent_property_map) {
919 auto child_property_iter = child_property_map.find(property_name);
920 if (child_property_iter == child_property_map.end()) {
921 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
922 "Property ", property_name, " is not present in child type ",
923 child_type_name, ", but it is defined in the parent type ",
924 parent_type_name, "."));
925 }
926 if (!IsInheritedPropertyCompatible(inheritance_map,
927 *child_property_iter->second,
928 *parent_property_config)) {
929 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
930 "Property ", property_name, " from child type ", child_type_name,
931 " is not compatible to the parent type ", parent_type_name, "."));
932 }
933 }
934 }
935 }
936 return libtextclassifier3::Status::OK;
937 }
938
BuildTypeConfigMap(const SchemaProto & schema,SchemaUtil::TypeConfigMap * type_config_map)939 void SchemaUtil::BuildTypeConfigMap(
940 const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
941 type_config_map->clear();
942 for (const SchemaTypeConfigProto& type_config : schema.types()) {
943 type_config_map->emplace(type_config.schema_type(), type_config);
944 }
945 }
946
ParsePropertyConfigs(const SchemaTypeConfigProto & type_config)947 SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
948 const SchemaTypeConfigProto& type_config) {
949 ParsedPropertyConfigs parsed_property_configs;
950
951 // TODO(cassiewang): consider caching property_config_map for some properties,
952 // e.g. using LRU cache. Or changing schema.proto to use go/protomap.
953 for (const PropertyConfigProto& property_config : type_config.properties()) {
954 std::string_view property_name = property_config.property_name();
955 parsed_property_configs.property_config_map.emplace(property_name,
956 &property_config);
957 if (property_config.cardinality() ==
958 PropertyConfigProto::Cardinality::REQUIRED) {
959 parsed_property_configs.required_properties.insert(property_name);
960 }
961
962 // A non-default term_match_type indicates that this property is meant to be
963 // indexed.
964 if (IsIndexedProperty(property_config)) {
965 parsed_property_configs.indexed_properties.insert(property_name);
966 }
967
968 // A non-default value_type indicates that this property is meant to be
969 // joinable.
970 if (property_config.joinable_config().value_type() !=
971 JoinableConfig::ValueType::NONE) {
972 parsed_property_configs.joinable_properties.insert(property_name);
973 }
974
975 // Also keep track of how many nested document properties there are. Adding
976 // new nested document properties will result in join-index rebuild.
977 if (property_config.data_type() ==
978 PropertyConfigProto::DataType::DOCUMENT) {
979 parsed_property_configs.nested_document_properties.insert(property_name);
980 }
981 }
982
983 return parsed_property_configs;
984 }
985
ComputeCompatibilityDelta(const SchemaProto & old_schema,const SchemaProto & new_schema,const DependentMap & new_schema_dependent_map)986 const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
987 const SchemaProto& old_schema, const SchemaProto& new_schema,
988 const DependentMap& new_schema_dependent_map) {
989 SchemaDelta schema_delta;
990
991 TypeConfigMap old_type_config_map, new_type_config_map;
992 BuildTypeConfigMap(old_schema, &old_type_config_map);
993 BuildTypeConfigMap(new_schema, &new_type_config_map);
994
995 // Iterate through and check each field of the old schema
996 for (const auto& old_type_config : old_schema.types()) {
997 auto new_schema_type_and_config =
998 new_type_config_map.find(old_type_config.schema_type());
999
1000 if (new_schema_type_and_config == new_type_config_map.end()) {
1001 // Didn't find the old schema type in the new schema, all the old
1002 // documents of this schema type are invalid without the schema
1003 ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
1004 old_type_config.schema_type(),
1005 "' was not defined in new schema");
1006 schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
1007 continue;
1008 }
1009
1010 ParsedPropertyConfigs new_parsed_property_configs =
1011 ParsePropertyConfigs(new_schema_type_and_config->second);
1012
1013 // We only need to check the old, existing properties to see if they're
1014 // compatible since we'll have old data that may be invalidated or need to
1015 // be reindexed.
1016 std::unordered_set<std::string_view> old_required_properties;
1017 std::unordered_set<std::string_view> old_indexed_properties;
1018 std::unordered_set<std::string_view> old_joinable_properties;
1019 std::unordered_set<std::string_view> old_nested_document_properties;
1020
1021 // If there is a different number of properties, then there must have been a
1022 // change.
1023 bool has_property_changed =
1024 old_type_config.properties_size() !=
1025 new_schema_type_and_config->second.properties_size();
1026 bool is_incompatible = false;
1027 bool is_index_incompatible = false;
1028 bool is_join_incompatible = false;
1029 for (const auto& old_property_config : old_type_config.properties()) {
1030 std::string_view property_name = old_property_config.property_name();
1031 if (old_property_config.cardinality() ==
1032 PropertyConfigProto::Cardinality::REQUIRED) {
1033 old_required_properties.insert(property_name);
1034 }
1035
1036 // A non-default term_match_type indicates that this property is meant to
1037 // be indexed.
1038 bool is_indexed_property = IsIndexedProperty(old_property_config);
1039 if (is_indexed_property) {
1040 old_indexed_properties.insert(property_name);
1041 }
1042
1043 bool is_joinable_property =
1044 old_property_config.joinable_config().value_type() !=
1045 JoinableConfig::ValueType::NONE;
1046 if (is_joinable_property) {
1047 old_joinable_properties.insert(property_name);
1048 }
1049
1050 // A nested-document property is a property of DataType::DOCUMENT.
1051 bool is_nested_document_property =
1052 old_property_config.data_type() ==
1053 PropertyConfigProto::DataType::DOCUMENT;
1054 if (is_nested_document_property) {
1055 old_nested_document_properties.insert(property_name);
1056 }
1057
1058 auto new_property_name_and_config =
1059 new_parsed_property_configs.property_config_map.find(
1060 old_property_config.property_name());
1061
1062 if (new_property_name_and_config ==
1063 new_parsed_property_configs.property_config_map.end()) {
1064 // Didn't find the old property
1065 ICING_VLOG(1) << absl_ports::StrCat(
1066 "Previously defined property type '", old_type_config.schema_type(),
1067 ".", old_property_config.property_name(),
1068 "' was not defined in new schema");
1069 is_incompatible = true;
1070 is_index_incompatible |= is_indexed_property;
1071 is_join_incompatible |=
1072 is_joinable_property || is_nested_document_property;
1073 continue;
1074 }
1075
1076 const PropertyConfigProto* new_property_config =
1077 new_property_name_and_config->second;
1078 if (!has_property_changed &&
1079 !ArePropertiesEqual(old_property_config, *new_property_config)) {
1080 // Finally found a property that changed.
1081 has_property_changed = true;
1082 }
1083
1084 if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
1085 ICING_VLOG(1) << absl_ports::StrCat(
1086 "Property '", old_type_config.schema_type(), ".",
1087 old_property_config.property_name(), "' is incompatible.");
1088 is_incompatible = true;
1089 }
1090
1091 // Any change in the indexed property requires a reindexing
1092 if (!IsTermMatchTypeCompatible(
1093 old_property_config.string_indexing_config(),
1094 new_property_config->string_indexing_config()) ||
1095 !IsIntegerNumericMatchTypeCompatible(
1096 old_property_config.integer_indexing_config(),
1097 new_property_config->integer_indexing_config()) ||
1098 !IsDocumentIndexingCompatible(
1099 old_property_config.document_indexing_config(),
1100 new_property_config->document_indexing_config()) ||
1101 !IsEmbeddingIndexingCompatible(
1102 old_property_config.embedding_indexing_config(),
1103 new_property_config->embedding_indexing_config())) {
1104 is_index_incompatible = true;
1105 }
1106
1107 if (old_property_config.joinable_config().value_type() !=
1108 new_property_config->joinable_config().value_type()) {
1109 is_join_incompatible = true;
1110 }
1111 }
1112
1113 // We can't have new properties that are REQUIRED since we won't know how
1114 // to backfill the data, and the existing data will be invalid. We're
1115 // guaranteed from our previous checks that all the old properties are also
1116 // present in the new property config, so we can do a simple int comparison
1117 // here to detect new required properties.
1118 if (!IsSubset(new_parsed_property_configs.required_properties,
1119 old_required_properties)) {
1120 ICING_VLOG(1) << absl_ports::StrCat(
1121 "New schema '", old_type_config.schema_type(),
1122 "' has REQUIRED properties that are not "
1123 "present in the previously defined schema");
1124 is_incompatible = true;
1125 }
1126
1127 // If we've gained any new indexed properties (this includes gaining new
1128 // indexed nested document properties), then the section ids may change.
1129 // Since the section ids are stored in the index, we'll need to
1130 // reindex everything.
1131 if (!IsSubset(new_parsed_property_configs.indexed_properties,
1132 old_indexed_properties)) {
1133 ICING_VLOG(1) << "Set of indexed properties in schema type '"
1134 << old_type_config.schema_type()
1135 << "' has changed, required reindexing.";
1136 is_index_incompatible = true;
1137 }
1138
1139 // If we've gained any new joinable properties, then the joinable property
1140 // ids may change. Since the joinable property ids are stored in the cache,
1141 // we'll need to reconstruct join index.
1142 // If we've gained any new nested document properties, we also rebuild the
1143 // join index. This is because we index all nested joinable properties, so
1144 // adding a nested document property will most probably result in having
1145 // more joinable properties.
1146 if (!IsSubset(new_parsed_property_configs.joinable_properties,
1147 old_joinable_properties) ||
1148 !IsSubset(new_parsed_property_configs.nested_document_properties,
1149 old_nested_document_properties)) {
1150 ICING_VLOG(1) << "Set of joinable properties in schema type '"
1151 << old_type_config.schema_type()
1152 << "' has changed, required reconstructing joinable cache.";
1153 is_join_incompatible = true;
1154 }
1155
1156 if (is_incompatible) {
1157 AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
1158 old_type_config, new_schema_dependent_map,
1159 old_type_config_map, new_type_config_map);
1160 }
1161
1162 if (is_index_incompatible) {
1163 AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
1164 old_type_config, new_schema_dependent_map,
1165 old_type_config_map, new_type_config_map);
1166 }
1167
1168 if (is_join_incompatible) {
1169 AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
1170 old_type_config, new_schema_dependent_map,
1171 old_type_config_map, new_type_config_map);
1172 }
1173
1174 if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
1175 has_property_changed) {
1176 schema_delta.schema_types_changed_fully_compatible.insert(
1177 old_type_config.schema_type());
1178 }
1179
1180 // Lastly, remove this type from the map. We know that this type can't
1181 // come up in future iterations through the old schema types because the old
1182 // type config has unique types.
1183 new_type_config_map.erase(old_type_config.schema_type());
1184 }
1185
1186 // Any types that are still present in the new_type_config_map are newly added
1187 // types.
1188 schema_delta.schema_types_new.reserve(new_type_config_map.size());
1189 for (auto& kvp : new_type_config_map) {
1190 schema_delta.schema_types_new.insert(std::move(kvp.first));
1191 }
1192
1193 return schema_delta;
1194 }
1195
1196 } // namespace lib
1197 } // namespace icing
1198