• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/schema/schema-util.h"
16 
17 #include <algorithm>
18 #include <cstdint>
19 #include <queue>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26 
27 #include "icing/text_classifier/lib3/utils/base/status.h"
28 #include "icing/absl_ports/annotate.h"
29 #include "icing/absl_ports/canonical_errors.h"
30 #include "icing/absl_ports/str_cat.h"
31 #include "icing/absl_ports/str_join.h"
32 #include "icing/proto/schema.pb.h"
33 #include "icing/proto/term.pb.h"
34 #include "icing/util/logging.h"
35 #include "icing/util/status-macros.h"
36 
37 namespace icing {
38 namespace lib {
39 
40 namespace {
41 
ArePropertiesEqual(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)42 bool ArePropertiesEqual(const PropertyConfigProto& old_property,
43                         const PropertyConfigProto& new_property) {
44   return old_property.property_name() == new_property.property_name() &&
45          old_property.data_type() == new_property.data_type() &&
46          old_property.schema_type() == new_property.schema_type() &&
47          old_property.cardinality() == new_property.cardinality() &&
48          old_property.string_indexing_config().term_match_type() ==
49              new_property.string_indexing_config().term_match_type() &&
50          old_property.string_indexing_config().tokenizer_type() ==
51              new_property.string_indexing_config().tokenizer_type() &&
52          old_property.document_indexing_config().index_nested_properties() ==
53              new_property.document_indexing_config().index_nested_properties();
54 }
55 
IsCardinalityCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)56 bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
57                              const PropertyConfigProto& new_property) {
58   if (old_property.cardinality() < new_property.cardinality()) {
59     // We allow a new, less restrictive cardinality (i.e. a REQUIRED field
60     // can become REPEATED or OPTIONAL, but not the other way around).
61     ICING_VLOG(1) << absl_ports::StrCat(
62         "Cardinality is more restrictive than before ",
63         PropertyConfigProto::Cardinality::Code_Name(old_property.cardinality()),
64         "->",
65         PropertyConfigProto::Cardinality::Code_Name(
66             new_property.cardinality()));
67     return false;
68   }
69   return true;
70 }
71 
IsDataTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)72 bool IsDataTypeCompatible(const PropertyConfigProto& old_property,
73                           const PropertyConfigProto& new_property) {
74   if (old_property.data_type() != new_property.data_type()) {
75     // TODO(cassiewang): Maybe we can be a bit looser with this, e.g. we just
76     // string cast an int64_t to a string. But for now, we'll stick with
77     // simplistics.
78     ICING_VLOG(1) << absl_ports::StrCat(
79         "Data type ",
80         PropertyConfigProto::DataType::Code_Name(old_property.data_type()),
81         "->",
82         PropertyConfigProto::DataType::Code_Name(new_property.data_type()));
83     return false;
84   }
85   return true;
86 }
87 
IsSchemaTypeCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)88 bool IsSchemaTypeCompatible(const PropertyConfigProto& old_property,
89                             const PropertyConfigProto& new_property) {
90   if (old_property.schema_type() != new_property.schema_type()) {
91     ICING_VLOG(1) << absl_ports::StrCat("Schema type ",
92                                         old_property.schema_type(), "->",
93                                         new_property.schema_type());
94     return false;
95   }
96   return true;
97 }
98 
IsPropertyCompatible(const PropertyConfigProto & old_property,const PropertyConfigProto & new_property)99 bool IsPropertyCompatible(const PropertyConfigProto& old_property,
100                           const PropertyConfigProto& new_property) {
101   return IsDataTypeCompatible(old_property, new_property) &&
102          IsSchemaTypeCompatible(old_property, new_property) &&
103          IsCardinalityCompatible(old_property, new_property);
104 }
105 
IsTermMatchTypeCompatible(const StringIndexingConfig & old_indexed,const StringIndexingConfig & new_indexed)106 bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
107                                const StringIndexingConfig& new_indexed) {
108   return old_indexed.term_match_type() == new_indexed.term_match_type() &&
109          old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
110 }
111 
IsIntegerNumericMatchTypeCompatible(const IntegerIndexingConfig & old_indexed,const IntegerIndexingConfig & new_indexed)112 bool IsIntegerNumericMatchTypeCompatible(
113     const IntegerIndexingConfig& old_indexed,
114     const IntegerIndexingConfig& new_indexed) {
115   return old_indexed.numeric_match_type() == new_indexed.numeric_match_type();
116 }
117 
IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig & old_indexed,const EmbeddingIndexingConfig & new_indexed)118 bool IsEmbeddingIndexingCompatible(const EmbeddingIndexingConfig& old_indexed,
119                                    const EmbeddingIndexingConfig& new_indexed) {
120   return old_indexed.embedding_indexing_type() ==
121          new_indexed.embedding_indexing_type();
122 }
123 
IsDocumentIndexingCompatible(const DocumentIndexingConfig & old_indexed,const DocumentIndexingConfig & new_indexed)124 bool IsDocumentIndexingCompatible(const DocumentIndexingConfig& old_indexed,
125                                   const DocumentIndexingConfig& new_indexed) {
126   // TODO(b/265304217): This could mark the new schema as incompatible and
127   // generate some unnecessary index rebuilds if the two schemas have an
128   // equivalent set of indexed properties, but changed the way that it is
129   // declared.
130   if (old_indexed.index_nested_properties() !=
131       new_indexed.index_nested_properties()) {
132     return false;
133   }
134 
135   if (old_indexed.indexable_nested_properties_list().size() !=
136       new_indexed.indexable_nested_properties_list().size()) {
137     return false;
138   }
139 
140   std::unordered_set<std::string_view> old_indexable_nested_properies_set(
141       old_indexed.indexable_nested_properties_list().begin(),
142       old_indexed.indexable_nested_properties_list().end());
143   for (const auto& property : new_indexed.indexable_nested_properties_list()) {
144     if (old_indexable_nested_properies_set.find(property) ==
145         old_indexable_nested_properies_set.end()) {
146       return false;
147     }
148   }
149   return true;
150 }
151 
AddIncompatibleChangeToDelta(std::unordered_set<std::string> & incompatible_delta,const SchemaTypeConfigProto & old_type_config,const SchemaUtil::DependentMap & new_schema_dependent_map,const SchemaUtil::TypeConfigMap & old_type_config_map,const SchemaUtil::TypeConfigMap & new_type_config_map)152 void AddIncompatibleChangeToDelta(
153     std::unordered_set<std::string>& incompatible_delta,
154     const SchemaTypeConfigProto& old_type_config,
155     const SchemaUtil::DependentMap& new_schema_dependent_map,
156     const SchemaUtil::TypeConfigMap& old_type_config_map,
157     const SchemaUtil::TypeConfigMap& new_type_config_map) {
158   // If this type is incompatible, then every type that depends on it might
159   // also be incompatible. Use the dependent map to mark those ones as
160   // incompatible too.
161   incompatible_delta.insert(old_type_config.schema_type());
162   auto dependent_types_itr =
163       new_schema_dependent_map.find(old_type_config.schema_type());
164   if (dependent_types_itr != new_schema_dependent_map.end()) {
165     for (const auto& [dependent_type, _] : dependent_types_itr->second) {
166       // The types from new_schema that depend on the current
167       // old_type_config may not present in old_schema.
168       // Those types will be listed at schema_delta.schema_types_new
169       // instead.
170       std::string dependent_type_str(dependent_type);
171       if (old_type_config_map.find(dependent_type_str) !=
172           old_type_config_map.end()) {
173         incompatible_delta.insert(std::move(dependent_type_str));
174       }
175     }
176   }
177 }
178 
179 // Returns if C1 <= C2 based on the following rule, where C1 and C2 are
180 // cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED.
181 //
182 // Rule: REQUIRED < OPTIONAL < REPEATED
CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,PropertyConfigProto::Cardinality::Code C2)183 bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
184                            PropertyConfigProto::Cardinality::Code C2) {
185   if (C1 == C2) {
186     return true;
187   }
188   if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
189     return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
190            C2 == PropertyConfigProto::Cardinality::REPEATED;
191   }
192   if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
193     return C2 == PropertyConfigProto::Cardinality::REPEATED;
194   }
195   return false;
196 }
197 
198 // Check if set1 is a subset of set2.
199 template <typename T>
IsSubset(const std::unordered_set<T> & set1,const std::unordered_set<T> & set2)200 bool IsSubset(const std::unordered_set<T>& set1,
201               const std::unordered_set<T>& set2) {
202   for (const auto& item : set1) {
203     if (set2.find(item) == set2.end()) {
204       return false;
205     }
206   }
207   return true;
208 }
209 
210 }  // namespace
211 
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_types_map,const std::unordered_set<std::string_view> & joinable_types,std::string_view type,bool path_contains_joinable_property,SchemaUtil::DependentMap * expanded_nested_types_map,std::unordered_map<std::string_view,bool> && pending_expansion_paths_indexable,std::unordered_set<std::string_view> * sink_types)212 libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
213     const SchemaUtil::DependentMap& direct_nested_types_map,
214     const std::unordered_set<std::string_view>& joinable_types,
215     std::string_view type, bool path_contains_joinable_property,
216     SchemaUtil::DependentMap* expanded_nested_types_map,
217     std::unordered_map<std::string_view, bool>&&
218         pending_expansion_paths_indexable,
219     std::unordered_set<std::string_view>* sink_types) {
220   // TODO(b/280698121): Implement optimizations to this code to avoid reentering
221   // a node after it's already been expanded.
222 
223   auto itr = direct_nested_types_map.find(type);
224   if (itr == direct_nested_types_map.end()) {
225     // It's a sink node. Just return.
226     sink_types->insert(type);
227     return libtextclassifier3::Status::OK;
228   }
229   std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
230       expanded_relations;
231 
232   // Add all of the adjacent outgoing relations.
233   expanded_relations.reserve(itr->second.size());
234   expanded_relations.insert(itr->second.begin(), itr->second.end());
235 
236   // Iterate through each adjacent outgoing relation and add their indirect
237   // outgoing relations.
238   for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
239     // Make a copy of pending_expansion_paths_indexable for every iteration.
240     std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
241         pending_expansion_paths_indexable);
242 
243     // 1. Check the nested indexable config of the edge (type -> adjacent_type),
244     //    and the joinable config of the current path up to adjacent_type.
245     //
246     // The nested indexable config is true if any of the PropertyConfigProtos
247     // representing the connecting edge has index_nested_properties=true.
248     bool is_edge_nested_indexable = std::any_of(
249         adjacent_property_protos.begin(), adjacent_property_protos.end(),
250         [](const PropertyConfigProto* property_config) {
251           return property_config->document_indexing_config()
252               .index_nested_properties();
253         });
254     // TODO(b/265304217): change this once we add joinable_properties_list.
255     // Check if addition of the new edge (type->adjacent_type) makes the path
256     // joinable.
257     bool new_path_contains_joinable_property =
258         joinable_types.count(type) > 0 || path_contains_joinable_property;
259     // Set is_nested_indexable field for the current edge
260     pending_expansion_paths_copy[type] = is_edge_nested_indexable;
261 
262     // If is_edge_nested_indexable=false, then all paths to adjacent_type
263     // currently in the pending_expansions map are also not nested indexable.
264     if (!is_edge_nested_indexable) {
265       for (auto& pending_expansion : pending_expansion_paths_copy) {
266         pending_expansion.second = false;
267       }
268     }
269 
270     // 2. Check if we're in the middle of expanding this type - IOW
271     // there's a cycle!
272     //
273     // This cycle is not allowed if either:
274     //  1. The cycle starting at adjacent_type is nested indexable, OR
275     //  2. The current path contains a joinable property.
276     auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
277     if (adjacent_itr != pending_expansion_paths_copy.end()) {
278       if (adjacent_itr->second || new_path_contains_joinable_property) {
279         return absl_ports::InvalidArgumentError(absl_ports::StrCat(
280             "Invalid cycle detected in type configs. '", type,
281             "' references itself and is nested-indexable or nested-joinable."));
282       }
283       // The cycle is allowed and there's no need to keep iterating the loop.
284       // Move on to the next adjacent value.
285       continue;
286     }
287 
288     // 3. Expand this type as needed.
289     ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
290         direct_nested_types_map, joinable_types, adjacent_type,
291         new_path_contains_joinable_property, expanded_nested_types_map,
292         std::move(pending_expansion_paths_copy), sink_types));
293     if (sink_types->count(adjacent_type) > 0) {
294       // "adjacent" is a sink node. Just skip to the next.
295       continue;
296     }
297 
298     // 4. "adjacent" has been fully expanded. Add all of its transitive
299     // outgoing relations to this type's transitive outgoing relations.
300     auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
301     for (const auto& [transitive_reachable, _] :
302          adjacent_expanded_itr->second) {
303       // Insert a transitive reachable node `transitive_reachable` for `type` if
304       // it wasn't previously reachable.
305       // Since there is no direct edge between `type` and `transitive_reachable`
306       // we insert an empty vector into the dependent map.
307       expanded_relations.insert({transitive_reachable, {}});
308     }
309   }
310   for (const auto& kvp : expanded_relations) {
311     expanded_nested_types_map->operator[](type).insert(kvp);
312   }
313   return libtextclassifier3::Status::OK;
314 }
315 
316 template <typename T>
CalculateAcyclicTransitiveRelations(const SchemaUtil::TypeRelationMap<T> & direct_relation_map,std::string_view type,SchemaUtil::TypeRelationMap<T> * expanded_relation_map,std::unordered_set<std::string_view> * pending_expansions,std::unordered_set<std::string_view> * sink_types)317 libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
318     const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
319     std::string_view type,
320     SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
321     std::unordered_set<std::string_view>* pending_expansions,
322     std::unordered_set<std::string_view>* sink_types) {
323   auto expanded_itr = expanded_relation_map->find(type);
324   if (expanded_itr != expanded_relation_map->end()) {
325     // We've already expanded this type. Just return.
326     return libtextclassifier3::Status::OK;
327   }
328   auto itr = direct_relation_map.find(type);
329   if (itr == direct_relation_map.end()) {
330     // It's a sink node. Just return.
331     sink_types->insert(type);
332     return libtextclassifier3::Status::OK;
333   }
334   pending_expansions->insert(type);
335   std::unordered_map<std::string_view, T> expanded_relations;
336 
337   // Add all of the adjacent outgoing relations.
338   expanded_relations.reserve(itr->second.size());
339   expanded_relations.insert(itr->second.begin(), itr->second.end());
340 
341   // Iterate through each adjacent outgoing relation and add their indirect
342   // outgoing relations.
343   for (const auto& [adjacent, _] : itr->second) {
344     // 1. Check if we're in the middle of expanding this type - IOW there's a
345     // cycle!
346     if (pending_expansions->count(adjacent) > 0) {
347       return absl_ports::InvalidArgumentError(
348           absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
349                              "' references or inherits from itself."));
350     }
351 
352     // 2. Expand this type as needed.
353     ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
354         direct_relation_map, adjacent, expanded_relation_map,
355         pending_expansions, sink_types));
356     if (sink_types->count(adjacent) > 0) {
357       // "adjacent" is a sink node. Just skip to the next.
358       continue;
359     }
360 
361     // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
362     // relations to this type's transitive outgoing relations.
363     auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
364     for (const auto& [transitive_reachable, _] :
365          adjacent_expanded_itr->second) {
366       // Insert a transitive reachable node `transitive_reachable` for `type`.
367       // Also since there is no direct edge between `type` and
368       // `transitive_reachable`, the direct edge is initialized by default.
369       expanded_relations.insert({transitive_reachable, T()});
370     }
371   }
372   expanded_relation_map->insert({type, std::move(expanded_relations)});
373   pending_expansions->erase(type);
374   return libtextclassifier3::Status::OK;
375 }
376 
377 // Calculate and return the expanded nested-type map from
378 // direct_nested_type_map. This expands the direct_nested_type_map to also
379 // include indirect nested-type relations.
380 //
381 // Ex. Suppose we have the following relations in direct_nested_type_map.
382 //
383 // C -> B (Schema type B has a document property of type C)
384 // B -> A (Schema type A has a document property of type B)
385 //
386 // Then, this function would expand the map by adding C -> A to the map.
387 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
CalculateTransitiveNestedTypeRelations(const SchemaUtil::DependentMap & direct_nested_type_map,const std::unordered_set<std::string_view> & joinable_types,bool allow_circular_schema_definitions)388 CalculateTransitiveNestedTypeRelations(
389     const SchemaUtil::DependentMap& direct_nested_type_map,
390     const std::unordered_set<std::string_view>& joinable_types,
391     bool allow_circular_schema_definitions) {
392   SchemaUtil::DependentMap expanded_nested_type_map;
393   // Types that have no outgoing relations.
394   std::unordered_set<std::string_view> sink_types;
395 
396   if (allow_circular_schema_definitions) {
397     // Map of nodes that are pending expansion -> whether the path from each key
398     // node to the 'current' node is nested_indexable.
399     // A copy of this map is made for each new node that we expand.
400     std::unordered_map<std::string_view, bool>
401         pending_expansion_paths_indexable;
402     for (const auto& kvp : direct_nested_type_map) {
403       ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
404           direct_nested_type_map, joinable_types, kvp.first,
405           /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
406           std::unordered_map<std::string_view, bool>(
407               pending_expansion_paths_indexable),
408           &sink_types));
409     }
410   } else {
411     // If allow_circular_schema_definitions is false, then fallback to the old
412     // way of detecting cycles.
413     // Types that we are expanding.
414     std::unordered_set<std::string_view> pending_expansions;
415     for (const auto& kvp : direct_nested_type_map) {
416       ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
417           direct_nested_type_map, kvp.first, &expanded_nested_type_map,
418           &pending_expansions, &sink_types));
419     }
420   }
421   return expanded_nested_type_map;
422 }
423 
424 // Calculate and return the expanded inheritance map from
425 // direct_nested_type_map. This expands the direct_inheritance_map to also
426 // include indirect inheritance relations.
427 //
428 // Ex. Suppose we have the following relations in direct_inheritance_map.
429 //
430 // C -> B (Schema type C is B's parent_type )
431 // B -> A (Schema type B is A's parent_type)
432 //
433 // Then, this function would expand the map by adding C -> A to the map.
434 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
CalculateTransitiveInheritanceRelations(const SchemaUtil::InheritanceMap & direct_inheritance_map)435 CalculateTransitiveInheritanceRelations(
436     const SchemaUtil::InheritanceMap& direct_inheritance_map) {
437   SchemaUtil::InheritanceMap expanded_inheritance_map;
438 
439   // Types that we are expanding.
440   std::unordered_set<std::string_view> pending_expansions;
441 
442   // Types that have no outgoing relation.
443   std::unordered_set<std::string_view> sink_types;
444   for (const auto& kvp : direct_inheritance_map) {
445     ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
446         direct_inheritance_map, kvp.first, &expanded_inheritance_map,
447         &pending_expansions, &sink_types));
448   }
449   return expanded_inheritance_map;
450 }
451 
452 // Builds a transitive dependent map. Types with no dependents will not be
453 // present in the map as keys.
454 //
455 // Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
456 // type B and B has a property of type C. C and D only have non-document
457 // properties.
458 //
459 // The transitive dependent map for this schema would be:
460 // C -> A, B (both A and B depend on C)
461 // B -> A (A depends on B)
462 //
463 // A and D will not be present in the map as keys because no type depends on
464 // them.
465 //
466 // RETURNS:
467 //   On success, a transitive dependent map of all types in the schema.
468 //   INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
469 //   ALREADY_EXISTS if a schema type is specified more than once in the schema
470 libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
BuildTransitiveDependentGraph(const SchemaProto & schema,bool allow_circular_schema_definitions)471 BuildTransitiveDependentGraph(const SchemaProto& schema,
472                               bool allow_circular_schema_definitions) {
473   // We expand the nested-type dependent map and inheritance map differently
474   // when calculating transitive relations. These two types of relations also
475   // should not be transitive so we keep these as separate maps.
476   //
477   // e.g. For schema type A, B and C, B depends on A through inheritance, and
478   // C depends on B by having a property with type B, we will have the two
479   // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
480   // in the map.
481   SchemaUtil::DependentMap direct_nested_type_map;
482   SchemaUtil::InheritanceMap direct_inheritance_map;
483 
484   // Set of schema types that have at least one joinable property.
485   std::unordered_set<std::string_view> joinable_types;
486 
487   // Add all first-order dependents.
488   std::unordered_set<std::string_view> known_types;
489   std::unordered_set<std::string_view> unknown_types;
490   for (const auto& type_config : schema.types()) {
491     std::string_view schema_type(type_config.schema_type());
492     if (known_types.count(schema_type) > 0) {
493       return absl_ports::AlreadyExistsError(absl_ports::StrCat(
494           "Field 'schema_type' '", schema_type, "' is already defined"));
495     }
496     known_types.insert(schema_type);
497     unknown_types.erase(schema_type);
498     // Insert inheritance relations into the inheritance map.
499     for (std::string_view parent_schema_type : type_config.parent_types()) {
500       if (known_types.count(parent_schema_type) == 0) {
501         unknown_types.insert(parent_schema_type);
502       }
503       direct_inheritance_map[parent_schema_type][schema_type] = true;
504     }
505     for (const auto& property_config : type_config.properties()) {
506       if (property_config.joinable_config().value_type() !=
507           JoinableConfig::ValueType::NONE) {
508         joinable_types.insert(schema_type);
509       }
510       // Insert nested-type relations into the nested-type map.
511       if (property_config.data_type() ==
512           PropertyConfigProto::DataType::DOCUMENT) {
513         // Need to know what schema_type these Document properties should be
514         // validated against
515         std::string_view property_schema_type(property_config.schema_type());
516         if (known_types.count(property_schema_type) == 0) {
517           unknown_types.insert(property_schema_type);
518         }
519         direct_nested_type_map[property_schema_type][schema_type].push_back(
520             &property_config);
521       }
522     }
523   }
524   if (!unknown_types.empty()) {
525     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
526         "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
527   }
528 
529   // Merge two expanded maps into a single dependent_map, without making
530   // inheritance and nested-type relations transitive.
531   ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
532                          CalculateTransitiveNestedTypeRelations(
533                              direct_nested_type_map, joinable_types,
534                              allow_circular_schema_definitions));
535   ICING_ASSIGN_OR_RETURN(
536       SchemaUtil::InheritanceMap expanded_inheritance_map,
537       CalculateTransitiveInheritanceRelations(direct_inheritance_map));
538   for (const auto& [parent_type, inheritance_relation] :
539        expanded_inheritance_map) {
540     // Insert the parent_type into the dependent map if it is not present
541     // already.
542     merged_dependent_map.insert({parent_type, {}});
543     for (const auto& [child_type, _] : inheritance_relation) {
544       // Insert the child_type into parent_type's dependent map if it's not
545       // present already, in which case the value will be an empty vector.
546       merged_dependent_map[parent_type].insert({child_type, {}});
547     }
548   }
549   return merged_dependent_map;
550 }
551 
552 libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
BuildTransitiveInheritanceGraph(const SchemaProto & schema)553 SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
554   SchemaUtil::InheritanceMap direct_inheritance_map;
555   for (const auto& type_config : schema.types()) {
556     for (std::string_view parent_schema_type : type_config.parent_types()) {
557       direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
558           true;
559     }
560   }
561   return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
562 }
563 
Validate(const SchemaProto & schema,bool allow_circular_schema_definitions)564 libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
565     const SchemaProto& schema, bool allow_circular_schema_definitions) {
566   // 1. Build the dependent map. This will detect any cycles, non-existent or
567   // duplicate types in the schema.
568   ICING_ASSIGN_OR_RETURN(
569       SchemaUtil::DependentMap dependent_map,
570       BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
571 
572   // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
573   // already.
574   std::unordered_set<std::string_view> known_property_names;
575 
576   // Tracks PropertyConfigs containing joinable properties.
577   std::unordered_set<std::string_view> schema_types_with_joinable_property;
578 
579   // 2. Validate the properties of each type.
580   for (const auto& type_config : schema.types()) {
581     std::string_view schema_type(type_config.schema_type());
582     ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
583 
584     // We only care about properties being unique within one type_config
585     known_property_names.clear();
586 
587     for (const auto& property_config : type_config.properties()) {
588       std::string_view property_name(property_config.property_name());
589       ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
590 
591       // Property names must be unique
592       if (!known_property_names.insert(property_name).second) {
593         return absl_ports::AlreadyExistsError(absl_ports::StrCat(
594             "Field 'property_name' '", property_name,
595             "' is already defined for schema '", schema_type, "'"));
596       }
597 
598       auto data_type = property_config.data_type();
599       ICING_RETURN_IF_ERROR(
600           ValidateDataType(data_type, schema_type, property_name));
601 
602       if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
603         // Need to know what schema_type these Document properties should be
604         // validated against
605         std::string_view property_schema_type(property_config.schema_type());
606         libtextclassifier3::Status validated_status =
607             ValidateSchemaType(property_schema_type);
608         if (!validated_status.ok()) {
609           return absl_ports::Annotate(
610               validated_status,
611               absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
612                                  "data_types in schema property '",
613                                  schema_type, ".", property_name, "'"));
614         }
615 
616         ICING_RETURN_IF_ERROR(ValidateDocumentIndexingConfig(
617             property_config.document_indexing_config(), schema_type,
618             property_name));
619       }
620 
621       ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
622                                                 schema_type, property_name));
623 
624       if (data_type == PropertyConfigProto::DataType::STRING) {
625         ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
626             property_config.string_indexing_config(), data_type, schema_type,
627             property_name));
628       }
629 
630       ICING_RETURN_IF_ERROR(ValidateJoinableConfig(
631           property_config.joinable_config(), data_type,
632           property_config.cardinality(), schema_type, property_name));
633       if (property_config.joinable_config().value_type() !=
634           JoinableConfig::ValueType::NONE) {
635         schema_types_with_joinable_property.insert(schema_type);
636       }
637     }
638   }
639 
640   // BFS traverse the dependent graph to make sure that no nested levels
641   // (properties with DOCUMENT data type) have REPEATED cardinality while
642   // depending on schema types with joinable property.
643   std::queue<std::string_view> frontier;
644   for (const auto& schema_type : schema_types_with_joinable_property) {
645     frontier.push(schema_type);
646   }
647   std::unordered_set<std::string_view> traversed =
648       std::move(schema_types_with_joinable_property);
649   while (!frontier.empty()) {
650     std::string_view schema_type = frontier.front();
651     frontier.pop();
652 
653     const auto it = dependent_map.find(schema_type);
654     if (it == dependent_map.end()) {
655       continue;
656     }
657 
658     // Check every type that has a property of type schema_type.
659     for (const auto& [next_schema_type, property_configs] : it->second) {
660       // Check all properties in "next_schema_type" that are of type
661       // "schema_type".
662       for (const PropertyConfigProto* property_config : property_configs) {
663         if (property_config != nullptr &&
664             property_config->cardinality() ==
665                 PropertyConfigProto::Cardinality::REPEATED) {
666           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
667               "Schema type '", next_schema_type,
668               "' cannot have REPEATED nested document property '",
669               property_config->property_name(),
670               "' while connecting to some joinable properties"));
671         }
672       }
673 
674       if (traversed.count(next_schema_type) == 0) {
675         traversed.insert(next_schema_type);
676         frontier.push(next_schema_type);
677       }
678     }
679   }
680 
681   // Verify that every child type's property set has included all compatible
682   // properties from parent types.
683   ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
684   return dependent_map;
685 }
686 
ValidateSchemaType(std::string_view schema_type)687 libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
688     std::string_view schema_type) {
689   // Require a schema_type
690   if (schema_type.empty()) {
691     return absl_ports::InvalidArgumentError(
692         "Field 'schema_type' cannot be empty.");
693   }
694 
695   return libtextclassifier3::Status::OK;
696 }
697 
ValidatePropertyName(std::string_view property_name,std::string_view schema_type)698 libtextclassifier3::Status SchemaUtil::ValidatePropertyName(
699     std::string_view property_name, std::string_view schema_type) {
700   // Require a property_name
701   if (property_name.empty()) {
702     return absl_ports::InvalidArgumentError(
703         absl_ports::StrCat("Field 'property_name' for schema '", schema_type,
704                            "' cannot be empty."));
705   }
706 
707   // Only support alphanumeric values.
708   for (char c : property_name) {
709     if (!std::isalnum(c)) {
710       return absl_ports::InvalidArgumentError(
711           absl_ports::StrCat("Field 'property_name' '", property_name,
712                              "' can only contain alphanumeric characters."));
713     }
714   }
715 
716   return libtextclassifier3::Status::OK;
717 }
718 
ValidateDataType(PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)719 libtextclassifier3::Status SchemaUtil::ValidateDataType(
720     PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
721     std::string_view property_name) {
722   // UNKNOWN is the default enum value and should only be used for backwards
723   // compatibility
724   if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
725     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
726         "Field 'data_type' cannot be UNKNOWN for schema property '",
727         schema_type, ".", property_name, "'"));
728   }
729 
730   return libtextclassifier3::Status::OK;
731 }
732 
ValidateCardinality(PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)733 libtextclassifier3::Status SchemaUtil::ValidateCardinality(
734     PropertyConfigProto::Cardinality::Code cardinality,
735     std::string_view schema_type, std::string_view property_name) {
736   // UNKNOWN is the default enum value and should only be used for backwards
737   // compatibility
738   if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
739     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
740         "Field 'cardinality' cannot be UNKNOWN for schema property '",
741         schema_type, ".", property_name, "'"));
742   }
743 
744   return libtextclassifier3::Status::OK;
745 }
746 
ValidateStringIndexingConfig(const StringIndexingConfig & config,PropertyConfigProto::DataType::Code data_type,std::string_view schema_type,std::string_view property_name)747 libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
748     const StringIndexingConfig& config,
749     PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
750     std::string_view property_name) {
751   if (config.term_match_type() == TermMatchType::UNKNOWN &&
752       config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
753     // They set a tokenizer type, but no term match type.
754     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
755         "Indexed string property '", schema_type, ".", property_name,
756         "' cannot have a term match type UNKNOWN"));
757   }
758 
759   if (config.term_match_type() != TermMatchType::UNKNOWN &&
760       config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
761     // They set a term match type, but no tokenizer type
762     return absl_ports::InvalidArgumentError(
763         absl_ports::StrCat("Indexed string property '", property_name,
764                            "' cannot have a tokenizer type of NONE"));
765   }
766 
767   return libtextclassifier3::Status::OK;
768 }
769 
ValidateJoinableConfig(const JoinableConfig & config,PropertyConfigProto::DataType::Code data_type,PropertyConfigProto::Cardinality::Code cardinality,std::string_view schema_type,std::string_view property_name)770 libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
771     const JoinableConfig& config, PropertyConfigProto::DataType::Code data_type,
772     PropertyConfigProto::Cardinality::Code cardinality,
773     std::string_view schema_type, std::string_view property_name) {
774   if (config.value_type() == JoinableConfig::ValueType::QUALIFIED_ID) {
775     if (data_type != PropertyConfigProto::DataType::STRING) {
776       return absl_ports::InvalidArgumentError(
777           absl_ports::StrCat("Qualified id joinable property '", property_name,
778                              "' is required to have STRING data type"));
779     }
780 
781     if (cardinality == PropertyConfigProto::Cardinality::REPEATED) {
782       return absl_ports::InvalidArgumentError(
783           absl_ports::StrCat("Qualified id joinable property '", property_name,
784                              "' cannot have REPEATED cardinality"));
785     }
786   }
787 
788   if (config.propagate_delete() &&
789       config.value_type() != JoinableConfig::ValueType::QUALIFIED_ID) {
790     return absl_ports::InvalidArgumentError(
791         absl_ports::StrCat("Field 'property_name' '", property_name,
792                            "' is required to have QUALIFIED_ID joinable "
793                            "value type with delete propagation enabled"));
794   }
795 
796   return libtextclassifier3::Status::OK;
797 }
798 
ValidateDocumentIndexingConfig(const DocumentIndexingConfig & config,std::string_view schema_type,std::string_view property_name)799 libtextclassifier3::Status SchemaUtil::ValidateDocumentIndexingConfig(
800     const DocumentIndexingConfig& config, std::string_view schema_type,
801     std::string_view property_name) {
802   if (!config.indexable_nested_properties_list().empty() &&
803       config.index_nested_properties()) {
804     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
805         "DocumentIndexingConfig.index_nested_properties is required to be "
806         "false when providing a non-empty indexable_nested_properties_list "
807         "for property '",
808         schema_type, ".", property_name, "'"));
809   }
810   return libtextclassifier3::Status::OK;
811 }
812 
IsIndexedProperty(const PropertyConfigProto & property_config)813 /* static */ bool SchemaUtil::IsIndexedProperty(
814     const PropertyConfigProto& property_config) {
815   switch (property_config.data_type()) {
816     case PropertyConfigProto::DataType::STRING:
817       return property_config.string_indexing_config().term_match_type() !=
818                  TermMatchType::UNKNOWN &&
819              property_config.string_indexing_config().tokenizer_type() !=
820                  StringIndexingConfig::TokenizerType::NONE;
821     case PropertyConfigProto::DataType::INT64:
822       return property_config.integer_indexing_config().numeric_match_type() !=
823              IntegerIndexingConfig::NumericMatchType::UNKNOWN;
824     case PropertyConfigProto::DataType::DOCUMENT:
825       // A document property is considered indexed if it has
826       // index_nested_properties=true, or a non-empty
827       // indexable_nested_properties_list.
828       return property_config.document_indexing_config()
829                  .index_nested_properties() ||
830              !property_config.document_indexing_config()
831                   .indexable_nested_properties_list()
832                   .empty();
833     case PropertyConfigProto::DataType::VECTOR:
834       return property_config.embedding_indexing_config()
835                  .embedding_indexing_type() !=
836              EmbeddingIndexingConfig::EmbeddingIndexingType::UNKNOWN;
837     case PropertyConfigProto::DataType::UNKNOWN:
838     case PropertyConfigProto::DataType::DOUBLE:
839     case PropertyConfigProto::DataType::BOOLEAN:
840     case PropertyConfigProto::DataType::BYTES:
841       return false;
842   }
843 }
844 
IsParent(const SchemaUtil::InheritanceMap & inheritance_map,std::string_view parent_type,std::string_view child_type)845 bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
846                           std::string_view parent_type,
847                           std::string_view child_type) {
848   auto iter = inheritance_map.find(parent_type);
849   if (iter == inheritance_map.end()) {
850     return false;
851   }
852   return iter->second.count(child_type) > 0;
853 }
854 
IsInheritedPropertyCompatible(const SchemaUtil::InheritanceMap & inheritance_map,const PropertyConfigProto & child_property_config,const PropertyConfigProto & parent_property_config)855 bool SchemaUtil::IsInheritedPropertyCompatible(
856     const SchemaUtil::InheritanceMap& inheritance_map,
857     const PropertyConfigProto& child_property_config,
858     const PropertyConfigProto& parent_property_config) {
859   // Check if child_property_config->cardinality() <=
860   // parent_property_config->cardinality().
861   // Subtype may require a stricter cardinality, but cannot loosen cardinality
862   // requirements.
863   if (!CardinalityLessThanEq(child_property_config.cardinality(),
864                              parent_property_config.cardinality())) {
865     return false;
866   }
867 
868   // Now we can assume T1 and T2 are not nullptr, and cardinality check passes.
869   if (child_property_config.data_type() !=
870           PropertyConfigProto::DataType::DOCUMENT ||
871       parent_property_config.data_type() !=
872           PropertyConfigProto::DataType::DOCUMENT) {
873     return child_property_config.data_type() ==
874            parent_property_config.data_type();
875   }
876 
877   // Now we can assume T1 and T2 are both document type.
878   return child_property_config.schema_type() ==
879              parent_property_config.schema_type() ||
880          IsParent(inheritance_map, parent_property_config.schema_type(),
881                   child_property_config.schema_type());
882 }
883 
ValidateInheritedProperties(const SchemaProto & schema)884 libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
885     const SchemaProto& schema) {
886   // Create a inheritance map
887   ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
888                          BuildTransitiveInheritanceGraph(schema));
889 
890   // Create a map that maps from type name to property names, and then from
891   // property names to PropertyConfigProto.
892   std::unordered_map<
893       std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
894       property_map;
895   for (const SchemaTypeConfigProto& type_config : schema.types()) {
896     // Skipping building entries for types without any child or parent, since
897     // such entry will never be used.
898     if (type_config.parent_types().empty() &&
899         inheritance_map.count(type_config.schema_type()) == 0) {
900       continue;
901     }
902     auto& curr_property_map = property_map[type_config.schema_type()];
903     for (const PropertyConfigProto& property_config :
904          type_config.properties()) {
905       curr_property_map[property_config.property_name()] = &property_config;
906     }
907   }
908 
909   // Validate child properties.
910   for (const SchemaTypeConfigProto& type_config : schema.types()) {
911     const std::string& child_type_name = type_config.schema_type();
912     auto& child_property_map = property_map[child_type_name];
913 
914     for (const std::string& parent_type_name : type_config.parent_types()) {
915       auto& parent_property_map = property_map[parent_type_name];
916 
917       for (const auto& [property_name, parent_property_config] :
918            parent_property_map) {
919         auto child_property_iter = child_property_map.find(property_name);
920         if (child_property_iter == child_property_map.end()) {
921           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
922               "Property ", property_name, " is not present in child type ",
923               child_type_name, ", but it is defined in the parent type ",
924               parent_type_name, "."));
925         }
926         if (!IsInheritedPropertyCompatible(inheritance_map,
927                                            *child_property_iter->second,
928                                            *parent_property_config)) {
929           return absl_ports::InvalidArgumentError(absl_ports::StrCat(
930               "Property ", property_name, " from child type ", child_type_name,
931               " is not compatible to the parent type ", parent_type_name, "."));
932         }
933       }
934     }
935   }
936   return libtextclassifier3::Status::OK;
937 }
938 
BuildTypeConfigMap(const SchemaProto & schema,SchemaUtil::TypeConfigMap * type_config_map)939 void SchemaUtil::BuildTypeConfigMap(
940     const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
941   type_config_map->clear();
942   for (const SchemaTypeConfigProto& type_config : schema.types()) {
943     type_config_map->emplace(type_config.schema_type(), type_config);
944   }
945 }
946 
ParsePropertyConfigs(const SchemaTypeConfigProto & type_config)947 SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
948     const SchemaTypeConfigProto& type_config) {
949   ParsedPropertyConfigs parsed_property_configs;
950 
951   // TODO(cassiewang): consider caching property_config_map for some properties,
952   // e.g. using LRU cache. Or changing schema.proto to use go/protomap.
953   for (const PropertyConfigProto& property_config : type_config.properties()) {
954     std::string_view property_name = property_config.property_name();
955     parsed_property_configs.property_config_map.emplace(property_name,
956                                                         &property_config);
957     if (property_config.cardinality() ==
958         PropertyConfigProto::Cardinality::REQUIRED) {
959       parsed_property_configs.required_properties.insert(property_name);
960     }
961 
962     // A non-default term_match_type indicates that this property is meant to be
963     // indexed.
964     if (IsIndexedProperty(property_config)) {
965       parsed_property_configs.indexed_properties.insert(property_name);
966     }
967 
968     // A non-default value_type indicates that this property is meant to be
969     // joinable.
970     if (property_config.joinable_config().value_type() !=
971         JoinableConfig::ValueType::NONE) {
972       parsed_property_configs.joinable_properties.insert(property_name);
973     }
974 
975     // Also keep track of how many nested document properties there are. Adding
976     // new nested document properties will result in join-index rebuild.
977     if (property_config.data_type() ==
978         PropertyConfigProto::DataType::DOCUMENT) {
979       parsed_property_configs.nested_document_properties.insert(property_name);
980     }
981   }
982 
983   return parsed_property_configs;
984 }
985 
ComputeCompatibilityDelta(const SchemaProto & old_schema,const SchemaProto & new_schema,const DependentMap & new_schema_dependent_map)986 const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
987     const SchemaProto& old_schema, const SchemaProto& new_schema,
988     const DependentMap& new_schema_dependent_map) {
989   SchemaDelta schema_delta;
990 
991   TypeConfigMap old_type_config_map, new_type_config_map;
992   BuildTypeConfigMap(old_schema, &old_type_config_map);
993   BuildTypeConfigMap(new_schema, &new_type_config_map);
994 
995   // Iterate through and check each field of the old schema
996   for (const auto& old_type_config : old_schema.types()) {
997     auto new_schema_type_and_config =
998         new_type_config_map.find(old_type_config.schema_type());
999 
1000     if (new_schema_type_and_config == new_type_config_map.end()) {
1001       // Didn't find the old schema type in the new schema, all the old
1002       // documents of this schema type are invalid without the schema
1003       ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
1004                                           old_type_config.schema_type(),
1005                                           "' was not defined in new schema");
1006       schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
1007       continue;
1008     }
1009 
1010     ParsedPropertyConfigs new_parsed_property_configs =
1011         ParsePropertyConfigs(new_schema_type_and_config->second);
1012 
1013     // We only need to check the old, existing properties to see if they're
1014     // compatible since we'll have old data that may be invalidated or need to
1015     // be reindexed.
1016     std::unordered_set<std::string_view> old_required_properties;
1017     std::unordered_set<std::string_view> old_indexed_properties;
1018     std::unordered_set<std::string_view> old_joinable_properties;
1019     std::unordered_set<std::string_view> old_nested_document_properties;
1020 
1021     // If there is a different number of properties, then there must have been a
1022     // change.
1023     bool has_property_changed =
1024         old_type_config.properties_size() !=
1025         new_schema_type_and_config->second.properties_size();
1026     bool is_incompatible = false;
1027     bool is_index_incompatible = false;
1028     bool is_join_incompatible = false;
1029     for (const auto& old_property_config : old_type_config.properties()) {
1030       std::string_view property_name = old_property_config.property_name();
1031       if (old_property_config.cardinality() ==
1032           PropertyConfigProto::Cardinality::REQUIRED) {
1033         old_required_properties.insert(property_name);
1034       }
1035 
1036       // A non-default term_match_type indicates that this property is meant to
1037       // be indexed.
1038       bool is_indexed_property = IsIndexedProperty(old_property_config);
1039       if (is_indexed_property) {
1040         old_indexed_properties.insert(property_name);
1041       }
1042 
1043       bool is_joinable_property =
1044           old_property_config.joinable_config().value_type() !=
1045           JoinableConfig::ValueType::NONE;
1046       if (is_joinable_property) {
1047         old_joinable_properties.insert(property_name);
1048       }
1049 
1050       // A nested-document property is a property of DataType::DOCUMENT.
1051       bool is_nested_document_property =
1052           old_property_config.data_type() ==
1053           PropertyConfigProto::DataType::DOCUMENT;
1054       if (is_nested_document_property) {
1055         old_nested_document_properties.insert(property_name);
1056       }
1057 
1058       auto new_property_name_and_config =
1059           new_parsed_property_configs.property_config_map.find(
1060               old_property_config.property_name());
1061 
1062       if (new_property_name_and_config ==
1063           new_parsed_property_configs.property_config_map.end()) {
1064         // Didn't find the old property
1065         ICING_VLOG(1) << absl_ports::StrCat(
1066             "Previously defined property type '", old_type_config.schema_type(),
1067             ".", old_property_config.property_name(),
1068             "' was not defined in new schema");
1069         is_incompatible = true;
1070         is_index_incompatible |= is_indexed_property;
1071         is_join_incompatible |=
1072             is_joinable_property || is_nested_document_property;
1073         continue;
1074       }
1075 
1076       const PropertyConfigProto* new_property_config =
1077           new_property_name_and_config->second;
1078       if (!has_property_changed &&
1079           !ArePropertiesEqual(old_property_config, *new_property_config)) {
1080         // Finally found a property that changed.
1081         has_property_changed = true;
1082       }
1083 
1084       if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
1085         ICING_VLOG(1) << absl_ports::StrCat(
1086             "Property '", old_type_config.schema_type(), ".",
1087             old_property_config.property_name(), "' is incompatible.");
1088         is_incompatible = true;
1089       }
1090 
1091       // Any change in the indexed property requires a reindexing
1092       if (!IsTermMatchTypeCompatible(
1093               old_property_config.string_indexing_config(),
1094               new_property_config->string_indexing_config()) ||
1095           !IsIntegerNumericMatchTypeCompatible(
1096               old_property_config.integer_indexing_config(),
1097               new_property_config->integer_indexing_config()) ||
1098           !IsDocumentIndexingCompatible(
1099               old_property_config.document_indexing_config(),
1100               new_property_config->document_indexing_config()) ||
1101           !IsEmbeddingIndexingCompatible(
1102               old_property_config.embedding_indexing_config(),
1103               new_property_config->embedding_indexing_config())) {
1104         is_index_incompatible = true;
1105       }
1106 
1107       if (old_property_config.joinable_config().value_type() !=
1108           new_property_config->joinable_config().value_type()) {
1109         is_join_incompatible = true;
1110       }
1111     }
1112 
1113     // We can't have new properties that are REQUIRED since we won't know how
1114     // to backfill the data, and the existing data will be invalid. We're
1115     // guaranteed from our previous checks that all the old properties are also
1116     // present in the new property config, so we can do a simple int comparison
1117     // here to detect new required properties.
1118     if (!IsSubset(new_parsed_property_configs.required_properties,
1119                   old_required_properties)) {
1120       ICING_VLOG(1) << absl_ports::StrCat(
1121           "New schema '", old_type_config.schema_type(),
1122           "' has REQUIRED properties that are not "
1123           "present in the previously defined schema");
1124       is_incompatible = true;
1125     }
1126 
1127     // If we've gained any new indexed properties (this includes gaining new
1128     // indexed nested document properties), then the section ids may change.
1129     // Since the section ids are stored in the index, we'll need to
1130     // reindex everything.
1131     if (!IsSubset(new_parsed_property_configs.indexed_properties,
1132                   old_indexed_properties)) {
1133       ICING_VLOG(1) << "Set of indexed properties in schema type '"
1134                     << old_type_config.schema_type()
1135                     << "' has changed, required reindexing.";
1136       is_index_incompatible = true;
1137     }
1138 
1139     // If we've gained any new joinable properties, then the joinable property
1140     // ids may change. Since the joinable property ids are stored in the cache,
1141     // we'll need to reconstruct join index.
1142     // If we've gained any new nested document properties, we also rebuild the
1143     // join index. This is because we index all nested joinable properties, so
1144     // adding a nested document property will most probably result in having
1145     // more joinable properties.
1146     if (!IsSubset(new_parsed_property_configs.joinable_properties,
1147                   old_joinable_properties) ||
1148         !IsSubset(new_parsed_property_configs.nested_document_properties,
1149                   old_nested_document_properties)) {
1150       ICING_VLOG(1) << "Set of joinable properties in schema type '"
1151                     << old_type_config.schema_type()
1152                     << "' has changed, required reconstructing joinable cache.";
1153       is_join_incompatible = true;
1154     }
1155 
1156     if (is_incompatible) {
1157       AddIncompatibleChangeToDelta(schema_delta.schema_types_incompatible,
1158                                    old_type_config, new_schema_dependent_map,
1159                                    old_type_config_map, new_type_config_map);
1160     }
1161 
1162     if (is_index_incompatible) {
1163       AddIncompatibleChangeToDelta(schema_delta.schema_types_index_incompatible,
1164                                    old_type_config, new_schema_dependent_map,
1165                                    old_type_config_map, new_type_config_map);
1166     }
1167 
1168     if (is_join_incompatible) {
1169       AddIncompatibleChangeToDelta(schema_delta.schema_types_join_incompatible,
1170                                    old_type_config, new_schema_dependent_map,
1171                                    old_type_config_map, new_type_config_map);
1172     }
1173 
1174     if (!is_incompatible && !is_index_incompatible && !is_join_incompatible &&
1175         has_property_changed) {
1176       schema_delta.schema_types_changed_fully_compatible.insert(
1177           old_type_config.schema_type());
1178     }
1179 
1180     // Lastly, remove this type from the map. We know that this type can't
1181     // come up in future iterations through the old schema types because the old
1182     // type config has unique types.
1183     new_type_config_map.erase(old_type_config.schema_type());
1184   }
1185 
1186   // Any types that are still present in the new_type_config_map are newly added
1187   // types.
1188   schema_delta.schema_types_new.reserve(new_type_config_map.size());
1189   for (auto& kvp : new_type_config_map) {
1190     schema_delta.schema_types_new.insert(std::move(kvp.first));
1191   }
1192 
1193   return schema_delta;
1194 }
1195 
1196 }  // namespace lib
1197 }  // namespace icing
1198