• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SCHEMA_UTIL_H_
16 #define ICING_SCHEMA_SCHEMA_UTIL_H_
17 
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <unordered_map>
22 #include <unordered_set>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/proto/schema.pb.h"
27 
28 namespace icing {
29 namespace lib {
30 
31 class SchemaUtil {
32  public:
33   using TypeConfigMap =
34       std::unordered_map<std::string, const SchemaTypeConfigProto>;
35 
36   // A data structure that stores the relationships between schema types. The
37   // keys in TypeRelationMap are schema types, and the values are sets of schema
38   // types that are directly or indirectly related to the key.
39   template <typename T>
40   using TypeRelationMap =
41       std::unordered_map<std::string_view,
42                          std::unordered_map<std::string_view, T>>;
43 
44   // If A -> B is indicated in the map, then type A must be built before
45   // building type B, which implies one of the following situations.
46   //
47   // 1. B has a property of type A.
48   // 2. A is a parent type of B via polymorphism.
49   //
50   // For the first case, this map will also include all PropertyConfigProto
51   // (with DOCUMENT data_type) pointers which *directly* connects type A and B.
52   // IOW, this vector of PropertyConfigProto* are "direct edges" connecting A
53   // and B directly. It will be an empty vector if A and B are not "directly"
54   // connected, but instead via another intermediate level of schema type. For
55   // example, the actual dependency is A -> C -> B, so there will be A -> C and
56   // C -> B with valid PropertyConfigProto* respectively in this map, but we
57   // will also expand transitive dependents: add A -> B into dependent map with
58   // empty vector of "edges".
59   using DependentMap = TypeRelationMap<std::vector<const PropertyConfigProto*>>;
60 
61   // If A -> B is indicated in the map, then type A is a parent type of B,
62   // directly or indirectly. If directly, the bool value in the map will be
63   // true, otherwise false.
64   //
65   // Note that all relationships contained in this map are also entries in the
66   // DependentMap, i.e. if B inherits from A, then there will be a mapping from
67   // A to B in both this map and the DependentMap.
68   using InheritanceMap = TypeRelationMap<bool>;
69 
70   struct SchemaDelta {
71     // Which schema types were present in the old schema, but were deleted from
72     // the new schema.
73     std::unordered_set<std::string> schema_types_deleted;
74 
75     // Which schema types had their SchemaTypeConfigProto changed in a way that
76     // could invalidate existing Documents of that schema type.
77     std::unordered_set<std::string> schema_types_incompatible;
78 
79     // Schema types that were added in the new schema. Represented by the
80     // `schema_type` field in the SchemaTypeConfigProto.
81     std::unordered_set<std::string> schema_types_new;
82 
83     // Schema types that were changed in a way that was backwards compatible and
84     // didn't invalidate the index. Represented by the `schema_type` field in
85     // the SchemaTypeConfigProto.
86     std::unordered_set<std::string> schema_types_changed_fully_compatible;
87 
88     // Schema types that were changed in a way that was backwards compatible,
89     // but invalidated the index. Represented by the `schema_type` field in the
90     // SchemaTypeConfigProto.
91     std::unordered_set<std::string> schema_types_index_incompatible;
92 
93     // Schema types that were changed in a way that was backwards compatible,
94     // but invalidated the joinable cache. Represented by the `schema_type`
95     // field in the SchemaTypeConfigProto.
96     std::unordered_set<std::string> schema_types_join_incompatible;
97 
98     bool operator==(const SchemaDelta& other) const {
99       return schema_types_deleted == other.schema_types_deleted &&
100              schema_types_incompatible == other.schema_types_incompatible &&
101              schema_types_new == other.schema_types_new &&
102              schema_types_changed_fully_compatible ==
103                  other.schema_types_changed_fully_compatible &&
104              schema_types_index_incompatible ==
105                  other.schema_types_index_incompatible &&
106              schema_types_join_incompatible ==
107                  other.schema_types_join_incompatible;
108     }
109   };
110 
111   struct ParsedPropertyConfigs {
112     // Mapping of property name to PropertyConfigProto
113     std::unordered_map<std::string_view, const PropertyConfigProto*>
114         property_config_map;
115 
116     // Properties that have an indexing config
117     std::unordered_set<std::string_view> indexed_properties;
118 
119     // Properties that were REQUIRED
120     std::unordered_set<std::string_view> required_properties;
121 
122     // Properties that have joinable config
123     std::unordered_set<std::string_view> joinable_properties;
124 
125     // Properties that have DataType::DOCUMENT
126     std::unordered_set<std::string_view> nested_document_properties;
127   };
128 
129   // This function validates:
130   //   1. SchemaTypeConfigProto.schema_type's must be unique
131   //   2. Properties within one SchemaTypeConfigProto must be unique
132   //   3. SchemaTypeConfigProtos.schema_type must be non-empty
133   //   4. PropertyConfigProtos.property_name must be non-empty
134   //   5. PropertyConfigProtos.property_name's must be unique within one
135   //      SchemaTypeConfigProto
136   //   6. PropertyConfigProtos.data_type cannot be UNKNOWN
137   //   7. PropertyConfigProtos.data_type of DOCUMENT must also have a
138   //      schema_type
139   //   8. PropertyConfigProtos.cardinality cannot be UNKNOWN
140   //   9. PropertyConfigProtos.schema_type's must correspond to a
141   //      SchemaTypeConfigProto.schema_type
142   //  10. Property names can only be alphanumeric.
143   //  11. Any STRING data types have a valid string_indexing_config
144   //  12. PropertyConfigProtos.joinable_config must be valid. See
145   //      ValidateJoinableConfig for more details.
146   //  13. Any PropertyConfigProtos with nested DOCUMENT data type must not have
147   //      REPEATED cardinality if they reference a schema type containing
148   //      joinable property.
149   //  14. The schema definition cannot have invalid cycles. A cycle is invalid
150   //      if:
151   //      a. SchemaTypeConfigProto.parent_type definitions form an inheritance
152   //         cycle.
153   //      b. The schema's property definitions have schema_types that form a
154   //         cycle, and all properties on the cycle declare
155   //         DocumentIndexingConfig.index_nested_properties=true.
156   //      c. The schema's property definitions have schema_types that form a
157   //         cycle, and the cycle leads to an invalid joinable property config.
158   //         This is the case if:
159   //           i. Any type node in the cycle itself has a joinable proprty
160   //              (property whose joinable config is not NONE), OR
161   //          ii. Any type node in the cycle has a nested-type (direct or
162   //              indirect) with a joinable property.
163   //  15. For DOCUMENT data types, if
164   //      DocumentIndexingConfig.indexable_nested_properties_list is non-empty,
165   //      DocumentIndexingConfig.index_nested_properties must be false.
166   //
167   // Returns:
168   //   On success, a dependent map from each types to their dependent types
169   //   that depend on it directly or indirectly.
170   //   ALREADY_EXISTS for case 1 and 2
171   //   INVALID_ARGUMENT for 3-15
172   static libtextclassifier3::StatusOr<DependentMap> Validate(
173       const SchemaProto& schema, bool allow_circular_schema_definitions);
174 
175   // Builds a transitive inheritance map.
176   //
177   // Ex. Suppose we have a schema with four types A, B, C and D, and we have the
178   // following direct inheritance relation.
179   //
180   // A -> B (A is the parent type of B)
181   // B -> C (B is the parent type of C)
182   // C -> D (C is the parent type of D)
183   //
184   // Then, the transitive inheritance map for this schema would be:
185   //
186   // A -> B, C, D
187   // B -> C, D
188   // C -> D
189   //
190   // RETURNS:
191   //   On success, a transitive inheritance map of all types in the schema.
192   //   INVALID_ARGUMENT if the inheritance graph contains a cycle.
193   static libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
194   BuildTransitiveInheritanceGraph(const SchemaProto& schema);
195 
196   // Creates a mapping of schema type -> schema type config proto. The
197   // type_config_map is cleared, and then each schema-type_config_proto pair is
198   // placed in the given type_config_map parameter.
199   static void BuildTypeConfigMap(const SchemaProto& schema,
200                                  TypeConfigMap* type_config_map);
201 
202   // Parses the given type_config and returns a struct of easily-parseable
203   // information about the properties.
204   static ParsedPropertyConfigs ParsePropertyConfigs(
205       const SchemaTypeConfigProto& type_config);
206 
207   // Computes the delta between the old and new schema. There are a few
208   // differences that'll be reported:
209   //   1. The derived index would be incompatible. This is held in
210   //      `SchemaDelta.index_incompatible`.
211   //   2. Some schema types existed in the old schema, but have been deleted
212   //      from the new schema. This is held in
213   //      `SchemaDelta.schema_types_deleted`
214   //   3. A schema type's new definition would mean any existing data of the old
215   //      definition is now incompatible.
216   //   4. The derived join index would be incompatible. This is held in
217   //      `SchemaDelta.join_incompatible`.
218   //
219   // For case 1, the two schemas would result in an incompatible index if:
220   //   1.1. The new SchemaProto has a different set of indexed properties than
221   //        the old SchemaProto.
222   //
223   // For case 3, the two schemas would result in incompatible data if:
224   //   3.1. A SchemaTypeConfig exists in the old SchemaProto, but is not in the
225   //        new SchemaProto
226   //   3.2. A property exists in the old SchemaTypeConfig, but is not in the new
227   //        SchemaTypeConfig
228   //   3.3. A property in the new SchemaTypeConfig and has a REQUIRED
229   //        PropertyConfigProto.cardinality, but is not in the old
230   //        SchemaTypeConfig
231   //   3.4. A property is in both the old and new SchemaTypeConfig, but its
232   //        PropertyConfigProto.data_type is different
233   //   3.5. A property is in both the old and new SchemaTypeConfig, but its
234   //        PropertyConfigProto.schema_type is different
235   //   3.6. A property is in both the old and new SchemaTypeConfig, but its new
236   //        PropertyConfigProto.cardinality is more restrictive. Restrictive
237   //        scale defined as:
238   //          LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
239   //
240   // For case 4, the two schemas would result in an incompatible join if:
241   //   4.1. A SchematypeConfig exists in the new SchemaProto that has a
242   //        different set of joinable properties than it did in the old
243   //        SchemaProto.
244   //
245   // A property is defined by the combination of the
246   // SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
247   //
248   // Returns a SchemaDelta that captures the aforementioned differences.
249   static const SchemaDelta ComputeCompatibilityDelta(
250       const SchemaProto& old_schema, const SchemaProto& new_schema,
251       const DependentMap& new_schema_dependent_map);
252 
253   // Validates the 'property_name' field.
254   //   1. Can't be an empty string
255   //   2. Can only contain alphanumeric characters
256   //
257   // NOTE: schema_type is only used for logging. It is not necessary to populate
258   // it.
259   //
260   // RETURNS:
261   //   - OK if property_name is valid
262   //   - INVALID_ARGUMENT if property name is empty or contains an
263   //     non-alphabetic character.
264   static libtextclassifier3::Status ValidatePropertyName(
265       std::string_view property_name, std::string_view schema_type = "");
266 
267   static bool IsIndexedProperty(const PropertyConfigProto& property_config);
268 
269  private:
270   // Validates the 'schema_type' field
271   //
272   // Returns:
273   //   INVALID_ARGUMENT if 'schema_type' is an empty string.
274   //   OK on success
275   static libtextclassifier3::Status ValidateSchemaType(
276       std::string_view schema_type);
277 
278   // Validates the 'data_type' field.
279   //
280   // Returns:
281   //   INVALID_ARGUMENT if it's UNKNOWN
282   //   OK on success
283   static libtextclassifier3::Status ValidateDataType(
284       PropertyConfigProto::DataType::Code data_type,
285       std::string_view schema_type, std::string_view property_name);
286 
287   // Validates the 'cardinality' field.
288   //
289   // Returns:
290   //   INVALID_ARGUMENT if it's UNKNOWN
291   //   OK on success
292   static libtextclassifier3::Status ValidateCardinality(
293       PropertyConfigProto::Cardinality::Code cardinality,
294       std::string_view schema_type, std::string_view property_name);
295 
296   // Checks that the 'string_indexing_config' satisfies the following rules:
297   //   1. Only STRING data types can be indexed
298   //   2. An indexed property must have a valid tokenizer type
299   //
300   // Returns:
301   //   INVALID_ARGUMENT if any of the rules are not followed
302   //   OK on success
303   static libtextclassifier3::Status ValidateStringIndexingConfig(
304       const StringIndexingConfig& config,
305       PropertyConfigProto::DataType::Code data_type,
306       std::string_view schema_type, std::string_view property_name);
307 
308   // Checks that the 'joinable_config' satisfies the following rules:
309   //   1. If the data type matches joinable value type
310   //      a. Only STRING data types can use QUALIFIED_ID joinable value type
311   //   2. Only QUALIFIED_ID joinable value type can have delete propagation
312   //      enabled
313   //   3. Any joinable property should have non-REPEATED cardinality
314   //
315   // Returns:
316   //   INVALID_ARGUMENT if any of the rules are not followed
317   //   OK on success
318   static libtextclassifier3::Status ValidateJoinableConfig(
319       const JoinableConfig& config,
320       PropertyConfigProto::DataType::Code data_type,
321       PropertyConfigProto::Cardinality::Code cardinality,
322       std::string_view schema_type, std::string_view property_name);
323 
324   // Checks that the 'document_indexing_config' satisfies the following rule:
325   //    1. If indexable_nested_properties is non-empty, index_nested_properties
326   //       must be set to false.
327   //
328   // Returns:
329   //   INVALID_ARGUMENT if any of the rules are not followed
330   //   OK on success
331   static libtextclassifier3::Status ValidateDocumentIndexingConfig(
332       const DocumentIndexingConfig& config, std::string_view schema_type,
333       std::string_view property_name);
334 
335   // Returns if 'parent_type' is a direct or indirect parent of 'child_type'.
336   static bool IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
337                        std::string_view parent_type,
338                        std::string_view child_type);
339 
340   // Returns if 'child_property_config' in a child type can override
341   // 'parent_property_config' in the parent type.
342   //
343   // Let's assign 'child_property_config' a type T1 and 'parent_property_config'
344   // a type T2 that captures information for their data_type, schema_type and
345   // cardinalities, so that 'child_property_config' can override
346   // 'parent_property_config' if and only if T1 <: T2, i.e. T1 is a subtype of
347   // T2.
348   //
349   // Below are the rules for inferring subtype relations.
350   // - T <: T for every type T.
351   // - If U extends T, then U <: T.
352   // - For every type T1, T2 and T3, if T1 <: T2 and T2 <: T3, then T1 <: T3.
353   // - Optional<T> <: Repeated<T> for every type T.
354   // - Required<T> <: Optional<T> for every type T.
355   // - If T1 <: T2, then
356   //   - Required<T1> <: Required<T2>
357   //   - Optional<T1> <: Optional<T2>
358   //   - Repeated<T1> <: Repeated<T2>
359   //
360   // We assume the Closed World Assumption (CWA), i.e. if T1 <: T2 cannot be
361   // deduced from the above rules, then T1 is not a subtype of T2.
362   static bool IsInheritedPropertyCompatible(
363       const SchemaUtil::InheritanceMap& inheritance_map,
364       const PropertyConfigProto& child_property_config,
365       const PropertyConfigProto& parent_property_config);
366 
367   // Verifies that every child type's property set has included all compatible
368   // properties from parent types, based on the following rule:
369   //
370   // - If a property "prop" of type T is in the parent, then the child type must
371   //   also have "prop" that is of type U, such that U <: T, i.e. U is a subtype
372   //   of T.
373   //
374   // RETURNS:
375   //   Ok on validation success
376   //   INVALID_ARGUMENT if an exception that violates the above validation rule
377   //     is found.
378   static libtextclassifier3::Status ValidateInheritedProperties(
379       const SchemaProto& schema);
380 };
381 
382 }  // namespace lib
383 }  // namespace icing
384 
385 #endif  // ICING_SCHEMA_SCHEMA_UTIL_H_
386