• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SCHEMA_UTIL_H_
16 #define ICING_SCHEMA_SCHEMA_UTIL_H_
17 
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <unordered_map>
22 #include <unordered_set>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/proto/schema.pb.h"
27 
28 namespace icing {
29 namespace lib {
30 
// Static helpers for validating a SchemaProto, building lookup structures
// from it, and computing the compatibility delta between two schemas.
31 class SchemaUtil {
32  public:
  // Maps a schema type name to its SchemaTypeConfigProto. Filled in by
  // BuildTypeConfigMap().
33   using TypeConfigMap =
34       std::unordered_map<std::string, const SchemaTypeConfigProto>;
35 
36   // Maps from a child type to the parent types that depend on it.
37   // Ex. type A has a single property of type B
38   // The dependency map will be { { "B", { "A" } } }
  //
  // NOTE(review): keys and values are non-owning std::string_views.
  // Presumably they point into the SchemaProto passed to Validate(), which
  // would then have to outlive the map - confirm against the implementation.
39   using DependencyMap =
40       std::unordered_map<std::string_view,
41                          std::unordered_set<std::string_view>>;
42 
  // The differences between an old and a new schema, grouped by the kind of
  // change. Returned by ComputeCompatibilityDelta().
43   struct SchemaDelta {
44     // Which schema types were present in the old schema, but were deleted from
45     // the new schema.
46     std::unordered_set<std::string> schema_types_deleted;
47 
48     // Which schema types had their SchemaTypeConfigProto changed in a way that
49     // could invalidate existing Documents of that schema type.
50     std::unordered_set<std::string> schema_types_incompatible;
51 
52     // Schema types that were added in the new schema. Represented by the
53     // `schema_type` field in the SchemaTypeConfigProto.
54     std::unordered_set<std::string> schema_types_new;
55 
56     // Schema types that were changed in a way that was backwards compatible and
57     // didn't invalidate the index. Represented by the `schema_type` field in
58     // the SchemaTypeConfigProto.
59     std::unordered_set<std::string> schema_types_changed_fully_compatible;
60 
61     // Schema types that were changed in a way that was backwards compatible,
62     // but invalidated the index. Represented by the `schema_type` field in the
63     // SchemaTypeConfigProto.
64     std::unordered_set<std::string> schema_types_index_incompatible;
65 
    // Two deltas compare equal only when all five sets above are equal.
66     bool operator==(const SchemaDelta& other) const {
67       return schema_types_deleted == other.schema_types_deleted &&
68              schema_types_incompatible == other.schema_types_incompatible &&
69              schema_types_new == other.schema_types_new &&
70              schema_types_changed_fully_compatible ==
71                  other.schema_types_changed_fully_compatible &&
72              schema_types_index_incompatible ==
73                  other.schema_types_index_incompatible;
74     }
75   };
76 
  // Summary of the properties of one SchemaTypeConfigProto. Returned by
  // ParsePropertyConfigs().
77   struct ParsedPropertyConfigs {
78     // Mapping of property name to PropertyConfigProto
    //
    // NOTE(review): both the string_view keys and the pointer values are
    // non-owning. Presumably they refer into the type_config passed to
    // ParsePropertyConfigs() - confirm, and keep that proto alive if so.
79     std::unordered_map<std::string_view, const PropertyConfigProto*>
80         property_config_map;
81 
82     // Total number of properties that have an indexing config
83     int32_t num_indexed_properties = 0;
84 
85     // Total number of properties that were REQUIRED
86     int32_t num_required_properties = 0;
87   };
88 
89   // This function validates:
90   //   1. SchemaTypeConfigProto.schema_type's must be unique
91   //   2. Properties within one SchemaTypeConfigProto must be unique
92   //   3. SchemaTypeConfigProtos.schema_type must be non-empty
93   //   4. PropertyConfigProtos.property_name must be non-empty
94   //   5. PropertyConfigProtos.property_name's must be unique within one
95   //      SchemaTypeConfigProto
96   //   6. PropertyConfigProtos.data_type cannot be UNKNOWN
97   //   7. PropertyConfigProtos.data_type of DOCUMENT must also have a
98   //      schema_type
99   //   8. PropertyConfigProtos.cardinality cannot be UNKNOWN
100   //   9. PropertyConfigProtos.schema_type's must correspond to a
101   //      SchemaTypeConfigProto.schema_type
102   //  10. Property names can only be alphanumeric.
103   //  11. Any STRING data types have a valid string_indexing_config
104   //  12. A SchemaTypeConfigProto cannot have a property whose schema_type is
105   //      itself, thus creating an infinite loop.
106   //  13. Two SchemaTypeConfigProtos cannot have properties that reference each
107   //      other's schema_type, thus creating an infinite loop.
108   //
109   //  TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
110   //  document properties can be opted out of indexing.
111   //
112   // Returns:
113   //   On success, a dependency map from each child type to all parent types
114   //   that depend on it directly or indirectly.
115   //   ALREADY_EXISTS for case 1 and 2
116   //   INVALID_ARGUMENT for 3-13
117   static libtextclassifier3::StatusOr<DependencyMap> Validate(
118       const SchemaProto& schema);
119 
120   // Creates a mapping of schema type -> schema type config proto. The
121   // type_config_map is cleared, and then each schema-type_config_proto pair is
122   // placed in the given type_config_map parameter.
123   static void BuildTypeConfigMap(const SchemaProto& schema,
124                                  TypeConfigMap* type_config_map);
125 
126   // Parses the given type_config and returns a struct of easily-parseable
127   // information about the properties.
128   static ParsedPropertyConfigs ParsePropertyConfigs(
129       const SchemaTypeConfigProto& type_config);
130 
131   // Computes the delta between the old and new schema. There are a few
132   // differences that'll be reported:
133   //   1. The derived index would be incompatible. This is held in
134   //      `SchemaDelta.schema_types_index_incompatible`.
135   //   2. Some schema types existed in the old schema, but have been deleted
136   //      from the new schema. This is held in
137   //      `SchemaDelta.schema_types_deleted`
138   //   3. A schema type's new definition would mean any existing data of the old
139   //      definition is now incompatible.
140   //
141   // For case 1, the two schemas would result in an incompatible index if:
142   //   1.1. The new SchemaProto has a different set of indexed properties than
143   //        the old SchemaProto.
144   //
145   // For case 3, the two schemas would result in incompatible data if:
146   //   3.1. A SchemaTypeConfig exists in the old SchemaProto, but is not in the
147   //        new SchemaProto
148   //   3.2. A property exists in the old SchemaTypeConfig, but is not in the new
149   //        SchemaTypeConfig
150   //   3.3. A property is in the new SchemaTypeConfig and has a REQUIRED
151   //        PropertyConfigProto.cardinality, but is not in the old
152   //        SchemaTypeConfig
153   //   3.4. A property is in both the old and new SchemaTypeConfig, but its
154   //        PropertyConfigProto.data_type is different
155   //   3.5. A property is in both the old and new SchemaTypeConfig, but its
156   //        PropertyConfigProto.schema_type is different
157   //   3.6. A property is in both the old and new SchemaTypeConfig, but its new
158   //        PropertyConfigProto.cardinality is more restrictive. Restrictive
159   //        scale defined as:
160   //          LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
161   //
162   // A property is defined by the combination of the
163   // SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
164   //
165   // Returns a SchemaDelta that captures the aforementioned differences.
  //
  // NOTE(review): new_schema_dependency_map is presumably the map returned by
  // Validate(new_schema) - confirm against callers.
  // NOTE(review): the result is returned as a const value, so assigning it to
  // an existing SchemaDelta copies rather than moves. Dropping the const
  // would require the same change in the function's definition.
166   static const SchemaDelta ComputeCompatibilityDelta(
167       const SchemaProto& old_schema, const SchemaProto& new_schema,
168       const DependencyMap& new_schema_dependency_map);
169 
170   // Validates the 'property_name' field.
171   //   1. Can't be an empty string
172   //   2. Can only contain alphanumeric characters
173   //
174   // NOTE: schema_type is only used for logging. It is not necessary to populate
175   // it.
176   //
177   // RETURNS:
178   //   - OK if property_name is valid
179   //   - INVALID_ARGUMENT if property_name is empty or contains a
180   //     non-alphanumeric character.
181   static libtextclassifier3::Status ValidatePropertyName(
182       std::string_view property_name, std::string_view schema_type = "");
183 
184  private:
185   // Validates the 'schema_type' field
186   //
187   // Returns:
188   //   INVALID_ARGUMENT if 'schema_type' is an empty string.
189   //   OK on success
190   static libtextclassifier3::Status ValidateSchemaType(
191       std::string_view schema_type);
192 
193   // Validates the 'data_type' field.
194   //
195   // Returns:
196   //   INVALID_ARGUMENT if it's UNKNOWN
197   //   OK on success
198   static libtextclassifier3::Status ValidateDataType(
199       PropertyConfigProto::DataType::Code data_type,
200       std::string_view schema_type, std::string_view property_name);
201 
202   // Validates the 'cardinality' field.
203   //
204   // Returns:
205   //   INVALID_ARGUMENT if it's UNKNOWN
206   //   OK on success
207   static libtextclassifier3::Status ValidateCardinality(
208       PropertyConfigProto::Cardinality::Code cardinality,
209       std::string_view schema_type, std::string_view property_name);
210 
211   // Checks that the 'string_indexing_config' satisfies the following rules:
212   //   1. Only STRING data types can be indexed
213   //   2. An indexed property must have a valid tokenizer type
214   //
215   // Returns:
216   //   INVALID_ARGUMENT if any of the rules are not followed
217   //   OK on success
218   static libtextclassifier3::Status ValidateStringIndexingConfig(
219       const StringIndexingConfig& config,
220       PropertyConfigProto::DataType::Code data_type,
221       std::string_view schema_type, std::string_view property_name);
222 };
223 
224 }  // namespace lib
225 }  // namespace icing
226 
227 #endif  // ICING_SCHEMA_SCHEMA_UTIL_H_
228