1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCHEMA_SCHEMA_UTIL_H_ 16 #define ICING_SCHEMA_SCHEMA_UTIL_H_ 17 18 #include <cstdint> 19 #include <string> 20 #include <string_view> 21 #include <unordered_map> 22 #include <unordered_set> 23 24 #include "icing/text_classifier/lib3/utils/base/status.h" 25 #include "icing/text_classifier/lib3/utils/base/statusor.h" 26 #include "icing/proto/schema.pb.h" 27 28 namespace icing { 29 namespace lib { 30 31 class SchemaUtil { 32 public: 33 using TypeConfigMap = 34 std::unordered_map<std::string, const SchemaTypeConfigProto>; 35 36 // Maps from a child type to the parent types that depend on it. 37 // Ex. type A has a single property of type B 38 // The dependency map will be { { "B", { "A" } } } 39 using DependencyMap = 40 std::unordered_map<std::string_view, 41 std::unordered_set<std::string_view>>; 42 43 struct SchemaDelta { 44 // Which schema types were present in the old schema, but were deleted from 45 // the new schema. 46 std::unordered_set<std::string> schema_types_deleted; 47 48 // Which schema types had their SchemaTypeConfigProto changed in a way that 49 // could invalidate existing Documents of that schema type. 50 std::unordered_set<std::string> schema_types_incompatible; 51 52 // Schema types that were added in the new schema. Represented by the 53 // `schema_type` field in the SchemaTypeConfigProto. 54 std::unordered_set<std::string> schema_types_new; 55 56 // Schema types that were changed in a way that was backwards compatible and 57 // didn't invalidate the index. Represented by the `schema_type` field in 58 // the SchemaTypeConfigProto. 59 std::unordered_set<std::string> schema_types_changed_fully_compatible; 60 61 // Schema types that were changed in a way that was backwards compatible, 62 // but invalidated the index. Represented by the `schema_type` field in the 63 // SchemaTypeConfigProto. 64 std::unordered_set<std::string> schema_types_index_incompatible; 65 66 bool operator==(const SchemaDelta& other) const { 67 return schema_types_deleted == other.schema_types_deleted && 68 schema_types_incompatible == other.schema_types_incompatible && 69 schema_types_new == other.schema_types_new && 70 schema_types_changed_fully_compatible == 71 other.schema_types_changed_fully_compatible && 72 schema_types_index_incompatible == 73 other.schema_types_index_incompatible; 74 } 75 }; 76 77 struct ParsedPropertyConfigs { 78 // Mapping of property name to PropertyConfigProto 79 std::unordered_map<std::string_view, const PropertyConfigProto*> 80 property_config_map; 81 82 // Total number of properties that have an indexing config 83 int32_t num_indexed_properties = 0; 84 85 // Total number of properties that were REQUIRED 86 int32_t num_required_properties = 0; 87 }; 88 89 // This function validates: 90 // 1. SchemaTypeConfigProto.schema_type's must be unique 91 // 2. Properties within one SchemaTypeConfigProto must be unique 92 // 3. SchemaTypeConfigProtos.schema_type must be non-empty 93 // 4. PropertyConfigProtos.property_name must be non-empty 94 // 5. PropertyConfigProtos.property_name's must be unique within one 95 // SchemaTypeConfigProto 96 // 6. PropertyConfigProtos.data_type cannot be UNKNOWN 97 // 7. PropertyConfigProtos.data_type of DOCUMENT must also have a 98 // schema_type 99 // 8. PropertyConfigProtos.cardinality cannot be UNKNOWN 100 // 9. PropertyConfigProtos.schema_type's must correspond to a 101 // SchemaTypeConfigProto.schema_type 102 // 10. Property names can only be alphanumeric. 103 // 11. Any STRING data types have a valid string_indexing_config 104 // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is 105 // itself, thus creating an infinite loop. 106 // 13. Two SchemaTypeConfigProtos cannot have properties that reference each 107 // other's schema_type, thus creating an infinite loop. 108 // 109 // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once 110 // document properties can be opted out of indexing. 111 // 112 // Returns: 113 // On success, a dependency map from each child types to all parent types 114 // that depend on it directly or indirectly. 115 // ALREADY_EXISTS for case 1 and 2 116 // INVALID_ARGUMENT for 3-13 117 static libtextclassifier3::StatusOr<DependencyMap> Validate( 118 const SchemaProto& schema); 119 120 // Creates a mapping of schema type -> schema type config proto. The 121 // type_config_map is cleared, and then each schema-type_config_proto pair is 122 // placed in the given type_config_map parameter. 123 static void BuildTypeConfigMap(const SchemaProto& schema, 124 TypeConfigMap* type_config_map); 125 126 // Parses the given type_config and returns a struct of easily-parseable 127 // information about the properties. 128 static ParsedPropertyConfigs ParsePropertyConfigs( 129 const SchemaTypeConfigProto& type_config); 130 131 // Computes the delta between the old and new schema. There are a few 132 // differences that'll be reported: 133 // 1. The derived index would be incompatible. This is held in 134 // `SchemaDelta.index_incompatible`. 135 // 2. Some schema types existed in the old schema, but have been deleted 136 // from the new schema. This is held in 137 // `SchemaDelta.schema_types_deleted` 138 // 3. A schema type's new definition would mean any existing data of the old 139 // definition is now incompatible. 140 // 141 // For case 1, the two schemas would result in an incompatible index if: 142 // 1.1. The new SchemaProto has a different set of indexed properties than 143 // the old SchemaProto. 144 // 145 // For case 3, the two schemas would result in incompatible data if: 146 // 3.1. A SchemaTypeConfig exists in the old SchemaProto, but is not in the 147 // new SchemaProto 148 // 3.2. A property exists in the old SchemaTypeConfig, but is not in the new 149 // SchemaTypeConfig 150 // 3.3. A property in the new SchemaTypeConfig and has a REQUIRED 151 // PropertyConfigProto.cardinality, but is not in the old 152 // SchemaTypeConfig 153 // 3.4. A property is in both the old and new SchemaTypeConfig, but its 154 // PropertyConfigProto.data_type is different 155 // 3.5. A property is in both the old and new SchemaTypeConfig, but its 156 // PropertyConfigProto.schema_type is different 157 // 3.6. A property is in both the old and new SchemaTypeConfig, but its new 158 // PropertyConfigProto.cardinality is more restrictive. Restrictive 159 // scale defined as: 160 // LEAST <REPEATED - OPTIONAL - REQUIRED> MOST 161 // 162 // A property is defined by the combination of the 163 // SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name. 164 // 165 // Returns a SchemaDelta that captures the aforementioned differences. 166 static const SchemaDelta ComputeCompatibilityDelta( 167 const SchemaProto& old_schema, const SchemaProto& new_schema, 168 const DependencyMap& new_schema_dependency_map); 169 170 // Validates the 'property_name' field. 171 // 1. Can't be an empty string 172 // 2. Can only contain alphanumeric characters 173 // 174 // NOTE: schema_type is only used for logging. It is not necessary to populate 175 // it. 176 // 177 // RETURNS: 178 // - OK if property_name is valid 179 // - INVALID_ARGUMENT if property name is empty or contains an 180 // non-alphabetic character. 181 static libtextclassifier3::Status ValidatePropertyName( 182 std::string_view property_name, std::string_view schema_type = ""); 183 184 private: 185 // Validates the 'schema_type' field 186 // 187 // Returns: 188 // INVALID_ARGUMENT if 'schema_type' is an empty string. 189 // OK on success 190 static libtextclassifier3::Status ValidateSchemaType( 191 std::string_view schema_type); 192 193 // Validates the 'data_type' field. 194 // 195 // Returns: 196 // INVALID_ARGUMENT if it's UNKNOWN 197 // OK on success 198 static libtextclassifier3::Status ValidateDataType( 199 PropertyConfigProto::DataType::Code data_type, 200 std::string_view schema_type, std::string_view property_name); 201 202 // Validates the 'cardinality' field. 203 // 204 // Returns: 205 // INVALID_ARGUMENT if it's UNKNOWN 206 // OK on success 207 static libtextclassifier3::Status ValidateCardinality( 208 PropertyConfigProto::Cardinality::Code cardinality, 209 std::string_view schema_type, std::string_view property_name); 210 211 // Checks that the 'string_indexing_config' satisfies the following rules: 212 // 1. Only STRING data types can be indexed 213 // 2. An indexed property must have a valid tokenizer type 214 // 215 // Returns: 216 // INVALID_ARGUMENT if any of the rules are not followed 217 // OK on success 218 static libtextclassifier3::Status ValidateStringIndexingConfig( 219 const StringIndexingConfig& config, 220 PropertyConfigProto::DataType::Code data_type, 221 std::string_view schema_type, std::string_view property_name); 222 }; 223 224 } // namespace lib 225 } // namespace icing 226 227 #endif // ICING_SCHEMA_SCHEMA_UTIL_H_ 228