// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
#define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_

#include <cstdint>
#include <random>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "icing/monkey_test/monkey-test-common-words.h"
#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/proto/schema.pb.h"
#include "icing/util/clock.h"

namespace icing {
namespace lib {

// A random schema generator used for monkey testing.
36 class MonkeySchemaGenerator { 37 public: 38 struct UpdateSchemaResult { 39 SchemaProto schema; 40 bool is_invalid_schema; 41 std::unordered_set<std::string> schema_types_deleted; 42 std::unordered_set<std::string> schema_types_incompatible; 43 std::unordered_set<std::string> schema_types_index_incompatible; 44 }; 45 MonkeySchemaGenerator(MonkeyTestRandomEngine * random,const IcingMonkeyTestRunnerConfiguration * config)46 explicit MonkeySchemaGenerator( 47 MonkeyTestRandomEngine* random, 48 const IcingMonkeyTestRunnerConfiguration* config) 49 : random_(random), config_(config) {} 50 51 SchemaProto GenerateSchema(); 52 53 UpdateSchemaResult UpdateSchema(const SchemaProto& schema); 54 55 private: 56 PropertyConfigProto GenerateProperty( 57 const SchemaTypeConfigProto& type_config, 58 PropertyConfigProto::Cardinality::Code cardinality, bool indexable); 59 60 void UpdateProperty(const SchemaTypeConfigProto& type_config, 61 PropertyConfigProto& property, 62 UpdateSchemaResult& result); 63 64 SchemaTypeConfigProto GenerateType(); 65 66 void UpdateType(SchemaTypeConfigProto& type_config, 67 UpdateSchemaResult& result); 68 69 int num_types_generated_ = 0; 70 // A map from type name to the number of properties generated in the 71 // corresponding types. 72 std::unordered_map<std::string, int> num_properties_generated_; 73 74 MonkeyTestRandomEngine* random_; // Does not own. 75 const IcingMonkeyTestRunnerConfiguration* config_; // Does not own. 76 }; 77 78 // A random document generator used for monkey testing. 79 // When num_uris is 0, all documents generated get different URIs. Otherwise, 80 // URIs will be randomly picked from a set with num_uris elements. 81 // Same for num_namespaces. 
82 class MonkeyDocumentGenerator { 83 public: MonkeyDocumentGenerator(MonkeyTestRandomEngine * random,const SchemaProto * schema,const IcingMonkeyTestRunnerConfiguration * config)84 explicit MonkeyDocumentGenerator( 85 MonkeyTestRandomEngine* random, const SchemaProto* schema, 86 const IcingMonkeyTestRunnerConfiguration* config) 87 : random_(random), schema_(schema), config_(config) {} 88 GetType()89 const SchemaTypeConfigProto& GetType() const { 90 std::uniform_int_distribution<> dist(0, schema_->types_size() - 1); 91 return schema_->types(dist(*random_)); 92 } 93 GetToken()94 std::string_view GetToken() const { 95 // TODO: Instead of randomly picking tokens from the language set 96 // kCommonWords, we can make some words more common than others to simulate 97 // term frequencies in the real world. This can help us get extremely large 98 // posting lists. 99 std::uniform_int_distribution<> dist(0, kCommonWords.size() - 1); 100 return kCommonWords[dist(*random_)]; 101 } 102 103 PropertyProto::VectorProto GetRandomVector() const; 104 105 std::string GetNamespace() const; 106 107 std::string GetUri() const; 108 109 int GetNumTokens() const; 110 111 int GetNumVectors(PropertyConfigProto::Cardinality::Code cardinality) const; 112 113 std::vector<std::string> GetStringPropertyContent() const; 114 115 std::vector<PropertyProto::VectorProto> GetVectorPropertyContent( 116 PropertyConfigProto::Cardinality::Code cardinality) const; 117 118 MonkeyTokenizedDocument GenerateDocument(); 119 120 private: 121 MonkeyTestRandomEngine* random_; // Does not own. 122 const SchemaProto* schema_; // Does not own. 123 const IcingMonkeyTestRunnerConfiguration* config_; // Does not own. 124 125 uint32_t num_docs_generated_ = 0; 126 Clock clock_; 127 }; 128 129 } // namespace lib 130 } // namespace icing 131 132 #endif // ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_ 133