// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
#define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_

#include <cstdint>
#include <random>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "icing/monkey_test/monkey-test-common-words.h"
#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/util/clock.h"

namespace icing {
namespace lib {

// A random schema generator used for monkey testing.
37 class MonkeySchemaGenerator { 38 public: 39 struct UpdateSchemaResult { 40 SchemaProto schema; 41 bool is_invalid_schema; 42 std::unordered_set<std::string> schema_types_deleted; 43 std::unordered_set<std::string> schema_types_incompatible; 44 std::unordered_set<std::string> schema_types_index_incompatible; 45 }; 46 MonkeySchemaGenerator(MonkeyTestRandomEngine * random,const IcingMonkeyTestRunnerConfiguration * config)47 explicit MonkeySchemaGenerator( 48 MonkeyTestRandomEngine* random, 49 const IcingMonkeyTestRunnerConfiguration* config) 50 : random_(random), config_(config) {} 51 52 SchemaProto GenerateSchema(); 53 54 UpdateSchemaResult UpdateSchema(const SchemaProto& schema); 55 56 private: 57 PropertyConfigProto GenerateProperty( 58 const SchemaTypeConfigProto& type_config, 59 PropertyConfigProto::Cardinality::Code cardinality, 60 TermMatchType::Code term_match_type); 61 62 void UpdateProperty(const SchemaTypeConfigProto& type_config, 63 PropertyConfigProto& property, 64 UpdateSchemaResult& result); 65 66 SchemaTypeConfigProto GenerateType(); 67 68 void UpdateType(SchemaTypeConfigProto& type_config, 69 UpdateSchemaResult& result); 70 71 int num_types_generated_ = 0; 72 // A map from type name to the number of properties generated in the 73 // corresponding types. 74 std::unordered_map<std::string, int> num_properties_generated_; 75 76 MonkeyTestRandomEngine* random_; // Does not own. 77 const IcingMonkeyTestRunnerConfiguration* config_; // Does not own. 78 }; 79 80 // A random document generator used for monkey testing. 81 // When num_uris is 0, all documents generated get different URIs. Otherwise, 82 // URIs will be randomly picked from a set with num_uris elements. 83 // Same for num_namespaces. 
84 class MonkeyDocumentGenerator { 85 public: MonkeyDocumentGenerator(MonkeyTestRandomEngine * random,const SchemaProto * schema,const IcingMonkeyTestRunnerConfiguration * config)86 explicit MonkeyDocumentGenerator( 87 MonkeyTestRandomEngine* random, const SchemaProto* schema, 88 const IcingMonkeyTestRunnerConfiguration* config) 89 : random_(random), schema_(schema), config_(config) {} 90 GetType()91 const SchemaTypeConfigProto& GetType() const { 92 std::uniform_int_distribution<> dist(0, schema_->types_size() - 1); 93 return schema_->types(dist(*random_)); 94 } 95 GetToken()96 std::string_view GetToken() const { 97 // TODO: Instead of randomly picking tokens from the language set 98 // kCommonWords, we can make some words more common than others to simulate 99 // term frequencies in the real world. This can help us get extremely large 100 // posting lists. 101 std::uniform_int_distribution<> dist(0, kCommonWords.size() - 1); 102 return kCommonWords[dist(*random_)]; 103 } 104 105 std::string GetNamespace() const; 106 107 std::string GetUri() const; 108 109 int GetNumTokens() const; 110 111 std::vector<std::string> GetPropertyContent() const; 112 113 MonkeyTokenizedDocument GenerateDocument(); 114 115 private: 116 MonkeyTestRandomEngine* random_; // Does not own. 117 const SchemaProto* schema_; // Does not own. 118 const IcingMonkeyTestRunnerConfiguration* config_; // Does not own. 119 120 uint32_t num_docs_generated_ = 0; 121 Clock clock_; 122 }; 123 124 } // namespace lib 125 } // namespace icing 126 127 #endif // ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_ 128