• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
16 #define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
17 
18 #include <cstdint>
19 #include <random>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "icing/monkey_test/monkey-test-common-words.h"
27 #include "icing/monkey_test/monkey-test-util.h"
28 #include "icing/monkey_test/monkey-tokenized-document.h"
29 #include "icing/proto/schema.pb.h"
30 #include "icing/util/clock.h"
31 
32 namespace icing {
33 namespace lib {
34 
35 // A random schema generator used for monkey testing.
36 class MonkeySchemaGenerator {
37  public:
38   struct UpdateSchemaResult {
39     SchemaProto schema;
40     bool is_invalid_schema;
41     std::unordered_set<std::string> schema_types_deleted;
42     std::unordered_set<std::string> schema_types_incompatible;
43     std::unordered_set<std::string> schema_types_index_incompatible;
44   };
45 
MonkeySchemaGenerator(MonkeyTestRandomEngine * random,const IcingMonkeyTestRunnerConfiguration * config)46   explicit MonkeySchemaGenerator(
47       MonkeyTestRandomEngine* random,
48       const IcingMonkeyTestRunnerConfiguration* config)
49       : random_(random), config_(config) {}
50 
51   SchemaProto GenerateSchema();
52 
53   UpdateSchemaResult UpdateSchema(const SchemaProto& schema);
54 
55  private:
56   PropertyConfigProto GenerateProperty(
57       const SchemaTypeConfigProto& type_config,
58       PropertyConfigProto::Cardinality::Code cardinality, bool indexable);
59 
60   void UpdateProperty(const SchemaTypeConfigProto& type_config,
61                       PropertyConfigProto& property,
62                       UpdateSchemaResult& result);
63 
64   SchemaTypeConfigProto GenerateType();
65 
66   void UpdateType(SchemaTypeConfigProto& type_config,
67                   UpdateSchemaResult& result);
68 
69   int num_types_generated_ = 0;
70   // A map from type name to the number of properties generated in the
71   // corresponding types.
72   std::unordered_map<std::string, int> num_properties_generated_;
73 
74   MonkeyTestRandomEngine* random_;                    // Does not own.
75   const IcingMonkeyTestRunnerConfiguration* config_;  // Does not own.
76 };
77 
78 // A random document generator used for monkey testing.
79 // When num_uris is 0, all documents generated get different URIs. Otherwise,
80 // URIs will be randomly picked from a set with num_uris elements.
81 // Same for num_namespaces.
82 class MonkeyDocumentGenerator {
83  public:
MonkeyDocumentGenerator(MonkeyTestRandomEngine * random,const SchemaProto * schema,const IcingMonkeyTestRunnerConfiguration * config)84   explicit MonkeyDocumentGenerator(
85       MonkeyTestRandomEngine* random, const SchemaProto* schema,
86       const IcingMonkeyTestRunnerConfiguration* config)
87       : random_(random), schema_(schema), config_(config) {}
88 
GetType()89   const SchemaTypeConfigProto& GetType() const {
90     std::uniform_int_distribution<> dist(0, schema_->types_size() - 1);
91     return schema_->types(dist(*random_));
92   }
93 
GetToken()94   std::string_view GetToken() const {
95     // TODO: Instead of randomly picking tokens from the language set
96     // kCommonWords, we can make some words more common than others to simulate
97     // term frequencies in the real world. This can help us get extremely large
98     // posting lists.
99     std::uniform_int_distribution<> dist(0, kCommonWords.size() - 1);
100     return kCommonWords[dist(*random_)];
101   }
102 
103   PropertyProto::VectorProto GetRandomVector() const;
104 
105   std::string GetNamespace() const;
106 
107   std::string GetUri() const;
108 
109   int GetNumTokens() const;
110 
111   int GetNumVectors(PropertyConfigProto::Cardinality::Code cardinality) const;
112 
113   std::vector<std::string> GetStringPropertyContent() const;
114 
115   std::vector<PropertyProto::VectorProto> GetVectorPropertyContent(
116       PropertyConfigProto::Cardinality::Code cardinality) const;
117 
118   MonkeyTokenizedDocument GenerateDocument();
119 
120  private:
121   MonkeyTestRandomEngine* random_;                    // Does not own.
122   const SchemaProto* schema_;                         // Does not own.
123   const IcingMonkeyTestRunnerConfiguration* config_;  // Does not own.
124 
125   uint32_t num_docs_generated_ = 0;
126   Clock clock_;
127 };
128 
129 }  // namespace lib
130 }  // namespace icing
131 
132 #endif  // ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
133