• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
16 #define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
17 
18 #include <cstdint>
19 #include <random>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "icing/monkey_test/monkey-test-common-words.h"
27 #include "icing/monkey_test/monkey-test-util.h"
28 #include "icing/monkey_test/monkey-tokenized-document.h"
29 #include "icing/proto/schema.pb.h"
30 #include "icing/proto/term.pb.h"
31 #include "icing/util/clock.h"
32 
33 namespace icing {
34 namespace lib {
35 
36 // A random schema generator used for monkey testing.
37 class MonkeySchemaGenerator {
38  public:
39   struct UpdateSchemaResult {
40     SchemaProto schema;
41     bool is_invalid_schema;
42     std::unordered_set<std::string> schema_types_deleted;
43     std::unordered_set<std::string> schema_types_incompatible;
44     std::unordered_set<std::string> schema_types_index_incompatible;
45   };
46 
MonkeySchemaGenerator(MonkeyTestRandomEngine * random,const IcingMonkeyTestRunnerConfiguration * config)47   explicit MonkeySchemaGenerator(
48       MonkeyTestRandomEngine* random,
49       const IcingMonkeyTestRunnerConfiguration* config)
50       : random_(random), config_(config) {}
51 
52   SchemaProto GenerateSchema();
53 
54   UpdateSchemaResult UpdateSchema(const SchemaProto& schema);
55 
56  private:
57   PropertyConfigProto GenerateProperty(
58       const SchemaTypeConfigProto& type_config,
59       PropertyConfigProto::Cardinality::Code cardinality,
60       TermMatchType::Code term_match_type);
61 
62   void UpdateProperty(const SchemaTypeConfigProto& type_config,
63                       PropertyConfigProto& property,
64                       UpdateSchemaResult& result);
65 
66   SchemaTypeConfigProto GenerateType();
67 
68   void UpdateType(SchemaTypeConfigProto& type_config,
69                   UpdateSchemaResult& result);
70 
71   int num_types_generated_ = 0;
72   // A map from type name to the number of properties generated in the
73   // corresponding types.
74   std::unordered_map<std::string, int> num_properties_generated_;
75 
76   MonkeyTestRandomEngine* random_;                    // Does not own.
77   const IcingMonkeyTestRunnerConfiguration* config_;  // Does not own.
78 };
79 
80 // A random document generator used for monkey testing.
81 // When num_uris is 0, all documents generated get different URIs. Otherwise,
82 // URIs will be randomly picked from a set with num_uris elements.
83 // Same for num_namespaces.
84 class MonkeyDocumentGenerator {
85  public:
MonkeyDocumentGenerator(MonkeyTestRandomEngine * random,const SchemaProto * schema,const IcingMonkeyTestRunnerConfiguration * config)86   explicit MonkeyDocumentGenerator(
87       MonkeyTestRandomEngine* random, const SchemaProto* schema,
88       const IcingMonkeyTestRunnerConfiguration* config)
89       : random_(random), schema_(schema), config_(config) {}
90 
GetType()91   const SchemaTypeConfigProto& GetType() const {
92     std::uniform_int_distribution<> dist(0, schema_->types_size() - 1);
93     return schema_->types(dist(*random_));
94   }
95 
GetToken()96   std::string_view GetToken() const {
97     // TODO: Instead of randomly picking tokens from the language set
98     // kCommonWords, we can make some words more common than others to simulate
99     // term frequencies in the real world. This can help us get extremely large
100     // posting lists.
101     std::uniform_int_distribution<> dist(0, kCommonWords.size() - 1);
102     return kCommonWords[dist(*random_)];
103   }
104 
105   std::string GetNamespace() const;
106 
107   std::string GetUri() const;
108 
109   int GetNumTokens() const;
110 
111   std::vector<std::string> GetPropertyContent() const;
112 
113   MonkeyTokenizedDocument GenerateDocument();
114 
115  private:
116   MonkeyTestRandomEngine* random_;                    // Does not own.
117   const SchemaProto* schema_;                         // Does not own.
118   const IcingMonkeyTestRunnerConfiguration* config_;  // Does not own.
119 
120   uint32_t num_docs_generated_ = 0;
121   Clock clock_;
122 };
123 
124 }  // namespace lib
125 }  // namespace icing
126 
127 #endif  // ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
128