• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TESTING_DOCUMENT_GENERATOR_H_
16 #define ICING_TESTING_DOCUMENT_GENERATOR_H_
17 
18 #include <random>
19 #include <string>
20 #include <vector>
21 
22 #include "icing/document-builder.h"
23 #include "icing/proto/document.pb.h"
24 #include "icing/proto/schema.pb.h"
25 
26 namespace icing {
27 namespace lib {
28 
// Functor that hands out namespaces round-robin: the i-th invocation
// (counting from zero) returns namespaces[i % namespaces.size()].
//
// Only a pointer to `namespaces` is stored, so the vector must outlive the
// selector. Its size is re-read on every call.
class EvenDistributionNamespaceSelector {
 public:
  explicit EvenDistributionNamespaceSelector(
      const std::vector<std::string>& namespaces)
      : namespaces_(&namespaces), num_invocations_(0) {}

  // Returns the next namespace in the cycle and advances the cycle.
  //
  // An empty namespace list is reported by the range check in vector::at()
  // instead of by an integer division by zero.
  const std::string& operator()() {
    const std::vector<std::string>::size_type count = namespaces_->size();
    const std::vector<std::string>::size_type index =
        count == 0 ? 0 : num_invocations_ % count;
    ++num_invocations_;
    return namespaces_->at(index);
  }

 private:
  const std::vector<std::string>* namespaces_;
  // Unsigned so that wrap-around after very many calls is well defined and
  // so that the modulo above does not mix signed and unsigned operands.
  std::vector<std::string>::size_type num_invocations_;
};
42 
43 class EvenDistributionTypeSelector {
44  public:
EvenDistributionTypeSelector(const SchemaProto & schema)45   explicit EvenDistributionTypeSelector(const SchemaProto& schema)
46       : schema_(&schema), num_invocations_(0) {}
operator()47   const SchemaTypeConfigProto& operator()() {
48     return schema_->types(num_invocations_++ % schema_->types_size());
49   }
50 
51  private:
52   const SchemaProto* schema_;
53   int num_invocations_;
54 };
55 
// Functor that returns a token drawn uniformly at random from `language`.
//
// `Rand` is the random engine type; the engine is passed to a
// std::uniform_int_distribution on every call. Only pointers to `language`
// and to the engine are stored, so both must outlive the generator.
//
// The index range [0, language.size() - 1] is fixed at construction time.
template <typename Rand>
class UniformDistributionLanguageTokenGenerator {
 public:
  explicit UniformDistributionLanguageTokenGenerator(
      const std::vector<std::string>& language, Rand* r)
      : language_(&language),
        rand_(r),
        // Clamp the upper bound for an empty language: size() - 1 would wrap
        // around and give the distribution an invalid (a > b) range. With the
        // clamp, an empty language is reported by vector::at() on first use.
        dist_(0, language.empty() ? 0
                                  : static_cast<int>(language.size() - 1)) {}

  // Returns a reference to a randomly chosen element of the language.
  const std::string& operator()() { return language_->at(dist_(*rand_)); }

 private:
  const std::vector<std::string>* language_;
  Rand* rand_;
  std::uniform_int_distribution<> dist_;
};
73 
74 template <typename NamespaceSelector, typename TypeSelector,
75           typename TokenGenerator>
76 class DocumentGenerator {
77  public:
DocumentGenerator(NamespaceSelector * namespaces,TypeSelector * schema_types,TokenGenerator * tokens,int doc_content_size)78   explicit DocumentGenerator(NamespaceSelector* namespaces,
79                              TypeSelector* schema_types, TokenGenerator* tokens,
80                              int doc_content_size)
81       : namespaces_(namespaces),
82         schema_types_(schema_types),
83         tokens_(tokens),
84         doc_content_size_(doc_content_size),
85         num_docs_generated_(0) {}
86 
generateDoc()87   DocumentProto generateDoc() {
88     const SchemaTypeConfigProto& type_config = (*schema_types_)();
89     const std::string& name_space = (*namespaces_)();
90     DocumentBuilder doc_builder = DocumentBuilder()
91                                       .SetNamespace(name_space)
92                                       .SetSchema(type_config.schema_type())
93                                       .SetUri(GetUri());
94     // Reserve room to add a token for the namespace in the first section. This
95     // ensures that each document will contain at least one token that will be
96     // stable across all runs.
97     std::string starting_content = name_space + " ";
98     // Distribute content evenly between all properties, but add a token with
99     // the namespace to the first property.
100     int prop_content_size = (doc_content_size_ - starting_content.length()) /
101                             type_config.properties_size();
102     for (const PropertyConfigProto& prop : type_config.properties()) {
103       doc_builder.AddStringProperty(
104           prop.property_name(),
105           starting_content + GetPropertyContent(prop_content_size, name_space));
106       // We've added the namespace token now. No need for more starting_content.
107       starting_content.clear();
108     }
109     ++num_docs_generated_;
110     return doc_builder.Build();
111   }
112 
113  private:
GetUri()114   std::string GetUri() { return std::to_string(num_docs_generated_); }
GetPropertyContent(int content_size,const std::string & name_space)115   std::string GetPropertyContent(int content_size,
116                                  const std::string& name_space) {
117     std::string content;
118     while (content.size() < content_size) {
119       content += " " + (*tokens_)();
120     }
121     return content;
122   }
123 
124   NamespaceSelector* namespaces_;
125   TypeSelector* schema_types_;
126   TokenGenerator* tokens_;
127   int doc_content_size_;
128   int num_docs_generated_;
129 };
130 
131 }  // namespace lib
132 }  // namespace icing
133 
134 #endif  // ICING_TESTING_DOCUMENT_GENERATOR_H_
135