1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_TESTING_DOCUMENT_GENERATOR_H_ 16 #define ICING_TESTING_DOCUMENT_GENERATOR_H_ 17 18 #include <random> 19 #include <string> 20 #include <vector> 21 22 #include "icing/document-builder.h" 23 #include "icing/proto/document.pb.h" 24 #include "icing/proto/schema.pb.h" 25 26 namespace icing { 27 namespace lib { 28 29 class EvenDistributionNamespaceSelector { 30 public: EvenDistributionNamespaceSelector(const std::vector<std::string> & namespaces)31 explicit EvenDistributionNamespaceSelector( 32 const std::vector<std::string>& namespaces) 33 : namespaces_(&namespaces), num_invocations_(0) {} operator()34 const std::string& operator()() { 35 return namespaces_->at(num_invocations_++ % namespaces_->size()); 36 } 37 38 private: 39 const std::vector<std::string>* namespaces_; 40 int num_invocations_; 41 }; 42 43 class EvenDistributionTypeSelector { 44 public: EvenDistributionTypeSelector(const SchemaProto & schema)45 explicit EvenDistributionTypeSelector(const SchemaProto& schema) 46 : schema_(&schema), num_invocations_(0) {} operator()47 const SchemaTypeConfigProto& operator()() { 48 return schema_->types(num_invocations_++ % schema_->types_size()); 49 } 50 51 private: 52 const SchemaProto* schema_; 53 int num_invocations_; 54 }; 55 56 template <typename Rand> 57 class UniformDistributionLanguageTokenGenerator { 58 public: UniformDistributionLanguageTokenGenerator(const std::vector<std::string> & language,Rand * r)59 explicit UniformDistributionLanguageTokenGenerator( 60 const std::vector<std::string>& language, Rand* r) 61 : language_(&language), 62 rand_(r), 63 dist_(0, language.size() - 1), 64 num_invocations_(0) {} operator()65 const std::string& operator()() { return language_->at(dist_(*rand_)); } 66 67 private: 68 const std::vector<std::string>* language_; 69 Rand* rand_; 70 std::uniform_int_distribution<> dist_; 71 int num_invocations_; 72 }; 73 74 template <typename NamespaceSelector, typename TypeSelector, 75 typename TokenGenerator> 76 class DocumentGenerator { 77 public: DocumentGenerator(NamespaceSelector * namespaces,TypeSelector * schema_types,TokenGenerator * tokens,int doc_content_size)78 explicit DocumentGenerator(NamespaceSelector* namespaces, 79 TypeSelector* schema_types, TokenGenerator* tokens, 80 int doc_content_size) 81 : namespaces_(namespaces), 82 schema_types_(schema_types), 83 tokens_(tokens), 84 doc_content_size_(doc_content_size), 85 num_docs_generated_(0) {} 86 generateDoc()87 DocumentProto generateDoc() { 88 const SchemaTypeConfigProto& type_config = (*schema_types_)(); 89 const std::string& name_space = (*namespaces_)(); 90 DocumentBuilder doc_builder = DocumentBuilder() 91 .SetNamespace(name_space) 92 .SetSchema(type_config.schema_type()) 93 .SetUri(GetUri()); 94 // Reserve room to add a token for the namespace in the first section. This 95 // ensures that each document will contain at least one token that will be 96 // stable across all runs. 97 std::string starting_content = name_space + " "; 98 // Distribute content evenly between all properties, but add a token with 99 // the namespace to the first property. 100 int prop_content_size = (doc_content_size_ - starting_content.length()) / 101 type_config.properties_size(); 102 for (const PropertyConfigProto& prop : type_config.properties()) { 103 doc_builder.AddStringProperty( 104 prop.property_name(), 105 starting_content + GetPropertyContent(prop_content_size, name_space)); 106 // We've added the namespace token now. No need for more starting_content. 107 starting_content.clear(); 108 } 109 ++num_docs_generated_; 110 return doc_builder.Build(); 111 } 112 113 private: GetUri()114 std::string GetUri() { return std::to_string(num_docs_generated_); } GetPropertyContent(int content_size,const std::string & name_space)115 std::string GetPropertyContent(int content_size, 116 const std::string& name_space) { 117 std::string content; 118 while (content.size() < content_size) { 119 content += " " + (*tokens_)(); 120 } 121 return content; 122 } 123 124 NamespaceSelector* namespaces_; 125 TypeSelector* schema_types_; 126 TokenGenerator* tokens_; 127 int doc_content_size_; 128 int num_docs_generated_; 129 }; 130 131 } // namespace lib 132 } // namespace icing 133 134 #endif // ICING_TESTING_DOCUMENT_GENERATOR_H_ 135