• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <unistd.h>
16 
17 #include <fstream>
18 #include <iostream>
19 #include <limits>
20 #include <memory>
21 #include <numeric>
22 #include <ostream>
23 #include <random>
24 #include <sstream>
25 #include <stdexcept>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <vector>
30 
31 #include "testing/base/public/benchmark.h"
32 #include "gmock/gmock.h"
33 #include "gtest/gtest.h"
34 #include "icing/document-builder.h"
35 #include "icing/file/filesystem.h"
36 #include "icing/icing-search-engine.h"
37 #include "icing/join/join-processor.h"
38 #include "icing/proto/document.pb.h"
39 #include "icing/proto/initialize.pb.h"
40 #include "icing/proto/persist.pb.h"
41 #include "icing/proto/reset.pb.h"
42 #include "icing/proto/schema.pb.h"
43 #include "icing/proto/scoring.pb.h"
44 #include "icing/proto/search.pb.h"
45 #include "icing/proto/status.pb.h"
46 #include "icing/proto/term.pb.h"
47 #include "icing/query/query-features.h"
48 #include "icing/schema-builder.h"
49 #include "icing/testing/common-matchers.h"
50 #include "icing/testing/document-generator.h"
51 #include "icing/testing/numeric/number-generator.h"
52 #include "icing/testing/numeric/uniform-distribution-integer-generator.h"
53 #include "icing/testing/random-string.h"
54 #include "icing/testing/schema-generator.h"
55 #include "icing/testing/tmp-directory.h"
56 
57 // Run on a Linux workstation:
58 //    $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
59 //    //icing:icing-search-engine_benchmark
60 //
61 //    $ blaze-bin/icing/icing-search-engine_benchmark
62 //    --benchmark_filter=all --benchmark_memory_usage
63 //
64 // Run on an Android device:
65 //    $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
66 //    --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
67 //    //icing:icing-search-engine_benchmark
68 //
69 //    $ adb push blaze-bin/icing/icing-search-engine_benchmark
70 //    /data/local/tmp/
71 //
72 //    $ adb shell /data/local/tmp/icing-search-engine_benchmark
73 //    --benchmark_filter=all
74 
75 namespace icing {
76 namespace lib {
77 
78 namespace {
79 
80 using ::testing::Eq;
81 using ::testing::HasSubstr;
82 
83 // Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the
84 // 95th pct. Most clients use a single type. This is a function of Icing's
85 // constrained type offering. Assume that each package will use 3 types on
86 // average.
87 constexpr int kAvgNumNamespaces = 10;
88 constexpr int kAvgNumTypes = 3;
89 
90 // ASSUME: Types will have at most ten properties. Types will be created
91 // with [1, 10] properties.
92 constexpr int kMaxNumProperties = 10;
93 
94 // Based on logs from Icing GMSCore.
95 constexpr int kAvgDocumentSize = 300;
96 
97 // ASSUME: ~70% of the document's size comes from its content.
98 constexpr float kContentSizePct = 0.7;
99 
100 constexpr int kLanguageSize = 1000;
101 
102 // Lite Index size required to fit 128k docs, each doc requires ~64 bytes of
103 // space in the lite index.
104 constexpr int kIcingFullIndexSize = 1024 * 1024 * 8;
105 
106 // Query params
107 constexpr int kNumPerPage = 10;
108 constexpr int kNumToSnippet = 10000;
109 constexpr int kMatchesPerProperty = 1;
110 
// Builds `num_namespaces` synthetic namespace names of the form
// "comgooglepackage<N>". Names are emitted with descending suffixes, i.e. the
// first element carries suffix (num_namespaces - 1) and the last carries 0.
// A count of zero or less produces an empty vector.
std::vector<std::string> CreateNamespaces(int num_namespaces) {
  std::vector<std::string> result;
  for (int suffix = num_namespaces - 1; suffix >= 0; --suffix) {
    result.push_back("comgooglepackage" + std::to_string(suffix));
  }
  return result;
}
118 
CreateSearchSpec(const std::string & query,const std::vector<std::string> & namespaces,TermMatchType::Code match_type)119 SearchSpecProto CreateSearchSpec(const std::string& query,
120                                  const std::vector<std::string>& namespaces,
121                                  TermMatchType::Code match_type) {
122   SearchSpecProto search_spec;
123   search_spec.set_query(query);
124   for (const std::string& name_space : namespaces) {
125     search_spec.add_namespace_filters(name_space);
126   }
127   search_spec.set_term_match_type(match_type);
128   return search_spec;
129 }
130 
CreateResultSpec(int num_per_page,int num_to_snippet,int matches_per_property)131 ResultSpecProto CreateResultSpec(int num_per_page, int num_to_snippet,
132                                  int matches_per_property) {
133   ResultSpecProto result_spec;
134   result_spec.set_num_per_page(num_per_page);
135   result_spec.mutable_snippet_spec()->set_num_to_snippet(num_to_snippet);
136   result_spec.mutable_snippet_spec()->set_num_matches_per_property(
137       matches_per_property);
138   return result_spec;
139 }
140 
CreateScoringSpec(ScoringSpecProto::RankingStrategy::Code ranking_strategy)141 ScoringSpecProto CreateScoringSpec(
142     ScoringSpecProto::RankingStrategy::Code ranking_strategy) {
143   ScoringSpecProto scoring_spec;
144   scoring_spec.set_rank_by(ranking_strategy);
145   return scoring_spec;
146 }
147 
148 class DestructibleDirectory {
149  public:
DestructibleDirectory(const Filesystem & filesystem,const std::string & dir)150   explicit DestructibleDirectory(const Filesystem& filesystem,
151                                  const std::string& dir)
152       : filesystem_(filesystem), dir_(dir) {
153     filesystem_.DeleteDirectoryRecursively(dir_.c_str());
154     filesystem_.CreateDirectoryRecursively(dir_.c_str());
155   }
~DestructibleDirectory()156   ~DestructibleDirectory() {
157     filesystem_.DeleteDirectoryRecursively(dir_.c_str());
158   }
159 
160  private:
161   Filesystem filesystem_;
162   std::string dir_;
163 };
164 
GenerateRandomDocuments(EvenDistributionTypeSelector * type_selector,int num_docs,const std::vector<std::string> & language)165 std::vector<DocumentProto> GenerateRandomDocuments(
166     EvenDistributionTypeSelector* type_selector, int num_docs,
167     const std::vector<std::string>& language) {
168   std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
169   EvenDistributionNamespaceSelector namespace_selector(namespaces);
170 
171   std::default_random_engine random;
172   UniformDistributionLanguageTokenGenerator<std::default_random_engine>
173       token_generator(language, &random);
174 
175   DocumentGenerator<
176       EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
177       UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
178       generator(&namespace_selector, type_selector, &token_generator,
179                 kAvgDocumentSize * kContentSizePct);
180 
181   std::vector<DocumentProto> random_docs;
182   random_docs.reserve(num_docs);
183   for (int i = 0; i < num_docs; i++) {
184     random_docs.push_back(generator.generateDoc());
185   }
186   return random_docs;
187 }
188 
CreateIntegerGenerator(size_t num_documents)189 std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator(
190     size_t num_documents) {
191   // Since the collision # follows poisson distribution with lambda =
192   // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many
193   // collisions.
194   //
195   // Distribution:
196   // - keys in range being picked for 0 times: 90.5%
197   // - keys in range being picked for 1 time:  9%
198   // - keys in range being picked for 2 times: 0.45%
199   // - keys in range being picked for 3 times: 0.015%
200   //
201   // For example, num_keys = 1M, range = 10M. Then there will be ~904837 unique
202   // keys, 45242 keys being picked twice, 1508 keys being picked thrice ...
203   return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
204       /*seed=*/12345, /*range_lower=*/0,
205       /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1);
206 }
207 
BM_IndexLatency(benchmark::State & state)208 void BM_IndexLatency(benchmark::State& state) {
209   // Initialize the filesystem
210   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
211   Filesystem filesystem;
212   DestructibleDirectory ddir(filesystem, test_dir);
213 
214   // Create the schema.
215   std::default_random_engine random;
216   int num_types = kAvgNumNamespaces * kAvgNumTypes;
217   ExactStringPropertyGenerator property_generator;
218   SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
219       /*num_properties=*/state.range(1), &property_generator);
220   SchemaProto schema = schema_generator.GenerateSchema(num_types);
221   EvenDistributionTypeSelector type_selector(schema);
222 
223   // Create the index.
224   IcingSearchEngineOptions options;
225   options.set_base_dir(test_dir);
226   std::unique_ptr<IcingSearchEngine> icing =
227       std::make_unique<IcingSearchEngine>(options);
228 
229   int num_docs = state.range(0);
230   std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
231   const std::vector<DocumentProto> random_docs =
232       GenerateRandomDocuments(&type_selector, num_docs, language);
233   for (auto _ : state) {
234     state.PauseTiming();
235     ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
236     ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
237     state.ResumeTiming();
238     for (const DocumentProto& doc : random_docs) {
239       ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
240     }
241   }
242 }
243 BENCHMARK(BM_IndexLatency)
244     // Arguments: num_indexed_documents, num_sections
245     ->ArgPair(1000000, 5);
246 
BM_QueryLatency(benchmark::State & state)247 void BM_QueryLatency(benchmark::State& state) {
248   // Initialize the filesystem
249   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
250   Filesystem filesystem;
251   DestructibleDirectory ddir(filesystem, test_dir);
252 
253   // Create the schema.
254   std::default_random_engine random;
255   int num_types = kAvgNumNamespaces * kAvgNumTypes;
256   ExactStringPropertyGenerator property_generator;
257   SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
258       /*num_properties=*/state.range(1), &property_generator);
259   SchemaProto schema = schema_generator.GenerateSchema(num_types);
260   EvenDistributionTypeSelector type_selector(schema);
261 
262   // Create the index.
263   IcingSearchEngineOptions options;
264   options.set_base_dir(test_dir);
265   options.set_index_merge_size(kIcingFullIndexSize);
266   std::unique_ptr<IcingSearchEngine> icing =
267       std::make_unique<IcingSearchEngine>(options);
268 
269   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
270   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
271 
272   int num_docs = state.range(0);
273   std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
274   const std::vector<DocumentProto> random_docs =
275       GenerateRandomDocuments(&type_selector, num_docs, language);
276   for (const DocumentProto& doc : random_docs) {
277     ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
278   }
279 
280   SearchSpecProto search_spec = CreateSearchSpec(
281       language.at(0), std::vector<std::string>(), TermMatchType::PREFIX);
282   ResultSpecProto result_spec = CreateResultSpec(1, 1000000, 1000000);
283   ScoringSpecProto scoring_spec =
284       CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
285   for (auto _ : state) {
286     SearchResultProto results = icing->Search(
287         search_spec, ScoringSpecProto::default_instance(), result_spec);
288   }
289 }
290 BENCHMARK(BM_QueryLatency)
291     // Arguments: num_indexed_documents, num_sections
292     ->ArgPair(1000000, 2);
293 
BM_IndexThroughput(benchmark::State & state)294 void BM_IndexThroughput(benchmark::State& state) {
295   // Initialize the filesystem
296   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
297   Filesystem filesystem;
298   DestructibleDirectory ddir(filesystem, test_dir);
299 
300   // Create the schema.
301   std::default_random_engine random;
302   int num_types = kAvgNumNamespaces * kAvgNumTypes;
303   ExactStringPropertyGenerator property_generator;
304   SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
305       /*num_properties=*/state.range(1), &property_generator);
306   SchemaProto schema = schema_generator.GenerateSchema(num_types);
307   EvenDistributionTypeSelector type_selector(schema);
308 
309   // Create the index.
310   IcingSearchEngineOptions options;
311   options.set_base_dir(test_dir);
312   options.set_index_merge_size(kIcingFullIndexSize);
313   std::unique_ptr<IcingSearchEngine> icing =
314       std::make_unique<IcingSearchEngine>(options);
315 
316   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
317   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
318 
319   int num_docs = state.range(0);
320   std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
321   const std::vector<DocumentProto> random_docs =
322       GenerateRandomDocuments(&type_selector, num_docs, language);
323   for (auto s : state) {
324     for (const DocumentProto& doc : random_docs) {
325       ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
326     }
327   }
328   state.SetItemsProcessed(state.iterations() * num_docs);
329 }
330 BENCHMARK(BM_IndexThroughput)
331     // Arguments: num_indexed_documents, num_sections
332     ->ArgPair(1, 1)
333     ->ArgPair(2, 1)
334     ->ArgPair(8, 1)
335     ->ArgPair(32, 1)
336     ->ArgPair(128, 1)
337     ->ArgPair(1 << 10, 1)
338     ->ArgPair(1 << 13, 1)
339     ->ArgPair(1 << 15, 1)
340     ->ArgPair(1 << 17, 1)
341     ->ArgPair(1, 5)
342     ->ArgPair(2, 5)
343     ->ArgPair(8, 5)
344     ->ArgPair(32, 5)
345     ->ArgPair(128, 5)
346     ->ArgPair(1 << 10, 5)
347     ->ArgPair(1 << 13, 5)
348     ->ArgPair(1 << 15, 5)
349     ->ArgPair(1 << 17, 5)
350     ->ArgPair(1, 10)
351     ->ArgPair(2, 10)
352     ->ArgPair(8, 10)
353     ->ArgPair(32, 10)
354     ->ArgPair(128, 10)
355     ->ArgPair(1 << 10, 10)
356     ->ArgPair(1 << 13, 10)
357     ->ArgPair(1 << 15, 10)
358     ->ArgPair(1 << 17, 10);
359 
BM_MutlipleIndices(benchmark::State & state)360 void BM_MutlipleIndices(benchmark::State& state) {
361   // Initialize the filesystem
362   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
363   Filesystem filesystem;
364   DestructibleDirectory ddir(filesystem, test_dir);
365 
366   // Create the schema.
367   std::default_random_engine random;
368   int num_types = kAvgNumNamespaces * kAvgNumTypes;
369   ExactStringPropertyGenerator property_generator;
370   RandomSchemaGenerator<std::default_random_engine,
371                         ExactStringPropertyGenerator>
372       schema_generator(&random, &property_generator);
373   SchemaProto schema =
374       schema_generator.GenerateSchema(num_types, kMaxNumProperties);
375   EvenDistributionTypeSelector type_selector(schema);
376 
377   // Create the indices.
378   std::vector<std::unique_ptr<IcingSearchEngine>> icings;
379   int num_indices = state.range(0);
380   for (int i = 0; i < num_indices; ++i) {
381     IcingSearchEngineOptions options;
382     std::string base_dir = test_dir + "/" + std::to_string(i);
383     options.set_base_dir(base_dir);
384     options.set_index_merge_size(kIcingFullIndexSize / num_indices);
385     auto icing = std::make_unique<IcingSearchEngine>(options);
386 
387     ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
388     ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
389     icings.push_back(std::move(icing));
390   }
391 
392   // Setup namespace info and language
393   std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
394   EvenDistributionNamespaceSelector namespace_selector(namespaces);
395 
396   std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
397   UniformDistributionLanguageTokenGenerator<std::default_random_engine>
398       token_generator(language, &random);
399 
400   // Fill the index.
401   DocumentGenerator<
402       EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
403       UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
404       generator(&namespace_selector, &type_selector, &token_generator,
405                 kAvgDocumentSize * kContentSizePct);
406   for (int i = 0; i < state.range(1); ++i) {
407     DocumentProto doc = generator.generateDoc();
408     PutResultProto put_result;
409     if (icings.empty()) {
410       ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN));
411       continue;
412     }
413     ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk());
414   }
415 
416   // QUERY!
417   // Every document has its own namespace as a token. This query that should
418   // match 1/kAvgNumNamespace% of all documents.
419   const std::string& name_space = namespaces.at(0);
420   SearchSpecProto search_spec = CreateSearchSpec(
421       /*query=*/name_space, {name_space}, TermMatchType::EXACT_ONLY);
422   ResultSpecProto result_spec =
423       CreateResultSpec(kNumPerPage, kNumToSnippet, kMatchesPerProperty);
424   ScoringSpecProto scoring_spec =
425       CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
426 
427   int num_results = 0;
428   for (auto _ : state) {
429     num_results = 0;
430     SearchResultProto result;
431     if (icings.empty()) {
432       ASSERT_THAT(result.status().code(), Eq(StatusProto::UNKNOWN));
433       continue;
434     }
435     result = icings.at(0)->Search(search_spec, scoring_spec, result_spec);
436     ASSERT_THAT(result.status(), ProtoIsOk());
437     while (!result.results().empty()) {
438       num_results += result.results_size();
439       if (!icings.empty()) {
440         result = icings.at(0)->GetNextPage(result.next_page_token());
441       }
442       ASSERT_THAT(result.status(), ProtoIsOk());
443     }
444   }
445 
446   // Measure size.
447   int64_t disk_usage = filesystem.GetDiskUsage(test_dir.c_str());
448   std::cout << "Num results:\t" << num_results << "\t\tDisk Use:\t"
449             << disk_usage / 1024.0 << std::endl;
450 }
451 BENCHMARK(BM_MutlipleIndices)
452     // First argument: num_indices, Second argument: num_total_documents
453     // So each index will contain (num_total_documents / num_indices) documents.
454     ->ArgPair(0, 0)
455     ->ArgPair(0, 1024)
456     ->ArgPair(0, 131072)
457     ->ArgPair(1, 0)
458     ->ArgPair(1, 1)
459     ->ArgPair(1, 2)
460     ->ArgPair(1, 8)
461     ->ArgPair(1, 32)
462     ->ArgPair(1, 128)
463     ->ArgPair(1, 1024)
464     ->ArgPair(1, 8192)
465     ->ArgPair(1, 32768)
466     ->ArgPair(1, 131072)
467     ->ArgPair(2, 0)
468     ->ArgPair(2, 1)
469     ->ArgPair(2, 2)
470     ->ArgPair(2, 8)
471     ->ArgPair(2, 32)
472     ->ArgPair(2, 128)
473     ->ArgPair(2, 1024)
474     ->ArgPair(2, 8192)
475     ->ArgPair(2, 32768)
476     ->ArgPair(2, 131072)
477     ->ArgPair(10, 0)
478     ->ArgPair(10, 1)
479     ->ArgPair(10, 2)
480     ->ArgPair(10, 8)
481     ->ArgPair(10, 32)
482     ->ArgPair(10, 128)
483     ->ArgPair(10, 1024)
484     ->ArgPair(10, 8192)
485     ->ArgPair(10, 32768)
486     ->ArgPair(10, 131072);
487 
BM_SearchNoStackOverflow(benchmark::State & state)488 void BM_SearchNoStackOverflow(benchmark::State& state) {
489   // Initialize the filesystem
490   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
491   Filesystem filesystem;
492   DestructibleDirectory ddir(filesystem, test_dir);
493 
494   // Create the schema.
495   SchemaProto schema =
496       SchemaBuilder()
497           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
498               PropertyConfigBuilder()
499                   .SetName("body")
500                   .SetDataTypeString(TermMatchType::PREFIX,
501                                      StringIndexingConfig::TokenizerType::PLAIN)
502                   .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
503           .Build();
504 
505   // Create the index.
506   IcingSearchEngineOptions options;
507   options.set_base_dir(test_dir);
508   options.set_index_merge_size(kIcingFullIndexSize);
509   std::unique_ptr<IcingSearchEngine> icing =
510       std::make_unique<IcingSearchEngine>(options);
511 
512   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
513   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
514 
515   // Create a document that has the term "foo"
516   DocumentProto base_document = DocumentBuilder()
517                                     .SetSchema("Message")
518                                     .SetNamespace("namespace")
519                                     .AddStringProperty("body", "foo")
520                                     .Build();
521 
522   // Insert a lot of documents with the term "foo"
523   int64_t num_docs = state.range(0);
524   for (int64_t i = 0; i < num_docs; ++i) {
525     DocumentProto document =
526         DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
527     ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
528   }
529 
530   // Do a query and exclude documents with the term "foo". The way this is
531   // currently implemented is that we'll iterate over all the documents in the
532   // index, then apply the exclusion check. Since all our documents have "foo",
533   // we'll consider it a "miss". Previously with recursion, we would have
534   // recursed until we got a success, which would never happen causing us to
535   // recurse through all the documents and trigger a stack overflow. With
536   // the iterative implementation, we should avoid this.
537   SearchSpecProto search_spec;
538   search_spec.set_query("-foo");
539   search_spec.set_term_match_type(TermMatchType::PREFIX);
540 
541   ResultSpecProto result_spec;
542   ScoringSpecProto scoring_spec;
543   for (auto s : state) {
544     icing->Search(search_spec, scoring_spec, result_spec);
545   }
546 }
547 // For other reasons, we hit a limit when inserting the ~350,000th document. So
548 // cap the limit to 1 << 18.
549 BENCHMARK(BM_SearchNoStackOverflow)
550     ->Range(/*start=*/1 << 10, /*limit=*/1 << 18);
551 
552 // Added for b/184373205. Ensure that we can repeatedly put documents even if
553 // the underlying mmapped areas grow past a few page sizes.
BM_RepeatedPut(benchmark::State & state)554 void BM_RepeatedPut(benchmark::State& state) {
555   // Initialize the filesystem
556   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
557   Filesystem filesystem;
558   DestructibleDirectory ddir(filesystem, test_dir);
559 
560   // Create the schema.
561   SchemaProto schema =
562       SchemaBuilder()
563           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
564               PropertyConfigBuilder()
565                   .SetName("body")
566                   .SetDataTypeString(TermMatchType::PREFIX,
567                                      StringIndexingConfig::TokenizerType::PLAIN)
568                   .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
569           .Build();
570 
571   // Create the index.
572   IcingSearchEngineOptions options;
573   options.set_base_dir(test_dir);
574   options.set_index_merge_size(kIcingFullIndexSize);
575   std::unique_ptr<IcingSearchEngine> icing =
576       std::make_unique<IcingSearchEngine>(options);
577 
578   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
579   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
580 
581   // Create a document that has the term "foo"
582   DocumentProto base_document = DocumentBuilder()
583                                     .SetSchema("Message")
584                                     .SetNamespace("namespace")
585                                     .AddStringProperty("body", "foo")
586                                     .Build();
587 
588   // Insert a lot of documents with the term "foo"
589   int64_t num_docs = state.range(0);
590   for (auto s : state) {
591     for (int64_t i = 0; i < num_docs; ++i) {
592       DocumentProto document =
593           DocumentBuilder(base_document).SetUri("uri").Build();
594       ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
595     }
596   }
597 }
598 // For other reasons, we hit a limit when inserting the ~350,000th document. So
599 // cap the limit to 1 << 18.
600 BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
601 
602 // This is different from BM_RepeatedPut since we're just trying to benchmark
603 // one Put call, not thousands of them at once.
BM_Put(benchmark::State & state)604 void BM_Put(benchmark::State& state) {
605   // Initialize the filesystem
606   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
607   Filesystem filesystem;
608   DestructibleDirectory ddir(filesystem, test_dir);
609 
610   // Create the schema.
611   SchemaProto schema =
612       SchemaBuilder()
613           .AddType(SchemaTypeConfigBuilder().SetType("Message"))
614           .Build();
615 
616   // Create the index.
617   IcingSearchEngineOptions options;
618   options.set_base_dir(test_dir);
619   options.set_index_merge_size(kIcingFullIndexSize);
620   std::unique_ptr<IcingSearchEngine> icing =
621       std::make_unique<IcingSearchEngine>(options);
622 
623   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
624   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
625 
626   // Create a document
627   DocumentProto document = DocumentBuilder()
628                                .SetSchema("Message")
629                                .SetNamespace("namespace")
630                                .SetUri("uri")
631                                .Build();
632 
633   for (auto s : state) {
634     benchmark::DoNotOptimize(icing->Put(document));
635   }
636 }
637 BENCHMARK(BM_Put);
638 
BM_Get(benchmark::State & state)639 void BM_Get(benchmark::State& state) {
640   // Initialize the filesystem
641   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
642   Filesystem filesystem;
643   DestructibleDirectory ddir(filesystem, test_dir);
644 
645   // Create the schema.
646   SchemaProto schema =
647       SchemaBuilder()
648           .AddType(SchemaTypeConfigBuilder().SetType("Message"))
649           .Build();
650 
651   // Create the index.
652   IcingSearchEngineOptions options;
653   options.set_base_dir(test_dir);
654   options.set_index_merge_size(kIcingFullIndexSize);
655   std::unique_ptr<IcingSearchEngine> icing =
656       std::make_unique<IcingSearchEngine>(options);
657 
658   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
659   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
660 
661   // Create a document
662   DocumentProto document = DocumentBuilder()
663                                .SetSchema("Message")
664                                .SetNamespace("namespace")
665                                .SetUri("uri")
666                                .Build();
667 
668   ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
669   for (auto s : state) {
670     benchmark::DoNotOptimize(
671         icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
672   }
673 }
674 BENCHMARK(BM_Get);
675 
BM_Delete(benchmark::State & state)676 void BM_Delete(benchmark::State& state) {
677   // Initialize the filesystem
678   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
679   Filesystem filesystem;
680   DestructibleDirectory ddir(filesystem, test_dir);
681 
682   // Create the schema.
683   SchemaProto schema =
684       SchemaBuilder()
685           .AddType(SchemaTypeConfigBuilder().SetType("Message"))
686           .Build();
687 
688   // Create the index.
689   IcingSearchEngineOptions options;
690   options.set_base_dir(test_dir);
691   options.set_index_merge_size(kIcingFullIndexSize);
692   std::unique_ptr<IcingSearchEngine> icing =
693       std::make_unique<IcingSearchEngine>(options);
694 
695   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
696   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
697 
698   // Create a document
699   DocumentProto document = DocumentBuilder()
700                                .SetSchema("Message")
701                                .SetNamespace("namespace")
702                                .SetUri("uri")
703                                .Build();
704 
705   ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
706   for (auto s : state) {
707     state.PauseTiming();
708     icing->Put(document);
709     state.ResumeTiming();
710 
711     benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
712   }
713 }
714 BENCHMARK(BM_Delete);
715 
BM_PutMaxAllowedDocuments(benchmark::State & state)716 void BM_PutMaxAllowedDocuments(benchmark::State& state) {
717   // Initialize the filesystem
718   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
719   Filesystem filesystem;
720   DestructibleDirectory ddir(filesystem, test_dir);
721 
722   // Create the schema.
723   SchemaProto schema =
724       SchemaBuilder()
725           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
726               PropertyConfigBuilder()
727                   .SetName("body")
728                   .SetDataTypeString(TermMatchType::PREFIX,
729                                      StringIndexingConfig::TokenizerType::PLAIN)
730                   .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
731           .Build();
732 
733   // Create the index.
734   IcingSearchEngineOptions options;
735   options.set_base_dir(test_dir);
736   options.set_index_merge_size(kIcingFullIndexSize);
737   std::unique_ptr<IcingSearchEngine> icing =
738       std::make_unique<IcingSearchEngine>(options);
739 
740   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
741   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
742 
743   // Create a document that has the term "foo"
744   DocumentProto base_document = DocumentBuilder()
745                                     .SetSchema("Message")
746                                     .SetNamespace("namespace")
747                                     .AddStringProperty("body", "foo")
748                                     .Build();
749 
750   // Insert a lot of documents with the term "foo"
751   for (auto s : state) {
752     for (int64_t i = 0; i <= kMaxDocumentId; ++i) {
753       DocumentProto document =
754           DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
755       EXPECT_THAT(icing->Put(document).status(), ProtoIsOk());
756     }
757   }
758 
759   DocumentProto document =
760       DocumentBuilder(base_document).SetUri("out_of_space_uri").Build();
761   PutResultProto put_result_proto = icing->Put(document);
762   EXPECT_THAT(put_result_proto.status(),
763               ProtoStatusIs(StatusProto::OUT_OF_SPACE));
764   EXPECT_THAT(put_result_proto.status().message(),
765               HasSubstr("Exceeded maximum number of documents"));
766 }
767 BENCHMARK(BM_PutMaxAllowedDocuments);
768 
BM_QueryWithSnippet(benchmark::State & state)769 void BM_QueryWithSnippet(benchmark::State& state) {
770   // Initialize the filesystem
771   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
772   Filesystem filesystem;
773   DestructibleDirectory ddir(filesystem, test_dir);
774 
775   // Create the schema.
776   SchemaProto schema =
777       SchemaBuilder()
778           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
779               PropertyConfigBuilder()
780                   .SetName("body")
781                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
782                   .SetCardinality(CARDINALITY_OPTIONAL)))
783           .Build();
784 
785   // Create the index.
786   IcingSearchEngineOptions options;
787   options.set_base_dir(test_dir);
788   options.set_index_merge_size(kIcingFullIndexSize);
789   std::unique_ptr<IcingSearchEngine> icing =
790       std::make_unique<IcingSearchEngine>(options);
791 
792   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
793   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
794 
795   std::string body = "message body";
796   for (int i = 0; i < 100; i++) {
797     body = body +
798            " invent invention inventory invest investigate investigation "
799            "investigator investment nvestor invisible invitation invite "
800            "involve involved involvement IraqiI rish island";
801   }
802   for (int i = 0; i < 50; i++) {
803     DocumentProto document = DocumentBuilder()
804                                  .SetKey("namespace", "uri" + std::to_string(i))
805                                  .SetSchema("Message")
806                                  .AddStringProperty("body", body)
807                                  .Build();
808     ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
809   }
810 
811   SearchSpecProto search_spec;
812   search_spec.set_term_match_type(TermMatchType::PREFIX);
813   search_spec.set_query("i");
814 
815   ResultSpecProto result_spec;
816   result_spec.set_num_per_page(10000);
817   result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
818   result_spec.mutable_snippet_spec()->set_num_matches_per_property(10000);
819   result_spec.mutable_snippet_spec()->set_num_to_snippet(10000);
820 
821   for (auto s : state) {
822     SearchResultProto results = icing->Search(
823         search_spec, ScoringSpecProto::default_instance(), result_spec);
824   }
825 }
826 BENCHMARK(BM_QueryWithSnippet);
827 
BM_NumericIndexing(benchmark::State & state)828 void BM_NumericIndexing(benchmark::State& state) {
829   int num_documents = state.range(0);
830   int num_integers_per_doc = state.range(1);
831 
832   // Initialize the filesystem
833   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
834   Filesystem filesystem;
835 
836   // Create the schema.
837   SchemaProto schema =
838       SchemaBuilder()
839           .AddType(SchemaTypeConfigBuilder()
840                        .SetType("Message")
841                        .AddProperty(PropertyConfigBuilder()
842                                         .SetName("body")
843                                         .SetDataTypeString(TERM_MATCH_PREFIX,
844                                                            TOKENIZER_PLAIN)
845                                         .SetCardinality(CARDINALITY_OPTIONAL))
846                        .AddProperty(PropertyConfigBuilder()
847                                         .SetName("integer")
848                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
849                                         .SetCardinality(CARDINALITY_REPEATED)))
850           .Build();
851 
852   std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
853       CreateIntegerGenerator(num_documents);
854   std::vector<DocumentProto> documents;
855   documents.reserve(num_documents);
856   for (int i = 0; i < num_documents; ++i) {
857     std::vector<int64_t> integers;
858     integers.reserve(num_integers_per_doc);
859     for (int j = 0; j < num_integers_per_doc; ++j) {
860       integers.push_back(integer_generator->Generate());
861     }
862 
863     DocumentProto document =
864         DocumentBuilder()
865             .SetKey("namespace", "uri" + std::to_string(i))
866             .SetSchema("Message")
867             .AddStringProperty("body", "body hello world")
868             .AddInt64Property("integer", integers.begin(), integers.end())
869             .Build();
870     documents.push_back(std::move(document));
871   }
872 
873   for (auto s : state) {
874     state.PauseTiming();
875     // Create the index.
876     IcingSearchEngineOptions options;
877     options.set_base_dir(test_dir);
878     options.set_index_merge_size(kIcingFullIndexSize);
879     std::unique_ptr<IcingSearchEngine> icing =
880         std::make_unique<IcingSearchEngine>(options);
881 
882     ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
883     ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
884     state.ResumeTiming();
885 
886     for (const DocumentProto& document : documents) {
887       ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
888     }
889 
890     state.PauseTiming();
891     icing.reset();
892     ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
893     state.ResumeTiming();
894   }
895 }
896 
897 BENCHMARK(BM_NumericIndexing)
898     // Arguments: num_documents, num_integers_per_doc
899     ->ArgPair(1000000, 5);
900 
BM_NumericExactQuery(benchmark::State & state)901 void BM_NumericExactQuery(benchmark::State& state) {
902   int num_documents = state.range(0);
903   int num_integers_per_doc = state.range(1);
904 
905   // Initialize the filesystem
906   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
907   Filesystem filesystem;
908   DestructibleDirectory ddir(filesystem, test_dir);
909 
910   // Create the schema.
911   SchemaProto schema =
912       SchemaBuilder()
913           .AddType(SchemaTypeConfigBuilder()
914                        .SetType("Message")
915                        .AddProperty(PropertyConfigBuilder()
916                                         .SetName("body")
917                                         .SetDataTypeString(TERM_MATCH_PREFIX,
918                                                            TOKENIZER_PLAIN)
919                                         .SetCardinality(CARDINALITY_OPTIONAL))
920                        .AddProperty(PropertyConfigBuilder()
921                                         .SetName("integer")
922                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
923                                         .SetCardinality(CARDINALITY_REPEATED)))
924           .Build();
925 
926   // Create the index.
927   IcingSearchEngineOptions options;
928   options.set_base_dir(test_dir);
929   options.set_index_merge_size(kIcingFullIndexSize);
930   std::unique_ptr<IcingSearchEngine> icing =
931       std::make_unique<IcingSearchEngine>(options);
932 
933   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
934   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
935 
936   std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
937       CreateIntegerGenerator(num_documents);
938   std::unordered_set<int64_t> chosen_integer_set;
939   for (int i = 0; i < num_documents; ++i) {
940     std::vector<int64_t> integers;
941     integers.reserve(num_integers_per_doc);
942     for (int j = 0; j < num_integers_per_doc; ++j) {
943       int64_t chosen_int = integer_generator->Generate();
944       integers.push_back(chosen_int);
945       chosen_integer_set.insert(chosen_int);
946     }
947 
948     DocumentProto document =
949         DocumentBuilder()
950             .SetKey("namespace", "uri" + std::to_string(i))
951             .SetSchema("Message")
952             .AddStringProperty("body", "body hello world")
953             .AddInt64Property("integer", integers.begin(), integers.end())
954             .Build();
955     ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
956   }
957 
958   SearchSpecProto search_spec;
959   search_spec.add_enabled_features(std::string(kNumericSearchFeature));
960 
961   ScoringSpecProto scoring_spec;
962   scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
963 
964   ResultSpecProto result_spec;
965   result_spec.set_num_per_page(1);
966 
967   std::vector<int64_t> chosen_integers(chosen_integer_set.begin(),
968                                        chosen_integer_set.end());
969   std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1);
970   std::default_random_engine e(/*seed=*/12345);
971   for (auto s : state) {
972     int64_t exact = chosen_integers[distrib(e)];
973     search_spec.set_query("integer == " + std::to_string(exact));
974 
975     SearchResultProto results =
976         icing->Search(search_spec, scoring_spec, result_spec);
977     ASSERT_THAT(results.status(), ProtoIsOk());
978     ASSERT_GT(results.results_size(), 0);
979     if (results.next_page_token() != kInvalidNextPageToken) {
980       icing->InvalidateNextPageToken(results.next_page_token());
981     }
982   }
983 }
984 BENCHMARK(BM_NumericExactQuery)
985     // Arguments: num_documents, num_integers_per_doc
986     ->ArgPair(1000000, 5);
987 
BM_NumericRangeQueryAll(benchmark::State & state)988 void BM_NumericRangeQueryAll(benchmark::State& state) {
989   int num_documents = state.range(0);
990   int num_integers_per_doc = state.range(1);
991 
992   // Initialize the filesystem
993   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
994   Filesystem filesystem;
995   DestructibleDirectory ddir(filesystem, test_dir);
996 
997   // Create the schema.
998   SchemaProto schema =
999       SchemaBuilder()
1000           .AddType(SchemaTypeConfigBuilder()
1001                        .SetType("Message")
1002                        .AddProperty(PropertyConfigBuilder()
1003                                         .SetName("body")
1004                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1005                                                            TOKENIZER_PLAIN)
1006                                         .SetCardinality(CARDINALITY_OPTIONAL))
1007                        .AddProperty(PropertyConfigBuilder()
1008                                         .SetName("integer")
1009                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1010                                         .SetCardinality(CARDINALITY_REPEATED)))
1011           .Build();
1012 
1013   // Create the index.
1014   IcingSearchEngineOptions options;
1015   options.set_base_dir(test_dir);
1016   options.set_index_merge_size(kIcingFullIndexSize);
1017   std::unique_ptr<IcingSearchEngine> icing =
1018       std::make_unique<IcingSearchEngine>(options);
1019 
1020   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
1021   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1022 
1023   std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
1024       CreateIntegerGenerator(num_documents);
1025   for (int i = 0; i < num_documents; ++i) {
1026     std::vector<int64_t> integers;
1027     integers.reserve(num_integers_per_doc);
1028     for (int j = 0; j < num_integers_per_doc; ++j) {
1029       integers.push_back(integer_generator->Generate());
1030     }
1031 
1032     DocumentProto document =
1033         DocumentBuilder()
1034             .SetKey("namespace", "uri" + std::to_string(i))
1035             .SetSchema("Message")
1036             .AddStringProperty("body", "body hello world")
1037             .AddInt64Property("integer", integers.begin(), integers.end())
1038             .Build();
1039     ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
1040   }
1041 
1042   SearchSpecProto search_spec;
1043   search_spec.add_enabled_features(std::string(kNumericSearchFeature));
1044   search_spec.set_query("integer >= " +
1045                         std::to_string(std::numeric_limits<int64_t>::min()));
1046 
1047   ScoringSpecProto scoring_spec;
1048   scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
1049 
1050   ResultSpecProto result_spec;
1051   result_spec.set_num_per_page(1);
1052 
1053   for (auto s : state) {
1054     SearchResultProto results =
1055         icing->Search(search_spec, scoring_spec, result_spec);
1056     ASSERT_THAT(results.status(), ProtoIsOk());
1057     ASSERT_GT(results.results_size(), 0);
1058     if (results.next_page_token() != kInvalidNextPageToken) {
1059       icing->InvalidateNextPageToken(results.next_page_token());
1060     }
1061   }
1062 }
1063 BENCHMARK(BM_NumericRangeQueryAll)
1064     // Arguments: num_documents, num_integers_per_doc
1065     ->ArgPair(1000000, 5);
1066 
BM_JoinQueryQualifiedId(benchmark::State & state)1067 void BM_JoinQueryQualifiedId(benchmark::State& state) {
1068   // Initialize the filesystem
1069   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
1070   Filesystem filesystem;
1071   DestructibleDirectory ddir(filesystem, test_dir);
1072 
1073   // Create the schema.
1074   SchemaProto schema =
1075       SchemaBuilder()
1076           .AddType(SchemaTypeConfigBuilder()
1077                        .SetType("Person")
1078                        .AddProperty(PropertyConfigBuilder()
1079                                         .SetName("firstName")
1080                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1081                                                            TOKENIZER_PLAIN)
1082                                         .SetCardinality(CARDINALITY_OPTIONAL))
1083                        .AddProperty(PropertyConfigBuilder()
1084                                         .SetName("lastName")
1085                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1086                                                            TOKENIZER_PLAIN)
1087                                         .SetCardinality(CARDINALITY_OPTIONAL))
1088                        .AddProperty(PropertyConfigBuilder()
1089                                         .SetName("emailAddress")
1090                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1091                                                            TOKENIZER_PLAIN)
1092                                         .SetCardinality(CARDINALITY_OPTIONAL)))
1093           .AddType(SchemaTypeConfigBuilder()
1094                        .SetType("Email")
1095                        .AddProperty(PropertyConfigBuilder()
1096                                         .SetName("subject")
1097                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1098                                                            TOKENIZER_PLAIN)
1099                                         .SetCardinality(CARDINALITY_OPTIONAL))
1100                        .AddProperty(PropertyConfigBuilder()
1101                                         .SetName("body")
1102                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1103                                                            TOKENIZER_PLAIN)
1104                                         .SetCardinality(CARDINALITY_OPTIONAL))
1105                        .AddProperty(PropertyConfigBuilder()
1106                                         .SetName("personQualifiedId")
1107                                         .SetDataTypeJoinableString(
1108                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1109                                         .SetCardinality(CARDINALITY_OPTIONAL)))
1110           .Build();
1111 
1112   // Create the index.
1113   IcingSearchEngineOptions options;
1114   options.set_base_dir(test_dir);
1115   options.set_index_merge_size(kIcingFullIndexSize);
1116   options.set_document_store_namespace_id_fingerprint(true);
1117   options.set_enable_qualified_id_join_index_v3(true);
1118   options.set_enable_delete_propagation_from(false);
1119   std::unique_ptr<IcingSearchEngine> icing =
1120       std::make_unique<IcingSearchEngine>(options);
1121 
1122   ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
1123   ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1124 
1125   // Create Person documents (parent)
1126   static constexpr int kNumPersonDocuments = 1000;
1127   for (int i = 0; i < kNumPersonDocuments; ++i) {
1128     std::string person_id = std::to_string(i);
1129     DocumentProto person =
1130         DocumentBuilder()
1131             .SetKey("pkg$db/namespace", "person" + person_id)
1132             .SetSchema("Person")
1133             .AddStringProperty("firstName", "first" + person_id)
1134             .AddStringProperty("lastName", "last" + person_id)
1135             .AddStringProperty("emailAddress",
1136                                "person" + person_id + "@gmail.com")
1137             .Build();
1138     ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk());
1139   }
1140 
1141   // Create Email documents (child)
1142   static constexpr int kNumEmailDocuments = 1000;
1143   std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
1144   std::default_random_engine e(/*seed=*/12345);
1145   for (int i = 0; i < kNumEmailDocuments; ++i) {
1146     std::string email_id = std::to_string(i);
1147     std::string person_id = std::to_string(distrib(e));
1148     DocumentProto email =
1149         DocumentBuilder()
1150             .SetKey("namespace", "email" + email_id)
1151             .SetSchema("Email")
1152             .AddStringProperty("subject", "test subject " + email_id)
1153             .AddStringProperty("body", "message body")
1154             .AddStringProperty("personQualifiedId",
1155                                "pkg$db/namespace#person" + person_id)
1156             .Build();
1157     ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk());
1158   }
1159 
1160   // Parent SearchSpec
1161   SearchSpecProto search_spec;
1162   search_spec.set_term_match_type(TermMatchType::PREFIX);
1163   search_spec.set_query("firstName:first");
1164 
1165   // JoinSpec
1166   JoinSpecProto* join_spec = search_spec.mutable_join_spec();
1167   join_spec->set_parent_property_expression(
1168       std::string(JoinProcessor::kQualifiedIdExpr));
1169   join_spec->set_child_property_expression("personQualifiedId");
1170   join_spec->set_aggregation_scoring_strategy(
1171       JoinSpecProto::AggregationScoringStrategy::MAX);
1172   JoinSpecProto::NestedSpecProto* nested_spec =
1173       join_spec->mutable_nested_spec();
1174   SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
1175   nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
1176   nested_search_spec->set_query("subject:test");
1177   *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance();
1178   *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
1179 
1180   static constexpr int kNumPerPage = 10;
1181   ResultSpecProto result_spec;
1182   result_spec.set_num_per_page(kNumPerPage);
1183   result_spec.set_max_joined_children_per_parent_to_return(
1184       std::numeric_limits<int32_t>::max());
1185 
1186   ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
1187 
1188   const auto child_count_reduce_func =
1189       [](int child_count, const SearchResultProto::ResultProto& result) -> int {
1190     return child_count + result.joined_results_size();
1191   };
1192   for (auto s : state) {
1193     int total_parent_count = 0;
1194     int total_child_count = 0;
1195     SearchResultProto results =
1196         icing->Search(search_spec, score_spec, result_spec);
1197     total_parent_count += results.results_size();
1198     total_child_count +=
1199         std::reduce(results.results().begin(), results.results().end(), 0,
1200                     child_count_reduce_func);
1201 
1202     ASSERT_THAT(total_parent_count, Eq(kNumPerPage));
1203     ASSERT_THAT(total_child_count, ::testing::Ge(0));
1204   }
1205 }
1206 BENCHMARK(BM_JoinQueryQualifiedId);
1207 
BM_PersistToDisk(benchmark::State & state)1208 void BM_PersistToDisk(benchmark::State& state) {
1209   // Initialize the filesystem
1210   std::string test_dir = GetTestTempDir() + "/icing/benchmark";
1211   Filesystem filesystem;
1212   DestructibleDirectory ddir(filesystem, test_dir);
1213 
1214   // Create the schema.
1215   std::default_random_engine random;
1216   int num_types = kAvgNumNamespaces * kAvgNumTypes;
1217   ExactStringPropertyGenerator property_generator;
1218   SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
1219       /*num_properties=*/state.range(1), &property_generator);
1220   SchemaProto schema = schema_generator.GenerateSchema(num_types);
1221   EvenDistributionTypeSelector type_selector(schema);
1222 
1223   // Generate documents.
1224   int num_docs = state.range(0);
1225   std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
1226   const std::vector<DocumentProto> random_docs =
1227       GenerateRandomDocuments(&type_selector, num_docs, language);
1228 
1229   for (auto _ : state) {
1230     state.PauseTiming();
1231     // Create the index.
1232     IcingSearchEngineOptions options;
1233     options.set_base_dir(test_dir);
1234     options.set_index_merge_size(kIcingFullIndexSize);
1235     options.set_use_persistent_hash_map(true);
1236     std::unique_ptr<IcingSearchEngine> icing =
1237         std::make_unique<IcingSearchEngine>(options);
1238 
1239     ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
1240     ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1241 
1242     for (const DocumentProto& doc : random_docs) {
1243       ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
1244     }
1245 
1246     state.ResumeTiming();
1247 
1248     ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
1249 
1250     state.PauseTiming();
1251     icing.reset();
1252     ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
1253     state.ResumeTiming();
1254   }
1255 }
1256 BENCHMARK(BM_PersistToDisk)
1257     // Arguments: num_indexed_documents, num_sections
1258     ->ArgPair(1024, 5);
1259 
1260 }  // namespace
1261 
1262 }  // namespace lib
1263 }  // namespace icing
1264