1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <unistd.h>
16
17 #include <fstream>
18 #include <iostream>
19 #include <limits>
20 #include <memory>
21 #include <numeric>
22 #include <ostream>
23 #include <random>
24 #include <sstream>
25 #include <stdexcept>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <vector>
30
31 #include "testing/base/public/benchmark.h"
32 #include "gmock/gmock.h"
33 #include "gtest/gtest.h"
34 #include "icing/document-builder.h"
35 #include "icing/file/filesystem.h"
36 #include "icing/icing-search-engine.h"
37 #include "icing/join/join-processor.h"
38 #include "icing/proto/document.pb.h"
39 #include "icing/proto/initialize.pb.h"
40 #include "icing/proto/persist.pb.h"
41 #include "icing/proto/reset.pb.h"
42 #include "icing/proto/schema.pb.h"
43 #include "icing/proto/scoring.pb.h"
44 #include "icing/proto/search.pb.h"
45 #include "icing/proto/status.pb.h"
46 #include "icing/proto/term.pb.h"
47 #include "icing/query/query-features.h"
48 #include "icing/schema-builder.h"
49 #include "icing/testing/common-matchers.h"
50 #include "icing/testing/document-generator.h"
51 #include "icing/testing/numeric/number-generator.h"
52 #include "icing/testing/numeric/uniform-distribution-integer-generator.h"
53 #include "icing/testing/random-string.h"
54 #include "icing/testing/schema-generator.h"
55 #include "icing/testing/tmp-directory.h"
56
57 // Run on a Linux workstation:
58 // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
59 // //icing:icing-search-engine_benchmark
60 //
61 // $ blaze-bin/icing/icing-search-engine_benchmark
62 // --benchmark_filter=all --benchmark_memory_usage
63 //
64 // Run on an Android device:
65 // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
66 // --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
67 // //icing:icing-search-engine_benchmark
68 //
69 // $ adb push blaze-bin/icing/icing-search-engine_benchmark
70 // /data/local/tmp/
71 //
72 // $ adb shell /data/local/tmp/icing-search-engine_benchmark
73 // --benchmark_filter=all
74
75 namespace icing {
76 namespace lib {
77
78 namespace {
79
80 using ::testing::Eq;
81 using ::testing::HasSubstr;
82
// Icing GMSCore has, on average, 17 corpora on a device and 30 corpora at the
// 95th percentile. Most clients use a single type, which is a consequence of
// Icing's constrained type offering. Assume that each package uses 3 types on
// average.
constexpr int kAvgNumNamespaces{10};
constexpr int kAvgNumTypes{3};

// ASSUME: Types will have at most ten properties. Types will be created with
// [1, 10] properties.
constexpr int kMaxNumProperties{10};

// Based on logs from Icing GMSCore.
constexpr int kAvgDocumentSize{300};

// ASSUME: ~70% of the document's size comes from its content.
constexpr float kContentSizePct{0.7f};

// Number of distinct tokens requested from CreateLanguages().
constexpr int kLanguageSize{1000};

// Lite Index size required to fit 128k docs; each doc requires ~64 bytes of
// space in the lite index (128k * 64 bytes = 8 MiB).
constexpr int kIcingFullIndexSize{1024 * 1024 * 8};

// Query params
constexpr int kNumPerPage{10};
constexpr int kNumToSnippet{10000};
constexpr int kMatchesPerProperty{1};
110
// Builds `num_namespaces` synthetic namespace names of the form
// "comgooglepackage<N>".
//
// Names are emitted in descending index order: the first element is
// "comgooglepackage<num_namespaces - 1>" and the last is "comgooglepackage0".
// Returns an empty vector when `num_namespaces` is zero or negative.
std::vector<std::string> CreateNamespaces(int num_namespaces) {
  std::vector<std::string> namespaces;
  if (num_namespaces <= 0) {
    return namespaces;
  }
  // The final size is known up front, so allocate once instead of growing
  // the vector repeatedly.
  namespaces.reserve(static_cast<size_t>(num_namespaces));
  for (int index = num_namespaces - 1; index >= 0; --index) {
    namespaces.push_back("comgooglepackage" + std::to_string(index));
  }
  return namespaces;
}
118
CreateSearchSpec(const std::string & query,const std::vector<std::string> & namespaces,TermMatchType::Code match_type)119 SearchSpecProto CreateSearchSpec(const std::string& query,
120 const std::vector<std::string>& namespaces,
121 TermMatchType::Code match_type) {
122 SearchSpecProto search_spec;
123 search_spec.set_query(query);
124 for (const std::string& name_space : namespaces) {
125 search_spec.add_namespace_filters(name_space);
126 }
127 search_spec.set_term_match_type(match_type);
128 return search_spec;
129 }
130
CreateResultSpec(int num_per_page,int num_to_snippet,int matches_per_property)131 ResultSpecProto CreateResultSpec(int num_per_page, int num_to_snippet,
132 int matches_per_property) {
133 ResultSpecProto result_spec;
134 result_spec.set_num_per_page(num_per_page);
135 result_spec.mutable_snippet_spec()->set_num_to_snippet(num_to_snippet);
136 result_spec.mutable_snippet_spec()->set_num_matches_per_property(
137 matches_per_property);
138 return result_spec;
139 }
140
CreateScoringSpec(ScoringSpecProto::RankingStrategy::Code ranking_strategy)141 ScoringSpecProto CreateScoringSpec(
142 ScoringSpecProto::RankingStrategy::Code ranking_strategy) {
143 ScoringSpecProto scoring_spec;
144 scoring_spec.set_rank_by(ranking_strategy);
145 return scoring_spec;
146 }
147
148 class DestructibleDirectory {
149 public:
DestructibleDirectory(const Filesystem & filesystem,const std::string & dir)150 explicit DestructibleDirectory(const Filesystem& filesystem,
151 const std::string& dir)
152 : filesystem_(filesystem), dir_(dir) {
153 filesystem_.DeleteDirectoryRecursively(dir_.c_str());
154 filesystem_.CreateDirectoryRecursively(dir_.c_str());
155 }
~DestructibleDirectory()156 ~DestructibleDirectory() {
157 filesystem_.DeleteDirectoryRecursively(dir_.c_str());
158 }
159
160 private:
161 Filesystem filesystem_;
162 std::string dir_;
163 };
164
GenerateRandomDocuments(EvenDistributionTypeSelector * type_selector,int num_docs,const std::vector<std::string> & language)165 std::vector<DocumentProto> GenerateRandomDocuments(
166 EvenDistributionTypeSelector* type_selector, int num_docs,
167 const std::vector<std::string>& language) {
168 std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
169 EvenDistributionNamespaceSelector namespace_selector(namespaces);
170
171 std::default_random_engine random;
172 UniformDistributionLanguageTokenGenerator<std::default_random_engine>
173 token_generator(language, &random);
174
175 DocumentGenerator<
176 EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
177 UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
178 generator(&namespace_selector, type_selector, &token_generator,
179 kAvgDocumentSize * kContentSizePct);
180
181 std::vector<DocumentProto> random_docs;
182 random_docs.reserve(num_docs);
183 for (int i = 0; i < num_docs; i++) {
184 random_docs.push_back(generator.generateDoc());
185 }
186 return random_docs;
187 }
188
CreateIntegerGenerator(size_t num_documents)189 std::unique_ptr<NumberGenerator<int64_t>> CreateIntegerGenerator(
190 size_t num_documents) {
191 // Since the collision # follows poisson distribution with lambda =
192 // (num_keys / range), we set the range 10x (lambda = 0.1) to avoid too many
193 // collisions.
194 //
195 // Distribution:
196 // - keys in range being picked for 0 times: 90.5%
197 // - keys in range being picked for 1 time: 9%
198 // - keys in range being picked for 2 times: 0.45%
199 // - keys in range being picked for 3 times: 0.015%
200 //
201 // For example, num_keys = 1M, range = 10M. Then there will be ~904837 unique
202 // keys, 45242 keys being picked twice, 1508 keys being picked thrice ...
203 return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
204 /*seed=*/12345, /*range_lower=*/0,
205 /*range_upper=*/static_cast<int64_t>(num_documents) * 10 - 1);
206 }
207
BM_IndexLatency(benchmark::State & state)208 void BM_IndexLatency(benchmark::State& state) {
209 // Initialize the filesystem
210 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
211 Filesystem filesystem;
212 DestructibleDirectory ddir(filesystem, test_dir);
213
214 // Create the schema.
215 std::default_random_engine random;
216 int num_types = kAvgNumNamespaces * kAvgNumTypes;
217 ExactStringPropertyGenerator property_generator;
218 SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
219 /*num_properties=*/state.range(1), &property_generator);
220 SchemaProto schema = schema_generator.GenerateSchema(num_types);
221 EvenDistributionTypeSelector type_selector(schema);
222
223 // Create the index.
224 IcingSearchEngineOptions options;
225 options.set_base_dir(test_dir);
226 std::unique_ptr<IcingSearchEngine> icing =
227 std::make_unique<IcingSearchEngine>(options);
228
229 int num_docs = state.range(0);
230 std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
231 const std::vector<DocumentProto> random_docs =
232 GenerateRandomDocuments(&type_selector, num_docs, language);
233 for (auto _ : state) {
234 state.PauseTiming();
235 ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
236 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
237 state.ResumeTiming();
238 for (const DocumentProto& doc : random_docs) {
239 ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
240 }
241 }
242 }
243 BENCHMARK(BM_IndexLatency)
244 // Arguments: num_indexed_documents, num_sections
245 ->ArgPair(1000000, 5);
246
BM_QueryLatency(benchmark::State & state)247 void BM_QueryLatency(benchmark::State& state) {
248 // Initialize the filesystem
249 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
250 Filesystem filesystem;
251 DestructibleDirectory ddir(filesystem, test_dir);
252
253 // Create the schema.
254 std::default_random_engine random;
255 int num_types = kAvgNumNamespaces * kAvgNumTypes;
256 ExactStringPropertyGenerator property_generator;
257 SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
258 /*num_properties=*/state.range(1), &property_generator);
259 SchemaProto schema = schema_generator.GenerateSchema(num_types);
260 EvenDistributionTypeSelector type_selector(schema);
261
262 // Create the index.
263 IcingSearchEngineOptions options;
264 options.set_base_dir(test_dir);
265 options.set_index_merge_size(kIcingFullIndexSize);
266 std::unique_ptr<IcingSearchEngine> icing =
267 std::make_unique<IcingSearchEngine>(options);
268
269 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
270 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
271
272 int num_docs = state.range(0);
273 std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
274 const std::vector<DocumentProto> random_docs =
275 GenerateRandomDocuments(&type_selector, num_docs, language);
276 for (const DocumentProto& doc : random_docs) {
277 ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
278 }
279
280 SearchSpecProto search_spec = CreateSearchSpec(
281 language.at(0), std::vector<std::string>(), TermMatchType::PREFIX);
282 ResultSpecProto result_spec = CreateResultSpec(1, 1000000, 1000000);
283 ScoringSpecProto scoring_spec =
284 CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
285 for (auto _ : state) {
286 SearchResultProto results = icing->Search(
287 search_spec, ScoringSpecProto::default_instance(), result_spec);
288 }
289 }
290 BENCHMARK(BM_QueryLatency)
291 // Arguments: num_indexed_documents, num_sections
292 ->ArgPair(1000000, 2);
293
BM_IndexThroughput(benchmark::State & state)294 void BM_IndexThroughput(benchmark::State& state) {
295 // Initialize the filesystem
296 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
297 Filesystem filesystem;
298 DestructibleDirectory ddir(filesystem, test_dir);
299
300 // Create the schema.
301 std::default_random_engine random;
302 int num_types = kAvgNumNamespaces * kAvgNumTypes;
303 ExactStringPropertyGenerator property_generator;
304 SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
305 /*num_properties=*/state.range(1), &property_generator);
306 SchemaProto schema = schema_generator.GenerateSchema(num_types);
307 EvenDistributionTypeSelector type_selector(schema);
308
309 // Create the index.
310 IcingSearchEngineOptions options;
311 options.set_base_dir(test_dir);
312 options.set_index_merge_size(kIcingFullIndexSize);
313 std::unique_ptr<IcingSearchEngine> icing =
314 std::make_unique<IcingSearchEngine>(options);
315
316 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
317 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
318
319 int num_docs = state.range(0);
320 std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
321 const std::vector<DocumentProto> random_docs =
322 GenerateRandomDocuments(&type_selector, num_docs, language);
323 for (auto s : state) {
324 for (const DocumentProto& doc : random_docs) {
325 ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
326 }
327 }
328 state.SetItemsProcessed(state.iterations() * num_docs);
329 }
330 BENCHMARK(BM_IndexThroughput)
331 // Arguments: num_indexed_documents, num_sections
332 ->ArgPair(1, 1)
333 ->ArgPair(2, 1)
334 ->ArgPair(8, 1)
335 ->ArgPair(32, 1)
336 ->ArgPair(128, 1)
337 ->ArgPair(1 << 10, 1)
338 ->ArgPair(1 << 13, 1)
339 ->ArgPair(1 << 15, 1)
340 ->ArgPair(1 << 17, 1)
341 ->ArgPair(1, 5)
342 ->ArgPair(2, 5)
343 ->ArgPair(8, 5)
344 ->ArgPair(32, 5)
345 ->ArgPair(128, 5)
346 ->ArgPair(1 << 10, 5)
347 ->ArgPair(1 << 13, 5)
348 ->ArgPair(1 << 15, 5)
349 ->ArgPair(1 << 17, 5)
350 ->ArgPair(1, 10)
351 ->ArgPair(2, 10)
352 ->ArgPair(8, 10)
353 ->ArgPair(32, 10)
354 ->ArgPair(128, 10)
355 ->ArgPair(1 << 10, 10)
356 ->ArgPair(1 << 13, 10)
357 ->ArgPair(1 << 15, 10)
358 ->ArgPair(1 << 17, 10);
359
BM_MutlipleIndices(benchmark::State & state)360 void BM_MutlipleIndices(benchmark::State& state) {
361 // Initialize the filesystem
362 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
363 Filesystem filesystem;
364 DestructibleDirectory ddir(filesystem, test_dir);
365
366 // Create the schema.
367 std::default_random_engine random;
368 int num_types = kAvgNumNamespaces * kAvgNumTypes;
369 ExactStringPropertyGenerator property_generator;
370 RandomSchemaGenerator<std::default_random_engine,
371 ExactStringPropertyGenerator>
372 schema_generator(&random, &property_generator);
373 SchemaProto schema =
374 schema_generator.GenerateSchema(num_types, kMaxNumProperties);
375 EvenDistributionTypeSelector type_selector(schema);
376
377 // Create the indices.
378 std::vector<std::unique_ptr<IcingSearchEngine>> icings;
379 int num_indices = state.range(0);
380 for (int i = 0; i < num_indices; ++i) {
381 IcingSearchEngineOptions options;
382 std::string base_dir = test_dir + "/" + std::to_string(i);
383 options.set_base_dir(base_dir);
384 options.set_index_merge_size(kIcingFullIndexSize / num_indices);
385 auto icing = std::make_unique<IcingSearchEngine>(options);
386
387 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
388 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
389 icings.push_back(std::move(icing));
390 }
391
392 // Setup namespace info and language
393 std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
394 EvenDistributionNamespaceSelector namespace_selector(namespaces);
395
396 std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
397 UniformDistributionLanguageTokenGenerator<std::default_random_engine>
398 token_generator(language, &random);
399
400 // Fill the index.
401 DocumentGenerator<
402 EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
403 UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
404 generator(&namespace_selector, &type_selector, &token_generator,
405 kAvgDocumentSize * kContentSizePct);
406 for (int i = 0; i < state.range(1); ++i) {
407 DocumentProto doc = generator.generateDoc();
408 PutResultProto put_result;
409 if (icings.empty()) {
410 ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN));
411 continue;
412 }
413 ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk());
414 }
415
416 // QUERY!
417 // Every document has its own namespace as a token. This query that should
418 // match 1/kAvgNumNamespace% of all documents.
419 const std::string& name_space = namespaces.at(0);
420 SearchSpecProto search_spec = CreateSearchSpec(
421 /*query=*/name_space, {name_space}, TermMatchType::EXACT_ONLY);
422 ResultSpecProto result_spec =
423 CreateResultSpec(kNumPerPage, kNumToSnippet, kMatchesPerProperty);
424 ScoringSpecProto scoring_spec =
425 CreateScoringSpec(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
426
427 int num_results = 0;
428 for (auto _ : state) {
429 num_results = 0;
430 SearchResultProto result;
431 if (icings.empty()) {
432 ASSERT_THAT(result.status().code(), Eq(StatusProto::UNKNOWN));
433 continue;
434 }
435 result = icings.at(0)->Search(search_spec, scoring_spec, result_spec);
436 ASSERT_THAT(result.status(), ProtoIsOk());
437 while (!result.results().empty()) {
438 num_results += result.results_size();
439 if (!icings.empty()) {
440 result = icings.at(0)->GetNextPage(result.next_page_token());
441 }
442 ASSERT_THAT(result.status(), ProtoIsOk());
443 }
444 }
445
446 // Measure size.
447 int64_t disk_usage = filesystem.GetDiskUsage(test_dir.c_str());
448 std::cout << "Num results:\t" << num_results << "\t\tDisk Use:\t"
449 << disk_usage / 1024.0 << std::endl;
450 }
451 BENCHMARK(BM_MutlipleIndices)
452 // First argument: num_indices, Second argument: num_total_documents
453 // So each index will contain (num_total_documents / num_indices) documents.
454 ->ArgPair(0, 0)
455 ->ArgPair(0, 1024)
456 ->ArgPair(0, 131072)
457 ->ArgPair(1, 0)
458 ->ArgPair(1, 1)
459 ->ArgPair(1, 2)
460 ->ArgPair(1, 8)
461 ->ArgPair(1, 32)
462 ->ArgPair(1, 128)
463 ->ArgPair(1, 1024)
464 ->ArgPair(1, 8192)
465 ->ArgPair(1, 32768)
466 ->ArgPair(1, 131072)
467 ->ArgPair(2, 0)
468 ->ArgPair(2, 1)
469 ->ArgPair(2, 2)
470 ->ArgPair(2, 8)
471 ->ArgPair(2, 32)
472 ->ArgPair(2, 128)
473 ->ArgPair(2, 1024)
474 ->ArgPair(2, 8192)
475 ->ArgPair(2, 32768)
476 ->ArgPair(2, 131072)
477 ->ArgPair(10, 0)
478 ->ArgPair(10, 1)
479 ->ArgPair(10, 2)
480 ->ArgPair(10, 8)
481 ->ArgPair(10, 32)
482 ->ArgPair(10, 128)
483 ->ArgPair(10, 1024)
484 ->ArgPair(10, 8192)
485 ->ArgPair(10, 32768)
486 ->ArgPair(10, 131072);
487
BM_SearchNoStackOverflow(benchmark::State & state)488 void BM_SearchNoStackOverflow(benchmark::State& state) {
489 // Initialize the filesystem
490 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
491 Filesystem filesystem;
492 DestructibleDirectory ddir(filesystem, test_dir);
493
494 // Create the schema.
495 SchemaProto schema =
496 SchemaBuilder()
497 .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
498 PropertyConfigBuilder()
499 .SetName("body")
500 .SetDataTypeString(TermMatchType::PREFIX,
501 StringIndexingConfig::TokenizerType::PLAIN)
502 .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
503 .Build();
504
505 // Create the index.
506 IcingSearchEngineOptions options;
507 options.set_base_dir(test_dir);
508 options.set_index_merge_size(kIcingFullIndexSize);
509 std::unique_ptr<IcingSearchEngine> icing =
510 std::make_unique<IcingSearchEngine>(options);
511
512 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
513 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
514
515 // Create a document that has the term "foo"
516 DocumentProto base_document = DocumentBuilder()
517 .SetSchema("Message")
518 .SetNamespace("namespace")
519 .AddStringProperty("body", "foo")
520 .Build();
521
522 // Insert a lot of documents with the term "foo"
523 int64_t num_docs = state.range(0);
524 for (int64_t i = 0; i < num_docs; ++i) {
525 DocumentProto document =
526 DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
527 ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
528 }
529
530 // Do a query and exclude documents with the term "foo". The way this is
531 // currently implemented is that we'll iterate over all the documents in the
532 // index, then apply the exclusion check. Since all our documents have "foo",
533 // we'll consider it a "miss". Previously with recursion, we would have
534 // recursed until we got a success, which would never happen causing us to
535 // recurse through all the documents and trigger a stack overflow. With
536 // the iterative implementation, we should avoid this.
537 SearchSpecProto search_spec;
538 search_spec.set_query("-foo");
539 search_spec.set_term_match_type(TermMatchType::PREFIX);
540
541 ResultSpecProto result_spec;
542 ScoringSpecProto scoring_spec;
543 for (auto s : state) {
544 icing->Search(search_spec, scoring_spec, result_spec);
545 }
546 }
547 // For other reasons, we hit a limit when inserting the ~350,000th document. So
548 // cap the limit to 1 << 18.
549 BENCHMARK(BM_SearchNoStackOverflow)
550 ->Range(/*start=*/1 << 10, /*limit=*/1 << 18);
551
552 // Added for b/184373205. Ensure that we can repeatedly put documents even if
553 // the underlying mmapped areas grow past a few page sizes.
BM_RepeatedPut(benchmark::State & state)554 void BM_RepeatedPut(benchmark::State& state) {
555 // Initialize the filesystem
556 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
557 Filesystem filesystem;
558 DestructibleDirectory ddir(filesystem, test_dir);
559
560 // Create the schema.
561 SchemaProto schema =
562 SchemaBuilder()
563 .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
564 PropertyConfigBuilder()
565 .SetName("body")
566 .SetDataTypeString(TermMatchType::PREFIX,
567 StringIndexingConfig::TokenizerType::PLAIN)
568 .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
569 .Build();
570
571 // Create the index.
572 IcingSearchEngineOptions options;
573 options.set_base_dir(test_dir);
574 options.set_index_merge_size(kIcingFullIndexSize);
575 std::unique_ptr<IcingSearchEngine> icing =
576 std::make_unique<IcingSearchEngine>(options);
577
578 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
579 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
580
581 // Create a document that has the term "foo"
582 DocumentProto base_document = DocumentBuilder()
583 .SetSchema("Message")
584 .SetNamespace("namespace")
585 .AddStringProperty("body", "foo")
586 .Build();
587
588 // Insert a lot of documents with the term "foo"
589 int64_t num_docs = state.range(0);
590 for (auto s : state) {
591 for (int64_t i = 0; i < num_docs; ++i) {
592 DocumentProto document =
593 DocumentBuilder(base_document).SetUri("uri").Build();
594 ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
595 }
596 }
597 }
598 // For other reasons, we hit a limit when inserting the ~350,000th document. So
599 // cap the limit to 1 << 18.
600 BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
601
602 // This is different from BM_RepeatedPut since we're just trying to benchmark
603 // one Put call, not thousands of them at once.
BM_Put(benchmark::State & state)604 void BM_Put(benchmark::State& state) {
605 // Initialize the filesystem
606 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
607 Filesystem filesystem;
608 DestructibleDirectory ddir(filesystem, test_dir);
609
610 // Create the schema.
611 SchemaProto schema =
612 SchemaBuilder()
613 .AddType(SchemaTypeConfigBuilder().SetType("Message"))
614 .Build();
615
616 // Create the index.
617 IcingSearchEngineOptions options;
618 options.set_base_dir(test_dir);
619 options.set_index_merge_size(kIcingFullIndexSize);
620 std::unique_ptr<IcingSearchEngine> icing =
621 std::make_unique<IcingSearchEngine>(options);
622
623 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
624 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
625
626 // Create a document
627 DocumentProto document = DocumentBuilder()
628 .SetSchema("Message")
629 .SetNamespace("namespace")
630 .SetUri("uri")
631 .Build();
632
633 for (auto s : state) {
634 benchmark::DoNotOptimize(icing->Put(document));
635 }
636 }
637 BENCHMARK(BM_Put);
638
BM_Get(benchmark::State & state)639 void BM_Get(benchmark::State& state) {
640 // Initialize the filesystem
641 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
642 Filesystem filesystem;
643 DestructibleDirectory ddir(filesystem, test_dir);
644
645 // Create the schema.
646 SchemaProto schema =
647 SchemaBuilder()
648 .AddType(SchemaTypeConfigBuilder().SetType("Message"))
649 .Build();
650
651 // Create the index.
652 IcingSearchEngineOptions options;
653 options.set_base_dir(test_dir);
654 options.set_index_merge_size(kIcingFullIndexSize);
655 std::unique_ptr<IcingSearchEngine> icing =
656 std::make_unique<IcingSearchEngine>(options);
657
658 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
659 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
660
661 // Create a document
662 DocumentProto document = DocumentBuilder()
663 .SetSchema("Message")
664 .SetNamespace("namespace")
665 .SetUri("uri")
666 .Build();
667
668 ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
669 for (auto s : state) {
670 benchmark::DoNotOptimize(
671 icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
672 }
673 }
674 BENCHMARK(BM_Get);
675
BM_Delete(benchmark::State & state)676 void BM_Delete(benchmark::State& state) {
677 // Initialize the filesystem
678 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
679 Filesystem filesystem;
680 DestructibleDirectory ddir(filesystem, test_dir);
681
682 // Create the schema.
683 SchemaProto schema =
684 SchemaBuilder()
685 .AddType(SchemaTypeConfigBuilder().SetType("Message"))
686 .Build();
687
688 // Create the index.
689 IcingSearchEngineOptions options;
690 options.set_base_dir(test_dir);
691 options.set_index_merge_size(kIcingFullIndexSize);
692 std::unique_ptr<IcingSearchEngine> icing =
693 std::make_unique<IcingSearchEngine>(options);
694
695 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
696 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
697
698 // Create a document
699 DocumentProto document = DocumentBuilder()
700 .SetSchema("Message")
701 .SetNamespace("namespace")
702 .SetUri("uri")
703 .Build();
704
705 ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
706 for (auto s : state) {
707 state.PauseTiming();
708 icing->Put(document);
709 state.ResumeTiming();
710
711 benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
712 }
713 }
714 BENCHMARK(BM_Delete);
715
BM_PutMaxAllowedDocuments(benchmark::State & state)716 void BM_PutMaxAllowedDocuments(benchmark::State& state) {
717 // Initialize the filesystem
718 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
719 Filesystem filesystem;
720 DestructibleDirectory ddir(filesystem, test_dir);
721
722 // Create the schema.
723 SchemaProto schema =
724 SchemaBuilder()
725 .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
726 PropertyConfigBuilder()
727 .SetName("body")
728 .SetDataTypeString(TermMatchType::PREFIX,
729 StringIndexingConfig::TokenizerType::PLAIN)
730 .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
731 .Build();
732
733 // Create the index.
734 IcingSearchEngineOptions options;
735 options.set_base_dir(test_dir);
736 options.set_index_merge_size(kIcingFullIndexSize);
737 std::unique_ptr<IcingSearchEngine> icing =
738 std::make_unique<IcingSearchEngine>(options);
739
740 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
741 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
742
743 // Create a document that has the term "foo"
744 DocumentProto base_document = DocumentBuilder()
745 .SetSchema("Message")
746 .SetNamespace("namespace")
747 .AddStringProperty("body", "foo")
748 .Build();
749
750 // Insert a lot of documents with the term "foo"
751 for (auto s : state) {
752 for (int64_t i = 0; i <= kMaxDocumentId; ++i) {
753 DocumentProto document =
754 DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
755 EXPECT_THAT(icing->Put(document).status(), ProtoIsOk());
756 }
757 }
758
759 DocumentProto document =
760 DocumentBuilder(base_document).SetUri("out_of_space_uri").Build();
761 PutResultProto put_result_proto = icing->Put(document);
762 EXPECT_THAT(put_result_proto.status(),
763 ProtoStatusIs(StatusProto::OUT_OF_SPACE));
764 EXPECT_THAT(put_result_proto.status().message(),
765 HasSubstr("Exceeded maximum number of documents"));
766 }
767 BENCHMARK(BM_PutMaxAllowedDocuments);
768
BM_QueryWithSnippet(benchmark::State & state)769 void BM_QueryWithSnippet(benchmark::State& state) {
770 // Initialize the filesystem
771 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
772 Filesystem filesystem;
773 DestructibleDirectory ddir(filesystem, test_dir);
774
775 // Create the schema.
776 SchemaProto schema =
777 SchemaBuilder()
778 .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
779 PropertyConfigBuilder()
780 .SetName("body")
781 .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
782 .SetCardinality(CARDINALITY_OPTIONAL)))
783 .Build();
784
785 // Create the index.
786 IcingSearchEngineOptions options;
787 options.set_base_dir(test_dir);
788 options.set_index_merge_size(kIcingFullIndexSize);
789 std::unique_ptr<IcingSearchEngine> icing =
790 std::make_unique<IcingSearchEngine>(options);
791
792 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
793 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
794
795 std::string body = "message body";
796 for (int i = 0; i < 100; i++) {
797 body = body +
798 " invent invention inventory invest investigate investigation "
799 "investigator investment nvestor invisible invitation invite "
800 "involve involved involvement IraqiI rish island";
801 }
802 for (int i = 0; i < 50; i++) {
803 DocumentProto document = DocumentBuilder()
804 .SetKey("namespace", "uri" + std::to_string(i))
805 .SetSchema("Message")
806 .AddStringProperty("body", body)
807 .Build();
808 ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
809 }
810
811 SearchSpecProto search_spec;
812 search_spec.set_term_match_type(TermMatchType::PREFIX);
813 search_spec.set_query("i");
814
815 ResultSpecProto result_spec;
816 result_spec.set_num_per_page(10000);
817 result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64);
818 result_spec.mutable_snippet_spec()->set_num_matches_per_property(10000);
819 result_spec.mutable_snippet_spec()->set_num_to_snippet(10000);
820
821 for (auto s : state) {
822 SearchResultProto results = icing->Search(
823 search_spec, ScoringSpecProto::default_instance(), result_spec);
824 }
825 }
826 BENCHMARK(BM_QueryWithSnippet);
827
BM_NumericIndexing(benchmark::State & state)828 void BM_NumericIndexing(benchmark::State& state) {
829 int num_documents = state.range(0);
830 int num_integers_per_doc = state.range(1);
831
832 // Initialize the filesystem
833 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
834 Filesystem filesystem;
835
836 // Create the schema.
837 SchemaProto schema =
838 SchemaBuilder()
839 .AddType(SchemaTypeConfigBuilder()
840 .SetType("Message")
841 .AddProperty(PropertyConfigBuilder()
842 .SetName("body")
843 .SetDataTypeString(TERM_MATCH_PREFIX,
844 TOKENIZER_PLAIN)
845 .SetCardinality(CARDINALITY_OPTIONAL))
846 .AddProperty(PropertyConfigBuilder()
847 .SetName("integer")
848 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
849 .SetCardinality(CARDINALITY_REPEATED)))
850 .Build();
851
852 std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
853 CreateIntegerGenerator(num_documents);
854 std::vector<DocumentProto> documents;
855 documents.reserve(num_documents);
856 for (int i = 0; i < num_documents; ++i) {
857 std::vector<int64_t> integers;
858 integers.reserve(num_integers_per_doc);
859 for (int j = 0; j < num_integers_per_doc; ++j) {
860 integers.push_back(integer_generator->Generate());
861 }
862
863 DocumentProto document =
864 DocumentBuilder()
865 .SetKey("namespace", "uri" + std::to_string(i))
866 .SetSchema("Message")
867 .AddStringProperty("body", "body hello world")
868 .AddInt64Property("integer", integers.begin(), integers.end())
869 .Build();
870 documents.push_back(std::move(document));
871 }
872
873 for (auto s : state) {
874 state.PauseTiming();
875 // Create the index.
876 IcingSearchEngineOptions options;
877 options.set_base_dir(test_dir);
878 options.set_index_merge_size(kIcingFullIndexSize);
879 std::unique_ptr<IcingSearchEngine> icing =
880 std::make_unique<IcingSearchEngine>(options);
881
882 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
883 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
884 state.ResumeTiming();
885
886 for (const DocumentProto& document : documents) {
887 ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
888 }
889
890 state.PauseTiming();
891 icing.reset();
892 ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
893 state.ResumeTiming();
894 }
895 }
896
897 BENCHMARK(BM_NumericIndexing)
898 // Arguments: num_documents, num_integers_per_doc
899 ->ArgPair(1000000, 5);
900
BM_NumericExactQuery(benchmark::State & state)901 void BM_NumericExactQuery(benchmark::State& state) {
902 int num_documents = state.range(0);
903 int num_integers_per_doc = state.range(1);
904
905 // Initialize the filesystem
906 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
907 Filesystem filesystem;
908 DestructibleDirectory ddir(filesystem, test_dir);
909
910 // Create the schema.
911 SchemaProto schema =
912 SchemaBuilder()
913 .AddType(SchemaTypeConfigBuilder()
914 .SetType("Message")
915 .AddProperty(PropertyConfigBuilder()
916 .SetName("body")
917 .SetDataTypeString(TERM_MATCH_PREFIX,
918 TOKENIZER_PLAIN)
919 .SetCardinality(CARDINALITY_OPTIONAL))
920 .AddProperty(PropertyConfigBuilder()
921 .SetName("integer")
922 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
923 .SetCardinality(CARDINALITY_REPEATED)))
924 .Build();
925
926 // Create the index.
927 IcingSearchEngineOptions options;
928 options.set_base_dir(test_dir);
929 options.set_index_merge_size(kIcingFullIndexSize);
930 std::unique_ptr<IcingSearchEngine> icing =
931 std::make_unique<IcingSearchEngine>(options);
932
933 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
934 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
935
936 std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
937 CreateIntegerGenerator(num_documents);
938 std::unordered_set<int64_t> chosen_integer_set;
939 for (int i = 0; i < num_documents; ++i) {
940 std::vector<int64_t> integers;
941 integers.reserve(num_integers_per_doc);
942 for (int j = 0; j < num_integers_per_doc; ++j) {
943 int64_t chosen_int = integer_generator->Generate();
944 integers.push_back(chosen_int);
945 chosen_integer_set.insert(chosen_int);
946 }
947
948 DocumentProto document =
949 DocumentBuilder()
950 .SetKey("namespace", "uri" + std::to_string(i))
951 .SetSchema("Message")
952 .AddStringProperty("body", "body hello world")
953 .AddInt64Property("integer", integers.begin(), integers.end())
954 .Build();
955 ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
956 }
957
958 SearchSpecProto search_spec;
959 search_spec.add_enabled_features(std::string(kNumericSearchFeature));
960
961 ScoringSpecProto scoring_spec;
962 scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
963
964 ResultSpecProto result_spec;
965 result_spec.set_num_per_page(1);
966
967 std::vector<int64_t> chosen_integers(chosen_integer_set.begin(),
968 chosen_integer_set.end());
969 std::uniform_int_distribution<> distrib(0, chosen_integers.size() - 1);
970 std::default_random_engine e(/*seed=*/12345);
971 for (auto s : state) {
972 int64_t exact = chosen_integers[distrib(e)];
973 search_spec.set_query("integer == " + std::to_string(exact));
974
975 SearchResultProto results =
976 icing->Search(search_spec, scoring_spec, result_spec);
977 ASSERT_THAT(results.status(), ProtoIsOk());
978 ASSERT_GT(results.results_size(), 0);
979 if (results.next_page_token() != kInvalidNextPageToken) {
980 icing->InvalidateNextPageToken(results.next_page_token());
981 }
982 }
983 }
984 BENCHMARK(BM_NumericExactQuery)
985 // Arguments: num_documents, num_integers_per_doc
986 ->ArgPair(1000000, 5);
987
BM_NumericRangeQueryAll(benchmark::State & state)988 void BM_NumericRangeQueryAll(benchmark::State& state) {
989 int num_documents = state.range(0);
990 int num_integers_per_doc = state.range(1);
991
992 // Initialize the filesystem
993 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
994 Filesystem filesystem;
995 DestructibleDirectory ddir(filesystem, test_dir);
996
997 // Create the schema.
998 SchemaProto schema =
999 SchemaBuilder()
1000 .AddType(SchemaTypeConfigBuilder()
1001 .SetType("Message")
1002 .AddProperty(PropertyConfigBuilder()
1003 .SetName("body")
1004 .SetDataTypeString(TERM_MATCH_PREFIX,
1005 TOKENIZER_PLAIN)
1006 .SetCardinality(CARDINALITY_OPTIONAL))
1007 .AddProperty(PropertyConfigBuilder()
1008 .SetName("integer")
1009 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1010 .SetCardinality(CARDINALITY_REPEATED)))
1011 .Build();
1012
1013 // Create the index.
1014 IcingSearchEngineOptions options;
1015 options.set_base_dir(test_dir);
1016 options.set_index_merge_size(kIcingFullIndexSize);
1017 std::unique_ptr<IcingSearchEngine> icing =
1018 std::make_unique<IcingSearchEngine>(options);
1019
1020 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
1021 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1022
1023 std::unique_ptr<NumberGenerator<int64_t>> integer_generator =
1024 CreateIntegerGenerator(num_documents);
1025 for (int i = 0; i < num_documents; ++i) {
1026 std::vector<int64_t> integers;
1027 integers.reserve(num_integers_per_doc);
1028 for (int j = 0; j < num_integers_per_doc; ++j) {
1029 integers.push_back(integer_generator->Generate());
1030 }
1031
1032 DocumentProto document =
1033 DocumentBuilder()
1034 .SetKey("namespace", "uri" + std::to_string(i))
1035 .SetSchema("Message")
1036 .AddStringProperty("body", "body hello world")
1037 .AddInt64Property("integer", integers.begin(), integers.end())
1038 .Build();
1039 ASSERT_THAT(icing->Put(std::move(document)).status(), ProtoIsOk());
1040 }
1041
1042 SearchSpecProto search_spec;
1043 search_spec.add_enabled_features(std::string(kNumericSearchFeature));
1044 search_spec.set_query("integer >= " +
1045 std::to_string(std::numeric_limits<int64_t>::min()));
1046
1047 ScoringSpecProto scoring_spec;
1048 scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
1049
1050 ResultSpecProto result_spec;
1051 result_spec.set_num_per_page(1);
1052
1053 for (auto s : state) {
1054 SearchResultProto results =
1055 icing->Search(search_spec, scoring_spec, result_spec);
1056 ASSERT_THAT(results.status(), ProtoIsOk());
1057 ASSERT_GT(results.results_size(), 0);
1058 if (results.next_page_token() != kInvalidNextPageToken) {
1059 icing->InvalidateNextPageToken(results.next_page_token());
1060 }
1061 }
1062 }
1063 BENCHMARK(BM_NumericRangeQueryAll)
1064 // Arguments: num_documents, num_integers_per_doc
1065 ->ArgPair(1000000, 5);
1066
BM_JoinQueryQualifiedId(benchmark::State & state)1067 void BM_JoinQueryQualifiedId(benchmark::State& state) {
1068 // Initialize the filesystem
1069 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
1070 Filesystem filesystem;
1071 DestructibleDirectory ddir(filesystem, test_dir);
1072
1073 // Create the schema.
1074 SchemaProto schema =
1075 SchemaBuilder()
1076 .AddType(SchemaTypeConfigBuilder()
1077 .SetType("Person")
1078 .AddProperty(PropertyConfigBuilder()
1079 .SetName("firstName")
1080 .SetDataTypeString(TERM_MATCH_PREFIX,
1081 TOKENIZER_PLAIN)
1082 .SetCardinality(CARDINALITY_OPTIONAL))
1083 .AddProperty(PropertyConfigBuilder()
1084 .SetName("lastName")
1085 .SetDataTypeString(TERM_MATCH_PREFIX,
1086 TOKENIZER_PLAIN)
1087 .SetCardinality(CARDINALITY_OPTIONAL))
1088 .AddProperty(PropertyConfigBuilder()
1089 .SetName("emailAddress")
1090 .SetDataTypeString(TERM_MATCH_PREFIX,
1091 TOKENIZER_PLAIN)
1092 .SetCardinality(CARDINALITY_OPTIONAL)))
1093 .AddType(SchemaTypeConfigBuilder()
1094 .SetType("Email")
1095 .AddProperty(PropertyConfigBuilder()
1096 .SetName("subject")
1097 .SetDataTypeString(TERM_MATCH_PREFIX,
1098 TOKENIZER_PLAIN)
1099 .SetCardinality(CARDINALITY_OPTIONAL))
1100 .AddProperty(PropertyConfigBuilder()
1101 .SetName("body")
1102 .SetDataTypeString(TERM_MATCH_PREFIX,
1103 TOKENIZER_PLAIN)
1104 .SetCardinality(CARDINALITY_OPTIONAL))
1105 .AddProperty(PropertyConfigBuilder()
1106 .SetName("personQualifiedId")
1107 .SetDataTypeJoinableString(
1108 JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1109 .SetCardinality(CARDINALITY_OPTIONAL)))
1110 .Build();
1111
1112 // Create the index.
1113 IcingSearchEngineOptions options;
1114 options.set_base_dir(test_dir);
1115 options.set_index_merge_size(kIcingFullIndexSize);
1116 options.set_document_store_namespace_id_fingerprint(true);
1117 options.set_enable_qualified_id_join_index_v3(true);
1118 options.set_enable_delete_propagation_from(false);
1119 std::unique_ptr<IcingSearchEngine> icing =
1120 std::make_unique<IcingSearchEngine>(options);
1121
1122 ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
1123 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1124
1125 // Create Person documents (parent)
1126 static constexpr int kNumPersonDocuments = 1000;
1127 for (int i = 0; i < kNumPersonDocuments; ++i) {
1128 std::string person_id = std::to_string(i);
1129 DocumentProto person =
1130 DocumentBuilder()
1131 .SetKey("pkg$db/namespace", "person" + person_id)
1132 .SetSchema("Person")
1133 .AddStringProperty("firstName", "first" + person_id)
1134 .AddStringProperty("lastName", "last" + person_id)
1135 .AddStringProperty("emailAddress",
1136 "person" + person_id + "@gmail.com")
1137 .Build();
1138 ASSERT_THAT(icing->Put(std::move(person)).status(), ProtoIsOk());
1139 }
1140
1141 // Create Email documents (child)
1142 static constexpr int kNumEmailDocuments = 1000;
1143 std::uniform_int_distribution<> distrib(0, kNumPersonDocuments - 1);
1144 std::default_random_engine e(/*seed=*/12345);
1145 for (int i = 0; i < kNumEmailDocuments; ++i) {
1146 std::string email_id = std::to_string(i);
1147 std::string person_id = std::to_string(distrib(e));
1148 DocumentProto email =
1149 DocumentBuilder()
1150 .SetKey("namespace", "email" + email_id)
1151 .SetSchema("Email")
1152 .AddStringProperty("subject", "test subject " + email_id)
1153 .AddStringProperty("body", "message body")
1154 .AddStringProperty("personQualifiedId",
1155 "pkg$db/namespace#person" + person_id)
1156 .Build();
1157 ASSERT_THAT(icing->Put(std::move(email)).status(), ProtoIsOk());
1158 }
1159
1160 // Parent SearchSpec
1161 SearchSpecProto search_spec;
1162 search_spec.set_term_match_type(TermMatchType::PREFIX);
1163 search_spec.set_query("firstName:first");
1164
1165 // JoinSpec
1166 JoinSpecProto* join_spec = search_spec.mutable_join_spec();
1167 join_spec->set_parent_property_expression(
1168 std::string(JoinProcessor::kQualifiedIdExpr));
1169 join_spec->set_child_property_expression("personQualifiedId");
1170 join_spec->set_aggregation_scoring_strategy(
1171 JoinSpecProto::AggregationScoringStrategy::MAX);
1172 JoinSpecProto::NestedSpecProto* nested_spec =
1173 join_spec->mutable_nested_spec();
1174 SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
1175 nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
1176 nested_search_spec->set_query("subject:test");
1177 *nested_spec->mutable_scoring_spec() = ScoringSpecProto::default_instance();
1178 *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
1179
1180 static constexpr int kNumPerPage = 10;
1181 ResultSpecProto result_spec;
1182 result_spec.set_num_per_page(kNumPerPage);
1183 result_spec.set_max_joined_children_per_parent_to_return(
1184 std::numeric_limits<int32_t>::max());
1185
1186 ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
1187
1188 const auto child_count_reduce_func =
1189 [](int child_count, const SearchResultProto::ResultProto& result) -> int {
1190 return child_count + result.joined_results_size();
1191 };
1192 for (auto s : state) {
1193 int total_parent_count = 0;
1194 int total_child_count = 0;
1195 SearchResultProto results =
1196 icing->Search(search_spec, score_spec, result_spec);
1197 total_parent_count += results.results_size();
1198 total_child_count +=
1199 std::reduce(results.results().begin(), results.results().end(), 0,
1200 child_count_reduce_func);
1201
1202 ASSERT_THAT(total_parent_count, Eq(kNumPerPage));
1203 ASSERT_THAT(total_child_count, ::testing::Ge(0));
1204 }
1205 }
1206 BENCHMARK(BM_JoinQueryQualifiedId);
1207
BM_PersistToDisk(benchmark::State & state)1208 void BM_PersistToDisk(benchmark::State& state) {
1209 // Initialize the filesystem
1210 std::string test_dir = GetTestTempDir() + "/icing/benchmark";
1211 Filesystem filesystem;
1212 DestructibleDirectory ddir(filesystem, test_dir);
1213
1214 // Create the schema.
1215 std::default_random_engine random;
1216 int num_types = kAvgNumNamespaces * kAvgNumTypes;
1217 ExactStringPropertyGenerator property_generator;
1218 SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
1219 /*num_properties=*/state.range(1), &property_generator);
1220 SchemaProto schema = schema_generator.GenerateSchema(num_types);
1221 EvenDistributionTypeSelector type_selector(schema);
1222
1223 // Generate documents.
1224 int num_docs = state.range(0);
1225 std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
1226 const std::vector<DocumentProto> random_docs =
1227 GenerateRandomDocuments(&type_selector, num_docs, language);
1228
1229 for (auto _ : state) {
1230 state.PauseTiming();
1231 // Create the index.
1232 IcingSearchEngineOptions options;
1233 options.set_base_dir(test_dir);
1234 options.set_index_merge_size(kIcingFullIndexSize);
1235 options.set_use_persistent_hash_map(true);
1236 std::unique_ptr<IcingSearchEngine> icing =
1237 std::make_unique<IcingSearchEngine>(options);
1238
1239 ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
1240 ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
1241
1242 for (const DocumentProto& doc : random_docs) {
1243 ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
1244 }
1245
1246 state.ResumeTiming();
1247
1248 ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
1249
1250 state.PauseTiming();
1251 icing.reset();
1252 ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
1253 state.ResumeTiming();
1254 }
1255 }
1256 BENCHMARK(BM_PersistToDisk)
1257 // Arguments: num_indexed_documents, num_sections
1258 ->ArgPair(1024, 5);
1259
1260 } // namespace
1261
1262 } // namespace lib
1263 } // namespace icing
1264