• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "testing/base/public/benchmark.h"
16 #include "gmock/gmock.h"
17 #include "icing/testing/common-matchers.h"
18 #include "icing/testing/icu-data-file-helper.h"
19 #include "icing/testing/test-data.h"
20 #include "icing/tokenization/language-segmenter-factory.h"
21 #include "icing/tokenization/language-segmenter.h"
22 #include "icing/transform/normalizer.h"
23 #include "unicode/uloc.h"
24 
25 // Run on a Linux workstation:
26 //    $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
27 //    //icing/tokenization:language-segmenter_benchmark
28 //
29 //    $ blaze-bin/icing/tokenization/language-segmenter_benchmark
30 //    --benchmark_filter=all
31 //
32 // Run on an Android device:
33 //    Make target //icing/tokenization:language-segmenter depend on
34 //    //third_party/icu
35 //
36 //    $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
37 //    --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
38 //    //icing/tokenization:language-segmenter_benchmark
39 //
40 //    $ adb push
41 //    blaze-bin/icing/tokenization/language-segmenter_benchmark
42 //    /data/local/tmp/
43 //
44 //    $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all
45 //    --adb
46 
47 // Flag to tell the benchmark that it'll be run on an Android device via adb,
48 // the benchmark will set up data files accordingly.
49 ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
50 
51 namespace icing {
52 namespace lib {
53 
54 namespace {
55 
BM_SegmentNoSpace(benchmark::State & state)56 void BM_SegmentNoSpace(benchmark::State& state) {
57   bool run_via_adb = absl::GetFlag(FLAGS_adb);
58   if (!run_via_adb) {
59     ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
60         GetTestFilePath("icing/icu.dat")));
61   }
62 
63   language_segmenter_factory::SegmenterOptions options(ULOC_US);
64   std::unique_ptr<LanguageSegmenter> language_segmenter =
65       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
66 
67   std::string input_string(state.range(0), 'A');
68 
69   for (auto _ : state) {
70     std::unique_ptr<LanguageSegmenter::Iterator> iterator =
71         language_segmenter->Segment(input_string).ValueOrDie();
72     while (iterator->Advance()) {
73       iterator->GetTerm();
74     }
75   }
76 }
77 BENCHMARK(BM_SegmentNoSpace)
78     ->Arg(1000)
79     ->Arg(2000)
80     ->Arg(4000)
81     ->Arg(8000)
82     ->Arg(16000)
83     ->Arg(32000)
84     ->Arg(64000)
85     ->Arg(128000)
86     ->Arg(256000)
87     ->Arg(384000)
88     ->Arg(512000)
89     ->Arg(1024000)
90     ->Arg(2048000)
91     ->Arg(4096000);
92 
BM_SegmentWithSpaces(benchmark::State & state)93 void BM_SegmentWithSpaces(benchmark::State& state) {
94   bool run_via_adb = absl::GetFlag(FLAGS_adb);
95   if (!run_via_adb) {
96     ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
97         GetTestFilePath("icing/icu.dat")));
98   }
99 
100   language_segmenter_factory::SegmenterOptions options(ULOC_US);
101   std::unique_ptr<LanguageSegmenter> language_segmenter =
102       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
103 
104   std::string input_string(state.range(0), 'A');
105   for (int i = 1; i < input_string.length(); i += 2) {
106     input_string[i] = ' ';
107   }
108 
109   for (auto _ : state) {
110     std::unique_ptr<LanguageSegmenter::Iterator> iterator =
111         language_segmenter->Segment(input_string).ValueOrDie();
112     while (iterator->Advance()) {
113       iterator->GetTerm();
114     }
115   }
116 }
117 BENCHMARK(BM_SegmentWithSpaces)
118     ->Arg(1000)
119     ->Arg(2000)
120     ->Arg(4000)
121     ->Arg(8000)
122     ->Arg(16000)
123     ->Arg(32000)
124     ->Arg(64000)
125     ->Arg(128000)
126     ->Arg(256000)
127     ->Arg(384000)
128     ->Arg(512000)
129     ->Arg(1024000)
130     ->Arg(2048000)
131     ->Arg(4096000);
132 
BM_SegmentCJK(benchmark::State & state)133 void BM_SegmentCJK(benchmark::State& state) {
134   bool run_via_adb = absl::GetFlag(FLAGS_adb);
135   if (!run_via_adb) {
136     ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
137         GetTestFilePath("icing/icu.dat")));
138   }
139 
140   language_segmenter_factory::SegmenterOptions options(ULOC_US);
141   std::unique_ptr<LanguageSegmenter> language_segmenter =
142       language_segmenter_factory::Create(std::move(options)).ValueOrDie();
143 
144   std::string input_string;
145   while (input_string.length() < state.range(0)) {
146     input_string.append("你好こんにちは안녕하세요");
147   }
148 
149   for (auto _ : state) {
150     std::unique_ptr<LanguageSegmenter::Iterator> iterator =
151         language_segmenter->Segment(input_string).ValueOrDie();
152     while (iterator->Advance()) {
153       iterator->GetTerm();
154     }
155   }
156 }
157 BENCHMARK(BM_SegmentCJK)
158     ->Arg(1000)
159     ->Arg(2000)
160     ->Arg(4000)
161     ->Arg(8000)
162     ->Arg(16000)
163     ->Arg(32000)
164     ->Arg(64000)
165     ->Arg(128000)
166     ->Arg(256000)
167     ->Arg(384000)
168     ->Arg(512000)
169     ->Arg(1024000)
170     ->Arg(2048000)
171     ->Arg(4096000);
172 
173 }  // namespace
174 
175 }  // namespace lib
176 }  // namespace icing
177