1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "testing/base/public/benchmark.h"
16 #include "gmock/gmock.h"
17 #include "icing/testing/common-matchers.h"
18 #include "icing/testing/icu-data-file-helper.h"
19 #include "icing/testing/test-data.h"
20 #include "icing/tokenization/language-segmenter-factory.h"
21 #include "icing/tokenization/language-segmenter.h"
22 #include "icing/transform/normalizer.h"
23 #include "unicode/uloc.h"
24
25 // Run on a Linux workstation:
26 // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
27 // //icing/tokenization:language-segmenter_benchmark
28 //
29 // $ blaze-bin/icing/tokenization/language-segmenter_benchmark
30 // --benchmark_filter=all
31 //
32 // Run on an Android device:
33 // Make target //icing/tokenization:language-segmenter depend on
34 // //third_party/icu
35 //
36 // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
37 // --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
38 // //icing/tokenization:language-segmenter_benchmark
39 //
40 // $ adb push
41 // blaze-bin/icing/tokenization/language-segmenter_benchmark
42 // /data/local/tmp/
43 //
44 // $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmark_filter=all
45 // --adb
46
47 // Flag indicating that the benchmark is being run on an Android device via
48 // adb. When set, the benchmarks skip loading the ICU data file from test data.
49 ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
50
51 namespace icing {
52 namespace lib {
53
54 namespace {
55
BM_SegmentNoSpace(benchmark::State & state)56 void BM_SegmentNoSpace(benchmark::State& state) {
57 bool run_via_adb = absl::GetFlag(FLAGS_adb);
58 if (!run_via_adb) {
59 ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
60 GetTestFilePath("icing/icu.dat")));
61 }
62
63 language_segmenter_factory::SegmenterOptions options(ULOC_US);
64 std::unique_ptr<LanguageSegmenter> language_segmenter =
65 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
66
67 std::string input_string(state.range(0), 'A');
68
69 for (auto _ : state) {
70 std::unique_ptr<LanguageSegmenter::Iterator> iterator =
71 language_segmenter->Segment(input_string).ValueOrDie();
72 while (iterator->Advance()) {
73 iterator->GetTerm();
74 }
75 }
76 }
77 BENCHMARK(BM_SegmentNoSpace)
78 ->Arg(1000)
79 ->Arg(2000)
80 ->Arg(4000)
81 ->Arg(8000)
82 ->Arg(16000)
83 ->Arg(32000)
84 ->Arg(64000)
85 ->Arg(128000)
86 ->Arg(256000)
87 ->Arg(384000)
88 ->Arg(512000)
89 ->Arg(1024000)
90 ->Arg(2048000)
91 ->Arg(4096000);
92
BM_SegmentWithSpaces(benchmark::State & state)93 void BM_SegmentWithSpaces(benchmark::State& state) {
94 bool run_via_adb = absl::GetFlag(FLAGS_adb);
95 if (!run_via_adb) {
96 ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
97 GetTestFilePath("icing/icu.dat")));
98 }
99
100 language_segmenter_factory::SegmenterOptions options(ULOC_US);
101 std::unique_ptr<LanguageSegmenter> language_segmenter =
102 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
103
104 std::string input_string(state.range(0), 'A');
105 for (int i = 1; i < input_string.length(); i += 2) {
106 input_string[i] = ' ';
107 }
108
109 for (auto _ : state) {
110 std::unique_ptr<LanguageSegmenter::Iterator> iterator =
111 language_segmenter->Segment(input_string).ValueOrDie();
112 while (iterator->Advance()) {
113 iterator->GetTerm();
114 }
115 }
116 }
117 BENCHMARK(BM_SegmentWithSpaces)
118 ->Arg(1000)
119 ->Arg(2000)
120 ->Arg(4000)
121 ->Arg(8000)
122 ->Arg(16000)
123 ->Arg(32000)
124 ->Arg(64000)
125 ->Arg(128000)
126 ->Arg(256000)
127 ->Arg(384000)
128 ->Arg(512000)
129 ->Arg(1024000)
130 ->Arg(2048000)
131 ->Arg(4096000);
132
BM_SegmentCJK(benchmark::State & state)133 void BM_SegmentCJK(benchmark::State& state) {
134 bool run_via_adb = absl::GetFlag(FLAGS_adb);
135 if (!run_via_adb) {
136 ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
137 GetTestFilePath("icing/icu.dat")));
138 }
139
140 language_segmenter_factory::SegmenterOptions options(ULOC_US);
141 std::unique_ptr<LanguageSegmenter> language_segmenter =
142 language_segmenter_factory::Create(std::move(options)).ValueOrDie();
143
144 std::string input_string;
145 while (input_string.length() < state.range(0)) {
146 input_string.append("你好こんにちは안녕하세요");
147 }
148
149 for (auto _ : state) {
150 std::unique_ptr<LanguageSegmenter::Iterator> iterator =
151 language_segmenter->Segment(input_string).ValueOrDie();
152 while (iterator->Advance()) {
153 iterator->GetTerm();
154 }
155 }
156 }
157 BENCHMARK(BM_SegmentCJK)
158 ->Arg(1000)
159 ->Arg(2000)
160 ->Arg(4000)
161 ->Arg(8000)
162 ->Arg(16000)
163 ->Arg(32000)
164 ->Arg(64000)
165 ->Arg(128000)
166 ->Arg(256000)
167 ->Arg(384000)
168 ->Arg(512000)
169 ->Arg(1024000)
170 ->Arg(2048000)
171 ->Arg(4096000);
172
173 } // namespace
174
175 } // namespace lib
176 } // namespace icing
177