1 /*
2 * Copyright 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <array>
#include <climits>
#include <cstdlib>
#include <functional>
#include <random>
#include <vector>

#include <benchmark/benchmark.h>

#include <audio_utils/intrinsic_utils.h>
#include <audio_utils/format.h>
27
28 /**
29 Pixel 6 Pro (using Android 14 clang)
30
31 ---------------------------------------------------------------------------------
32 Benchmark Time CPU Iterations
33 ---------------------------------------------------------------------------------
34 BM_VectorTestMulLoopFloat/1 1199 ns 1195 ns 583505
35 BM_VectorTestMulLoopFloat/2 2255 ns 2248 ns 317302
36 BM_VectorTestMulLoopFloat/4 4454 ns 4438 ns 158692
37 BM_VectorTestMulLoopFloat/7 7786 ns 7757 ns 90247
38 BM_VectorTestMulLoopFloat/8 8995 ns 8962 ns 76373
39 BM_VectorTestMulLoopFloat/15 17131 ns 17066 ns 41214
40 BM_VectorTestMulLoopFloat/16 18439 ns 18341 ns 38319
41 BM_VectorTestMulConstArraySizeFloat/1 183 ns 182 ns 3938572
42 BM_VectorTestMulConstArraySizeFloat/2 640 ns 638 ns 1113513
43 BM_VectorTestMulConstArraySizeFloat/3 2102 ns 2093 ns 331829
44 BM_VectorTestMulConstArraySizeFloat/4 3771 ns 3758 ns 185266
45 BM_VectorTestMulConstArraySizeFloat/5 1825 ns 1818 ns 382081
46 BM_VectorTestMulConstArraySizeFloat/6 1905 ns 1898 ns 370506
47 BM_VectorTestMulConstArraySizeFloat/7 2745 ns 2734 ns 256104
48 BM_VectorTestMulConstArraySizeFloat/8 2010 ns 2002 ns 351298
49 BM_VectorTestMulConstArraySizeFloat/9 3158 ns 3146 ns 222887
50 BM_VectorTestMulConstArraySizeFloat/10 3018 ns 3007 ns 233799
51 BM_VectorTestMulConstArraySizeFloat/11 4005 ns 3991 ns 176145
52 BM_VectorTestMulConstArraySizeFloat/12 3081 ns 3068 ns 228512
53 BM_VectorTestMulConstArraySizeFloat/13 4409 ns 4393 ns 159303
54 BM_VectorTestMulConstArraySizeFloat/14 4242 ns 4219 ns 165899
55 BM_VectorTestMulConstArraySizeFloat/15 5301 ns 5279 ns 134157
56 BM_VectorTestMulConstArraySizeFloat/16 4078 ns 4063 ns 174066
57 BM_VectorTestMulConstArraySizeFloat/17 5693 ns 5669 ns 125403
58 BM_VectorTestMulConstArraySizeFloat/18 5339 ns 5318 ns 131839
59 BM_VectorTestMulConstArraySizeFloat/19 6508 ns 6483 ns 108158
60 BM_VectorTestMulConstArraySizeFloat/20 5108 ns 5089 ns 139637
61 BM_VectorTestMulConstArraySizeFloat/21 6896 ns 6868 ns 102084
62 BM_VectorTestMulConstArraySizeFloat/22 6523 ns 6490 ns 109281
63 BM_VectorTestMulConstArraySizeFloat/23 7734 ns 7686 ns 92986
64 BM_VectorTestMulConstArraySizeFloat/24 6138 ns 6071 ns 116883
65 BM_VectorTestMulConstArraySizeFloat/25 8122 ns 8085 ns 86703
66 BM_VectorTestMulConstArraySizeFloat/26 7670 ns 7637 ns 91665
67 BM_VectorTestMulConstArraySizeFloat/27 9026 ns 8988 ns 78633
68 BM_VectorTestMulConstArraySizeFloat/28 7161 ns 7129 ns 99711
69 BM_VectorTestMulConstArraySizeFloat/29 9380 ns 9341 ns 75947
70 BM_VectorTestMulConstArraySizeFloat/30 8878 ns 8838 ns 79578
71 BM_VectorTestMulConstArraySizeFloat/31 10277 ns 10230 ns 67954
72 BM_VectorTestMulConstArraySizeFloat/32 8122 ns 8083 ns 87244
73 BM_VectorTestMulForcedIntrinsics/1 188 ns 187 ns 3628943
74 BM_VectorTestMulForcedIntrinsics/2 1184 ns 1180 ns 565704
75 BM_VectorTestMulForcedIntrinsics/3 1692 ns 1684 ns 414409
76 BM_VectorTestMulForcedIntrinsics/4 1227 ns 1222 ns 578638
77 BM_VectorTestMulForcedIntrinsics/5 1885 ns 1878 ns 366852
78 BM_VectorTestMulForcedIntrinsics/6 1984 ns 1976 ns 352979
79 BM_VectorTestMulForcedIntrinsics/7 2815 ns 2803 ns 249306
80 BM_VectorTestMulForcedIntrinsics/8 2081 ns 2073 ns 339434
81 BM_VectorTestMulForcedIntrinsics/9 3051 ns 3040 ns 229261
82 BM_VectorTestMulForcedIntrinsics/10 3198 ns 3187 ns 220889
83 BM_VectorTestMulForcedIntrinsics/11 4083 ns 4067 ns 171785
84 BM_VectorTestMulForcedIntrinsics/12 3167 ns 3156 ns 221858
85 BM_VectorTestMulForcedIntrinsics/13 4497 ns 4479 ns 156926
86 BM_VectorTestMulForcedIntrinsics/14 4339 ns 4323 ns 162496
87 BM_VectorTestMulForcedIntrinsics/15 5294 ns 5274 ns 135733
88 BM_VectorTestMulForcedIntrinsics/16 4167 ns 4150 ns 168642
89 BM_VectorTestMulForcedIntrinsics/17 5732 ns 5710 ns 122927
90 BM_VectorTestMulForcedIntrinsics/18 5449 ns 5424 ns 131800
91 BM_VectorTestMulForcedIntrinsics/19 6539 ns 6504 ns 107850
92 BM_VectorTestMulForcedIntrinsics/20 5219 ns 5198 ns 135148
93 BM_VectorTestMulForcedIntrinsics/21 6676 ns 6639 ns 105846
94 BM_VectorTestMulForcedIntrinsics/22 6618 ns 6589 ns 107258
95 BM_VectorTestMulForcedIntrinsics/23 7774 ns 7741 ns 90216
96 BM_VectorTestMulForcedIntrinsics/24 6231 ns 6201 ns 116996
97 BM_VectorTestMulForcedIntrinsics/25 8156 ns 8121 ns 86237
98 BM_VectorTestMulForcedIntrinsics/26 7615 ns 7578 ns 91086
99 BM_VectorTestMulForcedIntrinsics/27 9067 ns 8995 ns 76733
100 BM_VectorTestMulForcedIntrinsics/28 7090 ns 7031 ns 101117
101 BM_VectorTestMulForcedIntrinsics/29 9220 ns 9160 ns 76350
102 BM_VectorTestMulForcedIntrinsics/30 8895 ns 8832 ns 80551
103 BM_VectorTestMulForcedIntrinsics/31 10060 ns 10001 ns 71265
104 BM_VectorTestMulForcedIntrinsics/32 8056 ns 7996 ns 88176
105 BM_VectorTestAddConstArraySizeFloat/1 188 ns 187 ns 3742628
106 BM_VectorTestAddConstArraySizeFloat/2 634 ns 631 ns 1095480
107 BM_VectorTestAddConstArraySizeFloat/4 3723 ns 3710 ns 188332
108 BM_VectorTestAddConstArraySizeFloat/7 2791 ns 2777 ns 252911
109 BM_VectorTestAddConstArraySizeFloat/8 2060 ns 2051 ns 345573
110 BM_VectorTestAddConstArraySizeFloat/15 5322 ns 5302 ns 132415
111 BM_VectorTestAddConstArraySizeFloat/16 4101 ns 4083 ns 170300
112 BM_VectorTestAddForcedIntrinsics/1 187 ns 186 ns 3656441
113 BM_VectorTestAddForcedIntrinsics/2 1184 ns 1178 ns 564643
114 BM_VectorTestAddForcedIntrinsics/4 1218 ns 1213 ns 584709
115 BM_VectorTestAddForcedIntrinsics/7 2775 ns 2764 ns 252256
116 BM_VectorTestAddForcedIntrinsics/8 2070 ns 2062 ns 342709
117 BM_VectorTestAddForcedIntrinsics/15 5213 ns 5192 ns 132663
118 BM_VectorTestAddForcedIntrinsics/16 4116 ns 4100 ns 171005
119
120
Pixel 9 Pro XL (using Android 14 clang)
122 ---------------------------------------------------------------------------------
123 Benchmark Time CPU Iterations
124 ---------------------------------------------------------------------------------
125 BM_VectorTestMulLoopFloat/1 1171 ns 1166 ns 450848
126 BM_VectorTestMulLoopFloat/2 1847 ns 1840 ns 381613
127 BM_VectorTestMulLoopFloat/4 3432 ns 3423 ns 205730
128 BM_VectorTestMulLoopFloat/7 5615 ns 5598 ns 124818
129 BM_VectorTestMulLoopFloat/8 6411 ns 6383 ns 109013
130 BM_VectorTestMulLoopFloat/15 12371 ns 12332 ns 55439
131 BM_VectorTestMulLoopFloat/16 13594 ns 13555 ns 51753
132 BM_VectorTestMulConstArraySizeFloat/1 153 ns 152 ns 4534625
133 BM_VectorTestMulConstArraySizeFloat/2 683 ns 680 ns 1005789
134 BM_VectorTestMulConstArraySizeFloat/3 886 ns 883 ns 803793
135 BM_VectorTestMulConstArraySizeFloat/4 1491 ns 1487 ns 471683
136 BM_VectorTestMulConstArraySizeFloat/5 1448 ns 1443 ns 486353
137 BM_VectorTestMulConstArraySizeFloat/6 1482 ns 1478 ns 474901
138 BM_VectorTestMulConstArraySizeFloat/7 2279 ns 2272 ns 308978
139 BM_VectorTestMulConstArraySizeFloat/8 1620 ns 1600 ns 438957
140 BM_VectorTestMulConstArraySizeFloat/9 2505 ns 2487 ns 283335
141 BM_VectorTestMulConstArraySizeFloat/10 2389 ns 2386 ns 293332
142 BM_VectorTestMulConstArraySizeFloat/11 3185 ns 3180 ns 219746
143 BM_VectorTestMulConstArraySizeFloat/12 2285 ns 2280 ns 307091
144 BM_VectorTestMulConstArraySizeFloat/13 3464 ns 3459 ns 201902
145 BM_VectorTestMulConstArraySizeFloat/14 3254 ns 3249 ns 215345
146 BM_VectorTestMulConstArraySizeFloat/15 4156 ns 4149 ns 169102
147 BM_VectorTestMulConstArraySizeFloat/16 3075 ns 3068 ns 228544
148 BM_VectorTestMulConstArraySizeFloat/17 4469 ns 4442 ns 157317
149 BM_VectorTestMulConstArraySizeFloat/18 4141 ns 4133 ns 170148
150 BM_VectorTestMulConstArraySizeFloat/19 5193 ns 5179 ns 135294
151 BM_VectorTestMulConstArraySizeFloat/20 3876 ns 3866 ns 181134
152 BM_VectorTestMulConstArraySizeFloat/21 5450 ns 5429 ns 129921
153 BM_VectorTestMulConstArraySizeFloat/22 5075 ns 5056 ns 139238
154 BM_VectorTestMulConstArraySizeFloat/23 6145 ns 6125 ns 114880
155 BM_VectorTestMulConstArraySizeFloat/24 4659 ns 4646 ns 150923
156 BM_VectorTestMulConstArraySizeFloat/25 6423 ns 6400 ns 109467
157 BM_VectorTestMulConstArraySizeFloat/26 5962 ns 5947 ns 117755
158 BM_VectorTestMulConstArraySizeFloat/27 7139 ns 7115 ns 98581
159 BM_VectorTestMulConstArraySizeFloat/28 5462 ns 5446 ns 128477
160 BM_VectorTestMulConstArraySizeFloat/29 7431 ns 7399 ns 94492
161 BM_VectorTestMulConstArraySizeFloat/30 6877 ns 6854 ns 101706
162 BM_VectorTestMulConstArraySizeFloat/31 8322 ns 8304 ns 83352
163 BM_VectorTestMulConstArraySizeFloat/32 6223 ns 6208 ns 114265
164 BM_VectorTestMulForcedIntrinsics/1 160 ns 160 ns 4365646
165 BM_VectorTestMulForcedIntrinsics/2 848 ns 845 ns 807945
166 BM_VectorTestMulForcedIntrinsics/3 1435 ns 1430 ns 489448
167 BM_VectorTestMulForcedIntrinsics/4 937 ns 934 ns 757416
168 BM_VectorTestMulForcedIntrinsics/5 1477 ns 1473 ns 474891
169 BM_VectorTestMulForcedIntrinsics/6 1825 ns 1820 ns 385118
170 BM_VectorTestMulForcedIntrinsics/7 2303 ns 2298 ns 303823
171 BM_VectorTestMulForcedIntrinsics/8 1643 ns 1638 ns 430851
172 BM_VectorTestMulForcedIntrinsics/9 2490 ns 2482 ns 281294
173 BM_VectorTestMulForcedIntrinsics/10 2429 ns 2423 ns 291028
174 BM_VectorTestMulForcedIntrinsics/11 3201 ns 3193 ns 219256
175 BM_VectorTestMulForcedIntrinsics/12 2341 ns 2335 ns 302086
176 BM_VectorTestMulForcedIntrinsics/13 3475 ns 3466 ns 201570
177 BM_VectorTestMulForcedIntrinsics/14 3294 ns 3286 ns 212762
178 BM_VectorTestMulForcedIntrinsics/15 4141 ns 4129 ns 169275
179 BM_VectorTestMulForcedIntrinsics/16 3123 ns 3116 ns 225516
180 BM_VectorTestMulForcedIntrinsics/17 4447 ns 4436 ns 157620
181 BM_VectorTestMulForcedIntrinsics/18 4175 ns 4163 ns 168170
182 BM_VectorTestMulForcedIntrinsics/19 5164 ns 5147 ns 134830
183 BM_VectorTestMulForcedIntrinsics/20 3927 ns 3917 ns 179070
184 BM_VectorTestMulForcedIntrinsics/21 5481 ns 5449 ns 126196
185 BM_VectorTestMulForcedIntrinsics/22 5124 ns 5109 ns 138492
186 BM_VectorTestMulForcedIntrinsics/23 6142 ns 6125 ns 113071
187 BM_VectorTestMulForcedIntrinsics/24 4690 ns 4675 ns 150096
188 BM_VectorTestMulForcedIntrinsics/25 6423 ns 6398 ns 108462
189 BM_VectorTestMulForcedIntrinsics/26 6047 ns 6029 ns 117408
190 BM_VectorTestMulForcedIntrinsics/27 7150 ns 7128 ns 97901
191 BM_VectorTestMulForcedIntrinsics/28 5483 ns 5467 ns 129504
192 BM_VectorTestMulForcedIntrinsics/29 7416 ns 7390 ns 94167
193 BM_VectorTestMulForcedIntrinsics/30 6960 ns 6934 ns 102061
194 BM_VectorTestMulForcedIntrinsics/31 8073 ns 8043 ns 87555
195 BM_VectorTestMulForcedIntrinsics/32 6255 ns 6235 ns 113705
196 BM_VectorTestAddConstArraySizeFloat/1 161 ns 161 ns 4339090
197 BM_VectorTestAddConstArraySizeFloat/2 718 ns 716 ns 958914
198 BM_VectorTestAddConstArraySizeFloat/4 1500 ns 1496 ns 468059
199 BM_VectorTestAddConstArraySizeFloat/7 2334 ns 2326 ns 301694
200 BM_VectorTestAddConstArraySizeFloat/8 1655 ns 1651 ns 428569
201 BM_VectorTestAddConstArraySizeFloat/15 4224 ns 4214 ns 166108
202 BM_VectorTestAddConstArraySizeFloat/16 3229 ns 3219 ns 217681
203 BM_VectorTestAddForcedIntrinsics/1 164 ns 163 ns 4286279
204 BM_VectorTestAddForcedIntrinsics/2 858 ns 854 ns 795537
205 BM_VectorTestAddForcedIntrinsics/4 927 ns 924 ns 761731
206 BM_VectorTestAddForcedIntrinsics/7 2333 ns 2325 ns 301963
207 BM_VectorTestAddForcedIntrinsics/8 1658 ns 1654 ns 425574
208 BM_VectorTestAddForcedIntrinsics/15 4096 ns 4087 ns 171278
209 BM_VectorTestAddForcedIntrinsics/16 3245 ns 3236 ns 217538
210
211 */
212
213 using namespace android::audio_utils::intrinsics;
214
215 static constexpr size_t kDataSize = 2048;
216
217 // exhaustively go from 1-32 channels.
TestFullArgs(benchmark::internal::Benchmark * b)218 static void TestFullArgs(benchmark::internal::Benchmark* b) {
219 constexpr int kChannelCountMin = 1;
220 constexpr int kChannelCountMax = 32;
221 for (int i = kChannelCountMin; i <= kChannelCountMax; ++i) {
222 b->Args({i});
223 }
224 }
225
226 // selective channels to test.
TestArgs(benchmark::internal::Benchmark * b)227 static void TestArgs(benchmark::internal::Benchmark* b) {
228 for (int i : { 1, 2, 4, 7, 8, 15, 16 }) {
229 b->Args({i});
230 }
231 }
232
// Macro test operator.
//
// Applies Traits::func_ to one N-wide vector read from in1 and one read from in2,
// stores the result at out, then advances all three pointers by N elements.
//
// The expansion site must provide: the type F, the template V<T, N>, the type
// Traits, and modifiable pointers out (F*), in1 and in2 (const F*).
//
// The body is wrapped in do { } while (0) so that "OPERATOR(N);" is exactly one
// statement and stays correct under an unbraced if / else or loop.

#define OPERATOR(N) \
    do { \
        *reinterpret_cast<V<F, (N)>*>(out) = Traits::func_( \
                *reinterpret_cast<const V<F, (N)>*>(in1), \
                *reinterpret_cast<const V<F, (N)>*>(in2)); \
        out += (N); \
        in1 += (N); \
        in2 += (N); \
    } while (0)

// Macro to instantiate switch case statements.
//
// Expands to one case label that binds mFunc to the N-channel kernel TestFunc<N>.
// Write it as "INSTANTIATE(N);" - the caller's semicolon terminates the break.

#define INSTANTIATE(N) case (N): mFunc = TestFunc<(N)>; break
246
247 template <typename Traits>
248 class Processor {
249 public:
250 // shorthand aliases
251 using F = typename Traits::data_t;
252 template <typename T, int N>
253 using V = typename Traits::template container_t<T, N>;
254 template <size_t N>
TestFunc(F * out,const F * in1,const F * in2,size_t count)255 static void TestFunc(F* out, const F* in1, const F* in2, size_t count) {
256 static_assert(sizeof(V<F, N>) == N * sizeof(F));
257 for (size_t i = 0; i < count; ++i) {
258 OPERATOR(N);
259 }
260 }
261
Processor(int channelCount)262 Processor(int channelCount)
263 : mChannelCount(channelCount) {
264
265 if constexpr (Traits::loop_) {
266 mFunc = [channelCount](F* out, const F* in1, const F* in2, size_t count) {
267 for (size_t i = 0; i < count; ++i) {
268 for (size_t j = 0; j < channelCount; ++j) {
269 OPERATOR(1);
270 }
271 }
272 };
273 return;
274 }
275 switch (channelCount) {
276 INSTANTIATE(1);
277 INSTANTIATE(2);
278 INSTANTIATE(3);
279 INSTANTIATE(4);
280 INSTANTIATE(5);
281 INSTANTIATE(6);
282 INSTANTIATE(7);
283 INSTANTIATE(8);
284 INSTANTIATE(9);
285 INSTANTIATE(10);
286 INSTANTIATE(11);
287 INSTANTIATE(12);
288 INSTANTIATE(13);
289 INSTANTIATE(14);
290 INSTANTIATE(15);
291 INSTANTIATE(16);
292 INSTANTIATE(17);
293 INSTANTIATE(18);
294 INSTANTIATE(19);
295 INSTANTIATE(20);
296 INSTANTIATE(21);
297 INSTANTIATE(22);
298 INSTANTIATE(23);
299 INSTANTIATE(24);
300 INSTANTIATE(25);
301 INSTANTIATE(26);
302 INSTANTIATE(27);
303 INSTANTIATE(28);
304 INSTANTIATE(29);
305 INSTANTIATE(30);
306 INSTANTIATE(31);
307 INSTANTIATE(32);
308 }
309 }
310
process(F * out,const F * in1,const F * in2,size_t frames)311 void process(F* out, const F* in1, const F* in2, size_t frames) {
312 mFunc(out, in1, in2, frames);
313 }
314
315 const size_t mChannelCount;
316 /* const */ std::function<void(F*, const F*, const F*, size_t)> mFunc;
317 };
318
319 template <typename Traits>
BM_VectorTest(benchmark::State & state)320 static void BM_VectorTest(benchmark::State& state) {
321 using F = typename Traits::data_t;
322 const size_t channelCount = state.range(0);
323
324 std::vector<F> input1(kDataSize * channelCount);
325 std::vector<F> input2(kDataSize * channelCount);
326 std::vector<F> output(kDataSize * channelCount);
327
328 // Initialize input buffer and coefs with deterministic pseudo-random values
329 std::minstd_rand gen(42);
330 const F amplitude = 1.;
331 std::uniform_real_distribution<> dis(-amplitude, amplitude);
332 for (auto& in : input1) {
333 in = dis(gen);
334 }
335 for (auto& in : input2) {
336 in = dis(gen);
337 }
338
339 Processor<Traits> processor(channelCount);
340
341 // Run the test
342 while (state.KeepRunning()) {
343 benchmark::DoNotOptimize(input1.data());
344 benchmark::DoNotOptimize(input2.data());
345 benchmark::DoNotOptimize(output.data());
346 processor.process(output.data(), input1.data(), input2.data(), kDataSize);
347 benchmark::ClobberMemory();
348 }
349 state.SetComplexityN(channelCount);
350 }
351
352 // Clang has an issue with -frelaxed-template-template-args where
353 // it may not follow the C++17 guidelines. Use a traits struct to
354 // pass in parameters.
355
356 // Test using two loops.
357 struct LoopFloatTraits {
358 template <typename F, int N>
359 using container_t = internal_array_t<F, N>;
360 using data_t = float;
361 static constexpr bool loop_ = true;
362 };
363
364 // Test using two loops, the inner loop is constexpr size.
365 struct ConstArraySizeFloatTraits {
366 template <typename F, int N>
367 using container_t = internal_array_t<F, N>;
368 using data_t = float;
369 static constexpr bool loop_ = false;
370 };
371
372 // Test using intrinsics, if available.
373 struct ForcedIntrinsicsTraits {
374 template <typename F, int N>
375 using container_t = vector_hw_t<F, N>;
376 using data_t = float;
377 static constexpr bool loop_ = false;
378 };
379
// --- MULTIPLY

// Operation policy mixed into the traits below: func_ forwards to vmul(a, b).
struct MulFunc {
    template <typename T>
    static auto func_(T a, T b) -> T {
        return vmul(a, b);
    }
};
386
387 struct MulLoopFloatTraits : public LoopFloatTraits, public MulFunc {};
388
BM_VectorTestMulLoopFloat(benchmark::State & state)389 static void BM_VectorTestMulLoopFloat(benchmark::State& state) {
390 BM_VectorTest<MulLoopFloatTraits>(state);
391 }
392
393 struct MulConstArraySizeFloatTraits : public ConstArraySizeFloatTraits, public MulFunc {};
394
BM_VectorTestMulConstArraySizeFloat(benchmark::State & state)395 static void BM_VectorTestMulConstArraySizeFloat(benchmark::State& state) {
396 BM_VectorTest<MulConstArraySizeFloatTraits>(state);
397 }
398
399 struct MulForcedIntrinsicsTraits : public ForcedIntrinsicsTraits, public MulFunc {};
400
BM_VectorTestMulForcedIntrinsics(benchmark::State & state)401 static void BM_VectorTestMulForcedIntrinsics(benchmark::State& state) {
402 BM_VectorTest<MulForcedIntrinsicsTraits>(state);
403 }
404
405 BENCHMARK(BM_VectorTestMulLoopFloat)->Apply(TestArgs);
406
407 BENCHMARK(BM_VectorTestMulConstArraySizeFloat)->Apply(TestFullArgs);
408
409 BENCHMARK(BM_VectorTestMulForcedIntrinsics)->Apply(TestFullArgs);
410
// --- ADD

// Operation policy mixed into the traits below: func_ forwards to vadd(a, b).
struct AddFunc {
    template <typename T>
    static auto func_(T a, T b) -> T {
        return vadd(a, b);
    }
};
417
418 struct AddConstArraySizeFloatTraits : public ConstArraySizeFloatTraits, public AddFunc {};
419
BM_VectorTestAddConstArraySizeFloat(benchmark::State & state)420 static void BM_VectorTestAddConstArraySizeFloat(benchmark::State& state) {
421 BM_VectorTest<AddConstArraySizeFloatTraits>(state);
422 }
423
424 struct AddForcedIntrinsicsTraits : public ForcedIntrinsicsTraits, public AddFunc {};
425
BM_VectorTestAddForcedIntrinsics(benchmark::State & state)426 static void BM_VectorTestAddForcedIntrinsics(benchmark::State& state) {
427 BM_VectorTest<AddForcedIntrinsicsTraits>(state);
428 }
429
430 BENCHMARK(BM_VectorTestAddConstArraySizeFloat)->Apply(TestArgs);
431
432 BENCHMARK(BM_VectorTestAddForcedIntrinsics)->Apply(TestArgs);
433
434 BENCHMARK_MAIN();
435