• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <array>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <random>
#include <vector>
22 
23 #include <benchmark/benchmark.h>
24 
25 #include <audio_utils/intrinsic_utils.h>
26 #include <audio_utils/format.h>
27 
28 /**
29 Pixel 6 Pro (using Android 14 clang)
30 
31 ---------------------------------------------------------------------------------
32 Benchmark                                       Time             CPU   Iterations
33 ---------------------------------------------------------------------------------
34 BM_VectorTestMulLoopFloat/1                  1199 ns         1195 ns       583505
35 BM_VectorTestMulLoopFloat/2                  2255 ns         2248 ns       317302
36 BM_VectorTestMulLoopFloat/4                  4454 ns         4438 ns       158692
37 BM_VectorTestMulLoopFloat/7                  7786 ns         7757 ns        90247
38 BM_VectorTestMulLoopFloat/8                  8995 ns         8962 ns        76373
39 BM_VectorTestMulLoopFloat/15                17131 ns        17066 ns        41214
40 BM_VectorTestMulLoopFloat/16                18439 ns        18341 ns        38319
41 BM_VectorTestMulConstArraySizeFloat/1         183 ns          182 ns      3938572
42 BM_VectorTestMulConstArraySizeFloat/2         640 ns          638 ns      1113513
43 BM_VectorTestMulConstArraySizeFloat/3        2102 ns         2093 ns       331829
44 BM_VectorTestMulConstArraySizeFloat/4        3771 ns         3758 ns       185266
45 BM_VectorTestMulConstArraySizeFloat/5        1825 ns         1818 ns       382081
46 BM_VectorTestMulConstArraySizeFloat/6        1905 ns         1898 ns       370506
47 BM_VectorTestMulConstArraySizeFloat/7        2745 ns         2734 ns       256104
48 BM_VectorTestMulConstArraySizeFloat/8        2010 ns         2002 ns       351298
49 BM_VectorTestMulConstArraySizeFloat/9        3158 ns         3146 ns       222887
50 BM_VectorTestMulConstArraySizeFloat/10       3018 ns         3007 ns       233799
51 BM_VectorTestMulConstArraySizeFloat/11       4005 ns         3991 ns       176145
52 BM_VectorTestMulConstArraySizeFloat/12       3081 ns         3068 ns       228512
53 BM_VectorTestMulConstArraySizeFloat/13       4409 ns         4393 ns       159303
54 BM_VectorTestMulConstArraySizeFloat/14       4242 ns         4219 ns       165899
55 BM_VectorTestMulConstArraySizeFloat/15       5301 ns         5279 ns       134157
56 BM_VectorTestMulConstArraySizeFloat/16       4078 ns         4063 ns       174066
57 BM_VectorTestMulConstArraySizeFloat/17       5693 ns         5669 ns       125403
58 BM_VectorTestMulConstArraySizeFloat/18       5339 ns         5318 ns       131839
59 BM_VectorTestMulConstArraySizeFloat/19       6508 ns         6483 ns       108158
60 BM_VectorTestMulConstArraySizeFloat/20       5108 ns         5089 ns       139637
61 BM_VectorTestMulConstArraySizeFloat/21       6896 ns         6868 ns       102084
62 BM_VectorTestMulConstArraySizeFloat/22       6523 ns         6490 ns       109281
63 BM_VectorTestMulConstArraySizeFloat/23       7734 ns         7686 ns        92986
64 BM_VectorTestMulConstArraySizeFloat/24       6138 ns         6071 ns       116883
65 BM_VectorTestMulConstArraySizeFloat/25       8122 ns         8085 ns        86703
66 BM_VectorTestMulConstArraySizeFloat/26       7670 ns         7637 ns        91665
67 BM_VectorTestMulConstArraySizeFloat/27       9026 ns         8988 ns        78633
68 BM_VectorTestMulConstArraySizeFloat/28       7161 ns         7129 ns        99711
69 BM_VectorTestMulConstArraySizeFloat/29       9380 ns         9341 ns        75947
70 BM_VectorTestMulConstArraySizeFloat/30       8878 ns         8838 ns        79578
71 BM_VectorTestMulConstArraySizeFloat/31      10277 ns        10230 ns        67954
72 BM_VectorTestMulConstArraySizeFloat/32       8122 ns         8083 ns        87244
73 BM_VectorTestMulForcedIntrinsics/1            188 ns          187 ns      3628943
74 BM_VectorTestMulForcedIntrinsics/2           1184 ns         1180 ns       565704
75 BM_VectorTestMulForcedIntrinsics/3           1692 ns         1684 ns       414409
76 BM_VectorTestMulForcedIntrinsics/4           1227 ns         1222 ns       578638
77 BM_VectorTestMulForcedIntrinsics/5           1885 ns         1878 ns       366852
78 BM_VectorTestMulForcedIntrinsics/6           1984 ns         1976 ns       352979
79 BM_VectorTestMulForcedIntrinsics/7           2815 ns         2803 ns       249306
80 BM_VectorTestMulForcedIntrinsics/8           2081 ns         2073 ns       339434
81 BM_VectorTestMulForcedIntrinsics/9           3051 ns         3040 ns       229261
82 BM_VectorTestMulForcedIntrinsics/10          3198 ns         3187 ns       220889
83 BM_VectorTestMulForcedIntrinsics/11          4083 ns         4067 ns       171785
84 BM_VectorTestMulForcedIntrinsics/12          3167 ns         3156 ns       221858
85 BM_VectorTestMulForcedIntrinsics/13          4497 ns         4479 ns       156926
86 BM_VectorTestMulForcedIntrinsics/14          4339 ns         4323 ns       162496
87 BM_VectorTestMulForcedIntrinsics/15          5294 ns         5274 ns       135733
88 BM_VectorTestMulForcedIntrinsics/16          4167 ns         4150 ns       168642
89 BM_VectorTestMulForcedIntrinsics/17          5732 ns         5710 ns       122927
90 BM_VectorTestMulForcedIntrinsics/18          5449 ns         5424 ns       131800
91 BM_VectorTestMulForcedIntrinsics/19          6539 ns         6504 ns       107850
92 BM_VectorTestMulForcedIntrinsics/20          5219 ns         5198 ns       135148
93 BM_VectorTestMulForcedIntrinsics/21          6676 ns         6639 ns       105846
94 BM_VectorTestMulForcedIntrinsics/22          6618 ns         6589 ns       107258
95 BM_VectorTestMulForcedIntrinsics/23          7774 ns         7741 ns        90216
96 BM_VectorTestMulForcedIntrinsics/24          6231 ns         6201 ns       116996
97 BM_VectorTestMulForcedIntrinsics/25          8156 ns         8121 ns        86237
98 BM_VectorTestMulForcedIntrinsics/26          7615 ns         7578 ns        91086
99 BM_VectorTestMulForcedIntrinsics/27          9067 ns         8995 ns        76733
100 BM_VectorTestMulForcedIntrinsics/28          7090 ns         7031 ns       101117
101 BM_VectorTestMulForcedIntrinsics/29          9220 ns         9160 ns        76350
102 BM_VectorTestMulForcedIntrinsics/30          8895 ns         8832 ns        80551
103 BM_VectorTestMulForcedIntrinsics/31         10060 ns        10001 ns        71265
104 BM_VectorTestMulForcedIntrinsics/32          8056 ns         7996 ns        88176
105 BM_VectorTestAddConstArraySizeFloat/1         188 ns          187 ns      3742628
106 BM_VectorTestAddConstArraySizeFloat/2         634 ns          631 ns      1095480
107 BM_VectorTestAddConstArraySizeFloat/4        3723 ns         3710 ns       188332
108 BM_VectorTestAddConstArraySizeFloat/7        2791 ns         2777 ns       252911
109 BM_VectorTestAddConstArraySizeFloat/8        2060 ns         2051 ns       345573
110 BM_VectorTestAddConstArraySizeFloat/15       5322 ns         5302 ns       132415
111 BM_VectorTestAddConstArraySizeFloat/16       4101 ns         4083 ns       170300
112 BM_VectorTestAddForcedIntrinsics/1            187 ns          186 ns      3656441
113 BM_VectorTestAddForcedIntrinsics/2           1184 ns         1178 ns       564643
114 BM_VectorTestAddForcedIntrinsics/4           1218 ns         1213 ns       584709
115 BM_VectorTestAddForcedIntrinsics/7           2775 ns         2764 ns       252256
116 BM_VectorTestAddForcedIntrinsics/8           2070 ns         2062 ns       342709
117 BM_VectorTestAddForcedIntrinsics/15          5213 ns         5192 ns       132663
118 BM_VectorTestAddForcedIntrinsics/16          4116 ns         4100 ns       171005
119 
120 
Pixel 9 Pro XL (using Android 14 clang)
122 ---------------------------------------------------------------------------------
123 Benchmark                                       Time             CPU   Iterations
124 ---------------------------------------------------------------------------------
125 BM_VectorTestMulLoopFloat/1                  1171 ns         1166 ns       450848
126 BM_VectorTestMulLoopFloat/2                  1847 ns         1840 ns       381613
127 BM_VectorTestMulLoopFloat/4                  3432 ns         3423 ns       205730
128 BM_VectorTestMulLoopFloat/7                  5615 ns         5598 ns       124818
129 BM_VectorTestMulLoopFloat/8                  6411 ns         6383 ns       109013
130 BM_VectorTestMulLoopFloat/15                12371 ns        12332 ns        55439
131 BM_VectorTestMulLoopFloat/16                13594 ns        13555 ns        51753
132 BM_VectorTestMulConstArraySizeFloat/1         153 ns          152 ns      4534625
133 BM_VectorTestMulConstArraySizeFloat/2         683 ns          680 ns      1005789
134 BM_VectorTestMulConstArraySizeFloat/3         886 ns          883 ns       803793
135 BM_VectorTestMulConstArraySizeFloat/4        1491 ns         1487 ns       471683
136 BM_VectorTestMulConstArraySizeFloat/5        1448 ns         1443 ns       486353
137 BM_VectorTestMulConstArraySizeFloat/6        1482 ns         1478 ns       474901
138 BM_VectorTestMulConstArraySizeFloat/7        2279 ns         2272 ns       308978
139 BM_VectorTestMulConstArraySizeFloat/8        1620 ns         1600 ns       438957
140 BM_VectorTestMulConstArraySizeFloat/9        2505 ns         2487 ns       283335
141 BM_VectorTestMulConstArraySizeFloat/10       2389 ns         2386 ns       293332
142 BM_VectorTestMulConstArraySizeFloat/11       3185 ns         3180 ns       219746
143 BM_VectorTestMulConstArraySizeFloat/12       2285 ns         2280 ns       307091
144 BM_VectorTestMulConstArraySizeFloat/13       3464 ns         3459 ns       201902
145 BM_VectorTestMulConstArraySizeFloat/14       3254 ns         3249 ns       215345
146 BM_VectorTestMulConstArraySizeFloat/15       4156 ns         4149 ns       169102
147 BM_VectorTestMulConstArraySizeFloat/16       3075 ns         3068 ns       228544
148 BM_VectorTestMulConstArraySizeFloat/17       4469 ns         4442 ns       157317
149 BM_VectorTestMulConstArraySizeFloat/18       4141 ns         4133 ns       170148
150 BM_VectorTestMulConstArraySizeFloat/19       5193 ns         5179 ns       135294
151 BM_VectorTestMulConstArraySizeFloat/20       3876 ns         3866 ns       181134
152 BM_VectorTestMulConstArraySizeFloat/21       5450 ns         5429 ns       129921
153 BM_VectorTestMulConstArraySizeFloat/22       5075 ns         5056 ns       139238
154 BM_VectorTestMulConstArraySizeFloat/23       6145 ns         6125 ns       114880
155 BM_VectorTestMulConstArraySizeFloat/24       4659 ns         4646 ns       150923
156 BM_VectorTestMulConstArraySizeFloat/25       6423 ns         6400 ns       109467
157 BM_VectorTestMulConstArraySizeFloat/26       5962 ns         5947 ns       117755
158 BM_VectorTestMulConstArraySizeFloat/27       7139 ns         7115 ns        98581
159 BM_VectorTestMulConstArraySizeFloat/28       5462 ns         5446 ns       128477
160 BM_VectorTestMulConstArraySizeFloat/29       7431 ns         7399 ns        94492
161 BM_VectorTestMulConstArraySizeFloat/30       6877 ns         6854 ns       101706
162 BM_VectorTestMulConstArraySizeFloat/31       8322 ns         8304 ns        83352
163 BM_VectorTestMulConstArraySizeFloat/32       6223 ns         6208 ns       114265
164 BM_VectorTestMulForcedIntrinsics/1            160 ns          160 ns      4365646
165 BM_VectorTestMulForcedIntrinsics/2            848 ns          845 ns       807945
166 BM_VectorTestMulForcedIntrinsics/3           1435 ns         1430 ns       489448
167 BM_VectorTestMulForcedIntrinsics/4            937 ns          934 ns       757416
168 BM_VectorTestMulForcedIntrinsics/5           1477 ns         1473 ns       474891
169 BM_VectorTestMulForcedIntrinsics/6           1825 ns         1820 ns       385118
170 BM_VectorTestMulForcedIntrinsics/7           2303 ns         2298 ns       303823
171 BM_VectorTestMulForcedIntrinsics/8           1643 ns         1638 ns       430851
172 BM_VectorTestMulForcedIntrinsics/9           2490 ns         2482 ns       281294
173 BM_VectorTestMulForcedIntrinsics/10          2429 ns         2423 ns       291028
174 BM_VectorTestMulForcedIntrinsics/11          3201 ns         3193 ns       219256
175 BM_VectorTestMulForcedIntrinsics/12          2341 ns         2335 ns       302086
176 BM_VectorTestMulForcedIntrinsics/13          3475 ns         3466 ns       201570
177 BM_VectorTestMulForcedIntrinsics/14          3294 ns         3286 ns       212762
178 BM_VectorTestMulForcedIntrinsics/15          4141 ns         4129 ns       169275
179 BM_VectorTestMulForcedIntrinsics/16          3123 ns         3116 ns       225516
180 BM_VectorTestMulForcedIntrinsics/17          4447 ns         4436 ns       157620
181 BM_VectorTestMulForcedIntrinsics/18          4175 ns         4163 ns       168170
182 BM_VectorTestMulForcedIntrinsics/19          5164 ns         5147 ns       134830
183 BM_VectorTestMulForcedIntrinsics/20          3927 ns         3917 ns       179070
184 BM_VectorTestMulForcedIntrinsics/21          5481 ns         5449 ns       126196
185 BM_VectorTestMulForcedIntrinsics/22          5124 ns         5109 ns       138492
186 BM_VectorTestMulForcedIntrinsics/23          6142 ns         6125 ns       113071
187 BM_VectorTestMulForcedIntrinsics/24          4690 ns         4675 ns       150096
188 BM_VectorTestMulForcedIntrinsics/25          6423 ns         6398 ns       108462
189 BM_VectorTestMulForcedIntrinsics/26          6047 ns         6029 ns       117408
190 BM_VectorTestMulForcedIntrinsics/27          7150 ns         7128 ns        97901
191 BM_VectorTestMulForcedIntrinsics/28          5483 ns         5467 ns       129504
192 BM_VectorTestMulForcedIntrinsics/29          7416 ns         7390 ns        94167
193 BM_VectorTestMulForcedIntrinsics/30          6960 ns         6934 ns       102061
194 BM_VectorTestMulForcedIntrinsics/31          8073 ns         8043 ns        87555
195 BM_VectorTestMulForcedIntrinsics/32          6255 ns         6235 ns       113705
196 BM_VectorTestAddConstArraySizeFloat/1         161 ns          161 ns      4339090
197 BM_VectorTestAddConstArraySizeFloat/2         718 ns          716 ns       958914
198 BM_VectorTestAddConstArraySizeFloat/4        1500 ns         1496 ns       468059
199 BM_VectorTestAddConstArraySizeFloat/7        2334 ns         2326 ns       301694
200 BM_VectorTestAddConstArraySizeFloat/8        1655 ns         1651 ns       428569
201 BM_VectorTestAddConstArraySizeFloat/15       4224 ns         4214 ns       166108
202 BM_VectorTestAddConstArraySizeFloat/16       3229 ns         3219 ns       217681
203 BM_VectorTestAddForcedIntrinsics/1            164 ns          163 ns      4286279
204 BM_VectorTestAddForcedIntrinsics/2            858 ns          854 ns       795537
205 BM_VectorTestAddForcedIntrinsics/4            927 ns          924 ns       761731
206 BM_VectorTestAddForcedIntrinsics/7           2333 ns         2325 ns       301963
207 BM_VectorTestAddForcedIntrinsics/8           1658 ns         1654 ns       425574
208 BM_VectorTestAddForcedIntrinsics/15          4096 ns         4087 ns       171278
209 BM_VectorTestAddForcedIntrinsics/16          3245 ns         3236 ns       217538
210 
211 */
212 
213 using namespace android::audio_utils::intrinsics;
214 
215 static constexpr size_t kDataSize = 2048;
216 
217 // exhaustively go from 1-32 channels.
TestFullArgs(benchmark::internal::Benchmark * b)218 static void TestFullArgs(benchmark::internal::Benchmark* b) {
219     constexpr int kChannelCountMin = 1;
220     constexpr int kChannelCountMax = 32;
221     for (int i = kChannelCountMin; i <= kChannelCountMax; ++i) {
222         b->Args({i});
223     }
224 }
225 
226 // selective channels to test.
TestArgs(benchmark::internal::Benchmark * b)227 static void TestArgs(benchmark::internal::Benchmark* b) {
228     for (int i : { 1, 2, 4, 7, 8, 15, 16 }) {
229         b->Args({i});
230     }
231 }
232 
// Macro test operator.
//
// Views the N samples at in1 and in2 as V<F, N>, applies Traits::func_ to the
// pair, stores the result through out, then advances all three pointers by N.
// The expansion site must have F, V, Traits, out, in1 and in2 in scope.
// Wrapped in do/while(0) so that `OPERATOR(N);` is a single statement.

#define OPERATOR(N) \
    do { \
        *reinterpret_cast<V<F, N>*>(out) = \
                Traits::func_(*reinterpret_cast<const V<F, N>*>(in1), \
                              *reinterpret_cast<const V<F, N>*>(in2)); \
        out += N; \
        in1 += N; \
        in2 += N; \
    } while (0)
242 
// Macro to instantiate switch case statements.
//
// Emits `case N:` which points mFunc at the TestFunc<N> specialization and
// leaves the switch.  The expansion site must have mFunc and TestFunc in scope.

#define INSTANTIATE(N) \
    case N: \
        mFunc = &TestFunc<N>; \
        break;
246 
247 template <typename Traits>
248 class Processor {
249 public:
250     // shorthand aliases
251     using F = typename Traits::data_t;
252     template <typename T, int N>
253     using V = typename Traits::template container_t<T, N>;
254     template <size_t N>
TestFunc(F * out,const F * in1,const F * in2,size_t count)255     static void TestFunc(F* out, const F* in1, const F* in2, size_t count) {
256         static_assert(sizeof(V<F, N>) == N * sizeof(F));
257         for (size_t i = 0; i < count; ++i) {
258             OPERATOR(N);
259         }
260     }
261 
Processor(int channelCount)262     Processor(int channelCount)
263         : mChannelCount(channelCount) {
264 
265         if constexpr (Traits::loop_) {
266             mFunc = [channelCount](F* out, const F* in1, const F* in2, size_t count) {
267                 for (size_t i = 0; i < count; ++i) {
268                     for (size_t j = 0; j < channelCount; ++j) {
269                         OPERATOR(1);
270                     }
271                 }
272             };
273             return;
274         }
275         switch (channelCount) {
276         INSTANTIATE(1);
277         INSTANTIATE(2);
278         INSTANTIATE(3);
279         INSTANTIATE(4);
280         INSTANTIATE(5);
281         INSTANTIATE(6);
282         INSTANTIATE(7);
283         INSTANTIATE(8);
284         INSTANTIATE(9);
285         INSTANTIATE(10);
286         INSTANTIATE(11);
287         INSTANTIATE(12);
288         INSTANTIATE(13);
289         INSTANTIATE(14);
290         INSTANTIATE(15);
291         INSTANTIATE(16);
292         INSTANTIATE(17);
293         INSTANTIATE(18);
294         INSTANTIATE(19);
295         INSTANTIATE(20);
296         INSTANTIATE(21);
297         INSTANTIATE(22);
298         INSTANTIATE(23);
299         INSTANTIATE(24);
300         INSTANTIATE(25);
301         INSTANTIATE(26);
302         INSTANTIATE(27);
303         INSTANTIATE(28);
304         INSTANTIATE(29);
305         INSTANTIATE(30);
306         INSTANTIATE(31);
307         INSTANTIATE(32);
308         }
309     }
310 
process(F * out,const F * in1,const F * in2,size_t frames)311     void process(F* out, const F* in1, const F* in2, size_t frames) {
312         mFunc(out, in1, in2, frames);
313     }
314 
315     const size_t mChannelCount;
316     /* const */ std::function<void(F*, const F*, const F*, size_t)> mFunc;
317 };
318 
319 template <typename Traits>
BM_VectorTest(benchmark::State & state)320 static void BM_VectorTest(benchmark::State& state) {
321     using F = typename Traits::data_t;
322     const size_t channelCount = state.range(0);
323 
324     std::vector<F> input1(kDataSize * channelCount);
325     std::vector<F> input2(kDataSize * channelCount);
326     std::vector<F> output(kDataSize * channelCount);
327 
328     // Initialize input buffer and coefs with deterministic pseudo-random values
329     std::minstd_rand gen(42);
330     const F amplitude = 1.;
331     std::uniform_real_distribution<> dis(-amplitude, amplitude);
332     for (auto& in : input1) {
333         in = dis(gen);
334     }
335     for (auto& in : input2) {
336         in = dis(gen);
337     }
338 
339     Processor<Traits> processor(channelCount);
340 
341     // Run the test
342     while (state.KeepRunning()) {
343         benchmark::DoNotOptimize(input1.data());
344         benchmark::DoNotOptimize(input2.data());
345         benchmark::DoNotOptimize(output.data());
346         processor.process(output.data(), input1.data(), input2.data(), kDataSize);
347         benchmark::ClobberMemory();
348     }
349     state.SetComplexityN(channelCount);
350 }
351 
352 // Clang has an issue with -frelaxed-template-template-args where
353 // it may not follow the C++17 guidelines.  Use a traits struct to
354 // pass in parameters.
355 
356 // Test using two loops.
357 struct LoopFloatTraits {
358     template <typename F, int N>
359     using container_t = internal_array_t<F, N>;
360     using data_t = float;
361     static constexpr bool loop_ = true;
362 };
363 
364 // Test using two loops, the inner loop is constexpr size.
365 struct ConstArraySizeFloatTraits {
366     template <typename F, int N>
367     using container_t = internal_array_t<F, N>;
368     using data_t = float;
369     static constexpr bool loop_ = false;
370 };
371 
372 // Test using intrinsics, if available.
373 struct ForcedIntrinsicsTraits {
374     template <typename F, int N>
375     using container_t = vector_hw_t<F, N>;
376     using data_t = float;
377     static constexpr bool loop_ = false;
378 };
379 
380 // --- MULTIPLY
381 
// Operation mix-in: supplies func_ as vmul() of the two operands.
struct MulFunc {
    template <typename T>
    static auto func_(T a, T b) -> T {
        return vmul(a, b);
    }
};
386 
387 struct MulLoopFloatTraits : public LoopFloatTraits, public MulFunc {};
388 
BM_VectorTestMulLoopFloat(benchmark::State & state)389 static void BM_VectorTestMulLoopFloat(benchmark::State& state) {
390     BM_VectorTest<MulLoopFloatTraits>(state);
391 }
392 
393 struct MulConstArraySizeFloatTraits : public ConstArraySizeFloatTraits, public MulFunc {};
394 
BM_VectorTestMulConstArraySizeFloat(benchmark::State & state)395 static void BM_VectorTestMulConstArraySizeFloat(benchmark::State& state) {
396     BM_VectorTest<MulConstArraySizeFloatTraits>(state);
397 }
398 
399 struct MulForcedIntrinsicsTraits : public ForcedIntrinsicsTraits, public MulFunc {};
400 
BM_VectorTestMulForcedIntrinsics(benchmark::State & state)401 static void BM_VectorTestMulForcedIntrinsics(benchmark::State& state) {
402     BM_VectorTest<MulForcedIntrinsicsTraits>(state);
403 }
404 
// Register the multiply benchmarks.  The loop variant runs only the selective
// channel counts from TestArgs; the other two sweep every count supplied by
// TestFullArgs.
BENCHMARK(BM_VectorTestMulLoopFloat)->Apply(TestArgs);

BENCHMARK(BM_VectorTestMulConstArraySizeFloat)->Apply(TestFullArgs);

BENCHMARK(BM_VectorTestMulForcedIntrinsics)->Apply(TestFullArgs);
410 
411 // --- ADD
412 
// Operation mix-in: supplies func_ as vadd() of the two operands.
struct AddFunc {
    template <typename T>
    static auto func_(T a, T b) -> T {
        return vadd(a, b);
    }
};
417 
418 struct AddConstArraySizeFloatTraits : public ConstArraySizeFloatTraits, public AddFunc {};
419 
BM_VectorTestAddConstArraySizeFloat(benchmark::State & state)420 static void BM_VectorTestAddConstArraySizeFloat(benchmark::State& state) {
421     BM_VectorTest<AddConstArraySizeFloatTraits>(state);
422 }
423 
424 struct AddForcedIntrinsicsTraits : public ForcedIntrinsicsTraits, public AddFunc {};
425 
BM_VectorTestAddForcedIntrinsics(benchmark::State & state)426 static void BM_VectorTestAddForcedIntrinsics(benchmark::State& state) {
427     BM_VectorTest<AddForcedIntrinsicsTraits>(state);
428 }
429 
// Register the add benchmarks; both run only the selective channel counts
// from TestArgs.
BENCHMARK(BM_VectorTestAddConstArraySizeFloat)->Apply(TestArgs);

BENCHMARK(BM_VectorTestAddForcedIntrinsics)->Apply(TestArgs);

// Program entry point, provided by the benchmark library's macro.
BENCHMARK_MAIN();
435