// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-velu.yaml
//   Generator: tools/generate-vunary-test.py
9
10
#include <cstddef>
#include <vector>

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/vunary.h>
#include "vunary-microkernel-tester.h"
18
19
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X4: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 4.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 8, 12, ..., 36 (multiples of 4 from 8, below 40).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 3 (below 4).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 5 to 7 (strictly between 4 and 8).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
101
102
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X8: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 8.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 16, 24, ..., 72 (multiples of 8 from 16, below 80).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 7 (below 8).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 9 to 15 (strictly between 8 and 16).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
184
185
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X12: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 12.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 24, 36, ..., 108 (multiples of 12 from 24, below 120).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 11 (below 12).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 13 to 23 (strictly between 12 and 24).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
267
268
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X16: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 16.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 32, 48, ..., 144 (multiples of 16 from 32, below 160).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 15 (below 16).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 17 to 31 (strictly between 16 and 32).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
350
351
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X20: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 20.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(20)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 40, 60, ..., 180 (multiples of 20 from 40, below 200).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 19 (below 20).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 20; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 21 to 39 (strictly between 20 and 40).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 21; batch_size < 40; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 100, stepping by 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 100, stepping by 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 100, stepping by 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 100, stepping by 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
433
434
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_LUT16_P3_X24: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_lut16_p3_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 24.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(24)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // batch_size = 48, 72, ..., 216 (multiples of 24 from 48, below 240).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 1 to 23 (below 24).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Every batch_size from 25 to 47 (strictly between 24 and 48).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 120, stepping by 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 120, stepping by 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 120, stepping by 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 120, stepping by 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
516
517
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_P6_X4: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_p6_x4 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_p6_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 4.
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // batch_size = 8, 12, ..., 36 (multiples of 4 from 8, below 40).
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 1 to 3 (below 4).
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 5 to 7 (strictly between 4 and 8).
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_P6_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_P6_X4, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_P6_X4, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 20, stepping by 3.
  TEST(F32_VELU__NEON_RR2_P6_X4, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
599
600
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_P6_X8: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_p6_x8 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_p6_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 8.
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // batch_size = 16, 24, ..., 72 (multiples of 8 from 16, below 80).
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 1 to 7 (below 8).
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 9 to 15 (strictly between 8 and 16).
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_P6_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_P6_X8, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_P6_X8, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 40, stepping by 7.
  TEST(F32_VELU__NEON_RR2_P6_X8, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
682
683
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_P6_X12: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_p6_x12 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_p6_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 12.
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // batch_size = 24, 36, ..., 108 (multiples of 12 from 24, below 120).
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 1 to 11 (below 12).
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 13 to 23 (strictly between 12 and 24).
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_P6_X12, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_P6_X12, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_P6_X12, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 60, stepping by 11.
  TEST(F32_VELU__NEON_RR2_P6_X12, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Suite F32_VELU__NEON_RR2_P6_X16: every case drives
  // xnn_f32_velu_ukernel__neon_rr2_p6_x16 through VUnaryMicrokernelTester,
  // passing xnn_init_f32_elu_neon_rr2_p6_params as the second argument of Test().
  // Compiled only for ARM / ARM64 targets. Each case begins with TEST_REQUIRES_ARM_NEON
  // (defined outside this file; presumably a runtime NEON gate -- confirm in isa-checks.h).

  // One run with batch_size fixed at 16.
  TEST(F32_VELU__NEON_RR2_P6_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // batch_size = 32, 48, ..., 144 (multiples of 16 from 32, below 160).
  TEST(F32_VELU__NEON_RR2_P6_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 1 to 15 (below 16).
  TEST(F32_VELU__NEON_RR2_P6_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Every batch_size from 17 to 31 (strictly between 16 and 32).
  TEST(F32_VELU__NEON_RR2_P6_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Tester configured with inplace(true); batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_P6_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale of 0.1 and 10, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_P6_X16, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and 3, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_P6_X16, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and 3, each over batch_size from 1 up to at most 80, stepping by 15.
  TEST(F32_VELU__NEON_RR2_P6_X16, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
848
849
850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 20.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kBatchSize = 20;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neon_rr2_p6_x20,
      xnn_init_f32_elu_neon_rr2_p6_params);
}
857
// One tester run per batch size from 40 through 180, stepping by 20.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x20,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
866
// One tester run per batch size from 1 through 19.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x20,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
875
// One tester run per batch size from 21 through 39.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x20,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
884
// Runs with inplace(true) for batch sizes starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEON_RR2_P6_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x20,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
894
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEON_RR2_P6_X20, prescale) {
  TEST_REQUIRES_ARM_NEON;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x20,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
906
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEON_RR2_P6_X20, alpha) {
  TEST_REQUIRES_ARM_NEON;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x20,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
918
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEON_RR2_P6_X20, beta) {
  TEST_REQUIRES_ARM_NEON;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x20,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
930 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
931
932
933 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 24.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kBatchSize = 24;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neon_rr2_p6_x24,
      xnn_init_f32_elu_neon_rr2_p6_params);
}
940
// One tester run per batch size from 48 through 216, stepping by 24.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x24,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
949
// One tester run per batch size from 1 through 23.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x24,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
958
// One tester run per batch size from 25 through 47.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x24,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
967
// Runs with inplace(true) for batch sizes starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEON_RR2_P6_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neon_rr2_p6_x24,
        xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
977
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEON_RR2_P6_X24, prescale) {
  TEST_REQUIRES_ARM_NEON;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x24,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
989
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEON_RR2_P6_X24, alpha) {
  TEST_REQUIRES_ARM_NEON;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x24,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
1001
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEON_RR2_P6_X24, beta) {
  TEST_REQUIRES_ARM_NEON;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neon_rr2_p6_x24,
          xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
1013 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1014
1015
1016 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 4.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 4;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1023
// One tester run per batch size from 8 through 36, stepping by 4.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1032
// One tester run per batch size from 1 through 3.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1041
// One tester run per batch size from 5 through 7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1050
// Runs with inplace(true) for batch sizes starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1060
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1072
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1084
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1096 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1097
1098
1099 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 8.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 8;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1106
// One tester run per batch size from 16 through 72, stepping by 8.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1115
// One tester run per batch size from 1 through 7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1124
// One tester run per batch size from 9 through 15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1133
// Runs with inplace(true) for batch sizes starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1143
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1155
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1167
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1179 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1180
1181
1182 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 12.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 12;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1189
// One tester run per batch size from 24 through 108, stepping by 12.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1198
// One tester run per batch size from 1 through 11.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1207
// One tester run per batch size from 13 through 23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1216
// Runs with inplace(true) for batch sizes starting at 1, stepping by 11, while <= 60.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1226
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 11, while <= 60.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1238
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 11, while <= 60.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1250
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 11, while <= 60.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1262 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1263
1264
1265 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 16.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 16;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1272
// One tester run per batch size from 32 through 144, stepping by 16.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1281
// One tester run per batch size from 1 through 15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1290
// One tester run per batch size from 17 through 31.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1299
// Runs with inplace(true) for batch sizes starting at 1, stepping by 15, while <= 80.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1309
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 15, while <= 80.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1321
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 15, while <= 80.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1333
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 15, while <= 80.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1345 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1346
1347
1348 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 20;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1355
// One tester run per batch size from 40 through 180, stepping by 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1364
// One tester run per batch size from 1 through 19.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1373
// One tester run per batch size from 21 through 39.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1382
// Runs with inplace(true) for batch sizes starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1392
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1404
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1416
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 19, while <= 100.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1428 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429
1430
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 24.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 24;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
      xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1438
// One tester run per batch size from 48 through 216, stepping by 24.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1447
// One tester run per batch size from 1 through 23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1456
// One tester run per batch size from 25 through 47.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1465
// Runs with inplace(true) for batch sizes starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
        xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1475
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1487
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1499
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 23, while <= 120.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24,
          xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1511 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1512
1513
1514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 4.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 4;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
      xnn_init_f32_elu_neonfma_rr1_p6_params);
}
1521
// One tester run per batch size from 8 through 36, stepping by 4.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1530
// One tester run per batch size from 1 through 3.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1539
// One tester run per batch size from 5 through 7.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1548
// Runs with inplace(true) for batch sizes starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1558
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
          xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }
}
1570
// For alpha values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
          xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }
}
1582
// For beta values 0.3f and 3.0f: one tester run per batch size
// starting at 1, stepping by 3, while <= 20.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x4,
          xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }
}
1594 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1595
1596
1597 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single tester run with the batch size fixed at 8.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  constexpr size_t kBatchSize = 8;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
      xnn_init_f32_elu_neonfma_rr1_p6_params);
}
1604
// One tester run per batch size from 16 through 72, stepping by 8.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1613
// One tester run per batch size from 1 through 7.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1622
// One tester run per batch size from 9 through 15.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1631
// Runs with inplace(true) for batch sizes starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
        xnn_init_f32_elu_neonfma_rr1_p6_params);
  }
}
1641
// For prescale values 0.1f and 10.0f: one tester run per batch size
// starting at 1, stepping by 7, while <= 40.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x8,
          xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }
}
1653
TEST(F32_VELU__NEONFMA_RR1_P6_X8,alpha)1654 TEST(F32_VELU__NEONFMA_RR1_P6_X8, alpha) {
1655 TEST_REQUIRES_ARM_NEON_FMA;
1656 for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1657 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1658 VUnaryMicrokernelTester()
1659 .batch_size(batch_size)
1660 .alpha(alpha)
1661 .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1662 }
1663 }
1664 }
1665
TEST(F32_VELU__NEONFMA_RR1_P6_X8,beta)1666 TEST(F32_VELU__NEONFMA_RR1_P6_X8, beta) {
1667 TEST_REQUIRES_ARM_NEON_FMA;
1668 for (float beta : std::vector<float>({0.3f, 3.0f})) {
1669 for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1670 VUnaryMicrokernelTester()
1671 .batch_size(batch_size)
1672 .beta(beta)
1673 .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1674 }
1675 }
1676 }
1677 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1678
1679
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neonfma_rr1_p6_x12. Every test first checks
  // TEST_REQUIRES_ARM_NEON_FMA and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 12.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester().batch_size(12).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
  }

  // Batch sizes 24, 36, ..., 108 (multiples of 12).
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 24; n < 120; n += 12) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 1 through 11.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n < 12; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 13 through 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 13; n < 24; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, prescale) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, alpha) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__NEONFMA_RR1_P6_X12, beta) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1761
1762
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neonfma_rr1_p6_x16. Every test first checks
  // TEST_REQUIRES_ARM_NEON_FMA and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 16.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester().batch_size(16).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 32; n < 160; n += 16) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n < 16; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 17; n < 32; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, prescale) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, alpha) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__NEONFMA_RR1_P6_X16, beta) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1844
1845
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neonfma_rr1_p6_x20. Every test first checks
  // TEST_REQUIRES_ARM_NEON_FMA and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 20.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester().batch_size(20).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
  }

  // Batch sizes 40, 60, ..., 180 (multiples of 20).
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 40; n < 200; n += 20) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 1 through 19.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n < 20; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 21 through 39.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 21; n < 40; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, prescale) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, alpha) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__NEONFMA_RR1_P6_X20, beta) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1927
1928
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neonfma_rr1_p6_x24. Every test first checks
  // TEST_REQUIRES_ARM_NEON_FMA and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 24.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    VUnaryMicrokernelTester().batch_size(24).Test(
        xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 48; n < 240; n += 24) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 1 through 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n < 24; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // Batch sizes 25 through 47.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 25; n < 48; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 120 in steps of 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, inplace) {
    TEST_REQUIRES_ARM_NEON_FMA;
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 120 in steps of 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, prescale) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 120 in steps of 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, alpha) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 120 in steps of 23.
  TEST(F32_VELU__NEONFMA_RR1_P6_X24, beta) {
    TEST_REQUIRES_ARM_NEON_FMA;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2010
2011
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4. Every test first checks
  // TEST_REQUIRES_X86_SSE2 and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 4.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_eq_4) {
    TEST_REQUIRES_X86_SSE2;
    VUnaryMicrokernelTester().batch_size(4).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_div_4) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 8; n < 40; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 3.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_lt_4) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 4; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 5 through 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_gt_4) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 5; n < 8; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 20 in steps of 3.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 20 in steps of 3.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, prescale) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 20 in steps of 3.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, alpha) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 20 in steps of 3.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, beta) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2093
2094
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8. Every test first checks
  // TEST_REQUIRES_X86_SSE2 and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 8.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VUnaryMicrokernelTester().batch_size(8).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 16; n < 80; n += 8) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 8; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 9; n < 16; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 40 in steps of 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 40 in steps of 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, prescale) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 40 in steps of 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, alpha) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 40 in steps of 7.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, beta) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2176
2177
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12. Every test first checks
  // TEST_REQUIRES_X86_SSE2 and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 12.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_eq_12) {
    TEST_REQUIRES_X86_SSE2;
    VUnaryMicrokernelTester().batch_size(12).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }

  // Batch sizes 24, 36, ..., 108 (multiples of 12).
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_div_12) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 24; n < 120; n += 12) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 11.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_lt_12) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 12; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 13 through 23.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_gt_12) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 13; n < 24; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, prescale) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, alpha) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 60 in steps of 11.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, beta) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16. Every test first checks
  // TEST_REQUIRES_X86_SSE2 and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 16.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE2;
    VUnaryMicrokernelTester().batch_size(16).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 32; n < 160; n += 16) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 16; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 17; n < 32; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, prescale) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, alpha) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 80 in steps of 15.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, beta) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2342
2343
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20. Every test first checks
  // TEST_REQUIRES_X86_SSE2 and then drives the kernel through a freshly
  // constructed VUnaryMicrokernelTester.

  // Single run with batch size 20.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_eq_20) {
    TEST_REQUIRES_X86_SSE2;
    VUnaryMicrokernelTester().batch_size(20).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }

  // Batch sizes 40, 60, ..., 180 (multiples of 20).
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_div_20) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 40; n < 200; n += 20) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 19.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_lt_20) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 20; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 21 through 39.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_gt_20) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 21; n < 40; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // inplace(true) at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, inplace) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }

  // prescale 0.1 and 10, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, prescale) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> prescales = {0.1f, 10.0f};
    for (const float prescale : prescales) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // alpha 0.3 and 3, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, alpha) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> alphas = {0.3f, 3.0f};
    for (const float alpha : alphas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }

  // beta 0.3 and 3, each at batch sizes from 1 to at most 100 in steps of 19.
  TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, beta) {
    TEST_REQUIRES_X86_SSE2;
    const std::vector<float> betas = {0.3f, 3.0f};
    for (const float beta : betas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
            xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2425
2426
2427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24 once with batch size 24,
// after the TEST_REQUIRES_X86_SSE2 check.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
2434
// Runs xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24 at batch sizes
// 48, 72, ..., 216 (multiples of 24), after the TEST_REQUIRES_X86_SSE2 check.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2443
// Runs xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24 at every batch size from
// 1 through 23, after the TEST_REQUIRES_X86_SSE2 check.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2452
// Runs xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24 at every batch size from
// 25 through 47, after the TEST_REQUIRES_X86_SSE2 check.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2461
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 23
// while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2471
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2483
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2495
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2507 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2508
2509
2510 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 4;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2517
// Batch sizes 8, 12, ..., 36: each one is a multiple of 4.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2526
// Every batch size from 1 through 3.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2535
// Every batch size from 5 through 7.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2544
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 3
// while they do not exceed 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2554
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2566
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2578
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2590 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2591
2592
2593 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 8 elements.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 8;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2600
// Batch sizes 16, 24, ..., 72: each one is a multiple of 8.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2609
// Every batch size from 1 through 7.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2618
// Every batch size from 9 through 15.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2627
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 7
// while they do not exceed 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2637
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2649
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2661
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2673 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2674
2675
2676 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 12 elements.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 12;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2683
// Batch sizes 24, 36, ..., 108: each one is a multiple of 12.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2692
// Every batch size from 1 through 11.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2701
// Every batch size from 13 through 23.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2710
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 11
// while they do not exceed 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2720
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2732
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2744
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2756 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2757
2758
2759 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 16 elements.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 16;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2766
// Batch sizes 32, 48, ..., 144: each one is a multiple of 16.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2775
// Every batch size from 1 through 15.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2784
// Every batch size from 17 through 31.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2793
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 15
// while they do not exceed 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2803
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 15 while they do not exceed 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2815
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 15 while they do not exceed 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2827
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 15 while they do not exceed 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2839 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2840
2841
2842 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 20 elements.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 20;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2849
// Batch sizes 40, 60, ..., 180: each one is a multiple of 20.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2858
// Every batch size from 1 through 19.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2867
// Every batch size from 21 through 39.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2876
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 19
// while they do not exceed 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2886
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 19 while they do not exceed 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2898
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 19 while they do not exceed 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2910
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 19 while they do not exceed 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;  // matches the X20 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2922 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2923
2924
2925 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 24 elements.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  const size_t exact_batch = 24;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2932
// Batch sizes 48, 72, ..., 216: each one is a multiple of 24.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2941
// Every batch size from 1 through 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2950
// Every batch size from 25 through 47.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2959
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 23
// while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2969
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2981
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2993
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 23 while they do not exceed 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;  // matches the X24 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3005 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3006
3007
3008 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  const size_t exact_batch = 4;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3015
// Batch sizes 8, 12, ..., 36: each one is a multiple of 4.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3024
// Every batch size from 1 through 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3033
// Every batch size from 5 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3042
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 3
// while they do not exceed 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3052
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3064
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3076
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 3 while they do not exceed 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 4;  // matches the X4 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3088 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3089
3090
3091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 8 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const size_t exact_batch = 8;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3098
// Batch sizes 16, 24, ..., 72: each one is a multiple of 8.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3107
// Every batch size from 1 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3116
// Every batch size from 9 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3125
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 7
// while they do not exceed 40.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3135
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3147
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3159
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 7 while they do not exceed 40.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 8;  // matches the X8 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3171 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3172
3173
3174 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 12 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  const size_t exact_batch = 12;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3181
// Batch sizes 24, 36, ..., 108: each one is a multiple of 12.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3190
// Every batch size from 1 through 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3199
// Every batch size from 13 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3208
// Sets .inplace(true) on the tester; batch sizes start at 1 and advance by 11
// while they do not exceed 60.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3218
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(p)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3230
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3242
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes start at 1 and
// advance by 11 while they do not exceed 60.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 12;  // matches the X12 suffix of the kernel under test
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3254 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3255
3256
3257 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch of exactly 16 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  const size_t exact_batch = 16;
  VUnaryMicrokernelTester()
      .batch_size(exact_batch)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3264
// Batch sizes 32, 48, ..., 144: each one is a multiple of 16.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3273
// Every batch size from 1 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kTile = 16;  // matches the X16 suffix of the kernel under test
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3282
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16 for every batch size
// from 17 through 31.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 16; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(16 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3291
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16 with inplace(true) for
// batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 80;
  constexpr size_t kStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3301
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16 with prescale set to 0.1
// and 10.0, each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3313
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16 with alpha set to 0.3 and
// 3.0, each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3325
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16 with beta set to 0.3 and
// 3.0, each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3337 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3338
3339
3340 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 once with a batch size of 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(20).Test(
      xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
      xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3347
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 for batch sizes
// 40, 60, ..., 180 (2x through 9x of 20).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 20).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3356
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 for every batch size
// from 1 through 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 20; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3365
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 for every batch size
// from 21 through 39.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 20; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(20 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3374
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 with inplace(true) for
// batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 100;
  constexpr size_t kStride = 19;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3384
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 with prescale set to 0.1
// and 10.0, each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3396
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 with alpha set to 0.3 and
// 3.0, each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3408
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20 with beta set to 0.3 and
// 3.0, each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3420 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3421
3422
3423 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 once with a batch size of 24.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(24).Test(
      xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
      xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3430
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 for batch sizes
// 48, 72, ..., 216 (2x through 9x of 24).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 24).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3439
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 for every batch size
// from 1 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 24; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3448
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 for every batch size
// from 25 through 47.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 24; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(24 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3457
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 with inplace(true) for
// batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 120;
  constexpr size_t kStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
        xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3467
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 with prescale set to 0.1
// and 10.0, each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3479
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 with alpha set to 0.3 and
// 3.0, each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3491
// Runs xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24 with beta set to 0.3 and
// 3.0, each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24,
          xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3503 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3504
3505
3506 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 once with a batch size of 4.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(4).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3513
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 for batch sizes
// 8, 12, ..., 36 (2x through 9x of 4).
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 4).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3522
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 for every batch size from 1
// through 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 4; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3531
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 for every batch size from 5
// through 7.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 4; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(4 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3540
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 with inplace(true) for batch
// sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__SSE41_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 20;
  constexpr size_t kStride = 3;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3550
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__SSE41_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3562
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__SSE41_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3574
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x4 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__SSE41_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x4,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3586 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3587
3588
3589 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 once with a batch size of 8.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(8).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3596
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 for batch sizes
// 16, 24, ..., 72 (2x through 9x of 8).
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 8).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3605
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 for every batch size from 1
// through 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3614
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 for every batch size from 9
// through 15.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 8; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(8 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3623
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 with inplace(true) for batch
// sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__SSE41_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 40;
  constexpr size_t kStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3633
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__SSE41_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3645
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__SSE41_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3657
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x8 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__SSE41_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x8,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3669 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3670
3671
3672 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 once with a batch size of 12.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(12).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3679
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 for batch sizes
// 24, 36, ..., 108 (2x through 9x of 12).
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 12).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3688
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 for every batch size from 1
// through 11.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 12; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3697
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 for every batch size from 13
// through 23.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 12; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(12 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3706
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 with inplace(true) for batch
// sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__SSE41_RR2_P6_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 60;
  constexpr size_t kStride = 11;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3716
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__SSE41_RR2_P6_X12, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3728
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__SSE41_RR2_P6_X12, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3740
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x12 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__SSE41_RR2_P6_X12, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3752 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3753
3754
3755 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 once with a batch size of 16.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(16).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3762
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 for batch sizes
// 32, 48, ..., 144 (2x through 9x of 16).
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 16).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3771
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 for every batch size from 1
// through 15.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 16; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3780
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 for every batch size from 17
// through 31.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 16; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(16 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3789
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 with inplace(true) for batch
// sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 80;
  constexpr size_t kStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3799
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3811
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3823
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x16 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__SSE41_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3835 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3836
3837
3838 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 once with a batch size of 20.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(20).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3845
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 for batch sizes
// 40, 60, ..., 180 (2x through 9x of 20).
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 20).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3854
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 for every batch size from 1
// through 19.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 20; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3863
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 for every batch size from 21
// through 39.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 20; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(20 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3872
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 with inplace(true) for batch
// sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 100;
  constexpr size_t kStride = 19;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3882
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3894
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3906
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x20 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3918 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3919
3920
3921 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 once with a batch size of 24.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(24).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
      xnn_init_f32_elu_sse2_rr2_p6_params);
}
3928
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 for batch sizes
// 48, 72, ..., 216 (2x through 9x of 24).
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 24).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3937
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 for every batch size from 1
// through 23.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 24; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3946
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 for every batch size from 25
// through 47.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t excess = 1; excess < 24; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(24 + excess).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3955
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 with inplace(true) for batch
// sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kMaxBatch = 120;
  constexpr size_t kStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
        xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3965
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 with prescale set to 0.1 and
// 10.0, each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3977
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 with alpha set to 0.3 and 3.0,
// each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3989
// Runs xnn_f32_velu_ukernel__sse41_rr2_p6_x24 with beta set to 0.3 and 3.0,
// each over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_SSE41;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24,
          xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
4001 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4002
4003
4004 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 once with a batch size of 8.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(8).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4011
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 for batch sizes
// 16, 24, ..., 72 (2x through 9x of 8).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 8).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4020
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 for every batch size
// from 1 through 7.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4029
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 for every batch size
// from 9 through 15.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t excess = 1; excess < 8; ++excess) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(8 + excess).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4038
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 with inplace(true) for
// batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kMaxBatch = 40;
  constexpr size_t kStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4048
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 with prescale set to 0.1
// and 10.0, each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4060
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 with alpha set to 0.3 and
// 3.0, each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4072
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8 with beta set to 0.3 and
// 3.0, each over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4085
4086
4087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16 once with a batch size of 16.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(16).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4094
// Runs xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16 for batch sizes
// 32, 48, ..., 144 (2x through 9x of 16).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(multiple * 16).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4103
// Every batch size from 1 through 15.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4112
// Every batch size from 17 through 31.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4121
// Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4131
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4143
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4155
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4167 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4168
4169
4170 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 24.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 24;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4177
// Batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4186
// Every batch size from 1 through 23.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4195
// Every batch size from 25 through 47.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4204
// Batch sizes 1, 24, 47, ..., 116 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4214
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4226
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4238
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4250 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4251
4252
4253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 32.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 32;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4260
// Batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4269
// Every batch size from 1 through 31.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4278
// Every batch size from 33 through 63.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4287
// Batch sizes 1, 32, 63, ..., 156 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4297
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4309
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4321
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4333 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4334
4335
4336 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 40.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 40;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4343
// Batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4352
// Every batch size from 1 through 39.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 40; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4361
// Every batch size from 41 through 79.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n < 80; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4370
// Batch sizes 1, 40, 79, ..., 196 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4380
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 40, 79, ..., 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4392
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 40, 79, ..., 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4404
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 40, 79, ..., 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4416 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4417
4418
4419 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 48.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 48;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4426
// Batch sizes 96, 144, ..., 432 (multiples of 48).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4435
// Every batch size from 1 through 47.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 48; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4444
// Every batch size from 49 through 95.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n < 96; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4453
// Batch sizes 1, 48, 95, ..., 236 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4463
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4475
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4487
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4499 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4500
4501
4502 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 8.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 8;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4509
// Batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4518
// Every batch size from 1 through 7.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4527
// Every batch size from 9 through 15.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4536
// Batch sizes 1, 8, 15, ..., 36 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4546
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4558
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4570
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4582 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4583
4584
4585 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 16.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 16;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4592
// Batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4601
// Every batch size from 1 through 15.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4610
// Every batch size from 17 through 31.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4619
// Batch sizes 1, 16, 31, ..., 76 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4629
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4641
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4653
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4665 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4666
4667
4668 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 24.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 24;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4675
// Batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4684
// Every batch size from 1 through 23.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4693
// Every batch size from 25 through 47.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4702
// Batch sizes 1, 24, 47, ..., 116 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4712
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4724
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4736
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4748 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4749
4750
4751 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 32.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 32;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4758
// Batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4767
// Every batch size from 1 through 31.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4776
// Every batch size from 33 through 63.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4785
// Batch sizes 1, 32, 63, ..., 156 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4795
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4807
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4819
// Crosses beta values 0.3 and 3.0 with batch sizes 1, 32, 63, ..., 156.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4831 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4832
4833
4834 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch size 40.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatchSize = 40;
  VUnaryMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4841
// Batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4850
// Every batch size from 1 through 39.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 40; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4859
// Every batch size from 41 through 79.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n < 80; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4868
// Batch sizes 1, 40, 79, ..., 196 with the tester's inplace option set to true.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4878
// Crosses prescale values 0.1 and 10.0 with batch sizes 1, 40, 79, ..., 196.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4890
// Crosses alpha values 0.3 and 3.0 with batch sizes 1, 40, 79, ..., 196.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4902
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40,
          xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4914 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4915
4916
4917 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 48.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
      xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4924
// Sweeps batch_size over the multiples of 48 from 96 through 432.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
        xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4933
// Sweeps every batch_size from 1 through 47.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
        xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4942
// Sweeps every batch_size from 49 through 95.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
        xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4951
// Sweeps batch_size from 1 to at most 240 in steps of 47 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
        xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4961
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
          xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4973
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
          xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4985
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48,
          xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4997 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4998
4999
5000 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 8.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x8,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5007
// Sweeps batch_size over the multiples of 8 from 16 through 72.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x8,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5016
// Sweeps every batch_size from 1 through 7.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x8,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5025
// Sweeps every batch_size from 9 through 15.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x8,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5034
// Sweeps batch_size from 1 to at most 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x8,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5044
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x8,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5056
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x8,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5068
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x8,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5080 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5081
5082
5083 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 16.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x16,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5090
// Sweeps batch_size over the multiples of 16 from 32 through 144.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5099
// Sweeps every batch_size from 1 through 15.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5108
// Sweeps every batch_size from 17 through 31.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5117
// Sweeps batch_size from 1 to at most 80 in steps of 15 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5127
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x16,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5139
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x16,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5151
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x16,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5163 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5164
5165
5166 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 24.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x24,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5173
// Sweeps batch_size over the multiples of 24 from 48 through 216.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5182
// Sweeps every batch_size from 1 through 23.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5191
// Sweeps every batch_size from 25 through 47.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5200
// Sweeps batch_size from 1 to at most 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5210
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x24,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5222
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x24,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5234
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x24,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5246 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5247
5248
5249 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 32.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x32,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5256
// Sweeps batch_size over the multiples of 32 from 64 through 288.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5265
// Sweeps every batch_size from 1 through 31.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5274
// Sweeps every batch_size from 33 through 63.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5283
// Sweeps batch_size from 1 to at most 160 in steps of 31 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5293
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX_RR2_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x32,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5305
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX_RR2_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x32,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5317
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX_RR2_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x32,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5329 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5330
5331
5332 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 40.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(40).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x40,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5339
// Sweeps batch_size over the multiples of 40 from 80 through 360.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5348
// Sweeps every batch_size from 1 through 39.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5357
// Sweeps every batch_size from 41 through 79.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5366
// Sweeps batch_size from 1 to at most 200 in steps of 39 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5376
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX_RR2_P6_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x40,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5388
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX_RR2_P6_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x40,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5400
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX_RR2_P6_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x40,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5412 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5413
5414
5415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 48.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x48,
      xnn_init_f32_elu_avx_rr2_p6_params);
}
5422
// Sweeps batch_size over the multiples of 48 from 96 through 432.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5431
// Sweeps every batch_size from 1 through 47.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5440
// Sweeps every batch_size from 49 through 95.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5449
// Sweeps batch_size from 1 to at most 240 in steps of 47 with inplace(true) set on the tester.
TEST(F32_VELU__AVX_RR2_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5459
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x48,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5471
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x48,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5483
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX_RR2_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx_rr2_p6_x48,
          xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5495 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5496
5497
5498 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 8.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5505
// Sweeps batch_size over the multiples of 8 from 16 through 72.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5514
// Sweeps every batch_size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5523
// Sweeps every batch_size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5532
// Sweeps batch_size from 1 to at most 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5542
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5554
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5566
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5578 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5579
5580
5581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 16.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5588
// Sweeps batch_size over the multiples of 16 from 32 through 144.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5597
// Sweeps every batch_size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5606
// Sweeps every batch_size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5615
// Sweeps batch_size from 1 to at most 80 in steps of 15 with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5625
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5637
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5649
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5661 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5662
5663
5664 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 24.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5671
// Sweeps batch_size over the multiples of 24 from 48 through 216.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5680
// Sweeps every batch_size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5689
// Sweeps every batch_size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5698
// Sweeps batch_size from 1 to at most 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5708
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5720
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5732
// For beta in {0.3, 3}: batch sizes start at 1 and step by 23 while <= 120.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5745
5746
5747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 32.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5754
// Batch sizes 64, 96, ... stepping by 32 while < 320.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5763
// Every batch size in [1, 32).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5772
// Every batch size in [33, 64).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5781
// Sets inplace(true); batch sizes start at 1 and step by 31 while <= 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5791
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 31 while <= 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5803
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 31 while <= 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5815
// For beta in {0.3, 3}: batch sizes start at 1 and step by 31 while <= 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5827 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5828
5829
5830 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(40).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5837
// Batch sizes 80, 120, ... stepping by 40 while < 400.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5846
// Every batch size in [1, 40).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5855
// Every batch size in [41, 80).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5864
// Sets inplace(true); batch sizes start at 1 and step by 39 while <= 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5874
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 39 while <= 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5886
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 39 while <= 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5898
// For beta in {0.3, 3}: batch sizes start at 1 and step by 39 while <= 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5910 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5911
5912
5913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 48.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5920
// Batch sizes 96, 144, ... stepping by 48 while < 480.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5929
// Every batch size in [1, 48).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5938
// Every batch size in [49, 96).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5947
// Sets inplace(true); batch sizes start at 1 and step by 47 while <= 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5957
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 47 while <= 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5969
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 47 while <= 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5981
// For beta in {0.3, 3}: batch sizes start at 1 and step by 47 while <= 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5993 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5994
5995
5996 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 56.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(56).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6003
// Batch sizes 112, 168, ... stepping by 56 while < 560.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6012
// Every batch size in [1, 56).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6021
// Every batch size in [57, 112).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6030
// Sets inplace(true); batch sizes start at 1 and step by 55 while <= 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6040
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 55 while <= 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6052
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 55 while <= 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6064
// For beta in {0.3, 3}: batch sizes start at 1 and step by 55 while <= 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6076 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6077
6078
6079 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 64.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(64).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6086
// Batch sizes 128, 192, ... stepping by 64 while < 640.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6095
// Every batch size in [1, 64).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6104
// Every batch size in [65, 128).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6113
// Sets inplace(true); batch sizes start at 1 and step by 63 while <= 320.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6123
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 63 while <= 320.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6135
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 63 while <= 320.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6147
// For beta in {0.3, 3}: batch sizes start at 1 and step by 63 while <= 320.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6159 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6160
6161
6162 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 72.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(72).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6169
// Batch sizes 144, 216, ... stepping by 72 while < 720.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6178
// Every batch size in [1, 72).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6187
// Every batch size in [73, 144).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6196
// Sets inplace(true); batch sizes start at 1 and step by 71 while <= 360.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6206
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 71 while <= 360.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6218
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 71 while <= 360.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6230
// For beta in {0.3, 3}: batch sizes start at 1 and step by 71 while <= 360.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6242 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6243
6244
6245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(80).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6252
// Batch sizes 160, 240, ... stepping by 80 while < 800.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6261
// Every batch size in [1, 80).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6270
// Every batch size in [81, 160).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6279
// Sets inplace(true); batch sizes start at 1 and step by 79 while <= 400.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6289
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 79 while <= 400.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6301
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 79 while <= 400.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6313
// For beta in {0.3, 3}: batch sizes start at 1 and step by 79 while <= 400.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6325 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6326
6327
6328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 8.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6335
// Batch sizes 16, 24, ... stepping by 8 while < 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6344
// Every batch size in [1, 8).
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6353
// Every batch size in [9, 16).
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6362
// Sets inplace(true); batch sizes start at 1 and step by 7 while <= 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6372
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 7 while <= 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6384
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 7 while <= 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6396
// For beta in {0.3, 3}: batch sizes start at 1 and step by 7 while <= 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6408 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6409
6410
6411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 16.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6418
// Batch sizes 32, 48, ... stepping by 16 while < 160.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6427
// Every batch size in [1, 16).
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6436
// Every batch size in [17, 32).
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6445
// Sets inplace(true); batch sizes start at 1 and step by 15 while <= 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6455
// For prescale in {0.1, 10}: batch sizes start at 1 and step by 15 while <= 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6467
// For alpha in {0.3, 3}: batch sizes start at 1 and step by 15 while <= 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6479
// For beta in {0.3, 3}: batch sizes start at 1 and step by 15 while <= 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  for (const float value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6491 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6492
6493
6494 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run at batch_size 24.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6501
// Batch sizes 48, 72, ... stepping by 24 while < 240.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6510
// Tries every batch_size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6519
// Tries every batch_size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 48;
  for (size_t n = 25; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6528
// With inplace(true) set, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 23;
  const size_t last = 120;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6538
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6550
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6562
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6574 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6575
6576
6577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 32.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 32;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6584
// Sweeps batch_size from 64 up to (not including) 320 in steps of 32.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 32;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6593
// Tries every batch_size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6602
// Tries every batch_size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 64;
  for (size_t n = 33; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6611
// With inplace(true) set, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 31;
  const size_t last = 160;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6621
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6633
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6645
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6657 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6658
6659
6660 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 40;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6667
// Sweeps batch_size from 80 up to (not including) 400 in steps of 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 40;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6676
// Tries every batch_size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6685
// Tries every batch_size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 80;
  for (size_t n = 41; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6694
// With inplace(true) set, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 39;
  const size_t last = 200;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6704
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6716
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6728
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6740 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6741
6742
6743 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 48.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 48;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6750
// Sweeps batch_size from 96 up to (not including) 480 in steps of 48.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 48;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6759
// Tries every batch_size from 1 through 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6768
// Tries every batch_size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 96;
  for (size_t n = 49; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6777
// With inplace(true) set, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 47;
  const size_t last = 240;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6787
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6799
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6811
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6823 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6824
6825
6826 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 56.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 56;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6833
// Sweeps batch_size from 112 up to (not including) 560 in steps of 56.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 56;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6842
// Tries every batch_size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 56) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6851
// Tries every batch_size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 112;
  for (size_t n = 57; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6860
// With inplace(true) set, sweeps batch_size from 1 to at most 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 55;
  const size_t last = 280;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6870
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6882
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6894
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6906 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6907
6908
6909 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 64;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6916
// Sweeps batch_size from 128 up to (not including) 640 in steps of 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 64;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6925
// Tries every batch_size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
6934
// Tries every batch_size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 128;
  for (size_t n = 65; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6943
// With inplace(true) set, sweeps batch_size from 1 to at most 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 63;
  const size_t last = 320;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6953
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6965
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6977
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6989 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6990
6991
6992 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 72;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6999
// Sweeps batch_size from 144 up to (not including) 720 in steps of 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 72;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7008
// Tries every batch_size from 1 through 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
7017
// Tries every batch_size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 144;
  for (size_t n = 73; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7026
// With inplace(true) set, sweeps batch_size from 1 to at most 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 71;
  const size_t last = 360;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7036
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7048
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7060
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7072 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7073
7074
7075 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 80;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
7082
// Sweeps batch_size from 160 up to (not including) 800 in steps of 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 80;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7091
// Tries every batch_size from 1 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    ++n;
  }
}
7100
// Tries every batch_size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 160;
  for (size_t n = 81; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7109
// With inplace(true) set, sweeps batch_size from 1 to at most 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 79;
  const size_t last = 400;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7119
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7131
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7143
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7155 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7156
7157
7158 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 8;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7165
// Sweeps batch_size from 16 up to (not including) 80 in steps of 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 8;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7174
// Tries every batch_size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    ++n;
  }
}
7183
// Tries every batch_size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 16;
  for (size_t n = 9; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7192
// With inplace(true) set, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 7;
  const size_t last = 40;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7202
// For prescale values 0.1 and 10, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7214
// For alpha values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7226
// For beta values 0.3 and 3, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7238 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7239
7240
7241 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the micro-kernel once with batch_size fixed at 16.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const size_t exact_batch = 16;
  VUnaryMicrokernelTester().batch_size(exact_batch).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7248
// Sweeps batch_size from 32 up to (not including) 160 in steps of 16.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const size_t step = 16;
  for (size_t n = 2 * step; n < 10 * step; n += step) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7257
// Tries every batch_size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    ++n;
  }
}
7266
// Tries every batch_size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  const size_t upper_bound = 32;
  for (size_t n = 17; n < upper_bound; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7275
// With inplace(true) set, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const size_t stride = 15;
  const size_t last = 80;
  for (size_t n = 1; n <= last; n += stride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7285
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7297
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7309
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7321 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7322
7323
7324 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 24.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7331
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7340
// Covers every batch size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7349
// Covers every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7358
// Batch sizes from 1 up to 120 in steps of 23, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7368
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7380
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7392
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7404 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7405
7406
7407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 32.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7414
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7423
// Covers every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7432
// Covers every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7441
// Batch sizes from 1 up to 160 in steps of 31, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7451
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7463
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7475
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7487 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7488
7489
7490 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(40).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7497
// Sweeps batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7506
// Covers every batch size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7515
// Covers every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7524
// Batch sizes from 1 up to 200 in steps of 39, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7534
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7546
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7558
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7570 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7571
7572
7573 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 48.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7580
// Sweeps batch sizes 96, 144, ..., 432 (multiples of 48).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7589
// Covers every batch size from 1 through 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7598
// Covers every batch size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7607
// Batch sizes from 1 up to 240 in steps of 47, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7617
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7629
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7641
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7653 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7654
7655
7656 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 56.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(56).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7663
// Sweeps batch sizes 112, 168, ..., 504 (multiples of 56).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7672
// Covers every batch size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7681
// Covers every batch size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7690
// Batch sizes from 1 up to 280 in steps of 55, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7700
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7712
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7724
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7737
7738
7739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 64.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(64).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7746
// Sweeps batch sizes 128, 192, ..., 576 (multiples of 64).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7755
// Covers every batch size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7764
// Covers every batch size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7773
// Batch sizes from 1 up to 320 in steps of 63, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7783
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7795
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7807
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7819 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7820
7821
7822 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 72.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(72).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7829
// Sweeps batch sizes 144, 216, ..., 648 (multiples of 72).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7838
// Covers every batch size from 1 through 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7847
// Covers every batch size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7856
// Batch sizes from 1 up to 360 in steps of 71, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7866
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7878
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7890
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7902 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7903
7904
7905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(80).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7912
// Sweeps batch sizes 160, 240, ..., 720 (multiples of 80).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7921
// Covers every batch size from 1 through 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7930
// Covers every batch size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7939
// Batch sizes from 1 up to 400 in steps of 79, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7949
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7961
// For alpha values 0.3 and 3, sweeps batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7973
// For beta values 0.3 and 3, sweeps batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (float beta_value : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7985 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7986
7987
7988 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with batch_size set to exactly 8.
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
      xnn_init_f32_elu_avx2_rr1_p6_params);
}
7995
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
        xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8004
// Covers every batch size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
        xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8013
// Covers every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
        xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8022
// Batch sizes from 1 up to 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VELU__AVX2_RR1_P6_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
        xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8032
// For prescale values 0.1 and 10, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_p6_x8,
          xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8044
// Runs batch sizes 1, 8, ..., 36 for each of the alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8056
// Runs batch sizes 1, 8, ..., 36 for each of the beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8068 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8069
8070
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x16, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 16.
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 16, ..., 76 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 16, ..., 76 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 16, ..., 76 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 16, ..., 76 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8152
8153
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x24, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 24.
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 24, ..., 116 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 24, ..., 116 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 24, ..., 116 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 24, ..., 116 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8235
8236
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x32, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 32.
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 32, ..., 156 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 32, ..., 156 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 32, ..., 156 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 32, ..., 156 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8318
8319
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x40, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 40.
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 40, ..., 196 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 40, ..., 196 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 40, ..., 196 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 40, ..., 196 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8401
8402
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x48, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 48.
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 96, 144, ..., 432 (multiples of 48).
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 47.
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 48, ..., 236 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 48, ..., 236 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 48, ..., 236 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 48, ..., 236 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8484
8485
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x56, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 56.
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(56);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 112, 168, ..., 504 (multiples of 56).
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 56, ..., 276 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 56, ..., 276 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 56, ..., 276 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 56, ..., 276 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8567
8568
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x64, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 64.
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 128, 192, ..., 576 (multiples of 64).
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 64, ..., 316 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 64, ..., 316 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 64, ..., 316 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 64, ..., 316 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8650
8651
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x72, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 72.
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(72);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 144, 216, ..., 648 (multiples of 72).
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 71.
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 72, ..., 356 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 72, ..., 356 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 72, ..., 356 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 72, ..., 356 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8733
8734
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test cases for xnn_f32_velu_ukernel__avx2_rr1_p6_x80, always paired with
// xnn_init_f32_elu_avx2_rr1_p6_params. Each case begins with
// TEST_REQUIRES_X86_AVX2 and builds a fresh VUnaryMicrokernelTester per run.

// Single run with a batch size of 80.
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
}

// Batch sizes 160, 240, ..., 720 (multiples of 80).
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 1 through 79.
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Every batch size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 80, ..., 396 with inplace(true).
TEST(F32_VELU__AVX2_RR1_P6_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}

// Batch sizes 1, 80, ..., 396 for prescale values 0.1 and 10.
TEST(F32_VELU__AVX2_RR1_P6_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 80, ..., 396 for alpha values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}

// Batch sizes 1, 80, ..., 396 for beta values 0.3 and 3.
TEST(F32_VELU__AVX2_RR1_P6_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8816
8817
8818 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,batch_eq_16)8819 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_eq_16) {
8820 TEST_REQUIRES_X86_AVX512F;
8821 VUnaryMicrokernelTester()
8822 .batch_size(16)
8823 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8824 }
8825
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,batch_div_16)8826 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_div_16) {
8827 TEST_REQUIRES_X86_AVX512F;
8828 for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
8829 VUnaryMicrokernelTester()
8830 .batch_size(batch_size)
8831 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8832 }
8833 }
8834
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,batch_lt_16)8835 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_lt_16) {
8836 TEST_REQUIRES_X86_AVX512F;
8837 for (size_t batch_size = 1; batch_size < 16; batch_size++) {
8838 VUnaryMicrokernelTester()
8839 .batch_size(batch_size)
8840 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8841 }
8842 }
8843
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,batch_gt_16)8844 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_gt_16) {
8845 TEST_REQUIRES_X86_AVX512F;
8846 for (size_t batch_size = 17; batch_size < 32; batch_size++) {
8847 VUnaryMicrokernelTester()
8848 .batch_size(batch_size)
8849 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8850 }
8851 }
8852
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,inplace)8853 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, inplace) {
8854 TEST_REQUIRES_X86_AVX512F;
8855 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
8856 VUnaryMicrokernelTester()
8857 .batch_size(batch_size)
8858 .inplace(true)
8859 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8860 }
8861 }
8862
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16,prescale)8863 TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, prescale) {
8864 TEST_REQUIRES_X86_AVX512F;
8865 for (float prescale : std::vector<float>({0.1f, 10.0f})) {
8866 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
8867 VUnaryMicrokernelTester()
8868 .batch_size(batch_size)
8869 .prescale(prescale)
8870 .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
8871 }
8872 }
8873 }
8874
// Sweeps the tester's alpha setting over {0.3, 3}; for each value the batch
// size starts at 1 and steps by 15 while it does not exceed 80.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8886
// Sweeps the tester's beta setting over {0.3, 3}; for each value the batch
// size starts at 1 and steps by 15 while it does not exceed 80.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8898 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8899
8900
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32.

// Single run with a batch size of exactly 32.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 32: 64, 96, ..., 288.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 32)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 31.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 33 through 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 31 while it
// does not exceed 160.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8982
8983
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48.

// Single run with a batch size of exactly 48.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(48)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 48: 96, 144, ..., 432.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 48)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 47.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 49 through 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 47 while it
// does not exceed 240.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9065
9066
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64.

// Single run with a batch size of exactly 64.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(64)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 64: 128, 192, ..., 576.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 64)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 65 through 127.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 63 while it
// does not exceed 320.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9148
9149
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80.

// Single run with a batch size of exactly 80.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(80)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 80: 160, 240, ..., 720.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 80)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 79.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 81 through 159.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 79 while it
// does not exceed 400.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9231
9232
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96.

// Single run with a batch size of exactly 96.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(96)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 96: 192, 288, ..., 864.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 96)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 97 through 191.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 95 while it
// does not exceed 480.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9314
9315
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112.

// Single run with a batch size of exactly 112.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(112)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 112: 224, 336, ..., 1008.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 112)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 111.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 113 through 223.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 111 while it
// does not exceed 560.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9397
9398
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128.

// Single run with a batch size of exactly 128.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(128)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
            xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}

// Batch sizes that are whole multiples of 128: 256, 384, ..., 1152.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 128)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 1 through 127.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Every batch size from 129 through 255.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 127 while it
// does not exceed 640.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
              xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
                xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9480
9481
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_p6_x16.

// Single run with a batch size of exactly 16.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
            xnn_init_f32_elu_avx512_rr1_p6_params);
}

// Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 16)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Every batch size from 1 through 15.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Every batch size from 17 through 31.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 15 while it
// does not exceed 80.
TEST(F32_VELU__AVX512F_RR1_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9563
9564
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Test suite for xnn_f32_velu_ukernel__avx512f_rr1_p6_x32.

// Single run with a batch size of exactly 32.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester()
      .batch_size(32)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
            xnn_init_f32_elu_avx512_rr1_p6_params);
}

// Batch sizes that are whole multiples of 32: 64, 96, ..., 288.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t tiles = 2; tiles < 10; ++tiles) {
    VUnaryMicrokernelTester()
        .batch_size(tiles * 32)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Every batch size from 1 through 31.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Every batch size from 33 through 63.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// In-place flag enabled; batch size starts at 1 and steps by 31 while it
// does not exceed 160.
TEST(F32_VELU__AVX512F_RR1_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
              xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}

// Prescale values {0.1, 10}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .prescale(scale)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}

// Alpha values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .alpha(a)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}

// Beta values {0.3, 3}, each over the same batch-size sweep as above.
TEST(F32_VELU__AVX512F_RR1_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX512F;
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
          .batch_size(n)
          .beta(b)
          .Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
                xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9646
9647
9648 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 48.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
}
9655
// Runs the kernel for batch sizes 96, 144, ..., 432 (steps of 48).
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9664
// Runs the kernel for every batch size from 1 through 47.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9673
// Runs the kernel for every batch size from 49 through 95.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9682
// Enables the tester's inplace option for batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX512F_RR1_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9692
// Sweeps prescale over {0.1, 10} and batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX512F_RR1_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9704
// Sweeps alpha over {0.3, 3} and batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX512F_RR1_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9716
// Sweeps beta over {0.3, 3} and batch sizes 1, 48, 95, ..., 236.
TEST(F32_VELU__AVX512F_RR1_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9728 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9729
9730
9731 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 64.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
}
9738
// Runs the kernel for batch sizes 128, 192, ..., 576 (steps of 64).
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9747
// Runs the kernel for every batch size from 1 through 63.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9756
// Runs the kernel for every batch size from 65 through 127.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9765
// Enables the tester's inplace option for batch sizes 1, 64, 127, ..., 316.
TEST(F32_VELU__AVX512F_RR1_P6_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9775
// Sweeps prescale over {0.1, 10} and batch sizes 1, 64, 127, ..., 316.
TEST(F32_VELU__AVX512F_RR1_P6_X64, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9787
// Sweeps alpha over {0.3, 3} and batch sizes 1, 64, 127, ..., 316.
TEST(F32_VELU__AVX512F_RR1_P6_X64, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9799
// Sweeps beta over {0.3, 3} and batch sizes 1, 64, 127, ..., 316.
TEST(F32_VELU__AVX512F_RR1_P6_X64, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9811 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9812
9813
9814 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 80.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(80);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
}
9821
// Runs the kernel for batch sizes 160, 240, ..., 720 (steps of 80).
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9830
// Runs the kernel for every batch size from 1 through 79.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9839
// Runs the kernel for every batch size from 81 through 159.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9848
// Enables the tester's inplace option for batch sizes 1, 80, 159, ..., 396.
TEST(F32_VELU__AVX512F_RR1_P6_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9858
// Sweeps prescale over {0.1, 10} and batch sizes 1, 80, 159, ..., 396.
TEST(F32_VELU__AVX512F_RR1_P6_X80, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9870
// Sweeps alpha over {0.3, 3} and batch sizes 1, 80, 159, ..., 396.
TEST(F32_VELU__AVX512F_RR1_P6_X80, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9882
// Sweeps beta over {0.3, 3} and batch sizes 1, 80, 159, ..., 396.
TEST(F32_VELU__AVX512F_RR1_P6_X80, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9894 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9895
9896
9897 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 96.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(96);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
}
9904
// Runs the kernel for batch sizes 192, 288, ..., 864 (steps of 96).
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 192; n < 960; n += 96) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9913
// Runs the kernel for every batch size from 1 through 95.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9922
// Runs the kernel for every batch size from 97 through 191.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9931
// Enables the tester's inplace option for batch sizes 1, 96, 191, ..., 476.
TEST(F32_VELU__AVX512F_RR1_P6_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9941
// Sweeps prescale over {0.1, 10} and batch sizes 1, 96, 191, ..., 476.
TEST(F32_VELU__AVX512F_RR1_P6_X96, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9953
// Sweeps alpha over {0.3, 3} and batch sizes 1, 96, 191, ..., 476.
TEST(F32_VELU__AVX512F_RR1_P6_X96, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9965
// Sweeps beta over {0.3, 3} and batch sizes 1, 96, 191, ..., 476.
TEST(F32_VELU__AVX512F_RR1_P6_X96, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9977 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9978
9979
9980 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 112.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(112);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
}
9987
// Runs the kernel for batch sizes 224, 336, ..., 1008 (steps of 112).
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 224; n < 1120; n += 112) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9996
// Runs the kernel for every batch size from 1 through 111.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10005
// Runs the kernel for every batch size from 113 through 223.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10014
// Enables the tester's inplace option for batch sizes 1, 112, 223, ..., 556.
TEST(F32_VELU__AVX512F_RR1_P6_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 560; n += 111) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10024
// Sweeps prescale over {0.1, 10} and batch sizes 1, 112, 223, ..., 556.
TEST(F32_VELU__AVX512F_RR1_P6_X112, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10036
// Sweeps alpha over {0.3, 3} and batch sizes 1, 112, 223, ..., 556.
TEST(F32_VELU__AVX512F_RR1_P6_X112, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10048
// Sweeps beta over {0.3, 3} and batch sizes 1, 112, 223, ..., 556.
TEST(F32_VELU__AVX512F_RR1_P6_X112, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10060 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10061
10062
10063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size set to 128.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  VUnaryMicrokernelTester tester;
  tester.batch_size(128);
  tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
}
10070
// Runs the kernel for batch sizes 256, 384, ..., 1152 (steps of 128).
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 256; n < 1280; n += 128) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10079
// Runs the kernel for every batch size from 1 through 127.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10088
// Runs the kernel for every batch size from 129 through 255.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 129; n < 256; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10097
// Enables the tester's inplace option for batch sizes 1, 128, 255, ..., 636.
TEST(F32_VELU__AVX512F_RR1_P6_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 640; n += 127) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10107
// Sweeps prescale over {0.1, 10} and batch sizes 1, 128, 255, ..., 636.
TEST(F32_VELU__AVX512F_RR1_P6_X128, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10119
// Sweeps alpha over {0.3, 3} and batch sizes 1, 128, 255, ..., 636.
TEST(F32_VELU__AVX512F_RR1_P6_X128, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10131
// Sweeps beta over {0.3, 3} and batch sizes 1, 128, 255, ..., 636.
TEST(F32_VELU__AVX512F_RR1_P6_X128, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10143 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10144
10145
10146 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with batch_size set to 4.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_eq_4) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10152
// Runs the kernel for batch sizes 8, 12, ..., 36 (steps of 4).
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10160
// Runs the kernel for every batch size from 1 through 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10168
// Runs the kernel for every batch size from 5 through 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10176
// Enables the tester's inplace option for batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10185
// Sweeps prescale over {0.1, 10} and batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, prescale) {
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10196
// Sweeps alpha over {0.3, 3} and batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, alpha) {
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10207
// Sweeps beta over {0.3, 3} and batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, beta) {
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10218 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10219
10220
10221 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with batch_size set to 8.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_eq_8) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10227
// Runs the kernel for batch sizes 16, 24, ..., 72 (steps of 8).
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10235
// Runs the kernel for every batch size from 1 through 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10243
// Runs the kernel for every batch size from 9 through 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10251
// Enables the tester's inplace option for batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10260
// Sweeps prescale over {0.1, 10} and batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, prescale) {
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10271
// Sweeps alpha over {0.3, 3} and batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, alpha) {
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10282
// Sweeps beta over {0.3, 3} and batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, beta) {
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10293 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10294
10295
10296 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with batch_size set to 12.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_eq_12) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10302
// Runs the kernel for batch sizes 24, 36, ..., 108 (steps of 12).
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_div_12) {
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10310
// Runs the kernel for every batch size from 1 through 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_lt_12) {
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10318
// Runs the kernel for every batch size from 13 through 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_gt_12) {
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10326
// Enables the tester's inplace option for batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, inplace) {
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10335
// Sweeps prescale over {0.1, 10} and batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, prescale) {
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(prescale);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10346
// Sweeps alpha over {0.3, 3} and batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, alpha) {
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(alpha);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10357
// Sweeps beta over {0.3, 3} and batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, beta) {
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(beta);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10368 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10369
10370
10371 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with batch_size set to 16.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_eq_16) {
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10377
// Runs the kernel for batch sizes 32, 48, ..., 144 (steps of 16).
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10385
// Runs the kernel for every batch size from 1 through 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10393
// Runs the kernel for every batch size from 17 through 31.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10401
// Enables the tester's inplace option for batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10410
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, prescale) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10421
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, alpha) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10432
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, beta) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10443 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10444
10445
10446 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_eq_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10452
// Runs the kernel for multiples of 20 from 40 up to (not including) 200.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_div_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10460
// Runs the kernel for every batch size from 1 to 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_lt_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10468
// Runs the kernel for every batch size from 21 to 39.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_gt_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10476
// Runs the kernel with inplace(true) for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, inplace) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10485
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, prescale) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10496
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, alpha) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10507
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, beta) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10518 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10519
10520
10521 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_eq_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
10527
// Runs the kernel for multiples of 24 from 48 up to (not including) 240.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_div_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10535
// Runs the kernel for every batch size from 1 to 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_lt_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10543
// Runs the kernel for every batch size from 25 to 47.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_gt_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10551
// Runs the kernel with inplace(true) for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, inplace) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
10560
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, prescale) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10571
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, alpha) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10582
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, beta) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
10593 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10594
10595
10596 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_eq_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10602
// Runs the kernel for multiples of 4 from 8 up to (not including) 40.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_div_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10610
// Runs the kernel for every batch size from 1 to 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_lt_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10618
// Runs the kernel for every batch size from 5 to 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_gt_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10626
// Runs the kernel with inplace(true) for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, inplace) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10635
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, prescale) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10646
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, alpha) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10657
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, beta) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10668 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10669
10670
10671 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 8 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_eq_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10677
// Runs the kernel for multiples of 8 from 16 up to (not including) 80.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_div_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10685
// Runs the kernel for every batch size from 1 to 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_lt_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10693
// Runs the kernel for every batch size from 9 to 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_gt_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10701
// Runs the kernel with inplace(true) for batch sizes 1 to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, inplace) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10710
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, prescale) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10721
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, alpha) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10732
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, beta) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10743 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10744
10745
10746 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 12 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_eq_12) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10752
// Runs the kernel for multiples of 12 from 24 up to (not including) 120.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_div_12) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10760
// Runs the kernel for every batch size from 1 to 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_lt_12) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10768
// Runs the kernel for every batch size from 13 to 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_gt_12) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10776
// Runs the kernel with inplace(true) for batch sizes 1 to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, inplace) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10785
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, prescale) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10796
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, alpha) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10807
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, beta) {
  const size_t kTile = 12;  // same number as the kernel's _x12 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10818 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10819
10820
10821 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 16 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_eq_16) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10827
// Runs the kernel for multiples of 16 from 32 up to (not including) 160.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_div_16) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10835
// Runs the kernel for every batch size from 1 to 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_lt_16) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10843
// Runs the kernel for every batch size from 17 to 31.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_gt_16) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10851
// Runs the kernel with inplace(true) for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, inplace) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10860
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, prescale) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10871
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, alpha) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10882
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, beta) {
  const size_t kTile = 16;  // same number as the kernel's _x16 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10893 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10894
10895
10896 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_eq_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10902
// Runs the kernel for multiples of 20 from 40 up to (not including) 200.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_div_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10910
// Runs the kernel for every batch size from 1 to 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_lt_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10918
// Runs the kernel for every batch size from 21 to 39.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_gt_20) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10926
// Runs the kernel with inplace(true) for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, inplace) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10935
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, prescale) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10946
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, alpha) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10957
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, beta) {
  const size_t kTile = 20;  // same number as the kernel's _x20 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
10968 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10969
10970
10971 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_eq_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
10977
// Runs the kernel for multiples of 24 from 48 up to (not including) 240.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_div_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10985
// Runs the kernel for every batch size from 1 to 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_lt_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
10993
// Runs the kernel for every batch size from 25 to 47.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_gt_24) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11001
// Runs the kernel with inplace(true) for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, inplace) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11010
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, prescale) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11021
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, alpha) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11032
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, beta) {
  const size_t kTile = 24;  // same number as the kernel's _x24 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11043 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11044
11045
11046 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_eq_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11052
// Runs the kernel for multiples of 4 from 8 up to (not including) 40.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_div_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11060
// Runs the kernel for every batch size from 1 to 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_lt_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11068
// Runs the kernel for every batch size from 5 to 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_gt_4) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11076
// Runs the kernel with inplace(true) for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, inplace) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11085
// Runs the kernel with prescale 0.1 and 10.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, prescale) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11096
// Runs the kernel with alpha 0.3 and 3.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, alpha) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11107
// Runs the kernel with beta 0.3 and 3.0 for batch sizes 1 to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, beta) {
  const size_t kTile = 4;  // same number as the kernel's _x4 suffix
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11118 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11119
11120
11121 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 8 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_eq_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11127
// Runs the kernel for multiples of 8 from 16 up to (not including) 80.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_div_8) {
  const size_t kTile = 8;  // same number as the kernel's _x8 suffix
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11135
// Runs the kernel on every batch size from 1 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_lt_8) {
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11143
// Runs the kernel on every batch size from 9 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_gt_8) {
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11151
// Runs the kernel with inplace(true) over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, inplace) {
  size_t n = 1;
  while (n <= 40) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    n += 7;
  }
}
11160
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11171
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11182
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11193 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11194
11195
11196 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(12).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_eq_12) {
  const size_t batch = 12;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11202
// Runs the kernel on batch sizes 24, 36, ..., 108 (multiples of 12 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_div_12) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 12)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11210
// Runs the kernel on every batch size from 1 through 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_lt_12) {
  for (size_t n = 1; n <= 11; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11218
// Runs the kernel on every batch size from 13 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_gt_12) {
  for (size_t n = 13; n <= 23; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11226
// Runs the kernel with inplace(true) over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, inplace) {
  size_t n = 1;
  while (n <= 60) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    n += 11;
  }
}
11235
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11246
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11257
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11268 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11269
11270
11271 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(16).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_eq_16) {
  const size_t batch = 16;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11277
// Runs the kernel on batch sizes 32, 48, ..., 144 (multiples of 16 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_div_16) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11285
// Runs the kernel on every batch size from 1 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_lt_16) {
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11293
// Runs the kernel on every batch size from 17 through 31.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_gt_16) {
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11301
// Runs the kernel with inplace(true) over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, inplace) {
  size_t n = 1;
  while (n <= 80) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    n += 15;
  }
}
11310
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11321
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11332
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11343 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11344
11345
11346 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(20).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_eq_20) {
  const size_t batch = 20;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11352
// Runs the kernel on batch sizes 40, 60, ..., 180 (multiples of 20 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_div_20) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 20)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11360
// Runs the kernel on every batch size from 1 through 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_lt_20) {
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11368
// Runs the kernel on every batch size from 21 through 39.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_gt_20) {
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11376
// Runs the kernel with inplace(true) over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, inplace) {
  size_t n = 1;
  while (n <= 100) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    n += 19;
  }
}
11385
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11396
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11407
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11418 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11419
11420
11421 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(24).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_eq_24) {
  const size_t batch = 24;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11427
// Runs the kernel on batch sizes 48, 72, ..., 216 (multiples of 24 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_div_24) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11435
// Runs the kernel on every batch size from 1 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_lt_24) {
  for (size_t n = 1; n <= 23; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11443
// Runs the kernel on every batch size from 25 through 47.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_gt_24) {
  for (size_t n = 25; n <= 47; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11451
// Runs the kernel with inplace(true) over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, inplace) {
  size_t n = 1;
  while (n <= 120) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    n += 23;
  }
}
11460
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11471
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11482
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24, xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11493 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11494
11495
11496 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(4).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_eq_4) {
  const size_t batch = 4;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11502
// Runs the kernel on batch sizes 8, 12, ..., 36 (multiples of 4 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_div_4) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 4)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11510
// Runs the kernel on every batch size from 1 through 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_lt_4) {
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11518
// Runs the kernel on every batch size from 5 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_gt_4) {
  for (size_t n = 5; n <= 7; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11526
// Runs the kernel with inplace(true) over batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, inplace) {
  size_t n = 1;
  while (n <= 20) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    n += 3;
  }
}
11535
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11546
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11557
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11568 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11569
11570
11571 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(8).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_eq_8) {
  const size_t batch = 8;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11577
// Runs the kernel on batch sizes 16, 24, ..., 72 (multiples of 8 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_div_8) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11585
// Runs the kernel on every batch size from 1 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_lt_8) {
  for (size_t n = 1; n <= 7; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11593
// Runs the kernel on every batch size from 9 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_gt_8) {
  for (size_t n = 9; n <= 15; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11601
// Runs the kernel with inplace(true) over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, inplace) {
  size_t n = 1;
  while (n <= 40) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    n += 7;
  }
}
11610
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11621
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11632
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11643 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11644
11645
11646 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(12).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_eq_12) {
  const size_t batch = 12;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11652
// Runs the kernel on batch sizes 24, 36, ..., 108 (multiples of 12 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_div_12) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 12)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11660
// Runs the kernel on every batch size from 1 through 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_lt_12) {
  for (size_t n = 1; n <= 11; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11668
// Runs the kernel on every batch size from 13 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_gt_12) {
  for (size_t n = 13; n <= 23; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11676
// Runs the kernel with inplace(true) over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, inplace) {
  size_t n = 1;
  while (n <= 60) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    n += 11;
  }
}
11685
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11696
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11707
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11718 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11719
11720
11721 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(16).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_eq_16) {
  const size_t batch = 16;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11727
// Runs the kernel on batch sizes 32, 48, ..., 144 (multiples of 16 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_div_16) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11735
// Runs the kernel on every batch size from 1 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_lt_16) {
  for (size_t n = 1; n <= 15; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11743
// Runs the kernel on every batch size from 17 through 31.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_gt_16) {
  for (size_t n = 17; n <= 31; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11751
// Runs the kernel with inplace(true) over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, inplace) {
  size_t n = 1;
  while (n <= 80) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    n += 15;
  }
}
11760
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11771
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11782
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11793 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11794
11795
11796 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the kernel with batch_size(20).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_eq_20) {
  const size_t batch = 20;
  VUnaryMicrokernelTester()
    .batch_size(batch)
    .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11802
// Runs the kernel on batch sizes 40, 60, ..., 180 (multiples of 20 from 2x up to 9x).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_div_20) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 20)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11810
// Runs the kernel on every batch size from 1 through 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_lt_20) {
  for (size_t n = 1; n <= 19; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11818
// Runs the kernel on every batch size from 21 through 39.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_gt_20) {
  for (size_t n = 21; n <= 39; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11826
// Runs the kernel with inplace(true) over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, inplace) {
  size_t n = 1;
  while (n <= 100) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    n += 19;
  }
}
11835
// Runs the kernel with prescale set to 0.1 and 10.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11846
// Runs the kernel with alpha set to 0.3 and 3.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11857
// Runs the kernel with beta set to 0.3 and 3.0, each over batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(value)
        .Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11868 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11869
11870
11871 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_eq_24) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(24);
  tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
              xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11877
// Runs the kernel for batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11885
// Runs the kernel for every batch size from 1 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11893
// Runs the kernel for every batch size from 25 through 47.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11901
// Runs the kernel with inplace(true) at batch sizes 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11910
// Runs the kernel with prescale 0.1f and 10.0f, each at batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11921
// Runs the kernel with alpha 0.3f and 3.0f, each at batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11932
// Runs the kernel with beta 0.3f and 3.0f, each at batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
                  xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11943 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11944
11945
11946 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 1 element.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_eq_1) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(1);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
11952
// Runs the kernel for every batch size from 2 through 9.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
11960
// Runs the kernel with inplace(true) at every batch size from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, inplace) {
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
11969
// Runs the kernel with prescale 0.1f and 10.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
11980
// Runs the kernel with alpha 0.3f and 3.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
11991
// Runs the kernel with beta 0.3f and 3.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12002 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12003
12004
12005 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 2 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_eq_2) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(2);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12011
// Runs the kernel for batch sizes 4, 6, ..., 18 (multiples of 2).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12019
// Runs the kernel for batch sizes in [1, 2), i.e. batch size 1 only.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12027
// Runs the kernel for batch sizes in [3, 4), i.e. batch size 3 only.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12035
// Runs the kernel with inplace(true) at every batch size from 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, inplace) {
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12044
// Runs the kernel with prescale 0.1f and 10.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12055
// Runs the kernel with alpha 0.3f and 3.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12066
// Runs the kernel with beta 0.3f and 3.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12077 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12078
12079
12080 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 3 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_eq_3) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(3);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12086
// Runs the kernel for batch sizes 6, 9, ..., 27 (multiples of 3).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_div_3) {
  for (size_t n = 6; n < 30; n += 3) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12094
// Runs the kernel for batch sizes 1 and 2.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_lt_3) {
  for (size_t n = 1; n < 3; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12102
// Runs the kernel for batch sizes 4 and 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_gt_3) {
  for (size_t n = 4; n < 6; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12110
// Runs the kernel with inplace(true) at the odd batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12119
// Runs the kernel with prescale 0.1f and 10.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12130
// Runs the kernel with alpha 0.3f and 3.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12141
// Runs the kernel with beta 0.3f and 3.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12152 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12153
12154
12155 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_eq_4) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(4);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12161
// Runs the kernel for batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12169
// Runs the kernel for every batch size from 1 through 3.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12177
// Runs the kernel for every batch size from 5 through 7.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12185
// Runs the kernel with inplace(true) at batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12194
// Runs the kernel with prescale 0.1f and 10.0f, each at batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12205
// Runs the kernel with alpha 0.3f and 3.0f, each at batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12216
// Runs the kernel with beta 0.3f and 3.0f, each at batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12227 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12228
12229
12230 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 5 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_eq_5) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(5);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12236
// Runs the kernel for batch sizes 10, 15, ..., 45 (multiples of 5).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_div_5) {
  for (size_t n = 10; n < 50; n += 5) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12244
// Runs the kernel for every batch size from 1 through 4.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_lt_5) {
  for (size_t n = 1; n < 5; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12252
// Runs the kernel for every batch size from 6 through 9.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_gt_5) {
  for (size_t n = 6; n < 10; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12260
// Runs the kernel with inplace(true) at batch sizes 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, inplace) {
  for (size_t n = 1; n <= 25; n += 4) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12269
// Runs the kernel with prescale 0.1f and 10.0f, each at batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12280
// Runs the kernel with alpha 0.3f and 3.0f, each at batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12291
// Runs the kernel with beta 0.3f and 3.0f, each at batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12302 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12303
12304
12305 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 6 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_eq_6) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(6);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
              xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12311
// Runs the kernel for batch sizes 12, 18, ..., 54 (multiples of 6).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_div_6) {
  for (size_t n = 12; n < 60; n += 6) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12319
// Runs the kernel for every batch size from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_lt_6) {
  for (size_t n = 1; n < 6; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12327
// Runs the kernel for every batch size from 7 through 11.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_gt_6) {
  for (size_t n = 7; n < 12; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12335
// Runs the kernel with inplace(true) at batch sizes 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, inplace) {
  for (size_t n = 1; n <= 30; n += 5) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12344
// Runs the kernel with prescale 0.1f and 10.0f, each at batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12355
// Runs the kernel with alpha 0.3f and 3.0f, each at batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12366
// Runs the kernel with beta 0.3f and 3.0f, each at batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
                  xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12377 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12378
12379
12380 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 1 element.
TEST(F32_VELU__WASM_RR2_P6_X1, batch_eq_1) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(1);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
              xnn_init_f32_elu_scalar_rr2_p6_params);
}
12386
// Runs the kernel for every batch size from 2 through 9.
TEST(F32_VELU__WASM_RR2_P6_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12394
// Runs the kernel with inplace(true) at every batch size from 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, inplace) {
  for (size_t n = 1; n <= 5; n += 1) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12403
// Runs the kernel with prescale 0.1f and 10.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12414
// Runs the kernel with alpha 0.3f and 3.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12425
// Runs the kernel with beta 0.3f and 3.0f, each at every batch size
// from 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12436 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12437
12438
12439 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 2 elements.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_eq_2) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(2);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
              xnn_init_f32_elu_scalar_rr2_p6_params);
}
12445
// Runs the kernel for batch sizes 4, 6, ..., 18 (multiples of 2).
TEST(F32_VELU__WASM_RR2_P6_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12453
// Runs the kernel for batch sizes in [1, 2), i.e. batch size 1 only.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12461
// Runs the kernel for batch sizes in [3, 4), i.e. batch size 3 only.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12469
// Runs the kernel with inplace(true) at every batch size from 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, inplace) {
  for (size_t n = 1; n <= 10; n += 1) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12478
// Runs the kernel with prescale 0.1f and 10.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12489
// Runs the kernel with alpha 0.3f and 3.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12500
// Runs the kernel with beta 0.3f and 3.0f, each at every batch size
// from 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 10; n += 1) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12511 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12512
12513
12514 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 3 elements.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_eq_3) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(3);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
              xnn_init_f32_elu_scalar_rr2_p6_params);
}
12520
// Runs the kernel for batch sizes 6, 9, ..., 27 (multiples of 3).
TEST(F32_VELU__WASM_RR2_P6_X3, batch_div_3) {
  for (size_t n = 6; n < 30; n += 3) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12528
// Runs the kernel for batch sizes 1 and 2.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_lt_3) {
  for (size_t n = 1; n < 3; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12536
// Runs the kernel for batch sizes 4 and 5.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_gt_3) {
  for (size_t n = 4; n < 6; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12544
// Runs the kernel with inplace(true) at the odd batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12553
// Runs the kernel with prescale 0.1f and 10.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, prescale) {
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12564
// Runs the kernel with alpha 0.3f and 3.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, alpha) {
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12575
// Runs the kernel with beta 0.3f and 3.0f, each at the odd batch sizes
// 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, beta) {
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      auto tester = VUnaryMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
                  xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12586 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12587
12588
12589 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once with a batch of exactly 4 elements.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_eq_4) {
  auto tester = VUnaryMicrokernelTester();
  tester.batch_size(4);
  tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
              xnn_init_f32_elu_scalar_rr2_p6_params);
}
12595
// Runs the kernel for batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VELU__WASM_RR2_P6_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12603
// Runs the kernel for every batch size from 1 through 3.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12611
// Runs the kernel for every batch size from 5 through 7.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12619
// Runs the kernel with inplace(true) at batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_P6_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VUnaryMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
                xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12628
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12639
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12650
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12661 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12662
12663
12664 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 5.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_eq_5) {
  VUnaryMicrokernelTester().batch_size(5).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12670
// Batch sizes that are multiples of 5: 10, 15, ..., 45.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_div_5) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 5)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12678
// Batch sizes 1 through 4.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_lt_5) {
  for (size_t n = 1; n <= 4; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12686
// Batch sizes 6 through 9.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_gt_5) {
  for (size_t n = 6; n <= 9; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12694
// inplace(true) at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__WASM_RR2_P6_X5, inplace) {
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12703
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__WASM_RR2_P6_X5, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12714
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__WASM_RR2_P6_X5, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12725
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__WASM_RR2_P6_X5, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12736 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12737
12738
12739 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 6.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_eq_6) {
  VUnaryMicrokernelTester().batch_size(6).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12745
// Batch sizes that are multiples of 6: 12, 18, ..., 54.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_div_6) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 6)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12753
// Batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_lt_6) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12761
// Batch sizes 7 through 11.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_gt_6) {
  for (size_t n = 7; n <= 11; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12769
// inplace(true) at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__WASM_RR2_P6_X6, inplace) {
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12778
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__WASM_RR2_P6_X6, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12789
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__WASM_RR2_P6_X6, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12800
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__WASM_RR2_P6_X6, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__wasm_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12811 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12812
12813
// Single run with batch_size fixed at 1.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_eq_1) {
  VUnaryMicrokernelTester().batch_size(1).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12819
// Batch sizes 2 through 9.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_gt_1) {
  for (size_t n = 2; n <= 9; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12827
// inplace(true) at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, inplace) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12836
// Each prescale in {0.1, 10.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12847
// Each alpha in {0.3, 3.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12858
// Each beta in {0.3, 3.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12869
12870
// Single run with batch_size fixed at 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_eq_2) {
  VUnaryMicrokernelTester().batch_size(2).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12876
// Batch sizes that are multiples of 2: 4, 6, ..., 18.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_div_2) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 2)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12884
// The loop covers the only batch size below 2, which is 1.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_lt_2) {
  for (size_t n = 1; n <= 1; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12892
// The loop covers a single batch size, 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_gt_2) {
  for (size_t n = 3; n <= 3; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12900
// inplace(true) at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12909
// Each prescale in {0.1, 10.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12920
// Each alpha in {0.3, 3.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12931
// Each beta in {0.3, 3.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12942
12943
// Single run with batch_size fixed at 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_eq_3) {
  VUnaryMicrokernelTester().batch_size(3).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12949
// Batch sizes that are multiples of 3: 6, 9, ..., 27.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_div_3) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 3)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12957
// Batch sizes 1 and 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_lt_3) {
  for (size_t n = 1; n <= 2; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12965
// Batch sizes 4 and 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_gt_3) {
  for (size_t n = 4; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12973
// inplace(true) at batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12982
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12993
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13004
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13015
13016
// Single run with batch_size fixed at 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_eq_4) {
  VUnaryMicrokernelTester().batch_size(4).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13022
// Batch sizes that are multiples of 4: 8, 12, ..., 36.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_div_4) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 4)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13030
// Batch sizes 1 through 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_lt_4) {
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13038
// Batch sizes 5 through 7.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_gt_4) {
  for (size_t n = 5; n <= 7; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13046
// inplace(true) at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13055
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13066
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13077
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13088
13089
// Single run with batch_size fixed at 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_eq_5) {
  VUnaryMicrokernelTester().batch_size(5).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13095
// Batch sizes that are multiples of 5: 10, 15, ..., 45.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_div_5) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 5)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13103
// Batch sizes 1 through 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_lt_5) {
  for (size_t n = 1; n <= 4; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13111
// Batch sizes 6 through 9.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_gt_5) {
  for (size_t n = 6; n <= 9; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13119
// inplace(true) at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, inplace) {
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13128
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13139
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13150
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13161
13162
// Single run with batch_size fixed at 6.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_eq_6) {
  VUnaryMicrokernelTester().batch_size(6).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13168
// Batch sizes that are multiples of 6: 12, 18, ..., 54.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_div_6) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 6)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13176
// Batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_lt_6) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13184
// Batch sizes 7 through 11.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_gt_6) {
  for (size_t n = 7; n <= 11; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13192
// inplace(true) at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, inplace) {
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13201
// Each prescale in {0.1, 10.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13212
// Each alpha in {0.3, 3.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13223
// Each beta in {0.3, 3.0} is tried at batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6, xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13234
13235
// Single run with batch_size fixed at 1.
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_eq_1) {
  VUnaryMicrokernelTester().batch_size(1).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13241
// Batch sizes 2 through 9.
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_gt_1) {
  for (size_t n = 2; n <= 9; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x1, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13249
// inplace(true) at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, inplace) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x1, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13258
// Each prescale in {0.1, 10.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x1, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13269
// Each alpha in {0.3, 3.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x1, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13280
// Each beta in {0.3, 3.0} is tried at batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x1, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13291
13292
// Single run with batch_size fixed at 2.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_eq_2) {
  VUnaryMicrokernelTester().batch_size(2).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13298
// Batch sizes that are multiples of 2: 4, 6, ..., 18.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_div_2) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 2)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13306
// The loop covers the only batch size below 2, which is 1.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_lt_2) {
  for (size_t n = 1; n <= 1; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13314
// The loop covers a single batch size, 3.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_gt_2) {
  for (size_t n = 3; n <= 3; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13322
// inplace(true) at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13331
// Each prescale in {0.1, 10.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, prescale) {
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13342
// Each alpha in {0.3, 3.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, alpha) {
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13353
// Each beta in {0.3, 3.0} is tried at batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, beta) {
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x2, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13364
13365
// Single run with batch_size fixed at 3.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_eq_3) {
  VUnaryMicrokernelTester().batch_size(3).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13371
// Batch sizes that are multiples of 3: 6, 9, ..., 27.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_div_3) {
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 3)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13379
// Batch sizes 1 and 2.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_lt_3) {
  for (size_t n = 1; n <= 2; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13387
// Batch sizes 4 and 5.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_gt_3) {
  for (size_t n = 4; n <= 5; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13395
// inplace(true) at batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_P6_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13404
TEST(F32_VELU__SCALAR_RR2_P6_X3, prescale) {
  // For each tester prescale value (0.1, then 10.0), run batch sizes
  // 1, 3, ..., 15.
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .prescale(prescale_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13415
TEST(F32_VELU__SCALAR_RR2_P6_X3, alpha) {
  // For each tester alpha value (0.3, then 3.0), run batch sizes
  // 1, 3, ..., 15.
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .alpha(alpha_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13426
TEST(F32_VELU__SCALAR_RR2_P6_X3, beta) {
  // For each tester beta value (0.3, then 3.0), run batch sizes
  // 1, 3, ..., 15.
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .beta(beta_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x3, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13437
13438
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_eq_4) {
  // Single run with a batch of exactly 4 elements (the x4 in the kernel name).
  VUnaryMicrokernelTester tester;
  tester.batch_size(4)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
}
13444
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_div_4) {
  // Batch sizes 8, 12, ..., 36: multiples of 4 from 2x up to 9x.
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(multiple * 4)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13452
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_lt_4) {
  // Batch sizes below 4: 1, 2 and 3.
  for (size_t n = 1; n <= 3; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13460
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_gt_4) {
  // Batch sizes strictly between 4 and 8: 4 plus a leftover of 1 to 3.
  for (size_t leftover = 1; leftover < 4; ++leftover) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(4 + leftover)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13468
TEST(F32_VELU__SCALAR_RR2_P6_X4, inplace) {
  // Batch sizes 1, 4, 7, ..., 19 with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13477
TEST(F32_VELU__SCALAR_RR2_P6_X4, prescale) {
  // For each tester prescale value (0.1, then 10.0), run batch sizes
  // 1, 4, 7, ..., 19.
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .prescale(prescale_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13488
TEST(F32_VELU__SCALAR_RR2_P6_X4, alpha) {
  // For each tester alpha value (0.3, then 3.0), run batch sizes
  // 1, 4, 7, ..., 19.
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .alpha(alpha_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13499
TEST(F32_VELU__SCALAR_RR2_P6_X4, beta) {
  // For each tester beta value (0.3, then 3.0), run batch sizes
  // 1, 4, 7, ..., 19.
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .beta(beta_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x4, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13510
13511
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_eq_5) {
  // Single run with a batch of exactly 5 elements (the x5 in the kernel name).
  VUnaryMicrokernelTester tester;
  tester.batch_size(5)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
}
13517
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_div_5) {
  // Batch sizes 10, 15, ..., 45: multiples of 5 from 2x up to 9x.
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(multiple * 5)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13525
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_lt_5) {
  // Batch sizes below 5: 1 through 4.
  for (size_t n = 1; n <= 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13533
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_gt_5) {
  // Batch sizes strictly between 5 and 10: 5 plus a leftover of 1 to 4.
  for (size_t leftover = 1; leftover < 5; ++leftover) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(5 + leftover)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13541
TEST(F32_VELU__SCALAR_RR2_P6_X5, inplace) {
  // Batch sizes 1, 5, 9, ..., 25 with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13550
TEST(F32_VELU__SCALAR_RR2_P6_X5, prescale) {
  // For each tester prescale value (0.1, then 10.0), run batch sizes
  // 1, 5, 9, ..., 25.
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .prescale(prescale_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13561
TEST(F32_VELU__SCALAR_RR2_P6_X5, alpha) {
  // For each tester alpha value (0.3, then 3.0), run batch sizes
  // 1, 5, 9, ..., 25.
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .alpha(alpha_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13572
TEST(F32_VELU__SCALAR_RR2_P6_X5, beta) {
  // For each tester beta value (0.3, then 3.0), run batch sizes
  // 1, 5, 9, ..., 25.
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .beta(beta_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x5, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13583
13584
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_eq_6) {
  // Single run with a batch of exactly 6 elements (the x6 in the kernel name).
  VUnaryMicrokernelTester tester;
  tester.batch_size(6)
      .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
}
13590
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_div_6) {
  // Batch sizes 12, 18, ..., 54: multiples of 6 from 2x up to 9x.
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(multiple * 6)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13598
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_lt_6) {
  // Batch sizes below 6: 1 through 5.
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13606
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_gt_6) {
  // Batch sizes strictly between 6 and 12: 6 plus a leftover of 1 to 5.
  for (size_t leftover = 1; leftover < 6; ++leftover) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(6 + leftover)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13614
TEST(F32_VELU__SCALAR_RR2_P6_X6, inplace) {
  // Batch sizes 1, 6, 11, ..., 26 with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13623
TEST(F32_VELU__SCALAR_RR2_P6_X6, prescale) {
  // For each tester prescale value (0.1, then 10.0), run batch sizes
  // 1, 6, 11, ..., 26.
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .prescale(prescale_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13634
TEST(F32_VELU__SCALAR_RR2_P6_X6, alpha) {
  // For each tester alpha value (0.3, then 3.0), run batch sizes
  // 1, 6, 11, ..., 26.
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .alpha(alpha_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13645
TEST(F32_VELU__SCALAR_RR2_P6_X6, beta) {
  // For each tester beta value (0.3, then 3.0), run batch sizes
  // 1, 6, 11, ..., 26.
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n)
          .beta(beta_value)
          .Test(xnn_f32_velu_ukernel__scalar_rr2_p6_x6, xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13656